├── tests ├── test_nupack │ ├── __init__.py │ └── data │ │ ├── pairs_rna99.tsv │ │ ├── pairs_dna.tsv │ │ ├── pairs_rna.tsv │ │ ├── pairs_multi_dna.epairs │ │ ├── pairs_multi_dna.ppairs │ │ ├── pairs_multi_rna99.epairs │ │ ├── pairs_multi_rna99.ppairs │ │ ├── pairs_multi_rna99.tsv │ │ ├── pairs_multi_rna.epairs │ │ ├── pairs_multi_rna.ppairs │ │ ├── pairs_multi_rna.tsv │ │ ├── concentrations_pairs.cx-fpairs │ │ ├── complexes_mfe_rna.ocx-mfe │ │ ├── complexes_mfe_rna99.ocx-mfe │ │ └── complexes_mfe_dna.ocx-mfe ├── tests │ ├── test_analysis │ │ ├── __init__.py │ │ ├── test_tm.py │ │ ├── test_repeats.py │ │ └── test_utils.py │ ├── test_design │ │ ├── __init__.py │ │ ├── test_randomdna.py │ │ ├── test_randomcodons.py │ │ ├── test_gibson.py │ │ ├── test_oligoassembly.py │ │ └── test_primer.py │ ├── test_reaction │ │ ├── __init__.py │ │ ├── test_gibson │ │ │ ├── __init__.py │ │ │ ├── fragment_1.fasta │ │ │ ├── fragment_2.fasta │ │ │ ├── fragment_3_linear.fasta │ │ │ ├── fragment_3.fasta │ │ │ ├── gibson_test.fasta │ │ │ └── test_gibson_construction.py │ │ ├── test_restriction.py │ │ ├── test_utils.py │ │ └── test_central_dogma.py │ ├── test_sequence │ │ ├── __init__.py │ │ ├── test_rna.py │ │ └── test_peptide.py │ └── __init__.py └── optional_tests │ ├── slow_tests │ ├── __init__.py │ ├── test_analysis │ │ ├── __init__.py │ │ ├── test_needle.py │ │ └── test_structure_windows.py │ └── test_design │ │ └── test_oligoassembly.py │ └── test_dimers.py ├── coral ├── utils │ ├── __init__.py │ └── tempdirs.py ├── analysis │ ├── _sequencing │ │ ├── __init__.py │ │ ├── substitution_matrices │ │ │ ├── __init__.py │ │ │ ├── dna_simple.py │ │ │ ├── substitution_matrix.py │ │ │ ├── dna.py │ │ │ └── blosum62.py │ │ └── mafft.py │ ├── _sequence │ │ ├── __init__.py │ │ ├── repeats.py │ │ └── tm_params.py │ ├── _structure │ │ ├── __init__.py │ │ ├── structure_analyzer.py │ │ ├── dimers.py │ │ └── structure_windows.py │ ├── utils.py │ └── __init__.py ├── design │ ├── _oligo_synthesis │ │ └── 
__init__.py │ ├── _sequence_generation │ │ ├── __init__.py │ │ └── random_sequences.py │ ├── __init__.py │ └── _primers.py ├── constants │ ├── __init__.py │ ├── restriction_sites.py │ └── genbank.py ├── database │ ├── __init__.py │ ├── _entrez.py │ └── _rebase.py ├── seqio │ └── __init__.py ├── sequence │ ├── __init__.py │ ├── _rna.py │ └── _peptide.py ├── __init__.py └── reaction │ ├── __init__.py │ ├── _resect.py │ ├── _central_dogma.py │ ├── utils.py │ └── _restriction.py ├── docs ├── tutorial │ ├── sequences_files │ │ ├── sequences_23_0.text │ │ ├── sequences_25_0.text │ │ ├── sequences_9_0.text │ │ ├── sequences_21_0.text │ │ ├── sequences_22_0.text │ │ ├── sequences_10_0.text │ │ ├── sequences_6_0.text │ │ ├── sequences_8_0.text │ │ ├── sequences_11_0.text │ │ ├── sequences_7_0.text │ │ ├── sequences_12_0.text │ │ ├── sequences_19_0.text │ │ ├── sequences_27_0.text │ │ ├── sequences_15_0.text │ │ ├── sequences_24_0.text │ │ ├── sequences_28_0.text │ │ ├── sequences_4_0.text │ │ ├── sequences_4_1.text │ │ ├── sequences_26_0.text │ │ └── sequences_17_0.text │ ├── seqio_files │ │ ├── seqio_3_0.text │ │ ├── seqio_4_0.text │ │ ├── seqio_12_0.text │ │ ├── seqio_13_0.text │ │ ├── seqio_9_0.text │ │ ├── seqio_10_0.text │ │ ├── seqio_5_0.text │ │ ├── seqio_6_0.text │ │ ├── seqio_7_0.text │ │ └── seqio_8_0.text │ ├── design │ │ ├── design_primers_files │ │ │ ├── design_primers_3_0.text │ │ │ ├── design_primers_7_0.text │ │ │ ├── design_primers_5_0.text │ │ │ ├── design_primers_11_0.text │ │ │ └── design_primers_3_1.text │ │ ├── index.rst │ │ ├── designed_primers.csv │ │ └── design_primers.rst │ ├── introduction │ │ ├── modules_files │ │ │ ├── modules_6_0.text │ │ │ ├── modules_3_0.text │ │ │ └── modules_1_0.text │ │ ├── index.rst │ │ └── modules.rst │ ├── analysis │ │ ├── analysis_sequencing_files │ │ │ ├── analysis_sequencing_5_0.text │ │ │ ├── analysis_sequencing_9_0.text │ │ │ ├── analysis_sequencing_7_0.png │ │ │ ├── analysis_sequencing_9_0.png │ │ │ └── 
analysis_sequencing_7_0.text │ │ ├── index.rst │ │ └── analysis_sequencing.rst │ ├── index.rst │ └── files_for_tutorial │ │ └── sequencing_files │ │ ├── pMODKan-HO-pACT1GEV_C3-904.seq │ │ ├── pMODKan-HO-pACT1GEV_C3-905.seq │ │ ├── pMODKan-HO-pACT1GEV_C3-66.seq │ │ ├── pMODKan-HO-pACT1GEV_C3-806.seq │ │ ├── pMODKan-HO-pACT1GEV_C3-559.seq │ │ ├── pMODKan-HO-pACT1GEV_C3-686.seq │ │ ├── pMODKan-HO-pACT1GEV_C3-716.seq │ │ ├── pMODKan-HO-pACT1GEV_C3-771.seq │ │ ├── pMODKan-HO-pACT1GEV_C3-676.seq │ │ ├── pMODKan-HO-pACT1GEV_C3-344.seq │ │ ├── pMODKan-HO-pACT1GEV_C3-T7-EEV.seq │ │ ├── pMODKan-HO-pACT1GEV_C3-M13R.seq │ │ ├── pMODKan-HO-pACT1GEV_C3-M13F(-47).seq │ │ ├── pMODKan-HO-pACT1GEV_C3-675.seq │ │ └── pMODKan-HO-pACT1GEV_C3-889.seq ├── coral_256.png ├── index.rst └── api.rst ├── requirements.txt ├── dev-requirements.txt ├── Developers.md ├── setup.cfg ├── .gitignore ├── .travis.yml ├── bin ├── build_sphinx_docs.py ├── ipynb2rst.py └── run_docs_server.py ├── MANIFEST.in ├── COPYING ├── pre-commit.sh ├── README.md ├── setup.py └── HISTORY.md /tests/test_nupack/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/tests/test_analysis/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/tests/test_design/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/tests/test_reaction/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/tests/test_sequence/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /tests/optional_tests/slow_tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /coral/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from . import tempdirs 2 | -------------------------------------------------------------------------------- /docs/tutorial/sequences_files/sequences_23_0.text: -------------------------------------------------------------------------------- 1 | 'ds' -------------------------------------------------------------------------------- /tests/tests/test_reaction/test_gibson/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/tutorial/sequences_files/sequences_25_0.text: -------------------------------------------------------------------------------- 1 | 'linear' -------------------------------------------------------------------------------- /tests/optional_tests/slow_tests/test_analysis/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/tutorial/seqio_files/seqio_3_0.text: -------------------------------------------------------------------------------- 1 | 'pMODKan_HO_pACT1GE' -------------------------------------------------------------------------------- /docs/tutorial/seqio_files/seqio_4_0.text: -------------------------------------------------------------------------------- 1 | 'pMODKan_HO_pACT1GE' -------------------------------------------------------------------------------- /coral/analysis/_sequencing/__init__.py: -------------------------------------------------------------------------------- 1 | 
'''Sequencing analysis.''' 2 | -------------------------------------------------------------------------------- /docs/tutorial/design/design_primers_files/design_primers_3_0.text: -------------------------------------------------------------------------------- 1 | 717 2 | -------------------------------------------------------------------------------- /docs/tutorial/design/design_primers_files/design_primers_7_0.text: -------------------------------------------------------------------------------- 1 | True -------------------------------------------------------------------------------- /coral/analysis/_sequence/__init__.py: -------------------------------------------------------------------------------- 1 | '''Sequence analysis utilities.''' 2 | -------------------------------------------------------------------------------- /coral/analysis/_structure/__init__.py: -------------------------------------------------------------------------------- 1 | '''Structure analysis utilities.''' 2 | -------------------------------------------------------------------------------- /docs/tutorial/introduction/modules_files/modules_6_0.text: -------------------------------------------------------------------------------- 1 | 48.03216557174494 -------------------------------------------------------------------------------- /docs/tutorial/sequences_files/sequences_9_0.text: -------------------------------------------------------------------------------- 1 | linear dsDNA: 2 | ATG 3 | TAC -------------------------------------------------------------------------------- /docs/tutorial/sequences_files/sequences_21_0.text: -------------------------------------------------------------------------------- 1 | 'ATGAGTAAAGGAGAAGAACTTTTCACTGGA' -------------------------------------------------------------------------------- /docs/tutorial/sequences_files/sequences_22_0.text: -------------------------------------------------------------------------------- 1 | 'TCCAGTGAAAAGTTCTTCTCCTTTACTCAT' 
-------------------------------------------------------------------------------- /docs/coral_256.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klavinslab/coral/HEAD/docs/coral_256.png -------------------------------------------------------------------------------- /docs/tutorial/sequences_files/sequences_10_0.text: -------------------------------------------------------------------------------- 1 | linear dsDNA: 2 | CACTGGA 3 | GTGACCT -------------------------------------------------------------------------------- /docs/tutorial/sequences_files/sequences_6_0.text: -------------------------------------------------------------------------------- 1 | Encountered a non-DNA character 2 | -------------------------------------------------------------------------------- /docs/tutorial/sequences_files/sequences_8_0.text: -------------------------------------------------------------------------------- 1 | linear dsDNA: 2 | cgtacgta 3 | gcatgcat -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | argparse==1.2.1 2 | biopython==1.66 3 | numpy==1.10.2 4 | wsgiref==0.1.2 5 | -------------------------------------------------------------------------------- /coral/design/_oligo_synthesis/__init__.py: -------------------------------------------------------------------------------- 1 | '''Oligo synthesis and assembly design tools.''' 2 | -------------------------------------------------------------------------------- /docs/tutorial/analysis/analysis_sequencing_files/analysis_sequencing_5_0.text: -------------------------------------------------------------------------------- 1 | (Aligning...) 
2 | -------------------------------------------------------------------------------- /docs/tutorial/design/index.rst: -------------------------------------------------------------------------------- 1 | Design 2 | ========= 3 | 4 | .. include:: design_primers.rst 5 | -------------------------------------------------------------------------------- /coral/design/_sequence_generation/__init__.py: -------------------------------------------------------------------------------- 1 | '''Generate sequences de novo or from templates.''' 2 | -------------------------------------------------------------------------------- /docs/tutorial/sequences_files/sequences_11_0.text: -------------------------------------------------------------------------------- 1 | linear dsDNA: 2 | AGGAAGGAACTTATG 3 | TCCTTCCTTGAATAC -------------------------------------------------------------------------------- /docs/tutorial/analysis/index.rst: -------------------------------------------------------------------------------- 1 | Analysis 2 | ========= 3 | 4 | .. 
include:: analysis_sequencing.rst 5 | -------------------------------------------------------------------------------- /dev-requirements.txt: -------------------------------------------------------------------------------- 1 | Sphinx==1.3.3 2 | nose==1.3.7 3 | twine==1.6.5 4 | wheel==0.29.0 5 | zest.releaser==6.6.2 6 | -------------------------------------------------------------------------------- /docs/tutorial/analysis/analysis_sequencing_files/analysis_sequencing_9_0.text: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/tutorial/sequences_files/sequences_7_0.text: -------------------------------------------------------------------------------- 1 | AGGTCACTTTTCAAGAAGAGGAAATGAGTA 2 | AGGTCACTTTTCAAGAAGAGGAAATGAGTA 3 | -------------------------------------------------------------------------------- /docs/tutorial/seqio_files/seqio_12_0.text: -------------------------------------------------------------------------------- 1 | [[78, 286, 1380, 2431, 4177, 4315, 7261, 7556], [737, 3718, 3828, 4131, 6939]] -------------------------------------------------------------------------------- /docs/tutorial/seqio_files/seqio_13_0.text: -------------------------------------------------------------------------------- 1 | [[78, 286, 1380, 2431, 4177, 4315, 7261, 7556], [737, 3718, 3828, 4131, 6939]] -------------------------------------------------------------------------------- /docs/tutorial/sequences_files/sequences_12_0.text: -------------------------------------------------------------------------------- 1 | 'AT' is in our sequence: True. 2 | 'ATT' is in our sequence: False. 
3 | -------------------------------------------------------------------------------- /docs/tutorial/sequences_files/sequences_19_0.text: -------------------------------------------------------------------------------- 1 | ATGAGTAAAGGAGAAGAACTTTTCACTGGA 2 | 3 | TCCAGTGAAAAGTTCTTCTCCTTTACTCAT 4 | -------------------------------------------------------------------------------- /docs/tutorial/sequences_files/sequences_27_0.text: -------------------------------------------------------------------------------- 1 | ATGAGTAAAGGAGAAGAACTTTTCACTGGA 2 | 3 | GAGTAAAGGAGAAGAACTTTTCACTGGAAT 4 | -------------------------------------------------------------------------------- /docs/tutorial/sequences_files/sequences_15_0.text: -------------------------------------------------------------------------------- 1 | linear dsDNA: 2 | TCCAGTGAAAAGTTCTTCTCCTTTACTCAT 3 | AGGTCACTTTTCAAGAAGAGGAAATGAGTA -------------------------------------------------------------------------------- /docs/tutorial/sequences_files/sequences_24_0.text: -------------------------------------------------------------------------------- 1 | linear ssDNA: 2 | ATGAGTAAAGGAGAAGAACTTTTCACTGGA 3 | ------------------------------ -------------------------------------------------------------------------------- /docs/tutorial/sequences_files/sequences_28_0.text: -------------------------------------------------------------------------------- 1 | linear dsDNA: 2 | TCCAGTGAAAAGTTCTTCTCCTTTACTCAT 3 | AGGTCACTTTTCAAGAAGAGGAAATGAGTA -------------------------------------------------------------------------------- /docs/tutorial/sequences_files/sequences_4_0.text: -------------------------------------------------------------------------------- 1 | linear dsDNA: 2 | ATGAGTAAAGGAGAAGAACTTTTCACTGGA 3 | TACTCATTTCCTCTTCTTGAAAAGTGACCT -------------------------------------------------------------------------------- /docs/tutorial/sequences_files/sequences_4_1.text: 
-------------------------------------------------------------------------------- 1 | linear dsDNA: 2 | atgagtaaaggagaagaacttttcactgga 3 | tactcatttcctcttcttgaaaagtgacct -------------------------------------------------------------------------------- /docs/tutorial/design/design_primers_files/design_primers_5_0.text: -------------------------------------------------------------------------------- 1 | ATGGTGAGCAAGGGCG 2 | 3 | GGGGGATCGATATGGTGAGCAAGGGCGAGGAGCTGTTCAC 4 | -------------------------------------------------------------------------------- /docs/tutorial/introduction/modules_files/modules_3_0.text: -------------------------------------------------------------------------------- 1 | DNA: ATGC 2 | Palindrome?: False 3 | 4 | RNA: AUGC 5 | 6 | Peptide: MLNP 7 | -------------------------------------------------------------------------------- /docs/tutorial/sequences_files/sequences_26_0.text: -------------------------------------------------------------------------------- 1 | circular dsDNA: 2 | ATGAGTAAAGGAGAAGAACTTTTCACTGGA 3 | TACTCATTTCCTCTTCTTGAAAAGTGACCT -------------------------------------------------------------------------------- /docs/tutorial/design/designed_primers.csv: -------------------------------------------------------------------------------- 1 | name,sequence,notes 2 | Forward EYFP primer,ATGGTGAGCAAGGGCG, 3 | Reverse EYFP primer,CTTGTACAGCTCGTCCATGCC, 4 | -------------------------------------------------------------------------------- /Developers.md: -------------------------------------------------------------------------------- 1 | Style 2 | ===== 3 | 4 | * Follow [pep8](https://www.python.org/dev/peps/pep-0008/) 5 | 6 | * Always use single quotes, even within a string. 7 | -------------------------------------------------------------------------------- /coral/constants/__init__.py: -------------------------------------------------------------------------------- 1 | '''Convenient data files for use in other modules.''' 2 | from . 
import molecular_bio 3 | from . import genbank 4 | from .restriction_sites import fallback_enzymes 5 | -------------------------------------------------------------------------------- /docs/tutorial/analysis/analysis_sequencing_files/analysis_sequencing_7_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klavinslab/coral/HEAD/docs/tutorial/analysis/analysis_sequencing_files/analysis_sequencing_7_0.png -------------------------------------------------------------------------------- /docs/tutorial/analysis/analysis_sequencing_files/analysis_sequencing_9_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/klavinslab/coral/HEAD/docs/tutorial/analysis/analysis_sequencing_files/analysis_sequencing_9_0.png -------------------------------------------------------------------------------- /docs/tutorial/design/design_primers_files/design_primers_11_0.text: -------------------------------------------------------------------------------- 1 | ['name', 'sequence', 'notes'] 2 | ['Forward EYFP primer', 'ATGGTGAGCAAGGGCG', ''] 3 | ['Reverse EYFP primer', 'CTTGTACAGCTCGTCCATGCC', ''] 4 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [nosetests] 2 | exclude = optional_tests|test_nupack 3 | [zest.releaser] 4 | python-file-with-version = coral/__init__.py 5 | create-wheel = no 6 | [check-manifest] 7 | ignore = 8 | .travis.yml 9 | -------------------------------------------------------------------------------- /docs/tutorial/seqio_files/seqio_9_0.text: -------------------------------------------------------------------------------- 1 | linear dsDNA: 2 | ttaccaatgcttaatcagtgaggcacctatctcagcgatc ... aaaagggaataagggcgacacggaaatgttgaatactcat 3 | aatggttacgaattagtcactccgtggatagagtcgctag ... 
ttttcccttattcccgctgtgcctttacaacttatgagta -------------------------------------------------------------------------------- /docs/tutorial/index.rst: -------------------------------------------------------------------------------- 1 | .. _tutorial: 2 | 3 | Tutorial 4 | ======== 5 | 6 | .. toctree:: 7 | :maxdepth: 1 8 | 9 | introduction/index 10 | sequences 11 | seqio 12 | design/index 13 | analysis/index 14 | -------------------------------------------------------------------------------- /docs/tutorial/seqio_files/seqio_10_0.text: -------------------------------------------------------------------------------- 1 | linear dsDNA: 2 | TTACCAATGCTTAATCAGTGAGGCACCTATCTCAGCGATC ... AAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCAT 3 | AATGGTTACGAATTAGTCACTCCGTGGATAGAGTCGCTAG ... TTTTCCCTTATTCCCGCTGTGCCTTTACAACTTATGAGTA -------------------------------------------------------------------------------- /docs/tutorial/seqio_files/seqio_5_0.text: -------------------------------------------------------------------------------- 1 | circular dsDNA: 2 | tcgcgcgtttcggtgatgacggtgaaaacctctgacacat ... ttaacctataaaaataggcgtatcacgaggccctttcgtc 3 | agcgcgcaaagccactactgccacttttggagactgtgta ... aattggatatttttatccgcatagtgctccgggaaagcag -------------------------------------------------------------------------------- /docs/tutorial/seqio_files/seqio_6_0.text: -------------------------------------------------------------------------------- 1 | circular dsDNA: 2 | TCGCGCGTTTCGGTGATGACGGTGAAAACCTCTGACACAT ... TTAACCTATAAAAATAGGCGTATCACGAGGCCCTTTCGTC 3 | AGCGCGCAAAGCCACTACTGCCACTTTTGGAGACTGTGTA ... 
AATTGGATATTTTTATCCGCATAGTGCTCCGGGAAAGCAG -------------------------------------------------------------------------------- /coral/database/__init__.py: -------------------------------------------------------------------------------- 1 | from ._rebase import Rebase 2 | from ._entrez import fetch_genome 3 | from ._yeast import fetch_yeast_locus_sequence, get_yeast_sequence 4 | from ._yeast import get_yeast_gene_location, get_yeast_promoter_ypa 5 | -------------------------------------------------------------------------------- /docs/tutorial/introduction/index.rst: -------------------------------------------------------------------------------- 1 | Introduction 2 | ============ 3 | 4 | So you want to use coral? Congratulations, because it's super awesome. 5 | 6 | Let's start with an introduction to the main coral modules. 7 | 8 | .. include:: modules.rst 9 | -------------------------------------------------------------------------------- /docs/tutorial/design/design_primers_files/design_primers_3_1.text: -------------------------------------------------------------------------------- 1 | linear dsDNA: 2 | ATGGTGAGCAAGGGCGAGGAGCTGTTCACCGGGGTGGTGC ... CGCCGCCGGGATCACTCTCGGCATGGACGAGCTGTACAAG 3 | TACCACTCGTTCCCGCTCCTCGACAAGTGGCCCCACCACG ... 
GCGGCGGCCCTAGTGAGAGCCGTACCTGCTCGACATGTTC -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ~* 2 | *__pycache__ 3 | 4 | *.coverage 5 | *.egg-info 6 | *.error 7 | *.idea 8 | *.ipynb_checkpoints 9 | *.log 10 | *.pyc 11 | *.so 12 | *.spyderworkspace 13 | *.swp 14 | 15 | .pypirc 16 | 17 | build 18 | dist 19 | docs/_build 20 | venv 21 | -------------------------------------------------------------------------------- /coral/analysis/_sequencing/substitution_matrices/__init__.py: -------------------------------------------------------------------------------- 1 | '''Substitution Matrix data structures and constants''' 2 | from .substitution_matrix import SubstitutionMatrix 3 | from .dna import DNA 4 | from .dna_simple import DNA_SIMPLE 5 | from .blosum62 import BLOSUM62 6 | -------------------------------------------------------------------------------- /coral/seqio/__init__.py: -------------------------------------------------------------------------------- 1 | '''Read and write cloning-relevant formats (sequences, chromatograms, etc).''' 2 | from coral.seqio._dna import read_dna 3 | from coral.seqio._dna import read_sequencing 4 | from coral.seqio._dna import write_dna 5 | from coral.seqio._dna import write_primers 6 | -------------------------------------------------------------------------------- /tests/test_nupack/data/pairs_rna99.tsv: -------------------------------------------------------------------------------- 1 | 1 9 9.5268e-03 2 | 5 10 2.4387e-03 3 | 1 11 9.8981e-01 4 | 2 11 9.9989e-01 5 | 3 11 9.9930e-01 6 | 4 11 1.0000e+00 7 | 5 11 9.9690e-01 8 | 6 11 9.9989e-01 9 | 7 11 9.9978e-01 10 | 8 11 9.9952e-01 11 | 9 11 9.9047e-01 12 | 10 11 9.9756e-01 13 | -------------------------------------------------------------------------------- /coral/sequence/__init__.py: 
-------------------------------------------------------------------------------- 1 | '''Classes to contain and manipulate DNA, RNA, and protein sequences.''' 2 | from ._dna import DNA 3 | from ._dna import ssDNA 4 | from ._dna import RestrictionSite 5 | from ._dna import Primer 6 | from ._peptide import Peptide 7 | from ._rna import RNA 8 | from ._sequence import Feature 9 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: python 3 | python: 4 | - "2.7" 5 | # install dependencies 6 | install: 7 | - "pip install -r requirements.txt" 8 | # run tests 9 | script: nosetests --exclude optional_tests\|test_nupack 10 | # speed up CI by caching dependencies 11 | cache: 12 | pip: true 13 | custom_install: true 14 | -------------------------------------------------------------------------------- /tests/test_nupack/data/pairs_dna.tsv: -------------------------------------------------------------------------------- 1 | 1 5 7.3358e-03 2 | 1 9 7.6248e-03 3 | 2 6 1.6009e-02 4 | 3 7 1.0501e-02 5 | 5 10 3.9654e-02 6 | 1 11 9.8504e-01 7 | 2 11 9.8399e-01 8 | 3 11 9.8947e-01 9 | 4 11 1.0000e+00 10 | 5 11 9.5301e-01 11 | 6 11 9.8399e-01 12 | 7 11 9.8950e-01 13 | 8 11 9.9997e-01 14 | 9 11 9.9238e-01 15 | 10 11 9.6035e-01 16 | -------------------------------------------------------------------------------- /tests/test_nupack/data/pairs_rna.tsv: -------------------------------------------------------------------------------- 1 | 1 5 7.7816e-03 2 | 1 9 1.1106e-01 3 | 2 6 6.7306e-03 4 | 3 7 1.1399e-02 5 | 3 8 1.8067e-03 6 | 5 10 7.7816e-03 7 | 1 11 8.8116e-01 8 | 2 11 9.9327e-01 9 | 3 11 9.8679e-01 10 | 4 11 1.0000e+00 11 | 5 11 9.8444e-01 12 | 6 11 9.9327e-01 13 | 7 11 9.8860e-01 14 | 8 11 9.9819e-01 15 | 9 11 8.8894e-01 16 | 10 11 9.9222e-01 17 | -------------------------------------------------------------------------------- 
/coral/design/__init__.py: -------------------------------------------------------------------------------- 1 | '''Cloning design classes and functions.''' 2 | from ._primers import primer, primers 3 | from ._sequence_generation.random_sequences import random_dna 4 | from ._sequence_generation.random_sequences import random_codons 5 | from ._oligo_synthesis.oligo_assembly import OligoAssembly 6 | from ._gibson import gibson_primers 7 | from ._gibson import gibson 8 | -------------------------------------------------------------------------------- /coral/analysis/_sequencing/substitution_matrices/dna_simple.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .substitution_matrix import SubstitutionMatrix 3 | 4 | 5 | DNA_SIMPLE = SubstitutionMatrix( 6 | np.array([[1, -1, -1, -1, -1], 7 | [-1, 1, -1, -1, -1], 8 | [-1, -1, 1, -1, -1], 9 | [-1, -1, -1, 1, -1], 10 | [-1, -1, -1, -1, -1]]), 11 | 'ATGCN') 12 | -------------------------------------------------------------------------------- /tests/tests/test_analysis/test_tm.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Tests for the Tm analysis class. 3 | 4 | ''' 5 | 6 | from nose.tools import assert_equal 7 | from coral import analysis, DNA 8 | 9 | 10 | def test_finnzymes(): 11 | ''' 12 | Tests finnzymes method output. 
13 | 14 | ''' 15 | 16 | melt = analysis.tm(DNA('ATGCGATAGCGATAGC'), parameters='cloning') 17 | assert_equal(melt, 55.2370030020752) 18 | -------------------------------------------------------------------------------- /docs/tutorial/introduction/modules_files/modules_1_0.text: -------------------------------------------------------------------------------- 1 | ['DNA', 2 | 'Feature', 3 | 'Peptide', 4 | 'Primer', 5 | 'RNA', 6 | 'RestrictionSite', 7 | '__builtins__', 8 | '__doc__', 9 | '__file__', 10 | '__name__', 11 | '__package__', 12 | '__path__', 13 | '_sequence', 14 | 'analysis', 15 | 'constants', 16 | 'database', 17 | 'design', 18 | 'matplotlib', 19 | 'reaction', 20 | 'seqio', 21 | 'simulation'] -------------------------------------------------------------------------------- /tests/tests/test_reaction/test_gibson/fragment_1.fasta: -------------------------------------------------------------------------------- 1 | >fragment1 2 | acctcccggccactattatccaaacagagttggaggagtataactgatatttgaagccga 3 | ccgtaacgcaccgcctatttcaacctcacacgttgctaacctcacgatcggattagaatt 4 | acggcttgccattggttgggaggagttgctctcaggggtggtctgaagcgccctaattta 5 | ttgcaccgcaataatcacgtccctacgccagcgcgtactgagaccttgaactgccgtccc 6 | taggagacggaaagtacattcctcctccaatgctgatagcgagaatccggctgcgtgttt 7 | ttcacgccaatatgctccctttctaaagatctgacaaccctccgatgcag 8 | -------------------------------------------------------------------------------- /tests/tests/test_reaction/test_gibson/fragment_2.fasta: -------------------------------------------------------------------------------- 1 | >fragment2 2 | ttcacgccaatatgctccctttctaaagatctgacaaccctccgatgcagcgtgttgcta 3 | cgcaacattggggggtacattccactgggggacctgcagtcgaatttccagaccagagct 4 | ttgtggtctttcgatctcccctgcggaaatggattgaacggatttttgccacgaatgctg 5 | gcatgaatttggcaacgcgacgatgcgtaacatctcaatttgctgggcctctgaagctgg 6 | atttctaaaggttgctatttttgagctgcttgatattagacagtacttctctctagatcc 7 | cgcaatccacatataacttgaacctgaccaatagcaaagtaattcgaacg 8 | 
-------------------------------------------------------------------------------- /tests/tests/test_design/test_randomdna.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Tests for RandomBases class of analysis module. 3 | 4 | ''' 5 | 6 | from nose.tools import assert_equal 7 | from coral import design 8 | 9 | 10 | def test_randomdna(): 11 | ''' 12 | This test is pretty basic right now - not sure how much checking 13 | can be done for a random DNA base generator. 14 | 15 | ''' 16 | 17 | output = design.random_dna(200) 18 | assert_equal(len(output), 200) 19 | -------------------------------------------------------------------------------- /tests/tests/test_analysis/test_repeats.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Tests Repeats analysis class. 3 | 4 | ''' 5 | 6 | from nose.tools import assert_equal 7 | from coral import analysis, DNA 8 | 9 | 10 | def test_find_repeats(): 11 | input_sequence = DNA('atgatgccccgatagtagtagtag') 12 | expected = [('ATG', 2), ('GTA', 3), ('GAT', 2), ('AGT', 3), ('CCC', 2), 13 | ('TAG', 4)] 14 | 15 | output = analysis.repeats(input_sequence, 3) 16 | assert_equal(output, expected) 17 | -------------------------------------------------------------------------------- /tests/tests/test_reaction/test_gibson/fragment_3_linear.fasta: -------------------------------------------------------------------------------- 1 | >fragment3_linear 2 | cgcaatccacatataacttgaacctgaccaatagcaaagtaattcgaacgtctgcttttg 3 | gcggtcgcattctcctttgcgggttagcataacataagtcatcgtgtaacgttgcgttag 4 | gcacgttaacgcagctaatgtaactctgagacgtctttccatggaccactcccgtacgcc 5 | tgtagtactggcagtcgattttactctcagacttgcgcgctcgggttctgtccctctccg 6 | tagtgcccaagcctgaatagcccgatgatctagtcggctagttgttggaactcaaacgcc 7 | ctaagctgcggctcgtcgtagttcacgccgctagaataagatttcccgcgatcttttgat 8 | atccgcttatggtcaagcagggtttgctagagtgcaccct 9 | 
-------------------------------------------------------------------------------- /coral/__init__.py: -------------------------------------------------------------------------------- 1 | '''Coral, core modules for synthetic DNA design.''' 2 | __version__ = '0.5.0' 3 | from . import analysis 4 | from . import constants 5 | from . import database 6 | from . import design 7 | from . import reaction 8 | from . import seqio 9 | from . import utils 10 | from .sequence import DNA 11 | from .sequence import ssDNA 12 | from .sequence import RNA 13 | from .sequence import Peptide 14 | from .sequence import Primer 15 | from .sequence import RestrictionSite 16 | from .sequence import Feature 17 | -------------------------------------------------------------------------------- /coral/reaction/__init__.py: -------------------------------------------------------------------------------- 1 | '''Reactions for simulating and designing cloning reactions and assemblies.''' 2 | from ._resect import three_resect 3 | from ._resect import five_resect 4 | from ._central_dogma import transcribe 5 | from ._central_dogma import translate 6 | from ._central_dogma import reverse_transcribe 7 | from ._central_dogma import coding_sequence 8 | from ._restriction import digest 9 | from ._pcr import pcr 10 | from ._gibson import gibson 11 | from ._oligo_assembly import assemble_oligos 12 | from ._oligo_assembly import bind_unique 13 | -------------------------------------------------------------------------------- /tests/tests/test_reaction/test_gibson/fragment_3.fasta: -------------------------------------------------------------------------------- 1 | >fragment3 2 | cgcaatccacatataacttgaacctgaccaatagcaaagtaattcgaacgtctgcttttg 3 | gcggtcgcattctcctttgcgggttagcataacataagtcatcgtgtaacgttgcgttag 4 | gcacgttaacgcagctaatgtaactctgagacgtctttccatggaccactcccgtacgcc 5 | tgtagtactggcagtcgattttactctcagacttgcgcgctcgggttctgtccctctccg 6 | tagtgcccaagcctgaatagcccgatgatctagtcggctagttgttggaactcaaacgcc 7 | 
class Structure(object):
    '''Convenience wrapper that answers structural questions about a strand
    by delegating to a calculator backend.'''

    def __init__(self, mode='viennarna'):
        '''Validate the requested backend and create its calculator.

        :param mode: Name of the backend to use. Only 'viennarna' is
                     accepted.
        :type mode: str
        :raises: ValueError if mode is not one of the accepted values.

        '''
        allowed = ['viennarna']
        if mode in allowed:
            self.mode = mode
            self.calculator = ViennaRNA()
        else:
            raise ValueError('Accepted values for mode are {}'.format(allowed))

    def mfe(self, strand):
        '''Return the 'mfe' entry of the calculator's fold result.

        :param strand: Sequence handed unchanged to the calculator's fold
                       method.
        :returns: The value stored under 'mfe' in the fold result, or None
                  when the current mode is not 'viennarna'.

        '''
        if self.mode != 'viennarna':
            return None
        folded = self.calculator.fold(strand)
        return folded['mfe']
test_dimers(): 7 | '''Test dimers function.''' 8 | anneal_f = DNA('gatcgatcgatacgatcgatatgcgat', ds=False) 9 | tm_f = 71.86183729637946 10 | primer_f = Primer(anneal_f, tm_f) 11 | 12 | anneal_r = DNA('atatcgatcatatcgcatatcgatcgtatcgat', ds=False) 13 | tm_r = 72.14300162714233 14 | primer_r = Primer(anneal_r, tm_r) 15 | 16 | dimer_output = analysis.dimers(primer_f, primer_r) 17 | 18 | assert_equal(dimer_output, 0.8529446) 19 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include coral *.pyx 2 | recursive-include bin *.py 3 | 4 | recursive-include docs *.ipynb *.py *.rst 5 | recursive-include docs *.ape *.csv *.seq *.text 6 | recursive-include docs *.png 7 | recursive-include docs *.bat Makefile 8 | 9 | recursive-include tests *.py 10 | recursive-include tests *.ape *.fasta *.tsv 11 | recursive-include tests *.epairs *.ppairs *.cx-epairs *.ocx-epairs *.cx-fpairs *.ocx-mfe 12 | 13 | include requirements.txt 14 | include dev-requirements.txt 15 | include coral/sequence/d3-plasmid.js 16 | include COPYING 17 | include *.md 18 | include *.sh 19 | 20 | prune *.pyc 21 | prune *.ipynb_checkpoints 22 | -------------------------------------------------------------------------------- /tests/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Tests to write: 2 | # 3 | # analysis: 4 | # TODO: write nupack_multiprocessing tests 5 | # TODO: write Sanger tests 6 | # 7 | # reaction: 8 | # TODO: write ReverseTranscription tests 9 | # TODO: write Transcription tests 10 | # TODO: write Translation tests 11 | # TODO: write Restriction tests 12 | # 13 | # seqio: 14 | # TODO: write read_dna tests 15 | # TODO: write read_seq tests 16 | # 17 | # sequence: 18 | # TODO: dna: 19 | # TODO: write tests for every public DNA method 20 | # TODO: write tests for RestrictionSite 21 | # TODO: rna: 22 | # 
def sequence_type(seq):
    '''Report which kind of coral sequence object was supplied.

    :param seq: Object to classify.
    :type seq: any
    :returns: 'dna' for a coral.DNA instance, 'rna' for a coral.RNA instance,
              or 'peptide' for a coral.Peptide instance.
    :rtype: str
    :raises: ValueError if seq is none of those types.

    '''
    # The checks run in DNA, RNA, Peptide order; the first match wins.
    if isinstance(seq, coral.DNA):
        return 'dna'
    if isinstance(seq, coral.RNA):
        return 'rna'
    if isinstance(seq, coral.Peptide):
        return 'peptide'
    raise ValueError('Input was not a recognized coral.sequence object.')
def repeats(seq, size):
    '''Count times that a sequence of a certain size is repeated.

    Every overlapping window of length ``size`` in ``str(seq)`` is counted,
    and only the windows seen more than once are reported.

    :param seq: Input sequence. It is converted with str() before counting.
    :type seq: coral.DNA or coral.RNA
    :param size: Size of the repeat to count.
    :type size: int
    :returns: (matched sequence, number of occurrences) pairs, in the
              iteration order of the underlying Counter.
    :rtype: list of tuples

    '''
    seq = str(seq)
    n_mers = [seq[i:i + size] for i in range(len(seq) - size + 1)]
    counted = Counter(n_mers)
    # No one cares about patterns that appear once, so exclude them.
    # items() is used instead of iteritems() so this runs on both Python 2
    # and Python 3 (iteritems() was removed in Python 3).
    found_repeats = [(key, value) for key, value in counted.items() if
                     value > 1]
    return found_repeats
def tempdir(fun):
    '''For use as a decorator of instance methods - creates a temporary dir
    named self._tempdir and then deletes it after the method runs.

    The directory is removed whether the method returns normally or raises.
    The wrapped method's name and docstring are preserved on the wrapper.

    :param fun: function to decorate
    :type fun: instance method
    :returns: Wrapper that manages self._tempdir around each call to fun.
    :rtype: function

    '''
    # Imported here so this helper needs no new module-level import.
    import functools

    @functools.wraps(fun)
    def wrapper(*args, **kwargs):
        self = args[0]
        # Remove a directory left behind by an earlier call. An instance that
        # has no _tempdir yet (or an empty/None one) is tolerated.
        stale = getattr(self, '_tempdir', None)
        if stale and os.path.isdir(stale):
            shutil.rmtree(stale)
        self._tempdir = tempfile.mkdtemp()
        try:
            return fun(*args, **kwargs)
        finally:
            # Check before deleting: if the method already removed the
            # directory, an unconditional rmtree would raise here and could
            # mask the method's own exception.
            if os.path.isdir(self._tempdir):
                shutil.rmtree(self._tempdir)
    return wrapper
-------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | Copyright (c) 2016 University of Washington 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 5 | 6 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 7 | 8 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
# Maps an enzyme name to a (recognition sequence, (int, int)) tuple.
# NOTE(review): the integer pair presumably gives the cut positions on the
# two strands relative to the start of the site - confirm against the code
# that consumes this table before relying on it.
fallback_enzymes = {'AflII': ('CTTAAG', (1, 5)),
                    'AgeI': ('ACCGGT', (1, 5)),
                    'BamHI': ('GGATCC', (1, 5)),
                    'DpnI': ('GATC', (2, 2)),
                    'DraI': ('TTTAAA', (3, 3)),
                    'EcoRI': ('GAATTC', (1, 5)),
                    'EcoRV': ('GATATC', (3, 3)),
                    'FokI': ('GGATG', (14, 18)),
                    'HindIII': ('AAGCTT', (1, 5)),
                    'NcoI': ('CCATGG', (1, 5)),
                    'NheI': ('GCTAGC', (1, 5)),
                    'NruI': ('TCGCGA', (3, 3)),
                    'PmeI': ('GTTTAAAC', (4, 4)),
                    'PstI': ('CTGCAG', (5, 1)),
                    'SpeI': ('ACTAGT', (1, 5)),
                    'XbaI': ('TCTAGA', (1, 5)),
                    'XhoI': ('CTCGAG', (1, 5)),
                    'XmaI': ('CCCGGG', (1, 5)),
                    'SnaBI': ('TACGTA', (3, 3)),
                    'AclI': ('AACGTT', (2, 3))}
gcacgttaacgcagctaatgtaactctgagacgtctttccatggaccactcccgtacgcc 15 | tgtagtactggcagtcgattttactctcagacttgcgcgctcgggttctgtccctctccg 16 | tagtgcccaagcctgaatagcccgatgatctagtcggctagttgttggaactcaaacgcc 17 | ctaagctgcggctcgtcgtagttcacgccgctagaataagatttcccgcgatcttttgat 18 | atccgcttatggtcaagcagggtttgctagagtgcaccct 19 | -------------------------------------------------------------------------------- /docs/tutorial/files_for_tutorial/sequencing_files/pMODKan-HO-pACT1GEV_C3-904.seq: -------------------------------------------------------------------------------- 1 | >pMODKan-HO-pACT1GEV_C3-904_D12.ab1 2 | NNNNNNNNNNNGANAGAAGACNGTAGCTTCTTTTTTTGNATCNGAACCTACCTGANNCNAANNNNNCTGAAGAACAACTG 3 | GGAGTGTCGCTACTCTCCCAAAACCAAAAGGTCTCCGCTGACTAGGGCACATCTGACAGAAGTGGAATCAAGGCTAGAAA 4 | GACTGGAACAGCTATTTCTACTGATTTTTCCTCGAGAAGACCTTGACATGATTTTGAAAATGGATTCTTTACAGGATATA 5 | AAAGCATTGTTGGGTACCCCTGCAGCTGCGTCGACTCTAGAGGATCCATCTGCTGGAGACATGAGAGCTGCCAACCTTTG 6 | GCCAAGCCCGCTCATGATCAAACGCTCTAAGAAGAACAGCCTGGCCTTGTCCCTGACGGCCGACCAGATGGTCAGTGCCT 7 | TGTTGGATGCTGAGCCCCCCATACTCTATTCCGAGTATGATCCTACCAGACCCTTCAGTGAAGCTTCGATGATGGGCTTA 8 | CTGACCAACCTGGCAGACAGGGAGCTGGTTCACATGATCAACTGGGCGAAGAGGGTGCCAGGCTTTGTGGATTTGACCCT 9 | CCATGATCAGGTCCACCTTCTAGAATGTGCCTGGCTAGAGATCCTGATGATTGGTCTCGTCTGGCGCTCCATGGAGCACC 10 | CAGTGAAGCTACTGTTTGCTCCTAACTTGCTCTTGGACAGGAACCAGGGAAAATGTGTAGAGGGCATGGTGGAGATCTTC 11 | GACATGCTGCTGGCTACATCATCTCGGTTCCGCATGATGAATCTGCAGGGAGAGGAGTTTGTGTGCCTCAAATCTATTAT 12 | TTTGCTTAATTCTGGAGTGTACACATTTCTGTCCAGCACCCTGAAGTCTCTGGAAGAGAANNACCATATCCACCGAGTCC 13 | TGNACAAGATCACAGACACTTTGATCCACCTGATGGCCAAGGCAGGCCTGANCCTGCANCAGCAGCACCAGCGGCTGGNC 14 | CAGCTCCTCNCATCNCTCCCACATCAGNACATGAGTAACAAAGGCATGNAGCATCTGTACAGCATGAAGTGCAAGAANGN 15 | NGNN 16 | -------------------------------------------------------------------------------- /docs/tutorial/files_for_tutorial/sequencing_files/pMODKan-HO-pACT1GEV_C3-905.seq: -------------------------------------------------------------------------------- 1 | 
>pMODKan-HO-pACT1GEV_C3-905_E12.ab1 2 | NNNNNANNNCANNAANCNNGAAAAATCTAAAAGCTGATGTAGTAGAAGATCCTATTCTTTAACAAAGATTGACCTTTTCT 3 | TTTTCTTCTTGGTTTGAGTAGAAAGGGGAAGGAAGAATACAAGAGAGAGGAAAAAAAGGAAGATAAAAAGAGAGCGTGAT 4 | ATAAATGAATATATATTAAACAAGAGAGATTGGGAAGGAAAGGATCAAACAAACCCAAAAATATTTCAAAAAGGAGAGAG 5 | AGAGGCGAGTTTGGTTTCAAAACGGTTTATTTATTTATGCAAGAGGACGTGGAAGAAAAAGAAGAAGGAAGAAAAAAATT 6 | TGAAAGAAAAAAACGCGTGGCGGGTAAAGAAGAAAATGGAAAATAGAGGCCGGGTGACAGAGAAATATTGAGGGTTAATT 7 | GGAAAATATGTTAGGGTGAGGCATATGTTTTTAAGGGTTTTGAGGATCCGATAAGGAAGAATGTAGGTTAAATGTTGTGC 8 | ATTAATTGCTGTGGCAGCTTACCCGCTTCCCCACACATTTTGGTAGTATCTGTCCTCTTGTTATTGTTACTGTAATTGTG 9 | TATATATGTTCTCGCGTGTGTCTTATTTACTTATTTAGTTATTATATTATATGGGTCTGCAAGGTAGAGGCGTCGTGACT 10 | GGGAAAACCCTGGCGGACACTGGATGGCGGCGTTAGTATCGAATCGACAGCAGTATAGCGACCAGCATTCACATACGATT 11 | GACGCATGATATTACTTTCTGCGCACTTAACTTCGCATCTGGGCAGATGATGTCGAGGCGAAAAAAAATATAAATCACGC 12 | TAACATTTGATTAAAATAGAACAACTACAATATAAAAAAACTATACAAATGACAAGTTCTTGAAAACAAGAATCTTTTTA 13 | TTGTCAGTACTGATTAGAAAAACTCATCGAGCATCANTGAAACTGCAATTTATTCATATCNNGATTATCAATACCATATT 14 | TTTGAAAAGCCGTTTCTGTAATGANNAGAAACTCNCNGAGGCAGTTCCATAGGATGGCAGATCCTGGNNNTCGGTNNTGC 15 | GATTC 16 | -------------------------------------------------------------------------------- /docs/tutorial/files_for_tutorial/sequencing_files/pMODKan-HO-pACT1GEV_C3-66.seq: -------------------------------------------------------------------------------- 1 | >pMODKan-HO-pACT1GEV_C3-66_A12.ab1 2 | NNNNNNNNNNNNNNNNNCANATACGAGCCGGAAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACA 3 | TTAATTGCGTTGCGCTCACTGCCCGCTTTCCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAACGCGC 4 | GGGGAGAGGCGGTTTGCGTATTGGGCGCTCTTCCGCTTCCTCGCTCACTGACTCGCTGCGCTCGGTCGTTCGGCTGCGGC 5 | GAGCGGTATCAGCTCACTCAAAGGCGGTAATACGGTTATCCACAGAATCAGGGGATAACGCAGGAAAGAACATGTGAGCA 6 | AAAGGCCAGCAAAAGGCCAGGAACCGTAAAAAGGCCGCGTTGCTGGCGTTTTTCCATAGGCTCCGCCCCCCTGACGAGCA 7 | TCACAAAAATCGACGCTCAAGTCAGAGGTGGCGAAACCCGACAGGACTATAAAGATACCAGGCGTTTCCCCCTGGAAGCT 8 | 
CCCTCGTGCGCTCTCCTGTTCCGACCCTGCCGCTTACCGGATACCTGTCCGCCTTTCTCCCTTCGGGAAGCGTGGCGCTT 9 | TCTCATAGCTCACGCTGTAGGTATCTCAGTTCGGTGTAGGTCGTTCGCTCCAAGCTGGGCTGTGTGCACGAACCCCCCGT 10 | TCAGCCCGACCGCTGCGCCTTATCCGGTAACTATCGTCTTGAGTCCAACCCGGTAAGACACGACTTATCGCCACTGGCAG 11 | CAGCCACTGGTAACAGGATTAGCAGAGCGAGGTATGTAGGCGGTGCTACAGAGTTCTTGAAGTGGTGGCCTAACTACGGC 12 | TACACTAGAAGAACAGTATTTGGTATCTGCGCTCTGCTGAAGCCAGTTACCTTCGGAAAAAGAGTTGGTAGCTCTTGATC 13 | CGGCAAACAAACCACCGCTGGTAGCGGTGGTTTTTTTGTTTGCAAGCAGCAGATTACGCGCAGAAAAAAAGNATCTCAAG 14 | ANATCCTTTGATCTTTTCTANGGGGTCTGACGCTCAGTGNACGAAAACTCACGTTAAAGGNTTTTGGTCATGAGATTATC 15 | AAAAAGGANNNTCNNNCTANATCN 16 | -------------------------------------------------------------------------------- /docs/tutorial/files_for_tutorial/sequencing_files/pMODKan-HO-pACT1GEV_C3-806.seq: -------------------------------------------------------------------------------- 1 | >pMODKan-HO-pACT1GEV_C3-806_C12.ab1 2 | TNNNNNNTANTTANNNCACGCTTACATTCACGCCCTCCCCCCACATCCGCTCTAACCGAAAAGGAAGGAGTTAGACAACC 3 | TGAAGTCTAGGTCCCTATTTATTTTTTTATAGTTATGTTAGTATTAAGAACGTTATTTATATTTCAAATTTTTCTTTTTT 4 | TTCTGTACAGACGCGTGTACGCATGTAACATTATACTGAAAACCTTGCTTGAGAAGGTTTTGGGACGCTCGAAGGCTTTA 5 | ATTTGATGTCGTAATAACCCCGCCCCGACGCCATTTTAAGTCCAAAGGCACAATTTTACGTTGGAACTAAACGGTAAAGA 6 | TCAAATATCACCTTTATTCTTTTTTGGTATGCATCGATTTCTTTGCCAGTAAGAACTACGTGCAACTCTACATTAGATCA 7 | AAATATAACGGATCATAAATGCAAACAACATTTGTCCAAAAGAAACAAAAATGTTGGCGTTTGTCTCGAACATGACACAA 8 | TACTCCTTGATCGAGGGACATTGCCCGATTAACTATGATTTATACTTTTTACTACAGGTACTCGTTCTCCCTACTATTTC 9 | ACCAGTATACGGTCGTTTTATGATTCATTTATTGCCTATAATGGTAAAAAGGGACATCTTAACCTTACGTGTTGCAATGA 10 | GACCATTCTTTTCGATTCATCAACTGTGAAATTTCTGAAATCCAGTACAATGCGAAACGCTACCGATAATGGCACCGTCT 11 | TTTGAATTATGAGGCCCAGATATGGCGTTCAATCTTCAACAAAAAAATGAATATCAAAACTAAATTCTATCTGGATAACA 12 | ATCTTTCTTTGGTTTGCAGAAATATCATTAAAAAACAATCCATAACTACTAANNNAAGGGTATAAATGGTTTGGTTTACT 13 | AATTTTATATTTTGATTTGATTAGCAATTATAATCTCTAATATACTATTTCTGTAGCCCAAAATCTGCAATGTCAATAAG 14 | 
TAGCAAGTGCCGCTTACTCNTGANANATCANNTTATANCATATGTTTTCNATTTTTTTTTTTTTGCTTATGGANAATNAT 15 | GNACATTNNACNNCNTNANNNN 16 | -------------------------------------------------------------------------------- /docs/tutorial/files_for_tutorial/sequencing_files/pMODKan-HO-pACT1GEV_C3-559.seq: -------------------------------------------------------------------------------- 1 | >pMODKan-HO-pACT1GEV_C3-559_B12.ab1 2 | TTNNNNNNNNGGGGNANNTGTAACTCGCCTTGATCGTTGGGAACCGGAGCTGAATGAAGCCATACCAAACGACGAGCGTG 3 | ACACCACGATGCCTGTAGCAATGGCAACAACGTTGCGCAAACTATTAACTGGCGAACTACTTACTCTAGCTTCCCGGCAA 4 | CAATTAATAGACTGGATGGAGGCGGATAAAGTTGCAGGACCACTTCTGCGCTCGGCCCTTCCGGCTGGCTGGTTTATTGC 5 | TGATAAATCTGGAGCCGGTGAGCGTGGGTCTCGCGGTATCATTGCAGCACTGGGGCCAGATGGTAAGCCCTCCCGTATCG 6 | TAGTTATCTACACGACGGGGAGTCAGGCAACTATGGATGAACGAAATAGACAGATCGCTGAGATAGGTGCCTCACTGATT 7 | AAGCATTGGTAACTGTCAGACCAAGTTTACTCATATATACTTTAGATTGATTTAAAACTTCATTTTTAATTTAAAAGGAT 8 | CTAGGTGAAGATCCTTTTTGATAATCTCATGACCAAAATCCCTTAACGTGAGTTTTCGTTCCACTGAGCGTCAGACCCCG 9 | TAGAAAAGATCAAAGGATCTTCTTGAGATCCTTTTTTTCTGCGCGTAATCTGCTGCTTGCAAACAAAAAAACCACCGCTA 10 | CCAGCGGTGGTTTGTTTGCCGGATCAAGAGCTACCAACTCTTTTTCCGAAGGTAACTGGCTTCAGCAGAGCGCAGATACC 11 | AAATACTGTTCTTCTAGTGTAGCCGTAGTTAGGCCACCACTTCAAGAACTCTGTAGCACCGCCTACATACCTCGCTCTGC 12 | TAATCCTGTTACCAGTGGCTGCTGCCAGTGGCGATAAGTCGTGTCTTACCGGGTTGGACTCAAGACGATAGTTACCGGAT 13 | AAGGCGCAGCGGTCGGGCTGAACGGGGGGTTCGTGCACACAGCCCAGCTTGGANCGAACGACCTACACCGAACTGAGATA 14 | CCTACAGCGTGAGCTATGAGAAGCGCCACGCTTCCCGAAGGNGAAAGGNGGANAGGTATCCNGTAAGCGGCNGGNCGGAA 15 | CAGGANAGCNNCNCGAGGNANCTTCCN 16 | -------------------------------------------------------------------------------- /docs/tutorial/files_for_tutorial/sequencing_files/pMODKan-HO-pACT1GEV_C3-686.seq: -------------------------------------------------------------------------------- 1 | >pMODKan-HO-pACT1GEV_C3-686_A12.ab1 2 | NNNNNNNNNNNNNNNNNNNTAGTATCGNNCGACAGCAGTATAGCGACCAGCATTCACATACGATTGACGCATGATATTAC 3 | 
TTTCTGCGCACTTAACTTCGCATCTGGGCAGATGATGTCGAGGCGAAAAAAAATATAAATCACGCTAACATTTGATTAAA 4 | ATAGAACAACTACAATATAAAAAAACTATACAAATGACAAGTTCTTGAAAACAAGAATCTTTTTATTGTCAGTACTGATT 5 | AGAAAAACTCATCGAGCATCAAATGAAACTGCAATTTATTCATATCAGGATTATCAATACCATATTTTTGAAAAAGCCGT 6 | TTCTGTAATGAAGGAGAAAACTCACCGAGGCAGTTCCATAGGATGGCAAGATCCTGGTATCGGTCTGCGATTCCGACTCG 7 | TCCAACATCAATACAACCTATTAATTTCCCCTCGTCAAAAATAAGGTTATCAAGTGAGAAATCACCATGAGTGACGACTG 8 | AATCCGGTGAGAATGGCAAAAGCTTATGCATTTCTTTCCAGACTTGTTCAACAGGCCAGCCATTACGCTCGTCATCAAAA 9 | TCACTCGCATCAACCAAACCGTTATTCATTCGTGATTGCGCCTGAGCGAGACGAAATACGCGATCGCTGTTAAAAGGACA 10 | ATTACAAACAGGAATCGAATGCAACCGGCGCAGGAACACTGCCAGCGCATCAACAATATTTTCACCTGAATCAGGATATT 11 | CTTCTAATATCTGGAATGCTGTTTTGCCGGGGATCGCAGTGGTGAGTAACCATGCATCATCAGGAGTACGGATAAAATGC 12 | TTGATGGTCGGAAGAGGCATAAATTCCGTCAGCCAGTTTAGTCTGACCATCTCATCTGTAACATCATTGGCAACGCTACC 13 | TTTGCCATGTTTCAGANCAACTCTGGCGCATCGGGCTTCCCATACAATCGATAGATTGTCGCACCTGATTGCCCGACATT 14 | ATCGCGAGCCCATTTATACCCATATAAATCAGCATCCATGNTNGNNTTNNCGNNNNNNAAACGTGAGTCTTTNNNNACCC 15 | ATNNNNTTATGTNNGANGTGATGTGANA 16 | -------------------------------------------------------------------------------- /docs/tutorial/files_for_tutorial/sequencing_files/pMODKan-HO-pACT1GEV_C3-716.seq: -------------------------------------------------------------------------------- 1 | >pMODKan-HO-pACT1GEV_C3-716_B12.ab1 2 | NNNNNNNNNNNNNNNCTGTNNAGCTTGCCTCGTCNNCGCCGGGTCACCCGGCCAGCGACATGGGGGCCCAGAATACCCTC 3 | CTTGACAGTCTTGACGTGCGCAGCTCAGGGGCATGATGTGACTGTCGCCCGTACATTTAGCCCATACATCCCCATGTATA 4 | ATCATTTGCATCCATACATTTTGATGGCCGCACGGCGCGAAGCAAAAATTACGGCTCCTCGCTGCAGACCTGCGAGCAGG 5 | GAAACGCTCCCCTCACAGACGCGTTGAATTGTCCCCACGCCGCGCCCCTGTAGAGAAATATAAAAGGTTAGGATTTGCCA 6 | CTGAGGTTCTTCTTTCATATACTTCCTTTTAAAATCTTGCTAGGATACAGTTCTCACATCACATCCGAACATAAACAACC 7 | ATGGGTAAGGAAAAGACTCACGTTTCGAGGCCGCGATTAAATTCCAACATGGATGCTGATTTATATGGGTATAAATGGGC 8 | TCGCGATAATGTCGGGCAATCAGGTGCGACAATCTATCGATTGTATGGGAAGCCCGATGCGCCAGAGTTGTTTCTGAAAC 9 | 
ATGGCAAAGGTAGCGTTGCCAATGATGTTACAGATGAGATGGTCAGACTAAACTGGCTGACGGAATTTATGCCTCTTCCG 10 | ACCATCAAGCATTTTATCCGTACTCCTGATGATGCATGGTTACTCACCACTGCGATCCCCGGCAAAACAGCATTCCAGAT 11 | ATTAGAAGAATATCCTGATTCAGGTGAAAATATTGTTGATGCGCTGGCAGTGTTCCTGCGCCGGTTGCATTCGATTCCTG 12 | TTTGTAATTGTCCTTTTAACAGCGATCGCGTATTTCGTCTCGCTCNNCGCAATCACGAATGAATAACGGTTTGGTTGATG 13 | CGAGTGATTTTGATGACGAGCGTAATGGCTGGCCTGTTGAACAAGTCTGGAAAGAAATGCATAAGCTTTTGCCATTCTCA 14 | CCGGATTCAGTCGTCACTCATGGNGATTTCTCACTTGANNNNNTATTTTTNACGAGGGNAATTNNNNNNGTATTGATGNN 15 | GACGANTCNNAATCGCNNANCGATACCANNAN 16 | -------------------------------------------------------------------------------- /docs/tutorial/files_for_tutorial/sequencing_files/pMODKan-HO-pACT1GEV_C3-771.seq: -------------------------------------------------------------------------------- 1 | >pMODKan-HO-pACT1GEV_C3-771_C12.ab1 2 | NNNNNNNNNNNNNNNNNNNGCATCGTGGTGTCACGCTCGTCGTTTGGTATGGCTTCATTCAGCTCCGGTTCCCAACGATC 3 | AAGGCGAGTTACATGATCCCCCATGTTGTGCAAAAAAGCGGTTAGCTCCTTCGGTCCTCCGATCGTTGTCAGAAGTAAGT 4 | TGGCCGCAGTGTTATCACTCATGGTTATGGCAGCACTGCATAATTCTCTTACTGTCATGCCATCCGTAAGATGCTTTTCT 5 | GTGACTGGTGAGTACTCAACCAAGTCATTCTGAGAATAGTGTATGCGGCGACCGAGTTGCTCTTGCCCGGCGTCAATACG 6 | GGATAATACCGCGCCACATAGCAGAACTTTAAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCAAGGA 7 | TCTTACCGCTGTTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGC 8 | GTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCAT 9 | ACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTCATGAGCGGATACATATTTGAATGTATTTAGA 10 | AAAATAAACAAATAGGGGTTCCGCGCACATTTCCCCGAAAAGTGCCACCTGACGTCTAAGAAACCATTATTATCATGACA 11 | TTAACCTATAAAAATAGGCGTATCACGAGGCCCTTTCGTCTCGCGCGTTTCGGTGATGACGGTGAAAACCTCTGACACAT 12 | GCAGCTCCCGGAGACGGTCACAGCTTGTCTGTAAGCGGATGCCGGGAGCAGACAAGCCCGTCAGGGCGCGTCAGCGGGTG 13 | TTGGCGGGTGTCGGGGCTGGCTTAACTATGCGGCGTTTAAACTTAGCAGATGCGCGCACCTGCGTTGTTACCACAACTCT 14 | TATGNNNCCGCGGACAGCATCAAACTGTAANATTCCGCCACATTTTATACACTCTGGTCCTTTAACTGGNAAACCTTCGG 15 | GCGTAATGCCCAATTTTTCGCCTTTGTCTTTTGNCC 16 | 
class SubstitutionMatrix(object):
    '''Container for a substitution matrix: the matrix values plus an
    .alphabet attribute giving the labels for each row and column.'''

    def __init__(self, matrix, alphabet):
        '''
        :param matrix: A square 2D array-like (e.g. list of lists of numbers
                       or numpy array) corresponding to substitution matrix
                       values (often log-odds, as in BLOSUM62). It is stored
                       exactly as passed in.
        :type matrix: 2D array-like: np.ndarray or list of lists of integers
        :param alphabet: A string of the characters to use, in order, for the
                         square substitution matrix (e.g. 'ATGCN' for a simple
                         5X5 DNA substitution matrix).
        :type alphabet: str

        '''
        # NOTE(review): earlier documentation said float values are rounded
        # to integers; no conversion happens in this constructor - confirm
        # whether the consumers of .matrix perform it.
        self.alphabet = alphabet
        self.matrix = matrix
14 | TTGAATTGTCCCCACGNCGCGCCCCTGTAGANAANNANAAAGGTTAGNATTTGCCNCTGANNTTCNTNNTTCANATACTN 15 | CNTTTAAAATNNTGCTAGGANACAGTTCTNNCATCACATCCGANCATAAACNANNATGGGNNAGNN 16 | -------------------------------------------------------------------------------- /docs/tutorial/files_for_tutorial/sequencing_files/pMODKan-HO-pACT1GEV_C3-T7-EEV.seq: -------------------------------------------------------------------------------- 1 | >pMODKan-HO-pACT1GEV_C3-T7-EEV_D11.ab1 2 | NNNNNNNNNCNANNNCACATTTTACGTTGGAACTAAACGGTAAAGATCAAATATCACCTTTATTCTTTTTTGGTATGCAT 3 | CGATTTCTTTGCCAGTAAGAACTACGTGCAACTCTACATTAGATCAAAATATAACGGATCATAAATGCAAACAACATTTG 4 | TCCAAAAGAAACAAAAATGTTGGCGTTTGTCTCGAACATGACACAATACTCCTTGATCGAGGGACATTGCCCGATTAACT 5 | ATGATTTATACTTTTTACTACAGGTACTCGTTCTCCCTACTATTTCACCAGTATACGGTCGTTTTATGATTCATTTATTG 6 | CCTATAATGGTAAAAAGGGACATCTTAACCTTACGTGTTGCAATGAGACCATTCTTTTCGATTCATCAACTGTGAAATTT 7 | CTGAAATCCAGTACAATGCGAAACGCTACCGATAATGGCACCGTCTTTTGAATTATGAGGCCCAGATATGGCGTTCAATC 8 | TTCAACAAAAAAATGAATATCAAAACTAAATTCTATCTGGATAACAATCTTTCTTTGGTTTGCAGAAATATCATTAAAAA 9 | ACAATCCATAACTACTAAGGAAGGGTATAAATGGTTTGGTTTACTAATTTTATATTTTGATTTGATTAGCAATTATAATC 10 | TCTAATATACTATTTCTGTAGCCAAAAATCTGCAATGTCAATAAGTAGCAAGTGCCGCTTACTCGTGAGAATATCAACCT 11 | TATAGCATATGTTTTCTATTTTTTTTTTTTGCTTATGGAGATAATGAACATTGTACACATGAACAAAGTGGTAGTTCTCT 12 | CTCCTTGATTTTTTGTGCTTGCGATATATATAATTGTTACATTCAATGGCCATGCTAAATAAAGGTTTTAGCTTTGTTTC 13 | ACTCGTGCCCAGGATAATTGTTTAAACCATGGTCATAGCTGTTTCCTGTGTGAAATTGTTATCCGCTCACAATTCCACAC 14 | AACATACGAGCNNGAAGCATAAAGTGTAAAGCCTGGGNGNCTANGAGTGAGCTAACTCACATTNATTGCNTNCGCTCACT 15 | GCCGCNTTCNGTNNNNNNNNTNNNNNGCTGCANNANNANCNGCNACNNNNGGANANN 16 | -------------------------------------------------------------------------------- /docs/tutorial/files_for_tutorial/sequencing_files/pMODKan-HO-pACT1GEV_C3-M13R.seq: -------------------------------------------------------------------------------- 1 | >pMODKan-HO-pACT1GEV_C3-M13R_E11.ab1 2 | 
GNNNNNNNNNCNNNGNCNANTGAACAAAGCTAAAACCTTTATTTAGCATGGCCATTGAATGTAACAATTATATATATCGC 3 | AAGCACAAAAAATCAAGGAGAGAGAACTACCACTTTGTTCATGTGTACAATGTTCATTATCTCCATAAGCAAAAAAAAAA 4 | AATAGAAAACATATGCTATAAGGTTGATATTCTCACGAGTAAGCGGCACTTGCTACTTATTGACATTGCAGATTTTTGGC 5 | TACAGAAATAGTATATTAGAGATTATAATTGCTAATCAAATCAAAATATAAAATTAGTAAACCAAACCATTTATACCCTT 6 | CCTTAGTAGTTATGGATTGTTTTTTAATGATATTTCTGCAAACCAAAGAAAGATTGTTATCCAGATAGAATTTAGTTTTG 7 | ATATTCATTTTTTTGTTGAAGATTGAACGCCATATCTGGGCCTCATAATTCAAAAGACGGTGCCATTATCGGTAGCGTTT 8 | CGCATTGTACTGGATTTCAGAAATTTCACAGTTGATGAATCGAAAAGAATGGTCTCATTGCAACACGTAAGGTTAAGATG 9 | TCCCTTTTTACCATTATAGGCAATAAATGAATCATAAAACGACCGTATACTGGTGAAATAGTAGGGAGAACGAGTACCTG 10 | TAGTAAAAAGTATAAATCATAGTTAATCGGGCAATGTCCCTCGATCAAGGAGTATTGTGTCATGTTCGAGACAAACGCCA 11 | ACATTTTTGTTTCTTTTGGACAAATGTTGTTTGCATTTATGATCCGTTATATTTTGATCTAATGTAGAGTTGCACGTAGT 12 | TCTTACTGGCAAAGAAATCGATGCATACCAAAAAAGAATAAAGGTGATATTTGATCTTTACCGTTTAGTTCCAACGTAAA 13 | ATTGTGCCTTTGGACTTAAAATGGCGTNGGGGCGGGGTTATTACGACATCAAATTAAAGCCTTCGAGCGTCCCAAAACCT 14 | TCTCAAGCAAGGTTTTCAGTATAATGNTACATGCGTACACGCGTCTGTACAGAAAAAAAAGAAAAATTTNAAATANAANN 15 | ANNNNTNNACTANATANTATAAAAAAATAANNGGNACTANACTTCNGTNNNCTACTCNNCTTTCNGTTANNNN 16 | -------------------------------------------------------------------------------- /docs/tutorial/files_for_tutorial/sequencing_files/pMODKan-HO-pACT1GEV_C3-M13F(-47).seq: -------------------------------------------------------------------------------- 1 | >pMODKan-HO-pACT1GEV_C3-M13F(-47)_C11.ab1 2 | GNNNNNNNNNNNNNNNNNNNATAATATAATAACTAAATAAGTAAATAAGACACACGCGAGAACATATATACACAATTACA 3 | GTAACAATAACAAGAGGACAGATACTACCAAAATGTGTGGGGAAGCGGGTAAGCTGCCACAGCAATTAATGCACAACATT 4 | TAACCTACATTCTTCCTTATCGGATCCTCAAAACCCTTAAAAACATATGCCTCACCCTAACATATTTTCCAATTAACCCT 5 | CAATATTTCTCTGTCACCCGGCCTCTATTTTCCATTTTCTTCTTTACCCGCCACGCGTTTTTTTCTTTCAAATTTTTTTC 6 | TTCCTTCTTCTTTTTCTTCCACGTCCTCTTGCATAAATAAATAAACCGTTTTGAAACCAAACTCGCCTCTCTCTCTCCTT 7 | TTTGAAATATTTTTGGGTTTGTTTGATCCTTTCCTTCCCAATCTCTCTTGTTTAATATATATTCATTTATATCACGCTCT 8 
| CTTTTTATCTTCCTTTTTTTCCTCTCTCTTGTATTCTTCCTTCCCCTTTCTACTCAAACCAAGAAGAAAAAGAAAAGGTC 9 | AATCTTTGTTAAAGAATAGGATCTTCTACTACATCAGCTTTTAGATTTTTCACGCTTACTGCTTTTTTCTTCCCAAGATC 10 | GAAAATTTACTGAATTAACAGGGCCCCCCCTCGAGGTCGACGGTATCGATAAGCTTGAAGCAAGCCTCCTGAAAGATGAA 11 | GCTACTGTCTTCTATCGAACAAGCATGCGATATTTGCCGACTTAAAAAGCTCAAGTGCTCCAAAGAAAAACCGAAGTGCG 12 | CCAAGTGTCTGAAGAACAACTGGGAGTGTCGCTACTCTCCCAAAACCAAAAGGTCTCCGCTGACTAGGGCACATCTGACA 13 | GAAGTGGAATCAAGGCTAGAAAGACTGGAACAGCTATTTCTACTGATTTTTCCTCGAGAAGACCTTGACATGATTTTGAA 14 | AATGGATTCTTTACAGGATATAAAAGCATTGTTGGGTACCCCTGCAGCTGCGTCGACTCTAGAGGATCNTCTGCTGGANA 15 | CNTGAGAGCTGCNNCCTTTNGNNAAGCCCGCTCATGATCAAACGCTCTAAGAANAANNGCCNNNNN 16 | -------------------------------------------------------------------------------- /docs/tutorial/files_for_tutorial/sequencing_files/pMODKan-HO-pACT1GEV_C3-675.seq: -------------------------------------------------------------------------------- 1 | >pMODKan-HO-pACT1GEV_C3-675_G11.ab1 2 | NNNNNNNNNNNNNNNANGCTTTTATATCCTGTNNNAATCCATTTTCAAAATCATGTCAAGGTCTTCTCGAGGAAAAATCA 3 | GTAGAAATAGCTGTTCCAGTCTTTCTAGCCTTGATTCCACTTCTGTCAGATGTGCCCTAGTCAGCGGAGACCTTTTGGTT 4 | TTGGGAGAGTAGCGACACTCCCAGTTGTTCTTCAGACACTTGGCGCACTTCGGTTTTTCTTTGGAGCACTTGAGCTTTTT 5 | AAGTCGGCAAATATCGCATGCTTGTTCGATAGAAGACAGTAGCTTCATCTTTCAGGAGGCTTGCTTCAAGCTTATCGATA 6 | CCGTCGACCTCGAGGGGGGGCCCTGTTAATTCAGTAAATTTTCGATCTTGGGAAGAAAAAAGCAGTAAGCGTGAAAAATC 7 | TAAAAGCTGATGTAGTAGAAGATCCTATTCTTTAACAAAGATTGACCTTTTCTTTTTCTTCTTGGTTTGAGTAGAAAGGG 8 | GAAGGAAGAATACAAGAGAGAGGAAAAAAAGGAAGATAAAAAGAGAGCGTGATATAAATGAATATATATTAAACAAGAGA 9 | GATTGGGAAGGAAAGGATCAAACAAACCCAAAAATATTTCAAAAAGGAGAGAGAGAGGCGAGTTTGGTTTCAAAACGGTT 10 | TATTTATTTATGCAAGAGGACGTGGAAGAAAAAGAAGAAGGAAGAAAAAAATTTGAAAGNNAAAACGCGTGGCGGGTAAA 11 | GAAGAAAATGGAAAATAGAGGCCGGGTGACAGAGAAATATTGAGGGTTAATTGGAAAATATGTTAGGGTGAGGCATATGT 12 | TTTTAAGGGTTTTGAGGATCCGATAAGGAAGAATGTNNTTAAATGTTGTGCATTAATTGCTGTGGCAGCTTACCCGCTTC 13 | CCCACACATTTTGGTAGTATCTGTCCTCTTGTTATTGTTACTGTAATTGTGTATATATGTTCTCGCGTGTGTCTTATTTA 14 | 
CTTATTTAGTTATTATATNATATGGGNCTGCANGGTANAGGCGTCNNGACTGGGAAAACCCNGGNNGNANNCTGGNNGGC 15 | GGNNTTAGTATCNANCNNCNGCAGTATAGCGACNGCATTCNCATACNATNGACGCANGANATTACTTTCNNNNCNNCNTN 16 | AACTTCN 17 | -------------------------------------------------------------------------------- /tests/test_nupack/data/pairs_multi_dna.epairs: -------------------------------------------------------------------------------- 1 | 1 13 2.8569e-03 2 | 1 22 7.5721e-02 3 | 2 6 1.7143e-03 4 | 2 11 6.9552e-03 5 | 2 20 4.4100e-01 6 | 3 12 1.3121e-02 7 | 3 19 4.3914e-01 8 | 3 21 4.3453e-02 9 | 4 11 8.7884e-03 10 | 4 20 4.3497e-02 11 | 5 14 9.7341e-03 12 | 5 18 1.3117e-01 13 | 5 23 5.5300e-02 14 | 6 12 7.8965e-03 15 | 6 15 1.7889e-03 16 | 6 19 1.1399e-02 17 | 6 21 1.7459e-02 18 | 7 11 1.3103e-02 19 | 7 16 2.2691e-01 20 | 7 17 3.9490e-03 21 | 7 20 3.6382e-02 22 | 8 13 7.4523e-03 23 | 8 22 9.9800e-03 24 | 9 14 8.7338e-01 25 | 9 23 4.4397e-02 26 | 10 13 8.6796e-01 27 | 10 22 4.4087e-02 28 | 11 19 2.4116e-03 29 | 11 21 1.2373e-03 30 | 12 20 2.7627e-03 31 | 13 18 1.2894e-02 32 | 13 23 3.7417e-02 33 | 14 22 4.0051e-02 34 | 15 20 5.3722e-02 35 | 16 19 6.0455e-02 36 | 16 21 8.1458e-03 37 | 17 19 8.4382e-03 38 | 17 21 6.4102e-03 39 | 1 24 9.2041e-01 40 | 2 24 5.4989e-01 41 | 3 24 5.0278e-01 42 | 4 24 9.4663e-01 43 | 5 24 8.0195e-01 44 | 6 24 9.5974e-01 45 | 7 24 7.1884e-01 46 | 8 24 9.8240e-01 47 | 9 24 8.2092e-02 48 | 10 24 8.7106e-02 49 | 11 24 9.6749e-01 50 | 12 24 9.7618e-01 51 | 13 24 7.1417e-02 52 | 14 24 7.6830e-02 53 | 15 24 9.4379e-01 54 | 16 24 7.0334e-01 55 | 17 24 9.8061e-01 56 | 18 24 8.5572e-01 57 | 19 24 4.7815e-01 58 | 20 24 4.2264e-01 59 | 21 24 9.2329e-01 60 | 22 24 8.3006e-01 61 | 23 24 8.6289e-01 62 | -------------------------------------------------------------------------------- /tests/test_nupack/data/pairs_multi_dna.ppairs: -------------------------------------------------------------------------------- 1 | 1 13 2.8569e-03 2 | 1 22 7.5721e-02 3 | 2 6 1.7143e-03 4 | 2 11 6.9552e-03 5 | 2 20 
4.4100e-01 6 | 3 12 1.3121e-02 7 | 3 19 4.3914e-01 8 | 3 21 4.3453e-02 9 | 4 11 8.7884e-03 10 | 4 20 4.3497e-02 11 | 5 14 9.7341e-03 12 | 5 18 1.3117e-01 13 | 5 23 5.5300e-02 14 | 6 12 7.8965e-03 15 | 6 15 1.7889e-03 16 | 6 19 1.1399e-02 17 | 6 21 1.7459e-02 18 | 7 11 1.3103e-02 19 | 7 16 2.2691e-01 20 | 7 17 3.9490e-03 21 | 7 20 3.6382e-02 22 | 8 13 7.4523e-03 23 | 8 22 9.9800e-03 24 | 9 14 8.7338e-01 25 | 9 23 4.4397e-02 26 | 10 13 8.6796e-01 27 | 10 22 4.4087e-02 28 | 11 19 2.4116e-03 29 | 11 21 1.2373e-03 30 | 12 20 2.7627e-03 31 | 13 18 1.2894e-02 32 | 13 23 3.7417e-02 33 | 14 22 4.0051e-02 34 | 15 20 5.3722e-02 35 | 16 19 6.0455e-02 36 | 16 21 8.1458e-03 37 | 17 19 8.4382e-03 38 | 17 21 6.4102e-03 39 | 1 24 9.2041e-01 40 | 2 24 5.4989e-01 41 | 3 24 5.0278e-01 42 | 4 24 9.4663e-01 43 | 5 24 8.0195e-01 44 | 6 24 9.5974e-01 45 | 7 24 7.1884e-01 46 | 8 24 9.8240e-01 47 | 9 24 8.2092e-02 48 | 10 24 8.7106e-02 49 | 11 24 9.6749e-01 50 | 12 24 9.7618e-01 51 | 13 24 7.1417e-02 52 | 14 24 7.6830e-02 53 | 15 24 9.4379e-01 54 | 16 24 7.0334e-01 55 | 17 24 9.8061e-01 56 | 18 24 8.5572e-01 57 | 19 24 4.7815e-01 58 | 20 24 4.2264e-01 59 | 21 24 9.2329e-01 60 | 22 24 8.3006e-01 61 | 23 24 8.6289e-01 62 | -------------------------------------------------------------------------------- /docs/tutorial/files_for_tutorial/sequencing_files/pMODKan-HO-pACT1GEV_C3-889.seq: -------------------------------------------------------------------------------- 1 | >pMODKan-HO-pACT1GEV_C3-889_F09.ab1 2 | NNNNGGNANCGNNAAACATCTGCTCAAACTCGAAGTCGGCCATATCCAGAGCGCCGTAGGGGGCGGAGTCGTGGGGGGTA 3 | AATCCCGGACCCGGGGAATCCCCGTCCCCCAACATGTCCAGATCGAAATCGTCTAGCGCGTCGGCATGCGCCATCGCCAC 4 | GTCCTCGCCGTCTAAGTGGAGCTCCGATGAAGTAGAGCCCGCAGTGGCCAAGTGGCTTTGGTCCGTCTCCTCCACGGATG 5 | CCCCTCCACGGCTAGTGGGCGCATGTAGGCGGTGGGCGTCCAGCATCTCCAGCAGCAGGTCATAGAGGGGCACCACGTTC 6 | TTGCACTTCATGCTGTACAGATGCTCCATGCCTTTGTTACTCATGTGCCTGATGTGGGAGAGGATGAGGAGGAGCTGGGC 7 | 
CAGCCGCTGGTGCTGCTGCTGCAGGGTCAGGCCTGCCTTGGCCATCAGGTGGATCAAAGTGTCTGTGATCTTGTCCAGGA 8 | CTCGGTGGATATGGTCCTTCTCTTCCAGAGACTTCAGGGTGCTGGACAGAAATGTGTACACTCCAGAATTAAGCAAAATA 9 | ATAGATTTGAGGCACACAAACTCCTCTCCCTGCAGATTCATCATGCGGAACCGAGATGATGTAGCCAGCAGCATGTCGAA 10 | GATCTCCACCATGCCCTCTACACATTTTCCCTGGTTCCTGTCCAAGAGCAAGTTAGGAGCAAACAGTAGCTTCACTGGGT 11 | GCTCCATGGAGCGCCAGACGAGACCAATCATCAGGATCTCTAGCCAGGCACATTCTAGAAGGTGGACCTGATCATGGAGG 12 | GTCAAATCCACAAAGCCTNNACCCTCTTCGCCCAGTTGATCATGTGAACCAGCTCCCTGTCTGCCAGGTTGGTCAGTAAG 13 | CCCATCATCGAAGCTTCACTGAAAGGGTCTGGTAGGATCATACTCGGAATAGAGTATGGGGGGGCTCAGCATCCAACAAG 14 | GCACTGACCATCTGGGTCGGCCGTCNGNACNANNCCAGGCTGTNCTTCTTANANCGTTTGATCATGANCGGGCTTNNCCA 15 | AANNTNNNGCTCTCATGTNNCAGNNNANGATCNCTANANTCGACGCANCTGCNGGGNNCCCANANGCTTTNANNNTNCNN 16 | NAAAGAANCCATTTTCAAANCNN 17 | -------------------------------------------------------------------------------- /tests/test_nupack/data/pairs_multi_rna99.epairs: -------------------------------------------------------------------------------- 1 | 1 13 6.9146e-03 2 | 1 22 4.6932e-01 3 | 2 11 3.9209e-03 4 | 2 20 1.3502e-01 5 | 3 12 1.2828e-02 6 | 3 15 1.6487e-03 7 | 3 19 1.3057e-01 8 | 3 21 8.2488e-02 9 | 4 11 1.3217e-02 10 | 4 20 7.9732e-02 11 | 5 14 4.5453e-02 12 | 5 18 1.7231e-01 13 | 5 23 4.4646e-02 14 | 6 12 2.1290e-02 15 | 6 15 2.0157e-03 16 | 6 19 5.9111e-03 17 | 6 21 3.3689e-02 18 | 7 11 2.3910e-02 19 | 7 16 8.4312e-03 20 | 7 17 1.2841e-02 21 | 7 20 3.5094e-02 22 | 8 13 5.1289e-02 23 | 8 16 1.2193e-02 24 | 8 22 6.5687e-02 25 | 9 14 6.5965e-01 26 | 9 23 4.6180e-02 27 | 10 13 5.7209e-01 28 | 10 22 4.9958e-02 29 | 11 19 3.0338e-03 30 | 11 21 3.4994e-03 31 | 12 20 4.8121e-03 32 | 13 18 1.4190e-01 33 | 13 23 1.7132e-02 34 | 14 22 3.3620e-02 35 | 15 20 1.5741e-02 36 | 16 19 1.5690e-02 37 | 16 21 1.7818e-03 38 | 17 19 3.7130e-03 39 | 17 21 1.6594e-03 40 | 1 24 5.2366e-01 41 | 2 24 8.6043e-01 42 | 3 24 7.7244e-01 43 | 4 24 9.0594e-01 44 | 5 24 7.3732e-01 45 | 6 24 9.3708e-01 46 | 7 24 9.1970e-01 47 | 8 24 
8.7083e-01 48 | 9 24 2.9378e-01 49 | 10 24 3.7778e-01 50 | 11 24 9.5242e-01 51 | 12 24 9.6107e-01 52 | 13 24 2.1067e-01 53 | 14 24 2.6128e-01 54 | 15 24 9.8059e-01 55 | 16 24 9.6082e-01 56 | 17 24 9.8114e-01 57 | 18 24 6.8540e-01 58 | 19 24 8.4108e-01 59 | 20 24 7.2960e-01 60 | 21 24 8.7688e-01 61 | 22 24 3.8142e-01 62 | 23 24 8.9204e-01 63 | -------------------------------------------------------------------------------- /tests/test_nupack/data/pairs_multi_rna99.ppairs: -------------------------------------------------------------------------------- 1 | 1 13 6.9146e-03 2 | 1 22 4.6932e-01 3 | 2 11 3.9209e-03 4 | 2 20 1.3502e-01 5 | 3 12 1.2828e-02 6 | 3 15 1.6487e-03 7 | 3 19 1.3057e-01 8 | 3 21 8.2488e-02 9 | 4 11 1.3217e-02 10 | 4 20 7.9732e-02 11 | 5 14 4.5453e-02 12 | 5 18 1.7231e-01 13 | 5 23 4.4646e-02 14 | 6 12 2.1290e-02 15 | 6 15 2.0157e-03 16 | 6 19 5.9111e-03 17 | 6 21 3.3689e-02 18 | 7 11 2.3910e-02 19 | 7 16 8.4312e-03 20 | 7 17 1.2841e-02 21 | 7 20 3.5094e-02 22 | 8 13 5.1289e-02 23 | 8 16 1.2193e-02 24 | 8 22 6.5687e-02 25 | 9 14 6.5965e-01 26 | 9 23 4.6180e-02 27 | 10 13 5.7209e-01 28 | 10 22 4.9958e-02 29 | 11 19 3.0338e-03 30 | 11 21 3.4994e-03 31 | 12 20 4.8121e-03 32 | 13 18 1.4190e-01 33 | 13 23 1.7132e-02 34 | 14 22 3.3620e-02 35 | 15 20 1.5741e-02 36 | 16 19 1.5690e-02 37 | 16 21 1.7818e-03 38 | 17 19 3.7130e-03 39 | 17 21 1.6594e-03 40 | 1 24 5.2366e-01 41 | 2 24 8.6043e-01 42 | 3 24 7.7244e-01 43 | 4 24 9.0594e-01 44 | 5 24 7.3732e-01 45 | 6 24 9.3708e-01 46 | 7 24 9.1970e-01 47 | 8 24 8.7083e-01 48 | 9 24 2.9378e-01 49 | 10 24 3.7778e-01 50 | 11 24 9.5242e-01 51 | 12 24 9.6107e-01 52 | 13 24 2.1067e-01 53 | 14 24 2.6128e-01 54 | 15 24 9.8059e-01 55 | 16 24 9.6082e-01 56 | 17 24 9.8114e-01 57 | 18 24 6.8540e-01 58 | 19 24 8.4108e-01 59 | 20 24 7.2960e-01 60 | 21 24 8.7688e-01 61 | 22 24 3.8142e-01 62 | 23 24 8.9204e-01 63 | -------------------------------------------------------------------------------- 
/tests/test_nupack/data/pairs_multi_rna99.tsv: -------------------------------------------------------------------------------- 1 | 1 13 6.9146e-03 2 | 1 22 4.6932e-01 3 | 2 11 3.9209e-03 4 | 2 20 1.3502e-01 5 | 3 12 1.2828e-02 6 | 3 15 1.6487e-03 7 | 3 19 1.3057e-01 8 | 3 21 8.2488e-02 9 | 4 11 1.3217e-02 10 | 4 20 7.9732e-02 11 | 5 14 4.5453e-02 12 | 5 18 1.7231e-01 13 | 5 23 4.4646e-02 14 | 6 12 2.1290e-02 15 | 6 15 2.0157e-03 16 | 6 19 5.9111e-03 17 | 6 21 3.3689e-02 18 | 7 11 2.3910e-02 19 | 7 16 8.4312e-03 20 | 7 17 1.2841e-02 21 | 7 20 3.5094e-02 22 | 8 13 5.1289e-02 23 | 8 16 1.2193e-02 24 | 8 22 6.5687e-02 25 | 9 14 6.5965e-01 26 | 9 23 4.6180e-02 27 | 10 13 5.7209e-01 28 | 10 22 4.9958e-02 29 | 11 19 3.0338e-03 30 | 11 21 3.4994e-03 31 | 12 20 4.8121e-03 32 | 13 18 1.4190e-01 33 | 13 23 1.7132e-02 34 | 14 22 3.3620e-02 35 | 15 20 1.5741e-02 36 | 16 19 1.5690e-02 37 | 16 21 1.7818e-03 38 | 17 19 3.7130e-03 39 | 17 21 1.6594e-03 40 | 1 24 5.2366e-01 41 | 2 24 8.6043e-01 42 | 3 24 7.7244e-01 43 | 4 24 9.0594e-01 44 | 5 24 7.3732e-01 45 | 6 24 9.3708e-01 46 | 7 24 9.1970e-01 47 | 8 24 8.7083e-01 48 | 9 24 2.9378e-01 49 | 10 24 3.7778e-01 50 | 11 24 9.5242e-01 51 | 12 24 9.6107e-01 52 | 13 24 2.1067e-01 53 | 14 24 2.6128e-01 54 | 15 24 9.8059e-01 55 | 16 24 9.6082e-01 56 | 17 24 9.8114e-01 57 | 18 24 6.8540e-01 58 | 19 24 8.4108e-01 59 | 20 24 7.2960e-01 60 | 21 24 8.7688e-01 61 | 22 24 3.8142e-01 62 | 23 24 8.9204e-01 63 | -------------------------------------------------------------------------------- /coral/database/_entrez.py: -------------------------------------------------------------------------------- 1 | import os 2 | from tempfile import mkdtemp 3 | from Bio import Entrez 4 | import coral.sequence 5 | 6 | 7 | # FIXME: If a Genome is a data structure, it should be in the DNA sequence 8 | # module. Then rename the genome acquirer after Entrez / NCBI, etc. 
9 | # FIXME: If a Genome is not a new special data structure, should be a function 10 | # not a class unless there are more operations than fetch. 11 | # TODO: docstring 12 | # TODO: Figure out why reading in the DNA is so slow and if it can be sped up 13 | # - MG1655 takes 30-60 seconds to process into memory and coral.DNA. 14 | # MG1655 id is 'U00096.3' 15 | def fetch_genome(genome_id): 16 | '''Acquire a genome from Entrez 17 | 18 | ''' 19 | # TODO: Can strandedness by found in fetched genome attributes? 20 | # TODO: skip read/write step? 21 | # Using a dummy email for now - does this violate NCBI guidelines? 22 | email = 'loremipsum@gmail.com' 23 | Entrez.email = email 24 | 25 | print 'Downloading Genome...' 26 | handle = Entrez.efetch(db='nucleotide', id=str(genome_id), rettype='gb', 27 | retmode='text') 28 | print 'Genome Downloaded...' 29 | tmpfile = os.path.join(mkdtemp(), 'tmp.gb') 30 | with open(tmpfile, 'w') as f: 31 | f.write(handle.read()) 32 | genome = coral.seqio.read_dna(tmpfile) 33 | 34 | return genome 35 | -------------------------------------------------------------------------------- /tests/tests/test_design/test_randomcodons.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Tests for RandomCodons class of analysis module. 3 | 4 | ''' 5 | 6 | from nose.tools import assert_equal, assert_not_equal, assert_raises 7 | from coral import design, reaction, RNA 8 | 9 | 10 | def test_randomcodons(): 11 | ''' 12 | This test is pretty basic right now - not sure how much checking 13 | can be done for a random DNA base generator. 
14 | 15 | ''' 16 | 17 | reference_seq = RNA('AUGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAUAG') 18 | reference_peptide = reaction.translate(reference_seq) 19 | output = design.random_codons(reference_peptide) 20 | output_peptide = reaction.translate(reference_seq) 21 | 22 | assert_equal(len(output), len(reference_seq) - 3) 23 | assert_equal(reference_peptide, output_peptide) 24 | assert_not_equal(reference_seq, output) 25 | 26 | # Setting too high a threshold should raise ValueError 27 | assert_raises(ValueError, design.random_codons, reference_peptide, 28 | frequency_cutoff=1.5) 29 | 30 | # Weighted should work 31 | w_output = design.random_codons(reference_peptide, weighted=True) 32 | w_output_peptide = reaction.translate(reference_seq) 33 | 34 | assert_equal(len(w_output), len(reference_seq) - 3) 35 | assert_equal(reference_peptide, w_output_peptide) 36 | assert_not_equal(reference_seq, w_output) 37 | -------------------------------------------------------------------------------- /tests/test_nupack/data/pairs_multi_rna.epairs: -------------------------------------------------------------------------------- 1 | 1 5 1.2725e-03 2 | 1 13 1.4919e-02 3 | 1 22 3.0928e-01 4 | 2 11 1.2280e-02 5 | 2 16 1.7915e-03 6 | 2 20 1.9583e-01 7 | 3 7 1.2304e-03 8 | 3 12 3.5198e-02 9 | 3 15 5.7397e-03 10 | 3 19 1.8743e-01 11 | 3 21 1.1444e-01 12 | 4 11 3.5932e-02 13 | 4 16 1.7564e-03 14 | 4 17 2.0643e-03 15 | 4 20 1.0982e-01 16 | 5 14 4.9136e-02 17 | 5 18 8.6030e-02 18 | 5 23 2.7049e-02 19 | 6 12 6.3807e-02 20 | 6 15 1.1392e-02 21 | 6 19 1.3030e-02 22 | 6 21 6.1387e-02 23 | 7 11 7.3528e-02 24 | 7 16 2.5425e-02 25 | 7 17 1.2921e-02 26 | 7 20 6.5669e-02 27 | 8 13 6.3288e-02 28 | 8 16 7.5030e-03 29 | 8 22 4.3221e-02 30 | 9 14 5.0529e-01 31 | 9 23 3.0197e-02 32 | 10 13 3.9596e-01 33 | 10 22 3.2621e-02 34 | 11 19 8.4591e-03 35 | 11 21 6.7151e-03 36 | 12 20 9.8408e-03 37 | 13 18 1.2592e-01 38 | 13 23 3.0770e-02 39 | 14 22 6.8823e-02 40 | 15 20 5.0315e-02 41 | 16 19 4.7269e-02 42 | 16 21 
5.9887e-03 43 | 17 19 1.2418e-02 44 | 17 21 5.5071e-03 45 | 1 24 6.7444e-01 46 | 2 24 7.8900e-01 47 | 3 24 6.5596e-01 48 | 4 24 8.5043e-01 49 | 5 24 8.3564e-01 50 | 6 24 8.4946e-01 51 | 7 24 8.2123e-01 52 | 8 24 8.8599e-01 53 | 9 24 4.6404e-01 54 | 10 24 5.7055e-01 55 | 11 24 8.6306e-01 56 | 12 24 8.9113e-01 57 | 13 24 3.6915e-01 58 | 14 24 3.7675e-01 59 | 15 24 9.3252e-01 60 | 16 24 9.1025e-01 61 | 17 24 9.6690e-01 62 | 18 24 7.8765e-01 63 | 19 24 7.3140e-01 64 | 20 24 5.6853e-01 65 | 21 24 8.0596e-01 66 | 22 24 5.4604e-01 67 | 23 24 9.1198e-01 68 | -------------------------------------------------------------------------------- /tests/test_nupack/data/pairs_multi_rna.ppairs: -------------------------------------------------------------------------------- 1 | 1 5 1.2725e-03 2 | 1 13 1.4919e-02 3 | 1 22 3.0928e-01 4 | 2 11 1.2280e-02 5 | 2 16 1.7915e-03 6 | 2 20 1.9583e-01 7 | 3 7 1.2304e-03 8 | 3 12 3.5198e-02 9 | 3 15 5.7397e-03 10 | 3 19 1.8743e-01 11 | 3 21 1.1444e-01 12 | 4 11 3.5932e-02 13 | 4 16 1.7564e-03 14 | 4 17 2.0643e-03 15 | 4 20 1.0982e-01 16 | 5 14 4.9136e-02 17 | 5 18 8.6030e-02 18 | 5 23 2.7049e-02 19 | 6 12 6.3807e-02 20 | 6 15 1.1392e-02 21 | 6 19 1.3030e-02 22 | 6 21 6.1387e-02 23 | 7 11 7.3528e-02 24 | 7 16 2.5425e-02 25 | 7 17 1.2921e-02 26 | 7 20 6.5669e-02 27 | 8 13 6.3288e-02 28 | 8 16 7.5030e-03 29 | 8 22 4.3221e-02 30 | 9 14 5.0529e-01 31 | 9 23 3.0197e-02 32 | 10 13 3.9596e-01 33 | 10 22 3.2621e-02 34 | 11 19 8.4591e-03 35 | 11 21 6.7151e-03 36 | 12 20 9.8408e-03 37 | 13 18 1.2592e-01 38 | 13 23 3.0770e-02 39 | 14 22 6.8823e-02 40 | 15 20 5.0315e-02 41 | 16 19 4.7269e-02 42 | 16 21 5.9887e-03 43 | 17 19 1.2418e-02 44 | 17 21 5.5071e-03 45 | 1 24 6.7444e-01 46 | 2 24 7.8900e-01 47 | 3 24 6.5596e-01 48 | 4 24 8.5043e-01 49 | 5 24 8.3564e-01 50 | 6 24 8.4946e-01 51 | 7 24 8.2123e-01 52 | 8 24 8.8599e-01 53 | 9 24 4.6404e-01 54 | 10 24 5.7055e-01 55 | 11 24 8.6306e-01 56 | 12 24 8.9113e-01 57 | 13 24 3.6915e-01 58 | 14 24 3.7675e-01 59 | 15 
24 9.3252e-01 60 | 16 24 9.1025e-01 61 | 17 24 9.6690e-01 62 | 18 24 7.8765e-01 63 | 19 24 7.3140e-01 64 | 20 24 5.6853e-01 65 | 21 24 8.0596e-01 66 | 22 24 5.4604e-01 67 | 23 24 9.1198e-01 68 | -------------------------------------------------------------------------------- /tests/test_nupack/data/pairs_multi_rna.tsv: -------------------------------------------------------------------------------- 1 | 1 5 1.2725e-03 2 | 1 13 1.4919e-02 3 | 1 22 3.0928e-01 4 | 2 11 1.2280e-02 5 | 2 16 1.7915e-03 6 | 2 20 1.9583e-01 7 | 3 7 1.2304e-03 8 | 3 12 3.5198e-02 9 | 3 15 5.7397e-03 10 | 3 19 1.8743e-01 11 | 3 21 1.1444e-01 12 | 4 11 3.5932e-02 13 | 4 16 1.7564e-03 14 | 4 17 2.0643e-03 15 | 4 20 1.0982e-01 16 | 5 14 4.9136e-02 17 | 5 18 8.6030e-02 18 | 5 23 2.7049e-02 19 | 6 12 6.3807e-02 20 | 6 15 1.1392e-02 21 | 6 19 1.3030e-02 22 | 6 21 6.1387e-02 23 | 7 11 7.3528e-02 24 | 7 16 2.5425e-02 25 | 7 17 1.2921e-02 26 | 7 20 6.5669e-02 27 | 8 13 6.3288e-02 28 | 8 16 7.5030e-03 29 | 8 22 4.3221e-02 30 | 9 14 5.0529e-01 31 | 9 23 3.0197e-02 32 | 10 13 3.9596e-01 33 | 10 22 3.2621e-02 34 | 11 19 8.4591e-03 35 | 11 21 6.7151e-03 36 | 12 20 9.8408e-03 37 | 13 18 1.2592e-01 38 | 13 23 3.0770e-02 39 | 14 22 6.8823e-02 40 | 15 20 5.0315e-02 41 | 16 19 4.7269e-02 42 | 16 21 5.9887e-03 43 | 17 19 1.2418e-02 44 | 17 21 5.5071e-03 45 | 1 24 6.7444e-01 46 | 2 24 7.8900e-01 47 | 3 24 6.5596e-01 48 | 4 24 8.5043e-01 49 | 5 24 8.3564e-01 50 | 6 24 8.4946e-01 51 | 7 24 8.2123e-01 52 | 8 24 8.8599e-01 53 | 9 24 4.6404e-01 54 | 10 24 5.7055e-01 55 | 11 24 8.6306e-01 56 | 12 24 8.9113e-01 57 | 13 24 3.6915e-01 58 | 14 24 3.7675e-01 59 | 15 24 9.3252e-01 60 | 16 24 9.1025e-01 61 | 17 24 9.6690e-01 62 | 18 24 7.8765e-01 63 | 19 24 7.3140e-01 64 | 20 24 5.6853e-01 65 | 21 24 8.0596e-01 66 | 22 24 5.4604e-01 67 | 23 24 9.1198e-01 68 | -------------------------------------------------------------------------------- /coral/sequence/_rna.py: 
-------------------------------------------------------------------------------- 1 | '''RNA sequences classes.''' 2 | import coral.reaction 3 | from coral.sequence._nucleicacid import NucleicAcid 4 | 5 | 6 | class RNA(NucleicAcid): 7 | '''ssRNA sequence.''' 8 | 9 | def __init__(self, rna, circular=False, run_checks=True): 10 | ''' 11 | :param rna: Input sequence (RNA). 12 | :type rna: str 13 | :param run_checks: Check inputs / formats (disabling increases speed): 14 | alphabet check 15 | case 16 | :type run_checks: bool 17 | :returns: coral.RNA instance. 18 | 19 | ''' 20 | super(RNA, self).__init__(rna, 'rna', circular=circular, 21 | run_checks=run_checks, any_char='N') 22 | self.ds = False 23 | 24 | def copy(self): 25 | return type(self)(self.seq, circular=self.circular, run_checks=False) 26 | 27 | def reverse_transcribe(self): 28 | '''Reverse transcribe to DNA. 29 | 30 | :returns: The reverse transcribed (DNA) version of the current RNA. 31 | :rtype: coral.DNA 32 | 33 | ''' 34 | return coral.reaction.reverse_transcribe(self) 35 | 36 | def translate(self): 37 | '''Translate sequence into a peptide. 38 | 39 | :returns: A translated peptide from the current sequence. 40 | :rtype: coral.Peptide 41 | 42 | ''' 43 | return coral.reaction.translate(self) 44 | -------------------------------------------------------------------------------- /bin/ipynb2rst.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """Uses nbconvert to recursively convert all ipynbs in a directory to .rst.""" 4 | # TODO: catch conversion errors (right now they pass silently) 5 | # Doesn't even use IPython API (TODO!) 
6 | from __future__ import print_function 7 | import os 8 | import subprocess 9 | import sys 10 | 11 | 12 | # Build docs 13 | def ipynb_to_rst(directory, filename): 14 | """Converts a given file in a directory to an rst in the same directory.""" 15 | print(filename) 16 | os.chdir(directory) 17 | subprocess.Popen(["ipython", "nbconvert", "--to", "rst", 18 | filename], 19 | stdout=subprocess.PIPE, 20 | stderr=subprocess.PIPE, 21 | cwd=directory) 22 | 23 | 24 | def convert_ipynbs(directory): 25 | """Recursively converts all ipynb files in a directory into rst files in 26 | the same directory.""" 27 | # The ipython_examples dir has to be in the same dir as this script 28 | for root, subfolders, files in os.walk(os.path.abspath(directory)): 29 | for f in files: 30 | if ".ipynb_checkpoints" not in root: 31 | if f.endswith("ipynb"): 32 | ipynb_to_rst(root, f) 33 | 34 | 35 | if __name__ == "__main__": 36 | # Convert notebooks from ipynb to rst 37 | if len(sys.argv) != 2: 38 | print("\nipy2nb:\n=======\nUsage: ipynb2rst \n") 39 | else: 40 | directory = sys.argv[1] 41 | convert_ipynbs(directory) 42 | -------------------------------------------------------------------------------- /coral/analysis/_sequencing/substitution_matrices/dna.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .substitution_matrix import SubstitutionMatrix 3 | 4 | 5 | DNA = SubstitutionMatrix( 6 | np.array([[5, -4, -4, -4, -4, 1, 1, -4, -4, 1, -4, -1, -1, -1, -2, -4], 7 | [-4, 5, -4, -4, -4, 1, -4, 1, 1, -4, -1, -4, -1, -1, -2, 5], 8 | [-4, -4, 5, -4, 1, -4, 1, -4, 1, -4, -1, -1, -4, -1, -2, -4], 9 | [-4, -4, -4, 5, 1, -4, -4, 1, -4, 1, -1, -1, -1, -4, -2, -4], 10 | [-4, -4, 1, 1, -1, -4, -2, -2, -2, -2, -1, -1, -3, -3, -1, 11 | -4], 12 | [1, 1, -4, -4, -4, -1, -2, -2, -2, -2, -3, -3, -1, -1, -1, 1], 13 | [1, -4, 1, -4, -2, -2, -1, -4, -2, -2, -3, -1, -3, -1, -1, 14 | -4], 15 | [-4, 1, -4, 1, -2, -2, -4, -1, -2, -2, -1, -3, -1, -3, -1, 
1], 16 | [-4, 1, 1, -4, -2, -2, -2, -2, -1, -4, -1, -3, -3, -1, -1, 1], 17 | [1, -4, -4, 1, -2, -2, -2, -2, -4, -1, -3, -1, -1, -3, -1, 18 | -4], 19 | [-4, -1, -1, -1, -1, -3, -3, -1, -1, -3, -1, -2, -2, -2, -1, 20 | -1], 21 | [-1, -4, -1, -1, -1, -3, -1, -3, -3, -1, -2, -1, -2, -2, -1, 22 | -4], 23 | [-1, -1, -4, -1, -3, -1, -3, -1, -3, -1, -2, -2, -1, -2, -1, 24 | -1], 25 | [-1, -1, -1, -4, -3, -1, -1, -3, -1, -3, -2, -2, -2, -1, -1, 26 | -1], 27 | [-2, -2, -2, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 28 | -2], 29 | [-4, 5, -4, -4, -4, 1, -4, 1, 1, -4, -1, -4, -1, -1, -2, 5]]), 30 | 'ATGCSWRYKMBVHDNU') 31 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | Source documentation: 2 | ===================== 3 | 4 | 5 | :mod:`coral` -- Main Package 6 | **************************** 7 | 8 | .. automodule:: coral 9 | :members: 10 | :imported-members: 11 | 12 | 13 | :mod:`coral.analysis` -- Sequence analysis 14 | ------------------------------------------ 15 | 16 | .. automodule:: coral.analysis 17 | :members: 18 | :imported-members: 19 | 20 | 21 | :mod:`coral.constants` -- Data and constants 22 | -------------------------------------------- 23 | 24 | .. automodule:: coral.constants 25 | :members: 26 | :imported-members: 27 | 28 | 29 | :mod:`coral.database` -- Scientific database access 30 | --------------------------------------------------- 31 | 32 | .. automodule:: coral.database 33 | :members: 34 | :imported-members: 35 | 36 | 37 | :mod:`coral.design` -- Design sequences, primers, etc 38 | ------------------------------------------------------ 39 | 40 | .. automodule:: coral.design 41 | :members: 42 | :imported-members: 43 | 44 | 45 | :mod:`coral.reaction` -- Cloning and molecular genetics reactions 46 | ----------------------------------------------------------------- 47 | 48 | .. 
automodule:: coral.reaction 49 | :members: 50 | :imported-members: 51 | 52 | 53 | :mod:`coral.seqio` -- Read and write sequences 54 | ---------------------------------------------- 55 | 56 | .. automodule:: coral.seqio 57 | :members: 58 | :imported-members: 59 | 60 | 61 | .. :mod:`coral.sequence` -- DNA, RNA, and Peptide sequences 62 | .. -------------------------------------------------------- 63 | .. 64 | .. .. automodule:: coral.sequence 65 | .. :members: 66 | .. :imported-members: 67 | -------------------------------------------------------------------------------- /tests/tests/test_reaction/test_restriction.py: -------------------------------------------------------------------------------- 1 | '''Test restriction reaction module.''' 2 | 3 | from nose.tools import assert_equal 4 | from coral import reaction, DNA, RestrictionSite 5 | 6 | 7 | class TestDigest(object): 8 | '''Test digest function.''' 9 | def __init__(self): 10 | # Contains NcoI site 11 | self.dna = DNA('TGACCATGGAAA') 12 | 13 | def test_not_found(self): 14 | '''If site not found, should return input sequence in list.''' 15 | ecorv = RestrictionSite(DNA('GATATC'), (3, 3), name='EcoRV') 16 | assert_equal(self.dna, reaction.digest(self.dna, ecorv)[0]) 17 | 18 | def test_ncoi_cut(self): 19 | '''Test standard TypeII cutter.''' 20 | ncoi = RestrictionSite(DNA('CCATGG'), (1, 5), name='NcoI') 21 | assert_equal(reaction.digest(self.dna, ncoi), 22 | [DNA('TGAC----', bottom='CATGGTCA'), 23 | DNA('CATGGAAA', bottom='TTTC----')]) 24 | assert_equal(reaction.digest(self.dna.circularize(), ncoi), 25 | [DNA('CATGGAAATGAC----', bottom='CATGGTCATTTC----')]) 26 | 27 | def test_ecorv_cut(self): 28 | '''Test blunt-end cutter.''' 29 | ecorv = RestrictionSite(DNA('GATATC'), (3, 3), name='EcoRV') 30 | assert_equal(reaction.digest(DNA('GATATC'), ecorv), 31 | [DNA('GAT'), DNA('ATC')]) 32 | 33 | def test_psti_cut(self): 34 | '''Test 3\' cutter.''' 35 | psti = RestrictionSite(DNA('CTGCAG'), (5, 1), name='PstI') 36 | 
assert_equal(reaction.digest(DNA('ACTGCAGA'), psti), 37 | [DNA('ACTGCA', bottom='----GT'), 38 | DNA('----GA', bottom='TCTGCA')]) 39 | -------------------------------------------------------------------------------- /docs/tutorial/analysis/analysis_sequencing_files/analysis_sequencing_7_0.text: -------------------------------------------------------------------------------- 1 | 2 | Summary: 3 | -------- 4 | 5 | Mismatches: 3 6 | Insertions: 1 7 | Deletions: 0 8 | 9 | ## Mismatches 10 | pMODKan-HO-pACT1GEV_C3-T7-EEV_D11.ab1 11 | 12 | Positions 4687 to 4689: 13 | AGTCCAAAGGACAATTTTACG 14 | |||||||||| |||||||| 15 | ----------CACATTTTACG 16 | *** 17 | 18 | pMODKan-HO-pACT1GEV_C3-M13R_E11.ab1 19 | 20 | Positions 5550 to 5551: 21 | TTAGCTTTGTTCACTCGTGCC 22 | |||||||||| ||||||||| 23 | TTAGCTTTGTCA--------- 24 | ** 25 | 26 | pMODKan-HO-pACT1GEV_C3-676_H11.ab1 27 | 28 | Positions 5120 to 5121: 29 | GCACCGTCTTTGAATTATGAG 30 | |||||||||| ||||||||| 31 | GCACCGTCTTGA--------- 32 | ** 33 | 34 | ## Insertions 35 | pMODKan-HO-pACT1GEV_C3-771_C12.ab1 36 | 37 | Positions 7805 to 8008: 38 | GCCCTTTCGT------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ 39 | |||||||||| 40 | GCCCTTTCGTTCGCGCGTTTCGGTGATGACGGTGAAAACCTCTGACACATGCAGCTCCCGGAGACGGTCACAGCTTGTCTGTAAGCGGATGCCGGGAGCAGACAAGCCCGTCAGGGCGCGTCAGCGGGTGTTGGCGGGTGTCGGGGCTGGCTTAACTATGCGGCGTTTAAACTTAGCAGATGCGCGCACCTGCGTTGTTACCACAACTCTTATG 41 | ************************************************************************************************************************************************************************************************************ 42 | 43 | -------------------------------------------------------------------------------- /coral/analysis/_structure/dimers.py: -------------------------------------------------------------------------------- 1 | '''Check for primer dimers using 
NUPACK.''' 2 | import coral.analysis 3 | 4 | 5 | def dimers(primer1, primer2, concentrations=[5e-7, 3e-11]): 6 | '''Calculate expected fraction of primer dimers. 7 | 8 | :param primer1: Forward primer. 9 | :type primer1: coral.DNA 10 | :param primer2: Reverse primer. 11 | :type primer2: coral.DNA 12 | :param template: DNA template. 13 | :type template: coral.DNA 14 | :param concentrations: list of concentrations for primers and the 15 | template. Defaults are those for PCR with 1kb 16 | template. 17 | :type concentrations: list 18 | :returns: Fraction of dimers versus the total amount of primer added. 19 | :rtype: float 20 | 21 | ''' 22 | # It is not reasonable (yet) to use a long template for doing these 23 | # computations directly, as NUPACK does an exhaustive calculation and 24 | # would take too long without a cluster. 25 | # Instead, this function compares primer-primer binding to 26 | # primer-complement binding 27 | 28 | # Simulate binding of template vs. primers 29 | nupack = coral.analysis.NUPACK([primer1.primer(), primer2.primer(), 30 | primer1.primer().reverse_complement(), 31 | primer2.primer().reverse_complement()]) 32 | # Include reverse complement concentration 33 | primer_concs = [concentrations[0]] * 2 34 | template_concs = [concentrations[1]] * 2 35 | concs = primer_concs + template_concs 36 | nupack_concs = nupack.concentrations(2, conc=concs) 37 | dimer_conc = nupack_concs['concentrations'][5] 38 | #primer1_template = nupack_concs['concentrations'][6] 39 | #primer2_template = nupack_concs['concentrations'][10] 40 | return dimer_conc / concs[0] 41 | -------------------------------------------------------------------------------- /bin/run_docs_server.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """Uses nbconvert to convert all ipynbs in the ipython_examples dir until I 4 | can figure out a way to use a proper sphinx extension. 
This is really hacky and 5 | should not be used as a secure production server.""" 6 | # TODO: catch conversion errors (right now they pass silently) 7 | # Doesn't even use IPython API (TODO!) 8 | import os 9 | from tornado import web, ioloop, httpserver 10 | from ipynb2rst import convert_ipynbs 11 | from build_sphinx_docs import build_docs 12 | 13 | 14 | DOCSDIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "../docs")) 15 | ROOT = os.path.abspath(os.path.join(DOCSDIR, "_build/html")) 16 | STATIC = os.path.join(ROOT, "_static") 17 | 18 | 19 | # Web server (Tornado) classes 20 | class MainHandler(web.RequestHandler): 21 | def get(self, path, *args): 22 | uri = self.request.uri 23 | if uri == "/" or uri == "index.html": 24 | self.render("index.html") 25 | else: 26 | self.render(self.request.uri[1:]) 27 | 28 | 29 | class Application(web.Application): 30 | def __init__(self): 31 | handlers = [(r"^(.(?!_static))*$", MainHandler), 32 | (r"/_static/(.*)", web.StaticFileHandler, 33 | {"path": STATIC})] 34 | settings = {"template_path": ROOT, 35 | "static_url_prefix": "_static"} 36 | web.Application.__init__(self, handlers, **settings) 37 | 38 | 39 | if __name__ == "__main__": 40 | # Convert notebooks from ipynb to rst 41 | convert_ipynbs(DOCSDIR) 42 | # Build sphinx docs (produces html) 43 | build_docs(DOCSDIR) 44 | # Launch server 45 | applicaton = Application() 46 | http_server = httpserver.HTTPServer(applicaton) 47 | http_server.listen(3089) 48 | 49 | ioloop.IOLoop.instance().start() 50 | -------------------------------------------------------------------------------- /pre-commit.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # 3 | # A pre-commit hook that runs nosetests prior to commits 4 | # 5 | # To enable this hook, link it to .git/hooks using 6 | # ln -s ../../pre-commit.sh .git/hooks/pre-commit 7 | 8 | do_nosetests() { 9 | $1 -v 10 | } 11 | 12 | fail () { 13 | echo "$@: [FAILED]" 14 | exit 1 15 | } 
16 | 17 | 18 | if git rev-parse --verify HEAD >/dev/null 2>&1 19 | then 20 | against=HEAD 21 | else 22 | # Initial commit: diff against an empty tree object 23 | against=4b825dc642cb6eb9a060e54bf8d69288fbee4904 24 | fi 25 | 26 | # If you want to allow non-ASCII filenames set this variable to true. 27 | allownonascii=$(git config --bool hooks.allownonascii) 28 | 29 | # Redirect output to stderr. 30 | exec 1>&2 31 | 32 | # Cross platform projects tend to avoid non-ASCII filenames; prevent 33 | # them from being added to the repository. We exploit the fact that the 34 | # printable range starts at the space character and ends with tilde. 35 | if [ "$allownonascii" != "true" ] && 36 | # Note that the use of brackets around a tr range is ok here, (it's 37 | # even required, for portability to Solaris 10's /usr/bin/tr), since 38 | # the square bracket bytes happen to fall in the designated range. 39 | test $(git diff --cached --name-only --diff-filter=A -z $against | 40 | LC_ALL=C tr -d '[ -~]\0' | wc -c) != 0 41 | then 42 | cat <<\EOF 43 | Error: Attempt to add a non-ASCII file name. 44 | 45 | This can cause problems if you want to work with people on other platforms. 46 | 47 | To be portable it is advisable to rename the file. 48 | 49 | If you know what you are doing you can disable this check using: 50 | 51 | git config hooks.allownonascii true 52 | EOF 53 | exit 1 54 | fi 55 | 56 | echo "Running unit tests" 57 | do_nosetests nosetests || fail nosetests 58 | 59 | # If there are whitespace errors, print the offending file names and fail. 
60 | exec git diff-index --check --cached $against --relative=coral -- 61 | -------------------------------------------------------------------------------- /docs/tutorial/sequences_files/sequences_17_0.text: -------------------------------------------------------------------------------- 1 | ['ATGAGTAAAGGAGAAGAACTTTTCACTGGA', 'AAGAGTAAAGGAGAAGAACTTTTCACTGGA', 'ATAAGTAAAGGAGAAGAACTTTTCACTGGA', 'ATGAGTAAAGGAGAAGAACTTTTCACTGGA', 'ATGAATAAAGGAGAAGAACTTTTCACTGGA', 'ATGAGAAAAGGAGAAGAACTTTTCACTGGA', 'ATGAGTAAAGGAGAAGAACTTTTCACTGGA', 'ATGAGTAAAGGAGAAGAACTTTTCACTGGA', 'ATGAGTAAAGGAGAAGAACTTTTCACTGGA', 'ATGAGTAAAAGAGAAGAACTTTTCACTGGA', 'ATGAGTAAAGAAGAAGAACTTTTCACTGGA', 'ATGAGTAAAGGAGAAGAACTTTTCACTGGA', 'ATGAGTAAAGGAAAAGAACTTTTCACTGGA', 'ATGAGTAAAGGAGAAGAACTTTTCACTGGA', 'ATGAGTAAAGGAGAAGAACTTTTCACTGGA', 'ATGAGTAAAGGAGAAAAACTTTTCACTGGA', 'ATGAGTAAAGGAGAAGAACTTTTCACTGGA', 'ATGAGTAAAGGAGAAGAACTTTTCACTGGA', 'ATGAGTAAAGGAGAAGAAATTTTCACTGGA', 'ATGAGTAAAGGAGAAGAACATTTCACTGGA', 'ATGAGTAAAGGAGAAGAACTATTCACTGGA', 'ATGAGTAAAGGAGAAGAACTTATCACTGGA', 'ATGAGTAAAGGAGAAGAACTTTACACTGGA', 'ATGAGTAAAGGAGAAGAACTTTTAACTGGA', 'ATGAGTAAAGGAGAAGAACTTTTCACTGGA', 'ATGAGTAAAGGAGAAGAACTTTTCAATGGA', 'ATGAGTAAAGGAGAAGAACTTTTCACAGGA', 'ATGAGTAAAGGAGAAGAACTTTTCACTAGA', 'ATGAGTAAAGGAGAAGAACTTTTCACTGAA', 'ATGAGTAAAGGAGAAGAACTTTTCACTGGA'] 2 | 3 | ['AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA', 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA', 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA', 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA', 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA', 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA', 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA', 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA', 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA', 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA', 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA', 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA', 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA', 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA', 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA', 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA', 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA', 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA', 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA', 
'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA', 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA', 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA', 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA', 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA', 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA', 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA', 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA', 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA', 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA', 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'] 4 | -------------------------------------------------------------------------------- /tests/test_nupack/data/concentrations_pairs.cx-fpairs: -------------------------------------------------------------------------------- 1 | % NUPACK 3.0 2 | % This is test_concentrations.fpairs, an output file generated for a "concentrations" 3 | % calculation of base pair fractions. 4 | % For information on contents, see NUPACK manual. 5 | % Program: concentrations 6 | % Start time: Thu Jan 28 10:59:24 2016 PST 7 | % Command: ../bin/concentrations -pairs test_concentrations 8 | % Minimum output pair fraction: 0.001 9 | % Initial monomer concentrations: 10 | % 1: 1.000000e-06 Molar 11 | % 2: 1.000000e-06 Molar 12 | % 13 | % Following is the header from the input file (test_concentrations.cx): 14 | % 15 | % NUPACK 3.0 16 | % Program: complexes 17 | % Start time: Thu Jan 28 01:27:25 2016 PST 18 | % 19 | % Command: ../bin/complexes -quiet -pairs -cutoff 0.001 test_complexes 20 | % Maximum complex size to enumerate: 4 21 | % Number of complexes from enumeration: 14 22 | % Additional complexes from .list file: 0 23 | % Total number of permutations to calculate: 15 24 | % Parameters: RNA, 1995 25 | % Dangles setting: 1 26 | % Temperature (C): 37.0 27 | % Sodium concentration: 1.0000 M 28 | % Magnesium concentration: 0.0000 M 29 | % 30 | % Do not change the comments below this line, as they may be read by other programs! 
31 | % 32 | % Number of strands: 2 33 | % id sequence 34 | % 1 GATACTAGCG 35 | % 2 TACGATT 36 | % T = 37.0 37 | 17 38 | 1 5 7.781987e-03 39 | 1 9 1.110839e-01 40 | 1 18 8.812159e-01 41 | 2 6 6.730637e-03 42 | 2 18 9.933000e-01 43 | 3 7 1.139972e-02 44 | 3 8 1.806719e-03 45 | 3 18 9.868000e-01 46 | 4 18 9.999984e-01 47 | 5 1 7.781987e-03 48 | 5 10 7.780911e-03 49 | 5 18 9.843713e-01 50 | 6 2 6.730637e-03 51 | 6 18 9.932675e-01 52 | 7 3 1.139972e-02 53 | 7 18 9.885687e-01 54 | 8 3 1.806719e-03 55 | 8 18 9.980516e-01 56 | 9 1 1.110839e-01 57 | 9 18 8.887963e-01 58 | 10 5 7.780911e-03 59 | 10 18 9.922003e-01 60 | 11 15 3.394092e-03 61 | 11 18 9.965996e-01 62 | 12 16 2.086032e-03 63 | 12 17 1.507996e-03 64 | 12 18 9.963997e-01 65 | 13 18 9.999922e-01 66 | 14 18 9.999922e-01 67 | 15 11 3.394092e-03 68 | 15 18 9.965996e-01 69 | 16 12 2.086032e-03 70 | 16 18 9.978997e-01 71 | 17 12 1.507996e-03 72 | 17 18 9.984999e-01 73 | -------------------------------------------------------------------------------- /coral/analysis/_sequencing/mafft.py: -------------------------------------------------------------------------------- 1 | '''A Coral wrapper for the MAFFT command line multiple sequence aligner.''' 2 | import coral 3 | import os 4 | import shutil 5 | import subprocess 6 | import tempfile 7 | 8 | 9 | def MAFFT(sequences, gap_open=1.53, gap_extension=0.0, retree=2): 10 | '''A Coral wrapper for the MAFFT command line multiple sequence aligner. 11 | 12 | :param sequences: A list of sequences to align. 13 | :type sequences: List of homogeneous sequences (all DNA, or all RNA, 14 | etc.) 15 | :param gap_open: --op (gap open) penalty in MAFFT cli. 16 | :type gap_open: float 17 | :param gap_extension: --ep (gap extension) penalty in MAFFT cli. 18 | :type gap_extension: float 19 | :param retree: Number of times to build the guide tree. 
20 | :type retree: int 21 | 22 | ''' 23 | arguments = ['mafft'] 24 | arguments += ['--op', str(gap_open)] 25 | arguments += ['--ep', str(gap_extension)] 26 | arguments += ['--retree', str(retree)] 27 | arguments.append('input.fasta') 28 | tempdir = tempfile.mkdtemp() 29 | try: 30 | with open(os.path.join(tempdir, 'input.fasta'), 'w') as f: 31 | for i, sequence in enumerate(sequences): 32 | if hasattr(sequence, 'name'): 33 | name = sequence.name 34 | else: 35 | name = 'sequence{}'.format(i) 36 | f.write('>{}\n'.format(name)) 37 | f.write(str(sequence) + '\n') 38 | process = subprocess.Popen(arguments, stdout=subprocess.PIPE, 39 | stderr=open(os.devnull, 'w'), cwd=tempdir) 40 | stdout = process.communicate()[0] 41 | finally: 42 | shutil.rmtree(tempdir) 43 | 44 | # Process stdout into something downstream process can use 45 | 46 | records = stdout.split('>') 47 | # First line is now blank 48 | records.pop(0) 49 | aligned_list = [] 50 | for record in records: 51 | lines = record.split('\n') 52 | name = lines.pop(0) 53 | aligned_list.append(coral.DNA(''.join(lines))) 54 | 55 | return aligned_list 56 | -------------------------------------------------------------------------------- /docs/tutorial/seqio_files/seqio_7_0.text: -------------------------------------------------------------------------------- 1 | [pGEX_3_primer 'misc_feature' feature (28 to 51) on strand 1, 2 | pMOD_t1pre 'misc_feature' feature (132 to 154) on strand 0, 3 | PmeI(1) 'misc_feature' feature (154 to 162) on strand 0, 4 | HO Targeting 1 'misc_feature' feature (162 to 725) on strand 0, 5 | pMOD_t1suf 'misc_feature' feature (725 to 755) on strand 0, 6 | KANMX Wach et al 1994 (genome del. 
project) 'misc_feature' feature (755 to 1152) on strand 0, 7 | KanMX CDS 'misc_feature' feature (1152 to 1962) on strand 0, 8 | KanMX terminator 'misc_feature' feature (1962 to 2200) on strand 0, 9 | M13 Forward (-47) primer 'primer_bind' feature (2200 to 2224) on strand 0, 10 | pACT1 'misc_feature' feature (2224 to 2885) on strand 0, 11 | Extra sequence not found in Gottschling map 'misc_feature' feature (2921 to 2932) on strand 0, 12 | GAL4(1-93) DBD 'misc_feature' feature (2940 to 3218) on strand 0, 13 | Differs from Gottschling map (backbone) 'misc_feature' feature (3218 to 3219) on strand 0, 14 | hER HBD 'misc_feature' feature (3255 to 4140) on strand 0, 15 | HSV1 VP16 'misc_feature' feature (4140 to 4344) on strand 0, 16 | Differs from Gottschling Map 'misc_feature' feature (4235 to 4236) on strand 0, 17 | stop codon 'misc_feature' feature (4344 to 4347) on strand 0, 18 | L2 'misc_feature' feature (4347 to 4377) on strand 0, 19 | T + pBluescript KS linker 'misc_feature' feature (4377 to 4399) on strand 0, 20 | CYC1 'terminator' feature (4403 to 4643) on strand 0, 21 | pYESTrp_rev primer 'primer_bind' feature (4412 to 4431) on strand 1, 22 | T7 EEV primer 'primer_bind' feature (4643 to 4665) on strand 0, 23 | upstream HO targeting 'misc_feature' feature (4665 to 5571) on strand 0, 24 | PmeI 'misc_feature' feature (5571 to 5579) on strand 0, 25 | PmeI site 'misc_feature' feature (5571 to 5579) on strand 0, 26 | M13R 'misc_feature' feature (5579 to 5619) on strand 0, 27 | origin-extended 'misc_feature' feature (5804 to 5889) on strand 0, 28 | ori 'misc_feature' feature (5889 to 6744) on strand 0, 29 | is a g in normal maps. 
'misc_feature' feature (6426 to 6427) on strand 0, 30 | bla 'misc_feature' feature (6744 to 7605) on strand 0, 31 | AmpR promoter 'misc_feature' feature (7605 to 7684) on strand 0, 32 | New Feature 'misc_feature' feature (7684 to 7704) on strand 0] -------------------------------------------------------------------------------- /docs/tutorial/seqio_files/seqio_8_0.text: -------------------------------------------------------------------------------- 1 | [pGEX_3_primer 'misc_feature' feature (28 to 51) on strand 1, 2 | pMOD_t1pre 'misc_feature' feature (132 to 154) on strand 0, 3 | PmeI(1) 'misc_feature' feature (154 to 162) on strand 0, 4 | HO Targeting 1 'misc_feature' feature (162 to 725) on strand 0, 5 | pMOD_t1suf 'misc_feature' feature (725 to 755) on strand 0, 6 | KANMX Wach et al 1994 (genome del. project) 'misc_feature' feature (755 to 1152) on strand 0, 7 | KanMX CDS 'misc_feature' feature (1152 to 1962) on strand 0, 8 | KanMX terminator 'misc_feature' feature (1962 to 2200) on strand 0, 9 | M13 Forward (-47) primer 'primer_bind' feature (2200 to 2224) on strand 0, 10 | pACT1 'misc_feature' feature (2224 to 2885) on strand 0, 11 | Extra sequence not found in Gottschling map 'misc_feature' feature (2921 to 2932) on strand 0, 12 | GAL4(1-93) DBD 'misc_feature' feature (2940 to 3218) on strand 0, 13 | Differs from Gottschling map (backbone) 'misc_feature' feature (3218 to 3219) on strand 0, 14 | hER HBD 'misc_feature' feature (3255 to 4140) on strand 0, 15 | HSV1 VP16 'misc_feature' feature (4140 to 4344) on strand 0, 16 | Differs from Gottschling Map 'misc_feature' feature (4235 to 4236) on strand 0, 17 | stop codon 'misc_feature' feature (4344 to 4347) on strand 0, 18 | L2 'misc_feature' feature (4347 to 4377) on strand 0, 19 | T + pBluescript KS linker 'misc_feature' feature (4377 to 4399) on strand 0, 20 | CYC1 'terminator' feature (4403 to 4643) on strand 0, 21 | pYESTrp_rev primer 'primer_bind' feature (4412 to 4431) on strand 1, 22 | T7 EEV primer 
'primer_bind' feature (4643 to 4665) on strand 0, 23 | upstream HO targeting 'misc_feature' feature (4665 to 5571) on strand 0, 24 | PmeI 'misc_feature' feature (5571 to 5579) on strand 0, 25 | PmeI site 'misc_feature' feature (5571 to 5579) on strand 0, 26 | M13R 'misc_feature' feature (5579 to 5619) on strand 0, 27 | origin-extended 'misc_feature' feature (5804 to 5889) on strand 0, 28 | ori 'misc_feature' feature (5889 to 6744) on strand 0, 29 | is a g in normal maps. 'misc_feature' feature (6426 to 6427) on strand 0, 30 | bla 'misc_feature' feature (6744 to 7605) on strand 0, 31 | AmpR promoter 'misc_feature' feature (7605 to 7684) on strand 0, 32 | New Feature 'misc_feature' feature (7684 to 7704) on strand 0] -------------------------------------------------------------------------------- /tests/tests/test_design/test_gibson.py: -------------------------------------------------------------------------------- 1 | '''Test gibson design module.''' 2 | from nose.tools import assert_equal, assert_raises 3 | from coral import design, DNA, Primer 4 | 5 | 6 | def test_gibson_primers(): 7 | '''Test gibson_primers function.''' 8 | # Fuse tdh3 promoter sequence to yfp (trimmed for readability) 9 | tdh3_3prime = DNA('aaccagttccctgaaattattcccctacttgactaataagtat' + 10 | 'ataaagacggtaggtattgattgtaattctgtaaatctatttc' + 11 | 'ttaaacttc') 12 | yfp_nterm = DNA('atggtgagcaagggcgaggagctgttcaccggggtggtgcccatc' + 13 | 'ctggtcgagctggacggcgacgtaaacggccacaagttcagcgtg' + 14 | 'tccggcgagggcgagggcgatgccacctacggcaagctgaccctg' + 15 | 'aag') 16 | # Expected annealing sequences and their Tms 17 | fwd_anneal = DNA('atggtgagcaagggcg') 18 | fwd_tm = 64.64172107821065 19 | rev_anneal = DNA('gaagtttaagaaatagatttacagaattacaatcaatac') 20 | rev_tm = 64.24536287254085 21 | # Expected overlaps 22 | all_right = DNA('TCGCCCTTGCTCACCAT') 23 | all_left = DNA('GGTATTGATTGTAATTCTGTAAATCTATTTCTTAAACTTC') 24 | mixed_fwd = DNA('TTCTTAAACTTC') 25 | mixed_rev = DNA('CCTTGCTCACCAT') 26 | # Design primers - with 
homology all on left side, right side, or mixed 27 | # All on the 'right' - i.e. fwd primer 28 | right = design.gibson_primers(tdh3_3prime, yfp_nterm, 'right') 29 | right_rev = Primer(rev_anneal, tm=rev_tm, overhang=all_right) 30 | right_fwd = Primer(fwd_anneal, tm=fwd_tm) 31 | assert_equal(right, (right_rev, right_fwd)) 32 | # All on the 'left' - i.e. rev primer 33 | left = design.gibson_primers(tdh3_3prime, yfp_nterm, 'left') 34 | left_rev = Primer(rev_anneal, tm=rev_tm) 35 | left_fwd = Primer(fwd_anneal, tm=fwd_tm, overhang=all_left) 36 | assert_equal(left, (left_rev, left_fwd)) 37 | # On both primers 38 | mixed = design.gibson_primers(tdh3_3prime, yfp_nterm, 'mixed') 39 | mixed_primer1 = Primer(rev_anneal, tm=rev_tm, overhang=mixed_rev) 40 | mixed_primer2 = Primer(fwd_anneal, tm=fwd_tm, overhang=mixed_fwd) 41 | assert_equal(mixed, (mixed_primer1, mixed_primer2)) 42 | 43 | assert_raises(ValueError, design.gibson_primers, tdh3_3prime, 44 | yfp_nterm, 'duck') 45 | -------------------------------------------------------------------------------- /coral/reaction/_resect.py: -------------------------------------------------------------------------------- 1 | '''Resection (need a new name!) - exonuclease activity.''' 2 | import coral 3 | 4 | 5 | def five_resect(dna, n_bases): 6 | '''Remove bases from 5' end of top strand. 7 | 8 | :param dna: Sequence to resect. 9 | :type dna: coral.DNA 10 | :param n_bases: Number of bases cut back. 11 | :type n_bases: int 12 | :returns: DNA sequence resected at the 5' end by n_bases. 13 | :rtype: coral.DNA 14 | 15 | ''' 16 | new_instance = dna.copy() 17 | if n_bases >= len(dna): 18 | new_instance.top.seq = ''.join(['-' for i in range(len(dna))]) 19 | else: 20 | new_instance.top.seq = '-' * n_bases + str(dna)[n_bases:] 21 | 22 | new_instance = _remove_end_gaps(new_instance) 23 | 24 | return new_instance 25 | 26 | 27 | def three_resect(dna, n_bases): 28 | '''Remove bases from 3' end of top strand. 
29 | 30 | :param dna: Sequence to resect. 31 | :type dna: coral.DNA 32 | :param n_bases: Number of bases cut back. 33 | :type n_bases: int 34 | :returns: DNA sequence resected at the 3' end by n_bases. 35 | :rtype: coral.DNA 36 | 37 | ''' 38 | new_instance = dna.copy() 39 | if n_bases >= len(dna): 40 | new_instance.top.seq = ''.join(['-' for i in range(len(dna))]) 41 | else: 42 | new_instance.top.seq = str(dna)[:-n_bases] + '-' * n_bases 43 | 44 | new_instance = _remove_end_gaps(new_instance) 45 | 46 | return new_instance 47 | 48 | 49 | def _remove_end_gaps(sequence): 50 | '''Removes double-stranded gaps from ends of the sequence. 51 | 52 | :returns: The current sequence with terminal double-strand gaps ('-') 53 | removed. 54 | :rtype: coral.DNA 55 | 56 | ''' 57 | # Count terminal blank sequences 58 | def count_end_gaps(seq): 59 | gap = coral.DNA('-') 60 | count = 0 61 | for base in seq: 62 | if base == gap: 63 | count += 1 64 | else: 65 | break 66 | 67 | return count 68 | 69 | top_left = count_end_gaps(sequence.top) 70 | top_right = count_end_gaps(reversed(sequence.top)) 71 | bottom_left = count_end_gaps(reversed(sequence.bottom)) 72 | bottom_right = count_end_gaps(sequence.bottom) 73 | 74 | # Trim sequence 75 | left_index = min(top_left, bottom_left) 76 | right_index = len(sequence) - min(top_right, bottom_right) 77 | 78 | return sequence[left_index:right_index] 79 | -------------------------------------------------------------------------------- /tests/tests/test_reaction/test_gibson/test_gibson_construction.py: -------------------------------------------------------------------------------- 1 | import os 2 | from nose.tools import assert_equal, assert_raises, assert_true, assert_false 3 | from coral import reaction, seqio 4 | 5 | 6 | def test_construction(): 7 | plasmid_path = os.path.join(os.path.dirname(__file__), 'gibson_test.fasta') 8 | f1_path = os.path.join(os.path.dirname(__file__), 'fragment_1.fasta') 9 | f2_path = 
os.path.join(os.path.dirname(__file__), 'fragment_2.fasta') 10 | f3_path = os.path.join(os.path.dirname(__file__), 'fragment_3.fasta') 11 | f3_linear_path = os.path.join(os.path.dirname(__file__), 12 | 'fragment_3_linear.fasta') 13 | plasmid = seqio.read_dna(plasmid_path).circularize() 14 | f1 = seqio.read_dna(f1_path) 15 | f2 = seqio.read_dna(f2_path) 16 | f3 = seqio.read_dna(f3_path) 17 | f3_linear = seqio.read_dna(f3_linear_path) 18 | 19 | gibsoned_circular = reaction.gibson([f1, f2, f3]) 20 | gibsoned_linear = reaction.gibson([f1, f2, f3_linear], linear=True) 21 | # 22 | expected_length = len(plasmid) 23 | gibsoned_circular_length = len(gibsoned_circular) 24 | gibsoned_linear_length = len(gibsoned_linear) 25 | assert_equal(gibsoned_circular_length, expected_length) 26 | assert_equal(gibsoned_linear_length, expected_length) 27 | assert_true(gibsoned_circular.circular) 28 | assert_false(gibsoned_linear.circular) 29 | assert(plasmid.is_rotation(gibsoned_circular)) 30 | try: 31 | assert_equal(str(plasmid), str(gibsoned_linear)) 32 | except AssertionError: 33 | assert_equal(str(plasmid), str(gibsoned_linear.flip())) 34 | 35 | # Should fail with circular input 36 | assert_raises(ValueError, reaction.gibson, [f1.circularize()]) 37 | # Should fail if compatible end can't be found 38 | assert_raises(Exception, reaction.gibson, [f1, f3[50:-50]], linear=True) 39 | normal = [f1, f2, f3] 40 | rotated = [f1, f2, f3.reverse_complement()] 41 | # Gibson should work regardless of fragment orientation 42 | assert_true(reaction.gibson(normal).is_rotation(reaction.gibson(rotated))) 43 | # A redundant fragment shouldn't affect the outcome 44 | assert_equal(reaction.gibson([f1, f2, f3]), 45 | reaction.gibson([f1, f2, f2, f3])) 46 | # A fragment that can't circularize should raise a ValueError 47 | assert_raises(ValueError, reaction.gibson, [f1, f2, f3[:-80]]) 48 | # But should still work fine as a linear fragment 49 | 50 | 51 | def test_annotations(): 52 | pass 53 | 
-------------------------------------------------------------------------------- /tests/tests/test_reaction/test_utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Tests utils submodule of reaction module. 3 | 4 | ''' 5 | 6 | from nose.tools import assert_equal, assert_raises 7 | from coral import reaction, DNA 8 | 9 | 10 | def test_convert_sequence(): 11 | '''Tests DNA translation function.''' 12 | 13 | seq = 'ATGGTGAGCAAGGGCGAGGAGCTGTTCACCGGGGTGGTGCCCATCCTGGTCGAGCTGGACGGC' + \ 14 | 'GACGTAAACGGCCACAAGTTCAGCGTGTCCGGCGAGGGCGAGGGCGATGCCACCTACGGCAAG' + \ 15 | 'CTGACCCTGAAGTTCATCTGCACCACCGGCAAGCTGCCCGTGCCCTGGCCCACCCTCGTGACC' + \ 16 | 'ACCTTCGGCTACGGCCTGCAGTGCTTCGCCCGCTACCCCGACCACATGAAGCAGCACGACTTC' + \ 17 | 'TTCAAGTCCGCCATGCCCGAAGGCTACGTCCAGGAGCGCACCATCTTCTTCAAGGACGACGGC' + \ 18 | 'AACTACAAGACCCGCGCCGAGGTGAAGTTCGAGGGCGACACCCTGGTGAACCGCATCGAGCTG' + \ 19 | 'AAGGGCATCGACTTCAAGGAGGACGGCAACATCCTGGGGCACAAGCTGGAGTACAACTACAAC' + \ 20 | 'AGCCACAACGTCTATATCATGGCCGACAAGCAGAAGAACGGCATCAAGGTGAACTTCAAGATC' + \ 21 | 'CGCCACAACATCGAGGACGGCAGCGTGCAGCTCGCCGACCACTACCAGCAGAACACCCCCATC' + \ 22 | 'GGCGACGGCCCCGTGCTGCTGCCCGACAACCACTACCTGAGCTACCAGTCCGCCCTGAGCAAA' + \ 23 | 'GACCCCAACGAGAAGCGCGATCACATGGTCCTGCTGGAGTTCGTGACCGCCGCCGGGATCACT' + \ 24 | 'CTCGGCATGGACGAGCTGTACAAGTAA' 25 | dna = DNA(seq) 26 | prot = 'MVSKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLV' + \ 27 | 'TTFGYGLQCFARYPDHMKQHDFFKSAMPEGYVQERTIFFKDDGNYKTRAEVKFEGDTLVNRI' + \ 28 | 'ELKGIDFKEDGNILGHKLEYNYNSHNVYIMADKQKNGIKVNFKIRHNIEDGSVQLADHYQQN' + \ 29 | 'TPIGDGPVLLPDNHYLSYQSALSKDPNEKRDHMVLLEFVTAAGITLGMDELYK' 30 | rna = reaction.utils.convert_sequence(dna, 'rna') 31 | r_trans = reaction.utils.convert_sequence(rna, 'dna') 32 | trans = reaction.utils.convert_sequence(rna, 'peptide') 33 | assert_equal(str(trans), prot) 34 | assert_equal(str(r_trans), seq) 35 | assert_raises(ValueError, reaction.utils.convert_sequence, seq, 'rna') 36 | 37 | # Gapped sequence shouldfail 38 | 
assert_raises(ValueError, reaction.utils.convert_sequence, 39 | DNA('atg-'), 'rna') 40 | 41 | # Sequence without stop codon should still work 42 | nostop_dna = DNA('atgaaaaaaaaaaaa') 43 | nostop_rna = reaction.utils.convert_sequence(nostop_dna, 'rna') 44 | nostop_peptide = reaction.utils.convert_sequence(nostop_rna, 'peptide') 45 | assert_equal(str(nostop_rna), 'AUGAAAAAAAAAAAA') 46 | assert_equal(str(nostop_peptide), 'MKKKK') 47 | 48 | assert_raises(ValueError, reaction.utils.convert_sequence, 'duck', 'rna') 49 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # coral 2 | 3 | 4 | [![Build Status](https://travis-ci.org/klavinslab/coral.svg?branch=master)](https://travis-ci.org/klavinslab/coral) 5 | 6 | [![Documentation Status](https://readthedocs.org/projects/coral/badge/?version=latest)](https://readthedocs.org/projects/coral/?badge=latest) 7 | 8 | 9 | Coral: Core tools for synthetic DNA design. Read the documentation at http://coral.readthedocs.org. 10 | 11 | Coral is a Python library for encoding the process of designing synthetic DNA constructs. Coral mirrors the traditional design steps used in GUI-based sequence design (ApE, j5, Benchling, etc.) as operations on data structures, enables iterative design through analysis modules, and connects seamlessly to outside libraries. Through the use of Coral, you can translate your DNA design processes into concise, executable, and reusable scripts. 12 | 13 | Coral encodes synthetic DNA design rules into its core sequence data types (`DNA`, `RNA`, and `Peptide`), enabling concise, dependable methods for automated DNA design. 14 | 15 | Coral works with PyPy so long as a PyPy-compatible numpy is installed. 
16 | 17 | ## Installation: 18 | 19 | Most users: 20 | ``` 21 | pip install coral 22 | ``` 23 | 24 | Windows users: 25 | 26 | Install vcpython2.7 (http://aka.ms/vcpython27) if it's not already installed, 27 | then 28 | ``` 29 | pip install coral 30 | ``` 31 | 32 | Many Windows users have a better time installing python packages, including 33 | coral, using [Anaconda](https://www.continuum.io/downloads). 34 | 35 | To get the latest on git: 36 | 37 | ``` 38 | git clone https://github.com/klavinslab/coral.git 39 | cd coral 40 | pip install . 41 | ``` 42 | 43 | Docker images available here: https://hub.docker.com/u/coraldesign/ 44 | 45 | ## Requirements: 46 | 47 | ### python (pip-compatible): 48 | 49 | ``` 50 | numpy 51 | biopython 52 | ``` 53 | 54 | optional: 55 | 56 | | Package | Added functionality | 57 | | --- | --- | 58 | | `matplotlib` | plotting sequencing analysis | 59 | | `intermine`, `requests` | yeast database (intermine) functions | 60 | 61 | ### system: 62 | 63 | | Package | Added functionality | 64 | | --- | --- | 65 | | `NuPack` | Structural analysis | 66 | | `ViennaRNA` | Structural analysis | 67 | 68 | ## Development: 69 | 70 | Install the extra packages in dev-requirements.txt (`Sphinx`, `cython`, 71 | `nose`). 72 | 73 | ## License 74 | 75 | MIT 76 | 77 | ## Author 78 | 79 | The author and maintainer of Coral is Nick Bolten. Contact him (me) at 80 | nbolten@gmail.com with any and all Coral questions (don't be shy!). 81 | 82 | ## Changelog 83 | 84 | View the changelog [here](HISTORY.md). 85 | -------------------------------------------------------------------------------- /coral/reaction/_central_dogma.py: -------------------------------------------------------------------------------- 1 | '''The central dogma of biology - transcription and translation.''' 2 | import coral 3 | from . import utils 4 | 5 | 6 | def transcribe(dna): 7 | '''Transcribe DNA to RNA (no post-transcriptional processing). 8 | 9 | :param seq: Sequence to transcribe (DNA).
10 | :type seq: coral.DNA 11 | :returns: Transcribed sequence - an RNA sequence. 12 | :rtype: coral.RNA 13 | 14 | ''' 15 | return utils.convert_sequence(dna, 'rna') 16 | 17 | 18 | def translate(rna): 19 | '''Translate RNA to peptide. 20 | 21 | :param rna: Sequence to translate (RNA). 22 | :type rna: coral.RNA 23 | :returns: Translated sequence - a peptide. 24 | :rtype: coral.Peptide 25 | 26 | ''' 27 | return utils.convert_sequence(rna, 'peptide') 28 | 29 | 30 | def reverse_transcribe(rna): 31 | '''Reverse transcribe RNA to DNA. 32 | 33 | :param rna: Sequence to reverse transcribe (RNA). 34 | :type rna: coral.RNA 35 | :returns: Reverse-transcribed sequence - a DNA sequence. 36 | :rtype: coral.DNA 37 | 38 | ''' 39 | return utils.convert_sequence(rna, 'dna') 40 | 41 | 42 | def coding_sequence(rna): 43 | '''Extract coding sequence from an RNA template. 44 | 45 | :param seq: Sequence from which to extract a coding sequence. 46 | :type seq: coral.RNA 47 | :param material: Type of sequence ('dna' or 'rna') 48 | :type material: str 49 | :returns: The first coding sequence (start codon -> stop codon) matched 50 | from 5' to 3'. 51 | :rtype: coral.RNA 52 | :raises: ValueError if rna argument has no start codon. 53 | ValueError if rna argument has no stop codon in-frame with the 54 | first start codon. 
55 | 56 | ''' 57 | if isinstance(rna, coral.DNA): 58 | rna = transcribe(rna) 59 | codons_left = len(rna) // 3 60 | start_codon = coral.RNA('aug') 61 | stop_codons = [coral.RNA('uag'), coral.RNA('uga'), coral.RNA('uaa')] 62 | start = None 63 | stop = None 64 | valid = [None, None] 65 | index = 0 66 | while codons_left: 67 | codon = rna[index:index + 3] 68 | if valid[0] is None: 69 | if codon in start_codon: 70 | start = index 71 | valid[0] = True 72 | else: 73 | if codon in stop_codons: 74 | stop = index + 3 75 | valid[1] = True 76 | break 77 | index += 3 78 | codons_left -= 1 79 | 80 | if valid[0] is None: 81 | raise ValueError('Sequence has no start codon.') 82 | elif stop is None: 83 | raise ValueError('Sequence has no stop codon.') 84 | coding_rna = rna[start:stop] 85 | 86 | return coding_rna 87 | -------------------------------------------------------------------------------- /coral/constants/genbank.py: -------------------------------------------------------------------------------- 1 | '''Genbank constants''' 2 | 3 | TO_CORAL = {'-10_signal': '-10_signal', 4 | '-35_signal': '-35_signal', 5 | '3\'UTR': '3\'UTR', 6 | '5\'UTR': '5\'UTR', 7 | 'assembly_gap': 'assembly_gap', 8 | 'attenuator': 'attenuator', 9 | 'C_region': 'C_region', 10 | 'CAAT_signal': 'CAAT_signal', 11 | 'CDS': 'CDS', 12 | 'centromere': 'centromere', 13 | 'D-loop': 'D-loop', 14 | 'D_segment': 'D_segment', 15 | 'enhancer': 'enhancer', 16 | 'exon': 'exon', 17 | 'gap': 'gap', 18 | 'GC_signal': 'GC_signal', 19 | 'gene': 'gene', 20 | 'iDNA': 'iDNA', 21 | 'intron': 'intron', 22 | 'J_segment': 'J_segment', 23 | 'LTR': 'LTR', 24 | 'mat_peptide': 'mat_peptide', 25 | 'misc_binding': 'misc_binding', 26 | 'misc_difference': 'misc_difference', 27 | 'misc_feature': 'misc_feature', 28 | 'misc_recomb': 'misc_recomb', 29 | 'misc_RNA': 'misc_RNA', 30 | 'misc_signal': 'misc_signal', 31 | 'misc_structure': 'misc_structure', 32 | 'mobile_element': 'mobile_element', 33 | 'modified_base': 'modified_base', 34 | 'mRNA': 
'mRNA', 35 | 'ncRNA': 'ncRNA', 36 | 'N_region': 'N_region', 37 | 'old_sequence': 'old_sequence', 38 | 'operon': 'operon', 39 | 'oritT': 'oriT', 40 | 'polyA_signal': 'polyA_signal', 41 | 'polyA_site': 'polyA_site', 42 | 'precursor_RNA': 'precursor_RNA', 43 | 'prim_transcript': 'prim_transcript', 44 | 'primer_bind': 'primer_bind', 45 | 'promoter': 'promoter', 46 | 'protein_bind': 'protein_bind', 47 | 'RBS': 'RBS', 48 | 'repeat_region': 'repeat_region', 49 | 'rep_origin': 'rep_origin', 50 | 'rRNA': 'rRNA', 51 | 'S_region': 'S_region', 52 | 'sig_peptide': 'sig_peptide', 53 | 'source': 'source', 54 | 'stem_loop': 'stem_loop', 55 | 'STS': 'STS', 56 | 'TATA_signal': 'TATA_signal', 57 | 'telomere': 'telomere', 58 | 'terminator': 'terminator', 59 | 'tmRNA': 'tmRNA', 60 | 'transit_peptide': 'transit_peptide', 61 | 'tRNA': 'tRNA', 62 | 'unsure': 'unsure', 63 | 'V_region': 'V_region', 64 | 'V_segment': 'V_segment', 65 | 'variation': 'variation'} 66 | # 'site' appears in some ApE files / Geneious exports, not sure why 67 | TO_CORAL['site'] = 'site' 68 | TO_BIO = {value: key for key, value in TO_CORAL.iteritems()} 69 | -------------------------------------------------------------------------------- /coral/reaction/utils.py: -------------------------------------------------------------------------------- 1 | '''Utilities for reactions.''' 2 | import coral 3 | from coral.constants.molecular_bio import ALPHABETS, CODONS 4 | 5 | 6 | def convert_sequence(seq, to_material): 7 | '''Translate a DNA sequence into peptide sequence. 8 | 9 | The following conversions are supported: 10 | Transcription (seq is DNA, to_material is 'rna') 11 | Reverse transcription (seq is RNA, to_material is 'dna') 12 | Translation (seq is RNA, to_material is 'peptide') 13 | 14 | :param seq: DNA or RNA sequence. 15 | :type seq: coral.DNA or coral.RNA 16 | :param to_material: material to which to convert ('rna', 'dna', or 17 | 'peptide'). 
18 | :type to_material: str 19 | :returns: sequence of type coral.sequence.[material type] 20 | 21 | ''' 22 | if isinstance(seq, coral.DNA) and to_material == 'rna': 23 | # Transcribe 24 | 25 | # Can't transcribe a gap 26 | if '-' in seq: 27 | raise ValueError('Cannot transcribe gapped DNA') 28 | # Convert DNA chars to RNA chars 29 | origin = ALPHABETS['dna'][:-1] 30 | destination = ALPHABETS['rna'] 31 | code = dict(zip(origin, destination)) 32 | converted = ''.join([code.get(str(k), str(k)) for k in seq]) 33 | # Instantiate RNA object 34 | converted = coral.RNA(converted) 35 | elif isinstance(seq, coral.RNA): 36 | if to_material == 'dna': 37 | # Reverse transcribe 38 | origin = ALPHABETS['rna'] 39 | destination = ALPHABETS['dna'][:-1] 40 | code = dict(zip(origin, destination)) 41 | converted = ''.join([code.get(str(k), str(k)) for k in seq]) 42 | # Instantiate DNA object 43 | converted = coral.DNA(converted) 44 | elif to_material == 'peptide': 45 | # Translate 46 | seq_list = list(str(seq)) 47 | # Convert to peptide until stop codon is found. 
48 | converted = [] 49 | while True: 50 | if len(seq_list) >= 3: 51 | base_1 = seq_list.pop(0) 52 | base_2 = seq_list.pop(0) 53 | base_3 = seq_list.pop(0) 54 | codon = ''.join(base_1 + base_2 + base_3).upper() 55 | amino_acid = CODONS[codon] 56 | # Stop when stop codon is found 57 | if amino_acid == '*': 58 | break 59 | converted.append(amino_acid) 60 | else: 61 | break 62 | converted = ''.join(converted) 63 | converted = coral.Peptide(converted) 64 | else: 65 | msg1 = 'Conversion from ' 66 | msg2 = '{0} to {1} is not supported.'.format(seq.__class__.__name__, 67 | to_material) 68 | raise ValueError(msg1 + msg2) 69 | 70 | return converted 71 | -------------------------------------------------------------------------------- /docs/tutorial/analysis/analysis_sequencing.rst: -------------------------------------------------------------------------------- 1 | 2 | Sequencing 3 | ---------- 4 | 5 | Analyzing sequencing results (Sanger method) is a common cloning task 6 | that can eat up a lot of time without a lot of gain. The basic principle 7 | is that you receive the sequence for a short region of a template 8 | (usually a plasmid or PCR fragment) and need to compare it to your 9 | expected sequence to see if any errors were made (usualy 10 | mismatches/indels) or if you have confirmed a large enough part of your 11 | sequence (coverage). We'll go over how coral makes this straightforward 12 | and fast with a short example. 13 | 14 | First, we'll need to analyze our sequencing using the Sanger class 15 | (``analysis`` module) and read in sequences (``seqio`` module). 16 | 17 | .. code:: ipython2 18 | 19 | import coral as cor 20 | 21 | Then use seqio to read in our sequences. The Sanger class expects two 22 | inputs: a reference, or expected, sequence (``sequence.DNA`` object) and 23 | a list of the results (``list`` of ``sequence.DNA`` objects). 
Any way
that you want to generate those objects is fine, but a simple way is to
store them both in a single directory and then read them in.

As a reference sequence, I have a genbank file (actually an ApE file -
but it's basically genbank) that includes the complete plasmid I
sequenced annotated with a bunch of features. For this, use
``seqio.read_dna``, which expects a path to the file (.ape, .gb, .fa,
.fasta, .seq, .abi, and .ab1 are all acceptable formats).

In the same folder there are 15 results which should cover the entire
plasmid and show that the sequence is accurate. For this, we use
``seqio.read_sequencing``, which accepts a path to a folder and reads in
all sequencing (.seq, .ab1, .abi) files and stores them as a list of DNA
objects.

.. code:: ipython2

    reference = cor.seqio.read_dna('../files_for_tutorial/maps/pMODKan-HO-pACT1GEV.ape')
    results = cor.seqio.read_sequencing('../files_for_tutorial/sequencing_files/')

To compare the results to our expected sequence, we use the ``Sanger``
class, which does a Needleman-Wunsch alignment and scores any
discrepancies.

.. code:: ipython2

    alignment = cor.analysis.Sanger(reference, results)

The Sanger alignment object contains our aligned sequences and some
methods for analyzing them.

Chiefly, it's useful to look at where mismatches or indels are
predicted, as they may disagree with the consensus.

.. code:: ipython2

    alignment.plot()



.. image:: analysis_sequencing_files/analysis_sequencing_7_0.png


The plot is a bit ugly right now, but very functional. The deletions are
all right at the beginning or end of our results and so probably aren't
really there.
def digest(dna, restriction_enzyme):
    '''Digest a DNA template with a restriction endonuclease.

    :param dna: DNA template to digest.
    :type dna: coral.DNA
    :param restriction_enzyme: Enzyme whose recognition site is located on
                               the template and cut.
    :type restriction_enzyme: RestrictionSite
    :returns: list of digested DNA fragments (``[dna]`` if no site is found).
    :rtype: coral.DNA list

    '''
    site = restriction_enzyme.recognition_site
    hits = dna.locate(site)
    if not (hits[0] or hits[1]):
        # Nothing to cut on either strand
        return [dna]
    # Bottom strand hits are counted from the bottom strand's 5' end;
    # re-express them in top strand coordinates.
    site_len = len(site)
    bottom_hits = [len(dna) - position - site_len for position in hits[1]]
    if site.is_palindrome():
        # A palindromic site is found on both strands - keep one copy
        bottom_hits = [position for position in bottom_hits
                       if position not in hits[0]]
    # Work from the highest cut site down. _cut pops the last fragment off
    # the list (always the still-uncut left remainder), so the order in which
    # the two halves are appended below matters.
    fragments = [dna]
    for position in sorted(hits[0] + bottom_hits, reverse=True):
        left, right = _cut(fragments, position, restriction_enzyme)
        fragments.append(right)
        fragments.append(left)
    fragments.reverse()
    # A circular template has no free ends: rejoin last and first fragments
    if dna.circular:
        fragments[0] = fragments.pop() + fragments[0]
    return fragments


def _cut(dna, index, restriction_enzyme):
    '''Cut the most recently added fragment once at the given site index.

    :param dna: Fragments produced so far; the last element is removed from
                this list and cut.
    :type dna: list
    :param index: Index of the recognition site at which to cut.
    :type index: int
    :param restriction_enzyme: Enzyme with which to cut
    :type restriction_enzyme: coral.RestrictionSite
    :returns: 2-element list ``[left, right]`` of the digested sequence,
              including any overhangs.
    :rtype: list

    '''
    # TODO: handle case where cut site is outside of recognition sequence,
    # for both circular and linear cases where site is at index 0
    # Absolute cut positions on the top and bottom strands
    offsets = restriction_enzyme.cut_site
    top_cut = index + offsets[0]
    bottom_cut = index + offsets[1]

    # Both halves keep the stretch between the two cuts; it is trimmed to a
    # single-stranded overhang below.
    template = dna.pop()
    left = template[:max(top_cut, bottom_cut)]
    right = template[min(top_cut, bottom_cut):]

    overhang = top_cut - bottom_cut
    if overhang > 0:
        # 3' overhangs
        left = coral.reaction.five_resect(left.flip(), overhang).flip()
        right = coral.reaction.five_resect(right, overhang)
    elif overhang:
        # Negative difference: 5' overhangs
        left = coral.reaction.three_resect(left, abs(overhang))
        right = coral.reaction.three_resect(right.flip(),
                                            abs(overhang)).flip()
    # (zero difference: blunt-end cutter, nothing to trim)

    return [left, right]
def test_structure_windows():
    '''Compare StructureWindows scores for a fixed sequence against stored
    reference values.'''

    seq = ''.join([
        'atggtgagcaagggcgaggagctgttcaccggggtggtgcccatcctggtcgagctggacggc',
        'gacgtaaacggccacaagttcagcgtgtccggcgagggcgagggcgatgccacctacggcaag',
        'ctgaccctgaagttcatctgcaccaccggcaagctgcccgtgccctggcccaccctcgtgacc',
        'accttcggctacggcctgcagtgcttcgcccgctaccccgaccacatgaagcagcacgacttc',
        'ttcaagtccgccatgcccgaaggctacgtccaggagcgcaccatcttcttcaaggacgacggc',
        'aactacaagacccgcgccgaggtgaagttcgagggcgacaccctggtgaaccgcatcgagctg',
        'aagggcatcgacttcaaggaggacggcaacatcctggggcacaagctggagtacaactacaac',
        'agccacaacgtctatatcatggccgacaagcagaagaacggcatcaaggtgaacttcaagatc',
        'cgccacaacatcgaggacggcagcgtgcagctcgccgaccactaccagcagaacacccccatc',
        'ggcgacggccccgtgctgctgcccgacaaccactacctgagctaccagtccgccctgagcaaa',
        'gaccccaacgagaagcgcgatcacatggtcctgctggagttcgtgaccgccgccgggatcact',
        'ctcggcatggacgagctgtacaagtaa'])
    # Reference scores, one per window, in window order
    expected_scores = (
        0.578570075,
        0.5928413833333335,
        0.5535072916666667,
        0.5425574666666667,
        0.6028716333333335,
        0.5907444666666667,
        0.5532209166666666,
        0.5882098916666667,
        0.6471799,
        0.6957834999999999,
        0.6209094583333334,
        0.5929873583333332,
        0.6117790833333332,
        0.6116499166666667,
        0.5987705999999998,
        0.6439044999999999,
        0.6817365833333334,
        0.6488576499999998,
        0.6900404249999998,
        0.6657639999999999,
        0.7083993333333333,
        0.6360369916666666,
        0.6452116666666665,
        0.6395126666666666,
        0.6288818333333333,
        0.6351839999999999,
        0.6463396666666666,
        0.6717609166666665,
        0.67853025,
        0.7012450833333332,
        0.6620117499999998,
        0.7250783333333332,
        0.6995034166666668,
        0.7386933333333333,
        0.7494905833333333,
        0.7247731666666668,
        0.7510857500000001,
        0.7458025000000003,
        0.7434455,
        0.6702263583333334,
        0.6390452499999999,
        0.6503500249999998,
        0.646285175,
        0.606586825,
        0.5707148,
        0.644573625,
        0.6644399750000001,
        0.6716777749999999,
        0.6807071583333334)

    walker = analysis.StructureWindows(DNA(seq))
    walker.windows(window_size=60, context_len=90, step=10)
    assert_equal(walker.scores, expected_scores)
class Peptide(object):
    '''Peptide sequence.'''

    def __init__(self, peptide, features=None, run_checks=True):
        '''
        :param peptide: Input sequence (peptide).
        :type peptide: str
        :param features: Features annotating the peptide. Defaults to an
                         empty list.
        :type features: list
        :param run_checks: Check inputs / formats (disabling increases speed):
                           alphabet check
                           case
        :type run_checks: bool
        :returns: coral.Peptide instance.

        '''
        self._sequence = Sequence(peptide, 'peptide', run_checks=run_checks,
                                  any_char='X')
        if features is None:
            self.features = []
        else:
            self.features = features

    def copy(self):
        '''Create a copy of the current instance.

        The feature list and the features in it are copied as well, so
        editing the copy (e.g. moving its features) never alters the
        original.

        :returns: A safely editable copy of the current sequence.
        :rtype: coral.Peptide

        '''
        from copy import deepcopy

        return type(self)(str(self._sequence),
                          features=deepcopy(self.features),
                          run_checks=False)

    # NOTE: this used to be decorated with @classmethod, which bound `self`
    # to the class and made every call fail on the missing `_sequence`.
    def extract(self, feature, remove_subfeatures=False):
        '''Extract a feature by delegating to the underlying sequence.

        :param feature: Feature to extract.
        :param remove_subfeatures: Passed through to the underlying
                                   sequence's extract method.
        :type remove_subfeatures: bool
        :returns: Whatever the underlying sequence's extract method returns.

        '''
        return self._sequence.extract(self, feature,
                                      remove_subfeatures=remove_subfeatures)

    def locate(self, pattern):
        '''Locate a pattern by delegating to the underlying sequence.'''
        return self._sequence.locate(pattern)

    def __add__(self, other):
        '''Concatenate two peptides, carrying over both feature sets.

        Neither operand is modified.
        '''
        merged = self.copy()
        addend = other.copy()
        # Features of the right operand shift by the length of the LEFT
        # operand, so measure it before the sequences are merged.
        offset = len(merged)

        # Merge the sequences
        merged._sequence += addend._sequence

        # Merge the features
        for feature in addend.features:
            feature.move(offset)
        merged.features += addend.features

        return merged

    def __contains__(self, key):
        return key in self._sequence

    def __delitem__(self, key):
        self._sequence.__delitem__(key)

    def __eq__(self, other):
        return self._sequence == other._sequence

    def __getitem__(self, key):
        # NOTE(review): the new instance receives the parent's feature list
        # unchanged (same list object, original coordinates) - confirm
        # whether features should be trimmed/re-indexed for slices.
        new_instance = type(self)(str(self._sequence[key]),
                                  features=self.features, run_checks=False)
        return new_instance

    def __len__(self):
        return len(self._sequence)

    def __mul__(self, n):
        # TODO: Keep features as well?
        copy = self.copy()
        copy._sequence = self._sequence.__mul__(n)
        return copy

    def __ne__(self, other):
        return self._sequence != other._sequence

    def __radd__(self, other):
        '''Right-hand addition; returns a Peptide, like __add__ and __mul__.

        :raises: TypeError if other cannot be added to a peptide.

        '''
        if other == 0 or other is None:
            # For compatibility with sum()
            return self
        result = self.copy()
        try:
            # Store the combined sequence back on the copy instead of
            # returning the bare inner sequence object.
            result._sequence = result._sequence.__radd__(other._sequence)
        except AttributeError:
            raise TypeError('Cannot add {} to {}'.format(self, other))
        return result

    def __repr__(self):
        '''String to print when object is called directly.'''
        header = 'Peptide:'
        sequence = self._sequence.__repr__()
        return ' '.join([header, sequence])

    def __setitem__(self, key, value):
        self._sequence.__setitem__(key, value)

    def __str__(self):
        return str(self._sequence)
class TestRNA(object):
    '''
    Exercises the sequence.RNA container behaviour on a four-base RNA.

    '''

    def __init__(self):
        self.test_rna = RNA('augc')

    def test_reverse_complement(self):
        revcomp = self.test_rna.reverse_complement()
        assert_equal(str(revcomp), 'GCAU')

    def test_locate(self):
        for pattern, hits in (('au', [0]), ('gc', [2])):
            assert_equal(self.test_rna.locate(pattern), hits)
        assert_equal(len(self.test_rna.locate('augg')), 0)

    def test_copy(self):
        duplicate = self.test_rna.copy()
        assert_equal(self.test_rna, duplicate)

    def test_getitem(self):
        cases = ((0, 'A'), (1, 'U'), (2, 'G'), (3, 'C'), (-1, 'C'))
        for index, base in cases:
            assert_equal(str(self.test_rna[index]), base)

    def test_delitem(self):
        # (index to delete, expected remainder)
        cases = ((0, 'UGC'), (1, 'AGC'), (2, 'AUC'), (3, 'AUG'), (-1, 'AUG'))
        for index, remainder in cases:
            edited = self.test_rna.copy()
            del edited[index]
            assert_equal(str(edited), remainder)

    def test_setitem(self):
        # (index to set, new base, expected sequence)
        cases = ((0, 'u', 'UUGC'),
                 (1, 'a', 'AAGC'),
                 (2, 'a', 'AUAC'),
                 (3, 'a', 'AUGA'),
                 (-1, 'a', 'AUGA'))
        for index, base, expected in cases:
            edited = self.test_rna.copy()
            edited[index] = base
            assert_equal(str(edited), expected)

    def test_str(self):
        assert_equal(str(self.test_rna), 'AUGC')

    def test_len(self):
        assert_equal(len(self.test_rna), 4)

    def test_add(self):
        doubled = self.test_rna + self.test_rna
        assert_equal(str(doubled), 'AUGCAUGC')

    def test_radd(self):
        total = sum([self.test_rna, self.test_rna])
        assert_equal(str(total), 'AUGCAUGC')
        # Adding a sequence to a non-zero number is not supported
        assert_raises(TypeError, lambda seq: 800 + seq, self.test_rna)

    def test_mul(self):
        assert_equal(str(self.test_rna * 4), 'AUGCAUGCAUGCAUGC')
        # Only whole-number repeats make sense
        assert_raises(TypeError, lambda seq: seq * 7.56, self.test_rna)

    def test_eq(self):
        assert_true(self.test_rna == RNA('augc'))

    def test_ne(self):
        assert_true(self.test_rna != RNA('aagc'))

    def test_contains(self):
        for base in 'augc':
            assert_true(RNA(base) in self.test_rna)
        assert_false(RNA('a') in RNA('ugc'))
class TestPeptide(object):
    '''
    Exercises the sequence.Peptide container behaviour on a four-residue
    peptide.

    '''

    def __init__(self):
        self.test_peptide = Peptide('mkgp')

    def test_locate(self):
        for pattern, hits in (('mk', [0]), ('gp', [2])):
            assert_equal(self.test_peptide.locate(pattern), hits)
        assert_equal(len(self.test_peptide.locate('augg')), 0)

    def test_copy(self):
        duplicate = self.test_peptide.copy()
        assert_equal(self.test_peptide, duplicate)

    def test_getitem(self):
        cases = ((0, 'M'), (1, 'K'), (2, 'G'), (3, 'P'), (-1, 'P'))
        for index, residue in cases:
            assert_equal(str(self.test_peptide[index]), residue)

    def test_delitem(self):
        # (index to delete, expected remainder)
        cases = ((0, 'KGP'), (1, 'MGP'), (2, 'MKP'), (3, 'MKG'), (-1, 'MKG'))
        for index, remainder in cases:
            edited = self.test_peptide.copy()
            del edited[index]
            assert_equal(str(edited), remainder)

    def test_setitem(self):
        # (index overwritten with 'q', expected sequence)
        cases = ((0, 'QKGP'),
                 (1, 'MQGP'),
                 (2, 'MKQP'),
                 (3, 'MKGQ'),
                 (-1, 'MKGQ'))
        for index, expected in cases:
            edited = self.test_peptide.copy()
            edited[index] = 'q'
            assert_equal(str(edited), expected)

    def test_str(self):
        assert_equal(str(self.test_peptide), 'MKGP')

    def test_len(self):
        assert_equal(len(self.test_peptide), 4)

    def test_add(self):
        doubled = self.test_peptide + self.test_peptide
        assert_equal(str(doubled), 'MKGPMKGP')

    def test_radd(self):
        total = sum([self.test_peptide, self.test_peptide])
        assert_equal(str(total), 'MKGPMKGP')
        # Adding a sequence to a non-zero number is not supported
        assert_raises(TypeError, lambda seq: 800 + seq, self.test_peptide)

    def test_mul(self):
        assert_equal(str(self.test_peptide * 4), 'MKGPMKGPMKGPMKGP')
        # Only whole-number repeats make sense
        assert_raises(TypeError, lambda seq: seq * 7.56, self.test_peptide)

    def test_eq(self):
        assert_true(self.test_peptide == Peptide('mkgp'))

    def test_ne(self):
        assert_true(self.test_peptide != Peptide('mkqp'))

    def test_contains(self):
        for residue in 'mkgp':
            assert_true(residue in self.test_peptide)
        assert_false('q' in self.test_peptide)
15 | 16 | Coral works with PyPy so long as a PyPy-compatible numpy is installed. 17 | ''' 18 | 19 | import re 20 | import sys 21 | import numpy 22 | 23 | # Check python versions 24 | if sys.version_info.major > 2: 25 | print('Coral is currently compatible only with Python 2.') 26 | sys.exit(1) 27 | 28 | try: 29 | from setuptools import setup, Extension 30 | except ImportError: 31 | from distutils.core import setup, Extension 32 | 33 | # Get version from package __init__.py 34 | with open('coral/__init__.py', 'r') as fd: 35 | __version__ = re.search(r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]', 36 | fd.read(), re.MULTILINE).group(1) 37 | if not __version__: 38 | raise RuntimeError('Cannot find version information') 39 | 40 | 41 | doclines = __doc__.split('\n') 42 | 43 | config = { 44 | 'name': 'coral', 45 | 'version': __version__, 46 | 'description': doclines[0], 47 | 'long_description': '\n'.join(doclines[2:]), 48 | 'author': 'Nick Bolten', 49 | 'author_email': 'nbolten _at_ gmail', 50 | 'maintainer': 'Nick Bolten', 51 | 'maintainer_email': 'nbolten _at_ gmail', 52 | 'url': 'https://github.com/klavinslab/coral', 53 | 'license': 'MIT', 54 | 'download_url': 'https://github.com/klavinslab/coral.git', 55 | 'install_requires': ['numpy', 'biopython'], 56 | 'extras_require': {'plotting': ['matplotlib'], 57 | 'yeastdatabases': ['intermine', 'requests'], 58 | 'documentation': ['sphinx']}, 59 | 'packages': ['coral', 60 | 'coral.analysis', 61 | 'coral.analysis._sequence', 62 | 'coral.analysis._sequencing', 63 | 'coral.analysis._structure', 64 | 'coral.constants', 65 | 'coral.database', 66 | 'coral.design', 67 | 'coral.design._oligo_synthesis', 68 | 'coral.design._sequence_generation', 69 | 'coral.seqio', 70 | 'coral.reaction', 71 | 'coral.sequence', 72 | 'coral.utils'], 73 | 'package_data': {'coral': ['coral/sequence/d3-plasmid.js']}, 74 | 'include_package_data': True, 75 | 'classifiers': ['Programming Language :: Python', 76 | 'Programming Language :: Python :: 2.7', 77 | 
def random_dna(n):
    '''Generate a random DNA sequence.

    :param n: Output sequence length.
    :type n: int
    :returns: Random DNA sequence of length n.
    :rtype: coral.DNA

    '''
    return coral.DNA(''.join([random.choice('ATGC') for _ in range(n)]))


def random_codons(peptide, frequency_cutoff=0.0, weighted=False, table=None):
    '''Generate randomized codons given a peptide sequence.

    :param peptide: Peptide sequence for which to generate randomized
                    codons.
    :type peptide: coral.Peptide
    :param frequency_cutoff: Relative codon usage cutoff - codons that
                             are rarer will not be used. Frequency is
                             relative to average over all codons for a
                             given amino acid.
    :type frequency_cutoff: float
    :param weighted: Pick codons with probability proportional to their
                     frequency in the table instead of uniformly.
    :type weighted: bool
    :param table: Codon frequency table to use. Table should be organized
                  by amino acid, then be a dict of codon: frequency.
                  Only relevant if weighted=True or frequency_cutoff > 0.
                  Tables available:

                  constants.molecular_bio.CODON_FREQ_BY_AA['sc'] (default)
    :type table: dict
    :returns: Randomized sequence of codons (RNA) that code for the input
              peptide.
    :rtype: coral.RNA
    :raises: ValueError if frequency_cutoff is set so high that there are no
             codons available for an amino acid in the input peptide.

    '''
    if table is None:
        table = CODON_FREQ_BY_AA['sc']
    # Process codon table using frequency_cutoff
    new_table = _cutoff(table, frequency_cutoff)
    # Select codons randomly or using weighted distribution
    selected = []
    for amino_acid in str(peptide):
        codons = new_table[amino_acid.upper()]
        if not codons:
            raise ValueError('No {} codons at freq cutoff'.format(amino_acid))
        if weighted:
            names = []
            cumsum = []
            running_sum = 0
            for codon, frequency in codons.items():
                running_sum += frequency
                names.append(codon)
                cumsum.append(running_sum)
            random_num = random.uniform(0, max(cumsum))
            # random.uniform may return its upper bound, in which case no
            # cumulative value is strictly greater; default to the last
            # codon so the choice is always made for *this* amino acid.
            selection = names[-1]
            for codon, value in zip(names, cumsum):
                if value > random_num:
                    selection = codon
                    break
        else:
            # list() keeps this working where keys() is a view, not a list
            selection = random.choice(list(codons))
        selected.append(selection)
    return coral.RNA(''.join(selected))


def _cutoff(table, frequency_cutoff):
    '''Generate new codon frequency table given a mean cutoff.

    :param table: codon frequency table of form {amino acid: codon: frequency}
    :type table: dict
    :param frequency_cutoff: value between 0 and 1.0 for mean frequency cutoff
    :type frequency_cutoff: float
    :returns: A codon frequency table with some codons removed. Only codons
              whose frequency is strictly greater than frequency_cutoff
              times the amino acid's mean codon frequency are kept.
    :rtype: dict

    '''
    new_table = {}
    # IDEA: cutoff should be relative to most-frequent codon, not average?
    for amino_acid, codons in table.items():
        average_cutoff = frequency_cutoff * sum(codons.values()) / len(codons)
        new_table[amino_acid] = {}
        for codon, frequency in codons.items():
            if frequency > average_cutoff:
                new_table[amino_acid][codon] = frequency
    return new_table
def _context_walk(dna, window_size, context_len, step):
    '''Generate context-dependent 'non-boundedness' scores for a DNA sequence.

    Each window is scored twice - once together with the sequence upstream
    of it and once (reverse-complemented) together with the sequence
    downstream of it - and the two scores are averaged.

    :param dna: Sequence to score.
    :type dna: coral.DNA
    :param window_size: Window size in base pairs.
    :type window_size: int
    :param context_len: The number of bases of context to use when analyzing
                        each window.
    :type context_len: int
    :param step: The number of base pairs to move for each new window.
    :type step: int
    :returns: One (window start, window end, score) tuple per window.
    :rtype: list

    '''
    # Generate window indices
    window_start_ceiling = len(dna) - context_len - window_size
    window_starts = range(context_len - 1, window_start_ceiling, step)
    window_ends = [start + window_size for start in window_starts]

    # Generate left and right in-context subsequences
    l_starts = [step * i for i in range(len(window_starts))]
    l_seqs = [dna[start:end] for start, end in zip(l_starts, window_ends)]
    r_ends = [x + window_size + context_len for x in window_starts]
    r_seqs = [dna[start:end].reverse_complement() for start, end in
              zip(window_starts, r_ends)]

    # Combine and calculate nupack pair probabilities
    seqs = l_seqs + r_seqs
    pairs_run = coral.analysis.nupack_multi(seqs, 'dna', 'pairs', {'index': 0})
    # Focus on pair probabilities that matter - those in the window
    pairs = [run[-window_size:] for run in pairs_run]
    # Score by average pair probability
    lr_scores = [sum(pair) / len(pair) for pair in pairs]

    # Split into left-right contexts again and average for each window.
    # There is exactly one left and one right sequence per window, so the
    # split point is len(l_seqs); this avoids `len(seqs) / 2`, which is a
    # float (and an invalid slice index) under true division.
    n_windows = len(l_seqs)
    l_scores = lr_scores[:n_windows]
    r_scores = lr_scores[n_windows:]
    scores = [(l + r) / 2 for l, r in zip(l_scores, r_scores)]

    # Summarize and return window indices and score. list() guarantees a
    # real (re-iterable, falsy-when-empty) list even where zip is lazy.
    summary = list(zip(window_starts, window_ends, scores))

    return summary
1995 11 | % Dangles setting: 1 12 | % Temperature (C): 37.0 13 | % Sodium concentration: 1.0000 M 14 | % Magnesium concentration: 0.0000 M 15 | % 16 | % Do not change the comments below this line, as they may be read by other programs! 17 | % 18 | % Number of strands: 2 19 | % id sequence 20 | % 1 GATACTAGCG 21 | % 2 TACGATT 22 | % T = 37.0 23 | 24 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 25 | % complex1-order1 26 | 10 27 | 0.00000000e+00 28 | .......... 29 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 30 | 31 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 32 | % complex2-order1 33 | 7 34 | 0.00000000e+00 35 | ....... 36 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 37 | 38 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 39 | % complex3-order1 40 | 20 41 | -5.15415041e+00 42 | .......((.+.......)). 43 | 8 19 44 | 9 18 45 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 46 | 47 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 48 | % complex4-order1 49 | 17 50 | -2.18135142e+00 51 | ........((+..))... 52 | 9 14 53 | 10 13 54 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 55 | 56 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 57 | % complex5-order1 58 | 14 59 | -3.15415041e+00 60 | ..((...+..))... 61 | 3 11 62 | 4 10 63 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 64 | 65 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 66 | % complex6-order1 67 | 30 68 | -7.56270284e+00 69 | .......((.+.....(((..+....))))). 70 | 8 29 71 | 9 28 72 | 16 27 73 | 17 26 74 | 18 25 75 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 76 | 77 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 78 | % complex7-order1 79 | 27 80 | -7.26270284e+00 81 | (......((.+.......)).+..).... 82 | 1 23 83 | 8 19 84 | 9 18 85 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 86 | 87 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 88 | % complex8-order1 89 | 24 90 | -3.86270284e+00 91 | (.......((+..))...+..).... 
92 | 1 20 93 | 9 14 94 | 10 13 95 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 96 | 97 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 98 | % complex9-order1 99 | 21 100 | -3.16270284e+00 101 | ....(..+).((...+..))... 102 | 5 8 103 | 10 18 104 | 11 17 105 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 106 | 107 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 108 | % complex10-order1 109 | 40 110 | -1.23440543e+01 111 | .......((.+....(..((.+.......)).+)......)). 112 | 8 39 113 | 9 38 114 | 15 31 115 | 18 29 116 | 19 28 117 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 118 | 119 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 120 | % complex11-order1 121 | 37 122 | -9.24405426e+00 123 | (......((.+.....(((..+....))))).+..).... 124 | 1 33 125 | 8 29 126 | 9 28 127 | 16 27 128 | 17 26 129 | 18 25 130 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 131 | 132 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 133 | % complex12-order1 134 | 34 135 | -8.94405426e+00 136 | ..((...((.+.......)).+))((...+..))... 137 | 3 22 138 | 4 21 139 | 8 19 140 | 9 18 141 | 23 31 142 | 24 30 143 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 144 | 145 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 146 | % complex12-order2 147 | 34 148 | -8.74405426e+00 149 | ....(..((.+..(....+)......)).+...)... 150 | 5 31 151 | 8 26 152 | 9 25 153 | 13 18 154 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 155 | 156 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 157 | % complex13-order1 158 | 31 159 | -5.54405426e+00 160 | ..((....((+..))...+))((...+..))... 161 | 3 19 162 | 4 18 163 | 9 14 164 | 10 13 165 | 20 28 166 | 21 27 167 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 168 | 169 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 170 | % complex14-order1 171 | 28 172 | -5.44405426e+00 173 | ..((...+..))..(+.)((...+..))... 
174 | 3 11 175 | 4 10 176 | 14 16 177 | 17 25 178 | 18 24 179 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 180 | -------------------------------------------------------------------------------- /HISTORY.md: -------------------------------------------------------------------------------- 1 | Changelog for Coral 2 | =================== 3 | 4 | 0.5.0 (2016-02-20) 5 | * Separated `ssDNA` (single-stranded) and `DNA` (implicitly double-stranded) 6 | classes. Convert between them using `.to_ds()` and `.to_ss()` methods, 7 | respectively. 8 | * `coral.Primer` instances now contain `ssDNA` instances, not `DNA`. 9 | * `feature_type` is now an optional argument when initializing new `Feature` 10 | instances. The default is 'misc_feature'. 11 | * Added `to_feature()` convenience method to `coral.DNA` for generating a 12 | feature from a given `DNA` instance. 13 | * Fixed an issue where `coral.analysis.tm` returned an arcane message when the 14 | input is a non-DNA character. 15 | * Fixed an issue where read_sequencing returned `ssDNA`. 16 | * Overhauled alignment and Sanger sequencing analysis modules. Added support 17 | for using the MAFFT command-line tool as an alignment method, located at 18 | `coral.analysis.MAFFT`. Added MAFFT as an option for the Sanger analysis class. 19 | Added a new function, `needle_msa`, that generates a reference-aligned MSA 20 | representation of a set of Needleman-Wunsch pairwise alignments. Added 21 | `coral.analysis.substitution_matrices` module, which adds a SubstitutionMatrix 22 | class for easily specifying customized substitution matrices for 23 | Needleman-Wunsch alignment as well as built-in matrices such as BLOSUM62, DNA, 24 | and DNA_SIMPLE. 25 | * dev note: started using zest.releaser to automate releases. 26 | 27 | 28 | 0.4.1-0.4.3 29 | * Bugfixes to 0.4.0 (brown-bag style bugs, fixed immediately after release). 30 | 31 | 0.4.0 32 | * Renamed `coral.DNA.rotate()` method to `coral.DNA.rotate_to()`.
33 | * Created new `coral.DNA.rotate()` method that rotates a sequence 34 | 'counter-clockwise', acting as a deque. 35 | * Created new `coral.DNA.rotate_to_feature`, which rotates a sequence to a 36 | given feature's start location. 37 | * Created new `coral.DNA.excise` feature, which removes a feature's sequence 38 | from a circular DNA object, generating a linear product (useful for swapping 39 | out features). 40 | * Improved `coral.DNA` `__getitem__` behavior. 41 | * Added .material property to all sequence types for pseudo-type checking. 42 | * Made `coral.DNA.top()` and `coral.DNA.bottom()` methods into properties 43 | (`coral.DNA.top` and `coral.DNA.bottom`) that can be overwritten and accessed 44 | directly. 45 | * Replaced `coral.DNA.topology` and `coral.DNA.stranded` properties (which were 46 | strings) with boolean-valued `.circular` and `.ds` values, respectively. 47 | * Re-wrote (and renamed) the `coral.analysis.NUPACK` and 48 | `coral.analysis.ViennaRNA` packages to be more feature-complete. 49 | * Fixed an issue where re-running `coral.DNA.display()` in a Jupyter notebook 50 | resulted in non-updated text labels. 51 | 52 | 0.3.3 53 | * Added Python 2 version check to prevent installation on Python 3. 54 | 55 | 0.3.2 56 | * Removed cython dependencies entirely to ease installation. 57 | 58 | 0.3.1 59 | * Fixed issues with PCR simulation and annealing, can now handle all cases of 60 | primer directionality and overlaps, linear and circular templates. 61 | * Added pyx to package manifest for case where user already has cython 62 | installed. 63 | 64 | 0.3.0 65 | * Separated out annealing behavior into analysis function (`analysis.anneal`). 66 | * Functions that use annealing (e.g. `reaction.pcr`) can now accept partial 67 | annealing + overhang matches due to annealing overhaul. 68 | * Gibson reactions now retain features of the inputs. 69 | * Added `strip()` method to `coral.DNA`. 70 | * Installation now works on Mac OS X. 
def test_oligo_assembly():
    '''
    Compare OligoAssembly output with stored reference oligos and Tms.

    '''
    # Expected outputs
    expected_oligos = [
        ('ATGCGTAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGAATTAGATGGTG'
         'ATGTTAATGGGCACAAATTTTCTGTCAGTGGAGAGGGTGAA'),
        ('TGGCCATGGAACAGGTAGTTTTCCAGTAGTGCAAATAAATTTAAGGGTAAGTTTTCCGTAT'
         'GTTGCATCACCTTCACCCTCTCCACTGACAGAAAATTTGTG'),
        ('TGGAAAACTACCTGTTCCATGGCCAACACTTGTCACTACTTTCGGTTATGGTGTTCAATGC'
         'TTTGCGAGATACCCAGATCATATGAAACAGCATGACTTTTTCAA'),
        ('CGTGTCTTGTAGTTCCCGTCATCTTTGAAAAATATAGTTCTTTCCTGTACATAACCTTCGG'
         'GCATGGCACTCTTGAAAAAGTCATGCTGTTTCATATGATCTGGG'),
        ('TTCAAAGATGACGGGAACTACAAGACACGTGCTGAAGTCAAGTTTGAAGGTGATACCCTTG'
         'TTAATAGAATCGAGTTAAAAGGTATTGATTTTAAAGAAGATGGAAACA'),
        ('TTTGTCTGCCATGATGTATACATTGTGTGAGTTATAGTTGTATTCCAATTTGTGTCCAAGA'
         'ATGTTTCCATCTTCTTTAAAATCAATACCTTTTAACTCGATTCTATT'),
        ('AACTATAACTCACACAATGTATACATCATGGCAGACAAACAAAAGAATGGAATCAAAGTTA'
         'ACTTCAAAATTAGACACAACATTGAAGATGGAAGCGTTCAACTAGCA'),
        ('TTGTGTGGACAGGTAATGGTTGTCTGGTAAAAGGACAGGGCCATCGCCAATTGGAGTATTT'
         'TGTTGATAATGGTCTGCTAGTTGAACGCTTCCATCTTCAATGT'),
        ('CCAGACAACCATTACCTGTCCACACAATCTGCCCTTTCGAAAGATCCCAACGAAAAGAGAG'
         'ACCACATGGTCCTTCTTGAGTTTGTAACAGCTGCTGGGA'),
        ('TTAAGCTACTAAAGCGTAGTTTTCGTCGTTTGCAGCAGGCCTTTTGTATAGTTCATCCAT'
         'GCCATGTGTAATCCCAGCAGCTGTTACAAACTCAAGAAGG'),
    ]
    expected_tms = [73.513413945987, 72.73367624289534, 73.73563193690484,
                    72.70706564878299, 72.72193323127533, 72.23050918438184,
                    72.07546311550101, 72.27046461560099, 73.67230272019759]

    # Run oligo synthesis on BBa_K082003
    template = (
        'atgcgtaaaggagaagaacttttcactggagttgtcccaattcttgttgaattagatggtgat'
        'gttaatgggcacaaattttctgtcagtggagagggtgaaggtgatgcaacatacggaaaactt'
        'acccttaaatttatttgcactactggaaaactacctgttccatggccaacacttgtcactact'
        'ttcggttatggtgttcaatgctttgcgagatacccagatcatatgaaacagcatgactttttc'
        'aagagtgccatgcccgaaggttatgtacaggaaagaactatatttttcaaagatgacgggaac'
        'tacaagacacgtgctgaagtcaagtttgaaggtgatacccttgttaatagaatcgagttaaaa'
        'ggtattgattttaaagaagatggaaacattcttggacacaaattggaatacaactataactca'
        'cacaatgtatacatcatggcagacaaacaaaagaatggaatcaaagttaacttcaaaattaga'
        'cacaacattgaagatggaagcgttcaactagcagaccattatcaacaaaatactccaattggc'
        'gatggccctgtccttttaccagacaaccattacctgtccacacaatctgccctttcgaaagat'
        'cccaacgaaaagagagaccacatggtccttcttgagtttgtaacagctgctgggattacacat'
        'ggcatggatgaactatacaaaaggcctgctgcaaacgacgaaaactacgctttagtagcttaa')
    # Both assemblies below are built with the same design settings.
    settings = {'tm': 72, 'length_range': (120, 120), 'require_even': True,
                'start_5': True}
    assembly = design.OligoAssembly(DNA(template), **settings)
    assembly.design_assembly()

    # Oligos are compared case-insensitively, Tms exactly
    designed = [str(oligo).lower() for oligo in assembly.oligos]
    assert_equal(designed, [oligo.lower() for oligo in expected_oligos])
    assert_equal(assembly.overlap_tms, expected_tms)

    # Test too short of oligo input
    stub = DNA(template[0:100])
    stub_assembly = design.OligoAssembly(stub, **settings)
    stub_assembly.design_assembly()
    assert_equal(str(stub_assembly.oligos[0]), str(stub))
.list file: 0 9 | % Total number of permutations to calculate: 15 10 | % Parameters: RNA, 1999 11 | % Dangles setting: 1 12 | % Temperature (C): 37.0 13 | % Sodium concentration: 1.0000 M 14 | % Magnesium concentration: 0.0000 M 15 | % 16 | % Do not change the comments below this line, as they may be read by other programs! 17 | % 18 | % Number of strands: 2 19 | % id sequence 20 | % 1 GATACTAGCG 21 | % 2 TACGATT 22 | % T = 37.0 23 | 24 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 25 | % complex1-order1 26 | 10 27 | 0.00000000e+00 28 | .......... 29 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 30 | 31 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 32 | % complex2-order1 33 | 7 34 | 0.00000000e+00 35 | ....... 36 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 37 | 38 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 39 | % complex3-order1 40 | 20 41 | -5.25415041e+00 42 | ....((((..+....)))).. 43 | 5 18 44 | 6 17 45 | 7 16 46 | 8 15 47 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 48 | 49 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 50 | % complex4-order1 51 | 17 52 | -2.58135142e+00 53 | ........((+..))... 54 | 9 14 55 | 10 13 56 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 57 | 58 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 59 | % complex5-order1 60 | 14 61 | -3.55415041e+00 62 | ..((...+..))... 63 | 3 11 64 | 4 10 65 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 66 | 67 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 68 | % complex6-order1 69 | 30 70 | -7.66270284e+00 71 | .......((.+.....(((..+....))))). 72 | 8 29 73 | 9 28 74 | 16 27 75 | 17 26 76 | 18 25 77 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 78 | 79 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 80 | % complex7-order1 81 | 27 82 | -7.66270284e+00 83 | ....((((..+....))))((+..))... 
84 | 5 18 85 | 6 17 86 | 7 16 87 | 8 15 88 | 19 24 89 | 20 23 90 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 91 | 92 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 93 | % complex8-order1 94 | 24 95 | -4.26270284e+00 96 | (.......((+..))...+..).... 97 | 1 20 98 | 9 14 99 | 10 13 100 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 101 | 102 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 103 | % complex9-order1 104 | 21 105 | -2.66270284e+00 106 | ..((...+((.....+))))... 107 | 3 18 108 | 4 17 109 | 8 16 110 | 9 15 111 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 112 | 113 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 114 | % complex10-order1 115 | 40 116 | -1.24440543e+01 117 | .......((.+(...((((..+....))))..+....)..)). 118 | 8 39 119 | 9 38 120 | 11 35 121 | 15 28 122 | 16 27 123 | 17 26 124 | 18 25 125 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 126 | 127 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 128 | % complex11-order1 129 | 37 130 | -9.64405426e+00 131 | ....(((((.+.......)).+.....)))((+..))... 132 | 5 28 133 | 6 27 134 | 7 26 135 | 8 19 136 | 9 18 137 | 29 34 138 | 30 33 139 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 140 | 141 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 142 | % complex12-order1 143 | 34 144 | -9.34405426e+00 145 | (...((((..+....))))((+..))...+..).... 146 | 1 30 147 | 5 18 148 | 6 17 149 | 7 16 150 | 8 15 151 | 19 24 152 | 20 23 153 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 154 | 155 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 156 | % complex12-order2 157 | 34 158 | -9.34405426e+00 159 | ....((((..+..(....+)...))))((+..))... 160 | 5 25 161 | 6 24 162 | 7 23 163 | 8 22 164 | 13 18 165 | 26 31 166 | 27 30 167 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 168 | 169 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 170 | % complex13-order1 171 | 31 172 | -5.74405426e+00 173 | (...(...((+..))...+...)...+..).... 
174 | 1 27 175 | 5 21 176 | 9 14 177 | 10 13 178 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 179 | 180 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 181 | % complex14-order1 182 | 28 183 | -5.24405426e+00 184 | ..((...+..))..(+.)((...+..))... 185 | 3 11 186 | 4 10 187 | 14 16 188 | 17 25 189 | 18 24 190 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 191 | -------------------------------------------------------------------------------- /tests/test_nupack/data/complexes_mfe_dna.ocx-mfe: -------------------------------------------------------------------------------- 1 | % NUPACK 3.0 2 | % Program: complexes 3 | % Start time: Thu Jan 28 00:03:04 2016 PST 4 | % 5 | % Command: ../bin/complexes -T 37.0 -quiet -mfe -material dna test_complexes 6 | % Maximum complex size to enumerate: 4 7 | % Number of complexes from enumeration: 14 8 | % Additional complexes from .list file: 0 9 | % Total number of permutations to calculate: 15 10 | % Parameters: DNA, 1998 11 | % Dangles setting: 1 12 | % Temperature (C): 37.0 13 | % Sodium concentration: 1.0000 M 14 | % Magnesium concentration: 0.0000 M 15 | % 16 | % Do not change the comments below this line, as they may be read by other programs! 17 | % 18 | % Number of strands: 2 19 | % id sequence 20 | % 1 GATACTAGCG 21 | % 2 TACGATT 22 | % T = 37.0 23 | 24 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 25 | % complex1-order1 26 | 10 27 | 0.00000000e+00 28 | .......... 29 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 30 | 31 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 32 | % complex2-order1 33 | 7 34 | 0.00000000e+00 35 | ....... 36 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 37 | 38 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 39 | % complex3-order1 40 | 20 41 | -5.60415041e+00 42 | ....((((..+....)))).. 
43 | 5 18 44 | 6 17 45 | 7 16 46 | 8 15 47 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 48 | 49 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 50 | % complex4-order1 51 | 17 52 | -5.28135142e+00 53 | ........((+..))... 54 | 9 14 55 | 10 13 56 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 57 | 58 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 59 | % complex5-order1 60 | 14 61 | -6.01415041e+00 62 | ..((...+..))... 63 | 3 11 64 | 4 10 65 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 66 | 67 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 68 | % complex6-order1 69 | 30 70 | -9.20270284e+00 71 | ........((+....((((..+....)))))) 72 | 9 30 73 | 10 29 74 | 15 28 75 | 16 27 76 | 17 26 77 | 18 25 78 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 79 | 80 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 81 | % complex7-order1 82 | 27 83 | -1.03627028e+01 84 | ....((((..+....))))((+..))... 85 | 5 18 86 | 6 17 87 | 7 16 88 | 8 15 89 | 19 24 90 | 20 23 91 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 92 | 93 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 94 | % complex8-order1 95 | 24 96 | -8.58270284e+00 97 | .((.....((+..))...+....)). 98 | 2 23 99 | 3 22 100 | 9 14 101 | 10 13 102 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 103 | 104 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 105 | % complex9-order1 106 | 21 107 | -8.01270284e+00 108 | ....((.+..((...+..)))). 109 | 5 20 110 | 6 19 111 | 10 18 112 | 11 17 113 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 114 | 115 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 116 | % complex10-order1 117 | 40 118 | -1.41668532e+01 119 | ....((((..+.((.((((..+....))))..+.)).)))).. 120 | 5 38 121 | 6 37 122 | 7 36 123 | 8 35 124 | 12 33 125 | 13 32 126 | 15 28 127 | 16 27 128 | 17 26 129 | 18 25 130 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 131 | 132 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 133 | % complex11-order1 134 | 37 135 | -1.43240543e+01 136 | .((.((((..+....))))..+.)).....((+..))... 
137 | 2 23 138 | 3 22 139 | 5 18 140 | 6 17 141 | 7 16 142 | 8 15 143 | 29 34 144 | 30 33 145 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 146 | 147 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 148 | % complex12-order1 149 | 34 150 | -1.37540543e+01 151 | .((.((((..+....))))..+..((...+..)))). 152 | 2 33 153 | 3 32 154 | 5 18 155 | 6 17 156 | 7 16 157 | 8 15 158 | 23 31 159 | 24 30 160 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 161 | 162 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 163 | % complex12-order2 164 | 34 165 | -1.42668532e+01 166 | ....((((((+..))...+....))))((+..))... 167 | 5 25 168 | 6 24 169 | 7 23 170 | 8 22 171 | 9 14 172 | 10 13 173 | 26 31 174 | 27 30 175 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 176 | 177 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 178 | % complex13-order1 179 | 31 180 | -1.34840543e+01 181 | .((.....((+..))...+..((...+..)))). 182 | 2 30 183 | 3 29 184 | 9 14 185 | 10 13 186 | 20 28 187 | 21 27 188 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 189 | 190 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% % 191 | % complex14-order1 192 | 28 193 | -1.33340543e+01 194 | ..((...+..)).(.+.)((...+..))... 
def test_primer():
    '''Exercise design.primer: plain, with overhang, and failure cases.'''
    template = DNA(
        'ATGGTGAGCAAGGGCGAGGAGCTGTTCACCGGGGTGGTGCCCATCCTGGTCGAGCTGGACGGC'
        'GACGTAAACGGCCACAAGTTCAGCGTGTCCGGCGAGGGCGAGGGCGATGCCACCTACGGCAAG'
        'CTGACCCTGAAGTTCATCTGCACCACCGGCAAGCTGCCCGTGCCCTGGCCCACCCTCGTGACC'
        'ACCTTCGGCTACGGCCTGCAGTGCTTCGCCCGCTACCCCGACCACATGAAGCAGCACGACTTC'
        'TTCAAGTCCGCCATGCCCGAAGGCTACGTCCAGGAGCGCACCATCTTCTTCAAGGACGACGGC'
        'AACTACAAGACCCGCGCCGAGGTGAAGTTCGAGGGCGACACCCTGGTGAACCGCATCGAGCTG'
        'AAGGGCATCGACTTCAAGGAGGACGGCAACATCCTGGGGCACAAGCTGGAGTACAACTACAAC'
        'AGCCACAACGTCTATATCATGGCCGACAAGCAGAAGAACGGCATCAAGGTGAACTTCAAGATC'
        'CGCCACAACATCGAGGACGGCAGCGTGCAGCTCGCCGACCACTACCAGCAGAACACCCCCATC'
        'GGCGACGGCCCCGTGCTGCTGCCCGACAACCACTACCTGAGCTACCAGTCCGCCCTGAGCAAA'
        'GACCCCAACGAGAAGCGCGATCACATGGTCCTGCTGGAGTTCGTGACCGCCGCCGGGATCACT'
        'CTCGGCATGGACGAGCTGTACAAGTAA')
    # Settings shared by every explicit design call in this test.
    shared = {'tm': 72, 'min_len': 10, 'tm_undershoot': 1, 'tm_overshoot': 3,
              'tm_parameters': 'cloning'}

    plain = design.primer(template, end_gc=False, overhang=None, **shared)
    assert_equals(str(plain), 'ATGGTGAGCAAGGGCGAGGAG')

    # Ensure that overhang is appropriately applied
    with_overhang = design.primer(template, end_gc=False,
                                  overhang=DNA('GATCGATAT'), **shared)
    assert_equals(str(with_overhang), 'GATCGATATATGGTGAGCAAGGGCGAGGAG')

    # If sequence is too short (too low of Tm), raise ValueError
    assert_raises(ValueError, design.primer, DNA('at'), tm=72)

    # Should design different primers (sometimes) if ending on GC is preferred
    gc_template = DNA('GATCGATCGATACGATCGATATGCGATATGATCGATAT')
    no_gc = design.primer(gc_template, end_gc=False, overhang=None, **shared)
    with_gc = design.primer(gc_template, end_gc=True, overhang=None, **shared)
    assert_not_equal(no_gc, with_gc)

    # Should raise ValueError if it's impossible to create an end_gc primer
    at_rich_template = DNA('ATGCGATACGATACGCGATATGATATATatatatat'
                           'ATAAaaaaaaaaaattttttttTTTTTTTTTTTTTT'
                           'TTTTTTTTTT')
    assert_raises(ValueError, design.primer, at_rich_template,
                  end_gc=True, tm=72)
class Rebase(object):
    '''Retrieve restriction enzymes from rebase database.'''

    def __init__(self):
        # Scratch directory used only while update() is downloading/parsing.
        self._tmpdir = None
        self.update()

    def update(self):
        '''Update definitions.

        Downloads the current REBASE listing into a temporary directory,
        parses it into self._enzyme_dict and rebuilds
        self.restriction_sites. If the download fails with an HTTP or URL
        error, coral.constants.fallback_enzymes is used instead. The
        temporary directory is removed whether or not the download and
        parsing succeed.

        '''
        # Download http://rebase.neb.com/rebase/link_withref to tmp
        self._tmpdir = tempfile.mkdtemp()
        url = 'http://rebase.neb.com/rebase/link_withref'
        try:
            self._rebase_file = self._tmpdir + '/rebase_file'
            print('Downloading latest enzyme definitions')
            header = {'User-Agent': 'Mozilla/5.0'}
            req = urllib2.Request(url, headers=header)
            con = urllib2.urlopen(req)
            try:
                payload = con.read()
            finally:
                # Release the connection even if reading fails.
                con.close()
            with open(self._rebase_file, 'wb') as rebase_file:
                rebase_file.write(payload)
            # Process into self._enzyme_dict
            self._process_file()
        except urllib2.HTTPError as e:
            # HTTPError is a subclass of URLError, so it must be caught first.
            print('HTTP Error: {} {}'.format(e.code, url))
            print('Falling back on default enzyme list')
            self._enzyme_dict = coral.constants.fallback_enzymes
        except urllib2.URLError as e:
            print('URL Error: {} {}'.format(e.reason, url))
            print('Falling back on default enzyme list')
            self._enzyme_dict = coral.constants.fallback_enzymes
        finally:
            # Previously the directory was only removed at the end of a
            # successful parse, so every failed download leaked it.
            shutil.rmtree(self._tmpdir, ignore_errors=True)
            self._tmpdir = None
        # Process into RestrictionSite objects? (depends on speed)
        print('Processing into RestrictionSite instances.')
        self.restriction_sites = {}
        # TODO: make sure all names are unique
        for key, (site, cuts) in self._enzyme_dict.items():
            # Make a site
            try:
                r = coral.RestrictionSite(coral.DNA(site), cuts, name=key)
                # Add it to dict with name as key
                self.restriction_sites[key] = r
            except ValueError:
                # Encountered ambiguous sequence, have to ignore it until
                # coral.DNA can handle ambiguous DNA
                pass

    def get(self, name):
        '''Retrieve enzyme by name.

        :param name: Name of the restriction enzyme, e.g. EcoRV.
        :type name: str
        :returns: Restriction site matching the input name.
        :rtype: coral.RestrictionSite
        :raises: Exception when enzyme is not found in the database.

        '''
        # Looks for restriction enzyme by name
        try:
            return self.restriction_sites[name]
        except KeyError:
            raise Exception('Enzyme not found.')

    def _process_file(self):
        '''Process rebase file into dict with name and cut site information.

        Reads self._rebase_file and fills self._enzyme_dict with
        name -> (site, (top_cut, bottom_cut)) entries. Names come from the
        '<1>' lines and sequences from the '<5>' lines, paired in order.

        :raises: Exception when the number of names and sequences differ.

        '''
        print('Processing file')
        with open(self._rebase_file, 'r') as f:
            raw = f.readlines()
        names = [line.strip()[3:] for line in raw if line.startswith('<1>')]
        seqs = [line.strip()[3:] for line in raw if line.startswith('<5>')]
        if len(names) != len(seqs):
            raise Exception('Found different number of enzyme names and '
                            'sequences.')
        self._enzyme_dict = {}
        for name, seq in zip(names, seqs):
            if '?' in seq:
                # Is unknown sequence, don't keep it
                continue
            if seq.startswith('(') and seq.endswith(')'):
                # Has four+ cut sites, don't keep it
                continue
            if '^' in seq:
                # Has reasonable internal cut sites, keep it. The top cut is
                # where '^' sits; the bottom cut is taken as the same
                # distance from the other end of the site.
                top_cut = seq.index('^')
                bottom_cut = len(seq) - top_cut - 1
                site = seq.replace('^', '')
                self._enzyme_dict[name] = (site, (top_cut, bottom_cut))
            elif seq.endswith(')'):
                # Has reasonable external cut sites, keep it
                # (4-cutter also starts with '(')
                # separate site and cut locations; 'SITE(a/b)' gives cut
                # offsets that are shifted here by the length of the site.
                site, cuts = seq.split('(')
                cuts = cuts.replace(')', '')
                top_cut, bottom_cut = [int(x) + len(site) for x in
                                       cuts.split('/')]
                self._enzyme_dict[name] = (site, (top_cut, bottom_cut))
# Sequence fixtures shared by the central dogma tests below: a DNA template,
# the RNA expected from transcribing it, and the peptide expected from
# translating that RNA.
_TEMPLATE_DNA_SEQ = ('ATGATGGGCAGTGTCGAATTAAATCTGCGTGAGACAGAATTGTGTT'
                     'TGGGACTACCAGGCGGTGATACAGTTGCACCAGTAACAGGAAACAA'
                     'AAGAGGATTCTCTGAAACAGTAGATTTGAAACTTAATTTGAACAAT'
                     'GAGCCAGCCAACAAGGAAGGTTCCACCACTCATGACGTCGTCACAT'
                     'TTGATAGTAAAGAAAAGAGTGCGTGTCCAAAAGATCCAGCTAAGCC'
                     'ACCTGCCAAGGCTCAAGTCGTCGGATGGCCACCTGTGAGATCTTAT'
                     'AGAAAGAACGTAATGGTTTCTTGTCAGAAGTCCAGTGGTGGTCCTG'
                     'AAGCAGCGGCTtgaaaa')
_TRANSCRIPT_RNA_SEQ = ('AUGAUGGGCAGUGUCGAAUUAAAUCUGCGUGAGACAGAAUU'
                       'GUGUUUGGGACUACCAGGCGGUGAUACAGUUGCACCAGUAA'
                       'CAGGAAACAAAAGAGGAUUCUCUGAAACAGUAGAUUUGAAA'
                       'CUUAAUUUGAACAAUGAGCCAGCCAACAAGGAAGGUUCCAC'
                       'CACUCAUGACGUCGUCACAUUUGAUAGUAAAGAAAAGAGUG'
                       'CGUGUCCAAAAGAUCCAGCUAAGCCACCUGCCAAGGCUCAA'
                       'GUCGUCGGAUGGCCACCUGUGAGAUCUUAUAGAAAGAACGU'
                       'AAUGGUUUCUUGUCAGAAGUCCAGUGGUGGUCCUGAAGCAG'
                       'CGGCUugaaaa')
_PEPTIDE_SEQ = ('MMGSVELNLRETELCLGLPGGDTVAPVTGNK'
                'RGFSETVDLKLNLNNEPANKEGSTTHDVVTF'
                'DSKEKSACPKDPAKPPAKAQVVGWPPVRSYR'
                'KNVMVSCQKSSGGPEAAA')


def test_transcription():
    '''Test transcribe and coding_sequence on a DNA template.'''
    template = DNA(_TEMPLATE_DNA_SEQ)
    expected_rna = RNA(_TRANSCRIPT_RNA_SEQ)

    # Basic transcription should work
    transcript = reaction.transcribe(template)
    assert_equal(transcript, expected_rna)

    # Coding RNA should exclude anything after a stop codon
    assert_equal(reaction.coding_sequence(transcript), expected_rna[:-3])

    # Should fail if sequence lacks start codon or stop codon
    for incomplete in ('aaatag', 'atgaaa'):
        assert_raises(ValueError, reaction.coding_sequence,
                      reaction.transcribe(DNA(incomplete)))


def test_translation():
    '''Test translate on a full transcript and on its coding sequence.'''
    transcript = RNA(_TRANSCRIPT_RNA_SEQ)
    expected_peptide = Peptide(_PEPTIDE_SEQ)

    # Basic translation should work
    assert_equal(reaction.translate(transcript), expected_peptide)

    # Coding peptide should exclude anything after a stop codon
    coding_rna = reaction.coding_sequence(transcript)
    assert_equal(reaction.translate(coding_rna), expected_peptide)


def test_reverse_transcription():
    '''Test reverse_transcribe against the original DNA template.'''
    transcript = RNA(_TRANSCRIPT_RNA_SEQ)
    expected_dna = DNA(_TEMPLATE_DNA_SEQ)

    # Basic reverse transcription should work
    assert_equal(reaction.reverse_transcribe(transcript), expected_dna)
-------------------------------------------------------------------------------- 1 | 2 | Primer Design 3 | ------------- 4 | 5 | One of the first things anyone learns in a molecular biology lab is how 6 | to design primers. The exact strategies vary a lot and are sometimes 7 | polymerase-specific. ``coral`` uses the Klavins lab's approach of 8 | targeting a specific melting temperature (Tm) and nothing else, with the 9 | exact Tm targeted being between 65°C and 72°C, the choice being personal 10 | preference. ``coral`` currently defaults to 65°C on the Phusion 11 | (modified Breslauer 1986) Tm calculator. 12 | 13 | ``coral.design.primer`` is a function that takes in a ``sequence.DNA`` 14 | object and rapidly finds the 5' subsequence that is closest to the 15 | desired Tm (within a user-definable error range). If the entire sequence 16 | would make a primer with too low a Tm, a descriptive error is 17 | produced. 18 | 19 | For this tutorial, let's design primers that will amplify the gene EYFP. 20 | 21 | .. code:: ipython2 22 | 23 | import coral as cor 24 | 25 | First we read in a plasmid from Havens et al. 2012 and isolate the EYFP 26 | sequence. 27 | 28 | .. code:: ipython2 29 | 30 | plasmid = cor.seqio.read_dna("../files_for_tutorial/maps/pGP4G-EYFP.ape") 31 | eyfp_f = [f for f in plasmid.features if f.name == 'EYFP'][0] 32 | eyfp = plasmid.extract(eyfp_f) 33 | print len(eyfp) 34 | eyfp 35 | 36 | 37 | .. parsed-literal:: 38 | 39 | 717 40 | 41 | 42 | 43 | 44 | .. parsed-literal:: 45 | 46 | ATGGTGAGCAAGGGCGAGGAGCTGTTCACCGGGGTGGTGC ... CGCCGCCGGGATCACTCTCGGCATGGACGAGCTGTACAAG 47 | TACCACTCGTTCCCGCTCCTCGACAAGTGGCCCCACCACG ... GCGGCGGCCCTAGTGAGAGCCGTACCTGCTCGACATGTTC 48 | 49 | 50 | 51 | Designing primers is straightforward - you just call 52 | ``design.primer`` with a ``sequence.DNA`` object as the input. 53 | 54 | ..
code:: ipython2 55 | 56 | # Forward and reverse, one at a time using primer() 57 | forward = cor.design.primer(eyfp) 58 | reverse = cor.design.primer(eyfp.reverse_complement()) 59 | # Both at once using primers() 60 | forward, reverse = cor.design.primers(eyfp) 61 | # primer() has many options, including adding overhangs 62 | custom_forward = cor.design.primer(eyfp, tm=65, min_len=12, 63 | tm_undershoot=1, tm_overshoot=1, 64 | end_gc=True, tm_parameters="santalucia98", 65 | overhang=cor.DNA("GGGGGATCGAT")) 66 | print forward 67 | print 68 | print custom_forward 69 | 70 | 71 | .. parsed-literal:: 72 | 73 | ATGGTGAGCAAGGGCG 74 | 75 | GGGGGATCGATATGGTGAGCAAGGGCGAGGAGCTGTTCAC 76 | 77 | 78 | Designing primers and getting a string output is just the first step in 79 | primer design - we want to know whether the primers actually *work* and 80 | write them out to a file. The point of programming DNA is that you 81 | *never* copy and paste! 82 | 83 | To simulate a PCR using the rules of molecular biology, use 84 | ``coral.reaction.pcr``. The output is a subsequence of the template DNA 85 | - the features may not match the plasmid exactly (due to being truncated 86 | by the PCR), but the sequences match. If a primer would bind in multiple 87 | places (exact matches to the template), the pcr function will fail and 88 | give a useful message. 89 | 90 | You can check for identical sequences using Python's built-in ``==`` 91 | operator. 92 | 93 | .. code:: ipython2 94 | 95 | amplicon = cor.reaction.pcr(plasmid, forward, reverse) 96 | amplicon == eyfp 97 | 98 | 99 | 100 | 101 | .. parsed-literal:: 102 | 103 | True 104 | 105 | 106 | 107 | Now that we have verified that our primers should at least amplify the 108 | DNA that we want, let's write out our primers to a file so they can be 109 | submitted to an oligo synthesis company. 110 | 111 | ..
def test_oligo_assembly():
    '''
    Tests output of OligoAssembly class.

    Designs a 12-oligo assembly for a fixed sequence and compares the
    lower-cased oligos with a stored reference, then checks that an
    over-constrained design raises.

    '''

    seq = 'atgcgtaaaggagaagaacttttcactggagttgtcccaattcttgttgaattagatggtgat' + \
          'gttaatgggcacaaattttctgtcagtggagagggtgaaggtgatgcaacatacggaaaactt' + \
          'acccttaaatttatttgcactactggaaaactacctgttccatggccaacacttgtcactact' + \
          'ttcggttatggtgttcaatgctttgcgagatacccagatcatatgaaacagcatgactttttc' + \
          'aagagtgccatgcccgaaggttatgtacaggaaagaactatatttttcaaagatgacgggaac' + \
          'tacaagacacgtgctgaagtcaagtttgaaggtgatacccttgttaatagaatcgagttaaaa' + \
          'ggtattgattttaaagaagatggaaacattcttggacacaaattggaatacaactataactca' + \
          'cacaatgtatacatcatggcagacaaacaaaagaatggaatcaaagttaacttcaaaattaga' + \
          'cacaacattgaagatggaagcgttcaactagcagaccattatcaacaaaatactccaattggc' + \
          'gatggccctgtccttttaccagacaaccattacctgtccacacaatctgccctttcgaaagat' + \
          'cccaacgaaaagagagaccacatggtccttcttgagtttgtaacagctgctgggattacacat' + \
          'ggcatggatgaactatacaaaaggcctgctgcaaacgacgaaaactacgctttagtagcttaa'
    dna_seq = DNA(seq)

    # Test to make sure oligo_number parameter is working
    reference_oligos = ['atgcgtaaaggagaagaacttttcactggagttgtcccaattcttgttg' +
                        'aattagatggtgatgttaatgggcacaaattttctgtcagtgg',
                        'tttccagtagtgcaaataaatttaagggtaagttttccgtatgttgcat' +
                        'caccttcaccctctccactgacagaaaatttgtgcccattaaca',
                        'cggaaaacttacccttaaatttatttgcactactggaaaactacctgtt' +
                        'ccatggccaacacttgtcactactttcggttatggtgttcaatgc',
                        'ttcgggcatggcactcttgaaaaagtcatgctgtttcatatgatctggg' +
                        'tatctcgcaaagcattgaacaccataaccgaaagtagtgacaagt',
                        'ttcaagagtgccatgcccgaaggttatgtacaggaaagaactatatttt' +
                        'tcaaagatgacgggaactacaagacacgtgctgaagtcaagtttgaa',
                        'agaatgtttccatcttctttaaaatcaataccttttaactcgattctat' +
                        'taacaagggtatcaccttcaaacttgacttcagcacgtgtcttgtagt',
                        'tcgagttaaaaggtattgattttaaagaagatggaaacattcttggaca' +
                        'caaattggaatacaactataactcacacaatgtatacatcatggcaga',
                        'tcttcaatgttgtgtctaattttgaagttaactttgattccattctttt' +
                        'gtttgtctgccatgatgtatacattgtgtgagttatagttgtattc',
                        'ggaatcaaagttaacttcaaaattagacacaacattgaagatggaagcg' +
                        'ttcaactagcagaccattatcaacaaaatactccaattggcgatgg',
                        'cgttgggatctttcgaaagggcagattgtgtggacaggtaatggttgtc' +
                        'tggtaaaaggacagggccatcgccaattggagtattttgttga',
                        'tgccctttcgaaagatcccaacgaaaagagagaccacatggtccttctt' +
                        'gagtttgtaacagctgctgggattacacatggcat',
                        'ttaagctactaaagcgtagttttcgtcgtttgcagcaggccttttgtat' +
                        'agttcatccatgccatgtgtaatcccagcagctg']

    oligo_n_assembly = design.OligoAssembly(dna_seq,
                                            tm=72,
                                            length_range=(100, 160),
                                            require_even=True,
                                            start_5=True,
                                            oligo_number=12)
    oligo_n_assembly.design_assembly()

    # Compare case-insensitively: the designed oligos are lower-cased before
    # being checked against the lower-case reference list
    oligo_n_output = [str(oligo).lower() for oligo in oligo_n_assembly.oligos]
    assert_equal(oligo_n_output, reference_oligos)

    # Test to make sure oligo_number parameter fails with too restrictive of
    # settings
    def design_impossible(test_seq):
        '''Attempt a design with length_range pinned to exactly 120.'''
        assembly = design.OligoAssembly(test_seq,
                                        tm=72,
                                        length_range=(120, 120),
                                        require_even=True,
                                        start_5=True,
                                        oligo_number=12)
        assembly.design_assembly()

    # NOTE(review): any Exception satisfies this check; a narrower exception
    # type would make the test stricter once the expected type is confirmed.
    assert_raises(Exception, design_impossible, dna_seq)
def test_overlapping_overlaps():
    '''
    Sometimes, an assembly produces a result with 'overlapping overlaps' -
    not ideal. This should eventually be replaced by a catchable exception or
    prevented outright.

    For now the test only checks that, after designing the assembly, the
    ``warning`` attribute holds a string.

    '''

    test_seq = DNA('ATCAATACTTATTACGATATATATAT' * 34)
    oligo_n_assembly = design.OligoAssembly(test_seq,
                                            tm=65,
                                            length_range=(80, 150),
                                            require_even=True,
                                            start_5=True,
                                            overlap_min=20,
                                            oligo_number=10)
    oligo_n_assembly.design_assembly()
    # isinstance is the idiomatic type check and also accepts str subclasses
    assert_true(isinstance(oligo_n_assembly.warning, str))
parsed-literal:: 32 | 33 | ['DNA', 34 | 'Feature', 35 | 'Peptide', 36 | 'Primer', 37 | 'RNA', 38 | 'RestrictionSite', 39 | '__builtins__', 40 | '__doc__', 41 | '__file__', 42 | '__name__', 43 | '__package__', 44 | '__path__', 45 | '__version__', 46 | 'analysis', 47 | 'constants', 48 | 'database', 49 | 'design', 50 | 'reaction', 51 | 'seqio', 52 | 'sequence', 53 | 'ssDNA', 54 | 'utils'] 55 | 56 | 57 | 58 | Top-level 59 | ~~~~~~~~~ 60 | 61 | In addition to the core modules, the top-level coral module provides the 62 | core data structures used in coral - DNA, RNA, and Peptide (as well as 63 | specialized classes like Primer). 64 | 65 | .. code:: ipython2 66 | 67 | dna = cor.DNA("ATGC") 68 | print "DNA: {}".format(dna) 69 | # You can also run methods on the object - in this case, check if the DNA is palindromic 70 | print "Palindrome?: {}".format(dna.is_palindrome()) 71 | print 72 | rna = cor.RNA("AUGC") 73 | print "RNA: {}".format(rna) 74 | print 75 | pep = cor.Peptide("mlnp") 76 | print "Peptide: {}".format(pep) 77 | 78 | 79 | .. parsed-literal:: 80 | 81 | DNA: ATGC 82 | Palindrome?: False 83 | 84 | RNA: AUGC 85 | 86 | Peptide: MLNP 87 | 88 | 89 | As you can see above, to make DNA, RNA, or Peptide objects you just 90 | call the corresponding class (``cor.DNA``, ``cor.RNA``, or 91 | ``cor.Peptide``) and give it a valid string as an argument. Case does not matter, but precision does - only unambiguous 92 | and valid DNA, RNA, or Peptide sequences are allowed. The sequence 93 | module also contains special cases of DNA objects (Primer, 94 | RestrictionSite, Feature), which are covered in detail later. You can 95 | treat DNA, RNA, and Peptide objects much like strings or lists in 96 | Python, so addition, multiplication, slicing, and container logic are 97 | all defined.
98 | 99 | analysis 100 | ~~~~~~~~ 101 | 102 | The analysis module provides functions and classes for 103 | analyzing DNA, RNA, and Peptides, focusing on information inherent to 104 | the sequence (palindromes, repeats, melting temperatures), structural 105 | information (Vienna RNA and NUPACK classes), and sequencing (Sanger 106 | sequencing analysis). 107 | 108 | .. code:: ipython2 109 | 110 | # Example: finding the Tm of ATGCATGCATGCATGC according to the SantaLucia98 method. 111 | cor.analysis.tm(dna * 4, parameters="santalucia98") 112 | 113 | 114 | 115 | 116 | .. parsed-literal:: 117 | 118 | 48.03216557174494 119 | 120 | 121 | 122 | constants 123 | ~~~~~~~~~ 124 | 125 | The constants module contains data - information that doesn't change 126 | (i.e. is constant). This includes alphabets (sets of characters) that 127 | define DNA, RNA, and peptides and other standards, such as the GenBank 128 | feature table. 129 | 130 | database 131 | ~~~~~~~~ 132 | 133 | The database module is for accessing scientific databases. It currently 134 | has limited functionality, talking only to the Rebase database of 135 | restriction enzymes. 136 | 137 | design 138 | ~~~~~~ 139 | 140 | The design module holds classes and functions for the design of new 141 | constructs. The two most important functions are primer and 142 | gibson. The former designs primers for a given input sequence while the 143 | latter designs Gibson primers for a whole series of input fragments. 144 | 145 | reaction 146 | ~~~~~~~~ 147 | 148 | The reaction module simulates reactions relevant to cloning and basic 149 | molecular genetics, including transcription, reverse transcription, 150 | translation, exonuclease activity, extracting coding sequences, 151 | digesting with restriction endonucleases, PCR, and Gibson assembly. 152 | 153 | seqio 154 | ~~~~~ 155 | 156 | The seqio module is for sequence input/output - reading and writing 157 | sequences.
def primer(dna, tm=65, min_len=10, tm_undershoot=1, tm_overshoot=3,
           end_gc=False, tm_parameters='cloning', overhang=None,
           structure=False):
    '''Design primer to a nearest-neighbor Tm setpoint.

    Candidate primers are 5' prefixes of the first 90 bases of ``dna``. They
    start at ``min_len`` bases and grow one base at a time until a candidate's
    Tm exceeds ``tm + tm_overshoot`` or the (truncated) template is used up.
    Of the candidates whose Tm is at least ``tm - tm_undershoot`` (and, with
    ``end_gc``, whose last base is G or C), the one closest to ``tm`` is
    returned.

    :param dna: Sequence for which to design a primer.
    :type dna: coral.DNA
    :param tm: Ideal primer Tm in degrees C.
    :type tm: float
    :param min_len: Minimum primer length.
    :type min_len: int
    :param tm_undershoot: Allowed Tm undershoot.
    :type tm_undershoot: float
    :param tm_overshoot: Allowed Tm overshoot. Only used to decide when to
                         stop extending candidates.
    :type tm_overshoot: float
    :param end_gc: Obey the 'end on G or C' rule.
    :type end_gc: bool
    :param tm_parameters: Melting temp calculator method to use.
    :type tm_parameters: string
    :param overhang: Append the primer to this overhang sequence. Its ``top``
                     attribute is what gets passed to coral.Primer.
    :type overhang: coral.DNA
    :param structure: Evaluate primer for structure, with warning for high
                      structure.
    :type structure: bool
    :returns: A primer.
    :rtype: coral.Primer
    :raises: ValueError if the input sequence is lower than the Tm settings
             allow.
             ValueError if no candidate satisfies the settings (for example
             no primer ending with G or C can be found, or min_len is longer
             than the usable template).

    '''
    # Check Tm of input sequence to see if it's already too low. This is done
    # on the full input, before it is truncated below.
    seq_tm = coral.analysis.tm(dna, parameters=tm_parameters)
    if seq_tm < (tm - tm_undershoot):
        msg = 'Input sequence Tm is lower than primer Tm setting'
        raise ValueError(msg)
    # Focus on first 90 bases - shouldn't need more than 90bp to anneal
    dna = dna[0:90]

    # Generate primers from min_len to 'tm' + tm_overshoot
    # TODO: this is a good place for optimization. Only calculate as many
    # primers as are needed. Use binary search.
    candidates = []
    last_tm = 0
    bases = min_len
    # '<=' rather than '!=': the full-length template is evaluated too, and
    # the loop still terminates when min_len exceeds the template length
    # (previously that case could loop forever).
    while last_tm <= tm + tm_overshoot and bases <= len(dna):
        next_primer = dna[0:bases]
        last_tm = coral.analysis.tm(next_primer, parameters=tm_parameters)
        candidates.append((next_primer, last_tm))
        bases += 1

    # Trim primer list based on tm_undershoot and end_gc
    candidates = [(seq, melt) for seq, melt in candidates if
                  melt >= tm - tm_undershoot]
    if end_gc:
        candidates = [pair for pair in candidates if
                      pair[0][-1] == coral.DNA('C') or
                      pair[0][-1] == coral.DNA('G')]
    if not candidates:
        raise ValueError('No primers could be generated using these settings')

    # Find the primer closest to the set Tm (first one wins on ties), make it
    # single stranded
    best_primer, best_tm = min(candidates,
                               key=lambda pair: abs(pair[1] - tm))
    best_primer = best_primer.top

    # Apply overhang
    if overhang:
        overhang = overhang.top

    output_primer = coral.Primer(best_primer, best_tm, overhang=overhang)

    def _structure(candidate):
        '''Check annealing sequence for structure.

        :param candidate: Primer for which to evaluate structure
        :type candidate: sequence.Primer

        '''
        # Check whole primer for high-probability structure, focus in on
        # annealing sequence, report average
        nupack = coral.analysis.Nupack(candidate.primer())
        pairs = nupack.pairs(0)
        anneal_len = len(candidate.anneal)
        pairs_mean = sum(pairs[-anneal_len:]) / anneal_len
        # NOTE(review): the warning fires on a LOW mean, which presumes
        # pairs() yields per-base unpaired probabilities - confirm against
        # the Nupack wrapper.
        if pairs_mean < 0.5:
            warnings.warn('High probability structure', Warning)
        return pairs_mean
    if structure:
        _structure(output_primer)
    return output_primer
def primers(dna, tm=65, min_len=10, tm_undershoot=1, tm_overshoot=3,
            end_gc=False, tm_parameters='cloning', overhangs=None,
            structure=False):
    '''Design a forward and a reverse primer for PCR amplifying a sequence.

    The forward primer is designed on ``dna`` and the reverse primer on its
    reverse complement; both use the same settings, which are passed straight
    through to ``primer``.

    :param dna: Input sequence.
    :type dna: coral.DNA
    :param tm: Ideal primer Tm in degrees C.
    :type tm: float
    :param min_len: Minimum primer length.
    :type min_len: int
    :param tm_undershoot: Allowed Tm undershoot.
    :type tm_undershoot: float
    :param tm_overshoot: Allowed Tm overshoot.
    :type tm_overshoot: float
    :param end_gc: Obey the 'end on G or C' rule.
    :type end_gc: bool
    :param tm_parameters: Melting temp calculator method to use.
    :type tm_parameters: string
    :param overhangs: 2-tuple of overhang sequences (forward, reverse).
    :type overhangs: tuple
    :param structure: Evaluate each primer for structure, with warning for high
                      structure.
    :type structure: bool
    :returns: A list of primers (each the output of primer).
    :rtype: list

    '''
    # No overhangs given: design both primers without one
    tails = overhangs if overhangs else [None, None]
    strands = [dna, dna.reverse_complement()]
    # Settings shared by the forward and the reverse design
    shared = dict(tm=tm, min_len=min_len, tm_undershoot=tm_undershoot,
                  tm_overshoot=tm_overshoot, end_gc=end_gc,
                  tm_parameters=tm_parameters, structure=structure)
    return [primer(strand, overhang=tail, **shared)
            for strand, tail in zip(strands, tails)]
'''Nearest-neighbor method Tm calculation parameters.

The published nearest-neighbor parameter sets do not list all 16
dinucleotides. The missing entries are filled in by assuming that a
dinucleotide and its reverse complement share the same values:
    AA = TT
    GG = CC
    CA = TG
    CT = AG
    GA = TC
    GT = AC

Each parameter set is a dict with four keys: 'delta_h' and 'delta_s' map a
dinucleotide to its value, and 'delta_h_err' and 'delta_s_err' hold the
correction terms of that set.
NOTE(review): units and sign convention are not stated in this module -
confirm against the Tm calculator that consumes these tables.

'''


# Breslauer parameter set.
BRESLAUER = {
    'delta_h': {
        'AA': 9.1,
        'TT': 9.1,
        'AT': 8.6,
        'TA': 6.0,
        'CA': 5.8,
        'TG': 5.8,
        'GT': 6.5,
        'AC': 6.5,
        'CT': 7.8,
        'AG': 7.8,
        'GA': 5.6,
        'TC': 5.6,
        'CG': 11.9,
        'GC': 11.1,
        'GG': 11.0,
        'CC': 11.0},
    'delta_h_err': {
        'anyGC': 0.0,
        'onlyAT': 0.0,
        'symmetry': 0.0,
        'terminalT': 0.0},
    'delta_s': {
        'AA': 24.0,
        'TT': 24.0,
        'AT': 23.9,
        'TA': 16.9,
        'CA': 12.9,
        'TG': 12.9,
        'GT': 17.3,
        'AC': 17.3,
        'CT': 20.8,
        'AG': 20.8,
        'GA': 13.5,
        'TC': 13.5,
        'CG': 27.8,
        'GC': 26.7,
        'GG': 26.6,
        'CC': 26.6},
    'delta_s_err': {
        'anyGC': 16.77,
        'onlyAT': 20.13,
        'symmetry': 1.34,
        'terminalT': 0.0}}


# SantaLucia 1996 parameter set.
SANTALUCIA96 = {
    'delta_h': {
        'AA': 8.4,
        'TT': 8.4,
        'AT': 6.5,
        'TA': 6.3,
        'CA': 7.4,
        'TG': 7.4,
        'GT': 8.6,
        'AC': 8.6,
        'CT': 6.1,
        'AG': 6.1,
        'GA': 7.7,
        'TC': 7.7,
        'CG': 10.1,
        'GC': 11.1,
        'GG': 6.7,
        'CC': 6.7},
    'delta_h_err': {
        'anyGC': 0.0,
        'onlyAT': 0.0,
        'symmetry': 0.0,
        'terminalT': -0.4},
    'delta_s': {
        'AA': 23.6,
        'TT': 23.6,
        'AT': 18.8,
        'TA': 18.5,
        'CA': 19.3,
        'TG': 19.3,
        'GT': 23.0,
        'AC': 23.0,
        'CT': 16.1,
        'AG': 16.1,
        'GA': 20.3,
        'TC': 20.3,
        'CG': 25.5,
        'GC': 28.4,
        'GG': 15.6,
        'CC': 15.6},
    'delta_s_err': {
        'anyGC': 5.9,
        'onlyAT': 9.0,
        'symmetry': 1.4,
        'terminalT': 0.0}}


# Sugimoto parameter set.
SUGIMOTO = {
    'delta_h': {
        'AA': 8.0,
        'TT': 8.0,
        'AT': 5.6,
        'TA': 6.6,
        'CA': 8.2,
        'TG': 8.2,
        'GT': 9.4,
        'AC': 9.4,
        'CT': 6.6,
        'AG': 6.6,
        'GA': 8.8,
        'TC': 8.8,
        'CG': 11.8,
        'GC': 10.5,
        'GG': 10.9,
        'CC': 10.9},
    'delta_h_err': {
        'anyGC': -0.6,
        'onlyAT': -0.6,
        'symmetry': 0.0,
        'terminalT': 0.0},
    'delta_s': {
        'AA': 21.9,
        'TT': 21.9,
        'AT': 15.2,
        'TA': 18.4,
        'CA': 21.0,
        'TG': 21.0,
        'GT': 25.5,
        'AC': 25.5,
        'CT': 16.4,
        'AG': 16.4,
        'GA': 23.5,
        'TC': 23.5,
        'CG': 29.0,
        'GC': 26.4,
        'GG': 28.4,
        'CC': 28.4},
    'delta_s_err': {
        'anyGC': 9.0,
        'onlyAT': 9.0,
        'symmetry': 1.4,
        'terminalT': 0.0}}


# SantaLucia 1998 parameter set. Unlike the other sets, its correction terms
# are keyed 'initGC' / 'initAT' / 'symmetry' (no 'anyGC', 'onlyAT' or
# 'terminalT' entries).
SANTALUCIA98 = {
    'delta_h': {
        'AA': 7.9,
        'TT': 7.9,
        'AT': 7.2,
        'TA': 7.2,
        'CA': 8.5,
        'TG': 8.5,
        'GT': 8.4,
        'AC': 8.4,
        'CT': 7.8,
        'AG': 7.8,
        'GA': 8.2,
        'TC': 8.2,
        'CG': 10.6,
        'GC': 9.8,
        'GG': 8.0,
        'CC': 8.0},
    'delta_h_err': {
        'initGC': -0.1,
        'initAT': -2.3,
        'symmetry': 0.0},
    'delta_s': {
        'AA': 22.2,
        'TT': 22.2,
        'AT': 20.4,
        'TA': 21.3,
        'CA': 22.7,
        'TG': 22.7,
        'GT': 22.4,
        'AC': 22.4,
        'CT': 21.0,
        'AG': 21.0,
        'GA': 22.2,
        'TC': 22.2,
        'CG': 27.2,
        'GC': 24.4,
        'GG': 19.9,
        'CC': 19.9},
    'delta_s_err': {
        'initGC': 2.8,
        'initAT': -4.1,
        'symmetry': 1.4}}


# 'cloning' parameter set: the same nearest-neighbor values as BRESLAUER, but
# with every correction term set to zero.
CLONING = {
    'delta_h': {
        'AA': 9.1,
        'TT': 9.1,
        'AT': 8.6,
        'TA': 6.0,
        'CA': 5.8,
        'TG': 5.8,
        'GT': 6.5,
        'AC': 6.5,
        'CT': 7.8,
        'AG': 7.8,
        'GA': 5.6,
        'TC': 5.6,
        'CG': 11.9,
        'GC': 11.1,
        'GG': 11.0,
        'CC': 11.0},
    'delta_h_err': {
        'anyGC': 0.0,
        'onlyAT': 0.0,
        'symmetry': 0.0,
        'terminalT': 0.0},
    'delta_s': {
        'AA': 24.0,
        'TT': 24.0,
        'AT': 23.9,
        'TA': 16.9,
        'CA': 12.9,
        'TG': 12.9,
        'GT': 17.3,
        'AC': 17.3,
        'CT': 20.8,
        'AG': 20.8,
        'GA': 13.5,
        'TC': 13.5,
        'CG': 27.8,
        'GC': 26.7,
        'GG': 26.6,
        'CC': 26.6},
    'delta_s_err': {
        'onlyAT': 0.0,
        'anyGC': 0.0,
        'symmetry': 0.0,
        'terminalT': 0.0}}