├── .github └── workflows │ ├── build.yml │ ├── document.yml │ └── publish.yml ├── .gitignore ├── LICENSE ├── README.rst ├── docs ├── Makefile ├── conf.py ├── index.rst ├── make.bat ├── ref.rst └── requirements.txt ├── examples └── basic_example.py ├── pypi-readme.rst ├── pyproject.toml ├── python_codon_tables ├── __init__.py ├── codon_usage_data │ ├── codon_usage_retriever.py │ ├── organisms.csv │ └── tables │ │ ├── b_subtilis_1423.csv │ │ ├── c_elegans_6239.csv │ │ ├── d_melanogaster_7227.csv │ │ ├── e_coli_316407.csv │ │ ├── g_gallus_9031.csv │ │ ├── h_sapiens_9606.csv │ │ ├── m_musculus_10090.csv │ │ ├── m_musculus_domesticus_10092.csv │ │ └── s_cerevisiae_4932.csv ├── python_codon_tables.py └── version.py └── tests ├── data └── table_newline.csv └── test_basics.py /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | 3 | on: [push, workflow_dispatch] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-24.04 8 | 9 | steps: 10 | - uses: actions/checkout@v4 11 | - name: Set up Python 12 | uses: actions/setup-python@v5 13 | with: 14 | python-version: "3.12" 15 | - name: Install dependencies 16 | run: | 17 | python -m pip install --upgrade pip 18 | pip install pytest 19 | - name: Install 20 | run: | 21 | pip install -e . 
22 | - name: Test with pytest 23 | run: | 24 | python -m pytest 25 | -------------------------------------------------------------------------------- /.github/workflows/document.yml: -------------------------------------------------------------------------------- 1 | name: documentation 2 | 3 | on: 4 | workflow_dispatch: 5 | release: 6 | types: [created] 7 | 8 | jobs: 9 | document: 10 | runs-on: ubuntu-24.04 11 | environment: 12 | name: github-pages 13 | url: ${{ steps.deployment.outputs.page_url }} 14 | permissions: 15 | pages: write 16 | id-token: write 17 | steps: 18 | - id: deployment 19 | uses: sphinx-notes/pages@v3 20 | with: 21 | python_version: 3.12 22 | documentation_path: ./docs 23 | requirements_path: ./docs/requirements.txt 24 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish to PyPI 2 | 3 | on: 4 | release: 5 | types: [created] 6 | 7 | jobs: 8 | test: 9 | runs-on: ubuntu-24.04 10 | 11 | steps: 12 | - uses: actions/checkout@v4 13 | - name: Set up Python 14 | uses: actions/setup-python@v5 15 | with: 16 | python-version: "3.12" 17 | - name: Install 18 | run: | 19 | pip install pytest 20 | pip install -e . 
21 | - name: Test 22 | run: | 23 | python -m pytest 24 | deploy: 25 | runs-on: ubuntu-24.04 26 | needs: [test] 27 | permissions: 28 | id-token: write 29 | steps: 30 | - uses: actions/checkout@v4 31 | - name: Set up Python 32 | uses: actions/setup-python@v5 33 | with: 34 | python-version: '3.12' 35 | cache: pip 36 | cache-dependency-path: '**/pyproject.toml' 37 | - name: Install dependencies 38 | run: | 39 | pip install setuptools wheel build 40 | - name: Build 41 | run: | 42 | python -m build 43 | - name: Publish 44 | uses: pypa/gh-action-pypi-publish@release/v1 45 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Creative Commons Legal Code 2 | 3 | CC0 1.0 Universal 4 | 5 | CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE 6 | LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN 7 | ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS 8 | INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES 9 | REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS 10 | PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM 11 | THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED 12 | HEREUNDER. 13 | 14 | Statement of Purpose 15 | 16 | The laws of most jurisdictions throughout the world automatically confer 17 | exclusive Copyright and Related Rights (defined below) upon the creator 18 | and subsequent owner(s) (each and all, an "owner") of an original work of 19 | authorship and/or a database (each, a "Work"). 
20 | 21 | Certain owners wish to permanently relinquish those rights to a Work for 22 | the purpose of contributing to a commons of creative, cultural and 23 | scientific works ("Commons") that the public can reliably and without fear 24 | of later claims of infringement build upon, modify, incorporate in other 25 | works, reuse and redistribute as freely as possible in any form whatsoever 26 | and for any purposes, including without limitation commercial purposes. 27 | These owners may contribute to the Commons to promote the ideal of a free 28 | culture and the further production of creative, cultural and scientific 29 | works, or to gain reputation or greater distribution for their Work in 30 | part through the use and efforts of others. 31 | 32 | For these and/or other purposes and motivations, and without any 33 | expectation of additional consideration or compensation, the person 34 | associating CC0 with a Work (the "Affirmer"), to the extent that he or she 35 | is an owner of Copyright and Related Rights in the Work, voluntarily 36 | elects to apply CC0 to the Work and publicly distribute the Work under its 37 | terms, with knowledge of his or her Copyright and Related Rights in the 38 | Work and the meaning and intended legal effect of CC0 on those rights. 39 | 40 | 1. Copyright and Related Rights. A Work made available under CC0 may be 41 | protected by copyright and related or neighboring rights ("Copyright and 42 | Related Rights"). Copyright and Related Rights include, but are not 43 | limited to, the following: 44 | 45 | i. the right to reproduce, adapt, distribute, perform, display, 46 | communicate, and translate a Work; 47 | ii. moral rights retained by the original author(s) and/or performer(s); 48 | iii. publicity and privacy rights pertaining to a person's image or 49 | likeness depicted in a Work; 50 | iv. rights protecting against unfair competition in regards to a Work, 51 | subject to the limitations in paragraph 4(a), below; 52 | v. 
rights protecting the extraction, dissemination, use and reuse of data 53 | in a Work; 54 | vi. database rights (such as those arising under Directive 96/9/EC of the 55 | European Parliament and of the Council of 11 March 1996 on the legal 56 | protection of databases, and under any national implementation 57 | thereof, including any amended or successor version of such 58 | directive); and 59 | vii. other similar, equivalent or corresponding rights throughout the 60 | world based on applicable law or treaty, and any national 61 | implementations thereof. 62 | 63 | 2. Waiver. To the greatest extent permitted by, but not in contravention 64 | of, applicable law, Affirmer hereby overtly, fully, permanently, 65 | irrevocably and unconditionally waives, abandons, and surrenders all of 66 | Affirmer's Copyright and Related Rights and associated claims and causes 67 | of action, whether now known or unknown (including existing as well as 68 | future claims and causes of action), in the Work (i) in all territories 69 | worldwide, (ii) for the maximum duration provided by applicable law or 70 | treaty (including future time extensions), (iii) in any current or future 71 | medium and for any number of copies, and (iv) for any purpose whatsoever, 72 | including without limitation commercial, advertising or promotional 73 | purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each 74 | member of the public at large and to the detriment of Affirmer's heirs and 75 | successors, fully intending that such Waiver shall not be subject to 76 | revocation, rescission, cancellation, termination, or any other legal or 77 | equitable action to disrupt the quiet enjoyment of the Work by the public 78 | as contemplated by Affirmer's express Statement of Purpose. 79 | 80 | 3. Public License Fallback. 
Should any part of the Waiver for any reason 81 | be judged legally invalid or ineffective under applicable law, then the 82 | Waiver shall be preserved to the maximum extent permitted taking into 83 | account Affirmer's express Statement of Purpose. In addition, to the 84 | extent the Waiver is so judged Affirmer hereby grants to each affected 85 | person a royalty-free, non transferable, non sublicensable, non exclusive, 86 | irrevocable and unconditional license to exercise Affirmer's Copyright and 87 | Related Rights in the Work (i) in all territories worldwide, (ii) for the 88 | maximum duration provided by applicable law or treaty (including future 89 | time extensions), (iii) in any current or future medium and for any number 90 | of copies, and (iv) for any purpose whatsoever, including without 91 | limitation commercial, advertising or promotional purposes (the 92 | "License"). The License shall be deemed effective as of the date CC0 was 93 | applied by Affirmer to the Work. Should any part of the License for any 94 | reason be judged legally invalid or ineffective under applicable law, such 95 | partial invalidity or ineffectiveness shall not invalidate the remainder 96 | of the License, and in such case Affirmer hereby affirms that he or she 97 | will not (i) exercise any of his or her remaining Copyright and Related 98 | Rights in the Work or (ii) assert any associated claims and causes of 99 | action with respect to the Work, in either case contrary to Affirmer's 100 | express Statement of Purpose. 101 | 102 | 4. Limitations and Disclaimers. 103 | 104 | a. No trademark or patent rights held by Affirmer are waived, abandoned, 105 | surrendered, licensed or otherwise affected by this document. 106 | b. 
Affirmer offers the Work as-is and makes no representations or 107 | warranties of any kind concerning the Work, express, implied, 108 | statutory or otherwise, including without limitation warranties of 109 | title, merchantability, fitness for a particular purpose, non 110 | infringement, or the absence of latent or other defects, accuracy, or 111 | the present or absence of errors, whether or not discoverable, all to 112 | the greatest extent permissible under applicable law. 113 | c. Affirmer disclaims responsibility for clearing rights of other persons 114 | that may apply to the Work or any use thereof, including without 115 | limitation any person's Copyright and Related Rights in the Work. 116 | Further, Affirmer disclaims responsibility for obtaining any necessary 117 | consents, permissions or other rights required for any use of the 118 | Work. 119 | d. Affirmer understands and acknowledges that Creative Commons is not a 120 | party to this document and has no duty or obligation with respect to 121 | this CC0 or use of the Work. 122 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Python Codon Tables 2 | =================== 3 | 4 | .. image:: https://github.com/Edinburgh-Genome-Foundry/python_codon_tables/actions/workflows/build.yml/badge.svg 5 | :target: https://github.com/Edinburgh-Genome-Foundry/python_codon_tables/actions/workflows/build.yml 6 | :alt: GitHub CI build status 7 | 8 | Provides codon usage tables as dictionaries, for Python. 9 | 10 | Tables for the following organisms are provided with the package 11 | (other tables can be downloaded using a TaxID): 12 | 13 | - *B. subtilis* 14 | - *C. elegans* 15 | - *D. melanogaster* 16 | - *E. coli* 17 | - *G. gallus* 18 | - *H. sapiens* 19 | - *M. musculus* 20 | - *M. musculus domesticus* 21 | - *S. 
cerevisiae* 22 | 23 | All tables are from `kazusa.or.jp `_ 24 | (codon usages were computed using NCBI sequence data). The original publication: 25 | 26 | .. code:: 27 | 28 | Codon usage tabulated from the international DNA sequence databases: 29 | status for the year 2000. 30 | Nakamura, Y., Gojobori, T. and Ikemura, T. (2000) Nucl. Acids Res. 28, 292. 31 | 32 | 33 | Usage 34 | ----- 35 | 36 | .. code:: python 37 | 38 | import python_codon_tables as pct 39 | 40 | # PRINT THE LIST OF NAMES OF ALL AVAILABLE TABLES 41 | print ('Available tables:', pct.available_codon_tables_names) 42 | 43 | # LOAD ONE TABLE BY NAME 44 | table = pct.get_codons_table("b_subtilis_1423") 45 | print (table['T']['ACA']) # returns 0.4 46 | print (table['*']['TAA']) # returns 0.61 47 | 48 | # LOAD ONE TABLE BY TAXID (it will get it from the internet if it is not 49 | # in the builtin tables) 50 | table = pct.get_codons_table(1423) 51 | print (table['T']['ACA']) # returns 0.4 52 | print (table['*']['TAA']) # returns 0.61 53 | 54 | # LOAD ALL BUILT-IN TABLES AT ONCE 55 | codons_tables = pct.get_all_available_codons_tables() 56 | print (codons_tables['c_elegans_6239']['L']['CTA']) # returns 0.09 57 | 58 | - Notice that by default the tables use nucleotide T instead of U. Using ``get_codons_table('e_coli', replace_U_by_T=False)`` will leave Us as Us. 59 | 60 | - In ``get_codons_table`` you can also provide a "shorthand" notation ``b_subtilis``, which will be automatically extended to ``b_subtilis_1423`` as it appears in the built-in tables (use this feature at your own risk!) 61 | 62 | 63 | The package can also use codon usage data from a CSV file in the form: 64 | 65 | ``` 66 | amino_acid,codon,relative_frequency 67 | *,UAA,0.64 68 | *,UAG,0.07 69 | *,UGA,0.29 70 | A,GCA,0.21 71 | A,GCC,0.27 72 | K,AAA,0.76 73 | K,AAG,0.24 74 | etc. 
75 | ``` 76 | 77 | 78 | Contribute 79 | ---------- 80 | 81 | This project was started at the Edinburgh Genome Foundry by Zulko and is released on 82 | `GitHub `_ 83 | under the CC0 (Public Domain) license (and no warranty whatsoever; please cross-check the codon usage with other sources if you are not sure). 84 | Feel free to add other tables if you think of more commonly used species. 85 | 86 | 87 | Installation 88 | ------------ 89 | 90 | via pip: 91 | 92 | .. code:: bash 93 | 94 | pip install python_codon_tables 95 | 96 | Manual: 97 | 98 | .. code:: bash 99 | 100 | pip install . 101 | 102 | 103 | More biology software 104 | --------------------- 105 | 106 | .. image:: https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/Edinburgh-Genome-Foundry.github.io/master/static/imgs/logos/egf-codon-horizontal.png 107 | :target: https://edinburgh-genome-foundry.github.io/ 108 | 109 | This library is part of the `EGF Codons `_ synthetic biology software suite for DNA design, manufacturing and validation. 110 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | # -- Project information ----------------------------------------------------- 7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 8 | import os, sys 9 | 10 | sys.path.insert(0, os.path.abspath("../python_codon_tables/")) 11 | 12 | project = "python_codon_tables" 13 | author = "Zulko" 14 | 15 | # -- General configuration --------------------------------------------------- 16 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 17 | 18 | extensions = [] 19 | 20 | templates_path = ["_templates"] 21 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 22 | 23 | extensions = [ 24 | "sphinx.ext.autodoc", 25 | "sphinx.ext.napoleon", 26 | ] 27 | napoleon_numpy_docstring = True 28 | 29 | # -- Options for HTML output ------------------------------------------------- 30 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 31 | 32 | html_theme = "sphinx_rtd_theme" 33 | html_static_path = ["_static"] 34 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. Python Codon Tables documentation master file, created by 2 | sphinx-quickstart on Fri Mar 28 14:35:51 2025. 3 | 4 | Python Codon Tables 5 | =================== 6 | 7 | `General documentation `_ 8 | 9 | 10 | :doc:`API reference ` 11 | 12 | 13 | ---- 14 | 15 | .. 
toctree:: 16 | :hidden: 17 | :maxdepth: 1 18 | :caption: Python Codon Tables 19 | 20 | ref 21 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/ref.rst: -------------------------------------------------------------------------------- 1 | API reference 2 | ============= 3 | 4 | .. 
automodule:: python_codon_tables 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx_rtd_theme 2 | -------------------------------------------------------------------------------- /examples/basic_example.py: -------------------------------------------------------------------------------- 1 | import python_codon_tables as pct 2 | 3 | # PRINT THE LIST OF NAMES OF ALL AVAILABLE TABLES 4 | print("Available tables:", pct.available_codon_tables_names) 5 | 6 | # LOAD ONE TABLE BY NAME 7 | table = pct.get_codons_table("b_subtilis_1423") 8 | print(table["T"]["ACA"]) # returns 0.4 9 | print(table["*"]["TAA"]) # returns 0.61 10 | 11 | 12 | # LOAD ALL TABLES AT ONCE 13 | codon_tables = pct.get_all_available_codons_tables() 14 | print(codon_tables["c_elegans_6239"]["L"]["CTA"]) # returns 0.09 15 | -------------------------------------------------------------------------------- /pypi-readme.rst: -------------------------------------------------------------------------------- 1 | Python Codon Tables 2 | =================== 3 | 4 | Provides codon usage tables as dictionaries, for Python. 5 | 6 | 7 | Infos 8 | ----- 9 | 10 | **PIP installation:** 11 | 12 | .. code:: bash 13 | 14 | pip install python_codon_tables 15 | 16 | **Web documentation:** ``_ 17 | 18 | **Github Page:** ``_ 19 | 20 | **License:** CC0 21 | 22 | 23 | More biology software 24 | --------------------- 25 | 26 | .. image:: https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/Edinburgh-Genome-Foundry.github.io/master/static/imgs/logos/egf-codon-horizontal.png 27 | :target: https://edinburgh-genome-foundry.github.io/ 28 | 29 | Python Codon Tables is part of the `EGF Codons `_ synthetic biology software suite for DNA design, manufacturing and validation. 
30 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "python_codon_tables" 3 | version = "0.1.18" 4 | license = "CC0-1.0" 5 | authors = [{ name = "Zulko" }] 6 | description = "Codon Usage Tables for Python, from kazusa.or.jp" 7 | readme = "pypi-readme.rst" 8 | keywords = ["DNA", "codon", "usage"] 9 | 10 | [project.urls] 11 | Homepage = "https://github.com/Edinburgh-Genome-Foundry/python_codon_tables" 12 | 13 | [build-system] 14 | requires = ["setuptools"] 15 | build-backend = "setuptools.build_meta" 16 | 17 | [tool.setuptools.packages.find] 18 | exclude = ["docs*", "examples*", "tests*"] 19 | 20 | [tool.setuptools.package-data] 21 | python_codon_tables = ["codon_usage_data/*", "codon_usage_data/**/*"] 22 | -------------------------------------------------------------------------------- /python_codon_tables/__init__.py: -------------------------------------------------------------------------------- 1 | from .python_codon_tables import ( 2 | available_codon_tables_names, 3 | csv_string_to_codons_dict, 4 | get_codons_table, 5 | get_all_available_codons_tables, 6 | download_codons_table, 7 | ) 8 | 9 | from .version import __version__ 10 | -------------------------------------------------------------------------------- /python_codon_tables/codon_usage_data/codon_usage_retriever.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Retrieve a Codon usage table from kazusa.or.jp, and store in a CSV file. 
3 | 4 | Usage: 5 | ------ 6 | To retrieve a codon table for one organism, given its TaxID, use: 7 | 8 | > python codon_usage_retriever.py [TaxidNumber] [TargetFile.csv] 9 | 10 | For instance: 11 | 12 | > python codon_usage_retriever.py 316407 e_coli_codon_usage.csv 13 | 14 | To retrieve codon tables from all organisms in ``organisms.csv`` at once, use: 15 | 16 | > python codon_usage_retriever.py all 17 | 18 | 19 | 20 | """ 21 | import sys 22 | import os 23 | from python_codon_tables import download_codons_table 24 | 25 | 26 | def download_all_tables(): 27 | with open("organisms.csv", "r") as f: 28 | for line in f.readlines()[1:]: 29 | organism, taxid = line.strip("\n").split(",") 30 | print("Retrieving %s (taxid %s)" % (organism, taxid)) 31 | target = os.path.join("tables", "%s_%s.csv" % (organism, taxid)) 32 | download_codons_table(taxid=taxid, target_file=target) 33 | 34 | 35 | if __name__ == "__main__": 36 | print(" ".join(sys.argv)) 37 | if sys.argv[1] == "all": 38 | download_all_tables() 39 | else: 40 | download_codons_table(sys.argv[1], sys.argv[2]) 41 | -------------------------------------------------------------------------------- /python_codon_tables/codon_usage_data/organisms.csv: -------------------------------------------------------------------------------- 1 | organism,taxid 2 | b_subtilis,1423 3 | c_elegans,6239 4 | d_melanogaster,7227 5 | e_coli,316407 6 | g_gallus,9031 7 | h_sapiens,9606 8 | m_musculus,10090 9 | m_musculus_domesticus,10092 10 | s_cerevisiae,4932 11 | -------------------------------------------------------------------------------- /python_codon_tables/codon_usage_data/tables/b_subtilis_1423.csv: -------------------------------------------------------------------------------- 1 | amino_acid,codon,relative_frequency 2 | *,UAA,0.61 3 | *,UAG,0.15 4 | *,UGA,0.24 5 | A,GCA,0.28 6 | A,GCC,0.22 7 | A,GCG,0.26 8 | A,GCU,0.24 9 | C,UGC,0.54 10 | C,UGU,0.46 11 | D,GAC,0.36 12 | D,GAU,0.64 13 | E,GAA,0.68 14 | E,GAG,0.32 15 | F,UUC,0.32 16 | 
F,UUU,0.68 17 | G,GGA,0.31 18 | G,GGC,0.34 19 | G,GGG,0.16 20 | G,GGU,0.19 21 | H,CAC,0.32 22 | H,CAU,0.68 23 | I,AUA,0.13 24 | I,AUC,0.37 25 | I,AUU,0.49 26 | K,AAA,0.70 27 | K,AAG,0.30 28 | L,CUA,0.05 29 | L,CUC,0.11 30 | L,CUG,0.24 31 | L,CUU,0.23 32 | L,UUA,0.21 33 | L,UUG,0.16 34 | M,AUG,1.00 35 | N,AAC,0.44 36 | N,AAU,0.56 37 | P,CCA,0.19 38 | P,CCC,0.09 39 | P,CCG,0.44 40 | P,CCU,0.28 41 | Q,CAA,0.52 42 | Q,CAG,0.48 43 | R,AGA,0.25 44 | R,AGG,0.10 45 | R,CGA,0.10 46 | R,CGC,0.20 47 | R,CGG,0.17 48 | R,CGU,0.18 49 | S,AGC,0.23 50 | S,AGU,0.11 51 | S,UCA,0.23 52 | S,UCC,0.13 53 | S,UCG,0.10 54 | S,UCU,0.20 55 | T,ACA,0.40 56 | T,ACC,0.17 57 | T,ACG,0.27 58 | T,ACU,0.16 59 | V,GUA,0.20 60 | V,GUC,0.26 61 | V,GUG,0.26 62 | V,GUU,0.28 63 | W,UGG,1.00 64 | Y,UAC,0.35 65 | Y,UAU,0.65 -------------------------------------------------------------------------------- /python_codon_tables/codon_usage_data/tables/c_elegans_6239.csv: -------------------------------------------------------------------------------- 1 | amino_acid,codon,relative_frequency 2 | *,UAA,0.43 3 | *,UAG,0.18 4 | *,UGA,0.39 5 | A,GCA,0.31 6 | A,GCC,0.20 7 | A,GCG,0.13 8 | A,GCU,0.36 9 | C,UGC,0.45 10 | C,UGU,0.55 11 | D,GAC,0.32 12 | D,GAU,0.68 13 | E,GAA,0.62 14 | E,GAG,0.38 15 | F,UUC,0.51 16 | F,UUU,0.49 17 | G,GGA,0.59 18 | G,GGC,0.12 19 | G,GGG,0.08 20 | G,GGU,0.20 21 | H,CAC,0.39 22 | H,CAU,0.61 23 | I,AUA,0.16 24 | I,AUC,0.31 25 | I,AUU,0.53 26 | K,AAA,0.59 27 | K,AAG,0.41 28 | L,CUA,0.09 29 | L,CUC,0.17 30 | L,CUG,0.14 31 | L,CUU,0.25 32 | L,UUA,0.11 33 | L,UUG,0.23 34 | M,AUG,1.00 35 | N,AAC,0.38 36 | N,AAU,0.62 37 | P,CCA,0.53 38 | P,CCC,0.09 39 | P,CCG,0.20 40 | P,CCU,0.18 41 | Q,CAA,0.66 42 | Q,CAG,0.34 43 | R,AGA,0.29 44 | R,AGG,0.08 45 | R,CGA,0.23 46 | R,CGC,0.10 47 | R,CGG,0.09 48 | R,CGU,0.21 49 | S,AGC,0.10 50 | S,AGU,0.15 51 | S,UCA,0.26 52 | S,UCC,0.13 53 | S,UCG,0.15 54 | S,UCU,0.21 55 | T,ACA,0.34 56 | T,ACC,0.18 57 | T,ACG,0.15 58 | T,ACU,0.32 59 | V,GUA,0.16 60 | V,GUC,0.22 
61 | V,GUG,0.23 62 | V,GUU,0.39 63 | W,UGG,1.00 64 | Y,UAC,0.44 65 | Y,UAU,0.56 -------------------------------------------------------------------------------- /python_codon_tables/codon_usage_data/tables/d_melanogaster_7227.csv: -------------------------------------------------------------------------------- 1 | amino_acid,codon,relative_frequency 2 | *,UAA,0.41 3 | *,UAG,0.33 4 | *,UGA,0.25 5 | A,GCA,0.17 6 | A,GCC,0.45 7 | A,GCG,0.19 8 | A,GCU,0.19 9 | C,UGC,0.71 10 | C,UGU,0.29 11 | D,GAC,0.47 12 | D,GAU,0.53 13 | E,GAA,0.33 14 | E,GAG,0.67 15 | F,UUC,0.62 16 | F,UUU,0.38 17 | G,GGA,0.29 18 | G,GGC,0.43 19 | G,GGG,0.07 20 | G,GGU,0.21 21 | H,CAC,0.60 22 | H,CAU,0.40 23 | I,AUA,0.19 24 | I,AUC,0.47 25 | I,AUU,0.34 26 | K,AAA,0.30 27 | K,AAG,0.70 28 | L,CUA,0.09 29 | L,CUC,0.15 30 | L,CUG,0.43 31 | L,CUU,0.10 32 | L,UUA,0.05 33 | L,UUG,0.18 34 | M,AUG,1.00 35 | N,AAC,0.56 36 | N,AAU,0.44 37 | P,CCA,0.25 38 | P,CCC,0.33 39 | P,CCG,0.29 40 | P,CCU,0.13 41 | Q,CAA,0.30 42 | Q,CAG,0.70 43 | R,AGA,0.09 44 | R,AGG,0.11 45 | R,CGA,0.15 46 | R,CGC,0.33 47 | R,CGG,0.15 48 | R,CGU,0.16 49 | S,AGC,0.25 50 | S,AGU,0.14 51 | S,UCA,0.09 52 | S,UCC,0.24 53 | S,UCG,0.20 54 | S,UCU,0.08 55 | T,ACA,0.20 56 | T,ACC,0.38 57 | T,ACG,0.26 58 | T,ACU,0.17 59 | V,GUA,0.11 60 | V,GUC,0.24 61 | V,GUG,0.47 62 | V,GUU,0.19 63 | W,UGG,1.00 64 | Y,UAC,0.63 65 | Y,UAU,0.37 -------------------------------------------------------------------------------- /python_codon_tables/codon_usage_data/tables/e_coli_316407.csv: -------------------------------------------------------------------------------- 1 | amino_acid,codon,relative_frequency 2 | *,UAA,0.64 3 | *,UAG,0.07 4 | *,UGA,0.29 5 | A,GCA,0.21 6 | A,GCC,0.27 7 | A,GCG,0.36 8 | A,GCU,0.16 9 | C,UGC,0.56 10 | C,UGU,0.44 11 | D,GAC,0.37 12 | D,GAU,0.63 13 | E,GAA,0.69 14 | E,GAG,0.31 15 | F,UUC,0.43 16 | F,UUU,0.57 17 | G,GGA,0.11 18 | G,GGC,0.41 19 | G,GGG,0.15 20 | G,GGU,0.34 21 | H,CAC,0.43 22 | H,CAU,0.57 23 | I,AUA,0.07 24 | I,AUC,0.42 25 | 
I,AUU,0.51 26 | K,AAA,0.76 27 | K,AAG,0.24 28 | L,CUA,0.04 29 | L,CUC,0.10 30 | L,CUG,0.50 31 | L,CUU,0.10 32 | L,UUA,0.13 33 | L,UUG,0.13 34 | M,AUG,1.00 35 | N,AAC,0.55 36 | N,AAU,0.45 37 | P,CCA,0.19 38 | P,CCC,0.12 39 | P,CCG,0.53 40 | P,CCU,0.16 41 | Q,CAA,0.35 42 | Q,CAG,0.65 43 | R,AGA,0.04 44 | R,AGG,0.02 45 | R,CGA,0.06 46 | R,CGC,0.40 47 | R,CGG,0.10 48 | R,CGU,0.38 49 | S,AGC,0.28 50 | S,AGU,0.15 51 | S,UCA,0.12 52 | S,UCC,0.15 53 | S,UCG,0.15 54 | S,UCU,0.15 55 | T,ACA,0.13 56 | T,ACC,0.44 57 | T,ACG,0.27 58 | T,ACU,0.16 59 | V,GUA,0.15 60 | V,GUC,0.22 61 | V,GUG,0.37 62 | V,GUU,0.26 63 | W,UGG,1.00 64 | Y,UAC,0.43 65 | Y,UAU,0.57 -------------------------------------------------------------------------------- /python_codon_tables/codon_usage_data/tables/g_gallus_9031.csv: -------------------------------------------------------------------------------- 1 | amino_acid,codon,relative_frequency 2 | *,UAA,0.32 3 | *,UAG,0.20 4 | *,UGA,0.47 5 | A,GCA,0.26 6 | A,GCC,0.32 7 | A,GCG,0.13 8 | A,GCU,0.29 9 | C,UGC,0.60 10 | C,UGU,0.40 11 | D,GAC,0.50 12 | D,GAU,0.50 13 | E,GAA,0.43 14 | E,GAG,0.57 15 | F,UUC,0.55 16 | F,UUU,0.45 17 | G,GGA,0.27 18 | G,GGC,0.31 19 | G,GGG,0.25 20 | G,GGU,0.18 21 | H,CAC,0.60 22 | H,CAU,0.40 23 | I,AUA,0.18 24 | I,AUC,0.46 25 | I,AUU,0.35 26 | K,AAA,0.44 27 | K,AAG,0.56 28 | L,CUA,0.06 29 | L,CUC,0.18 30 | L,CUG,0.41 31 | L,CUU,0.13 32 | L,UUA,0.08 33 | L,UUG,0.13 34 | M,AUG,1.00 35 | N,AAC,0.57 36 | N,AAU,0.43 37 | P,CCA,0.28 38 | P,CCC,0.30 39 | P,CCG,0.14 40 | P,CCU,0.27 41 | Q,CAA,0.27 42 | Q,CAG,0.73 43 | R,AGA,0.22 44 | R,AGG,0.21 45 | R,CGA,0.10 46 | R,CGC,0.19 47 | R,CGG,0.18 48 | R,CGU,0.10 49 | S,AGC,0.26 50 | S,AGU,0.14 51 | S,UCA,0.15 52 | S,UCC,0.20 53 | S,UCG,0.07 54 | S,UCU,0.18 55 | T,ACA,0.30 56 | T,ACC,0.31 57 | T,ACG,0.14 58 | T,ACU,0.25 59 | V,GUA,0.12 60 | V,GUC,0.22 61 | V,GUG,0.45 62 | V,GUU,0.21 63 | W,UGG,1.00 64 | Y,UAC,0.60 65 | Y,UAU,0.40 
-------------------------------------------------------------------------------- /python_codon_tables/codon_usage_data/tables/h_sapiens_9606.csv: -------------------------------------------------------------------------------- 1 | amino_acid,codon,relative_frequency 2 | *,UAA,0.30 3 | *,UAG,0.24 4 | *,UGA,0.47 5 | A,GCA,0.23 6 | A,GCC,0.40 7 | A,GCG,0.11 8 | A,GCU,0.27 9 | C,UGC,0.54 10 | C,UGU,0.46 11 | D,GAC,0.54 12 | D,GAU,0.46 13 | E,GAA,0.42 14 | E,GAG,0.58 15 | F,UUC,0.54 16 | F,UUU,0.46 17 | G,GGA,0.25 18 | G,GGC,0.34 19 | G,GGG,0.25 20 | G,GGU,0.16 21 | H,CAC,0.58 22 | H,CAU,0.42 23 | I,AUA,0.17 24 | I,AUC,0.47 25 | I,AUU,0.36 26 | K,AAA,0.43 27 | K,AAG,0.57 28 | L,CUA,0.07 29 | L,CUC,0.20 30 | L,CUG,0.40 31 | L,CUU,0.13 32 | L,UUA,0.08 33 | L,UUG,0.13 34 | M,AUG,1.00 35 | N,AAC,0.53 36 | N,AAU,0.47 37 | P,CCA,0.28 38 | P,CCC,0.32 39 | P,CCG,0.11 40 | P,CCU,0.29 41 | Q,CAA,0.27 42 | Q,CAG,0.73 43 | R,AGA,0.21 44 | R,AGG,0.21 45 | R,CGA,0.11 46 | R,CGC,0.18 47 | R,CGG,0.20 48 | R,CGU,0.08 49 | S,AGC,0.24 50 | S,AGU,0.15 51 | S,UCA,0.15 52 | S,UCC,0.22 53 | S,UCG,0.05 54 | S,UCU,0.19 55 | T,ACA,0.28 56 | T,ACC,0.36 57 | T,ACG,0.11 58 | T,ACU,0.25 59 | V,GUA,0.12 60 | V,GUC,0.24 61 | V,GUG,0.46 62 | V,GUU,0.18 63 | W,UGG,1.00 64 | Y,UAC,0.56 65 | Y,UAU,0.44 -------------------------------------------------------------------------------- /python_codon_tables/codon_usage_data/tables/m_musculus_10090.csv: -------------------------------------------------------------------------------- 1 | amino_acid,codon,relative_frequency 2 | *,UAA,0.28 3 | *,UAG,0.23 4 | *,UGA,0.49 5 | A,GCA,0.23 6 | A,GCC,0.38 7 | A,GCG,0.09 8 | A,GCU,0.29 9 | C,UGC,0.52 10 | C,UGU,0.48 11 | D,GAC,0.55 12 | D,GAU,0.45 13 | E,GAA,0.41 14 | E,GAG,0.59 15 | F,UUC,0.56 16 | F,UUU,0.44 17 | G,GGA,0.26 18 | G,GGC,0.33 19 | G,GGG,0.23 20 | G,GGU,0.18 21 | H,CAC,0.59 22 | H,CAU,0.41 23 | I,AUA,0.16 24 | I,AUC,0.50 25 | I,AUU,0.34 26 | K,AAA,0.39 27 | K,AAG,0.61 28 | L,CUA,0.08 29 | L,CUC,0.20 30 | 
L,CUG,0.39 31 | L,CUU,0.13 32 | L,UUA,0.07 33 | L,UUG,0.13 34 | M,AUG,1.00 35 | N,AAC,0.57 36 | N,AAU,0.43 37 | P,CCA,0.29 38 | P,CCC,0.30 39 | P,CCG,0.10 40 | P,CCU,0.31 41 | Q,CAA,0.26 42 | Q,CAG,0.74 43 | R,AGA,0.22 44 | R,AGG,0.22 45 | R,CGA,0.12 46 | R,CGC,0.17 47 | R,CGG,0.19 48 | R,CGU,0.08 49 | S,AGC,0.24 50 | S,AGU,0.15 51 | S,UCA,0.14 52 | S,UCC,0.22 53 | S,UCG,0.05 54 | S,UCU,0.20 55 | T,ACA,0.29 56 | T,ACC,0.35 57 | T,ACG,0.10 58 | T,ACU,0.25 59 | V,GUA,0.12 60 | V,GUC,0.25 61 | V,GUG,0.46 62 | V,GUU,0.17 63 | W,UGG,1.00 64 | Y,UAC,0.57 65 | Y,UAU,0.43 -------------------------------------------------------------------------------- /python_codon_tables/codon_usage_data/tables/m_musculus_domesticus_10092.csv: -------------------------------------------------------------------------------- 1 | amino_acid,codon,relative_frequency 2 | *,UAA,0.27 3 | *,UAG,0.45 4 | *,UGA,0.27 5 | A,GCA,0.30 6 | A,GCC,0.36 7 | A,GCG,0.11 8 | A,GCU,0.23 9 | C,UGC,0.55 10 | C,UGU,0.45 11 | D,GAC,0.61 12 | D,GAU,0.39 13 | E,GAA,0.49 14 | E,GAG,0.51 15 | F,UUC,0.61 16 | F,UUU,0.39 17 | G,GGA,0.29 18 | G,GGC,0.32 19 | G,GGG,0.22 20 | G,GGU,0.17 21 | H,CAC,0.65 22 | H,CAU,0.35 23 | I,AUA,0.29 24 | I,AUC,0.43 25 | I,AUU,0.27 26 | K,AAA,0.58 27 | K,AAG,0.42 28 | L,CUA,0.14 29 | L,CUC,0.21 30 | L,CUG,0.35 31 | L,CUU,0.13 32 | L,UUA,0.07 33 | L,UUG,0.10 34 | M,AUG,1.00 35 | N,AAC,0.59 36 | N,AAU,0.41 37 | P,CCA,0.34 38 | P,CCC,0.30 39 | P,CCG,0.10 40 | P,CCU,0.26 41 | Q,CAA,0.24 42 | Q,CAG,0.76 43 | R,AGA,0.34 44 | R,AGG,0.25 45 | R,CGA,0.10 46 | R,CGC,0.13 47 | R,CGG,0.13 48 | R,CGU,0.06 49 | S,AGC,0.23 50 | S,AGU,0.13 51 | S,UCA,0.21 52 | S,UCC,0.21 53 | S,UCG,0.06 54 | S,UCU,0.16 55 | T,ACA,0.40 56 | T,ACC,0.28 57 | T,ACG,0.08 58 | T,ACU,0.24 59 | V,GUA,0.19 60 | V,GUC,0.24 61 | V,GUG,0.43 62 | V,GUU,0.14 63 | W,UGG,1.00 64 | Y,UAC,0.59 65 | Y,UAU,0.41 -------------------------------------------------------------------------------- 
/python_codon_tables/codon_usage_data/tables/s_cerevisiae_4932.csv: -------------------------------------------------------------------------------- 1 | amino_acid,codon,relative_frequency 2 | *,UAA,0.47 3 | *,UAG,0.23 4 | *,UGA,0.30 5 | A,GCA,0.29 6 | A,GCC,0.22 7 | A,GCG,0.11 8 | A,GCU,0.38 9 | C,UGC,0.37 10 | C,UGU,0.63 11 | D,GAC,0.35 12 | D,GAU,0.65 13 | E,GAA,0.70 14 | E,GAG,0.30 15 | F,UUC,0.41 16 | F,UUU,0.59 17 | G,GGA,0.22 18 | G,GGC,0.19 19 | G,GGG,0.12 20 | G,GGU,0.47 21 | H,CAC,0.36 22 | H,CAU,0.64 23 | I,AUA,0.27 24 | I,AUC,0.26 25 | I,AUU,0.46 26 | K,AAA,0.58 27 | K,AAG,0.42 28 | L,CUA,0.14 29 | L,CUC,0.06 30 | L,CUG,0.11 31 | L,CUU,0.13 32 | L,UUA,0.28 33 | L,UUG,0.29 34 | M,AUG,1.00 35 | N,AAC,0.41 36 | N,AAU,0.59 37 | P,CCA,0.42 38 | P,CCC,0.15 39 | P,CCG,0.12 40 | P,CCU,0.31 41 | Q,CAA,0.69 42 | Q,CAG,0.31 43 | R,AGA,0.48 44 | R,AGG,0.21 45 | R,CGA,0.07 46 | R,CGC,0.06 47 | R,CGG,0.04 48 | R,CGU,0.14 49 | S,AGC,0.11 50 | S,AGU,0.16 51 | S,UCA,0.21 52 | S,UCC,0.16 53 | S,UCG,0.10 54 | S,UCU,0.26 55 | T,ACA,0.30 56 | T,ACC,0.22 57 | T,ACG,0.14 58 | T,ACU,0.35 59 | V,GUA,0.21 60 | V,GUC,0.21 61 | V,GUG,0.19 62 | V,GUU,0.39 63 | W,UGG,1.00 64 | Y,UAC,0.44 65 | Y,UAU,0.56 -------------------------------------------------------------------------------- /python_codon_tables/python_codon_tables.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import re 3 | import os 4 | from functools import lru_cache 5 | 6 | if sys.version_info[0] == 3: 7 | import urllib.request 8 | 9 | urlopen = urllib.request.urlopen 10 | else: 11 | import urllib2 12 | 13 | urlopen = urllib2.urlopen 14 | 15 | _this_dir = os.path.dirname(os.path.realpath(__file__)) 16 | _tables_dir = os.path.join(_this_dir, "codon_usage_data", "tables") 17 | 18 | available_codon_tables_names = [filename[:-4] for filename in os.listdir(_tables_dir)] 19 | 20 | available_codon_tables_shortnames = { 21 | "_".join(table_name.split("_")[:-1]): table_name 
def csv_string_to_codons_dict(csv_string):
    """Transform a CSV string of a codon table to a dict.

    The first line is treated as the header row
    (``amino_acid,codon,relative_frequency``) and is skipped. Every other
    non-blank line must hold exactly three comma-separated fields:
    amino acid, codon and relative frequency.

    Surrounding whitespace, blank lines and Windows line endings are
    tolerated, so files saved with a trailing newline (or with stray empty
    lines) parse cleanly.

    Returns a dict of the form ``{"A": {"GCA": 0.21, ...}, ...}``.

    Raises ``ValueError`` if a data line does not have exactly three fields
    or if its frequency cannot be converted to a float.
    """
    result = {}
    # strip() first so that the header really is the first line, then drop it.
    for line in csv_string.strip().splitlines()[1:]:
        line = line.strip()
        if not line:
            # Blank lines carry no data; skipping them avoids an unpacking
            # error on otherwise valid tables.
            continue
        aa, codon, freq = line.split(",")
        result.setdefault(aa, {})[codon] = float(freq)
    return result


def table_with_U_replaced_by_T(table):
    """Return a copy of ``table`` where every "U" in a codon name is a "T".

    ``table`` is a dict ``{amino_acid: {codon: frequency}}``. The input is not
    modified; a new two-level dict is built.
    """
    return {
        aa: {codon.replace("U", "T"): freq for codon, freq in aa_data.items()}
        for aa, aa_data in table.items()
    }


@lru_cache(maxsize=128)
def get_codons_table(table_name, replace_U_by_T=True, web_timeout=5):
    """Get data from one of this package's builtin codon usage tables.

    The ``table_name`` argument is very flexible on purpose, it can be either
    an integer representing a taxonomic ID, or a string "12245" representing a
    TaxID, or a string "e_coli_316407" referring to a builtin table of this
    package, or a short form "e_coli" which will be automatically extended to
    "e_coli_316407" (at your own risk).

    If a taxonomic ID is provided and no table with this TaxID is present in
    the ``codon_usage_data/tables/`` folder, the table will be downloaded from
    the http://www.kazusa.or.jp/codon website. As this website sometimes goes
    down, the parameter ``web_timeout`` controls how long to wait before a
    Python exception is raised, informing the user that Kazusa may be down.

    The ``replace_U_by_T`` argument will replace all codons names from UAA to
    TAA etc.

    Returns a dict {"*": {'TAA': 0.64...}, 'K': {'AAA': 0.76...}, ...}

    Note that results are memoized with ``lru_cache``: repeated calls with the
    same arguments return the very same dict object.
    """
    if replace_U_by_T:
        # Forward the caller's timeout (it used to be reset to 5 seconds here).
        table = get_codons_table(
            table_name, replace_U_by_T=False, web_timeout=web_timeout
        )
        return table_with_U_replaced_by_T(table)
    if isinstance(table_name, int) or str.isdigit(table_name):
        # Builtin tables are named "<organism>_<taxid>": serve the local copy
        # when one exists, and only go to the web otherwise.
        taxid_suffix = "_%s" % table_name
        for builtin_name in available_codon_tables_names:
            if builtin_name.endswith(taxid_suffix):
                table_name = builtin_name
                break
        else:
            return download_codons_table(taxid=table_name, timeout=web_timeout)
    if table_name in available_codon_tables_shortnames:
        table_name = available_codon_tables_shortnames[table_name]
    with open(os.path.join(_tables_dir, table_name + ".csv"), "r") as f:
        return csv_string_to_codons_dict(f.read())


def get_all_available_codons_tables(replace_U_by_T=True):
    """Get all data from all of this package's builtin codon usage tables.

    Returns a dict ``{table_name: codons_table}`` with one entry per name in
    ``available_codon_tables_names``. ``replace_U_by_T`` is forwarded to
    ``get_codons_table`` for every table.
    """
    return {
        table_name: get_codons_table(table_name, replace_U_by_T=replace_U_by_T)
        for table_name in available_codon_tables_names
    }


@lru_cache(maxsize=128)
def download_codons_table(taxid=316407, target_file=None, timeout=5):
    """Download the codon usage table of a given TaxID from the Kazusa website.

    Parameters
    ----------
    taxid
      Taxonomic ID inserted in the Kazusa URL (``species=<taxid>``).
    target_file
      Optional path. If provided, the table is written there as CSV (columns
      ``amino_acid,codon,relative_frequency``) and nothing is returned.
    timeout
      Time in seconds to wait for the website before giving up.

    Returns a dict ``{amino_acid: {codon: frequency}}`` when ``target_file``
    is None, else None. Codon names are kept as they appear on the web page.

    Raises ``RuntimeError`` if the connection times out or if the page reports
    that the TaxID was not found. Other connection errors are re-raised as is.

    NOTE: the function is memoized with ``lru_cache``, so a repeated call with
    the same arguments is answered from the cache and does not write
    ``target_file`` again.
    """
    _kazusa_url = (
        "http://www.kazusa.or.jp/codon/cgi-bin/showcodon.cgi" "?aa=1&style=N&species=%s"
    )
    # Codon, amino acid letter (or "*"), then a frequency such as "0.57".
    # The dot is escaped so only a literal decimal point is accepted.
    _codon_regexpr = r"([ATGCU]{3}) ([A-Z]|\*) (\d\.\d+)"
    url = _kazusa_url % taxid
    try:
        web_handle = urlopen(url, timeout=timeout)
    except Exception as err:
        if "timed out" in str(err):
            # %s (not %d) so that float timeouts are reported faithfully.
            raise RuntimeError(
                (
                    "connexion to %s timed out after %s seconds. Maybe "
                    "their website is down?"
                )
                % (url, timeout)
            ) from err
        raise

    # Always release the connection, even if reading or decoding fails.
    try:
        html_content = web_handle.read().decode().replace("\n", " ")
    finally:
        web_handle.close()

    if "not found" in html_content.lower():
        raise RuntimeError(
            "Codon usage table for taxonomy ID '%s' not found:" " %s" % (taxid, url)
        )
    csv_rows = sorted(
        "%s,%s,%s" % (aa, codon, usage)
        for codon, aa, usage in re.findall(_codon_regexpr, html_content)
    )
    csv_data = "\n".join(["amino_acid,codon,relative_frequency"] + csv_rows)
    if target_file is not None:
        with open(target_file, "w+") as f:
            f.write(csv_data)
    else:
        return csv_string_to_codons_dict(csv_data)
def test_download_codon_table(tmpdir):
    """Download a table as a dict, then again into a CSV file."""
    table = pct.download_codons_table(taxid=316407)
    assert table["*"]["UAG"] == 0.07
    target = os.path.join(str(tmpdir), "test.csv")
    # With a target file the table is written to disk and nothing is returned.
    assert pct.download_codons_table(taxid=316407, target_file=target) is None
    assert os.path.isfile(target)


def test_download_unknown_taxid_raises():
    """An unknown TaxID is expected to raise a RuntimeError.

    This check used to live in a first ``test_readme_example`` definition that
    was shadowed by a second function of the same name, so it never ran.
    """
    with pytest.raises(RuntimeError):
        pct.download_codons_table(taxid=000000000)  # does not exist


def test_readme_example():
    """Run the examples shown in the README."""
    table = pct.get_codons_table("b_subtilis_1423")
    assert table["T"]["ACA"] == 0.4
    assert table["*"]["TAA"] == 0.61

    # LOAD ALL TABLES AT ONCE
    codons_tables = pct.get_all_available_codons_tables()
    assert codons_tables["c_elegans_6239"]["L"]["CTA"] == 0.09

    # GET A TABLE DIRECTLY FROM THE INTERNET
    # (downloaded tables keep the codon names found on the website: UGA)
    table = pct.download_codons_table(taxid=316407)
    assert table["*"]["UGA"] == 0.29


def test_get_codons_table():
    """Every accepted form of ``table_name`` yields the same table."""
    for table_name in (1423, "1423", "b_subtilis", "b_subtilis_1423"):
        table = pct.get_codons_table(table_name)
        assert table["T"]["ACA"] == 0.4
        assert table["*"]["TAA"] == 0.61


def test_replace_U_by_T():
    """With ``replace_U_by_T=False`` codons keep their U-based names."""
    table = pct.get_codons_table("b_subtilis_1423", replace_U_by_T=False)
    assert table["*"]["UAA"] == 0.61


def test_csv_string_to_codons_dict():
    """A CSV file ending with a newline is parsed without error."""
    with open(mock_table_path, "r") as f:
        codon_dict = pct.csv_string_to_codons_dict(f.read())
    assert isinstance(codon_dict, dict)
    assert codon_dict == {"A": {"GCA": 0.21}}