├── src └── brendapyrser │ ├── __init__.py │ ├── constants.py │ └── parser.py ├── assets ├── logo.png └── social_logo_cut.png ├── MANIFEST.in ├── envs └── brendapyrser-dev.yml ├── CITATION.cff ├── .gitignore ├── .github ├── workflows │ ├── docs.yml │ ├── ci.yml │ ├── joss.yml │ ├── tests.yml │ └── docker.yml ├── ISSUE_TEMPLATE │ ├── feature_request.md │ └── bug_report.md └── CODE_OF_CONDUCT.md ├── pyproject.toml ├── README.md ├── paper ├── paper.md └── paper.bib ├── CONTRIBUTING.md ├── LICENSE ├── tests └── tests.py └── docs └── examples.ipynb /src/brendapyrser/__init__.py: -------------------------------------------------------------------------------- 1 | from .parser import Reaction, BRENDA, ReactionList 2 | -------------------------------------------------------------------------------- /assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robaina/BRENDApyrser/HEAD/assets/logo.png -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | prune data code_oven README_files .ipynb_checkpoints 2 | exclude README.ipynb gitignore 3 | -------------------------------------------------------------------------------- /assets/social_logo_cut.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Robaina/BRENDApyrser/HEAD/assets/social_logo_cut.png -------------------------------------------------------------------------------- /envs/brendapyrser-dev.yml: -------------------------------------------------------------------------------- 1 | name: brendapyrser-dev 2 | channels: 3 | - defaults 4 | - bioconda 5 | - conda-forge 6 | dependencies: 7 | - python >= 3.8 8 | - poetry >= 1.3 9 | - pip 10 | - pip: 11 | - mkdocs 12 | - mkdocs-gen-files 13 | - pymdown-extensions 14 | - mkdocs-jupyter 15 | - mkdocstrings[python] 
16 | - ruff 17 | - black 18 | - black[jupyter] 19 | - argmark 20 | - coverage 21 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use this software, please cite it as below." 3 | authors: 4 | - family-names: "Robaina-Estévez" 5 | given-names: "Semidán" 6 | orcid: "https://orcid.org/0000-0003-0781-1677" 7 | title: "BRENDApyrser: a Python package to parse and manipulate the BRENDA database" 8 | version: 0.0.3 9 | doi: 10.5281/zenodo.7026555 10 | date-released: 2022-08-26 11 | url: "https://github.com/Robaina/BRENDApyrser" 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.txt 2 | /code_oven 3 | /data 4 | /.ipynb_checkpoints 5 | 6 | *.py[cod] 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Packages 12 | *.egg 13 | *.egg-info 14 | dist 15 | build 16 | eggs 17 | parts 18 | bin 19 | var 20 | sdist 21 | develop-eggs 22 | .installed.cfg 23 | lib 24 | lib64 25 | __pycache__ 26 | 27 | # *.ipynb 28 | 29 | # Installer logs 30 | pip-log.txt 31 | 32 | # Unit test / coverage reports 33 | .coverage 34 | .tox 35 | nosetests.xml 36 | 37 | # Translations 38 | *.mo 39 | 40 | # Mr Developer 41 | .mr.developer.cfg 42 | .project 43 | .pydevproject 44 | dev.ipynb 45 | 46 | *NOTES.txt 47 | 48 | ms/* 49 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: docs 2 | on: 3 | push: 4 | branches: [ main ] 5 | paths: 6 | - 'docs/**' 7 | - 'mkdocs.yml' 8 | 9 | jobs: 10 | 11 | build-docs: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v3 15 | with: 16 | fetch-depth: 0 17 | - uses: actions/setup-python@v2 18 | - run: pip install 
--upgrade pip && pip install mkdocs mkdocs-gen-files pymdown-extensions mkdocs-jupyter mkdocstrings[python] 19 | - run: git config user.name 'github-actions[bot]' && git config user.email 'github-actions[bot]@users.noreply.github.com' 20 | - name: Publish docs 21 | run: mkdocs gh-deploy 22 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: push 3 | jobs: 4 | quality: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - uses: actions/checkout@v3 8 | - name: Install Python 9 | uses: actions/setup-python@v4 10 | with: 11 | python-version: "3.8" 12 | - name: Install dependencies 13 | run: | 14 | python -m pip install --upgrade pip 15 | pip install ruff black 16 | # Include `--format=github` to enable automatic inline annotations. 17 | # - name: Check linters 18 | # run: ruff --format=github . # ruff does not allow trailing white space in logo (cli.py) 19 | - name: Check format 20 | run: black --check . 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /.github/workflows/joss.yml: -------------------------------------------------------------------------------- 1 | name: joss 2 | on: [push] 3 | 4 | jobs: 5 | paper: 6 | runs-on: ubuntu-latest 7 | name: Paper Draft 8 | steps: 9 | - name: Checkout 10 | uses: actions/checkout@v3 11 | - name: Open Journals PDF Generator 12 | uses: openjournals/openjournals-draft-action@master 13 | with: 14 | journal: joss 15 | # This should be the path to the paper within your repo. 16 | paper-path: ms/paper.md 17 | - name: Upload 18 | uses: actions/upload-artifact@v1 19 | with: 20 | name: paper 21 | # This is the output path where Pandoc will write the compiled 22 | # PDF. Note, this should be the same directory as the input 23 | # paper.md 24 | path: ms/paper.pdf 25 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Browser [e.g. chrome, safari] 29 | - Version [e.g. 22] 30 | 31 | **Smartphone (please complete the following information):** 32 | - Device: [e.g. iPhone6] 33 | - OS: [e.g. iOS8.1] 34 | - Browser [e.g. stock browser, safari] 35 | - Version [e.g. 
22] 36 | 37 | **Additional context** 38 | Add any other context about the problem here. 39 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: tests 2 | on: 3 | push: 4 | paths-ignore: 5 | - '**.md' 6 | - '**.ipynb' 7 | - '**.bib' 8 | - 'ms/*' 9 | pull_request: 10 | types: [opened, reopened, edited] 11 | paths-ignore: 12 | - '**.md' 13 | - '**.ipynb' 14 | - '**.bib' 15 | - 'ms/*' 16 | 17 | jobs: 18 | 19 | create-env: 20 | name: ${{ matrix.os }} 21 | runs-on: ${{ matrix.os }} 22 | defaults: 23 | run: 24 | shell: bash -l {0} 25 | 26 | strategy: 27 | fail-fast: false 28 | matrix: 29 | os: [ubuntu-latest, macos-latest] 30 | 31 | steps: 32 | - name: checkout repository 33 | uses: actions/checkout@v3 34 | with: 35 | fetch-depth: 0 36 | 37 | - name: create environment 38 | uses: conda-incubator/setup-miniconda@v2 39 | with: 40 | python-version: 3.8 41 | # mamba-version: "*" 42 | channels: conda-forge,bioconda,defaults 43 | auto-activate-base: false 44 | activate-environment: tests_brendapyrser 45 | environment-file: envs/brendapyrser-dev.yml 46 | 47 | - name: Build & Install BRENDApyrser 48 | run: poetry build && pip install dist/brendapyrser*.whl 49 | 50 | - name: Run tests and collect coverage 51 | run: coverage run -m unittest discover tests && coverage xml 52 | 53 | - name: Upload coverage to Codecov 54 | uses: codecov/codecov-action@v3 55 | with: 56 | version: "v0.1.15" 57 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["poetry-core"] 3 | build-backend = "poetry.core.masonry.api" 4 | 5 | [tool.poetry] 6 | name = "brendapyrser" 7 | version = "0.0.4" 8 | description = "Tools to parse the BRENDA database" 9 | license = "Apache-2.0" 10 | authors = ["Semidán 
Robaina Estévez "] 11 | maintainers = ["Semidán Robaina Estévez "] 12 | readme = "README.md" 13 | homepage = "https://github.com/robaina/BRENDApyrser" 14 | repository = "https://github.com/robaina/BRENDApyrser" 15 | documentation = "https://robaina.github.io/BRENDApyrser" 16 | keywords = ["BRENDA", "metabolism", "enzymes", "bioinformatics"] 17 | classifiers = [ 18 | "License :: OSI Approved :: Apache Software License", 19 | "Programming Language :: Python :: 3", 20 | "Topic :: Scientific/Engineering :: Bio-Informatics", 21 | "Natural Language :: English", 22 | ] 23 | packages = [{ include = "brendapyrser", from = "src" }] 24 | [tool.poetry.dependencies] 25 | python = "^3.8" 26 | numpy = "^1.20.2" 27 | pandas = "^1.2.4" 28 | importlib-metadata = "^4.0.1" 29 | 30 | [tool.ruff] 31 | select = [ 32 | "E", # pycodestyle errors 33 | "W", # pycodestyle warnings 34 | "F", # pyflakes 35 | "I", # isort 36 | ] 37 | ignore = [ 38 | "E501", # line too long, handled by black 39 | "B008", # do not perform function calls in argument defaults 40 | "C901", # mccabe complexity 41 | "E999", # match statement is not yet supported 42 | "W605", # ASCII art, verbatim text 43 | ] 44 | 45 | [tool.ruff.isort] 46 | known-first-party = ["brendapyrser"] 47 | -------------------------------------------------------------------------------- /src/brendapyrser/constants.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | BRENDA data fields and units of measurement. 
6 | """ 7 | 8 | fields = { 9 | "AC": "activating compound", 10 | "AP": "application", 11 | "CF": "cofactor", 12 | "CL": "cloned", 13 | "CR": "crystallization", 14 | "EN": "engineering", 15 | "EXP": "expression", 16 | "GI": "general information on enzyme", 17 | "GS": "general stability", 18 | "IC50": "IC-50 Value", 19 | "ID": "EC-class", 20 | "IN": "inhibitors", 21 | "KKM": "Kcat/KM-Value substrate in {...}", 22 | "KI": "Ki-value, inhibitor in {...}", 23 | "KM": "KM-value, substrate in {...}", 24 | "LO": "localization", 25 | "ME": "metals/ions", 26 | "MW": "molecular weight", 27 | "NSP": "natural substrates/products reversibilty information in {...}", 28 | "OS": "oxygen stability", 29 | "OSS": "organic solvent stability", 30 | "PHO": "pH-optimum", 31 | "PHR": "pH-range", 32 | "PHS": "pH stability", 33 | "PI": "isoelectric point", 34 | "PM": "posttranslation modification", 35 | "PR": "protein", 36 | "PU": "purification", 37 | "RE": "reaction catalyzed", 38 | "RF": "references", 39 | "REN": "renatured", 40 | "RN": "accepted name (IUPAC)", 41 | "RT": "reaction type", 42 | "SA": "specific activity", 43 | "SN": "synonyms", 44 | "SP": "substrates/products, reversibilty information in {...}", 45 | "SS": "storage stability", 46 | "ST": "source/tissue", 47 | "SU": "subunits", 48 | "SY": "systematic name", 49 | "TN": "turnover number, substrate in {...}", 50 | "TO": "temperature optimum", 51 | "TR": "temperature range", 52 | "TS": "temperature stability", 53 | } 54 | 55 | units = { 56 | "KM": "mM", 57 | "KI": "mM", 58 | "TN": "$s^{-1}$", 59 | "SA": "$µmol.min^{-1}.mg^{-1}$", 60 | "KKM": "$mM^{-1}.s^{-1}$", 61 | "TO": "${}^oC$", 62 | "TR": "${}^oC$", 63 | "TS": "${}^oC$", 64 | "MW": "Da", 65 | } 66 | -------------------------------------------------------------------------------- /.github/workflows/docker.yml: -------------------------------------------------------------------------------- 1 | # This workflow uses actions that are not certified by GitHub. 
2 | # They are provided by a third-party and are governed by 3 | # separate terms of service, privacy policy, and support 4 | # documentation. 5 | 6 | # GitHub recommends pinning actions to a commit SHA. 7 | # To get a newer version, you will need to update the SHA. 8 | # You can also reference a tag or branch, but the action may change without warning. 9 | 10 | name: Publish Docker image 11 | 12 | on: 13 | release: 14 | types: [published] 15 | 16 | push: 17 | branches: 18 | - main 19 | 20 | jobs: 21 | push_to_registries: 22 | name: Push Docker image to multiple registries 23 | runs-on: ubuntu-latest 24 | permissions: 25 | packages: write 26 | contents: read 27 | steps: 28 | - name: Check out the repo 29 | uses: actions/checkout@v3 30 | 31 | - name: Log in to Docker Hub 32 | uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9 33 | with: 34 | username: ${{ secrets.DOCKER_USERNAME }} 35 | password: ${{ secrets.DOCKER_PASSWORD }} 36 | 37 | - name: Log in to the Container registry 38 | uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9 39 | with: 40 | registry: ghcr.io 41 | username: ${{ github.actor }} 42 | password: ${{ secrets.API_GITHUB_TOKEN }} 43 | 44 | - name: Extract metadata (tags, labels) for Docker 45 | id: meta 46 | uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38 47 | with: 48 | images: | 49 | robaina/brendapyrser 50 | ghcr.io/${{ github.repository }} 51 | 52 | - name: Build and push Docker images 53 | uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc 54 | with: 55 | context: . 
56 | push: true 57 | tags: ${{ steps.meta.outputs.tags }} 58 | labels: ${{ steps.meta.outputs.labels }} -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![logo](assets/logo.png) 2 | ## a Python package to parse and manipulate the BRENDA database 3 | 4 | [![tests](https://github.com/Robaina/BRENDApyrser/actions/workflows/tests.yml/badge.svg)](https://github.com/Robaina/BRENDApyrser/actions/workflows/tests.yml) 5 | [![codecov](https://codecov.io/gh/Robaina/BRENDApyrser/graph/badge.svg?token=214SPFXRTG)](https://codecov.io/gh/Robaina/BRENDApyrser) 6 | ![PyPI](https://img.shields.io/pypi/v/brendapyrser) 7 | ![GitHub release (latest by date)](https://img.shields.io/github/v/release/Robaina/Brendapyrser) 8 | [![GitHub license](https://img.shields.io/github/license/Robaina/BRENDApyrser)](https://github.com/Robaina/BRENDApyrser/blob/master/LICENSE) 9 | ![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4) 10 | [![DOI](https://zenodo.org/badge/299416438.svg)](https://zenodo.org/badge/latestdoi/299416438) 11 | 12 | ## What is Brendapyrser? 13 | This project provides Python classes and functions to parse the text file containing the entire BRENDA enzyme database (https://www.brenda-enzymes.org) 14 | 15 | This is an ongoing project! 16 | 17 | ## Installation 18 | 1. ```pip install brendapyrser``` 19 | 20 | or 21 | 22 | 2. Git clone project to local directory. 23 | 24 | In a terminal, navigate to the directory and enter: ```pip install .``` 25 | 26 | ## Usage 27 | 28 | Due to BRENDA's license, BRENDA's database cannot be downloaded directly by the parser; instead, the user is asked to download the database as a text file after accepting usage conditions [here](https://www.brenda-enzymes.org/download.php). 29 | 30 | You can find a jupyter notebook with usage examples [here](docs/examples.ipynb). 
31 | 32 | ## Contribute 33 | 34 | Contributions are always more than welcome! Feel free to fork, make changes and pull requests. If you don't know where to start, you can always browse open issues. Thanks! 35 | 36 | ## Citation 37 | 38 | If you use this software, please cite it as below: 39 | 40 | Robaina-Estévez, S. (2022). BRENDApyrser: a Python package to parse and manipulate the BRENDA database (Version 0.0.3)[Computer software]. https://doi.org/10.5281/zenodo.7026555 41 | -------------------------------------------------------------------------------- /paper/paper.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'Brendapyrser: a Python package to parse and manipulate the BRENDA database' 3 | tags: 4 | - Python 5 | - bioinformatics 6 | - BRENDA 7 | - metabolism 8 | - enzymes 9 | - biochemistry 10 | authors: 11 | - name: Semidán Robaina-Estévez 12 | orcid: 0000-0003-0781-1677 13 | email: srobaina@ull.edu.es 14 | affiliation: 1 15 | affiliations: 16 | - name: Department of Microbiology. University of La Laguna. Spain. 17 | index: 1 18 | date: 18 August 2022 19 | bibliography: paper.bib 20 | --- 21 | 22 | # Summary 23 | 24 | Enzymes — proteins with specialized, catalytic functions — constitute the workforce of cellular metabolism, catalyzing thousands of biochemical reactions within cells. Enzymes present different physicochemical properties that affect their function. Knowledge about these enzyme functional properties is fundamental to understanding how biochemical reactions operate and are controlled by the cell. The BRENDA [@brenda] database is a widely-used, publicly available collection of enzyme functional information obtained from the primary literature. The development of computational tools to parse and query BRENDA would facilitate its integration in analyses of cellular metabolism. 
25 | 26 | # Statement of need 27 | 28 | Users can access the BRENDA database directly on the website [@brenda-web], which provides searching and filtering capabilities. BRENDA also offers an API to access the database programmatically within several programming languages. However, obtaining specific data through the browser or the API becomes inefficient in certain applications, for instance, when extracting certain data fields for thousands of enzymes to conduct statistical analyses. Here, we present `Brendapyrser`, a Python package to parse and manipulate the BRENDA database. Instead of accessing BRENDA via its API, `Brendapyrser` provides a collection of objects and methods to parse BRENDA locally as a text file — currently sized under 300 MB — thus extracting data fields more quickly. 29 | 30 | `Brendapyrser` was developed to be used by both researchers and students in courses in the areas of biochemistry, molecular biology, bioinformatics and computational biology. Moreover, `Brendapyrser`'s syntax and object-oriented organization are well-suited for exploratory analyses within the Python ecosystem, integrating well with interactive computing tools such as Jupyter Notebooks [@jupyter]. 31 | 32 | # Acknowledgements 33 | 34 | We acknowledge constructive feedback from Brendapyrser users that has helped improve the package. This work has been conducted without any financial or commercial support. 35 | 36 | # References -------------------------------------------------------------------------------- /paper/paper.bib: -------------------------------------------------------------------------------- 1 | @misc{brenda-web, 2 | author = {Prof. Dr. D. 
Schomburg}, 3 | title = {BRENDA: The comprehensive enzyme information system}, 4 | year = {2022}, 5 | publisher = {Technische Universität Braunschweig}, 6 | journal = {Website}, 7 | url = {https://www.brenda-enzymes.org} 8 | } 9 | 10 | @article{brenda, 11 | author = {Jeske, Lisa and Placzek, Sandra and Schomburg, Ida and Chang, Antje and Schomburg, Dietmar}, 12 | title = "{BRENDA in 2019: a European ELIXIR core data resource}", 13 | journal = {Nucleic Acids Research}, 14 | volume = {47}, 15 | number = {D1}, 16 | pages = {D542-D549}, 17 | year = {2018}, 18 | month = {11}, 19 | abstract = "{The BRENDA enzyme database (www.brenda-enzymes.org), recently appointed ELIXIR Core Data Resource, is the main enzyme and enzyme-ligand information system. The core database provides a comprehensive overview on enzymes. A collection of 4.3 million data for ∼84 000 enzymes manually evaluated and extracted from ∼140 000 primary literature references is combined with information obtained by text and data mining, data integration and prediction algorithms. Supplements comprise disease-related data, protein sequences, 3D structures, predicted enzyme locations and genome annotations. Major developments are a revised ligand summary page and the structure search now including a similarity and isomer search. BKMS-react, an integrated database containing known enzyme-catalyzed reactions, is supplemented with further reactions and improved access to pathway connections. In addition to existing enzyme word maps with graphical information of enzyme specific terms, plant word maps have been developed. They show a graphical overview of terms, e.g. enzyme or plant pathogen information, connected to specific plants. An organism summary page showing all relevant information, e.g. taxonomy and synonyms linked to enzyme data, was implemented. 
Based on a decision by the IUBMB enzyme task force the enzyme class EC 7 has been established for ‘translocases’, enzymes that catalyze a transport of ions or metabolites across cellular membranes.}", 20 | issn = {0305-1048}, 21 | doi = {10.1093/nar/gky1048}, 22 | url = {https://doi.org/10.1093/nar/gky1048}, 23 | eprint = {https://academic.oup.com/nar/article-pdf/47/D1/D542/27437170/gky1048.pdf}, 24 | } 25 | 26 | @conference{jupyter, 27 | Title = {Jupyter Notebooks -- a publishing format for reproducible computational workflows}, 28 | Author = {Thomas Kluyver and Benjamin Ragan-Kelley and Fernando P{\'e}rez and Brian Granger and Matthias Bussonnier and Jonathan Frederic and Kyle Kelley and Jessica Hamrick and Jason Grout and Sylvain Corlay and Paul Ivanov and Dami{\'a}n Avila and Safia Abdalla and Carol Willing}, 29 | Booktitle = {Positioning and Power in Academic Publishing: Players, Agents and Agendas}, 30 | Editor = {F. Loizides and B. Schmidt}, 31 | Organization = {IOS Press}, 32 | Pages = {87 - 90}, 33 | Year = {2016} 34 | } -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to BRENDApyrser 2 | 3 | First of all, thanks for taking the time to contribute! :tada::+1: 4 | 5 | Here you will find a set of guidelines for contributing to BRENDApyrser. Feel free to propose changes to this document in a pull request. 6 | 7 | ## Code of conduct 8 | 9 | This project and everyone participating in it is governed by the [Contributor Covenant, v2.0](.github/CODE_OF_CONDUCT.md) code of conduct. By participating, you are expected to uphold this code. 10 | 11 | ## I have a question! 12 | 13 | If you only have a question about all things related to BRENDApyrser, the best course of action for you is to open a new [discussion](https://github.com/Robaina/BRENDApyrser/discussions). 14 | 15 | ## How can I contribute? 16 | 17 | ### 1. 
Reporting bugs 18 | 19 | We all make mistakes, and the developers behind BRENDApyrser are no exception... So, if you find a bug in the source code, please open an [issue](https://github.com/Robaina/BRENDApyrser/issues) and report it. Please, first search for similar issues that are currently open. 20 | 21 | ### 2. Suggesting enhancements 22 | 23 | Are you missing some feature that you would like BRENDApyrser to have? No problem! You can contribute by suggesting an enhancement, just open a new issue and tag it with the [```enhancement```](https://github.com/Robaina/BRENDApyrser/labels/enhancement) label. Please, first search for similar issues that are currently open. 24 | 25 | ### 3. Improving the documentation 26 | 27 | Help is always needed to improve the [documentation](https://robaina.github.io/BRENDApyrser/). Either adding more detailed docstrings, usage explanations or new examples. 28 | 29 | ## First contribution 30 | 31 | Unsure where to begin contributing to BRENDApyrser? You can start by looking for issues with the label [```good first issue```](https://github.com/Robaina/BRENDApyrser/labels/good%20first%20issue). If you are unsure about how to set up a developer environment for BRENDApyrser, do take a look at the section below. Thanks! 32 | 33 | ## Setting up a local developer environment 34 | 35 | To set up a developer environment for BRENDApyrser: 36 | 37 | 1. Fork and download repo, cd to downloaded directory. You should create a new branch to work on your issue. 38 | 39 | 2. Create conda environment with required dependencies: 40 | 41 | The file `envs/brendapyrser-dev.yml` contains all dependencies required to use BRENDApyrser. Conda is very slow solving the environment. It is recommended to use [mamba](https://github.com/mamba-org/mamba) instead: 42 | 43 | ```bash 44 | mamba env create -n BRENDApyrser-dev -f envs/brendapyrser-dev.yml 45 | conda activate BRENDApyrser-dev 46 | ``` 47 | 48 | 3. 
Build package 49 | 50 | ```bash 51 | (BRENDApyrser-dev) poetry build 52 | ``` 53 | 54 | 4. Install BRENDApyrser 55 | 56 | ```bash 57 | (BRENDApyrser-dev) pip install dist/brendapyrser*.whl 58 | ``` 59 | 60 | 5. Run tests 61 | 62 | ```bash 63 | (BRENDApyrser-dev) python -m unittest discover tests 64 | ``` 65 | 66 | ## Building the documentation 67 | 68 | The documentation is formed by a series of markdown files located in directory [docs](https://github.com/Robaina/BRENDApyrser/tree/main/docs). This repo uses [mkdocs](https://www.mkdocs.org/) to automatically generate documentation pages from markdown files. Also, [MathJax](https://github.com/mathjax/MathJax) syntax is allowed! 69 | 70 | This means that, to modify the [API reference](https://robaina.github.io/BRENDApyrser/references/api/), all you need to do is to modify the docstring directly in the source file where the definition/class is located. And, to update the documentation pages, you just have to update the corresponding markdown file in the [docs](https://github.com/Robaina/BRENDApyrser/tree/main/docs) directory. Note that, if you need to change the documentation structure (e.g., add new pages), you would need to tell mkdocs about this change through its [configuration file](https://github.com/Robaina/BRENDApyrser/blob/main/mkdocs.yml). Or just open an issue and ask for help! 71 | 72 | When all the changes are ready to deploy, just open a pull request. After reviewing and merging the changes, the documentation will be automatically deployed. 73 | 74 | Run the documentation locally with: 75 | 76 | > mkdocs serve 77 | 78 | ## Tests on push and pull request to main 79 | 80 | BRENDApyrser's repo contains a [GitHub Action](https://github.com/features/actions) to perform build and integration tests which is triggered automatically on push and pull request events to the main branch. Currently the tests include building and installing BRENDApyrser in Ubuntu and MacOS and running the [test](tests) suite. 
81 | -------------------------------------------------------------------------------- /.github/CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing 
our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | hello@semidanrobaina.com. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. 
Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.0, available at 119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 
123 | 124 | [homepage]: https://www.contributor-covenant.org 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | https://www.contributor-covenant.org/faq. Translations are available at 128 | https://www.contributor-covenant.org/translations. 129 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 
35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /tests/tests.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """ 5 | Unit tests for brendapyrser. 6 | """ 7 | 8 | import unittest 9 | 10 | from brendapyrser import Reaction 11 | 12 | rxn_data = """ID 1.1.1.304 13 | ******************************************************************************** 14 | * * 15 | * Copyrighted by Dietmar Schomburg, Techn. University Braunschweig, GERMANY * 16 | * Distributed under the License as stated at http:/www.brenda-enzymes.org * 17 | * * 18 | ******************************************************************************** 19 | 20 | PROTEIN 21 | PR #1# Staphylococcus aureus <4> 22 | PR #2# Geobacillus stearothermophilus <5,8> 23 | PR #3# Klebsiella aerogenes <1> 24 | PR #4# Bacillus licheniformis <10> 25 | PR #5# Rhodococcus erythropolis <7> 26 | PR #6# Columba livia <2,3> 27 | PR #7# Klebsiella pneumoniae Q48436 UniProt <6> 28 | PR #8# Paenibacillus polymyxa KC505218 GenBank <9> 29 | PR #9# Mycobacterium sp. 
W8VSK8 UniProt <11> 30 | 31 | RECOMMENDED_NAME 32 | RN diacetyl reductase [(S)-acetoin forming] 33 | 34 | 35 | SYSTEMATIC_NAME 36 | SN (S)-acetoin:NAD+ oxidoreductase 37 | 38 | 39 | SYNONYMS 40 | SY #2# S-stereospecific diacetyl reductase <8> 41 | SY #2# BSDR <8> 42 | SY #4# budC <10> 43 | SY #5# AdR <7> 44 | SY #5# acetoin(diacetyl) reductase <7> 45 | SY #5# ReADR <7> 46 | SY #8# diacetyl reductase <9> 47 | SY #8# DAR <9> 48 | SY #9# ADS1 <11> 49 | 50 | REACTION 51 | RE (S)-acetoin + NAD+ = diacetyl + NADH + H+ (#6# Theorell-Chance 52 | mechanism with NADH as the leading substrate <3>) 53 | RE (S)-acetoin + NAD+ = diacetyl + NADH + H+ (#6# Theorell-Chance 54 | mechanism with NADH as the leading substrate <3>) 55 | 56 | SOURCE_TISSUE 57 | ST #6# liver <2,3> 58 | 59 | NATURAL_SUBSTRATE_PRODUCT 60 | NSP #2,5# diacetyl + NADH + H+ = (S)-acetoin + NAD+ <7,8> 61 | NSP #2,5# diacetyl + NADH + H+ = (S)-acetoin + NAD+ {ir} <7,8> 62 | NSP #2,5# diacetyl + NADH + H+ = (S)-acetoin + NAD+ {r} <7,8> 63 | NSP #2,5,8# more = ? (#5# Rhodococcus erythropolis WZ010 is capable of 64 | producing optically pure (2S,3S)-2,3-butanediol in alcoholic 65 | fermentation. <7>; #8# the enzyme also catalyzes the stereospcific 66 | reaction of (S)-acetoin reduction to butanediol, EC 1.1.1.76 <9>; #2# 67 | the enzyme shows an S-enantioselectivity in the reversible reduction of 68 | acetoin so it might be responsible of the meso-butenediol formation 69 | from R-acetoin. It acts on racemic acetoin and (S)-acetoin to form 70 | (2S,3S)-butane-2,3-diol, EC 1.1.1.76, but also on the 71 | (2R,3R)-butane-2,3-diol isomer in the reverse reaction, EC 1.1.1.4 <8>) 72 | <7,8,9> 73 | 74 | SUBSTRATE_PRODUCT 75 | SP #1# 2,3-pentanedione + beta-NADH + H+ = L-3-hydroxy-2-pentanone + 76 | beta-NAD+ {ir} <4> 77 | SP #1# diacetyl + beta-NADH + H+ = (S)-acetoin + beta-NAD+ (#1# 86.9% of 78 | the activity with pentane-2,3-dione <4>) {ir} <4> 79 | SP #1# ethyl pyruvate + beta-NADH + H+ = ? 
+ beta-NAD+ (#1# 38.4% of the 80 | activity with pentane-2,3-dione <4>) {ir} <4> 81 | SP #1# methyl pyruvate + beta-NADH + H+ = ? + beta-NAD+ (#1# 22.8% of the 82 | activity with pentane-2,3-dione <4>) {ir} <4> 83 | SP #1,2,4,5,8,9# more = ? (#1# no activity with alpha-NADH or NADPH <4>; 84 | #5# Rhodococcus erythropolis WZ010 is capable of producing optically 85 | pure (2S,3S)-2,3-butanediol in alcoholic fermentation. <7>; #8# the 86 | enzyme also catalyzes the stereospcific reaction of (S)-acetoin 87 | reduction to butanediol, EC 1.1.1.76 <9>; #2# the enzyme shows an 88 | S-enantioselectivity in the reversible reduction of acetoin so it might 89 | be responsible of the meso-butenediol formation from R-acetoin. It acts 90 | on racemic acetoin and (S)-acetoin to form (2S,3S)-butane-2,3-diol, EC 91 | 1.1.1.76, but also on the (2R,3R)-butane-2,3-diol isomer in the reverse 92 | reaction, EC 1.1.1.4 <8>; #5# the enzyme displays absolute 93 | stereospecificity in the reduction of diacetyl to 94 | (2S,3S)-2,3-butanediol via (S)-acetoin. The enzyme shows higher 95 | catalytic efficiency for (S)-1-phenylethanol oxidation than that for 96 | acetophenone reduction. 
ReADR-catalyzed asymmetric reduction of 97 | diacetyl is coupled with stereoselective oxidation of 1-phenylethanol, 98 | which simultaneously forms both (2S,3S)-2,3-butanediol and 99 | (R)-1-phenylethanol in great conversions and enantiomeric excess 100 | values.The enzyme accepts a broad range of substrates including 101 | aliphatic and aryl alcohols, aldehydes, and ketones <7>; #9# enzyme 102 | shows activity as a reductase specific for (S)-acetoin, EC 1.1.1.76, 103 | and both diacetyl reductase (EC 1.1.1.304) and NAD+-dependent alcohol 104 | dehydrogenase (EC 1.1.1.1) activities <11>; #4# enzyme shows oxidative 105 | activity to racemic 2,3-butanediol but no activity toward racemic 106 | acetoin in the presence of NAD+ <10>) <4,7,8,9,10,11> 107 | SP #2,3,5,6,7,9# diacetyl + NADH + H+ = (S)-acetoin + NAD+ (#7# 87% of the 108 | (R)-2,3-butanediol dehydrogenase activity with substrate acetoin <6>) 109 | <1,2,3,5,6,7,8,11> 110 | SP #2,3,5,6,7,9# diacetyl + NADH + H+ = (S)-acetoin + NAD+ (#7# 87% of the 111 | (R)-2,3-butanediol dehydrogenase activity with substrate acetoin <6>) 112 | {ir} <1,2,3,5,6,7,8,11> 113 | SP #2,3,5,6,7,9# diacetyl + NADH + H+ = (S)-acetoin + NAD+ (#7# 87% of the 114 | (R)-2,3-butanediol dehydrogenase activity with substrate acetoin <6>) 115 | {r} <1,2,3,5,6,7,8,11> 116 | SP #3# ethyl pyruvate + NADH + H+ = ? + NAD+ (#3# 57.7% of the activity 117 | with diacetyl <1>) <1> 118 | SP #3# methyl glyoxal + NADH + H+ = ? + NAD+ (#3# 11% of the activity with 119 | diacetyl <1>) <1> 120 | SP #3# methyl pyruvate + NADH + H+ = ? 
+ NAD+ (#3# 49% of the activity 121 | with diacetyl <1>) <1> 122 | SP #3,7# 2,3-pentanedione + NADH + H+ = 3-hydroxy-2-pentanone + NAD+ (#7# 123 | 77% of the (R)-2,3-butanediol dehydrogenase activity with substrate 124 | acetoin <6>; #3# 85.6% of the activity with diacetyl <1>) <1,6> 125 | SP #4# (2S,3S)-2,3-butanediol + NAD+ = (3S)-acetoin + NADH + H+ {r} <10> 126 | SP #4# (3S)-acetoin + NADH + H+ = (2S,3S)-2,3-butanediol + NAD+ (#4# 97% 127 | of the activity with diacetyl <10>) |#4# 97.3% enantiomeric excess and 128 | 96.5% diastereomeric excess <10>| {r} <10> 129 | SP #4# 1,2-propanediol + NAD+ = ? + NADH + H+ |#4# 0.5% of the activity 130 | with 2,3-butanediol <10>| {r} <10> 131 | SP #4# diacetyl + NADH + H+ = (3S)-acetoin + NAD+ |#4# 97.3% enantiomeric 132 | excess <10>| {ir} <10> 133 | SP #4# 2,3-pentanedione + NADH + H+ = ? + NAD+ |#4# 69% of the activity 134 | with diacetyl <10>| <10> 135 | SP #4# 2,3-hexanedione + NADH + H+ = ? + NAD+ |#4# 66% of the activity 136 | with diacetyl <10>| <10> 137 | SP #4# 3,4-hexanedione + NADH + H+ = ? 
+ NAD+ |#4# 10% of the activity 138 | with diacetyl <10>| <10> 139 | SP #6,8# diacetyl + NADPH + H+ = (S)-acetoin + NADP+ {ir} <3,9> 140 | 141 | TURNOVER_NUMBER 142 | TN #4# 748 {NAD+} (#4# pH 10.0, 30°C <10>) <10> 143 | TN #4# 202 {(3S)-acetoin} (#4# pH 6.0, 30°C <10>) <10> 144 | TN #4# 591 {(2S,3S)-2,3-butanediol} (#4# pH 10.0, 30°C <10>) <10> 145 | TN #4# 1222 {diacetyl} (#4# pH 6.0, 30°C <10>) <10> 146 | TN #4# 1274 {NADH} (#4# pH 6.0, 30°C <10>) <10> 147 | TN #9# 110 {NADH} (#9# pH 7.0, 30°C <11>) <11> 148 | TN #9# 163 {diacetyl} (#9# pH 7.0, 30°C <11>) <11> 149 | 150 | KM_VALUE 151 | KM #1# 0.045 {NADH} (#1# 25°C, pH 6.0, cosubstrate diacetyl <4>) <4> 152 | KM #1# 0.095 {NADH} (#1# 25°C, pH 6.0, cosubstrate methyl pyruvate <4>) 153 | <4> 154 | KM #1# 0.025 {NADH} (#1# 25°C, pH 6.0, cosubstrate 2,3-pentanedione <4>) 155 | <4> 156 | KM #1# 0.11 {NADH} (#1# 25°C, pH 6.0, cosubstrate ethyl pyruvate <4>) <4> 157 | KM #1# 24 {ethyl pyruvate} (#1# 25°C, pH 6.0 <4>) <4> 158 | KM #1# 16 {Methyl pyruvate} (#1# 25°C, pH 6.0 <4>) <4> 159 | KM #1# 6 {2,3-Pentanedione} (#1# 25°C, pH 6.0 <4>) <4> 160 | KM #1# 15 {diacetyl} (#1# 25°C, pH 6.0 <4>) <4> 161 | KM #2# 19 {diacetyl} (#2# pH 7.5, 25°C <5>) <5> 162 | KM #3# 0.005 {NADH} (#3# cosubstrate acetoin, pH 7.0, 25°C <1>) <1> 163 | KM #3# 20 {ethyl pyruvate} (#3# pH 7.0, 25°C <1>) <1> 164 | KM #3# 0.007 {NADH} (#3# cosubstrate diacetyl, pH 7.0, 25°C <1>) <1> 165 | KM #3# 1.6 {diacetyl} (#3# pH 7.0, 25°C <1>) <1> 166 | KM #3# 6 {pentane-2,3-dione} (#3# pH 7.0, 25°C <1>) <1> 167 | KM #3# 18 {Methyl pyruvate} (#3# pH 7.0, 25°C <1>) <1> 168 | KM #3# 75 {methyl glyoxal} (#3# pH 7.0, 25°C <1>) <1> 169 | KM #4# 0.25 {NADH} (#4# pH 6.0, 30°C <10>) <10> 170 | KM #4# 0.34 {NAD+} (#4# pH 10.0, 30°C <10>) <10> 171 | KM #4# 0.47 {(3S)-acetoin} (#4# pH 6.0, 30°C <10>) <10> 172 | KM #4# 7.25 {(2S,3S)-2,3-butanediol} (#4# pH 10.0, 30°C <10>) <10> 173 | KM #4# 72.4 {diacetyl} (#4# pH 6.0, 30°C <10>) <10> 174 | KM #5# -999 {more} (#5# 
Michaelis-Menten-type kinetics <7>) <7> 175 | KM #6# 0.1 {NADH} (#6# pH 6.1, 25°C <3>) <3> 176 | KM #6# 3.1 {diacetyl} (#6# pH 6.1, 25°C <3>) <3> 177 | KM #6# 3 {diacetyl} (#6# 25°C, pH 6.1 <2>) <2> 178 | KM #6# 0.087 {NADH} (#6# 25°C, pH 5.9 <2>) <2> 179 | KM #6# 0.116 {NADH} (#6# 25°C, pH 6.1 <2>) <2> 180 | KM #6# 2.64 {diacetyl} (#6# 25°C, pH 6.7 <2>) <2> 181 | KM #6# 0.135 {NADH} (#6# 25°C, pH 6.7 <2>) <2> 182 | KM #6# 2.81 {diacetyl} (#6# 25°C, pH 5.9 <2>) <2> 183 | KM #9# 0.05 {NADH} (#9# pH 7.0, 30°C <11>) <11> 184 | KM #9# 4.47 {diacetyl} (#9# pH 7.0, 30°C <11>) <11> 185 | 186 | PH_OPTIMUM 187 | PHO #1,8# 6 (#8# assay at <9>) <4,9> 188 | PHO #2# 6.5 (#2# assay at <8>) <8> 189 | PHO #4# 5 (#4# reduction of diacetyl <10>) <10> 190 | PHO #4# 10 (#4# oxidation of butanediol <10>) <10> 191 | PHO #5# 7 (#5# diacetyl reduction <7>) <7> 192 | PHO #6# 6.1 <3> 193 | 194 | PH_RANGE 195 | PHR #4# 5-8 (#4# reduction of diacetyl <10>) <10> 196 | PHR #6# 5 (#6# 5 min, 30% loss of activity <2>) <2> 197 | PHR #6# 5.1 (#6# 5 min, 20% loss of activity <2>) <2> 198 | PHR #6# 5.4-7.6 (#6# stable within <2>) <2> 199 | PHR #6# 4.8 (#6# 5 min, 60% loss of activity <2>) <2> 200 | 201 | SPECIFIC_ACTIVITY 202 | SA #2# 71.4 (#2# pH 7.5, 25°C <5>) <5> 203 | SA #4# 120.0 (#4# substrate diacetyl, 30°C, pH 6.0 <10>) <10> 204 | SA #8# 72.6 (#8# purified recombinant enzyme, NADPH, pH 6.0, 30°C <9>) <9> 205 | 206 | TEMPERATURE_OPTIMUM 207 | TO #2# 50 <5> 208 | TO #5,8# 30 (#8# assay at <9>; #5# diacetyl reduction <7>) <7,9> 209 | 210 | COFACTOR 211 | CF #1,2,3,4,5,6# NADH (#2# dependent on <8>; #1# beta-NADH <4>; #3# 212 | specific for beta-NADH <1>) <1,3,4,5,7,8,10> 213 | CF #1,4,8# more (#4# no activity with NADPH <10>; #8# inactive with NADH 214 | <9>; #1# no activity with alpha-NADH or NADPH <4>) <4,9,10> 215 | CF #4,5# NAD+ <7,10> 216 | CF #8# NADPH (#8# dependent on <9>) <9> 217 | 218 | ACTIVATING_COMPOUND 219 | AC #5# DMSO (#5# DMSO at a final concentration of 30% v/v added into the 
220 | assay mixture, increases the activity up to 120% of the control enzyme 221 | activity <7>) <7> 222 | 223 | INHIBITORS 224 | IN #1# diacetyl (#1# substrate inhibition at concentrations above 80-90 mM 225 | <4>) <4> 226 | IN #1# ethyl pyruvate (#1# substrate inhibition at concentrations above 227 | 80-90 mM <4>) <4> 228 | IN #1# Methyl pyruvate (#1# substrate inhibition at concentrations above 229 | 80-90 mM <4>) <4> 230 | IN #4# Cu2+ (#4# 1 mM, 0.2% of initial activity with substrate diacetyl, 231 | 1% with substrate 2,3-butanediol, respectively <10>) <10> 232 | IN #4# Ag+ (#4# 1 mM, 0.5% of initial activity with substrate diacetyl, 233 | 0.5% with substrate 2,3-butanediol, respectively <10>) <10> 234 | IN #4# Fe3+ (#4# 1 mM, 2% of initial activity with substrate diacetyl, 235 | 3.5% with substrate 2,3-butanediol, respectively <10>) <10> 236 | IN #4# EDTA (#4# 1 mM, 91% of initial activity with substrate diacetyl, 237 | 90% with substrate 2,3-butanediol, respectively <10>) <10> 238 | IN #4,5# Al3+ (#4# 1 mM, 4% of initial activity with substrate diacetyl, 239 | 6% with substrate 2,3-butanediol, respectively <10>) <7,10> 240 | IN #4,5# Zn2+ (#5# inhibits 94.6% at 2 mM <7>; #4# 1 mM, 75% of initial 241 | activity with substrate diacetyl, 80% with substrate 2,3-butanediol, 242 | respectively <10>) <7,10> 243 | IN #5# Fe2+ (#5# inhibits 91.6% at 2 mM <7>) <7> 244 | IN #6# 2-oxoglutarate (#6# noncompetitive <3>) <3> 245 | IN #6# hexane-2,5-dione (#6# noncompetitive <3>) <3> 246 | IN #6# NAD+ (#6# competitive, product inhibition <3>) <3> 247 | IN #6# acetoin (#6# noncompetitive, product inhibition <3>) <3> 248 | IN #6# acetone (#6# competitive for diacetyl, uncompetitive for NADH <3>) 249 | <3> 250 | IN #6# Pentane-3-one (#6# competitive for diacetyl, uncompetitive for NADH 251 | <3>) <3> 252 | 253 | METALS_IONS 254 | ME #5# more (#5# addition of EDTA or the cations at 1 mM, such as Na+, K+, 255 | Mn2+, Mg2+, and Ca2+, have no significant effect on the activity of 
256 | ReADR <7>) <7> 257 | ME #5# Mn2+ (#5# activates by 201.6 to 265.6% at 2 mM <7>) <7> 258 | ME #5# K+ (#5# activates by 201.6 to 265.6% at 2 mM <7>) <7> 259 | ME #5# Na+ (#5# activates by 201.6 to 265.6% at 2 mM <7>) <7> 260 | 261 | MOLECULAR_WEIGHT 262 | MW #1# 68000 (#1# gel filtration <4>) <4> 263 | MW #2# 26000 (#2# 2 * 26000, SDS-PAGE <5>) <5> 264 | MW #2# 49000 (#2# gel filtration <5>) <5> 265 | MW #3# 61000 (#3# gel filtration <1>) <1> 266 | MW #3# 28000 (#3# 2 * 28000, SDS-PAGE <1>) <1> 267 | MW #4# 125000 (#4# gel filtration <10>) <10> 268 | MW #4# 30000 (#4# 4 * 30000, SDS-PAGE <10>) <10> 269 | MW #5# 26864 (#5# 2 * 26864, sequence calculation <7>) <7> 270 | MW #7# 96000 (#7# gel filtration <6>) <6> 271 | MW #7# 26591 (#7# 4 * 26591, calculated <6>) <6> 272 | MW #8# 118000 (#8# recombinant enzyme, gel filtration <9>) <9> 273 | MW #8# 28500 (#8# 4 * 28500, recombinant enzyme, SDS-PAGE <9>) <9> 274 | MW #9# 150000 (#9# gel filtration <11>) <11> 275 | MW #9# 36000 <11> 276 | 277 | SUBUNITS 278 | SU #1# monomer (#1# 1 * 68000, SDS-PAGE <4>) <4> 279 | SU #2,3# dimer (#3# 2 * 28000, SDS-PAGE <1>; #2# 2 * 26000, SDS-PAGE <5>) 280 | <1,5> 281 | SU #4,7,8,9# tetramer (#4# 4 * 30000, SDS-PAGE <10>; #7# 4 * 26591, 282 | calculated <6>; #8# 4 * 28500, recombinant enzyme, SDS-PAGE <9>; #9# 4 283 | * 36000, SDS-PAGE, 4 * 35971, calculated <11>) <6,9,10,11> 284 | SU #5# homodimer (#5# 2 * 26864, sequence calculation <7>) <7> 285 | 286 | PI_VALUE 287 | PI #3# 6.8 (#3# isoelectric focusing <1>) <1> 288 | PI #7# 5.9-7.2 (#7# isoelectric focusing <6>) <6> 289 | PI #9# 4.8 (#9# isoelectric focusing <11>) <11> 290 | PI #9# 5.1 (#9# calculated <11>) <11> 291 | 292 | APPLICATION 293 | AP #5,8# synthesis (#5# acetoin(diacetyl) reductase, i.e. 
2,3-butanediol 294 | dehydrogenase, is one of the key enzymes in the microbial production of 295 | 2,3-butanediol, a platform with extensive industrial applications in 296 | the production of plastics, printing inks, perfumes, fumigants, 297 | spandex, moistening and softening agents, plasticizers, and 298 | pharmaceutical carrier <7>; #8# the enzyme is used for production of 299 | S-acetoin with higher than 99.9% optical purity from diacetyl using 300 | whole cells of engineered Escherichia coli <9>) <7,9> 301 | 302 | CLONED 303 | CL #4# (expression in Escherichia coli) <10> 304 | CL #5# (gene adr, DNA and amino acid sequence determination and analysis, 305 | sequence comparison, expression of His-tagged enzyme in Escherichia 306 | coli) <7> 307 | CL #7# (expression in Escherichia coli) <6> 308 | CL #8# (gene dar, fucntional expression in Escherichia coli strain Rosetta 309 | (DE3), resulting in production of S-acetoin with higher than 99.9% 310 | optical purity from diacetyl) <9> 311 | 312 | PURIFICATION 313 | PU #1# <4> 314 | PU #2# <5> 315 | PU #2# (native enzyme by adsorption on diethylaminoethyl–Sepharose and 316 | hydrophobic interaction chromatography) <8> 317 | PU #3# <1> 318 | PU #5# (recombinant His-tagged enzyme from Escherichia coli by nickel 319 | affinity chromatography) <7> 320 | PU #8# (recombinant enzyme from Escherichia coli strain Rosetta (DE3)) <9> 321 | 322 | GENERAL_STABILITY 323 | GS #2# (unstable to dilution, kept diluted at 0°C for ca. 60 min it will 324 | lose 62% of activity. 
This inactivation is almost completely reversed 325 | by the addition of NAD+) <5> 326 | 327 | ORGANIC_SOLVENT_STABILITY 328 | OSS #5# DMSO (#5# the enzyme retains 53.6% of the initial activity after 4 329 | h incubation with 30% v/v DMSO at 4°C <7>) <7> 330 | 331 | PH_STABILITY 332 | PHS #7# 7-8 <6> 333 | 334 | STORAGE_STABILITY 335 | SS #2# (storage at 0°C in the presence of 20% glycerol, 0.1 mM EDTA, 5 mM 336 | 2-mercaptoethanol and 0.6 mM NAD+ in TEA buffer, pH 7.5, half-life of 337 | one month) <5> 338 | 339 | REFERENCE 340 | RF <1> Carballo, J.; Martin, R.; Bernardo, A.; Gonzalez, J.: Purification, 341 | characterization and some properties of diacetyl(acetoin) reductase 342 | from Enterobacter aerogenes. Eur. J. Biochem. (1991) 198, 327-332. 343 | {Pubmed:2040298} (c) 344 | RF <2> Martin, R.; Diez, V.; Burgos, J.: Pigeon liver diacetyl reductase. 345 | Effects of pH on the kinetic parameters of the reaction. Biochim. 346 | Biophys. Acta (1976) 429, 293-300. {Pubmed:4124} 347 | RF <3> Burgos, J.; Martin, R.; Diez, V.: Pigeon liver diacetyl reductase. 348 | Kinetic and thermodynamic studies with NADH as coenzyme. Biochim. 349 | Biophys. Acta (1974) 364, 9-16. {Pubmed:4373071} 350 | RF <4> Vidal, I.; Gonzalez, J.; Bernardo, A.; Martin, R.: Purification and 351 | classification of diacetyl-reducing enzymes from Staphylococcus aureus. 352 | Biochem. J. (1988) 251, 461-466. {Pubmed:3041963} 353 | RF <5> Giovannini, P.P.; Medici, A; Bergamini, C.M.; Rippa, M.: Properties 354 | of diacetyl (acetoin) reductase from Bacillus stearothermophilus. 355 | Bioorg. Med. Chem. (1996) 4, 1197-1201. {Pubmed:8879540} 356 | RF <6> Ui, S.; Okajima, Y.; Mimura, A.; Kanai, H.; kobayashi, T.; Kudo, 357 | T.: Sequence analysis of the gene for and characterization of D-acetoin 358 | forming meso-2,3-butanediol dehydrogenase of Klebsiella pneumoniae 359 | expressed in Escherichia coli. J. Ferment. Bioeng. (1997) 83, 32-37. 
360 | {Pubmed:} 361 | RF <7> Wang, Z.; Song, Q.; Yu, M.; Wang, Y.; Xiong, B.; Zhang, Y.; Zheng, 362 | J.; Ying, X.: Characterization of a stereospecific acetoin(diacetyl) 363 | reductase from Rhodococcus erythropolis WZ010 and its application for 364 | the synthesis of (2S,3S)-2,3-butanediol. Appl. Microbiol. Biotechnol. 365 | (2014) 98, 641-650. {Pubmed:23568047} 366 | RF <8> Giovannini, P.; Mantovani, M.; Grandini, A.; Medici, A.; Pedrini, 367 | P.: New acetoin reductases from Bacillus stearothermophilus: meso- and 368 | 2R,3R-butanediol as fermentation products. J. Mol. Catal. B (2011) 69, 369 | 15-20. {Pubmed:} 370 | RF <9> Gao, J.; Xu, Y.; Li, F.; Ding, G.: Production of S-acetoin from 371 | diacetyl by Escherichia coli transformant cells that express the 372 | diacetyl reductase gene of Paenibacillus polymyxa ZJ-9. Lett. Appl. 373 | Microbiol. (2013) 57, 274-281. {Pubmed:23701367} 374 | RF <10> Xu, G.C.; Bian, Y.Q.; Han, R.Z.; Dong, J.J.; Ni, Y.: Cloning, 375 | expression, and characterization of budC gene encoding 376 | meso-2,3-butanediol dehydrogenase from Bacillus licheniformis. Appl. 377 | Biochem. Biotechnol. (2016) 178, 604-617. {Pubmed:26494135} 378 | RF <11> Takeda, M.; Anamizu, S.; Motomatsu, S.; Chen, X.; Thapa Chhetri, 379 | R.: Identification and characterization of a mycobacterial 380 | NAD+-dependent alcohol dehydrogenase with superior reduction of 381 | diacetyl to (S)-acetoin. Biosci. Biotechnol. Biochem. (2014) 78, 382 | 1879-1886. 
class TestReaction(unittest.TestCase):
    """Checks the fields that ``Reaction`` extracts from the ``rxn_data`` fixture."""

    def setUp(self):
        # unittest runs setUp before every test, so each test still parses
        # the fixture from scratch.
        self.reaction = Reaction(rxn_data)

    def test_ec_number(self):
        observed = self.reaction.ec_number
        self.assertEqual(
            observed, "1.1.1.304", "Failed to correctly retrieve EC number"
        )

    def test_name(self):
        observed = self.reaction.name
        expected = "Diacetyl reductase [(s)-acetoin forming]"
        self.assertEqual(
            observed, expected, "Failed to correctly retrieve reaction name"
        )

    def test_sysname(self):
        observed = self.reaction.systematic_name
        expected = "(S)-acetoin:NAD+ oxidoreductase"
        self.assertEqual(
            observed,
            expected,
            "Failed to correctly retrieve systematic reaction name",
        )

    def test_KMvalues(self):
        first_four = self.reaction.KMvalues.get_values()[:4]
        self.assertEqual(
            first_four,
            [0.045, 0.095, 0.025, 0.11],
            "Failed to correctly retrieve KM values",
        )

    def test_KKMvalues(self):
        first_four = self.reaction.KKMvalues.get_values()[:4]
        self.assertEqual(
            first_four,
            [16.9, 36.4, 81.5, 432.0],
            "Failed to correctly retrieve KKM values",
        )

    def test_Kcatvalues(self):
        first_four = self.reaction.Kcatvalues.get_values()[:4]
        self.assertEqual(
            first_four,
            [748.0, 202.0, 591.0, 1222.0],
            "Failed to correctly retrieve Kcat values",
        )

    def test_temperature(self):
        first_optimum = self.reaction.temperature["optimum"][0]
        self.assertEqual(
            first_optimum["value"],
            50.0,
            "Failed to correctly retrieve temperature values",
        )
Distributed under the License as stated 37 | at http:/www.brenda-enzymes.org""" 38 | self.__fields = fields 39 | self.__units = units 40 | 41 | def _repr_html_(self): 42 | """This method is executed automatically by Jupyter to print html!""" 43 | return """ 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 |
Number of Enzymes{n_ec}
BRENDA copyright{cr}
Brendapyrser version{parser}
Author{author}
class ReactionList(list):
    """
    List of reaction objects with lookup and filtering helpers.

    Slices and every ``filter_by_*`` helper return an instance of the same
    class, so the helpers can be chained.
    """

    def __init__(self, seq=None):
        # list() does not accept None, so an omitted sequence means "empty".
        super().__init__(() if seq is None else seq)

    def __getslice__(self, start, stop):
        # Python 2 slicing hook; Python 3 never calls it implicitly. Kept so
        # that explicit callers get a sliced ReactionList.
        return self[start:stop]

    def __getitem__(self, key):
        """Return one item for an index, or a list of this class for a slice."""
        item = super().__getitem__(key)
        if isinstance(key, slice):
            return type(self)(item)
        return item

    def get_by_id(self, id: str):
        """
        Return the first reaction whose ``ec_number`` equals ``id``.

        Raises:
            ValueError: if no reaction has that EC number.
        """
        try:
            return next(rxn for rxn in self if rxn.ec_number == id)
        except (StopIteration, AttributeError):
            raise ValueError(f"Enzyme with EC {id} not found in database") from None

    def get_by_name(self, name: str):
        """
        Return the first reaction whose ``name`` equals ``name``, ignoring case.

        Raises:
            ValueError: if no reaction has that name.
        """
        try:
            wanted = name.lower()
            return next(rxn for rxn in self if rxn.name.lower() == wanted)
        except (StopIteration, AttributeError):
            raise ValueError(f"Enzyme {name} not found in database") from None

    def filter_by_substrate(self, substrate: str) -> "list[Reaction]":
        """
        Filter reactions by a specific substrate
        """
        return type(self)(
            rxn
            for rxn in self
            if any(
                substrate in mets["substrates"] for mets in rxn.substratesAndProducts
            )
        )

    def filter_by_product(self, product: str) -> "list[Reaction]":
        """
        Filter reactions by a specific product
        """
        return type(self)(
            rxn
            for rxn in self
            if any(product in mets["products"] for mets in rxn.substratesAndProducts)
        )

    def filter_by_compound(self, compound: str) -> "list[Reaction]":
        """
        Filter reactions by a substrate or product
        """
        return type(self)(
            rxn
            for rxn in self
            if any(
                compound in mets["substrates"] or compound in mets["products"]
                for mets in rxn.substratesAndProducts
            )
        )

    def filter_by_organism(self, species: str):
        """
        Keep reactions having an organism whose name contains ``species``.

        The comparison is a case-insensitive substring match against each
        entry of the reaction's ``organisms``.
        """
        wanted = species.lower()
        return type(self)(
            rxn
            for rxn in self
            if any(wanted in organism.lower() for organism in rxn.organisms)
        )
def get_values(self): 202 | return [v["value"] for k in self.keys() for v in self[k]] 203 | 204 | 205 | class EnzymePropertyDict(EnzymeDict): 206 | def filter_by_compound(self, compound: str): 207 | try: 208 | return self.__class__({compound: self[compound]}) 209 | except Exception: 210 | return self.__class__({compound: []}) 211 | 212 | 213 | class EnzymeConditionDict(EnzymeDict): 214 | def filter_by_condition(self, condition: str): 215 | try: 216 | return self.__class__({condition: self[condition]}) 217 | except Exception: 218 | raise KeyError( 219 | f'Invalid condition, valid conditions are: {", ".join(list(self.keys()))}' 220 | ) 221 | 222 | 223 | class Reaction: 224 | def __init__(self, reaction_data): 225 | self.__reaction_data = reaction_data 226 | self.__ec_number = self.__extractRegexPattern("(?<=ID\t)(.*)(?=\n)") 227 | self.__systematic_name = self.__extractRegexPattern("(?<=SN\t)(.*)(?=\n)") 228 | self.__name = self.__extractRegexPattern("(?<=RN\t)(.*)(?=\n)").capitalize() 229 | self.__mechanism_str = ( 230 | self.__extractRegexPattern("(?<=RE\t)(.*)(?=\n[A-Z])", dotall=True) 231 | .replace("=", "<=>") 232 | .replace("\n\t", "") 233 | .split("\nRE\t") 234 | ) 235 | self.__reaction_type = self.__extractRegexPattern( 236 | "(?<=RT\t)(.*)(?=\n)", dotall=True 237 | ).split("\nRT\t") 238 | self.__proteins = self.getSpeciesDict() 239 | self.__references = self.getReferencesDict() 240 | 241 | def getSpeciesDict(self) -> dict: 242 | """ 243 | Returns a dict listing all proteins for given EC number 244 | """ 245 | species = {} 246 | lines = self.__getDataLines("PR") 247 | for line in lines: 248 | res = self.extractDataLineInfo(line) 249 | species_name, protein_ID = self.__splitSpeciesFromProteinID(res["value"]) 250 | species[res["species"][0]] = { 251 | "name": species_name, 252 | "proteinID": protein_ID, 253 | "refs": res["refs"], 254 | } 255 | return species 256 | 257 | def getReferencesDict(self): 258 | """ 259 | Returns a dict listing the bibliography cited 
for the given EC number 260 | """ 261 | references = {} 262 | lines = self.__getDataLines("RF") 263 | for line in lines: 264 | line = self.__removeTabs(line) 265 | line, refs = self.__extractDataField(line, ("<", ">")) 266 | references[refs] = line 267 | return references 268 | 269 | def printReactionSummary(self): 270 | data = { 271 | "EC number": self.__ec_number, 272 | "Name": self.__name, 273 | "Systematic name": self.__systematic_name, 274 | "Reaction type": self.__reaction_type, 275 | "Mechanism": self.__mechanism, 276 | } 277 | return pd.DataFrame.from_dict(data, orient="index", columns=[""]) 278 | 279 | def _repr_html_(self): 280 | """This method is executed automatically by Jupyter to print html!""" 281 | return """ 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 |
Enzyme identifier{ec}
Name{name}
Systematic name{sys_name}
Reaction type{rxn_type}
Reaction{rxn_str}
295 | """.format( 296 | ec=self.__ec_number, 297 | name=self.__name, 298 | sys_name=self.__systematic_name, 299 | rxn_type=self.__reaction_type, 300 | rxn_str=self.reaction_str, 301 | ) 302 | 303 | def __extractRegexPattern(self, pattern, dotall=False): 304 | if dotall: 305 | flag = re.DOTALL 306 | else: 307 | flag = 0 308 | try: 309 | return re.search(pattern, self.__reaction_data, flags=flag).group(1) 310 | except Exception: 311 | return "" 312 | 313 | def __getDataLines(self, pattern: str): 314 | try: 315 | search_pattern = f"{pattern}\t(.+?)\n(?!\t)" 316 | return [ 317 | p.group(1) 318 | for p in re.finditer( 319 | search_pattern, self.__reaction_data, flags=re.DOTALL 320 | ) 321 | ] 322 | except Exception: 323 | return [] 324 | 325 | @staticmethod 326 | def __removeTabs(line): 327 | return line.replace("\n", "").replace("\t", "").strip() 328 | 329 | @staticmethod 330 | def __extractDataField(line, regex_tags: tuple): 331 | try: 332 | left, right = regex_tags 333 | searched_s = re.search(f"{left}(.+?){right}", line) 334 | span = searched_s.span() 335 | matched_s = line[span[0] + 1 : span[1] - 1].strip() 336 | line = line.replace(f"{searched_s.group()}", "") 337 | return (line, matched_s) 338 | except Exception: 339 | return (line, "") 340 | 341 | @staticmethod 342 | def __eval_range_value(v): 343 | try: 344 | if not re.search("\d-\d", v): 345 | return float(v) 346 | else: 347 | return np.mean([float(s) for s in v.split("-")]) 348 | except Exception: 349 | return -999 350 | 351 | @staticmethod 352 | def __splitSpeciesFromProteinID(line): 353 | try: 354 | idx = re.search("[A-Z]{1}[0-9]{1}", line).start() 355 | return (line[:idx].strip(), line[idx:].strip()) 356 | except Exception: 357 | return (line.strip(), "") 358 | 359 | def extractDataLineInfo(self, line: str, numeric_value=False): 360 | """ 361 | Extracts data fields in each data line according to the tags used by BRENDA 362 | and described in the REAMDE.txt file. 
What remains after extracting all tags 363 | is the value of that particular data field, e.g., KM value. 364 | """ 365 | line = self.__removeTabs(line) 366 | line, specific_info = self.__extractDataField(line, ("{", ".*}")) 367 | line, meta = self.__extractDataField(line, ("\(", ".*\)")) 368 | line, refs = self.__extractDataField(line, ("<", ">")) 369 | line, species = self.__extractDataField(line, ("#", "#")) 370 | if numeric_value: 371 | value = self.__eval_range_value(line.strip()) 372 | else: 373 | value = line.strip() 374 | return { 375 | "value": value, 376 | "species": species.split(","), 377 | "meta": meta, 378 | "refs": refs.split(","), 379 | "specific_info": specific_info, 380 | } 381 | 382 | def __extractReactionMechanismInfo(self, line: str): 383 | """ 384 | Extracts reaction string and mechanism info 385 | """ 386 | line = self.__removeTabs(line) 387 | line, meta = self.__extractDataField(line, ("\(", ".*\)")) 388 | rxn_str = line.strip() 389 | meta_list = [] 390 | for meta_line in meta.split(";"): 391 | meta_line, refs = self.__extractDataField(meta_line, ("<", ">")) 392 | meta_line, species = self.__extractDataField(meta_line, ("#", "#")) 393 | meta_list.append( 394 | { 395 | "species": species.split(","), 396 | "refs": refs.split(","), 397 | "meta": meta_line.strip(), 398 | } 399 | ) 400 | return (rxn_str, meta_list) 401 | 402 | def __getBinomialNames(self, species_list: list) -> list: 403 | """ 404 | Returns a list with binomial names mapped to the species codes 405 | employed by BRENDA to attach species to protein entries 406 | """ 407 | species_dict = self.__proteins 408 | return list( 409 | set( 410 | [ 411 | species_dict[s]["name"] 412 | for s in species_list 413 | if s in species_dict.keys() 414 | ] 415 | ) 416 | ) 417 | 418 | def __getFullReferences(self, refs_list: list) -> list: 419 | """ 420 | Returns a list with full reference mapped to the refs codes 421 | employed by BRENDA in each entry 422 | """ 423 | refs_dict = self.__references 424 
| return [refs_dict[s] for s in refs_list if s in refs_dict.keys()] 425 | 426 | def __getDictOfEnzymeActuators(self, pattern: str) -> dict: 427 | res = {} 428 | lines = self.__getDataLines(pattern) 429 | for line in lines: 430 | data = self.extractDataLineInfo(line) 431 | if data["value"] != "more": 432 | res[data["value"]] = { 433 | "species": self.__getBinomialNames(data["species"]), 434 | "meta": data["meta"], 435 | #'refs': data['refs']} 436 | "refs": self.__getFullReferences(data["refs"]), 437 | } 438 | return EnzymePropertyDict(res) 439 | 440 | def __getDictOfEnzymeProperties(self, pattern: str) -> dict: 441 | res = {} 442 | lines = self.__getDataLines(pattern) 443 | for line in lines: 444 | data = self.extractDataLineInfo(line, numeric_value=True) 445 | substrate = data["specific_info"] 446 | if substrate != "more": 447 | if substrate not in res.keys(): 448 | res[substrate] = [] 449 | res[substrate].append( 450 | { 451 | "value": data["value"], 452 | "species": self.__getBinomialNames(data["species"]), 453 | "meta": data["meta"], 454 | #'refs': data['refs']}) 455 | "refs": self.__getFullReferences(data["refs"]), 456 | } 457 | ) 458 | return EnzymePropertyDict(res) 459 | 460 | def __extractTempOrPHData(self, data_type: str) -> list: 461 | values = [] 462 | lines = self.__getDataLines(data_type) 463 | if "R" not in data_type: 464 | eval_value = self.__eval_range_value 465 | else: 466 | 467 | def eval_value(v): 468 | try: 469 | return [float(s) for s in v.split("-")] 470 | except Exception: 471 | return [-999, -999] 472 | 473 | for line in lines: 474 | data = self.extractDataLineInfo(line) 475 | values.append( 476 | { 477 | "value": eval_value(data["value"]), 478 | "species": self.__getBinomialNames(data["species"]), 479 | "meta": data["meta"], 480 | "refs": data["refs"], 481 | } 482 | ) 483 | return values 484 | 485 | @property 486 | def summary(self): 487 | return self.printReactionSummary() 488 | 489 | @property 490 | def ec_number(self): 491 | return 
self.__ec_number 492 | 493 | @property 494 | def name(self): 495 | return self.__name 496 | 497 | @property 498 | def systematic_name(self): 499 | return self.__systematic_name 500 | 501 | @property 502 | def reaction_str(self): 503 | return self.__extractReactionMechanismInfo(self.__mechanism_str)[0] 504 | 505 | @property 506 | def mechanism(self): 507 | return self.__mechanism_str 508 | # return self.__extractReactionMechanismInfo(self.__mechanism_str)[1] 509 | 510 | @property 511 | def reaction_type(self) -> list[str]: 512 | return self.__reaction_type 513 | 514 | @property 515 | def cofactors(self): 516 | return self.__getDictOfEnzymeActuators("CF") 517 | 518 | @property 519 | def metals(self): 520 | return self.__getDictOfEnzymeActuators("ME") 521 | 522 | @property 523 | def inhibitors(self): 524 | return self.__getDictOfEnzymeActuators("IN") 525 | 526 | @property 527 | def activators(self): 528 | return self.__getDictOfEnzymeActuators("AC") 529 | 530 | @property 531 | def KMvalues(self): 532 | return self.__getDictOfEnzymeProperties("KM") 533 | 534 | @property 535 | def KIvalues(self): 536 | return self.__getDictOfEnzymeProperties("KI") 537 | 538 | @property 539 | def KKMvalues(self): 540 | return self.__getDictOfEnzymeProperties("KKM") 541 | 542 | @property 543 | def Kcatvalues(self): 544 | return self.__getDictOfEnzymeProperties("TN") 545 | 546 | @property 547 | def specificActivities(self): 548 | lines = self.__getDataLines("SA") 549 | return [self.extractDataLineInfo(line, numeric_value=True) for line in lines] 550 | 551 | @property 552 | def substratesAndProducts(self) -> list: 553 | """ 554 | Returns list of dicts with evaluated "natural" substrates and products 555 | of the enzyme across organisms. 
556 | """ 557 | substrates, products, res = [], [], [] 558 | lines = self.__getDataLines("NSP") 559 | for line in lines: 560 | data = self.extractDataLineInfo(line) 561 | rxn = ( 562 | data["value"] 563 | .replace("{}", "") 564 | .replace("?", "") 565 | .replace("more", "") 566 | .strip() 567 | ) 568 | try: 569 | subs, prods = rxn.split("=") 570 | subs = [s.strip() for s in subs.split("+") if s.strip() != ""] 571 | prods = [s.strip() for s in prods.split("+") if s.strip() != ""] 572 | subs.sort() 573 | prods.sort() 574 | if subs not in substrates and len(subs) > 0 and len(prods) > 0: 575 | substrates.append(subs) 576 | products.append(prods) 577 | res.append({"substrates": subs, "products": prods}) 578 | except Exception: 579 | pass 580 | return res 581 | 582 | @property 583 | def temperature(self): 584 | return EnzymeConditionDict( 585 | { 586 | "optimum": self.__extractTempOrPHData("TO"), 587 | "range": self.__extractTempOrPHData("TR"), 588 | "stability": self.__extractTempOrPHData("TS"), 589 | } 590 | ) 591 | 592 | @property 593 | def PH(self): 594 | return EnzymeConditionDict( 595 | { 596 | "optimum": self.__extractTempOrPHData("PHO"), 597 | "range": self.__extractTempOrPHData("PHR"), 598 | "stability": self.__extractTempOrPHData("PHS"), 599 | } 600 | ) 601 | 602 | @property 603 | def proteins(self) -> dict: 604 | return self.__proteins 605 | 606 | @property 607 | def organisms(self) -> list: 608 | """ 609 | Returns a list containing all represented species in the database for this reaction 610 | """ 611 | organisms = list(set([s["name"] for s in self.proteins.values()])) 612 | organisms.sort() 613 | return organisms 614 | 615 | @property 616 | def references(self): 617 | return self.__references 618 | -------------------------------------------------------------------------------- /docs/examples.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | 
"source": [ 7 | "# Brendapyrser: A python parser for the BRENDA database\n", 8 | "\n", 9 | "This project provides python classes and functions to parse the text file containing the entire BRENDA enzyme database (https://www.brenda-enzymes.org)\n", 10 | "\n", 11 | "Due to BRENDA's license, BRENDA's database cannot be downloaded directly by the parser, instead, the user is asked to download the database as a text file after accepting usage conditions [here](https://www.brenda-enzymes.org/download_brenda_without_registration.php)." 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 1, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "import numpy as np\n", 21 | "from matplotlib import pyplot as plt\n", 22 | "from brendapyrser import BRENDA\n", 23 | "\n", 24 | "\n", 25 | "dataFile = \"data/brenda_download.txt\"" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "## 1. Parsing BRENDA" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 2, 38 | "metadata": {}, 39 | "outputs": [ 40 | { 41 | "data": { 42 | "text/html": [ 43 | "\n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | "
Number of Enzymes7609
BRENDA copyrightCopyrighted by Dietmar Schomburg, Techn. University\n", 49 | " Braunschweig, GERMANY. Distributed under the License as stated\n", 50 | " at http:/www.brenda-enzymes.org
Brendapyrser version0.0.2
AuthorSemidán Robaina Estévez, 2020-2022
\n", 57 | " " 58 | ], 59 | "text/plain": [ 60 | "" 61 | ] 62 | }, 63 | "execution_count": 2, 64 | "metadata": {}, 65 | "output_type": "execute_result" 66 | } 67 | ], 68 | "source": [ 69 | "# Let's load the database\n", 70 | "brenda = BRENDA(dataFile)\n", 71 | "brenda" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 4, 77 | "metadata": {}, 78 | "outputs": [ 79 | { 80 | "data": { 81 | "image/png": "", 82 | "text/plain": [ 83 | "
" 84 | ] 85 | }, 86 | "metadata": { 87 | "needs_background": "light" 88 | }, 89 | "output_type": "display_data" 90 | }, 91 | { 92 | "name": "stdout", 93 | "output_type": "stream", 94 | "text": [ 95 | "Minimum and maximum values in database: 0.0 mM, 997.0 mM\n" 96 | ] 97 | } 98 | ], 99 | "source": [ 100 | "# Plot all Km values in the database\n", 101 | "BRENDA_KMs = np.array([v for r in brenda.reactions for v in r.KMvalues.get_values()])\n", 102 | "values = BRENDA_KMs[(BRENDA_KMs < 1000) & (BRENDA_KMs >= 0)]\n", 103 | "plt.hist(values)\n", 104 | "plt.title(f\"Median KM value: {np.median(values)}\")\n", 105 | "plt.xlabel(\"KM (mM)\")\n", 106 | "plt.show()\n", 107 | "print(f\"Minimum and maximum values in database: {values.min()} mM, {values.max()} mM\")" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 6, 113 | "metadata": {}, 114 | "outputs": [ 115 | { 116 | "data": { 117 | "image/png": "", 118 | "text/plain": [ 119 | "
" 120 | ] 121 | }, 122 | "metadata": { 123 | "needs_background": "light" 124 | }, 125 | "output_type": "display_data" 126 | }, 127 | { 128 | "name": "stdout", 129 | "output_type": "stream", 130 | "text": [ 131 | "Minimum and maximum values in database: 5.83e-10 1/s, 997.0 1/s\n" 132 | ] 133 | } 134 | ], 135 | "source": [ 136 | "# Plot all Km values in the database\n", 137 | "BRENDA_Kcats = np.array(\n", 138 | " [v for r in brenda.reactions for v in r.Kcatvalues.get_values()]\n", 139 | ")\n", 140 | "values = BRENDA_Kcats[(BRENDA_Kcats < 1000) & (BRENDA_Kcats >= 0)]\n", 141 | "plt.hist(values)\n", 142 | "plt.title(f\"Median Kcat value: {np.median(values)}\")\n", 143 | "plt.xlabel(\"Kcat (1/s)\")\n", 144 | "plt.show()\n", 145 | "print(f\"Minimum and maximum values in database: {values.min()} 1/s, {values.max()} 1/s\")" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 7, 151 | "metadata": {}, 152 | "outputs": [ 153 | { 154 | "data": { 155 | "image/png": "", 156 | "text/plain": [ 157 | "
" 158 | ] 159 | }, 160 | "metadata": { 161 | "needs_background": "light" 162 | }, 163 | "output_type": "display_data" 164 | }, 165 | { 166 | "name": "stdout", 167 | "output_type": "stream", 168 | "text": [ 169 | "Minimum and maximum values in database: 0.0 °C, 125.0 °C\n" 170 | ] 171 | } 172 | ], 173 | "source": [ 174 | "# Plot all enzyme optimal temperature values in the database\n", 175 | "BRENDA_TO = np.array(\n", 176 | " [\n", 177 | " v\n", 178 | " for r in brenda.reactions\n", 179 | " for v in r.temperature.filter_by_condition(\"optimum\").get_values()\n", 180 | " ]\n", 181 | ")\n", 182 | "values = BRENDA_TO[(BRENDA_TO >= 0)]\n", 183 | "plt.hist(values)\n", 184 | "plt.title(f\"Median Optimum Temperature: {np.median(values)}\")\n", 185 | "plt.xlabel(\"TO (${}^oC$)\")\n", 186 | "plt.show()\n", 187 | "print(f\"Minimum and maximum values in database: {values.min()} °C, {values.max()} °C\")" 188 | ] 189 | }, 190 | { 191 | "cell_type": "markdown", 192 | "metadata": {}, 193 | "source": [ 194 | "We see that the median optimal temperature for all enzymes in the BRENDA database is 37 °C! That's interesting... perhaps all organisms have agreed to prefer that temperature over other ones... or, more likely, it could be that BRENDA database is biased towards mammals and microorganisms that live within mammals... such as human pathogens.\n", 195 | "\n", 196 | "Let's filter results for a particular species, let's try with a hyperthermophilic bacterial genus, _Thermotoga_" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": 8, 202 | "metadata": {}, 203 | "outputs": [ 204 | { 205 | "data": { 206 | "image/png": "", 207 | "text/plain": [ 208 | "
" 209 | ] 210 | }, 211 | "metadata": { 212 | "needs_background": "light" 213 | }, 214 | "output_type": "display_data" 215 | }, 216 | { 217 | "name": "stdout", 218 | "output_type": "stream", 219 | "text": [ 220 | "Minimum and maximum values in database: 20.0 °C, 105.0 °C\n" 221 | ] 222 | } 223 | ], 224 | "source": [ 225 | "# Plot all enzyme optimal temperature values in the database\n", 226 | "species = \"Thermotoga\"\n", 227 | "BRENDA_TO = np.array(\n", 228 | " [\n", 229 | " v\n", 230 | " for r in brenda.reactions.filter_by_organism(species)\n", 231 | " for v in r.temperature.filter_by_condition(\"optimum\")\n", 232 | " .filter_by_organism(species)\n", 233 | " .get_values()\n", 234 | " ]\n", 235 | ")\n", 236 | "values = BRENDA_TO[(BRENDA_TO >= 0)]\n", 237 | "plt.hist(values)\n", 238 | "plt.title(f\"Median Optimum Temperature: {np.median(values)}\")\n", 239 | "plt.xlabel(\"TO (${}^oC$)\")\n", 240 | "plt.show()\n", 241 | "print(f\"Minimum and maximum values in database: {values.min()} °C, {values.max()} °C\")" 242 | ] 243 | }, 244 | { 245 | "cell_type": "markdown", 246 | "metadata": {}, 247 | "source": [ 248 | "We can see that the median optimal temperature among all enzymes in the genus, 80°C, is much higher than in the case of the entire database. That's consistent with the fact that _Thermotoga_ are hyperthermophilic... alright!" 249 | ] 250 | }, 251 | { 252 | "cell_type": "markdown", 253 | "metadata": {}, 254 | "source": [ 255 | "## 2. Extracting data for _Pyruvate kinase_" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": 3, 261 | "metadata": {}, 262 | "outputs": [ 263 | { 264 | "data": { 265 | "text/html": [ 266 | "\n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | "
Enzyme identifier2.7.1.40
NamePyruvate kinase
Systematic nameATP:pyruvate 2-O-phosphotransferase
Reaction typePhospho group transfer
ReactionATP + pyruvate <=> ADP + phosphoenolpyruvate
\n", 280 | " " 281 | ], 282 | "text/plain": [ 283 | "" 284 | ] 285 | }, 286 | "execution_count": 3, 287 | "metadata": {}, 288 | "output_type": "execute_result" 289 | } 290 | ], 291 | "source": [ 292 | "# We can retrieve an enzyme entry by its EC number like this\n", 293 | "r = brenda.reactions.get_by_id(\"2.7.1.40\")\n", 294 | "r" 295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "execution_count": 10, 300 | "metadata": {}, 301 | "outputs": [ 302 | { 303 | "data": { 304 | "image/png": "", 305 | "text/plain": [ 306 | "
" 307 | ] 308 | }, 309 | "metadata": { 310 | "needs_background": "light" 311 | }, 312 | "output_type": "display_data" 313 | } 314 | ], 315 | "source": [ 316 | "# Here are all the KM values for phosphoenolpyruvate associated with this enzyme class\n", 317 | "compound = \"phosphoenolpyruvate\"\n", 318 | "kms = r.KMvalues.filter_by_compound(compound).get_values()\n", 319 | "plt.hist(kms)\n", 320 | "plt.xlabel(\"KM (mM)\")\n", 321 | "plt.title(f\"{r.name} ({compound})\")\n", 322 | "plt.show()" 323 | ] 324 | }, 325 | { 326 | "cell_type": "code", 327 | "execution_count": 14, 328 | "metadata": {}, 329 | "outputs": [ 330 | { 331 | "data": { 332 | "image/png": "", 333 | "text/plain": [ 334 | "
" 335 | ] 336 | }, 337 | "metadata": { 338 | "needs_background": "light" 339 | }, 340 | "output_type": "display_data" 341 | } 342 | ], 343 | "source": [ 344 | "# Here are all the KM values for phosphoenolpyruvate associated with this enzyme class\n", 345 | "compound = \"phosphoenolpyruvate\"\n", 346 | "KMs = r.KMvalues.filter_by_compound(compound).get_values()\n", 347 | "plt.hist(KMs)\n", 348 | "plt.xlabel(\"KM (mM)\")\n", 349 | "plt.title(f\"{r.name} ({compound})\")\n", 350 | "plt.show()" 351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "execution_count": 12, 356 | "metadata": {}, 357 | "outputs": [ 358 | { 359 | "data": { 360 | "text/plain": [ 361 | "[0.051500000000000004, 0.18]" 362 | ] 363 | }, 364 | "execution_count": 12, 365 | "metadata": {}, 366 | "output_type": "execute_result" 367 | } 368 | ], 369 | "source": [ 370 | "# And further filtered by organism\n", 371 | "r.KMvalues.filter_by_organism(\"Bos taurus\").filter_by_compound(\n", 372 | " \"phosphoenolpyruvate\"\n", 373 | ").get_values()" 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": 11, 379 | "metadata": {}, 380 | "outputs": [ 381 | { 382 | "data": { 383 | "image/png": "", 384 | "text/plain": [ 385 | "
" 386 | ] 387 | }, 388 | "metadata": { 389 | "needs_background": "light" 390 | }, 391 | "output_type": "display_data" 392 | } 393 | ], 394 | "source": [ 395 | "# Here are all the Kcat values for phosphoenolpyruvate associated with this enzyme class\n", 396 | "compound = \"phosphoenolpyruvate\"\n", 397 | "kcats = r.Kcatvalues.filter_by_compound(compound).get_values()\n", 398 | "plt.hist(kcats)\n", 399 | "plt.xlabel(\"Kcat ($s^{-1}$)\")\n", 400 | "plt.title(f\"{r.name} ({compound})\")\n", 401 | "plt.show()" 402 | ] 403 | }, 404 | { 405 | "cell_type": "markdown", 406 | "metadata": {}, 407 | "source": [ 408 | "## 3 Finding all KM values for a given substrate and organism\n", 409 | "Next, we will retrieve KM values associated with a particular substrate for all enzymes in a given species. Will the KM values distribute around a narrow or wider concentration range? Since substrate concentration in the cytoplasm is the same for all enzymes it makes sense that all cytoplasmic enzymes utilizing that substrate have similar KM values. Let's test this idea with _Escherichia coli_ and some common substrates participating in the central carbon metabolism." 410 | ] 411 | }, 412 | { 413 | "cell_type": "code", 414 | "execution_count": 13, 415 | "metadata": {}, 416 | "outputs": [ 417 | { 418 | "data": { 419 | "image/png": "", 420 | "text/plain": [ 421 | "
" 422 | ] 423 | }, 424 | "metadata": { 425 | "needs_background": "light" 426 | }, 427 | "output_type": "display_data" 428 | } 429 | ], 430 | "source": [ 431 | "species, compound = \"Escherichia coli\", \"NADH\"\n", 432 | "KMs = np.array(\n", 433 | " [\n", 434 | " v\n", 435 | " for r in brenda.reactions.filter_by_organism(species)\n", 436 | " for v in r.KMvalues.filter_by_compound(compound)\n", 437 | " .filter_by_organism(species)\n", 438 | " .get_values()\n", 439 | " ]\n", 440 | ")\n", 441 | "\n", 442 | "if len(KMs) > 0:\n", 443 | " plt.hist(KMs)\n", 444 | " plt.xlabel(\"KM (mM)\")\n", 445 | " plt.title(f\"{species} KMs ({compound}), median = {np.median((KMs))}\")\n", 446 | " plt.show()\n", 447 | "else:\n", 448 | " print(\"No KM values for compound\")" 449 | ] 450 | }, 451 | { 452 | "cell_type": "markdown", 453 | "metadata": {}, 454 | "source": [ 455 | "That's interesting! Typical NADH concentrations are low in _Escherichia coli_, e.g., from [BioNumbers](http://book.bionumbers.org/what-are-the-concentrations-of-free-metabolites-in-cells/) we get a value of 0.083 mM. The median KM value for NADH among all enzymes binding it is lower, as we see in the plot above! Hence, it looks like most enzymes are (nearly) saturated for NADH and thus fluxes are sort of independent of NADH concentration." 456 | ] 457 | }, 458 | { 459 | "cell_type": "markdown", 460 | "metadata": {}, 461 | "source": [ 462 | "# 4 Filtering reactions by specific compound\n", 463 | "\n", 464 | "We can also filter reactions in BRENDA by a specific compound: substrate, product or either of the two. 
Let's filter reactions containing _phosphoenolpyruvate_ as a substrate, product or both to exemplify this feature" 465 | ] 466 | }, 467 | { 468 | "cell_type": "code", 469 | "execution_count": 25, 470 | "metadata": {}, 471 | "outputs": [ 472 | { 473 | "data": { 474 | "text/html": [ 475 | "\n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | "
Enzyme identifier2.5.1.19
Name3-phosphoshikimate 1-carboxyvinyltransferase
Systematic namephosphoenolpyruvate:3-phosphoshikimate 5-O-(1-carboxyvinyl)-transferase
Reaction typeEnolpyruvate group transfer (#3,52,55# induced-fit mechanism, formation
Reactionphosphoenolpyruvate + 3-phosphoshikimate <=> phosphate +5-O-
\n", 489 | " " 490 | ], 491 | "text/plain": [ 492 | "" 493 | ] 494 | }, 495 | "execution_count": 25, 496 | "metadata": {}, 497 | "output_type": "execute_result" 498 | } 499 | ], 500 | "source": [ 501 | "substrate_rxns = brenda.reactions.filter_by_substrate(\"phosphoenolpyruvate\")\n", 502 | "substrate_rxns[2]" 503 | ] 504 | }, 505 | { 506 | "cell_type": "code", 507 | "execution_count": 26, 508 | "metadata": {}, 509 | "outputs": [ 510 | { 511 | "data": { 512 | "text/html": [ 513 | "\n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | "
Enzyme identifier2.5.1.7
NameUdp-n-acetylglucosamine 1-carboxyvinyltransferase
Systematic namephosphoenolpyruvate:UDP-N-acetyl-D-glucosamine
Reaction typeCarboxyvinyl group transfer
Reactionphosphoenolpyruvate + UDP-N-acetyl-alpha-D-glucosamine <=> phosphate +UDP-N-acetyl-3-O-
\n", 527 | " " 528 | ], 529 | "text/plain": [ 530 | "" 531 | ] 532 | }, 533 | "execution_count": 26, 534 | "metadata": {}, 535 | "output_type": "execute_result" 536 | } 537 | ], 538 | "source": [ 539 | "compound_rxns = brenda.reactions.filter_by_compound(\"phosphoenolpyruvate\")\n", 540 | "compound_rxns[7]" 541 | ] 542 | } 543 | ], 544 | "metadata": { 545 | "interpreter": { 546 | "hash": "367a058ebb24ea2c2bb1633bf810ec6a1a05f59e065f27f721ea93103e797079" 547 | }, 548 | "kernelspec": { 549 | "display_name": "Python 3 (ipykernel)", 550 | "language": "python", 551 | "name": "python3" 552 | }, 553 | "language_info": { 554 | "codemirror_mode": { 555 | "name": "ipython", 556 | "version": 3 557 | }, 558 | "file_extension": ".py", 559 | "mimetype": "text/x-python", 560 | "name": "python", 561 | "nbconvert_exporter": "python", 562 | "pygments_lexer": "ipython3", 563 | "version": "3.9.6" 564 | } 565 | }, 566 | "nbformat": 4, 567 | "nbformat_minor": 4 568 | } 569 | --------------------------------------------------------------------------------