├── .github ├── ISSUE_TEMPLATE │ ├── bug-report.md │ └── feature_request.md ├── dependabot.yml └── workflows │ ├── build_test.yml │ ├── cffconvert.yml │ ├── publish_pypi.yml │ └── stale_issue_pr.yml ├── .gitignore ├── .prospector.yml ├── .readthedocs.yml ├── .zenodo.json ├── CHANGELOG.md ├── CITATION.cff ├── CODE_OF_CONDUCT.rst ├── CONTRIBUTING.rst ├── LICENSE ├── MANIFEST.in ├── NOTICE ├── README.md ├── docs ├── .setup_save.rst ├── Makefile ├── pdb │ ├── 1AK4_10w.pdb │ ├── 1AK4_5w.pdb │ ├── 3CRO.pdb │ ├── decoy.pdb │ ├── dummy.pdb │ ├── dummy_transform.pdb │ ├── ref.pdb │ └── test.pdb └── source │ ├── .vscode │ └── settings.json │ ├── _templates │ └── autosummary │ │ └── method.rst │ ├── conf.py │ ├── index.rst │ ├── pdb2sql.StructureSimilarity.rst │ ├── pdb2sql.interface.rst │ ├── pdb2sql.pdb2sqlcore.rst │ ├── pdb2sql.superpose.rst │ ├── pdb2sql.transform.rst │ ├── pdb2sql.utils.rst │ └── tutorial.rst ├── example ├── align_pdb.py └── axes_cyl.py ├── makefile ├── paper ├── arch.png ├── comp.png ├── paper.bibtex ├── paper.md └── sim.png ├── pdb2sql ├── StructureSimilarity.py ├── __init__.py ├── __version__.py ├── align.py ├── interface.py ├── many2sql.py ├── pdb2sql_base.py ├── pdb2sqlcore.py ├── superpose.py ├── transform.py └── utils.py ├── setup.py └── test ├── 1AK4_5w_superposed_on_1AK4_10w.pdb ├── __init__.py ├── aligned_structure.pdb ├── pdb ├── 1AK4 │ ├── 1AK4_10w.pdb │ ├── 1AK4_10w_aligned.pdb │ ├── 1AK4_5w.pdb │ ├── 1AK4_5w_nonmatch.pdb │ ├── target.izone │ ├── target.lzone │ └── target.pdb ├── 3CRO.pdb ├── 3CRO_H.pdb ├── dummy_blank_chainID_with_segID.pdb ├── dummy_blank_chainID_without_segID.pdb ├── dummy_blank_element.pdb ├── dummy_blank_occupancy.pdb ├── dummy_blank_temperature.pdb ├── dummy_longline.pdb ├── dummy_template.pdb ├── dummy_transform.pdb └── test_model.pdb ├── test_align.py ├── test_interface.py ├── test_many2sql.py ├── test_pdb2sqlcore.py ├── test_structureSimilarity.py ├── test_superpose.py ├── test_transform.py ├── test_utils.py └── 
utils.py /.github/ISSUE_TEMPLATE/bug-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **Environment:** 14 | - OS system: 15 | - Version: 16 | - Branch commit ID: 17 | - Inputs: 18 | 19 | **To Reproduce** 20 | Steps/commands to reproduce the behaviour: 21 | 1. 22 | 2. 23 | 3. 24 | 25 | **Expected Results** 26 | A clear and concise description of what you expected to happen. 27 | 28 | **Actual Results or Error Info** 29 | If applicable, add screenshots to help explain your problem. 30 | 31 | **Additional Context** 32 | Add any other context about the problem here. 33 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: pip 4 | directory: "/" 5 | schedule: 6 | interval: weekly 7 | open-pull-requests-limit: 10 8 | -------------------------------------------------------------------------------- /.github/workflows/build_test.yml: -------------------------------------------------------------------------------- 1 | name: Build_Test 2 | 3 | on: 4 | push: 5 | paths: 6 | - pdb2sql/** 7 | - test/** 8 | - setup.py 9 | pull_request: 10 | paths: 11 | - pdb2sql/** 12 | - test/** 13 | - setup.py 14 | 15 | jobs: 16 | smoke_test: 17 | runs-on: ubuntu-latest 18 | steps: 19 | - uses: actions/checkout@v2 20 | - name: Set up Python 3.7 21 | uses: actions/setup-python@v2 22 | with: 23 | python-version: 3.7 24 | - name: Install dependencies 25 | run: | 26 | python -m pip install --upgrade pip 27 | pip install -e .[test] 28 | - name: Test with pytest 29 | run: | 30 | pytest --cov=pdb2sql --cov-report=xml 31 | - name: Coveralls 32 | env: 33 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 34 | run: | 35 | coveralls --service=github 36 | 37 | basic_test: 38 | needs: smoke_test 39 | runs-on: ubuntu-latest 40 | strategy: 41 | matrix: 42 | python-version: [3.8, 3.9] 43 | steps: 44 | - uses: actions/checkout@v2 45 | - name: Set up Python ${{ matrix.python-version }} 46 | uses: actions/setup-python@v2 47 | with: 48 | python-version: ${{ matrix.python-version }} 49 | - name: Install dependencies 50 | run: | 51 | python -m pip install --upgrade pip 52 | pip install -e .[test] 53 | - name: Test with pytest 54 | run: | 55 | pytest --cov=pdb2sql --cov-report=xml 56 | - name: Coveralls 57 | env: 58 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 59 | run: | 60 | coveralls --service=github 61 | -------------------------------------------------------------------------------- 
/.github/workflows/cffconvert.yml: -------------------------------------------------------------------------------- 1 | name: cffconvert 2 | 3 | on: 4 | push: 5 | paths: 6 | - CITATION.cff 7 | 8 | jobs: 9 | validate: 10 | name: "validate" 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Check out a copy of the repository 14 | uses: actions/checkout@v2 15 | 16 | - name: Check whether the citation metadata from CITATION.cff is valid 17 | uses: citation-file-format/cffconvert-github-action@2.0.0 18 | with: 19 | args: "--validate" 20 | -------------------------------------------------------------------------------- /.github/workflows/publish_pypi.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python distributions to PyPI or TestPyPI 2 | on: 3 | workflow_dispatch: 4 | inputs: 5 | version: 6 | description: 'Version upload to pypi' 7 | required: true 8 | pypi_repo: 9 | description: 'Upload to testpypi or pypi' 10 | default: 'testpypi' 11 | required: true 12 | 13 | jobs: 14 | publish: 15 | runs-on: ubuntu-latest 16 | steps: 17 | - uses: actions/checkout@master 18 | with: 19 | ref: ${{ github.event.inputs.version }} 20 | - uses: actions/setup-python@v3 21 | with: 22 | python-version: '3.9' 23 | - name: Check distribution version 24 | run: | 25 | v=$(python setup.py --version) 26 | if [[ $v != ${{ github.event.inputs.version }} ]]; then 27 | echo "ERROR: Package version $v is not same as input version ${{ github.event.inputs.version }}." 28 | echo "Update package version, tag the commit and rerun this workflow." 29 | exit 1 30 | fi 31 | - name: Install pypa/build 32 | run: >- 33 | python -m 34 | pip install 35 | build 36 | --user 37 | - name: Build a binary wheel and a source tarball 38 | run: >- 39 | python -m 40 | build 41 | --sdist 42 | --wheel 43 | --outdir dist/ 44 | . 
45 | - name: Publish distribution to TestPyPI 46 | if: ${{ github.event.inputs.pypi_repo == 'testpypi' }} 47 | uses: pypa/gh-action-pypi-publish@master 48 | with: 49 | user: __token__ 50 | password: ${{ secrets.TEST_PYPI_API_TOKEN }} 51 | repository_url: https://test.pypi.org/legacy/ 52 | - name: Publish distribution to PyPI 53 | if: ${{ github.event.inputs.pypi_repo == 'pypi' }} 54 | uses: pypa/gh-action-pypi-publish@master 55 | with: 56 | user: __token__ 57 | password: ${{ secrets.PYPI_API_TOKEN }} 58 | -------------------------------------------------------------------------------- /.github/workflows/stale_issue_pr.yml: -------------------------------------------------------------------------------- 1 | name: Close inactive issues and pull requests 2 | on: 3 | schedule: 4 | - cron: "14 3 * * 1,3,5" # check at 03:14 on Monday, Wednesday, and Friday 5 | 6 | jobs: 7 | close-issues: 8 | runs-on: ubuntu-latest 9 | permissions: 10 | issues: write 11 | pull-requests: write 12 | steps: 13 | - uses: actions/stale@v5.0.0 14 | with: 15 | days-before-issue-stale: 30 16 | days-before-issue-close: 7 17 | stale-issue-label: "stale" 18 | stale-issue-message: "This issue is stale because it has been open for 30 days with no activity. Remove stale label or comment or this will be closed in 7 days." 19 | close-issue-message: "This issue was closed because it has been inactive for 7 days since being marked as stale." 20 | days-before-pr-stale: 14 21 | days-before-pr-close: 7 22 | stale-pr-message: "This PR is stale because it has been open for 14 days with no activity. Remove stale label or comment or this will be closed in 7 days." 23 | close-pr-message: "This PR was closed because it has been inactive for 7 days since being marked as stale." 
24 | exempt-issue-labels: 'blocked' -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized 2 | *__pycache__* 3 | *sublime.* 4 | *.egg-info 5 | .vscode/* 6 | .DS_Store 7 | -------------------------------------------------------------------------------- /.prospector.yml: -------------------------------------------------------------------------------- 1 | # prospector configuration file 2 | 3 | --- 4 | 5 | output-format: grouped 6 | 7 | strictness: medium 8 | doc-warnings: false 9 | test-warnings: true 10 | member-warnings: false 11 | 12 | ignore-paths: 13 | - docs 14 | - paper 15 | 16 | pyroma: 17 | run: true 18 | 19 | pep8: 20 | full: true 21 | 22 | pep257: 23 | disable: [ 24 | # Disable because not part of PEP257 official convention: 25 | # see http://pep257.readthedocs.io/en/latest/error_codes.html 26 | D203, # 1 blank line required before class docstring 27 | D212, # Multi-line docstring summary should start at the first line 28 | D213, # Multi-line docstring summary should start at the second line 29 | D404, # First word of the docstring should not be This 30 | ] 31 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | 2 | # .readthedocs.yml 3 | # Read the Docs configuration file 4 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 5 | 6 | # Required 7 | version: 2 8 | 9 | # Build documentation in the docs/ directory with Sphinx 10 | sphinx: 11 | configuration: docs/source/conf.py 12 | 13 | # Build documentation with MkDocs 14 | #mkdocs: 15 | # configuration: mkdocs.yml 16 | 17 | # Optionally build your docs in additional formats such as PDF and ePub 18 | formats: all 19 | 20 | # Optionally set the version of Python and requirements required to build your 
docs 21 | python: 22 | version: 3.7 23 | install: 24 | - method: pip 25 | path: . 26 | extra_requirements: 27 | - docs 28 | system_packages: true -------------------------------------------------------------------------------- /.zenodo.json: -------------------------------------------------------------------------------- 1 | { 2 | "creators": [ 3 | { 4 | "affiliation": "Netherlands eScience Center", 5 | "name": "Renaud, Nicolas", 6 | "orcid": "0000-0001-9589-2694" 7 | }, 8 | { 9 | "affiliation": "Netherlands eScience Center", 10 | "name": "Geng, Cunliang", 11 | "orcid": "0000-0002-1409-8358" 12 | } 13 | ], 14 | "description": "Fast and versatile biomolecular structure PDB file parser using SQL queries.", 15 | "keywords": [ 16 | "pdb parser", 17 | "protein structure", 18 | "biomolecule", 19 | "bioinformatics", 20 | "CAPRI" 21 | ], 22 | "license": { 23 | "id": "Apache-2.0" 24 | }, 25 | "title": "The pdb2sql Python Package: Parsing, Manipulation and Analysis of PDB Files Using SQL Queries" 26 | } 27 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | 3 | 4 | All notable changes to this project will be documented in this file. 5 | This project adheres to [Semantic Versioning](http://semver.org/). 
6 | 7 | ## 0.5.2 8 | - Fixed some bugs 9 | 10 | ## 0.5.1 11 | - Updated `compute_CapriClass` conditions 12 | 13 | ## 0.5.0 14 | - Added atom name selection for lrmsd calculation 15 | - Removed hardcoded chainIDs and added chainID selection in `StructureSimilarity` 16 | 17 | ## 0.4.0 18 | - Added `many2sql` to support reading multiple PDB files 19 | - Added support for `Path` objects of input PDB files 20 | - Added support for `help(pdb2sql)` 21 | - Updated assignment of chain IDs in `StructureSimilarity` 22 | 23 | ## 0.3.0 24 | - Added `align` to superpose a structure to a specific axis or plane 25 | - Added `superpose` to superpose two structures based on selections 26 | - Added `fetch` to download a PDB file with a given PDB ID 27 | - Updated `interface` object to take `pdb2sql` object as input -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | # YAML 1.2 2 | --- 3 | abstract: "Fast and versatile biomolecular structure PDB file parser using SQL queries." 4 | authors: 5 | - 6 | affiliation: "Netherlands eScience Center" 7 | family-names: Renaud 8 | given-names: Nicolas 9 | orcid: "https://orcid.org/0000-0001-9589-2694" 10 | - 11 | affiliation: "Netherlands eScience Center" 12 | family-names: Geng 13 | given-names: Cunliang 14 | orcid: "https://orcid.org/0000-0002-1409-8358" 15 | cff-version: 1.2.0 16 | doi: "10.5281/zenodo.3232887" 17 | keywords: 18 | - "pdb parser" 19 | - "protein structure" 20 | - biomolecule 21 | - bioinformatics 22 | - CAPRI 23 | license: "Apache-2.0" 24 | message: "If you use this software, please cite it using these metadata." 25 | repository-code: "https://github.com/DeepRank/pdb2sql" 26 | title: pdb2sql 27 | ... 
28 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.rst: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | Contributor Covenant Code of Conduct 3 | ############################################################################### 4 | 5 | Our Pledge 6 | ********** 7 | 8 | In the interest of fostering an open and welcoming environment, we as 9 | contributors and maintainers pledge to making participation in our project and 10 | our community a harassment-free experience for everyone, regardless of age, body 11 | size, disability, ethnicity, gender identity and expression, level of experience, 12 | education, socio-economic status, nationality, personal appearance, race, 13 | religion, or sexual identity and orientation. 14 | 15 | Our Standards 16 | ************* 17 | 18 | Examples of behavior that contributes to creating a positive environment 19 | include: 20 | 21 | * Using welcoming and inclusive language 22 | * Being respectful of differing viewpoints and experiences 23 | * Gracefully accepting constructive criticism 24 | * Focusing on what is best for the community 25 | * Showing empathy towards other community members 26 | 27 | Examples of unacceptable behavior by participants include: 28 | 29 | * The use of sexualized language or imagery and unwelcome sexual attention or 30 | advances 31 | * Trolling, insulting/derogatory comments, and personal or political attacks 32 | * Public or private harassment 33 | * Publishing others' private information, such as a physical or electronic 34 | address, without explicit permission 35 | * Other conduct which could reasonably be considered inappropriate in a 36 | professional setting 37 | 38 | Our Responsibilities 39 | ******************** 40 | 41 | Project maintainers are responsible for clarifying the standards of acceptable 42 | behavior and are expected 
to take appropriate and fair corrective action in 43 | response to any instances of unacceptable behavior. 44 | 45 | Project maintainers have the right and responsibility to remove, edit, or 46 | reject comments, commits, code, wiki edits, issues, and other contributions 47 | that are not aligned to this Code of Conduct, or to ban temporarily or 48 | permanently any contributor for other behaviors that they deem inappropriate, 49 | threatening, offensive, or harmful. 50 | 51 | Scope 52 | ***** 53 | 54 | This Code of Conduct applies both within project spaces and in public spaces 55 | when an individual is representing the project or its community. Examples of 56 | representing a project or community include using an official project e-mail 57 | address, posting via an official social media account, or acting as an appointed 58 | representative at an online or offline event. Representation of a project may be 59 | further defined and clarified by project maintainers. 60 | 61 | Enforcement 62 | *********** 63 | 64 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 65 | reported by contacting the project team at n.renaud@esciencecenter.nl. All 66 | complaints will be reviewed and investigated and will result in a response that 67 | is deemed necessary and appropriate to the circumstances. The project team is 68 | obligated to maintain confidentiality with regard to the reporter of an incident. 69 | Further details of specific enforcement policies may be posted separately. 70 | 71 | Project maintainers who do not follow or enforce the Code of Conduct in good 72 | faith may face temporary or permanent repercussions as determined by other 73 | members of the project's leadership. 
74 | 75 | Attribution 76 | *********** 77 | 78 | This Code of Conduct is adapted from the `Contributor Covenant <https://www.contributor-covenant.org>`_, version 1.4, 79 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 80 | -------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | ############################ 2 | Contributing guidelines 3 | ############################ 4 | 5 | We welcome any kind of contribution to our software, from a simple comment or question to a full-fledged `pull request `_. Please read and follow our `Code of Conduct `_. 6 | 7 | A contribution can be one of the following cases: 8 | 9 | #. you have a question; 10 | #. you think you may have found a bug (including unexpected behavior); 11 | #. you want to make some kind of change to the code base (e.g. to fix a bug, to add a new feature, to update documentation). 12 | 13 | The sections below outline the steps in each case. 14 | 15 | You have a question 16 | ******************* 17 | 18 | #. use the search functionality `here `__ to see if someone already filed the same issue; 19 | #. if your issue search did not yield any relevant results, make a new issue; 20 | #. apply the "Question" label; apply other labels when relevant. 21 | 22 | You think you may have found a bug 23 | ********************************** 24 | 25 | #. use the search functionality `here `__ to see if someone already filed the same issue; 26 | #. if your issue search did not yield any relevant results, make a new issue, making sure to provide enough information to the rest of the community to understand the cause and context of the problem. Depending on the issue, you may want to include: 27 | - the `SHA hashcode `_ of the commit that is causing your problem; 28 | - some identifying information (name and version number) for dependencies you're using; 29 | - information about the operating system; 30 | #. 
apply relevant labels to the newly created issue. 31 | 32 | You want to make some kind of change to the code base 33 | ***************************************************** 34 | 35 | #. (**important**) announce your plan to the rest of the community *before you start working*. This announcement should be in the form of a (new) issue; 36 | #. (**important**) wait until some kind of consensus is reached about your idea being a good idea; 37 | #. if needed, fork the repository to your own Github profile and create your own feature branch off of the latest master commit. While working on your feature branch, make sure to stay up to date with the master branch by pulling in changes, possibly from the 'upstream' repository (follow the instructions `here `__ and `here `__); 38 | #. make sure the existing tests still work by running ``pytest`` from the `test/` folder; 39 | #. add your own tests (if necessary); 40 | #. update or expand the documentation; 41 | #. `push `_ your feature branch to (your fork of) the PDB2SQL repository on GitHub; 42 | #. create the pull request, e.g. following the instructions `here `__. 43 | 44 | In case you feel like you've made a valuable contribution, but you don't know how to write or run tests for it, or how to generate the documentation: don't let this discourage you from making the pull request; we can help you! Just go ahead and submit the pull request, but keep in mind that you might be asked to append additional commits to your pull request. 45 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 
9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. 
For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "{}" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | 204 | 205 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include README.md 3 | graft test/pdb 4 | include test/utils.py -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | This product includes PDB2SQL, software developed by 2 | Netherlands eScience Center. 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PDB2SQL 2 | 3 | [![PyPI](https://img.shields.io/pypi/v/pdb2sql)](https://pypi.org/project/pdb2sql/) 4 | [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3232887.svg)](https://doi.org/10.5281/zenodo.3232887) 5 | [![RSD](https://img.shields.io/badge/RSD-pdb2sql-red)](https://research-software.nl/software/pdb2sql) 6 | ![Build_Test](https://github.com/DeepRank/pdb2sql/workflows/Build_Test/badge.svg) 7 | [![Coverage Status](https://coveralls.io/repos/github/DeepRank/pdb2sql/badge.svg)](https://coveralls.io/github/DeepRank/pdb2sql) 8 | [![Codacy Badge](https://app.codacy.com/project/badge/Grade/36ad228df234488ab70ade6b2a80d54b)](https://www.codacy.com/gh/DeepRank/pdb2sql/dashboard?utm_source=github.com&utm_medium=referral&utm_content=DeepRank/pdb2sql&utm_campaign=Badge_Grade) 9 | [![Documentation Status](https://readthedocs.org/projects/pdb2sql/badge/?version=latest)](https://pdb2sql.readthedocs.io/en/latest/?badge=latest) 10 | [![DOI](https://joss.theoj.org/papers/10.21105/joss.02077/status.svg)](https://doi.org/10.21105/joss.02077) 11 | 12 | `pdb2sql` is a Python package that leverages SQL queries to parse, manipulate and process PDB files. It provides: 13 | 14 | - a powerful `pdb2sql` object to convert PDB data into an SQL database 15 | - structure transformation functions (rotations, translations...) 16 | - useful capabilities to 17 | - calculate structure interface (contact atoms and residues) 18 | - calculate structure similarity (iRMSD, lRMSD, FNAT, DockQ...) 19 | 20 | ## Installation 21 | 22 | ``` 23 | pip install pdb2sql 24 | ``` 25 | 26 | ## Documentation 27 | The documentation of the package, alongside a small tutorial, can be found at: 28 | - <https://pdb2sql.readthedocs.io/en/latest/> 29 | 30 | ## Quick Example 31 | 32 | `pdb2sql` makes it easy to load a PDB file into an object. Once loaded, the data can be parsed using SQL queries.
To facilitate adoption of the tool, simple methods have been developed to wrap the SQL queries. For example, to obtain the positions of all carbon, nitrogen and oxygen atoms of chain A from all residues but VAL and LEU, one can use: 33 | 34 | ```python 35 | from pdb2sql import pdb2sql 36 | pdb = pdb2sql('1AK4.pdb') 37 | atoms = pdb.get('x,y,z', 38 | name = ['C','N', 'O'], 39 | no_resName = ['VAL','LEU'], 40 | chainID = 'A') 41 | ``` 42 | -------------------------------------------------------------------------------- /docs/.setup_save.rst: -------------------------------------------------------------------------------- 1 | \.setup\_save module 2 | ==================== 3 | 4 | .. automodule:: .setup_save 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile clean check 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile 20 | cd source; $(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | 22 | 23 | clean: 24 | rm -f source/decoy.db 25 | rm -fr source/api 26 | rm -fr source/_build 27 | 28 | check: 29 | open source/_build/html/index.html 30 | -------------------------------------------------------------------------------- /docs/pdb/dummy.pdb: -------------------------------------------------------------------------------- 1 | ATOM 1 N MET A 1 -20.948 -13.418 28.320 1.00 46.93 N 2 | ATOM 2 CA MET A 1 -21.093 -12.112 28.939 1.00 52.50 C 3 | ATOM 3 C MET A 1 -22.482 -11.566 28.846 1.00 52.55 C 4 | ATOM 4 O MET A 1 -22.816 -10.393 28.618 1.00 52.75 O 5 | ATOM 5 CB MET A 1 -19.916 -11.178 28.789 1.00 59.92 C 6 | ATOM 6 CG MET A 1 -18.839 -11.701 29.713 1.00 80.88 C 7 | ATOM 7 SD MET A 1 -17.178 -11.517 29.038 1.00 95.94 S 8 | ATOM 8 CE MET A 1 -16.527 -13.173 29.365 1.00 90.58 C 9 | ATOM 9 N GLN A 2 -23.243 -12.593 29.074 1.00 51.78 N 10 | ATOM 10 CA GLN A 2 -24.639 -12.681 29.076 1.00 52.49 C 11 | ATOM 11 C GLN A 2 -25.268 -12.252 30.349 1.00 42.74 C 12 | ATOM 12 O GLN A 2 -24.688 -12.207 31.435 1.00 47.12 O 13 | ATOM 13 CB GLN A 2 -24.971 -14.147 28.858 1.00 45.95 C 14 | ATOM 14 CG GLN A 2 -24.141 -14.712 27.710 1.00 53.26 C 15 | ATOM 15 CD GLN A 2 -24.923 -15.776 27.001 1.00 68.74 C 16 | ATOM 16 OE1 GLN A 2 -25.159 -16.851 27.563 1.00 82.61 O 17 | ATOM 17 NE2 GLN A 2 -25.382 -15.458 25.797 1.00 76.83 N 18 | ATOM 18 N THR A 3 -26.513 -11.973 30.116 1.00 21.94 N 19 | ATOM 19 CA THR A 3 -27.440 -11.567 31.088 1.00 15.55 C 20 | ATOM 20 C THR A 3 -28.200 -12.824 31.459 1.00 5.55 C 21 | ATOM 21 O THR A 3 -27.960 -13.910 30.947 1.00 13.48 O 22 | ATOM 22 CB THR A 3 -28.318 -10.497 30.412 1.00 14.60 C 23 | ATOM 23 OG1 THR A 3 -27.550 -9.329 30.158 1.00 7.60 O 24 | ATOM 24 CG2 THR A 3 -29.542 -10.173 31.249 1.00 14.52 C 25 | ATOM 25 N LEU A 4 -29.107 -12.704 32.360 1.00 2.00 N 26 | ATOM 26 CA LEU A 4 -29.866 -13.850 32.734 1.00 2.00 C 27 | ATOM 27 C LEU A 4 
-31.054 -13.904 31.795 1.00 23.22 C 28 | ATOM 28 O LEU A 4 -31.605 -14.952 31.468 1.00 22.54 O 29 | ATOM 29 CB LEU A 4 -30.361 -13.645 34.167 1.00 2.00 C 30 | ATOM 30 CG LEU A 4 -31.598 -14.458 34.413 1.00 2.00 C 31 | ATOM 31 CD1 LEU A 4 -31.138 -15.884 34.580 1.00 10.70 C 32 | ATOM 32 CD2 LEU A 4 -32.285 -13.993 35.678 1.00 2.00 C 33 | TER 32 LEU A 34 | ATOM 33 N ILE B 5 -24.269 -24.311 62.813 1.00 9.48 N 35 | ATOM 34 CA ILE B 5 -23.843 -24.770 64.102 1.00 26.84 C 36 | ATOM 35 C ILE B 5 -22.871 -25.894 64.054 1.00 25.15 C 37 | ATOM 36 O ILE B 5 -21.848 -25.947 64.740 1.00 26.70 O 38 | ATOM 37 CB ILE B 5 -25.073 -25.455 64.560 1.00 2.00 C 39 | ATOM 38 CG1 ILE B 5 -26.132 -24.381 64.698 1.00 8.15 C 40 | ATOM 39 CG2 ILE B 5 -24.748 -26.175 65.844 1.00 15.39 C 41 | ATOM 40 CD1 ILE B 5 -27.169 -24.675 65.763 1.00 4.14 C 42 | ATOM 41 N ALA B 6 -23.314 -26.842 63.273 1.00 5.60 N 43 | ATOM 42 CA ALA B 6 -22.569 -28.021 63.037 1.00 6.15 C 44 | ATOM 43 C ALA B 6 -21.334 -27.605 62.300 1.00 2.00 C 45 | ATOM 44 O ALA B 6 -20.346 -28.310 62.260 1.00 19.27 O 46 | ATOM 45 CB ALA B 6 -23.390 -28.992 62.204 1.00 28.29 C 47 | ATOM 46 N LYS B 7 -20.313 -24.801 63.054 1.00 42.89 N 48 | ATOM 47 CA LYS B 7 -19.947 -23.992 64.206 1.00 35.06 C 49 | ATOM 48 C LYS B 7 -19.630 -22.543 63.931 1.00 28.97 C 50 | ATOM 49 O LYS B 7 -18.474 -22.128 63.903 1.00 23.79 O 51 | ATOM 50 CB LYS B 7 -19.233 -24.631 65.379 1.00 29.85 C 52 | ATOM 51 CG LYS B 7 -18.891 -26.083 65.150 1.00 33.26 C 53 | ATOM 52 CD LYS B 7 -17.459 -26.383 65.536 1.00 40.19 C 54 | ATOM 53 CE LYS B 7 -17.355 -27.043 66.897 1.00 63.40 C 55 | ATOM 54 NZ LYS B 7 -17.415 -28.509 66.822 1.00 66.95 N 56 | END 57 | -------------------------------------------------------------------------------- /docs/pdb/dummy_transform.pdb: -------------------------------------------------------------------------------- 1 | ATOM 1 N THR A 1 1.000 0.000 0.000 1.00 0.69 N 2 | ATOM 2 CA THR A 1 -1.000 0.000 0.000 1.00 0.50 C 3 | ATOM 3 C THR A 1 0.000 
1.000 0.000 1.00 0.45 C 4 | ATOM 4 O THR A 1 0.000 -1.000 0.000 1.00 0.69 O 5 | ATOM 5 CB THR A 1 0.000 0.000 1.000 1.00 0.50 C 6 | ATOM 6 H1 THR A 1 0.000 0.000 -1.000 1.00 0.45 H 7 | -------------------------------------------------------------------------------- /docs/pdb/test.pdb: -------------------------------------------------------------------------------- 1 | ATOM 1 N MET A 1 2.000 -3.418 38.320 1.00 46.93 N 2 | ATOM 2 CA MET A 1 2.000 -2.112 38.939 1.00 52.50 C 3 | ATOM 3 C MET A 1 2.000 -1.566 38.846 1.00 52.55 C 4 | ATOM 4 O MET A 1 2.000 -0.393 38.618 1.00 52.75 O 5 | ATOM 5 CB MET A 1 2.000 -1.178 38.789 1.00 59.92 C 6 | ATOM 6 CG MET A 1 2.000 -1.701 39.713 1.00 80.88 C 7 | ATOM 7 SD MET A 1 2.000 -1.517 39.038 1.00 95.94 S 8 | ATOM 8 CE MET A 1 2.000 -3.173 39.365 1.00 90.58 C 9 | ATOM 9 N GLN A 2 2.000 -2.593 39.074 1.00 51.78 N 10 | ATOM 10 CA GLN A 2 2.000 -2.681 39.076 1.00 52.49 C 11 | ATOM 11 C GLN A 2 -15.268 -2.252 40.349 1.00 42.74 C 12 | ATOM 12 O GLN A 2 -14.688 -2.207 41.435 1.00 47.12 O 13 | ATOM 13 CB GLN A 2 -14.971 -4.147 38.858 1.00 45.95 C 14 | ATOM 14 CG GLN A 2 -14.141 -4.712 37.710 1.00 53.26 C 15 | ATOM 15 CD GLN A 2 -14.923 -5.776 37.001 1.00 68.74 C 16 | ATOM 16 OE1 GLN A 2 -15.159 -6.851 37.563 1.00 82.61 O 17 | ATOM 17 NE2 GLN A 2 -15.382 -5.458 35.797 1.00 76.83 N 18 | ATOM 18 N THR A 3 -16.513 -1.973 40.116 1.00 21.94 N 19 | ATOM 19 CA THR A 3 -17.440 -1.567 41.088 1.00 15.55 C 20 | ATOM 20 C THR A 3 -18.200 -2.824 41.459 1.00 5.55 C 21 | ATOM 21 O THR A 3 -17.960 -3.910 40.947 1.00 13.48 O 22 | ATOM 22 CB THR A 3 -18.318 -0.497 40.412 1.00 14.60 C 23 | ATOM 23 OG1 THR A 3 -17.550 0.671 40.158 1.00 7.60 O 24 | ATOM 24 CG2 THR A 3 -19.542 -0.173 41.249 1.00 14.52 C 25 | ATOM 25 N LEU A 4 -19.107 -2.704 42.360 1.00 2.00 N 26 | ATOM 26 CA LEU A 4 -19.866 -3.850 42.734 1.00 2.00 C 27 | ATOM 27 C LEU A 4 -21.054 -3.904 41.795 1.00 23.22 C 28 | ATOM 28 O LEU A 4 -21.605 -4.952 41.468 1.00 22.54 O 29 | ATOM 29 CB LEU A 4 -20.361 
-3.645 44.167 1.00 2.00 C 30 | ATOM 30 CG LEU A 4 -21.598 -4.458 44.413 1.00 2.00 C 31 | ATOM 31 CD1 LEU A 4 -21.138 -5.884 44.580 1.00 10.70 C 32 | ATOM 32 CD2 LEU A 4 -22.285 -3.993 45.678 1.00 2.00 C 33 | ATOM 33 N ILE C 5 -14.269 -14.311 72.813 1.00 9.48 N 34 | ATOM 34 CA ILE C 5 -13.843 -14.770 74.102 1.00 26.84 C 35 | ATOM 35 C ILE C 5 -12.871 -15.894 74.054 1.00 25.15 C 36 | ATOM 36 O ILE C 5 -11.848 -15.947 74.740 1.00 26.70 O 37 | ATOM 37 CB ILE C 5 -15.073 -15.455 74.560 1.00 2.00 C 38 | ATOM 38 CG1 ILE C 5 -16.132 -14.381 74.698 1.00 8.15 C 39 | ATOM 39 CG2 ILE C 5 -14.748 -16.175 75.844 1.00 15.39 C 40 | ATOM 40 CD1 ILE C 5 -17.169 -14.675 75.763 1.00 4.14 C 41 | ATOM 41 N ALA C 6 -13.314 -16.842 73.273 1.00 5.60 N 42 | ATOM 42 CA ALA C 6 -12.569 -18.021 73.037 1.00 6.15 C 43 | ATOM 43 C ALA C 6 -11.334 -17.605 72.300 1.00 2.00 C 44 | ATOM 44 O ALA C 6 -10.346 -18.310 72.260 1.00 19.27 O 45 | ATOM 45 CB ALA C 6 -13.390 -18.992 72.204 1.00 28.29 C 46 | ATOM 46 N LYS C 7 -10.313 -14.801 73.054 1.00 42.89 N 47 | ATOM 47 CA LYS C 7 -9.947 -13.992 74.206 1.00 35.06 C 48 | ATOM 48 C LYS C 7 -9.630 -12.543 73.931 1.00 28.97 C 49 | ATOM 49 O LYS C 7 -8.474 -12.128 73.903 1.00 23.79 O 50 | ATOM 50 CB LYS C 7 -9.233 -14.631 75.379 1.00 29.85 C 51 | ATOM 51 CG LYS C 7 -8.891 -16.083 75.150 1.00 33.26 C 52 | ATOM 52 CD LYS C 7 -7.459 -16.383 75.536 1.00 40.19 C 53 | ATOM 53 CE LYS C 7 -7.355 -17.043 76.897 1.00 63.40 C 54 | ATOM 54 NZ LYS C 7 -7.415 -18.509 76.822 1.00 66.95 N 55 | -------------------------------------------------------------------------------- /docs/source/.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "restructuredtext.confPath": "${workspaceFolder}" 3 | } -------------------------------------------------------------------------------- /docs/source/_templates/autosummary/method.rst: -------------------------------------------------------------------------------- 1 | {{ name | 
escape | underline }} 2 | 3 | .. currentmodule:: {{ module }} 4 | 5 | .. automethod:: {{ fullname }} -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # http://www.sphinx-doc.org/en/master/config 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import pdb2sql 14 | import os 15 | import sys 16 | 17 | # -- File setup -------------------------------------------------------------- 18 | # source_suffix = ['.rst', '.md'] 19 | source_suffix = '.rst' 20 | 21 | # The master toctree document. 22 | master_doc = 'index' 23 | 24 | 25 | # -- Project information ----------------------------------------------------- 26 | 27 | project = 'pdb2sql' 28 | copyright = '2019, Netherlands eScience Center' 29 | author = 'Nicolas Renaud' 30 | 31 | # The full version, including alpha/beta/rc tags 32 | release = pdb2sql.__version__ 33 | 34 | 35 | # -- General configuration --------------------------------------------------- 36 | 37 | # Add any Sphinx extension module names here, as strings. They can be 38 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 39 | # ones. 
40 | extensions = [ 41 | 'sphinx.ext.autodoc', 42 | 'sphinx.ext.napoleon', 43 | 'sphinx.ext.intersphinx', 44 | 'sphinx.ext.todo', 45 | 'sphinx.ext.autosummary', 46 | 'sphinx.ext.doctest', 47 | 'sphinx.ext.coverage', 48 | 'sphinx.ext.viewcode', 49 | 'sphinx.ext.autosectionlabel', 50 | 'IPython.sphinxext.ipython_directive', 51 | 'IPython.sphinxext.ipython_console_highlighting', 52 | ] 53 | 54 | # Add any paths that contain templates here, relative to this directory. 55 | templates_path = ['_templates'] 56 | 57 | # List of patterns, relative to source directory, that match files and 58 | # directories to ignore when looking for source files. 59 | # This pattern also affects html_static_path and html_extra_path. 60 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 61 | 62 | 63 | # -- Options for HTML output ------------------------------------------------- 64 | 65 | # The theme to use for HTML and HTML Help pages. See the documentation for 66 | # a list of builtin themes. 67 | # 68 | # html_theme = 'default' 69 | # html_theme = 'sphinx_rtd_theme' 70 | html_theme = 'nature' 71 | 72 | # Add any paths that contain custom static files (such as style sheets) here, 73 | # relative to this directory. They are copied after the builtin static files, 74 | # so a file named "default.css" will overwrite the builtin "default.css". 75 | html_static_path = ['_static'] 76 | 77 | 78 | # -- Options for extensions ------------------------------------------------- 79 | 80 | # Only the __init__ method’s docstring is inserted. 
81 | autoclass_content = 'init' 82 | # order members by source code order 83 | autodoc_member_order = 'bysource' 84 | # Disable docstring inheritance 85 | autodoc_inherit_docstrings = False 86 | # mock the packages that are not available on your machine 87 | # autodoc_mock_imports = ['cython', 'sqlalchemy', 'matplotlib', 88 | # 'numpy', 'schema', 'tqdm', 'pandas'] 89 | 90 | # napoleon 91 | napoleon_numpy_docstring = False 92 | napoleon_use_rtype = False 93 | 94 | # If true, `todo` and `todoList` produce output, else they produce nothing. 95 | todo_include_todos = True 96 | 97 | # intersphinx 98 | intersphinx_mapping = { 99 | 'python': ('https://docs.python.org/3', None), 100 | 'numpy': ('http://docs.scipy.org/doc/numpy/', None) 101 | } 102 | 103 | # autosummary 104 | # Make _autosummary files and include them 105 | autosummary_generate = True 106 | # autosummary_imported_members = True 107 | 108 | 109 | # ipython 110 | ipython_warning_is_error = False 111 | ipython_execlines = [ 112 | "import numpy as np", 113 | ] 114 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | ************************************* 2 | pdb2sql: Processing PDB data with SQL 3 | ************************************* 4 | 5 | `pdb2sql`_ is a Python package that allows using SQL queries to handle `PDB`_ files. 6 | Currently, only 'ATOM' data is parsed, and other items of PDB, e.g. HETATM, are ignored. 7 | 8 | Installation: 9 | ``pip install pdb2sql`` 10 | 11 | .. _pdb2sql: https://github.com/DeepRank/pdb2sql 12 | .. _PDB: https://www.rcsb.org/ 13 | 14 | ======== 15 | Tutorial 16 | ======== 17 | 18 | .. toctree:: 19 | :maxdepth: 1 20 | 21 | 10 minutes to pdb2sql <tutorial> 22 | 23 | ========== 24 | Python API 25 | ========== 26 | 27 | ..
toctree:: 28 | :maxdepth: 1 29 | 30 | PDB2SQL <pdb2sql.pdb2sqlcore> 31 | Interface <pdb2sql.interface> 32 | Superposition <pdb2sql.superpose> 33 | Structure Similarity <pdb2sql.StructureSimilarity> 34 | Structure Transformation <pdb2sql.transform> 35 | Utilities <pdb2sql.utils> 36 | 37 | .. :caption: Python API 38 | .. 39 | ================== 40 | Indices and tables 41 | ================== 42 | 43 | * :ref:`genindex` 44 | * :ref:`modindex` 45 | * :ref:`search` 46 | -------------------------------------------------------------------------------- /docs/source/pdb2sql.StructureSimilarity.rst: -------------------------------------------------------------------------------- 1 | ==================== 2 | Structure Similarity 3 | ==================== 4 | .. autoclass:: pdb2sql.StructureSimilarity.StructureSimilarity 5 | 6 | .. currentmodule:: pdb2sql.StructureSimilarity.StructureSimilarity 7 | 8 | i-RMSD (interface RMSD) 9 | ~~~~~~~~~~~~~~~~~~~~~~~ 10 | .. autosummary:: 11 | :toctree: api/ 12 | 13 | compute_irmsd_fast 14 | compute_irmsd_pdb2sql 15 | compute_izone 16 | 17 | l-RMSD (ligand RMSD) 18 | ~~~~~~~~~~~~~~~~~~~~~~ 19 | .. autosummary:: 20 | :toctree: api/ 21 | 22 | compute_lrmsd_fast 23 | compute_lrmsd_pdb2sql 24 | compute_lzone 25 | 26 | FNAT (Fraction of native contacts) 27 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 28 | .. autosummary:: 29 | :toctree: api/ 30 | 31 | compute_fnat_fast 32 | compute_fnat_pdb2sql 33 | compute_residue_pairs_ref 34 | 35 | DockQ 36 | ~~~~~~ 37 | .. autosummary:: 38 | :toctree: api/ 39 | 40 | compute_DockQScore 41 | 42 | CAPRI classes 43 | ~~~~~~~~~~~~~ 44 | .. autosummary:: 45 | :toctree: api/ 46 | 47 | compute_CapriClass 48 | 49 | Clashes 50 | ~~~~~~~ 51 | .. autosummary:: 52 | :toctree: api/ 53 | 54 | compute_clashes 55 | 56 | .. 57 | ..
automodule:: pdb2sql.StructureSimilarity 58 | :members: 59 | :undoc-members: 60 | :show-inheritance: 61 | -------------------------------------------------------------------------------- /docs/source/pdb2sql.interface.rst: -------------------------------------------------------------------------------- 1 | ========= 2 | Interface 3 | ========= 4 | .. autoclass:: pdb2sql.interface.interface 5 | .. currentmodule:: pdb2sql.interface.interface 6 | 7 | Contact Atoms 8 | ~~~~~~~~~~~~~ 9 | .. autosummary:: 10 | :toctree: api/ 11 | 12 | get_contact_atoms 13 | 14 | Contact Residues 15 | ~~~~~~~~~~~~~~~~ 16 | .. autosummary:: 17 | :toctree: api/ 18 | 19 | get_contact_residues -------------------------------------------------------------------------------- /docs/source/pdb2sql.pdb2sqlcore.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | PDB2SQL 3 | ======= 4 | 5 | This module is based on :py:mod:`sqlite3`. 6 | 7 | .. autoclass:: pdb2sql.pdb2sqlcore.pdb2sql 8 | .. currentmodule:: pdb2sql.pdb2sqlcore.pdb2sql 9 | 10 | Process PDB 11 | ~~~~~~~~~~~ 12 | .. autosummary:: 13 | :toctree: api/ 14 | 15 | read_pdb 16 | 17 | .. currentmodule:: pdb2sql.pdb2sql_base.pdb2sql_base 18 | .. autosummary:: 19 | :toctree: api/ 20 | 21 | exportpdb 22 | sql2pdb 23 | 24 | .. currentmodule:: pdb2sql.pdb2sqlcore.pdb2sql 25 | 26 | Get SQL Data 27 | ~~~~~~~~~~~~ 28 | .. autosummary:: 29 | :toctree: api/ 30 | 31 | get 32 | get_colnames 33 | get_chains 34 | get_residues 35 | get_xyz 36 | 37 | Set SQL data 38 | ~~~~~~~~~~~~ 39 | .. autosummary:: 40 | :toctree: api/ 41 | 42 | update 43 | add_column 44 | update_column 45 | update_xyz 46 | 47 | Print SQL data 48 | ~~~~~~~~~~~~~~ 49 | .. autosummary:: 50 | :toctree: api/ 51 | 52 | print 53 | print_colnames 54 | 55 | .. 56 | .. 
automodule:: pdb2sql.pdb2sqlcore 57 | :members: 58 | :undoc-members: 59 | :show-inheritance: 60 | -------------------------------------------------------------------------------- /docs/source/pdb2sql.superpose.rst: -------------------------------------------------------------------------------- 1 | ============================= 2 | Superposition and alignment 3 | ============================= 4 | 5 | Superposition 6 | ~~~~~~~~~~~~~~~ 7 | 8 | .. currentmodule:: pdb2sql.superpose 9 | 10 | .. autosummary:: 11 | :toctree: api/ 12 | 13 | superpose 14 | superpose_selection 15 | 16 | .. 17 | .. automodule:: pdb2sql.superpose 18 | :members: 19 | :undoc-members: 20 | :show-inheritance: 21 | 22 | 23 | Alignment 24 | ~~~~~~~~~~~ 25 | 26 | .. currentmodule:: pdb2sql.align 27 | 28 | .. autosummary:: 29 | :toctree: api/ 30 | 31 | align 32 | align_interface 33 | 34 | .. 35 | .. automodule:: pdb2sql.align 36 | :members: 37 | :undoc-members: 38 | :show-inheritance: 39 | -------------------------------------------------------------------------------- /docs/source/pdb2sql.transform.rst: -------------------------------------------------------------------------------- 1 | ========= 2 | Transform 3 | ========= 4 | .. currentmodule:: pdb2sql.transform 5 | 6 | Rotation 7 | ~~~~~~~~ 8 | .. autosummary:: 9 | :toctree: api/ 10 | 11 | rotate 12 | rot_mat 13 | 14 | get_rot_axis_angle 15 | 16 | rot_axis 17 | rot_xyz_around_axis 18 | 19 | rot_euler 20 | rotation_euler 21 | 22 | 23 | Translation 24 | ~~~~~~~~~~~ 25 | .. autosummary:: 26 | :toctree: api/ 27 | 28 | translation 29 | 30 | 31 | .. 32 | .. automodule:: pdb2sql.transform 33 | :members: 34 | :undoc-members: 35 | :show-inheritance: 36 | -------------------------------------------------------------------------------- /docs/source/pdb2sql.utils.rst: -------------------------------------------------------------------------------- 1 | ========= 2 | Utilities 3 | ========= 4 | ..
currentmodule:: pdb2sql.utils 5 | 6 | PDB Tools 7 | ~~~~~~~~~ 8 | .. autosummary:: 9 | :toctree: api/ 10 | 11 | fetch 12 | 13 | .. 14 | .. automodule:: pdb2sql.transform 15 | :members: 16 | :undoc-members: 17 | :show-inheritance: 18 | -------------------------------------------------------------------------------- /docs/source/tutorial.rst: -------------------------------------------------------------------------------- 1 | .. ipython:: python 2 | :suppress: 3 | 4 | # change working dir to docs/ 5 | import os 6 | os.chdir('..') 7 | 8 | ===================== 9 | 10 minutes to pdb2sql 10 | ===================== 11 | 12 | This is a short introduction to pdb2sql. 13 | 14 | 15 | Download PDB files 16 | ------------------ 17 | 18 | A handy tool `fetch` is provided to download PDB files from `PDB `_ website. 19 | 20 | .. ipython:: python 21 | 22 | from pdb2sql import fetch 23 | fetch('3CRO', './pdb/') 24 | ls ./pdb 25 | 26 | For clear illustration, some `dummy PDB files `_ 27 | are used in the following examples. 28 | 29 | Get and set data 30 | ---------------- 31 | 32 | First, we import as follows: 33 | 34 | .. ipython:: python 35 | 36 | from pdb2sql import pdb2sql 37 | 38 | Create a SQL database instance: 39 | 40 | .. ipython:: python 41 | 42 | db = pdb2sql("./pdb/dummy.pdb") 43 | 44 | 45 | The ``db`` is a SQL instance that contains one table named *ATOM*. 46 | 47 | In this table, each row represents one atom, and columns are atom properties: 48 | 49 | .. ipython:: python 50 | 51 | db.print() 52 | 53 | Get data 54 | ^^^^^^^^ 55 | 56 | Get chainID, residue number, residue name and atom name of all atoms: 57 | 58 | .. ipython:: python 59 | 60 | p = db.get('chainID, resSeq, resName, name') 61 | p 62 | 63 | Get x,y,z coordinates of all atoms: 64 | 65 | .. ipython:: python 66 | 67 | p = db.get('x,y,z') 68 | p 69 | 70 | Get x,y,z coordinates of chain A atoms: 71 | 72 | .. 
ipython:: python 73 | 74 | p = db.get('chainID, x,y,z', chainID=['A']) 75 | p 76 | 77 | Get x,y,z coordinates of atoms on residues 1 and 4 of chain A: 78 | 79 | .. ipython:: python 80 | 81 | p = db.get('chainID,resSeq,x,y,z', chainID=['A'], resSeq=['1', '4']) 82 | p 83 | 84 | Get data of all atoms except residue MET and GLN atoms: 85 | 86 | .. ipython:: python 87 | 88 | p = db.get('chainID, resSeq, resName, name', no_resName = ['MET', 'GLN']) 89 | p 90 | 91 | Get data of all atoms except residue MET and GLN atoms or CA (carbon alpha) atoms: 92 | 93 | .. ipython:: python 94 | 95 | p = db.get('chainID, resSeq, resName, name', no_resName = ['MET', 'GLN'], no_name = ['CA']) 96 | p 97 | 98 | 99 | A simple way to get all data is ``db.get('*')``. 100 | 101 | A shortcut to get x,y,z coordinates: 102 | 103 | .. ipython:: python 104 | 105 | p = db.get_xyz() 106 | p 107 | 108 | Get chain IDs: 109 | 110 | .. ipython:: python 111 | 112 | p = db.get_chains() 113 | p 114 | 115 | Get residue list: 116 | 117 | .. ipython:: python 118 | 119 | p = db.get_residues() 120 | p 121 | 122 | 123 | Filter the database 124 | ^^^^^^^^^^^^^^^^^^^^^^^^^^ 125 | 126 | pdb2sql allows creating a new database by filtering the one we just created: 127 | 128 | .. ipython:: python 129 | 130 | db_chainA = db(chainID='A') 131 | db_chainA.print() 132 | 133 | In that example `db_chainA` is an SQL database that only includes the atoms from chain A. 134 | All the selection keywords (chainID, resSeq, resName, name) and their negations 135 | (no_chainID, no_resSeq, no_resName, no_name) can be used and combined to obtain the new database. 136 | 137 | Set data 138 | ^^^^^^^^ 139 | 140 | Rename chain B to C: 141 | 142 | ..
ipython:: python 143 | 144 | num_B_atoms = len(db.get('chainID', chainID=['B'])) 145 | chainC = ['C'] * num_B_atoms 146 | db.get_chains() 147 | db.update('chainID', chainC, chainID = ['B']) 148 | db.get_chains() 149 | 150 | 151 | Update x,y,z coordinates to translate the structure by [10,10,10]: 152 | 153 | .. ipython:: python 154 | 155 | xyz_old = db.get_xyz() 156 | xyz = np.array(xyz_old) + 10.0 157 | db.update('x,y,z', xyz) 158 | xyz_new = db.get_xyz() 159 | print("old:\n", xyz_old) 160 | print("new:\n", xyz_new) 161 | 162 | Update a column using index, e.g. change the x coordinates of the first 163 | 10 atoms to 2: 164 | 165 | .. ipython:: python 166 | 167 | x = np.ones(10) + 1 168 | db.update_column('x', values=x, index=list(range(10))) 169 | db.print('serial, name, x') 170 | 171 | Add a new column *type* with value *high*: 172 | 173 | .. ipython:: python 174 | 175 | db.add_column('type', value = 'high', coltype = 'str') 176 | db.print('serial, name, type') 177 | 178 | 179 | PDB I/O 180 | ------- 181 | 182 | Read PDB file or data to a list: 183 | 184 | .. ipython:: python 185 | 186 | pdb = pdb2sql.read_pdb('./pdb/dummy.pdb') 187 | pdb 188 | 189 | Convert SQL data to PDB-formatted data: 190 | 191 | .. ipython:: python 192 | 193 | pdb = db.sql2pdb() 194 | pdb 195 | 196 | Write PDB file from SQL database: 197 | 198 | .. ipython:: python 199 | 200 | db.exportpdb('./pdb/test.pdb') 201 | 202 | # show the test.pdb file 203 | ls ./pdb 204 | 205 | 206 | 207 | Interface calculation 208 | --------------------- 209 | 210 | Create an :class:`~pdb2sql.interface.interface` SQL database instance: 211 | 212 | .. ipython:: python 213 | 214 | from pdb2sql import interface 215 | 216 | # use pdb2sql instance as input 217 | from pdb2sql import pdb2sql 218 | pdb_db = pdb2sql('./pdb/3CRO.pdb') 219 | db = interface(pdb_db) 220 | 221 | # or use pdb file as input 222 | db = interface('./pdb/3CRO.pdb') 223 | 224 | Interface atoms 225 | ^^^^^^^^^^^^^^^ 226 | 227 | ..
ipython:: python 228 | 229 | itf_atom = db.get_contact_atoms(cutoff = 3) 230 | itf_atom_pair = db.get_contact_atoms(cutoff = 3, return_contact_pairs=True) 231 | print("interface atom:\n", itf_atom) 232 | print("interface atom pairs:\n", itf_atom_pair) 233 | 234 | 235 | Interface residues 236 | ^^^^^^^^^^^^^^^^^^ 237 | 238 | .. ipython:: python 239 | 240 | itf_residue = db.get_contact_residues(cutoff = 3) 241 | itf_residue_pair = db.get_contact_residues(cutoff = 3, return_contact_pairs=True) 242 | itf_residue 243 | itf_residue_pair 244 | 245 | 246 | Structure superposition 247 | -------------------------- 248 | 249 | pdb2sql allows superposing two structures on top of each other, either using the full structures or with selection keywords. 250 | For example, to superpose chain A of two PDB structures one can use: 251 | 252 | .. ipython:: python 253 | 254 | from pdb2sql import superpose 255 | ref = pdb2sql('./pdb/1AK4_5w.pdb') 256 | decoy = pdb2sql('./pdb/1AK4_10w.pdb') 257 | superposed_decoy = superpose(decoy, ref, chainID='A', export=True) 258 | 259 | This will export a new PDB file containing the structure of the decoy superposed onto the reference. 260 | 261 | Structure alignment 262 | --------------------------- 263 | 264 | pdb2sql allows aligning a structure along a specific axis: 265 | 266 | .. ipython:: python 267 | 268 | from pdb2sql import align 269 | db = pdb2sql('./pdb/1AK4_10w.pdb') 270 | aligned_db = align(db, axis='z', export=True) 271 | 272 | The alignment can also consider only a subpart of the complex using the selection keywords: 273 | 274 | .. ipython:: python 275 | 276 | aligned_db = align(db, axis='z', chainID='A') 277 | 278 | Here, chain A will be aligned along the z-axis. 279 | 280 | This will create a new PDB file containing the structure aligned along the z-axis. It is 281 | also possible to align an interface in a given plane: 282 | 283 | ..
ipython:: python 284 | 285 | from pdb2sql import align_interface 286 | db = pdb2sql('./pdb/3CRO.pdb') 287 | aligned_db = align_interface(db, plane='xy', export=True) 288 | 289 | By default, the interface formed by chains A and B will be considered. In case multiple chains are present 290 | in the structure, it is possible to specify which interface to consider: 291 | 292 | .. ipython:: python 293 | 294 | aligned_db = align_interface(db, plane='xy', chain1='L', chain2='R') 295 | 296 | 297 | Here, the interface between chains L and R will be considered. Note that any other selection 298 | keyword can be used to specify which interface to account for. 299 | 300 | Structure similarity calculation 301 | -------------------------------- 302 | 303 | Create a :class:`~pdb2sql.StructureSimilarity.StructureSimilarity` instance: 304 | 305 | .. ipython:: python 306 | 307 | from pdb2sql.StructureSimilarity import StructureSimilarity 308 | sim = StructureSimilarity('./pdb/decoy.pdb', './pdb/ref.pdb') 309 | 310 | interface RMSD 311 | ^^^^^^^^^^^^^^ 312 | 313 | .. ipython:: python 314 | :okwarning: 315 | 316 | irmsd_fast = sim.compute_irmsd_fast() 317 | irmsd_pdb2sql = sim.compute_irmsd_pdb2sql() 318 | irmsd_fast 319 | irmsd_pdb2sql 320 | 321 | 322 | ligand RMSD 323 | ^^^^^^^^^^^ 324 | 325 | .. ipython:: python 326 | :okwarning: 327 | 328 | lrmsd_fast = sim.compute_lrmsd_fast() 329 | lrmsd_pdb2sql = sim.compute_lrmsd_pdb2sql() 330 | lrmsd_fast 331 | lrmsd_pdb2sql 332 | 333 | FNAT 334 | ^^^^ 335 | 336 | Calculate the fraction of native contacts: 337 | 338 | .. ipython:: python 339 | :okwarning: 340 | 341 | fnat_fast = sim.compute_fnat_fast() 342 | fnat_pdb2sql = sim.compute_fnat_pdb2sql() 343 | fnat_fast 344 | fnat_pdb2sql 345 | 346 | 347 | DockQ score 348 | ^^^^^^^^^^^ 349 | 350 | ..
ipython:: python 351 | 352 | dockQ = sim.compute_DockQScore(fnat_fast, lrmsd_fast, irmsd_fast) 353 | dockQ 354 | 355 | 356 | Structure transformation 357 | ------------------------ 358 | 359 | Create SQL instance: 360 | 361 | .. ipython:: python 362 | 363 | from pdb2sql import transform 364 | db = pdb2sql('./pdb/dummy_transform.pdb') 365 | 366 | The atom coordinates are: 367 | 368 | .. ipython:: python 369 | 370 | db.get_xyz() 371 | 372 | Rotations 373 | ^^^^^^^^^ 374 | Rotate structures 180 degrees along the x-axis: 375 | 376 | .. ipython:: python 377 | 378 | angle = np.pi 379 | axis = (1., 0., 0.) 380 | transform.rot_axis(db, axis, angle) 381 | db.get_xyz() 382 | 383 | Get random rotation axis and angle: 384 | 385 | .. ipython:: python 386 | 387 | axis, angle = transform.get_rot_axis_angle() 388 | axis 389 | angle 390 | 391 | Translations 392 | ^^^^^^^^^^^^ 393 | 394 | Translate structure 5Å along y-axis: 395 | 396 | .. ipython:: python 397 | 398 | trans_vec = np.array([0,5,0]) 399 | transform.translation(db, trans_vec) 400 | db.get_xyz() -------------------------------------------------------------------------------- /example/align_pdb.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pdb2sql import align 3 | from pdb2sql import align_interface 4 | 5 | pdb = '../test/1AK4/decoys/1AK4_cm-it0_745.pdb' 6 | 7 | #- example 1 8 | align(pdb) # align PC1 to axis x 9 | 10 | #- example 2 11 | align(pdb, axis = 'z', export = True) 12 | 13 | #- example 3 14 | selection = {'chainID':['A'], 'resSeq':['30', '144'], 'name' : ['CA']} 15 | align(pdb, export = True, **selection) 16 | sys.exit() 17 | 18 | # other examples for selection 19 | selection = {'no_chainID':['A'], 'no_name' : ['CA','C', 'O', 'N'], 'no_resName' : ['ALA', 'TRP']} 20 | 21 | #- example 4 22 | pdb1 = '../test/1AK4/decoys/1AK4_cm-it0_745.pdb' 23 | pdb2 = '../test/1AK4/decoys/1AK4_cm-itw_238w.pdb' 24 | align_interface(pdb1, export = True) 25 | 
align_interface(pdb2, export = True) 26 | 27 | #- example 4 28 | 29 | 30 | -------------------------------------------------------------------------------- /example/axes_cyl.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2008 Robert L. Campbell 2 | 3 | from pymol.cgo import * 4 | from pymol import cmd 5 | from pymol.vfont import plain 6 | 7 | # create the axes object, draw axes with cylinders coloured red, green, 8 | #blue for X, Y and Z 9 | 10 | obj = [ 11 | CYLINDER, 0., 0., 0., 20., 0., 0., 0.2, 1.0, 1.0, 1.0, 1.0, 0.0, 0., 12 | CYLINDER, 0., 0., 0., 0., 20., 0., 0.2, 1.0, 1.0, 1.0, 0., 1.0, 0., 13 | CYLINDER, 0., 0., 0., 0., 0., 20., 0.2, 1.0, 1.0, 1.0, 0., 0.0, 1.0, 14 | 15 | ] 16 | 17 | # add labels to axes object 18 | 19 | cyl_text(obj,plain,[-5.,-5.,-1],'Origin',0.20,axes=[[3.0,0.0,0.0],[0.0,3.0,0.0],[0.0,0.0,3.0]]) 20 | cyl_text(obj,plain,[20.,0.,0.],'X',0.20,axes=[[3.0,0.0,0.0],[0.0,3.0,0.0],[0.0,0.0,3.0]]) 21 | cyl_text(obj,plain,[0.,20.,0.],'Y',0.20,axes=[[3.0,0.0,0.0],[0.0,3.0,0.0],[0.0,0.0,3.0]]) 22 | cyl_text(obj,plain,[0.,0.,20.],'Z',0.20,axes=[[3.0,0.0,0.0],[0.0,3.0,0.0],[0.0,0.0,3.0]]) 23 | 24 | # then we load it into PyMOL 25 | cmd.load_cgo(obj,'axes1') 26 | 27 | 28 | -------------------------------------------------------------------------------- /makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean clean-build clean-pyc clean-test release dist 2 | 3 | help: 4 | @echo "clean - remove all build, test, coverage and Python artifacts" 5 | @echo "clean-build - remove build artifacts" 6 | @echo "clean-pyc - remove Python file artifacts" 7 | @echo "clean-test - remove test and coverage artifacts" 8 | @echo "release - package and upload a release" 9 | @echo "dist - package" 10 | 11 | clean: clean-build clean-pyc clean-test 12 | 13 | clean-build: 14 | rm -fr build/ 15 | rm -fr dist/ 16 | rm -fr .eggs/ 17 | find . 
-name '*.egg-info' -exec rm -fr {} + 18 | find . -name '*.egg' -exec rm -f {} + 19 | 20 | clean-pyc: 21 | find . -name '*.pyc' -exec rm -f {} + 22 | find . -name '*.pyo' -exec rm -f {} + 23 | find . -name '*~' -exec rm -f {} + 24 | find . -name '__pycache__' -exec rm -fr {} + 25 | find . -name '*_cache' -exec rm -fr {} + 26 | 27 | clean-test: 28 | rm -f .coverage 29 | 30 | dist: clean 31 | python setup.py sdist bdist_wheel 32 | ls -l dist 33 | 34 | release: 35 | python -m twine upload dist/* 36 | -------------------------------------------------------------------------------- /paper/arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepRank/pdb2sql/76d7b684b6921261436e2656d9ecd8be86148199/paper/arch.png -------------------------------------------------------------------------------- /paper/comp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepRank/pdb2sql/76d7b684b6921261436e2656d9ecd8be86148199/paper/comp.png -------------------------------------------------------------------------------- /paper/paper.bibtex: -------------------------------------------------------------------------------- 1 | @article{biopython, 2 | author = {Cock, Peter J. A. and Antao, Tiago and Chang, Jeffrey T. and Chapman, Brad A. and Cox, Cymon J. and Dalke, Andrew and Friedberg, Iddo and Hamelryck, Thomas and Kauff, Frank and Wilczynski, Bartek and de Hoon, Michiel J. L.}, 3 | title = "{Biopython: freely available Python tools for computational molecular biology and bioinformatics}", 4 | journal = {Bioinformatics}, 5 | volume = {25}, 6 | number = {11}, 7 | pages = {1422-1423}, 8 | year = {2009}, 9 | month = {03}, 10 | abstract = "{Summary: The Biopython project is a mature open source international collaboration of volunteer developers, providing Python libraries for a wide range of bioinformatics problems. 
Biopython includes modules for reading and writing different sequence file formats and multiple sequence alignments, dealing with 3D macro molecular structures, interacting with common tools such as BLAST, ClustalW and EMBOSS, accessing key online databases, as well as providing numerical methods for statistical learning.Availability: Biopython is freely available, with documentation and source code at www.biopython.org under the Biopython license.Contact: All queries should be directed to the Biopython mailing lists, see www.biopython.org/wiki/\_Mailing\_listspeter.cock@scri.ac.uk.}", 11 | issn = {1367-4803}, 12 | doi = {10.1093/bioinformatics/btp163}, 13 | url = {https://doi.org/10.1093/bioinformatics/btp163}, 14 | eprint = {https://academic.oup.com/bioinformatics/article-pdf/25/11/1422/944180/btp163.pdf}, 15 | } 16 | 17 | @article{biopdb, 18 | author = {Hamelryck, Thomas and Manderick, Bernard}, 19 | title = "{PDB file parser and structure class implemented in Python}", 20 | journal = {Bioinformatics}, 21 | volume = {19}, 22 | number = {17}, 23 | pages = {2308-2310}, 24 | year = {2003}, 25 | month = {11}, 26 | abstract = "{Summary: The biopython project provides a set of bioinformatics tools implemented in Python. Recently, biopython was extended with a set of modules that deal with macromolecular structure. Biopython now contains a parser for PDB files that makes the atomic information available in an easy-to-use but powerful data structure. The parser and data structure deal with features that are often left out or handled inadequately by other packages, e.g. atom and residue disorder (if point mutants are present in the crystal), anisotropic B factors, multiple models and insertion codes. 
In addition, the parser performs some sanity checking to detect obvious errors.Availability: The Biopython distribution (including source code and documentation) is freely available (under the Biopython license) from http://www.biopython.org}", 27 | issn = {1367-4803}, 28 | doi = {10.1093/bioinformatics/btg299}, 29 | url = {https://doi.org/10.1093/bioinformatics/btg299}, 30 | eprint = {https://academic.oup.com/bioinformatics/article-pdf/19/17/2308/537332/btg299.pdf}, 31 | } 32 | 33 | @software{profit, 34 | author = {Martin, A. C. R. and Porter, C. T.}, 35 | title = "{ProFit3.1}", 36 | year = {2009}, 37 | url = {http://www.bioinf.org.uk/software/profit/}, 38 | } 39 | 40 | @article{mmtk, 41 | author = {Hinsen, Konrad}, 42 | title = {The molecular modeling toolkit: A new approach to molecular simulations}, 43 | journal = {Journal of Computational Chemistry}, 44 | 45 | volume = {21}, 46 | number = {2}, 47 | pages = {79-85}, 48 | keywords = {molecular simulation, biomolecules, object-oriented design}, 49 | doi = {10.1002/(SICI)1096-987X(20000130)21:2<79::AID-JCC1>3.0.CO;2-B}, 50 | url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/%28SICI%291096-987X%2820000130%2921%3A2%3C79%3A%3AAID-JCC1%3E3.0.CO%3B2-B}, 51 | eprint = {https://onlinelibrary.wiley.com/doi/pdf/10.1002/%28SICI%291096-987X%2820000130%2921%3A2%3C79%3A%3AAID-JCC1%3E3.0.CO%3B2-B}, 52 | abstract = {Abstract The Molecular Modeling Toolkit is a library that implements common molecular simulation techniques, with an emphasis on biomolecular simulations. It uses modern software engineering techniques (object-oriented design, a high-level language) to overcome limitations associated with the large monolithic simulation programs that are commonly used for biomolecules. 
Its principal advantages are (1) easy extension and combination with other libraries due to modular library design; (2) a single high-level general-purpose programming language (Python) is used for library implementation as well as for application scripts; (3) use of documented and machine-independent formats for all data files; and (4) interfaces to other simulation and visualization programs. © 2000 John Wiley \& Sons, Inc. J Comput Chem 21: 79–85, 2000}, 53 | year = {2000} 54 | } 55 | 56 | @software{dbloader, 57 | author = {RCSB PDB}, 58 | title = "{Db Loader}", 59 | url = {https://sw-tools.rcsb.org/apps/DB-LOADER/index.html}, 60 | } 61 | 62 | @InProceedings{ mdanalysis, 63 | author = { {R}ichard {J}. {G}owers and {M}ax {L}inke and {J}onathan {B}arnoud and {T}yler {J}. {E}. {R}eddy and {M}anuel {N}. {M}elo and {S}ean {L}. {S}eyler and {J}an {D}omański and {D}avid {L}. {D}otson and {S}ébastien {B}uchoux and {I}an {M}. {K}enney and {O}liver {B}eckstein }, 64 | title = { {M}{D}{A}nalysis: {A} {P}ython {P}ackage for the {R}apid {A}nalysis of {M}olecular {D}ynamics {S}imulations }, 65 | booktitle = { {P}roceedings of the 15th {P}ython in {S}cience {C}onference }, 66 | pages = { 98 - 105 }, 67 | year = { 2016 }, 68 | editor = { {S}ebastian {B}enthall and {S}cott {R}ostrup }, 69 | doi = { 10.25080/Majora-629e541a-00e } 70 | } 71 | 72 | @incollection{sqlalchemy, 73 | place={Mountain View}, 74 | title={SQLAlchemy}, 75 | booktitle={The Architecture of Open Source Applications Volume II: Structure, Scale, and a Few More Fearless Hacks}, 76 | publisher={aosabook.org}, 77 | url="http://aosabook.org/en/sqlalchemy.html", 78 | author={Bayer, Michael}, 79 | editor={Brown, Amy and Wilson, Greg}, 80 | year={2012} 81 | } 82 | 83 | @article{iscore, 84 | author = {Geng, Cunliang and Jung, Yong and Renaud, Nicolas and Honavar, Vasant and Bonvin, Alexandre M J J and Xue, Li C}, 85 | title = "{i{S}core: A novel graph kernel-based function for scoring protein-protein docking models}", 86 
| journal = {Bioinformatics}, 87 | year = {2019}, 88 | month = {06}, 89 | issn = {1367-4803}, 90 | doi = {10.1093/bioinformatics/btz496}, 91 | doi = {10.1093/bioinformatics/btz496} 92 | } 93 | 94 | 95 | @article{prody, 96 | author = {Bakan, Ahmet and Meireles, Lidio M. and Bahar, Ivet}, 97 | title = "{ProDy: Protein Dynamics Inferred from Theory and Experiments}", 98 | journal = {Bioinformatics}, 99 | volume = {27}, 100 | number = {11}, 101 | pages = {1575-1577}, 102 | year = {2011}, 103 | month = {04}, 104 | abstract = "{Summary: We developed a Python package, ProDy, for structure-based analysis of protein dynamics. ProDy allows for quantitative characterization of structural variations in heterogeneous datasets of structures experimentally resolved for a given biomolecular system, and for comparison of these variations with the theoretically predicted equilibrium dynamics. Datasets include structural ensembles for a given family or subfamily of proteins, their mutants and sequence homologues, in the presence/absence of their substrates, ligands or inhibitors. Numerous helper functions enable comparative analysis of experimental and theoretical data, and visualization of the principal changes in conformations that are accessible in different functional states. ProDy application programming interface (API) has been designed so that users can easily extend the software and implement new methods.Availability:ProDy is open source and freely available under GNU General Public License from http://www.csb.pitt.edu/ProDy/.Contact:ahb12@pitt.edu; bahar@pitt.edu}", 105 | issn = {1367-4803}, 106 | doi = {10.1093/bioinformatics/btr168}, 107 | url = {https://doi.org/10.1093/bioinformatics/btr168}, 108 | eprint = {https://academic.oup.com/bioinformatics/article-pdf/27/11/1575/5904480/btr168.pdf}, 109 | } 110 | 111 | @article{biojava, 112 | author = {Lafita, Aleix AND Bliven, Spencer AND Prlić, Andreas AND Guzenko, Dmytro AND Rose, Peter W. 
AND Bradley, Anthony AND Pavan, Paolo AND Myers-Turnbull, Douglas AND Valasatava, Yana AND Heuer, Michael AND Larson, Matt AND Burley, Stephen K. AND Duarte, Jose M.}, 113 | journal = {PLOS Computational Biology}, 114 | publisher = {Public Library of Science}, 115 | title = {BioJava 5: A community driven open-source bioinformatics library}, 116 | year = {2019}, 117 | month = {02}, 118 | volume = {15}, 119 | url = {https://doi.org/10.1371/journal.pcbi.1006791}, 120 | pages = {1-8}, 121 | abstract = {BioJava is an open-source project that provides a Java library for processing biological data. The project aims to simplify bioinformatic analyses by implementing parsers, data structures, and algorithms for common tasks in genomics, structural biology, ontologies, phylogenetics, and more. Since 2012, we have released two major versions of the library (4 and 5) that include many new features to tackle challenges with increasingly complex macromolecular structure data. BioJava requires Java 8 or higher and is freely available under the LGPL 2.1 license. The project is hosted on GitHub at https://github.com/biojava/biojava. More information and documentation can be found online on the BioJava website (http://www.biojava.org) and tutorial (https://github.com/biojava/biojava-tutorial). 
All inquiries should be directed to the GitHub page or the BioJava mailing list (http://lists.open-bio.org/mailman/listinfo/biojava-l).}, 122 | number = {2}, 123 | doi = {10.1371/journal.pcbi.1006791} 124 | } 125 | @article{dockq, 126 | title = {{DockQ}: A Quality Measure for Protein-Protein Docking Models}, 127 | volume = {11}, 128 | issn = {1932-6203}, 129 | url = {https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0161879}, 130 | doi = {10.1371/journal.pone.0161879}, 131 | pages = {e0161879}, 132 | number = {8}, 133 | journaltitle = {{PLOS} {ONE}}, 134 | shortjournal = {{PLOS} {ONE}}, 135 | author = {Basu, Sankar and Wallner, Björn}, 136 | urldate = {2020-01-15}, 137 | date = {2016-08-25}, 138 | } 139 | 140 | @article{pdb, 141 | title = {Protein Data Bank: the single global archive for 3D macromolecular structure data}, 142 | volume = {47}, 143 | issn = {0305-1048}, 144 | url = {https://academic.oup.com/nar/article/47/D1/D520/5144142}, 145 | doi = {10.1093/nar/gky949}, 146 | shorttitle = {Protein Data Bank}, 147 | pages = {D520--D528}, 148 | issue = {D1}, 149 | journaltitle = {Nucleic Acids Research}, 150 | shortjournal = {Nucleic Acids Res}, 151 | author = {Burley, Stephen K. and Berman, Helen M. and Bhikadiya, Charmi and Bi, Chunxiao and Chen, Li and Costanzo, Luigi Di and Christie, Cole and Duarte, Jose M. and Dutta, Shuchismita and Feng, Zukang and Ghosh, Sutapa and Goodsell, David S. and Green, Rachel Kramer and Guranovic, Vladimir and Guzenko, Dmytro and Hudson, Brian P. 
and Liang, Yuhe and Lowe, Robert and Peisach, Ezra and Periskova, Irina and Randle, Chris and Rose, Alexander and Sekharan, Monica and Shao, Chenghua and Tao, Yi-Ping and Valasatava, Yana and Voigt, Maria and Westbrook, John and Young, Jasmine and Zardecki, Christine and Zhuravleva, Marina and Kurisu, Genji and Nakamura, Haruki and Kengaku, Yumiko and Cho, Hasumi and Sato, Junko and Kim, Ju Yaen and Ikegawa, Yasuyo and Nakagawa, Atsushi and Yamashita, Reiko and Kudou, Takahiro and Bekker, Gert-Jan and Suzuki, Hirofumi and Iwata, Takeshi and Yokochi, Masashi and Kobayashi, Naohiro and Fujiwara, Toshimichi and Velankar, Sameer and Kleywegt, Gerard J. and Anyango, Stephen and Armstrong, David R. and Berrisford, John M. and Conroy, Matthew J. and Dana, Jose M. and Deshpande, Mandar and Gane, Paul and Gáborová, Romana and Gupta, Deepti and Gutmanas, Aleksandras and Koča, Jaroslav and Mak, Lora and Mir, Saqib and Mukhopadhyay, Abhik and Nadzirin, Nurul and Nair, Sreenath and Patwardhan, Ardan and Paysan-Lafosse, Typhaine and Pravda, Lukas and Salih, Osman and Sehnal, David and Varadi, Mihaly and Vařeková, Radka and Markley, John L. and Hoch, Jeffrey C. and Romero, Pedro R. and Baskaran, Kumaran and Maziuk, Dimitri and Ulrich, Eldon L. and Wedell, Jonathan R. and Yao, Hongyang and Livny, Miron and Ioannidis, Yannis E.}, 152 | urldate = {2020-01-15}, 153 | date = {2019-01-08} 154 | } 155 | 156 | @article {pdbtools, 157 | author = {Rodrigues, Jo{\~a}o P. G. L. M. and Teixeira, Jo{\~a}o M. C. and Trellet, Mika{\"e}l and Bonvin, Alexandre M. J. 
J.}, 158 | title = {{pdb-tools}: a swiss army knife for molecular structures}, 159 | elocation-id = {483305}, 160 | year = {2018}, 161 | doi = {10.1101/483305}, 162 | publisher = {Cold Spring Harbor Laboratory}, 163 | URL = {https://www.biorxiv.org/content/early/2018/12/04/483305}, 164 | eprint = {https://www.biorxiv.org/content/early/2018/12/04/483305.full.pdf}, 165 | journal = {bioRxiv} 166 | } 167 | 168 | @article{capri, 169 | title = {Assessment of blind predictions of protein–protein interactions: Current status of docking methods}, 170 | volume = {52}, 171 | issn = {1097-0134}, 172 | url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/prot.10393}, 173 | doi = {10.1002/prot.10393}, 174 | shorttitle = {Assessment of blind predictions of protein–protein interactions}, 175 | pages = {51--67}, 176 | number = {1}, 177 | journaltitle = {Proteins: Structure, Function, and Bioinformatics}, 178 | author = {Méndez, Raúl and Leplae, Raphaël and Maria, Leonardo De and Wodak, Shoshana J.}, 179 | urldate = {2020-01-15}, 180 | date = {2003} 181 | } 182 | -------------------------------------------------------------------------------- /paper/paper.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'The pdb2sql Python Package: Parsing, Manipulation and Analysis of PDB Files Using SQL Queries' 3 | tags: 4 | - Python 5 | - Bioinformatics 6 | - PDB files 7 | authors: 8 | - name: Nicolas Renaud 9 | orcid: 0000-0001-9589-2694 10 | affiliation: 1 11 | - name: Cunliang Geng 12 | orcid: 0000-0002-1409-8358 13 | affiliation: 1 14 | affiliations: 15 | - name: Netherlands eScience Center, Science Park 140 1098 XG Amsterdam, the Netherlands 16 | index: 1 17 | date: 13 January 2020 18 | bibliography: paper.bibtex 19 | --- 20 | 21 | # Summary 22 | 23 | The analysis of biomolecular structures is a crucial task for a wide range of applications ranging from drug design to protein engineering. 
The Protein Data Bank (PDB) file format [@pdb] is the most popular format to describe biomolecular structures such as proteins and nucleic acids. In this text-based format, each line represents a given atom and entails its main properties such as atom name and identifier, residue name and identifier, chain identifier, coordinates, etc. Several solutions have been developed to parse PDB files into dedicated objects that facilitate the analysis and manipulation of biomolecular structures. This is, for example, the case for the ``BioPython`` parser [@biopython,@biopdb] that loads PDB files into a nested dictionary, the structure of which mimics the hierarchical nature of the biomolecular structure. Selecting a given sub-part of the biomolecule can then be done by going through the dictionary and selecting the required atoms. Other packages, such as ``ProDy`` [@prody], ``BioJava`` [@biojava], ``MMTK`` [@mmtk] and ``MDAnalysis`` [@mdanalysis] to cite a few, also offer solutions to parse PDB files. However, these parsers are embedded in large codebases that are sometimes difficult to integrate with new applications and are often geared toward the analysis of molecular dynamics simulations. Lightweight applications such as ``pdb-tools`` [@pdbtools] lack the capabilities to manipulate coordinates. 24 | 25 | 26 | 27 | We present here the Python package ``pdb2sql``, which loads individual PDB files into a relational database. Among different solutions, the Structured Query Language (SQL) is a very popular solution to query a given database. However SQL queries are complex and domain scientists such as bioinformaticians are usually not familiar with them. This represents an important barrier to the adoption of SQL technology in bioinformatics. ``pdb2sql`` exposes complex SQL queries through simple Python methods that are intuitive for end users. As such, our package leverages the power of SQL queries and removes the barrier that SQL complexity represents. 
In addition, several advanced modules have also been built, for example, to rotate or translate biomolecular structures, to characterize interface contacts, and to measure structure similarity between two protein complexes. Additional modules can easily be developed following the same scheme. As a consequence, ``pdb2sql`` is a lightweight and versatile PDB tool that is easy to extend and to integrate with new applications. 28 | 29 | 30 | # Capabilities of ``pdb2sql`` 31 | 32 | ``pdb2sql`` allows a user to query, manipulate, and process PDB files through a series of dedicated classes. We give an overview of these features and illustrate them with snippets of code. More examples can be found in the documentation (https://pdb2sql.readthedocs.io). 33 | 34 | ## Extracting data from PDB files 35 | 36 | ``pdb2sql`` allows a user to simply query the database using the ``get(attr, **kwargs)`` method. The attribute ``attr`` here is a list of or a single column name of the ``SQL`` database; see Table 1 for available attributes. The keyword argument ``kwargs`` can then be used to specify a sub-selection of atoms. 37 | 38 | Table 1. Atom attributes and associated definitions in ``pdb2sql`` 39 | 40 | | Attribute | Definition | 41 | |-----------|-------------------------------------------| 42 | | serial | Atom serial number | 43 | | name | Atom name | 44 | | altLoc | Alternate location indicator | 45 | | resName | Residue name | 46 | | chainID | Chain identifier | 47 | | resSeq | Residue sequence number | 48 | | iCode | Code for insertion of residues | 49 | | x | Orthogonal coordinates for X in Angstroms | 50 | | y | Orthogonal coordinates for Y in Angstroms | 51 | | z | Orthogonal coordinates for Z in Angstroms | 52 | | occ | Occupancy | 53 | | temp | Temperature factor | 54 | | element | Element symbol | 55 | | model | Model serial number | 56 | 57 | 58 | Every attribute name can be used to select specific atoms and multiple conditions can be easily combined. 
For example, let's consider the following example: 59 | 60 | ```python 61 | from pdb2sql import pdb2sql 62 | pdb = pdb2sql('1AK4.pdb') 63 | atoms = pdb.get('x,y,z', 64 | name=['C','H'], 65 | resName=['VAL','LEU'], 66 | chainID='A') 67 | ``` 68 | 69 | This snippet extracts the coordinates of the carbon and hydrogen atoms that belong to all the valine and leucine residues of the chain labelled `A` in the PDB file. Atoms can also be excluded from the selection by prepending the prefix ``no_`` to the attribute name. This is the case in the following example: 70 | 71 | ```python 72 | from pdb2sql import pdb2sql 73 | pdb = pdb2sql('1AK4.pdb') 74 | atoms = pdb.get('name, resName', 75 | no_resName=['GLY', 'PHE']) 76 | ``` 77 | This snippet extracts the atom and residue names of all atoms except those belonging to the glycine and phenylalanine residues of the structure. Similar combinations of arguments can be designed to obtain complex selection rules that precisely select the desired atom properties. 78 | 79 | ## Manipulating PDB files 80 | 81 | The data contained in the SQL database can also be modified using the ``update(attr, vals, **kwargs)`` method. The attributes and keyword arguments are identical to those in the ``get`` method. The ``vals`` argument should contain a `numpy` array whose dimension should match the selection criteria. For example: 82 | 83 | ```python 84 | import numpy as np 85 | from pdb2sql import pdb2sql 86 | 87 | pdb = pdb2sql('1AK4.pdb') 88 | xyz = pdb.get('x,y,z', chainID='A', resSeq=1) 89 | xyz = np.array(xyz) 90 | xyz -= np.mean(xyz, axis=0) 91 | pdb.update('x,y,z', xyz, chainID='A', resSeq=1) 92 | ``` 93 | 94 | This snippet first extracts the coordinates of atoms in the first residue of chain A, then translates this fragment to the origin and updates the coordinate values in the database. ``pdb2sql`` also provides a convenient module ``transform`` to easily translate or rotate structures. 
For example, to translate the first residue of the structure 5 Å along the Y-axis, 95 | 96 | ```python 97 | import numpy as np 98 | from pdb2sql import pdb2sql 99 | from pdb2sql import transform 100 | 101 | pdb = pdb2sql('1AK4.pdb') 102 | trans_vec = np.array([0,5,0]) 103 | transform.translation(pdb, trans_vec, resSeq=1, chainID='A') 104 | ``` 105 | 106 | One can also rotate a given selection around a given axis with the `rot_axis` function: 107 | 108 | ```python 109 | angle = np.pi 110 | axis = (1., 0., 0.) 111 | transform.rot_axis(pdb, axis, angle, resSeq=1, chainID='A') 112 | ``` 113 | 114 | ## Identifying interface 115 | 116 | The ``interface`` class is derived from the ``pdb2sql`` class and offers functionality to identify contact atoms or residues between two different chains with a given contact distance. It is useful for extracting and analysing the interface of, e.g., protein-protein complexes. The following example snippet returns all the atoms and all the residues of the interface of '1AK4.pdb' defined by a contact distance of 6 Å. 117 | 118 | ```python 119 | from pdb2sql import interface 120 | 121 | pdb = interface('1AK4.pdb') 122 | atoms = pdb.get_contact_atoms(cutoff=6.0) 123 | res = pdb.get_contact_residues(cutoff=6.0) 124 | ``` 125 | 126 | It is also possible to directly create an ``interface`` instance with a ``pdb2sql`` instance as input. In this case, all the changes in the ``pdb2sql`` instance before creating the new ``interface`` instance will be kept in the ``interface`` instance; afterwards, the two instances will be independent, which means changes in one will not affect the other. 
127 | 128 | ```python 129 | from pdb2sql import pdb2sql 130 | from pdb2sql import interface 131 | 132 | pdb = pdb2sql('1AK4.pdb') 133 | pdbitf = interface(pdb) 134 | atoms = pdbitf.get_contact_atoms(cutoff=6.0) 135 | res = pdbitf.get_contact_residues(cutoff=6.0) 136 | ``` 137 | 138 | 139 | ## Computing Structure Similarity 140 | 141 | The ``StructureSimilarity`` class allows a user to compute similarity measures between two protein-protein complexes. Several popular measures used to classify qualities of protein complex structures in the CAPRI (Critical Assessment of PRedicted Interactions) challenges [@capri] have been implemented: interface rmsd, ligand rmsd, fraction of native contacts and DockQ [@dockq]. The approach implemented to compute the interface rmsd and ligand rmsd is identical to that of the well-known package ``ProFit`` [@profit]. All the methods required to superimpose structures have been implemented in the ``transform`` module and therefore this relies on no external dependencies. The following snippet shows how to compute these measures: 142 | 143 | ```python 144 | from pdb2sql import StructureSimilarity 145 | 146 | sim = StructureSimilarity(decoy = '1AK4_model.pdb', 147 | ref = '1AK4_xray.pdb') 148 | 149 | irmsd = sim.compute_irmsd_fast() 150 | lrmsd = sim.compute_lrmsd_fast() 151 | fnat = sim.compute_fnat_fast() 152 | dockQ = sim.compute_DockQScore(fnat, lrmsd, irmsd) 153 | ``` 154 | 155 | 156 | # Application 157 | ``pdb2sql`` has been used at the Netherlands eScience Center for bioinformatics projects. This is, for example, the case of ``iScore`` [@iscore], which uses graph kernels and support vector machines to rank protein-protein interfaces. We illustrate the use of the package here by computing the interface rmsd and ligand rmsd of a series of structural models using the experimental structure as a reference. 
This is a common task for protein-protein docking, where a large number of docked conformations are generated and then have to be compared to ground truth to identify the best-generated poses. This calculation is usually done using the ProFit software and we, therefore, compare our results with those obtained with ProFit. The code to compute the similarity measure for different decoys is simple: 158 | 159 | ```python 160 | from pdb2sql import StructureSimilarity 161 | import os 162 | ref = '1AK4.pdb' 163 | decoys = os.listdir('./decoys') 164 | irmsd = {} 165 | 166 | for d in decoys: 167 | sim = StructureSimilarity(d, ref) 168 | irmsd[d] = sim.compute_irmsd_fast(method='svd', izone='1AK4.izone') 169 | ``` 170 | 171 | Note that the method will compute the i-zone, i.e., the zone of the proteins that form the interface in a similar way to ProFit. This is done for the first calculations and the i-zone is then reused for the subsequent calculations. The comparison of our interface rmsd values to those given by ProFit is shown in Fig 1. 172 | 173 | ![Example figure.](sim.png) 174 | Figure 1. Left - Superimposed model (green) and reference (cyan) structures. Right - comparison of interface rmsd values given by `pdb2sql` and by `ProFit`. 175 | 176 | # Acknowledgements 177 | We acknowledge contributions from Li Xue, Sonja Georgievska, and Lars Ridder. 178 | 179 | 180 | # References 181 | -------------------------------------------------------------------------------- /paper/sim.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepRank/pdb2sql/76d7b684b6921261436e2656d9ecd8be86148199/paper/sim.png -------------------------------------------------------------------------------- /pdb2sql/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | PDB2SQL 3 | 4 | A package to leverage SQL queries to parse, manipulate and process PDB files. 5 | 6 | Provides: 7 | 1. 
a powerful pdb2sql object to convert PDB data into an SQL database 8 | 2. structure transformation functions (rotations, translations...) 9 | 3. useful capabilities to 10 | - calculate structure interface (contact atoms and residues) 11 | - calculate structure similarity (iRMSD, lRMSD, FNAT, DockQ...) 12 | 13 | Reference: 14 | Online tutorial and documentation: https://pdb2sql.readthedocs.io 15 | 16 | Example: 17 | `pdb2sql` makes it easy to load a PDB file into an object. Once loaded, 18 | the data can be parsed using SQL queries. To facilitate the adoption of 19 | the tool, simple methods have been developed to wrap the SQL queries in 20 | simple methods. 21 | 22 | For example, to obtain the positions of all carbon, nitrogen and oxygen 23 | atoms of chain A from all residues but VAL and LEU, one can use: 24 | 25 | >>> from pdb2sql import pdb2sql 26 | >>> pdb = pdb2sql('1AK4.pdb') 27 | >>> atoms = pdb.get('x,y,z', 28 | ... name = ['C','N', 'O'], 29 | ... no_resName = ['VAL','LEU'], 30 | ... chainID = 'A') 31 | 32 | 33 | Available modules: 34 | pdb2sql 35 | Core `pdb2sql` object 36 | many2sql 37 | Core `many2sql` object 38 | interface 39 | Core `interface` object 40 | StructureSimilarity 41 | Tools to compute structure similarities between two structures. 42 | transform 43 | Tools to do structure transformation 44 | align 45 | Tools to do structure alignment 46 | superpose 47 | Tools to do structure superposition 48 | 49 | Utilities: 50 | fetch 51 | download PDB file from PDB website https://www.rcsb.org/. 52 | """ 53 | 54 | from .pdb2sqlcore import pdb2sql 55 | from .many2sql import many2sql 56 | from .interface import interface 57 | from .StructureSimilarity import StructureSimilarity 58 | from . 
def align(pdb, axis='x', export=True, **kwargs):
    """Align the largest principal component of a structure along a cartesian axis.

    Arguments:
        pdb {str, pdb2sql} -- the pdbfile or the sql database of the complex

    Keyword Arguments:
        axis {str} -- cartesian axis for alignment (default: {'x'})
        export {bool} -- export the aligned structure to file (default: {True})
        **kwargs {dict} -- selection of the sub-part of the structure used
            to compute the principal component

    Returns:
        pdb2sql -- sql database of the aligned structure

    Example:
        >>> pdb = '1AK4'
        >>> sql = align(pdb,chainID='A')
    """
    sql = pdb if isinstance(pdb, pdb2sql) else pdb2sql(pdb)

    # the selection only drives the PCA; align_pca_vect then rotates
    # every atom of the database
    xyz = np.array(sql.get('x,y,z', **kwargs))
    vect = get_max_pca_vect(xyz)
    sql = align_pca_vect(sql, vect, axis)

    if export:
        export_aligned(sql)

    return sql


def align_interface(ppi, plane='xy', export=True, **kwargs):
    """Align the interface of a complex in a given plane.

    The smallest principal component of the contact atoms is aligned
    along the axis normal to the requested plane.

    Arguments:
        ppi {interface} -- sql interface or pdb file
        plane {str} -- plane for alignment, one of 'xy', 'xz' or 'yz'

    Keyword Arguments:
        export {bool} -- write a pdb file (default: {True})
        kwargs {dict} -- keyword argument from interface.get_contact_atoms method

    Raises:
        KeyError: plane is not 'xy', 'xz' or 'yz'

    Returns:
        interface -- sql database of the aligned structure
    """
    sql = ppi if isinstance(ppi, interface) else interface(ppi)

    # gather the rowIDs of the contact atoms of all chains
    index_contact = sql.get_contact_atoms(**kwargs)
    row_id = []
    for rows in index_contact.values():
        row_id += rows
    xyz = np.array(sql.get('x,y,z', rowID=row_id))

    # get the pca eigenvect we want to align
    vect = get_min_pca_vect(xyz)

    # axis normal to each plane
    dict_plane = {'xy': 'z', 'xz': 'y', 'yz': 'x'}
    sql = align_pca_vect(sql, vect, dict_plane[plane])

    if export:
        export_aligned(sql)

    return sql


def align_pca_vect(sql, vect, axis):
    """Rotate all the atoms of sql so that vect lies along axis.

    Arguments:
        sql {pdb2sql} -- sqldb of the complex
        vect {np.ndarray} -- pca eigenvect
        axis {str} -- axis along which to align vect

    Returns:
        pdb2sql -- aligned sqldb (the same object, updated in place)
    """
    # rotation angles
    phi, theta = get_rotation_angle(vect)

    # rotate the complete set of coordinates and write it back
    xyz = np.array(sql.get('x,y,z'))
    xyz = _align_along_axis(xyz, axis, phi, theta)
    sql.update('x,y,z', xyz)

    return sql


def _strip_pdb_suffix(fname):
    """Return fname without a trailing '.pdb' extension, if it has one."""
    suffix = '.pdb'
    if fname.endswith(suffix):
        return fname[:-len(suffix)]
    return fname


def export_aligned(sql):
    """Export a pdb file of the aligned pdb.

    The file is named '<input>_aligned.pdb' when the database was built
    from a file name, and 'aligned_structure.pdb' otherwise.

    Arguments:
        sql {pdb2sql} -- aligned sqldb
    """
    if isinstance(sql.pdbfile, str):
        # str.rstrip('.pdb') would strip any trailing '.', 'p', 'd' or 'b'
        # characters (e.g. 'bdp.pdb' -> ''), so remove the suffix explicitly
        fname = _strip_pdb_suffix(sql.pdbfile) + '_aligned.pdb'
    else:
        fname = 'aligned_structure.pdb'
    sql.exportpdb(fname)


def get_rotation_angle(vmax):
    """Extract the spherical angles of a vector.

    Arguments:
        vmax {np.array} -- 3D vector, e.g. an eigenvector of the PCA

    Returns:
        tuple -- (phi, theta) with phi the azimuthal angle in the xy plane
            and theta the polar angle measured from the z axis
    """
    x, y, z = vmax
    r = np.linalg.norm(vmax)

    phi = np.arctan2(y, x)
    theta = np.arccos(z/r)

    return phi, theta


def get_max_pca_vect(xyz):
    """Get the eigenvector of the pca with the largest eigenvalue.

    Arguments:
        xyz {numpy.ndarray} -- matrix of the atoms coordinates

    Returns:
        numpy.ndarray -- eigenvector associated with the largest eigenvalue
    """
    u, v = pca(xyz)
    return v[:, np.argmax(u)]


def get_min_pca_vect(xyz):
    """Get the eigenvector of the pca with the smallest eigenvalue.

    Arguments:
        xyz {numpy.ndarray} -- matrix of the atoms coordinates

    Returns:
        numpy.ndarray -- eigenvector associated with the smallest eigenvalue
    """
    u, v = pca(xyz)
    return v[:, np.argmin(u)]


def pca(mat):
    """Compute the principal component analysis of a set of points.

    Arguments:
        mat {numpy.ndarray} -- matrix of points [npoints x ndim]

    Returns:
        tuple -- eigenvalues, eigenvectors (one eigenvector per column)
            of the covariance matrix of the points
    """
    # centre the points and put one dimension per row, as np.cov expects
    scat = (mat-np.mean(mat.T, axis=1)).T
    u, v = np.linalg.eig(np.cov(scat))
    return u, v


def _align_along_axis(xyz, axis, phi, theta):
    """Align the xyz coordinates along the given axis.

    Arguments:
        xyz {numpy.ndarray} -- coordinates of the atoms
        axis {str} -- axis to align
        phi {float} -- azimuthal angle
        theta {float} -- polar angle

    Raises:
        ValueError: axis should be x y or z

    Returns:
        nd.array -- rotated coordinates
    """
    z_axis = np.array([0, 0, 1])
    y_axis = np.array([0, 1, 0])

    # first rotate around z, then around y
    if axis == 'x':
        xyz = rot_xyz_around_axis(xyz, z_axis, -phi)
        xyz = rot_xyz_around_axis(xyz, y_axis, np.pi/2 - theta)

    elif axis == 'y':
        xyz = rot_xyz_around_axis(xyz, z_axis, np.pi/2 - phi)
        xyz = rot_xyz_around_axis(xyz, y_axis, np.pi/2 - theta)

    elif axis == 'z':
        xyz = rot_xyz_around_axis(xyz, z_axis, -phi)
        xyz = rot_xyz_around_axis(xyz, y_axis, -theta)
    else:
        raise ValueError('axis should be x, y ,or z')

    return xyz
class interface(pdb2sql):

    def __init__(self, pdb, **kwargs):
        """Create an independent SQL database for interface object.

        Args:
            pdb(str, list, ndarray, pdb2sql): pdb file or data, or pdb2sql object.
                If pdb2sql object is used, all changes in the database of pdb2sql
                object before initializing the interface instance will be used in the
                new sql database of the interface instance; afterwards, two databases
                will be independent from each other.

        Examples:
            >>> from pdb2sql import pdb2sql
            >>> from pdb2sql import interface
            >>> # use pdb2sql object as input
            >>> pdb_db = pdb2sql('3CRO.pdb')
            >>> interface_db1 = interface(pdb_db)
            >>> # use pdb file as input
            >>> interface_db2 = interface('3CRO.pdb')
        """
        if isinstance(pdb, pdb2sql):
            # flush pending changes, then rebuild from the pdb lines so the
            # two databases do not share a connection
            pdb._commit()
            pdb = pdb.sql2pdb()
        super().__init__(pdb, **kwargs)

    def __repr__(self):
        return f'{self.__module__}.{self.__class__.__name__} object'

    ##########################################################################
    #
    # get the contact atoms
    #
    ##########################################################################

    def get_contact_atoms(
            self,
            cutoff=8.5,
            allchains=False,
            chain1='A',
            chain2='B',
            extend_to_residue=False,
            only_backbone_atoms=False,
            excludeH=False,
            return_contact_pairs=False):
        """Get rowIDs of contact atoms.

        Args:
            cutoff (float): distance cutoff for calculating contact.
                Defaults to 8.5.
            allchains (bool): calculate contacts for all chains or not.
                Defaults to False.
            chain1 (str): first chain ID. Defaults to 'A'.
                Used when 'allchains' is False.
            chain2 (str): second chain ID. Defaults to 'B'.
                Used when 'allchains' is False.
            extend_to_residue (bool): get all atoms of the residues containing
                at least one contact atom. Defaults to False.
            only_backbone_atoms (bool): only use backbone atoms to
                calculate contact or not. Defaults to False.
            excludeH (bool): Exclude hydrogen atoms for contact
                calculation or not. Defaults to False.
            return_contact_pairs (bool): if return atomic contact pairs
                or not. Defaults to False.

        Raises:
            ValueError: a requested chain is not present in the structure.

        Returns:
            dict: rowID of contact atoms per chain
                ({chainID: [rowID, ...]}), or, when return_contact_pairs
                is True, rowID of contact atom pairs
                ({rowID: [rowID of the partner atoms, ...]}).
        """
        if allchains:
            chainIDs = self.get_chains()
        else:
            chainIDs = [chain1, chain2]

        chains = self.get_chains()
        for c in chainIDs:
            if c not in chains:
                raise ValueError(
                    'chain %s not found in the structure' % c)

        xyz = dict()
        index = dict()
        atName = dict()

        for chain in chainIDs:
            data = np.array(
                self.get('x,y,z,rowID,resName,name', chainID=chain))
            xyz[chain] = data[:, :3].astype(float)
            index[chain] = data[:, 3].astype(int)
            atName[chain] = data[:, -1]

        # every requested chain gets an entry up front, so that the
        # post-processing below also works when no pair of chains is
        # visited (e.g. allchains=True on a single-chain structure)
        index_contact = {chain: [] for chain in chainIDs}
        index_contact_pairs = dict()

        # TODO : loop through the smallest chain instead ...
        for chain_a, chain_b in itertools.combinations(chainIDs, 2):

            xyz_a, xyz_b = xyz[chain_a], xyz[chain_b]
            names_a, names_b = atName[chain_a], atName[chain_b]

            for i, x0 in enumerate(xyz_a):

                # discard the atoms that cannot be part of a contact
                # before paying for the distance computation
                if excludeH and names_a[i][0] == 'H':
                    continue
                if only_backbone_atoms and \
                        names_a[i] not in self.backbone_atoms:
                    continue

                # atoms of the other chain within the cutoff
                contacts = np.where(
                    np.sqrt(np.sum((xyz_b - x0)**2, 1)) <= cutoff)[0]
                if len(contacts) == 0:
                    continue

                pairs = [
                    index[chain_b][k] for k in contacts
                    if (not only_backbone_atoms
                        or names_b[k] in self.backbone_atoms)
                    and not (excludeH and names_b[k][0] == 'H')]

                if len(pairs) > 0:
                    rowid = index[chain_a][i]
                    # an atom can be in contact with several chains when
                    # allchains is True: accumulate instead of overwriting
                    index_contact_pairs.setdefault(rowid, []).extend(pairs)
                    index_contact[chain_a].append(rowid)
                    index_contact[chain_b] += pairs

        # if no atoms were found
        if len(index_contact_pairs) == 0:
            warnings.warn('No contact atoms detected in pdb2sql')

        # get uniques
        for chain in chainIDs:
            index_contact[chain] = sorted(set(index_contact[chain]))

        # extend the list to entire residue
        if extend_to_residue:
            for chain in chainIDs:
                index_contact[chain] = self._extend_contact_to_residue(
                    index_contact[chain], only_backbone_atoms)

        # not sure that's the best way of dealing with that
        # TODO split to two functions get_contact_atoms and
        # get_contact_atom_pairs
        if return_contact_pairs:
            return index_contact_pairs
        else:
            return index_contact

    def _extend_contact_to_residue(self, index1, only_backbone_atoms):
        """Extend a list of atoms to all the atoms of their residues.

        Args:
            index1 (list): rowIDs of the contact atoms.
            only_backbone_atoms (bool): only keep the backbone atoms of
                the residues or not.

        Returns:
            list: sorted unique rowIDs of the atoms of the residues that
                contain at least one atom of index1.
        """
        # unique (chainID, resName, resSeq) of the contact atoms;
        # tuples because the rows must be hashable
        rows = self.get('chainID,resName,resSeq', rowID=index1)
        residues = set(tuple(row) for row in rows)

        extended = []
        for chainID, resName, resSeq in residues:

            if only_backbone_atoms:
                atoms = self.get(
                    'rowID,name',
                    chainID=chainID,
                    resName=resName,
                    resSeq=resSeq)
                extended += [rowid for rowid, name in atoms
                             if name in self.backbone_atoms]
            else:
                extended += self.get('rowID',
                                     chainID=chainID,
                                     resName=resName,
                                     resSeq=resSeq)

        # make sure that we don't have double
        return sorted(set(extended))

    def get_contact_residues(
            self,
            cutoff=8.5,
            allchains=False,
            chain1='A',
            chain2='B',
            excludeH=False,
            only_backbone_atoms=False,
            return_contact_pairs=False):
        """Get contact residues represented with (chain,resSeq, resname).

        Args:
            cutoff (float): distance cutoff for contact calculation
                Defaults to 8.5.
            allchains (bool): calculate contacts for all chains or not.
                Defaults to False.
            chain1 (str): first chain ID. Defaults to 'A'.
            chain2 (str): second chain ID. Defaults to 'B'.
            excludeH (bool): Exclude hydrogen atoms for contact
                calculation or not. Defaults to False.
            only_backbone_atoms (bool): only use backbone atoms to
                calculate contact or not. Defaults to False.
            return_contact_pairs (bool): if return residue contact pairs
                or not. Defaults to False.

        Returns:
            dict: (chain,resSeq,resName) of contact residues or
                contact residue pairs.
        """
        # TODO split this func to two functions
        # TODO get_contact_residues and get_contact_residue_pairs

        if return_contact_pairs:

            residue_contact_pairs = {}

            # get the contact atom pairs
            atom_pairs = self.get_contact_atoms(
                cutoff=cutoff,
                allchains=allchains,
                chain1=chain1,
                chain2=chain2,
                only_backbone_atoms=only_backbone_atoms,
                excludeH=excludeH,
                return_contact_pairs=True)

            for iat1, atoms2 in atom_pairs.items():

                # residue of the current atom
                data1 = tuple(
                    self.get(
                        'chainID,resSeq,resName',
                        rowID=[iat1])[0])

                # residues of its partner atoms, stored without double
                partners = residue_contact_pairs.setdefault(data1, set())
                data2 = self.get(
                    'chainID,resSeq,resName', rowID=atoms2)
                for resData in data2:
                    partners.add(tuple(resData))

            for resData in residue_contact_pairs.keys():
                residue_contact_pairs[resData] = sorted(
                    residue_contact_pairs[resData])

            return residue_contact_pairs

        else:

            # get the contact atoms
            contact_atoms = self.get_contact_atoms(
                cutoff=cutoff,
                allchains=allchains,
                chain1=chain1,
                chain2=chain2,
                excludeH=excludeH,
                only_backbone_atoms=only_backbone_atoms,
                return_contact_pairs=False)

            # unique residues of the contact atoms of each chain
            residue_contact = dict()
            for chain, rowids in contact_atoms.items():
                rows = self.get(
                    'chainID,resSeq,resName',
                    rowID=rowids)
                residue_contact[chain] = sorted(
                    set(tuple(resData) for resData in rows))

            return residue_contact
class many2sql(pdb2sql):

    def __init__(self, pdbfiles, tablenames=None):
        """Create a sql database containing multiple pdbs.

        Args:
            pdbfiles(list): list of pdb files or data.
            tablenames (list): list of table names in string
                Defaults to None, in which case the tables are named
                'ATOM', 'ATOM1', 'ATOM2', ...

        Raises:
            TypeError: pdbfiles must be a list
            TypeError: tablenames must be a list
            TypeError: tablenames must be a list of strings
        """

        if not isinstance(pdbfiles, list):
            raise TypeError('pdbfiles must be a list')

        if tablenames is not None:
            if not isinstance(tablenames, list):
                raise TypeError('tablenames must be a list')
            for i in tablenames:
                if not isinstance(i, str):
                    raise TypeError(f'tablenames must be a list of strings,'
                                    f' {i} is a type of {type(i)}')

        self.npdb = len(pdbfiles)

        self.tablenames = tablenames
        if self.tablenames is None:
            self.tablenames = ['ATOM'] + [
                'ATOM' + str(i) for i in range(1, self.npdb)]

        # the first structure initialises the database ...
        super().__init__(self.convert_input(
            pdbfiles[0]), tablename=self.tablenames[0])

        # ... and every other structure gets its own table
        for i in range(1, self.npdb):
            self._create_table(
                self.convert_input(pdbfiles[i]), tablename=self.tablenames[i])

    def __call__(self, **kwargs):
        """Return a class instance containing the selection of each structure

        Args:
            kwargs: argument to select atoms, forwarded to sql2pdb

        Returns:
            many2sql: class instance containing the selection of each structure
        """
        new_db = None
        for n in self._get_table_names():
            pdb_data = self.sql2pdb(tablename=n, **kwargs)
            if new_db is None:
                new_db = many2sql([pdb_data], tablenames=[n])
            else:
                new_db._create_table(pdb_data, tablename=n)

        return new_db

    def convert_input(self, pdb):
        """Converts the input in a format that pdb2sql accepts

        Args:
            pdb (str, list, pdb2sql): input data

        Returns:
            str, list: correct input
        """
        if isinstance(pdb, pdb2sql):
            return pdb.sql2pdb()

        return pdb

    def intersect(self, match=('name', 'resname', 'resSeq', 'chainID')):
        """Returns a many2sql instance containing the common part of all the structures.

        Args:
            match (list, optional): column name that must match in the intersection.
                Defaults to ('name', 'resname', 'resSeq', 'chainID').

        Returns:
            many2sql: a class instance containing the tables of the matching structure
        """
        all_data = self.get_intersection('*', match=match)
        all_names = self._get_table_names()

        new_db = None
        for name, data in zip(all_names, all_data):
            pdb_data = self.data2pdb(data)
            if new_db is None:
                new_db = many2sql([pdb_data], tablenames=[name])
            else:
                new_db._create_table(pdb_data, tablename=name)

        return new_db

    def get_all(self, columns, **kwargs):
        """Returns the data from the selection of all table in the instance

        Args:
            columns (str): column name(s) to return

        Returns:
            list: data per structure
        """
        return [self.get(columns, tablename=n, **kwargs)
                for n in self._get_table_names()]

    def get_intersection(self, column,
                         match=('name', 'resname', 'resSeq', 'chainID')):
        """Return the data of the intersection

        Args:
            column (str): column table to return
            match (list, optional): column name that must match in the intersection.
                Defaults to ('name', 'resname', 'resSeq', 'chainID').

        Returns:
            list: data per structure
        """
        names = self._get_table_names()
        ntable = len(names)

        # number of columns returned for each table
        if column == '*':
            column_list = list(self.col.keys())
        else:
            column_list = column.split(',')
        ncol = len(column_list)

        # fields to select: the requested columns of every table
        fields = ', '.join(
            n + '.' + c for n in names for c in column.split(',')) + ' '

        # join the table
        from_join = 'from ' + ' INNER JOIN '.join(names) + ' '

        # conditions: every matched column is equal in every pair of tables
        terms = [names[i1] + '.' + attr + '=' + names[i2] + '.' + attr
                 for attr in match
                 for i1 in range(ntable - 1)
                 for i2 in range(i1 + 1, ntable)]
        cond = 'on ' + ' and '.join(terms) + ';' if terms else ';'

        query = 'select ' + fields + from_join + cond
        raw_data = self.conn.execute(query)

        # each row holds the columns of all the tables side by side:
        # split it back in one chunk of ncol values per table
        data = [[] for _ in range(ntable)]
        for x in raw_data:
            for it in range(ntable):
                s, e = it*ncol, (it+1)*ncol
                data[it].append(list(x[s:e]))
        return data
class pdb2sql_base(object):

    def __init__(
            self,
            pdbfile,
            sqlfile=None,
            fix_chainID=False,
            verbose=False):
        """Base class for the definition of sql database.

        Args:
            pdbfile (str, list(str/bytes), ndarray) : name of pdbfile or
                list or ndarray containing the pdb data
            sqlfile (str, optional): name of the sqlfile.
                By default it is created in memory only.
            fix_chainID (bool, optional): check if the name of the chains
                are A,B,C, .... and fix it if not.
            verbose (bool): probably print stuff
        """
        self.pdbfile = pdbfile
        self.sqlfile = sqlfile
        self.fix_chainID = fix_chainID
        self.is_valid = True
        self.verbose = verbose

        self.backbone_atoms = ['CA', 'C', 'N', 'O']

        # hard limit for the number of SQL variables
        self.SQLITE_LIMIT_VARIABLE_NUMBER = 999
        self.max_sql_values = 950

        # column names and types
        self.col = {'serial': 'INT',
                    'name': 'TEXT',
                    'altLoc': 'TEXT',
                    'resName': 'TEXT',
                    'chainID': 'TEXT',
                    'resSeq': 'INT',
                    'iCode': 'TEXT',
                    'x': 'REAL',
                    'y': 'REAL',
                    'z': 'REAL',
                    'occ': 'REAL',
                    'temp': 'REAL',
                    'element': 'TEXT',
                    'model': 'INT'}

        # delimiter of the column format
        # taken from
        # http://www.wwpdb.org/documentation/file-format-content/format33/sect9.html#ATOM
        self.delimiter = {
            'serial': [6, 11],
            'name': [12, 16],
            'altLoc': [16, 17],
            'resName': [17, 20],
            'chainID': [21, 22],
            'resSeq': [22, 26],
            'iCode': [26, 27],
            'x': [30, 38],
            'y': [38, 46],
            'z': [46, 54],
            'occ': [54, 60],
            'temp': [60, 66],
            'element': [76, 78]}

    ##########################################################################
    #
    #   CREATION AND PRINTING
    #
    ##########################################################################

    # Main function to create the SQL data base
    def _create_sql(self):
        raise NotImplementedError()

    def _get_table_names(self):
        """Return the names of all the tables of the database."""
        names = self.conn.execute(
            "SELECT name from sqlite_master WHERE type='table';")
        return [n[0] for n in names]

    # get the properties
    def get(self, atnames, **kwargs):
        raise NotImplementedError()

    def get_xyz(self, tablename='atom', **kwargs):
        """Shortcut to get the xyz coordinates."""
        return self.get('x,y,z', tablename=tablename, **kwargs)

    def get_residues(self, tablename='atom', **kwargs):
        """Get the residue sequence.

        Returns:
            list : residue sequence, as unique (chainID, resName, resSeq)
                tuples in order of first appearance

        Examples:
            >>> db.get_residues()
        """
        res = [tuple(x) for x in self.get(
            'chainID,resName,resSeq', tablename=tablename, **kwargs)]
        # dict keys are unique and keep the insertion order: de-duplicate
        # in one pass instead of sorting on list.index (quadratic)
        return list(dict.fromkeys(res))

    def get_chains(self, tablename='atom', **kwargs):
        """Get the chain IDs.

        Returns:
            list : chain IDs in alphabetical order.

        Examples:
            >>> db.get_chains()
        """
        chains = self.get('chainID', tablename=tablename, **kwargs)
        return sorted(set(chains))

    def update(self, attribute, values, **kwargs):
        raise NotImplementedError()

    def update_xyz(self, xyz, tablename='atom', **kwargs):
        """Update the xyz coordinates.

        Args:
            xyz (list, ndarray): new coordinates
            tablename (str): table to update. Defaults to 'atom'.
            kwargs: argument to select atoms, forwarded to update
        """
        # forward the table name as get_xyz does, otherwise the update
        # always lands in the default table of the implementation
        self.update('x,y,z', xyz, tablename=tablename, **kwargs)

    def update_column(self, colname, values, index=None):
        """Update a single column."""
        raise NotImplementedError()

    def add_column(self, colname, coltype='FLOAT', default=0):
        """Add a new column to the ATOM table."""
        raise NotImplementedError()

    def exportpdb(self, fname, append=False, tablename='atom', **kwargs):
        """Export a PDB file.

        Args:
            fname(str): output filename
            append(bool): append exported data to file or not
            tablename (str): table to export. Defaults to 'atom'.
            kwargs: argument to select atoms, dict value must be list,
                e.g.:
                    - name = ['CA', 'O']
                    - no_name = ['CA', 'C']
                    - chainID = ['A']
                    - no_chainID = ['A']
        """
        # query first: a failing selection must not truncate an existing file
        lines = self.sql2pdb(tablename=tablename, **kwargs)

        # the context manager closes the file even if a write fails
        with open(fname, 'a' if append else 'w') as f:
            f.writelines(line + '\n' for line in lines)

    def sql2pdb(self, tablename='atom', **kwargs):
        """Convert SQL data to PDB formatted lines.

        Args:
            tablename (str): table to convert. Defaults to 'atom'.
            kwargs: argument to select atoms, dict value must be list,
                e.g.:
                    - name = ['CA', 'O']
                    - no_name = ['CA', 'C']
                    - chainID = ['A']
                    - no_chainID = ['A']
        Returns:
            list: pdb-format lines
        """
        cols = ','.join(self.col.keys())
        data = self.get(cols, tablename=tablename, **kwargs)
        return self.data2pdb(data)

    def data2pdb(self, data):
        """converts data from a get method to a pdb

        Args:
            data (list): data from a get statement, one row per atom with
                the values ordered as the keys of self.col

        Returns:
            list: the formatted pdb data
        """
        pdb = []
        # the PDB format is pretty strict: the padding below puts every
        # field at the columns listed in self.delimiter
        # http://www.wwpdb.org/documentation/file-format-content/format33/sect9.html#ATOM
        for d in data:
            line = 'ATOM  '                             # columns 0-6
            line += '{:>5}'.format(d[0])                # serial
            line += ' '
            line += self._format_atomname(d)            # name
            line += '{:>1}'.format(d[2])                # altLoc
            line += '{:>3}'.format(d[3])                # resname
            line += ' '
            line += '{:>1}'.format(d[4])                # chainID
            line += '{:>4}'.format(d[5])                # resSeq
            line += '{:>1}'.format(d[6])                # iCODE
            line += '   '                               # columns 27-30
            line += pdb2sql_base._format_xyz(d[7])      # x
            line += pdb2sql_base._format_xyz(d[8])      # y
            line += pdb2sql_base._format_xyz(d[9])      # z
            line += '{:>6.2f}'.format(d[10])            # occ
            line += '{:>6.2f}'.format(d[11])            # temp
            line += ' ' * 10
            line += '{:>2}'.format(d[12])               # element
            line += ' ' * 2                             # charge, keep it blank
            pdb.append(line)

        return pdb

    def _format_atomname(self, data):
        """Format atom name to align with PDB requirements.

        - alignment of one-letter atom name starts at column 14,
        - while two-letter atom name such as FE starts at column 13.

        Args:
            data(list): sql output for one pdb line

        Returns:
            str: formatted atom name
        """
        name = data[1]
        lname = len(name)
        if lname in (1, 4):
            name = '{:^4}'.format(name)
        elif lname == 2:
            if name == data[12]:  # name == element
                name = '{:<4}'.format(name)
            else:
                name = '{:^4}'.format(name)
        else:
            if name[0] in '0123456789':
                name = '{:<4}'.format(name)
            else:
                name = '{:>4}'.format(name)
        return name

    @staticmethod
    def _format_xyz(i):
        """Format PDB coordinates x,y or z value.

        Note: PDB has a fixed 8-column space for x,y or z value.
            Thus the value should be in the range of (-1e7, 1e8).
            The number of decimals is reduced for large values so that
            the result always fits in 8 characters.

        Args:
            i (float): PDB coordinate x, y or z.

        Raises:
            ValueError: Exceed the range of (-1e7, 1e8)

        Returns:
            str: formatted x, y or z value.
        """

        if i >= 1e8 - 0.5 or i <= -1e7 + 0.5:
            raise ValueError(
                f'PDB coordination {i} exceeds the range of (-1e7, 1e8) '
                f'after rounding.')
        elif i >= 1e6 - 0.5 or i <= -1e5 + 0.5:
            i = '{:>8.0f}'.format(i)
        elif i >= 1e5 - 0.5 or i <= -1e4 + 0.5:
            i = '{:>8.1f}'.format(i)
        elif i >= 1e4 - 0.5 or i <= -1e3 + 0.5:
            i = '{:>8.2f}'.format(i)
        else:
            i = '{:>8.3f}'.format(i)

        return i

    def _close(self, rmdb=True):
        """Close the connection to the database.

        Args:
            rmdb (bool): when the database lives in a file, remove that
                file (True) or commit the pending changes and keep it
                (False). Ignored for in-memory databases.
        """
        if self.sqlfile is None:
            self.conn.close()

        else:
            if rmdb:
                self.conn.close()
                # os.remove instead of shelling out to `rm`: portable and
                # safe with spaces or shell metacharacters in the path
                try:
                    os.remove(self.sqlfile)
                except FileNotFoundError:
                    # nothing to remove, e.g. the file was never written
                    pass
            else:
                self._commit()
                self.conn.close()
def _strip_pdb_suffix(path):
    """Return the base name of ``path`` without a trailing '.pdb'."""
    name = os.path.basename(path)
    # str.rstrip('.pdb') removes any trailing '.', 'p', 'd' or 'b'
    # characters (e.g. 'complex_b.pdb' -> 'complex_'), so cut the
    # suffix explicitly instead.
    return name[:-len('.pdb')] if name.endswith('.pdb') else name


def superpose(mobile, target, method='svd', only_backbone=True, export=True, **kwargs):
    """Superpose a mobile complex onto a target complex.

    The transformation is fitted on the selected atoms only and then
    applied to all atoms of the mobile structure, whose coordinates are
    updated in its SQL database.

    Arguments:
        mobile {str or pdb2sql} -- name or sqldb of the mobile pdb
        target {str or pdb2sql} -- name or sqldb of the target pdb

    Keyword Arguments:
        method {str} -- method used to superpose the complex,
            'svd' or 'quaternion' (default: {'svd'})
        only_backbone {bool} -- use only backbone atoms (CA, C, N, O)
            to align (default: {True})
        export {bool} -- export the superposed mobile structure to
            '<mobile>_superposed_on_<target>.pdb' (default: {True})
        **kwargs -- keyword arguments used to select a portion of the pdb

    Raises:
        ValueError: if ``name`` is passed in kwargs while
            only_backbone is True.

    Returns:
        pdb2sql -- the mobile structure with updated coordinates

    Example:
        >> pdb1 = '1AK4_5w.pdb'
        >> pdb2 = '1AK4_10w.pdb'
        >> superpose(pdb1, pdb2, chainID='A')

    """

    backbone_atoms = ['CA', 'C', 'N', 'O']

    sql_mobile = mobile if isinstance(mobile, pdb2sql) else pdb2sql(mobile)
    sql_target = target if isinstance(target, pdb2sql) else pdb2sql(target)

    if only_backbone:
        if 'name' in kwargs:
            raise ValueError(
                'Atom type specified but only_backbone == True')
        kwargs['name'] = backbone_atoms

    # selections of some atoms
    selection_mobile = np.array(sql_mobile.get("x,y,z", **kwargs))
    selection_target = np.array(sql_target.get("x,y,z", **kwargs))

    # deal with the cases where some res are missing/added
    if len(selection_mobile) != len(selection_target):
        warnings.warn(
            'selection have different size, getting intersection')
        selection_mobile, selection_target = get_intersection(
            sql_mobile, sql_target, **kwargs)

    # the mobile original coordinates (all atoms, not only the selection)
    xyz_mobile = np.array(sql_mobile.get("x,y,z"))

    # transform the xyz mobile
    xyz_mobile = superpose_selection(xyz_mobile,
                                     selection_mobile, selection_target, method)

    # update the sql
    sql_mobile.update('x,y,z', xyz_mobile)

    # export a pdb file
    if export:
        target_name = _strip_pdb_suffix(sql_target.pdbfile)
        mobile_name = _strip_pdb_suffix(sql_mobile.pdbfile)
        fname = mobile_name + '_superposed_on_' + target_name + '.pdb'
        sql_mobile.exportpdb(fname)

    return sql_mobile
def get_rotation_matrix(p, q, method='svd'):
    """Get the rotation matrix with the requested method.

    Arguments:
        p {np.ndarray} -- coordinate
        q {np.ndarray} -- coordinate

    Keyword Arguments:
        method {str} -- method to use, 'svd' or 'quaternion';
            matched case-insensitively (default: {'svd'})

    Raises:
        ValueError: if method is incorrect

    Returns:
        np.ndarray -- rotation matrix
    """
    # normalise once so both comparisons see the same value
    chosen = method.lower()

    # get the matrix with Kabsh method
    if chosen == 'svd':
        return get_rotation_matrix_Kabsh(p, q)

    # or with the quaternion method
    if chosen == 'quaternion':
        return get_rotation_matrix_quaternion(p, q)

    # the message lists exactly the values accepted above
    raise ValueError(
        f'{method} is not a valid method for rmsd alignement. '
        f'Options are svd or quaternion')
def get_rotation_matrix_quaternion(P, Q):
    """Get the rotation matrix to align two point clouds.

    The method is based on the quaternion approach
    http://www.ams.stonybrook.edu/~coutsias/papers/rmsd17.pdf

    Both point clouds must already be centered on the origin. The
    returned matrix U rotates P onto Q, i.e. ``np.dot(U, P.T).T`` is
    the best fit of P on Q.

    Args:
        P (np.array): xyz of the first point cloud
        Q (np.array): xyz of the second point cloud

    Returns:
        np.array: 3x3 rotation matrix

    Raises:
        ValueError: matrix have different sizes
        ValueError: one of the point clouds is not centered
    """
    if P.shape[0] != Q.shape[0]:
        raise ValueError("Matrix don't have the same number of points",
                         P.shape, Q.shape)

    p_center, q_center = np.abs(np.mean(P, 0)), np.abs(np.mean(Q, 0))
    eps = 1E-6
    if any(p_center > eps) or any(q_center > eps):
        raise ValueError('You must center the fragment first',
                         p_center, q_center)

    # form the correlation matrix
    R = np.dot(P.T, Q)

    # form the symmetric F matrix (eq. 10 of the reference)
    F = np.array([
        [R[0, 0] + R[1, 1] + R[2, 2], R[1, 2] - R[2, 1],
         R[2, 0] - R[0, 2], R[0, 1] - R[1, 0]],
        [R[1, 2] - R[2, 1], R[0, 0] - R[1, 1] - R[2, 2],
         R[0, 1] + R[1, 0], R[0, 2] + R[2, 0]],
        [R[2, 0] - R[0, 2], R[0, 1] + R[1, 0],
         -R[0, 0] + R[1, 1] - R[2, 2], R[1, 2] + R[2, 1]],
        [R[0, 1] - R[1, 0], R[0, 2] + R[2, 0],
         R[1, 2] + R[2, 1], -R[0, 0] - R[1, 1] + R[2, 2]],
    ], dtype=float)

    # F is symmetric, so use the symmetric solver: real eigenpairs,
    # eigenvalues sorted in ascending order.
    _, eigvecs = np.linalg.eigh(F)

    # eigenvector of the highest eigenvalue = optimal unit quaternion
    q0, q1, q2, q3 = eigvecs[:, -1]

    # form the rotation matrix (eq. 33 of the reference)
    U = np.array([
        [q0**2 + q1**2 - q2**2 - q3**2,
         2 * (q1 * q2 - q0 * q3),
         2 * (q1 * q3 + q0 * q2)],
        [2 * (q1 * q2 + q0 * q3),
         q0**2 - q1**2 + q2**2 - q3**2,
         2 * (q2 * q3 - q0 * q1)],
        [2 * (q1 * q3 - q0 * q2),
         2 * (q2 * q3 + q0 * q1),
         q0**2 - q1**2 - q2**2 + q3**2],
    ])

    return U
def get_rot_axis_angle(seed=None):
    """Draw a random rotation axis and a random rotation angle.

    Args:
        seed(int): when not None, passed to ``np.random.seed`` before
            any number is drawn.

    Returns:
        list(float): axis of rotation (unit vector)
        float: angle of rotation, in [0, 2*pi)
    """
    rng = np.random
    if seed is not None:
        rng.seed(seed)

    # point picked uniformly on the unit sphere,
    # eq1,2 in http://mathworld.wolfram.com/SpherePointPicking.html
    azimuth = 2 * np.pi * rng.rand()        # first draw,  [0, 2*pi)
    polar = np.arccos(2 * rng.rand() - 1)   # second draw, [0, pi]

    # spherical -> cartesian,
    # eq19 in http://mathworld.wolfram.com/SphericalCoordinates.html
    sin_polar = np.sin(polar)
    axis = [sin_polar * np.cos(azimuth),
            sin_polar * np.sin(azimuth),
            np.cos(polar)]

    # third draw: the rotation angle
    return axis, 2 * np.pi * rng.rand()
def rotation_euler(xyz, alpha, beta, gamma, center=None):
    """Rotate xyz coordinates by three successive axis rotations.

    The rotation around x is applied first, then the one around y and
    finally the one around z.

    Args:
        xyz (array): original xyz coordinates
        alpha (float): angle of rotation around the x axis
        beta (float): angle of rotation around the y axis
        gamma (float): angle of rotation around the z axis
        center (optional): rotation center, forwarded unchanged to
            ``rotate``. Defaults to None.

    Returns:
        array: x,y,z coordinates after rotation
    """
    # trigonometric terms of the three angles
    cos_x, cos_y, cos_z = (np.cos(ang) for ang in (alpha, beta, gamma))
    sin_x, sin_y, sin_z = (np.sin(ang) for ang in (alpha, beta, gamma))

    # elementary rotations around each axis
    about_x = np.array([[1, 0, 0],
                        [0, cos_x, -sin_x],
                        [0, sin_x, cos_x]])
    about_y = np.array([[cos_y, 0, sin_y],
                        [0, 1, 0],
                        [-sin_y, 0, cos_y]])
    about_z = np.array([[cos_z, -sin_z, 0],
                        [sin_z, cos_z, 0],
                        [0, 0, 1]])

    # compose them: z * (y * x)
    combined = np.dot(about_z, np.dot(about_y, about_x))

    return rotate(xyz, combined, center)
def fetch(pdbid, outdir='.'):
    """Download PDB file from PDB website https://www.rcsb.org/.

    The file is written to ``<outdir>/<pdbid>.pdb``.

    Args:
        pdbid(str): PDB ID, exactly four alphanumeric characters
        outdir (str, optional): Output path. Defaults to '.'.

    Raises:
        ValueError: PDB ID is not valid
        ValueError: PDB ID is only available in mmCIF format
        ValueError: PDB ID is valid but does not exist on PDB website

    Examples:
        >>> from pdb2sql import fetch
        >>> fetch('1cbh')
    """
    # defaults
    hosturl = 'http://files.rcsb.org/download'

    # check pdbid; fullmatch (unlike match + '$') also rejects an ID
    # followed by a newline
    if not re.fullmatch('[0-9a-z]{4}', pdbid, re.IGNORECASE | re.ASCII):
        raise ValueError(f'Invalid PDB ID: {pdbid}.')
    pdb = pdbid + '.pdb'
    cif = pdbid + '.cif'

    # build downloading url; URLs always use '/', whereas os.path.join
    # would insert '\\' on Windows
    url_pdb = f'{hosturl}/{pdb}'
    url_cif = f'{hosturl}/{cif}'
    fout = os.path.join(outdir, pdb)

    # get url content, closing each HTTP response once it is consumed
    try:
        with urllib.request.urlopen(url_pdb) as response:
            content = response.read()
    except HTTPError:
        # no .pdb entry: find out whether the ID exists as mmCIF only
        try:
            with urllib.request.urlopen(url_cif):
                pass
        except HTTPError:
            raise ValueError(f'PDB ID not exist: {pdbid}')
        raise ValueError(f'The PDB ID given is only represented in '
                         f'mmCIF format and pdb2sql does not handle mmCIF format.')

    # write to file
    with open(fout, 'wb') as f:
        f.write(content)
url='https://github.com/DeepRank/pdb2sql', 25 | packages=find_packages(), 26 | package_dir={ 27 | 'pdb2sql': 'pdb2sql'}, 28 | include_package_data=True, 29 | license="Apache Software License 2.0", 30 | zip_safe=False, 31 | keywords='PDB2SQL', 32 | classifiers=[ 33 | 'Development Status :: 2 - Pre-Alpha', 34 | 'Intended Audience :: Developers', 35 | 'License :: OSI Approved :: Apache Software License', 36 | 'Natural Language :: English', 37 | 'Intended Audience :: Science/Research', 38 | 'Programming Language :: Python :: 3.7', 39 | 'Topic :: Scientific/Engineering :: Bio-Informatics'], 40 | test_suite='tests', 41 | install_requires=[ 42 | 'numpy', 43 | 'pandas'], 44 | extras_require={ 45 | 'dev': [ 46 | 'prospector[with_pyroma]', 47 | 'autopep8', 48 | 'isort', 49 | 'twine'], 50 | 'docs': [ 51 | 'sphinx', 52 | 'ipython',], 53 | 'test': [ 54 | 'coveralls', 55 | 'pytest', 56 | 'pytest-cov',], 57 | }) 58 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- 1 | """Init module for tests package.""" 2 | from pathlib import Path 3 | 4 | test_folder = Path(__file__).resolve().parent 5 | pdb_folder = Path(test_folder, 'pdb') 6 | -------------------------------------------------------------------------------- /test/pdb/1AK4/target.izone: -------------------------------------------------------------------------------- 1 | zone A54-A54 2 | zone A55-A55 3 | zone A56-A56 4 | zone A57-A57 5 | zone A58-A58 6 | zone A59-A59 7 | zone A60-A60 8 | zone A61-A61 9 | zone A62-A62 10 | zone A63-A63 11 | zone A64-A64 12 | zone A65-A65 13 | zone A66-A66 14 | zone A69-A69 15 | zone A70-A70 16 | zone A71-A71 17 | zone A72-A72 18 | zone A73-A73 19 | zone A74-A74 20 | zone A75-A75 21 | zone A76-A76 22 | zone A82-A82 23 | zone A92-A92 24 | zone A97-A97 25 | zone A98-A98 26 | zone A99-A99 27 | zone A100-A100 28 | zone A101-A101 29 | zone A102-A102 30 | zone A103-A103 31 
| zone A104-A104 32 | zone A105-A105 33 | zone A107-A107 34 | zone A108-A108 35 | zone A109-A109 36 | zone A110-A110 37 | zone A111-A111 38 | zone A112-A112 39 | zone A113-A113 40 | zone A114-A114 41 | zone A115-A115 42 | zone A116-A116 43 | zone A117-A117 44 | zone A118-A118 45 | zone A119-A119 46 | zone A120-A120 47 | zone A121-A121 48 | zone A122-A122 49 | zone A123-A123 50 | zone A125-A125 51 | zone A126-A126 52 | zone A127-A127 53 | zone A147-A147 54 | zone A148-A148 55 | zone A149-A149 56 | zone A150-A150 57 | zone A151-A151 58 | zone B81-B81 59 | zone B82-B82 60 | zone B83-B83 61 | zone B84-B84 62 | zone B85-B85 63 | zone B86-B86 64 | zone B87-B87 65 | zone B88-B88 66 | zone B89-B89 67 | zone B90-B90 68 | zone B91-B91 69 | zone B92-B92 70 | zone B93-B93 71 | zone B94-B94 72 | zone B95-B95 73 | zone B96-B96 74 | zone B97-B97 75 | zone B98-B98 76 | zone B100-B100 77 | zone B103-B103 78 | zone B113-B113 79 | zone B120-B120 80 | zone B121-B121 81 | -------------------------------------------------------------------------------- /test/pdb/1AK4/target.lzone: -------------------------------------------------------------------------------- 1 | zone A2-A2 2 | zone A3-A3 3 | zone A4-A4 4 | zone A5-A5 5 | zone A6-A6 6 | zone A7-A7 7 | zone A8-A8 8 | zone A9-A9 9 | zone A10-A10 10 | zone A11-A11 11 | zone A12-A12 12 | zone A13-A13 13 | zone A14-A14 14 | zone A15-A15 15 | zone A16-A16 16 | zone A17-A17 17 | zone A18-A18 18 | zone A19-A19 19 | zone A20-A20 20 | zone A21-A21 21 | zone A22-A22 22 | zone A23-A23 23 | zone A24-A24 24 | zone A25-A25 25 | zone A26-A26 26 | zone A27-A27 27 | zone A28-A28 28 | zone A29-A29 29 | zone A30-A30 30 | zone A31-A31 31 | zone A32-A32 32 | zone A33-A33 33 | zone A34-A34 34 | zone A35-A35 35 | zone A36-A36 36 | zone A37-A37 37 | zone A38-A38 38 | zone A39-A39 39 | zone A40-A40 40 | zone A41-A41 41 | zone A42-A42 42 | zone A43-A43 43 | zone A44-A44 44 | zone A45-A45 45 | zone A46-A46 46 | zone A47-A47 47 | zone A48-A48 48 | zone A49-A49 49 
| zone A50-A50 50 | zone A51-A51 51 | zone A52-A52 52 | zone A53-A53 53 | zone A54-A54 54 | zone A55-A55 55 | zone A56-A56 56 | zone A57-A57 57 | zone A58-A58 58 | zone A59-A59 59 | zone A60-A60 60 | zone A61-A61 61 | zone A62-A62 62 | zone A63-A63 63 | zone A64-A64 64 | zone A65-A65 65 | zone A66-A66 66 | zone A67-A67 67 | zone A68-A68 68 | zone A69-A69 69 | zone A70-A70 70 | zone A71-A71 71 | zone A72-A72 72 | zone A73-A73 73 | zone A74-A74 74 | zone A75-A75 75 | zone A76-A76 76 | zone A77-A77 77 | zone A78-A78 78 | zone A79-A79 79 | zone A80-A80 80 | zone A81-A81 81 | zone A82-A82 82 | zone A83-A83 83 | zone A84-A84 84 | zone A85-A85 85 | zone A86-A86 86 | zone A87-A87 87 | zone A88-A88 88 | zone A89-A89 89 | zone A90-A90 90 | zone A91-A91 91 | zone A92-A92 92 | zone A93-A93 93 | zone A94-A94 94 | zone A95-A95 95 | zone A96-A96 96 | zone A97-A97 97 | zone A98-A98 98 | zone A99-A99 99 | zone A100-A100 100 | zone A101-A101 101 | zone A102-A102 102 | zone A103-A103 103 | zone A104-A104 104 | zone A105-A105 105 | zone A106-A106 106 | zone A107-A107 107 | zone A108-A108 108 | zone A109-A109 109 | zone A110-A110 110 | zone A111-A111 111 | zone A112-A112 112 | zone A113-A113 113 | zone A114-A114 114 | zone A115-A115 115 | zone A116-A116 116 | zone A117-A117 117 | zone A118-A118 118 | zone A119-A119 119 | zone A120-A120 120 | zone A121-A121 121 | zone A122-A122 122 | zone A123-A123 123 | zone A124-A124 124 | zone A125-A125 125 | zone A126-A126 126 | zone A127-A127 127 | zone A128-A128 128 | zone A129-A129 129 | zone A130-A130 130 | zone A131-A131 131 | zone A132-A132 132 | zone A133-A133 133 | zone A134-A134 134 | zone A135-A135 135 | zone A136-A136 136 | zone A137-A137 137 | zone A138-A138 138 | zone A139-A139 139 | zone A140-A140 140 | zone A141-A141 141 | zone A142-A142 142 | zone A143-A143 143 | zone A144-A144 144 | zone A145-A145 145 | zone A146-A146 146 | zone A147-A147 147 | zone A148-A148 148 | zone A149-A149 149 | zone A150-A150 150 | zone A151-A151 151 | zone 
A152-A152 152 | zone A153-A153 153 | zone A154-A154 154 | zone A155-A155 155 | zone A156-A156 156 | zone A157-A157 157 | zone A158-A158 158 | zone A159-A159 159 | zone A160-A160 160 | zone A161-A161 161 | zone A162-A162 162 | zone A163-A163 163 | zone A164-A164 164 | zone A165-A165 165 | -------------------------------------------------------------------------------- /test/pdb/dummy_blank_chainID_with_segID.pdb: -------------------------------------------------------------------------------- 1 | ATOM 817 N MET -1 -20.948 -13.418 28.320 1.00 46.93 L N 2 | ATOM 818 CA MET -1 -21.093 -12.112 28.939 1.00 52.50 L C 3 | ATOM 819 C MET -1 -22.482 -11.566 28.846 1.00 52.55 L C 4 | ATOM 820 O MET -1 -22.816 -10.393 28.618 1.00 52.75 L O 5 | ATOM 821 CB MET -1 -19.916 -11.178 28.789 1.00 59.92 L C 6 | ATOM 822 CG MET -1 -18.839 -11.701 29.713 1.00 80.88 L C 7 | ATOM 823 SD MET -1 -17.178 -11.517 29.038 1.00 95.94 L S 8 | ATOM 824 CE MET -1 -16.527 -13.173 29.365 1.00 90.58 L C 9 | ATOM 825 N GLN 0 -23.243 -12.593 29.074 1.00 51.78 L N 10 | ATOM 826 CA GLN 0 -24.639 -12.681 29.076 1.00 52.49 L C 11 | ATOM 827 C GLN 0 -25.268 -12.252 30.349 1.00 42.74 L C 12 | ATOM 828 O GLN 0 -24.688 -12.207 31.435 1.00 47.12 L O 13 | ATOM 829 CB GLN 0 -24.971 -14.147 28.858 1.00 45.95 L C 14 | ATOM 830 CG GLN 0 -24.141 -14.712 27.710 1.00 53.26 L C 15 | ATOM 831 CD GLN 0 -24.923 -15.776 27.001 1.00 68.74 L C 16 | ATOM 832 OE1 GLN 0 -25.159 -16.851 27.563 1.00 82.61 L O 17 | ATOM 833 NE2 GLN 0 -25.382 -15.458 25.797 1.00 76.83 L N 18 | ATOM 834 N THR 1 -26.513 -11.973 30.116 1.00 21.94 L N 19 | ATOM 835 CA THR 1 -27.440 -11.567 31.088 1.00 15.55 L C 20 | ATOM 836 C THR 1 -28.200 -12.824 31.459 1.00 5.55 L C 21 | ATOM 837 O THR 1 -27.960 -13.910 30.947 1.00 13.48 L O 22 | ATOM 838 CB THR 1 -28.318 -10.497 30.412 1.00 14.60 L C 23 | ATOM 839 OG1 THR 1 -27.550 -9.329 30.158 1.00 7.60 L O 24 | ATOM 840 CG2 THR 1 -29.542 -10.173 31.249 1.00 14.52 L C 25 | 
-------------------------------------------------------------------------------- /test/pdb/dummy_blank_chainID_without_segID.pdb: -------------------------------------------------------------------------------- 1 | ATOM 817 N MET -1 -20.948 -13.418 28.320 1.00 46.93 N 2 | ATOM 818 CA MET -1 -21.093 -12.112 28.939 1.00 52.50 C 3 | ATOM 819 C MET -1 -22.482 -11.566 28.846 1.00 52.55 C 4 | ATOM 820 O MET -1 -22.816 -10.393 28.618 1.00 52.75 O 5 | ATOM 821 CB MET -1 -19.916 -11.178 28.789 1.00 59.92 C 6 | ATOM 822 CG MET -1 -18.839 -11.701 29.713 1.00 80.88 C 7 | ATOM 823 SD MET -1 -17.178 -11.517 29.038 1.00 95.94 S 8 | ATOM 824 CE MET -1 -16.527 -13.173 29.365 1.00 90.58 C 9 | ATOM 825 N GLN 0 -23.243 -12.593 29.074 1.00 51.78 N 10 | ATOM 826 CA GLN 0 -24.639 -12.681 29.076 1.00 52.49 C 11 | ATOM 827 C GLN 0 -25.268 -12.252 30.349 1.00 42.74 C 12 | ATOM 828 O GLN 0 -24.688 -12.207 31.435 1.00 47.12 O 13 | ATOM 829 CB GLN 0 -24.971 -14.147 28.858 1.00 45.95 C 14 | ATOM 830 CG GLN 0 -24.141 -14.712 27.710 1.00 53.26 C 15 | ATOM 831 CD GLN 0 -24.923 -15.776 27.001 1.00 68.74 C 16 | ATOM 832 OE1 GLN 0 -25.159 -16.851 27.563 1.00 82.61 O 17 | ATOM 833 NE2 GLN 0 -25.382 -15.458 25.797 1.00 76.83 N 18 | ATOM 834 N THR 1 -26.513 -11.973 30.116 1.00 21.94 N 19 | ATOM 835 CA THR 1 -27.440 -11.567 31.088 1.00 15.55 C 20 | ATOM 836 C THR 1 -28.200 -12.824 31.459 1.00 5.55 C 21 | ATOM 837 O THR 1 -27.960 -13.910 30.947 1.00 13.48 O 22 | ATOM 838 CB THR 1 -28.318 -10.497 30.412 1.00 14.60 C 23 | ATOM 839 OG1 THR 1 -27.550 -9.329 30.158 1.00 7.60 O 24 | ATOM 840 CG2 THR 1 -29.542 -10.173 31.249 1.00 14.52 C 25 | -------------------------------------------------------------------------------- /test/pdb/dummy_blank_element.pdb: -------------------------------------------------------------------------------- 1 | ATOM 817 N MET L -1 -20.948 -13.418 28.320 1.00 46.93 2 | ATOM 818 CA MET L -1 -21.093 -12.112 28.939 1.00 52.50 3 | ATOM 819 C MET L -1 -22.482 -11.566 28.846 1.00 52.55 4 | 
ATOM 820 O MET L -1 -22.816 -10.393 28.618 1.00 52.75 5 | ATOM 821 CB MET L -1 -19.916 -11.178 28.789 1.00 59.92 6 | ATOM 822 CG MET L -1 -18.839 -11.701 29.713 1.00 80.88 7 | ATOM 823 SD MET L -1 -17.178 -11.517 29.038 1.00 95.94 8 | ATOM 824 CE MET L -1 -16.527 -13.173 29.365 1.00 90.58 9 | ATOM 825 N GLN L 0 -23.243 -12.593 29.074 1.00 51.78 10 | ATOM 826 CA GLN L 0 -24.639 -12.681 29.076 1.00 52.49 11 | ATOM 827 C GLN L 0 -25.268 -12.252 30.349 1.00 42.74 12 | ATOM 828 O GLN L 0 -24.688 -12.207 31.435 1.00 47.12 13 | ATOM 829 CB GLN L 0 -24.971 -14.147 28.858 1.00 45.95 14 | ATOM 830 CG GLN L 0 -24.141 -14.712 27.710 1.00 53.26 15 | ATOM 831 CD GLN L 0 -24.923 -15.776 27.001 1.00 68.74 16 | ATOM 832 OE1 GLN L 0 -25.159 -16.851 27.563 1.00 82.61 17 | ATOM 833 NE2 GLN L 0 -25.382 -15.458 25.797 1.00 76.83 18 | ATOM 834 N THR L 1 -26.513 -11.973 30.116 1.00 21.94 19 | ATOM 835 CA THR L 1 -27.440 -11.567 31.088 1.00 15.55 20 | ATOM 836 C THR L 1 -28.200 -12.824 31.459 1.00 5.55 21 | ATOM 837 O THR L 1 -27.960 -13.910 30.947 1.00 13.48 22 | ATOM 838 CB THR L 1 -28.318 -10.497 30.412 1.00 14.60 23 | ATOM 839 OG1 THR L 1 -27.550 -9.329 30.158 1.00 7.60 24 | ATOM 840 CG2 THR L 1 -29.542 -10.173 31.249 1.00 14.52 25 | ATOM 840 H1 THR L 1 -29.542 -10.173 31.249 1.00 14.52 26 | ATOM 840 CA CA L 1 -29.542 -10.173 31.249 1.00 14.52 27 | -------------------------------------------------------------------------------- /test/pdb/dummy_blank_occupancy.pdb: -------------------------------------------------------------------------------- 1 | ATOM 817 N MET L -1 -20.948 -13.418 28.320 46.93 N 2 | ATOM 818 CA MET L -1 -21.093 -12.112 28.939 52.50 C 3 | ATOM 819 C MET L -1 -22.482 -11.566 28.846 52.55 C 4 | ATOM 820 O MET L -1 -22.816 -10.393 28.618 52.75 O 5 | ATOM 821 CB MET L -1 -19.916 -11.178 28.789 59.92 C 6 | ATOM 822 CG MET L -1 -18.839 -11.701 29.713 80.88 C 7 | ATOM 823 SD MET L -1 -17.178 -11.517 29.038 95.94 S 8 | ATOM 824 CE MET L -1 -16.527 -13.173 29.365 90.58 C 9 | 
ATOM 825 N GLN L 0 -23.243 -12.593 29.074 51.78 N 10 | ATOM 826 CA GLN L 0 -24.639 -12.681 29.076 52.49 C 11 | ATOM 827 C GLN L 0 -25.268 -12.252 30.349 42.74 C 12 | ATOM 828 O GLN L 0 -24.688 -12.207 31.435 47.12 O 13 | ATOM 829 CB GLN L 0 -24.971 -14.147 28.858 45.95 C 14 | ATOM 830 CG GLN L 0 -24.141 -14.712 27.710 53.26 C 15 | ATOM 831 CD GLN L 0 -24.923 -15.776 27.001 68.74 C 16 | ATOM 832 OE1 GLN L 0 -25.159 -16.851 27.563 82.61 O 17 | ATOM 833 NE2 GLN L 0 -25.382 -15.458 25.797 76.83 N 18 | ATOM 834 N THR L 1 -26.513 -11.973 30.116 21.94 N 19 | ATOM 835 CA THR L 1 -27.440 -11.567 31.088 15.55 C 20 | ATOM 836 C THR L 1 -28.200 -12.824 31.459 5.55 C 21 | ATOM 837 O THR L 1 -27.960 -13.910 30.947 13.48 O 22 | ATOM 838 CB THR L 1 -28.318 -10.497 30.412 14.60 C 23 | ATOM 839 OG1 THR L 1 -27.550 -9.329 30.158 7.60 O 24 | ATOM 840 CG2 THR L 1 -29.542 -10.173 31.249 14.52 C 25 | -------------------------------------------------------------------------------- /test/pdb/dummy_blank_temperature.pdb: -------------------------------------------------------------------------------- 1 | ATOM 817 N MET L -1 -20.948 -13.418 28.320 1.00 N 2 | ATOM 818 CA MET L -1 -21.093 -12.112 28.939 1.00 C 3 | ATOM 819 C MET L -1 -22.482 -11.566 28.846 1.00 C 4 | ATOM 820 O MET L -1 -22.816 -10.393 28.618 1.00 O 5 | ATOM 821 CB MET L -1 -19.916 -11.178 28.789 1.00 C 6 | ATOM 822 CG MET L -1 -18.839 -11.701 29.713 1.00 C 7 | ATOM 823 SD MET L -1 -17.178 -11.517 29.038 1.00 S 8 | ATOM 824 CE MET L -1 -16.527 -13.173 29.365 1.00 C 9 | ATOM 825 N GLN L 0 -23.243 -12.593 29.074 1.00 N 10 | ATOM 826 CA GLN L 0 -24.639 -12.681 29.076 1.00 C 11 | ATOM 827 C GLN L 0 -25.268 -12.252 30.349 1.00 C 12 | ATOM 828 O GLN L 0 -24.688 -12.207 31.435 1.00 O 13 | ATOM 829 CB GLN L 0 -24.971 -14.147 28.858 1.00 C 14 | ATOM 830 CG GLN L 0 -24.141 -14.712 27.710 1.00 C 15 | ATOM 831 CD GLN L 0 -24.923 -15.776 27.001 1.00 C 16 | ATOM 832 OE1 GLN L 0 -25.159 -16.851 27.563 1.00 O 17 | ATOM 833 NE2 GLN L 0 -25.382 
-15.458 25.797 1.00 N 18 | ATOM 834 N THR L 1 -26.513 -11.973 30.116 1.00 N 19 | ATOM 835 CA THR L 1 -27.440 -11.567 31.088 1.00 C 20 | ATOM 836 C THR L 1 -28.200 -12.824 31.459 1.00 C 21 | ATOM 837 O THR L 1 -27.960 -13.910 30.947 1.00 O 22 | ATOM 838 CB THR L 1 -28.318 -10.497 30.412 1.00 C 23 | ATOM 839 OG1 THR L 1 -27.550 -9.329 30.158 1.00 O 24 | ATOM 840 CG2 THR L 1 -29.542 -10.173 31.249 1.00 C 25 | -------------------------------------------------------------------------------- /test/pdb/dummy_longline.pdb: -------------------------------------------------------------------------------- 1 | ATOM 1 O5' DA A 1 -16.851 -5.543 74.981 1.00 55.62 A O 1 2 | ATOM 2 C5' DA A 1 -18.254 -5.683 75.238 1.00 51.97 A C 2 3 | ATOM 3 C4' DA A 1 -18.600 -7.125 75.571 1.00 37.32 A C 3 4 | -------------------------------------------------------------------------------- /test/pdb/dummy_template.pdb: -------------------------------------------------------------------------------- 1 | ATOM 817 N MET L -1 -20.948 -13.418 28.320 1.00 46.93 N 2 | ATOM 818 CA MET L -1 -21.093 -12.112 28.939 1.00 52.50 C 3 | ATOM 819 C MET L -1 -22.482 -11.566 28.846 1.00 52.55 C 4 | ATOM 820 O MET L -1 -22.816 -10.393 28.618 1.00 52.75 O 5 | ATOM 821 CB MET L -1 -19.916 -11.178 28.789 1.00 59.92 C 6 | ATOM 822 CG MET L -1 -18.839 -11.701 29.713 1.00 80.88 C 7 | ATOM 823 SD MET L -1 -17.178 -11.517 29.038 1.00 95.94 S 8 | ATOM 824 CE MET L -1 -16.527 -13.173 29.365 1.00 90.58 C 9 | ATOM 825 N GLN L 0 -23.243 -12.593 29.074 1.00 51.78 N 10 | ATOM 826 CA GLN L 0 -24.639 -12.681 29.076 1.00 52.49 C 11 | ATOM 827 C GLN L 0 -25.268 -12.252 30.349 1.00 42.74 C 12 | ATOM 828 O GLN L 0 -24.688 -12.207 31.435 1.00 47.12 O 13 | ATOM 829 CB GLN L 0 -24.971 -14.147 28.858 1.00 45.95 C 14 | ATOM 830 CG GLN L 0 -24.141 -14.712 27.710 1.00 53.26 C 15 | ATOM 831 CD GLN L 0 -24.923 -15.776 27.001 1.00 68.74 C 16 | ATOM 832 OE1 GLN L 0 -25.159 -16.851 27.563 1.00 82.61 O 17 | ATOM 833 NE2 GLN L 0 -25.382 -15.458 25.797 
1.00 76.83 N 18 | ATOM 834 N THR L 1 -26.513 -11.973 30.116 1.00 21.94 N 19 | ATOM 835 CA THR L 1 -27.440 -11.567 31.088 1.00 15.55 C 20 | ATOM 836 C THR L 1 -28.200 -12.824 31.459 1.00 5.55 C 21 | ATOM 837 O THR L 1 -27.960 -13.910 30.947 1.00 13.48 O 22 | ATOM 838 CB THR L 1 -28.318 -10.497 30.412 1.00 14.60 C 23 | ATOM 839 OG1 THR L 1 -27.550 -9.329 30.158 1.00 7.60 O 24 | ATOM 840 CG2 THR L 1 -29.542 -10.173 31.249 1.00 14.52 C 25 | -------------------------------------------------------------------------------- /test/pdb/dummy_transform.pdb: -------------------------------------------------------------------------------- 1 | ATOM 1 N THR A 1 1.000 0.000 0.000 1.00 0.69 N 2 | ATOM 2 CA THR A 1 -1.000 0.000 0.000 1.00 0.50 C 3 | ATOM 3 C THR A 1 0.000 1.000 0.000 1.00 0.45 C 4 | ATOM 4 O THR A 1 0.000 -1.000 0.000 1.00 0.69 O 5 | ATOM 5 CB THR A 1 0.000 0.000 1.000 1.00 0.50 C 6 | ATOM 6 H1 THR A 1 0.000 0.000 -1.000 1.00 0.45 H 7 | -------------------------------------------------------------------------------- /test/pdb/test_model.pdb: -------------------------------------------------------------------------------- 1 | MODEL 1 2 | ATOM 1 N THR A 1 -6.837 7.439 2.442 1.00 0.00 N 3 | ATOM 2 CA THR A 1 -7.164 6.033 2.066 1.00 0.00 C 4 | ATOM 3 C THR A 1 -6.491 5.682 0.750 1.00 0.00 C 5 | ATOM 4 O THR A 1 -5.941 6.531 0.077 1.00 0.00 O 6 | ATOM 5 CB THR A 1 -8.668 5.852 1.890 1.00 0.00 C 7 | ATOM 6 OG1 THR A 1 -9.212 7.163 2.007 1.00 0.00 O 8 | ATOM 7 CG2 THR A 1 -9.283 5.049 3.031 1.00 0.00 C 9 | ATOM 8 H1 THR A 1 -5.882 7.675 2.103 1.00 0.00 H 10 | ATOM 9 H2 THR A 1 -7.527 8.085 2.007 1.00 0.00 H 11 | ATOM 10 H3 THR A 1 -6.872 7.539 3.476 1.00 0.00 H 12 | ATOM 11 HA THR A 1 -6.812 5.365 2.829 1.00 0.00 H 13 | ATOM 12 HB THR A 1 -8.902 5.414 0.942 1.00 0.00 H 14 | ATOM 13 HG1 THR A 1 -9.841 7.162 2.730 1.00 0.00 H 15 | ATOM 14 HG21 THR A 1 -8.982 5.472 3.979 1.00 0.00 H 16 | ATOM 15 HG22 THR A 1 -10.358 5.077 2.956 1.00 0.00 H 17 | ATOM 16 HG23 THR A 1 -8.949 
4.020 2.976 1.00 0.00 H 18 | ATOM 17 N GLN A 2 -6.546 4.430 0.417 1.00 0.00 N 19 | ATOM 18 CA GLN A 2 -5.947 3.978 -0.855 1.00 0.00 C 20 | ATOM 19 C GLN A 2 -6.820 2.876 -1.453 1.00 0.00 C 21 | ATOM 20 O GLN A 2 -7.445 2.125 -0.732 1.00 0.00 O 22 | ATOM 21 CB GLN A 2 -4.545 3.445 -0.585 1.00 0.00 C 23 | ATOM 22 CG GLN A 2 -3.698 3.655 -1.832 1.00 0.00 C 24 | ATOM 23 CD GLN A 2 -3.293 5.125 -1.934 1.00 0.00 C 25 | ATOM 24 OE1 GLN A 2 -2.655 5.668 -1.055 1.00 0.00 O 26 | ATOM 25 NE2 GLN A 2 -3.643 5.804 -2.993 1.00 0.00 N 27 | ATOM 26 H GLN A 2 -6.974 3.785 1.009 1.00 0.00 H 28 | ATOM 27 HA GLN A 2 -5.900 4.809 -1.542 1.00 0.00 H 29 | ATOM 28 HB2 GLN A 2 -4.107 3.976 0.248 1.00 0.00 H 30 | ATOM 29 HB3 GLN A 2 -4.594 2.391 -0.349 1.00 0.00 H 31 | ATOM 30 HG2 GLN A 2 -2.815 3.041 -1.781 1.00 0.00 H 32 | ATOM 31 HG3 GLN A 2 -4.270 3.390 -2.704 1.00 0.00 H 33 | ATOM 32 HE21 GLN A 2 -4.158 5.369 -3.706 1.00 0.00 H 34 | ATOM 33 HE22 GLN A 2 -3.392 6.747 -3.076 1.00 0.00 H 35 | ATOM 34 N SER A 3 -6.846 2.794 -2.752 1.00 0.00 N 36 | ATOM 35 CA SER A 3 -7.697 1.763 -3.391 1.00 0.00 C 37 | ATOM 36 C SER A 3 -6.904 0.491 -3.681 1.00 0.00 C 38 | ATOM 37 O SER A 3 -5.702 0.441 -3.501 1.00 0.00 O 39 | ATOM 38 CB SER A 3 -8.239 2.326 -4.696 1.00 0.00 C 40 | ATOM 39 OG SER A 3 -7.391 3.430 -4.980 1.00 0.00 O 41 | ATOM 40 H SER A 3 -6.316 3.406 -3.302 1.00 0.00 H 42 | ATOM 41 HA SER A 3 -8.520 1.523 -2.735 1.00 0.00 H 43 | ATOM 42 HB2 SER A 3 -8.170 1.596 -5.476 1.00 0.00 H 44 | ATOM 43 HB3 SER A 3 -9.259 2.659 -4.577 1.00 0.00 H 45 | ATOM 44 HG SER A 3 -7.648 4.155 -4.407 1.00 0.00 H 46 | ENDMDL 47 | MODEL 2 48 | ATOM 1 N THR A 1 -8.549 7.439 1.006 1.00 0.00 N 49 | ATOM 2 CA THR A 1 -8.261 5.987 1.198 1.00 0.00 C 50 | ATOM 3 C THR A 1 -7.612 5.422 -0.067 1.00 0.00 C 51 | ATOM 4 O THR A 1 -7.899 5.859 -1.163 1.00 0.00 O 52 | ATOM 5 CB THR A 1 -9.545 5.220 1.481 1.00 0.00 C 53 | ATOM 6 OG1 THR A 1 -10.550 6.220 1.619 1.00 0.00 O 54 | ATOM 7 CG2 THR A 1 -9.491 4.508 2.830 1.00 0.00 C 
55 | ATOM 8 H1 THR A 1 -8.058 7.780 0.155 1.00 0.00 H 56 | ATOM 9 H2 THR A 1 -9.574 7.577 0.892 1.00 0.00 H 57 | ATOM 10 H3 THR A 1 -8.217 7.971 1.835 1.00 0.00 H 58 | ATOM 11 HA THR A 1 -7.590 5.865 2.019 1.00 0.00 H 59 | ATOM 12 HB THR A 1 -9.774 4.540 0.696 1.00 0.00 H 60 | ATOM 13 HG1 THR A 1 -10.551 6.752 0.820 1.00 0.00 H 61 | ATOM 14 HG21 THR A 1 -8.578 3.934 2.904 1.00 0.00 H 62 | ATOM 15 HG22 THR A 1 -9.519 5.236 3.628 1.00 0.00 H 63 | ATOM 16 HG23 THR A 1 -10.336 3.844 2.925 1.00 0.00 H 64 | ATOM 17 N GLN A 2 -6.748 4.455 0.112 1.00 0.00 N 65 | ATOM 18 CA GLN A 2 -6.080 3.850 -1.074 1.00 0.00 C 66 | ATOM 19 C GLN A 2 -6.993 2.790 -1.696 1.00 0.00 C 67 | ATOM 20 O GLN A 2 -7.816 2.208 -1.020 1.00 0.00 O 68 | ATOM 21 CB GLN A 2 -4.764 3.202 -0.635 1.00 0.00 C 69 | ATOM 22 CG GLN A 2 -3.720 3.391 -1.736 1.00 0.00 C 70 | ATOM 23 CD GLN A 2 -3.191 4.826 -1.694 1.00 0.00 C 71 | ATOM 24 OE1 GLN A 2 -3.585 5.621 -0.864 1.00 0.00 O 72 | ATOM 25 NE2 GLN A 2 -2.298 5.197 -2.570 1.00 0.00 N 73 | ATOM 26 H GLN A 2 -6.544 4.130 1.014 1.00 0.00 H 74 | ATOM 27 HA GLN A 2 -5.881 4.622 -1.802 1.00 0.00 H 75 | ATOM 28 HB2 GLN A 2 -4.419 3.666 0.278 1.00 0.00 H 76 | ATOM 29 HB3 GLN A 2 -4.919 2.151 -0.463 1.00 0.00 H 77 | ATOM 30 HG2 GLN A 2 -2.900 2.705 -1.585 1.00 0.00 H 78 | ATOM 31 HG3 GLN A 2 -4.168 3.205 -2.701 1.00 0.00 H 79 | ATOM 32 HE21 GLN A 2 -1.977 4.560 -3.242 1.00 0.00 H 80 | ATOM 33 HE22 GLN A 2 -1.949 6.112 -2.557 1.00 0.00 H 81 | ATOM 34 N SER A 3 -6.826 2.564 -2.971 1.00 0.00 N 82 | ATOM 35 CA SER A 3 -7.686 1.561 -3.655 1.00 0.00 C 83 | ATOM 36 C SER A 3 -6.956 0.223 -3.816 1.00 0.00 C 84 | ATOM 37 O SER A 3 -5.875 0.030 -3.293 1.00 0.00 O 85 | ATOM 38 CB SER A 3 -8.049 2.104 -5.030 1.00 0.00 C 86 | ATOM 39 OG SER A 3 -7.146 3.182 -5.229 1.00 0.00 O 87 | ATOM 40 H SER A 3 -6.143 3.048 -3.475 1.00 0.00 H 88 | ATOM 41 HA SER A 3 -8.584 1.407 -3.081 1.00 0.00 H 89 | ATOM 42 HB2 SER A 3 -7.904 1.356 -5.780 1.00 0.00 H 90 | ATOM 43 HB3 SER A 3 -9.069 2.462 
class TestAlign(unittest.TestCase):
    """Test the alignment helpers ``align()`` and ``align_interface()``."""

    def setUp(self):
        # Structure file shared by every test of this class.
        self.pdb = Path(pdb_folder, '1AK4', '1AK4_10w.pdb')

    def test_align(self):
        """Test align().

        For each requested axis, the column of ``v`` selected by the
        largest entry of ``u`` (both returned by ``pca``) must have its
        largest absolute component at the index of that axis.
        """
        for idir, axis in enumerate(['x', 'y', 'z']):
            # subTest keeps the remaining axes running if one of them fails.
            with self.subTest(axis=axis):
                with self.assertWarns(UserWarning):
                    db = align(self.pdb, axis=axis)
                xyz = np.array(db.get('x,y,z'))
                u, v = pca(xyz)
                vmax = np.abs(v[:, np.argmax(u)])
                # unittest assertion instead of a bare ``assert``: it is not
                # stripped under ``python -O`` and reports both values.
                self.assertEqual(np.argmax(vmax), idir)

    def test_align_interface(self):
        """Test align_interface().

        For each requested plane, the column of ``v`` selected by the
        smallest entry of ``u`` (computed on the contact atoms only) must
        have its largest absolute component at the index of the axis that
        is not part of the plane name.
        """

        def get_xyz_interface(db):
            """Return the coordinates of the contact atoms of chains A and B."""
            idx = db.get_contact_atoms()
            idx = idx['A'] + idx['B']
            return np.array(db.get('x,y,z', rowID=idx))

        for idir, plane in zip([2, 1, 0], ['xy', 'xz', 'yz']):
            with self.subTest(plane=plane):
                with self.assertWarns(UserWarning):
                    db = align_interface(self.pdb, plane=plane)
                xyz = get_xyz_interface(db)
                u, v = pca(xyz)
                vmax = np.abs(v[:, np.argmin(u)])
                self.assertEqual(np.argmax(vmax), idir)
class Test_1_ContactAtoms(unittest.TestCase):
    """Test function get_contact_atoms."""

    def setUp(self):
        self.pdb = Path(pdb_folder, '3CRO.pdb')
        self.db = interface(self.pdb)

    def _assert_chain_counts(self, contacts, expected):
        """Check the per-chain layout of a contact dictionary.

        Args:
            contacts: value returned by ``get_contact_atoms``.
            expected (dict): chain ID -> expected number of entries, given
                in the order in which the keys must appear in ``contacts``.
        """
        self.assertIsInstance(contacts, dict)
        self.assertEqual(len(contacts), len(expected))
        self.assertEqual(list(contacts.keys()), list(expected))
        for chain, count in expected.items():
            self.assertEqual(len(contacts[chain]), count)

    def test_get_contact_atoms_default(self):
        """Verify get_contact_atoms default."""
        contact_atoms = self.db.get_contact_atoms()
        # in pymol `select natoms, chain A within 8.5 of chain B`
        # to get the number of contact atoms
        self._assert_chain_counts(contact_atoms, {'A': 341, 'B': 333})

    def test_get_contact_atoms_cutoff(self):
        """Verify get_contact_atoms(cutoff=5.5)."""
        cutoff = 5.5
        contact_atoms = self.db.get_contact_atoms(cutoff=cutoff)
        self._assert_chain_counts(contact_atoms, {'A': 185, 'B': 174})

    def test_get_contact_atoms_allchains(self):
        """Verify get_contact_atoms(allchains=True)."""
        contact_atoms = self.db.get_contact_atoms(allchains=True)
        self._assert_chain_counts(
            contact_atoms, {'A': 367, 'B': 372, 'L': 314, 'R': 304})

    def test_get_contact_atoms_chain1chain2(self):
        """Verify get_contact_atoms(chain1='L', chain2='R')."""
        contact_atoms = self.db.get_contact_atoms(chain1='L', chain2='R')
        self._assert_chain_counts(contact_atoms, {'L': 132, 'R': 132})

    def test_get_contact_atoms_extend2residue(self):
        """Verify get_contact_atoms(extend_to_residue=True)."""
        contact_atoms = self.db.get_contact_atoms(extend_to_residue=True)
        # in pymol `select natoms, byres(chain A within 8.5 of chain B)`
        # to get the number of contact atoms
        self._assert_chain_counts(contact_atoms, {'A': 405, 'B': 409})

    def test_get_contact_atoms_onlybackbone_NA(self):
        """Verify get_contact_atoms(only_backbone_atoms=True) for nucleic
        acids."""
        with self.assertWarns(UserWarning) as ex:
            contact_atoms = self.db.get_contact_atoms(only_backbone_atoms=True)
        # ``ex.warning`` is only available once the context has exited.
        self.assertEqual(len(ex.warnings), 1)
        self.assertEqual(ex.warning.args[0],
                         'No contact atoms detected in pdb2sql')
        self._assert_chain_counts(contact_atoms, {'A': 0, 'B': 0})

    def test_get_contact_atoms_onlybackbone_protein(self):
        """Verify get_contact_atoms(only_backbone_atoms=True) for proteins."""
        contact_atoms = self.db.get_contact_atoms(
            only_backbone_atoms=True,
            chain1='L',
            chain2='R'
        )
        # pymol `select catoms, (chain L and name CA+C+N+O)
        # within 8.5 of (chain R and name CA+C+N+O)`
        self._assert_chain_counts(contact_atoms, {'L': 22, 'R': 20})

    def test_get_contact_atoms_exludeH(self):
        """Verify get_contact_atoms(excludeH=True)."""
        pdb = Path(pdb_folder, '3CRO_H.pdb')
        db = interface(pdb)
        contact_atoms = db.get_contact_atoms(excludeH=True)
        # Same counts as the default test that loads '3CRO.pdb'.
        self._assert_chain_counts(contact_atoms, {'A': 341, 'B': 333})

    def test_get_contact_atoms_contactpairs(self):
        """Verify get_contact_atoms(return_contact_pairs=True)."""
        contact_atoms = self.db.get_contact_atoms(
            return_contact_pairs=True
        )
        self.assertIsInstance(contact_atoms, dict)
        self.assertEqual(len(contact_atoms), 341)

        # Every key must map to a non-empty list.
        for i in contact_atoms.keys():
            with self.subTest(i=i):
                self.assertIsInstance(contact_atoms[i], list)
                self.assertNotEqual(len(contact_atoms[i]), 0)
        self.assertEqual(len(contact_atoms[6]), 1)
        self.assertEqual(len(contact_atoms[404]), 19)

    def test_get_contact_atoms_alltrue(self):
        """Verify get_contact_atoms with every boolean option enabled."""
        pdb = Path(pdb_folder, '3CRO_H.pdb')
        db = interface(pdb)
        contact_atoms = db.get_contact_atoms(
            allchains=True,
            extend_to_residue=True,
            only_backbone_atoms=True,
            excludeH=True)
        # pymol `select catoms, name CA+C+N+O and byres((chain L and name CA+C+N+O )
        # within 8.5 of (chain R and name CA+C+N+O))`
        self._assert_chain_counts(
            contact_atoms, {'A': 0, 'B': 0, 'L': 36, 'R': 32})
class Test_2_ContactResidues(unittest.TestCase):
    """Test get_contact_residues function."""

    def setUp(self):
        self.pdb = Path(pdb_folder, '3CRO.pdb')
        self.db = interface(self.pdb)

    def _assert_chain_counts(self, contacts, expected):
        """Check the per-chain layout of a contact dictionary.

        Args:
            contacts: value returned by ``get_contact_residues``.
            expected (dict): chain ID -> expected number of entries, given
                in the order in which the keys must appear in ``contacts``.
        """
        self.assertIsInstance(contacts, dict)
        self.assertEqual(len(contacts), len(expected))
        self.assertEqual(list(contacts.keys()), list(expected))
        for chain, count in expected.items():
            self.assertEqual(len(contacts[chain]), count)

    def test_get_contact_residues_default(self):
        """Verify get_contact_residues default."""
        contact_residues = self.db.get_contact_residues()
        # in pymol:
        # select natoms, chain A within 8.5 of chain B
        # stored.nres = set()
        # iterate (natoms), stored.nres.add((chain, resi, resn))
        # print(len(stored.nres))
        self._assert_chain_counts(contact_residues, {'A': 20, 'B': 20})

    def test_get_contact_residues_cutoff(self):
        """Verify get_contact_residues(cutoff=5.5)."""
        cutoff = 5.5
        contact_residues = self.db.get_contact_residues(cutoff=cutoff)
        self._assert_chain_counts(contact_residues, {'A': 20, 'B': 20})

    def test_get_contact_residues_allchains(self):
        """Verify get_contact_residues(allchains=True)."""
        contact_residues = self.db.get_contact_residues(allchains=True)
        self._assert_chain_counts(
            contact_residues, {'A': 20, 'B': 20, 'L': 47, 'R': 48})

    def test_get_contact_residues_chain1chain2(self):
        """Verify get_contact_residues(chain1='L', chain2='R')."""
        contact_residues = self.db.get_contact_residues(chain1='L', chain2='R')
        self._assert_chain_counts(contact_residues, {'L': 20, 'R': 23})

    def test_get_contact_residues_exludeH(self):
        """Verify get_contact_residues(allchains=True, excludeH=True)."""
        pdb = Path(pdb_folder, '3CRO_H.pdb')
        db = interface(pdb)
        contact_residues = db.get_contact_residues(
            allchains=True, excludeH=True)
        self._assert_chain_counts(
            contact_residues, {'A': 20, 'B': 20, 'L': 47, 'R': 48})

    def test_get_contact_residues_onlybackbone_NA(self):
        """Verify get_contact_residues(only_backbone_atoms=True) for NA."""
        with self.assertWarns(UserWarning) as ex:
            contact_residues = self.db.get_contact_residues(
                only_backbone_atoms=True)
        # ``ex.warning`` is only available once the context has exited.
        self.assertEqual(len(ex.warnings), 1)
        self.assertEqual(ex.warning.args[0],
                         'No contact atoms detected in pdb2sql')
        self._assert_chain_counts(contact_residues, {'A': 0, 'B': 0})

    def test_get_contact_residues_onlybackbone_protein(self):
        """Verify get_contact_residues(only_backbone_atoms=True) for
        proteins."""
        contact_residues = self.db.get_contact_residues(
            only_backbone_atoms=True,
            chain1='L',
            chain2='R'
        )
        # pymol `select catoms, (chain L and name CA+C+N+O)
        # within 8.5 of (chain R and name CA+C+N+O)`
        self._assert_chain_counts(contact_residues, {'L': 9, 'R': 8})

    def test_get_contact_residues_contactpairs(self):
        """Verify get_contact_residues(return_contact_pairs=True)."""
        contact_residues = self.db.get_contact_residues(
            chain1='L', chain2='R', return_contact_pairs=True)
        self.assertIsInstance(contact_residues, dict)
        self.assertEqual(len(contact_residues), 20)
        # Every key must map to a non-empty list.
        for i in contact_residues.keys():
            with self.subTest(i=i):
                self.assertIsInstance(contact_residues[i], list)
                self.assertNotEqual(len(contact_residues[i]), 0)
        # in pymol:
        # select natoms, (chain R) within 8.5 of (chain L and resi 60)
        self.assertEqual(len(contact_residues[('L', 60, 'GLN')]), 3)

    def test_get_contact_residues_alltrue(self):
        """Verify get_contact_residues with every boolean option enabled."""
        pdb = Path(pdb_folder, '3CRO_H.pdb')
        db = interface(pdb)
        contact_residues = db.get_contact_residues(
            allchains=True, only_backbone_atoms=True, excludeH=True)
        self._assert_chain_counts(
            contact_residues, {'A': 0, 'B': 0, 'L': 9, 'R': 8})
class Test_3_PDB2SQLInstanceInput(unittest.TestCase):
    """Test using a pdb2sql instance (instead of a file path) as input."""

    def setUp(self):
        self.pdb = Path(pdb_folder, '3CRO.pdb')

    def test_get_contact_residues_default(self):
        """Verify get_contact_residues default on an interface built from
        an existing pdb2sql instance."""
        source_db = pdb2sql(self.pdb)
        self.db = interface(source_db)
        residues = self.db.get_contact_residues()
        self.assertIsInstance(residues, dict)
        self.assertEqual(len(residues), 2)
        self.assertEqual(list(residues.keys()), ['A', 'B'])
        for chain in ('A', 'B'):
            self.assertEqual(len(residues[chain]), 20)

    def test_database_consistency(self):
        """Verify initializing interface with an updated pdb2sql database."""
        source_db = pdb2sql(self.pdb)
        # Modify the database before handing it over, then snapshot it.
        source_db.update_column('temp', [99] * 10)
        expected_rows = source_db.get('*')

        self.db = interface(source_db)
        # The interface must expose the same rows as the modified source.
        self.assertEqual(expected_rows, self.db.get('*'))
class TestMany2SQL(unittest.TestCase):
    """Smoke tests for many2sql.

    Each test only checks that the call completes without raising; no
    return value is inspected.
    """

    def setUp(self):
        pdb1 = Path(pdb_folder, '1AK4', '1AK4_5w.pdb')
        pdb2 = Path(pdb_folder, '1AK4', '1AK4_10w.pdb')
        self.pdbs = [pdb1, pdb2]
        # The table names are the string form of the two paths.
        self.tablenames = [str(pdb1), str(pdb2)]

    def test_init_from_files(self):
        """Verify init from path."""
        many2sql(self.pdbs, tablenames=self.tablenames)

    def test_init_from_pdb_data(self):
        """Verify init from data."""
        sqls = [pdb2sql(p) for p in self.pdbs]
        data = [db.sql2pdb() for db in sqls]
        many2sql(data, tablenames=self.tablenames)

    def test_init_from_sql(self):
        """Verify init from pdb2sql instances."""
        sqls = [pdb2sql(p) for p in self.pdbs]
        many2sql(sqls, tablenames=self.tablenames)

    def test_call(self):
        """Test call function."""
        many = many2sql(self.pdbs, tablenames=self.tablenames)
        many(chainID='A')

    def test_get_all(self):
        """Test get_all function."""
        many = many2sql(self.pdbs, tablenames=self.tablenames)
        many.get_all('x,y,z', chainID='A')

    def test_get_intersection(self):
        """Test get_intersection function."""
        many = many2sql(self.pdbs, tablenames=self.tablenames)
        many.get_intersection('x,y,z')

    def test_intersect(self):
        """Test intersect function."""
        many = many2sql(self.pdbs, tablenames=self.tablenames)
        many.intersect()
class TestSim(unittest.TestCase):
    """Test Similarity calculation."""

    def setUp(self):
        self.decoy = Path(pdb_folder, '1AK4', '1AK4_5w.pdb')
        self.ref = Path(pdb_folder, '1AK4', 'target.pdb')
        self.izone = Path(pdb_folder, '1AK4', 'target.izone')
        self.lzone = Path(pdb_folder, '1AK4', 'target.lzone')
        self.sim = StructureSimilarity(
            self.decoy, self.ref, enforce_residue_matching=False)
        # target values are calculated using scripts from
        # https://github.com/haddocking/BM5-clean
        # NOTE(review): the tests compare floats with assertEqual, so they
        # presumably rely on the library rounding its results - confirm.
        self.irmsd = 1.135
        self.lrmsd = 6.655
        self.fnat = 0.790698
        self.dockQ = 0.682191
        self.capriClass = 'medium'
        self.nclashes_ref = 4

    @staticmethod
    def _remove_if_exists(path):
        """Delete ``path`` if it is an existing file; otherwise do nothing."""
        if os.path.isfile(path):
            os.remove(path)

    def _remove_on_cleanup(self, *paths):
        """Schedule ``paths`` for deletion when the test ends.

        Uses ``addCleanup`` so the files are removed even when an
        assertion fails midway through the test.
        """
        for path in paths:
            self.addCleanup(self._remove_if_exists, path)

    ####################################################################
    # test check_residues to see if pdb files match or not
    ####################################################################
    def test_check_residues(self):
        """verify check_residues() raises on non-matching residue numbering"""
        decoy = Path(pdb_folder, '1AK4', '1AK4_5w_nonmatch.pdb')
        with self.assertRaisesRegex(ValueError,
                                    'Residue numbering not identical'):
            sim = StructureSimilarity(decoy, self.ref)
            sim.check_residues()

    ####################################################################
    # test i-rmsd
    ####################################################################
    def test_irmsdfast_default(self):
        """verify compute_irmsd_fast()"""
        result = self.sim.compute_irmsd_fast()
        self.assertEqual(result, self.irmsd)

    def test_irmsdfast_izone(self):
        """verify compute_irmsd_fast(izone=...) with an izone file"""
        result = self.sim.compute_irmsd_fast(izone=self.izone)
        self.assertEqual(result, self.irmsd)

    def test_irmsdfast_method(self):
        """verify compute_irmsd_fast(method='quaternion')"""
        result = self.sim.compute_irmsd_fast(method='quaternion')
        self.assertEqual(result, self.irmsd)

    def test_irmsdfast_check(self):
        """verify compute_irmsd_fast(check=False)"""
        result = self.sim.compute_irmsd_fast(check=False)
        self.assertEqual(result, self.irmsd)

    def test_irmsdsql_default(self):
        """verify compute_irmsd_pdb2sql()"""
        result = self.sim.compute_irmsd_pdb2sql()
        self.assertEqual(result, self.irmsd)

    def test_irmsdsql_izone(self):
        """verify compute_irmsd_pdb2sql(izone=...) with an izone file"""
        result = self.sim.compute_irmsd_pdb2sql(izone=self.izone)
        self.assertEqual(result, self.irmsd)

    def test_irmssql_method(self):
        """verify compute_irmsd_pdb2sql(method='quaternion')"""
        result = self.sim.compute_irmsd_pdb2sql(method='quaternion')
        self.assertEqual(result, self.irmsd)

    def test_irmsdsql_exportpdb(self):
        """verify compute_irmsd_pdb2sql(exportpath='.')"""
        exported = ('./irmsd_ref.pdb', './irmsd_decoy.pdb')
        # Register the cleanup first so a failing assertion cannot leave
        # the exported files behind in the working directory.
        self._remove_on_cleanup(*exported)
        result = self.sim.compute_irmsd_pdb2sql(exportpath='.')
        self.assertEqual(result, self.irmsd)
        for fname in exported:
            self.assertTrue(os.path.isfile(fname))
        for fname in exported:
            self.assertTrue(os.path.getsize(fname) > 0)

    ####################################################################
    # test l-rmsd
    ####################################################################
    def test_lrmsdfast_default(self):
        """verify compute_lrmsd_fast()"""
        result = self.sim.compute_lrmsd_fast()
        self.assertEqual(result, self.lrmsd)

    def test_lrmsdfast_lzone(self):
        """verify compute_lrmsd_fast(lzone=...) with an lzone file"""
        result = self.sim.compute_lrmsd_fast(lzone=self.lzone)
        self.assertEqual(result, self.lrmsd)

    def test_lrmsdfast_method(self):
        """verify compute_lrmsd_fast(method='quaternion')"""
        result = self.sim.compute_lrmsd_fast(method='quaternion')
        self.assertEqual(result, self.lrmsd)

    def test_lrmsdsql_default(self):
        """verify compute_lrmsd_pdb2sql()"""
        result = self.sim.compute_lrmsd_pdb2sql()
        self.assertEqual(result, self.lrmsd)

    def test_lrmsdsql_method(self):
        """verify compute_lrmsd_pdb2sql(method='quaternion')"""
        result = self.sim.compute_lrmsd_pdb2sql(method='quaternion')
        self.assertEqual(result, self.lrmsd)

    def test_lrmsdsql_exportpdb(self):
        """verify compute_lrmsd_pdb2sql(exportpath='.')"""
        exported = ('./lrmsd_ref.pdb', './lrmsd_decoy.pdb')
        # Register the cleanup first so a failing assertion cannot leave
        # the exported files behind in the working directory.
        self._remove_on_cleanup(*exported)
        result = self.sim.compute_lrmsd_pdb2sql(exportpath='.')
        self.assertEqual(result, self.lrmsd)
        for fname in exported:
            self.assertTrue(os.path.isfile(fname))
        for fname in exported:
            self.assertTrue(os.path.getsize(fname) > 0)

    ####################################################################
    # test FNAT
    ####################################################################
    def test_fnatfast_default(self):
        """verify compute_fnat_fast()"""
        result = self.sim.compute_fnat_fast()
        self.assertEqual(result, self.fnat)

    def test_fnatsql_default(self):
        """verify compute_fnat_pdb2sql()"""
        result = self.sim.compute_fnat_pdb2sql()
        self.assertEqual(result, self.fnat)

    ####################################################################
    # test dockQ
    ####################################################################
    def test_dockQ_default(self):
        """verify compute_DockQScore()"""
        result = self.sim.compute_DockQScore(self.fnat, self.lrmsd, self.irmsd)
        self.assertEqual(result, self.dockQ)

    ####################################################################
    # test CAPRI
    ####################################################################
    def test_capri_default(self):
        """verify compute_CapriClass() on the reference values"""
        result = self.sim.compute_CapriClass(self.fnat, self.lrmsd, self.irmsd)
        self.assertEqual(result, self.capriClass)

    def test_capri_dummy(self):
        """verify compute_CapriClass() on a table of hand-picked values"""
        fnat = [0.9, 0.8, 0.7, 0.5, 0.3, 0.1]
        lrmsd = [0.8, 2.4, 6.2, 7.5, 12.0, 10.0]
        irmsd = [0.6, 0.8, 1.6, 2.3, 3.1, 3.3]
        targets = ['high', 'high', 'medium',
                   'acceptable', 'acceptable', 'acceptable']
        results = [self.sim.compute_CapriClass(i, j, k)
                   for i, j, k in zip(fnat, lrmsd, irmsd)]
        self.assertEqual(results, targets)

    ####################################################################
    # test clashes
    ####################################################################
    def test_clashes_default(self):
        """verify compute_clashes()"""
        result = self.sim.compute_clashes(self.ref)
        self.assertEqual(result, self.nclashes_ref)
class TestSuperpose(unittest.TestCase):
    """Test the superpose functionality"""

    def setUp(self):
        folder_1ak4 = Path(pdb_folder, '1AK4')
        self.pdb1 = Path(folder_1ak4, '1AK4_5w.pdb')
        self.pdb2 = Path(folder_1ak4, '1AK4_10w.pdb')

    def test_superpose(self):
        """Test superpose() restricted to chain A; a UserWarning is expected."""
        with self.assertWarns(UserWarning):
            superpose(self.pdb1, self.pdb2, chainID='A')

    def test_superpose_backbone_error(self):
        """Test superpose() backbone error when specifying `name`"""
        # Outer context expects the warning, inner one the ValueError.
        with self.assertWarns(UserWarning), \
                self.assertRaises(ValueError) as raised:
            superpose(self.pdb1, self.pdb2, name='CA')
        expected_fragment = "Atom type specified but only_backbone == True"
        self.assertIn(expected_fragment, raised.exception.args[0])
import pdb_folder 8 | 9 | class TestTools(unittest.TestCase): 10 | 11 | def setUp(self): 12 | self.db = pdb2sql(Path(pdb_folder, 'dummy_transform.pdb')) 13 | self.xyz = self.db.get('x,y,z') 14 | 15 | def test_get_xyz(self): 16 | """Verfify getting xyz from sql.""" 17 | 18 | result = transform._get_xyz(self.db) 19 | target = np.array([[1., 0., 0.], [-1., 0., 0.], 20 | [0., 1., 0.], [0., -1., 0.], 21 | [0., 0., 1.], [0., 0., -1.]]) 22 | np.testing.assert_equal(result, target) 23 | 24 | def test_translation(self): 25 | """Verify sql translation.""" 26 | trans_vec = np.array([1, 1, 1]) 27 | target = np.array([[2., 1., 1.], [0., 1., 1.], 28 | [1., 2., 1.], [1., 0., 1.], 29 | [1., 1., 2.], [1., 1., 0.]]) 30 | transform.translation(self.db, trans_vec) 31 | result = self.db.get('x,y,z') 32 | np.testing.assert_almost_equal(result, target) 33 | 34 | def test_rot_axis(self): 35 | """Verify sql rotation using axis and angle.""" 36 | # rotate pi around x axis 37 | angle = np.pi 38 | axis = (1., 0., 0.) 
39 | target = np.array([[1., 0., 0.], [-1., 0., 0.], 40 | [0., -1., 0.], [0., 1., 0.], 41 | [0., 0., -1.], [0., 0., 1.]]) 42 | transform.rot_axis(self.db, axis, angle) 43 | result = self.db.get('x,y,z') 44 | np.testing.assert_almost_equal(result, target) 45 | 46 | def test_rot_xyz_around_axis(self): 47 | """Verify xyz values rot ation using axis and angle.""" 48 | # rotate pi around x, y and z axis 49 | angle = np.pi 50 | axes_xyz = [(1., 0., 0.), 51 | (0., 1., 0.), 52 | (0., 0., 1.)] 53 | targets = [np.array([[1., 0., 0.], [-1., 0., 0.], 54 | [0., -1., 0.], [0., 1., 0.], 55 | [0., 0., -1.], [0., 0., 1.]]), 56 | np.array([[-1., 0., 0.], [1., 0., 0.], 57 | [0., 1., 0.], [0., -1., 0.], 58 | [0., 0., -1.], [0., 0., 1.]]), 59 | np.array([[-1., 0., 0.], [1., 0., 0.], 60 | [0., -1., 0.], [0., 1., 0.], 61 | [0., 0., 1.], [0., 0., -1.]])] 62 | for axis, target in zip(axes_xyz, targets): 63 | with self.subTest(axis=axis, target=target): 64 | xyz_rot = transform.rot_xyz_around_axis(self.xyz, axis, angle) 65 | np.testing.assert_almost_equal(xyz_rot, target) 66 | 67 | def test_get_rot_axis_angle(self): 68 | """Verify generation of random axis and angle.""" 69 | # number of repeats 70 | n = 1000 71 | for i in range(n): 72 | with self.subTest(i=i): 73 | axis, angle = transform.get_rot_axis_angle() 74 | # axis verctor must be unit vector 75 | result = axis[0]**2 + axis[1]**2 + axis[2]**2 76 | target = 1. 77 | np.testing.assert_almost_equal(result, target) 78 | # angle in the range [0, 2π) 79 | self.assertTrue(0. 
<= angle < 2 * np.pi) 80 | 81 | def test_get_rot_axis_angle_seed(self): 82 | """Verify specific random seed.""" 83 | seed = 2019 84 | axis1, angle1 = transform.get_rot_axis_angle(seed) 85 | axis2, angle2 = transform.get_rot_axis_angle(seed) 86 | self.assertEqual(axis1, axis2) 87 | self.assertEqual(angle1, angle2) 88 | 89 | def test_rot_euler(self): 90 | """Verify sql rotation using Euler angles.""" 91 | # rotate pi around z axis 92 | alpha, beta, gamma = 0, 0, np.pi 93 | target = np.array([[-1., 0., 0.], [1., 0., 0.], 94 | [0., -1., 0.], [0., 1., 0.], 95 | [0., 0., 1.], [0., 0., -1.]]) 96 | transform.rot_euler(self.db, alpha, beta, gamma) 97 | result = self.db.get('x,y,z') 98 | np.testing.assert_almost_equal(result, target) 99 | 100 | def test_rotation_euler(self): 101 | """Verify xyz values rotation using Euler angles.""" 102 | # rotate pi around x, y and z axis 103 | angles = [(np.pi, 0., 0.), 104 | (0., np.pi, 0.), 105 | (0., 0., np.pi)] 106 | targets = [np.array([[1., 0., 0.], [-1., 0., 0.], 107 | [0., -1., 0.], [0., 1., 0.], 108 | [0., 0., -1.], [0., 0., 1.]]), 109 | np.array([[-1., 0., 0.], [1., 0., 0.], 110 | [0., 1., 0.], [0., -1., 0.], 111 | [0., 0., -1.], [0., 0., 1.]]), 112 | np.array([[-1., 0., 0.], [1., 0., 0.], 113 | [0., -1., 0.], [0., 1., 0.], 114 | [0., 0., 1.], [0., 0., -1.]])] 115 | for angle, target in zip(angles, targets): 116 | with self.subTest(angle=angle, target=target): 117 | result = transform.rotation_euler( 118 | self.xyz, angle[0], angle[1], angle[2]) 119 | np.testing.assert_almost_equal(result, target) 120 | 121 | def test_rot_mat(self): 122 | """Verify sql roation using rotation matrix.""" 123 | # rotate pi around z-axis 124 | theta = np.pi 125 | cosa = np.cos(theta) 126 | sina = np.sin(theta) 127 | rot_mat = np.array([[cosa, -sina, 0], 128 | [sina, cosa, 0], 129 | [0, 0, 1]]) 130 | target = np.array([[-1., 0., 0.], [1., 0., 0.], 131 | [0., -1., 0.], [0., 1., 0.], 132 | [0., 0., 1.], [0., 0., -1.]]) 133 | transform.rot_mat(self.db, 
rot_mat) 134 | result = self.db.get('x,y,z') 135 | np.testing.assert_almost_equal(result, target) 136 | 137 | def test_rotation_matrix(self): 138 | """Verify xyz values roation using rotation matrix.""" 139 | theta = np.pi 140 | cosa = np.cos(theta) 141 | sina = np.sin(theta) 142 | # rotate pi around x, y and z axis 143 | rot_mats = [np.array([[1, 0, 0], [0, cosa, -sina], [0, sina, cosa]]), 144 | np.array([[cosa, 0, sina], [0, 1, 0], [-sina, 0, cosa]]), 145 | np.array([[cosa, -sina, 0], [sina, cosa, 0], [0, 0, 1]])] 146 | targets = [np.array([[1., 0., 0.], [-1., 0., 0.], 147 | [0., -1., 0.], [0., 1., 0.], 148 | [0., 0., -1.], [0., 0., 1.]]), 149 | np.array([[-1., 0., 0.], [1., 0., 0.], 150 | [0., 1., 0.], [0., -1., 0.], 151 | [0., 0., -1.], [0., 0., 1.]]), 152 | np.array([[-1., 0., 0.], [1., 0., 0.], 153 | [0., -1., 0.], [0., 1., 0.], 154 | [0., 0., 1.], [0., 0., - 1.]])] 155 | for mat, target in zip(rot_mats, targets): 156 | with self.subTest(mat=mat, target=target): 157 | result = transform.rotate(self.xyz, mat) 158 | np.testing.assert_almost_equal(result, target) 159 | 160 | def test_rotation_matrix_center(self): 161 | """Verify specific rotation center.""" 162 | # rotate pi around z-axis with rotation center [1,1,1,] 163 | theta = np.pi 164 | cosa = np.cos(theta) 165 | sina = np.sin(theta) 166 | xyz = np.array([0., 0., 0.]) 167 | rot_mat = np.array([[cosa, -sina, 0], [sina, cosa, 0], [0, 0, 1]]) 168 | centers = [np.array([1., 1., 1.]), [1., 1., 1.]] 169 | for center in centers: 170 | with self.subTest(center=center): 171 | result = transform.rotate(xyz, rot_mat, center=center) 172 | target = np.array([2., 2., 0.]) 173 | np.testing.assert_almost_equal(result, target) 174 | 175 | 176 | if __name__ == "__main__": 177 | unittest.main() 178 | -------------------------------------------------------------------------------- /test/test_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | from 
pdb2sql import fetch 4 | 5 | class TestTools(unittest.TestCase): 6 | 7 | def test_fetch(self): 8 | """Verfify fetch with valid pdb""" 9 | pdbid = '1cbh' 10 | fetch(pdbid) 11 | self.assertTrue(os.path.isfile('./1cbh.pdb')) 12 | os.remove('./1cbh.pdb') 13 | 14 | def test_fetch_invalid_pdb(self): 15 | """Verfify fetch with invalid pdb""" 16 | pdbids = ['a', 'ab', 'abc', '1cbha', '1cb*', '-1cb'] 17 | for pdbid in pdbids: 18 | with self.subTest(pdbid=pdbid): 19 | with self.assertRaisesRegex(ValueError, 'Invalid PDB ID'): 20 | fetch(pdbid) 21 | 22 | def test_fetch_nonexist_pdbid(self): 23 | """Verfify fetch with non-exist PDB ID""" 24 | pdbid = '1000' 25 | with self.assertRaisesRegex(ValueError, 'PDB ID not exist'): 26 | fetch(pdbid) 27 | 28 | def test_fetch_nonexist_pdbfmt(self): 29 | """Verfify fetch PDB ID that has no pdb format but cif format""" 30 | pdbid = '6SL9' 31 | with self.assertRaisesRegex(ValueError, 32 | 'The PDB ID given is only represented in mmCIF format'): 33 | fetch(pdbid) 34 | 35 | 36 | if __name__ == "__main__": 37 | unittest.main() 38 | -------------------------------------------------------------------------------- /test/utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from io import StringIO 3 | 4 | 5 | class CaptureOutErr(object): 6 | """Context manager to capture the content of stdout and stderr. 
7 | 8 | Example: 9 | >>> with CaptureOutErr() as cm: 10 | >>> ...run_code() 11 | >>> print(cm) 12 | """ 13 | 14 | def __enter__(self): 15 | self.stdout = [] 16 | self.stderr = [] 17 | self._out = StringIO() 18 | self._err = StringIO() 19 | sys.stdout = self._out 20 | sys.stderr = self._err 21 | return self 22 | 23 | def __exit__(self, *args): 24 | self.stdout.extend(self._out.getvalue().splitlines()) 25 | self.stderr.extend(self._err.getvalue().splitlines()) 26 | sys.stdout = sys.__stdout__ 27 | sys.stderr = sys.__stderr__ 28 | --------------------------------------------------------------------------------