├── .github ├── ISSUE_TEMPLATE │ ├── bug-report.md │ └── feature_request.md ├── dependabot.yml └── workflows │ ├── publish_pypi.yml │ └── stale_issue_pr.yml ├── .gitignore ├── .zenodo.json ├── CITATION.cff ├── LICENSE ├── MANIFEST.in ├── README.md ├── makefile ├── pssmgen ├── __init__.py ├── __version__.py ├── map_pssm2pdb.py └── pssm.py ├── setup.py └── test ├── 7CEI ├── pdb │ ├── 7CEI_1w.pdb │ ├── 7CEI_2w.pdb │ └── 7CEI_3w.pdb └── pssm_raw │ ├── 7CEI.A.pssm │ └── 7CEI.B.pssm └── test_pssm.py /.github/ISSUE_TEMPLATE/bug-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **Environment:** 14 | - OS system: 15 | - Version: 16 | - Branch commit ID: 17 | - Inputs: 18 | 19 | **To Reproduce** 20 | Steps/commands to reproduce the behaviour: 21 | 1. 22 | 2. 23 | 3. 24 | 25 | **Expected Results** 26 | A clear and concise description of what you expected to happen. 27 | 28 | **Actual Results or Error Info** 29 | If applicable, add screenshots to help explain your problem. 30 | 31 | **Additional Context** 32 | Add any other context about the problem here. 33 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 
15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: pip 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | open-pull-requests-limit: 10 8 | -------------------------------------------------------------------------------- /.github/workflows/publish_pypi.yml: -------------------------------------------------------------------------------- 1 | name: Build and publish Python distributions to PyPI or TestPyPI 2 | on: 3 | workflow_dispatch: 4 | inputs: 5 | version: 6 | description: 'Version upload to pypi' 7 | required: true 8 | pypi_repo: 9 | description: 'Upload to testpypi or pypi' 10 | default: 'testpypi' 11 | required: true 12 | 13 | jobs: 14 | publish: 15 | runs-on: ubuntu-latest 16 | steps: 17 | - uses: actions/checkout@master 18 | with: 19 | ref: ${{ github.event.inputs.version }} 20 | - uses: actions/setup-python@v3 21 | with: 22 | python-version: '3.9' 23 | - name: Check distribution version 24 | run: | 25 | v=$(python setup.py --version) 26 | if [[ $v != ${{ github.event.inputs.version }} ]]; then 27 | echo "ERROR: Package version $v is not same as input version ${{ github.event.inputs.version }}." 28 | echo "Update package version, tag the commit and rerun this workflow." 29 | exit 1 30 | fi 31 | - name: Install pypa/build 32 | run: >- 33 | python -m 34 | pip install 35 | build 36 | --user 37 | - name: Build a binary wheel and a source tarball 38 | run: >- 39 | python -m 40 | build 41 | --sdist 42 | --wheel 43 | --outdir dist/ 44 | . 
45 | - name: Publish distribution to TestPyPI 46 | if: ${{ github.event.inputs.pypi_repo == 'testpypi' }} 47 | uses: pypa/gh-action-pypi-publish@master 48 | with: 49 | user: __token__ 50 | password: ${{ secrets.TEST_PYPI_API_TOKEN }} 51 | repository_url: https://test.pypi.org/legacy/ 52 | - name: Publish distribution to PyPI 53 | if: ${{ github.event.inputs.pypi_repo == 'pypi' }} 54 | uses: pypa/gh-action-pypi-publish@master 55 | with: 56 | user: __token__ 57 | password: ${{ secrets.PYPI_API_TOKEN }} 58 | -------------------------------------------------------------------------------- /.github/workflows/stale_issue_pr.yml: -------------------------------------------------------------------------------- 1 | name: Close inactive issues and pull requests 2 | on: 3 | schedule: 4 | - cron: "14 3 * * 1,3,5" # check at 03:14 on Monday, Wednesday, and Friday 5 | 6 | jobs: 7 | close-issues: 8 | runs-on: ubuntu-latest 9 | permissions: 10 | issues: write 11 | pull-requests: write 12 | steps: 13 | - uses: actions/stale@v5.0.0 14 | with: 15 | days-before-issue-stale: 30 16 | days-before-issue-close: 7 17 | stale-issue-label: "stale" 18 | stale-issue-message: "This issue is stale because it has been open for 30 days with no activity. Remove stale label or comment or this will be closed in 7 days." 19 | close-issue-message: "This issue was closed because it has been inactive for 7 days since being marked as stale." 20 | days-before-pr-stale: 14 21 | days-before-pr-close: 7 22 | stale-pr-message: "This PR is stale because it has been open for 14 days with no activity. Remove stale label or comment or this will be closed in 7 days." 23 | close-pr-message: "This PR was closed because it has been inactive for 7 days since being marked as stale." 
24 | exempt-issue-labels: 'blocked' -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *__pycache__* 2 | *sublime.* 3 | *.egg-info 4 | .vscode/* 5 | .DS_Store 6 | -------------------------------------------------------------------------------- /.zenodo.json: -------------------------------------------------------------------------------- 1 | { 2 | "creators": [ 3 | { 4 | "affiliation": "Netherlands eScience Center", 5 | "name": "Renaud, Nicolas", 6 | "orcid": "0000-0001-9589-2694" 7 | }, 8 | { 9 | "affiliation": "Netherlands eScience Center", 10 | "name": "Geng, Cunliang", 11 | "orcid": "0000-0002-1409-8358" 12 | } 13 | ], 14 | "description": "Generates consistent PSSM and/or PDB files for protein-protein complexes", 15 | "keywords": [ 16 | "pssm", 17 | "pdb", 18 | "protein-protein complex", 19 | "docking", 20 | "bioinformatics", 21 | "CAPRI" 22 | ], 23 | "license": { 24 | "id": "Apache-2.0" 25 | }, 26 | "title": "PSSMGen" 27 | } 28 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | # YAML 1.2 2 | --- 3 | abstract: "Generates consistent PSSM and/or PDB files for protein-protein complexes" 4 | 5 | authors: 6 | - 7 | affiliation: "Netherlands eScience Center" 8 | family-names: Renaud 9 | given-names: Nicolas 10 | orcid: "https://orcid.org/0000-0001-9589-2694" 11 | - 12 | affiliation: "Netherlands eScience Center" 13 | family-names: Geng 14 | given-names: Cunliang 15 | orcid: "https://orcid.org/0000-0002-1409-8358" 16 | cff-version: "1.1.0" 17 | 18 | keywords: 19 | - pssm 20 | - pdb 21 | - "protein-protein complex" 22 | - docking 23 | - bioinformatics 24 | - CAPRI 25 | license: "Apache-2.0" 26 | message: "If you use this software, please cite it using these metadata." 
27 | repository-code: "https://github.com/DeepRank/PSSMGen" 28 | title: PSSMGen 29 | ... 30 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 
35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "{}" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include README.md -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PSSMGen 2 | 3 | 4 | | Fair-software.nl Recommendations | Badges | 5 | |:-|:-:| 6 | | [1. Code Repository](https://fair-software.nl/recommendations/repository) | [![GitHub URL](https://img.shields.io/badge/github-repo-000.svg?logo=github&labelColor=gray&color=blue)](https://github.com/DeepRank/pssmgen) | 7 | |   | [![GitHub](https://img.shields.io/github/last-commit/DeepRank/pssmgen)](https://github.com/DeepRank/pssmgen) | 8 | | [2. License](https://fair-software.nl/recommendations/license) | [![License](https://img.shields.io/github/license/DeepRank/pssmgen)](https://github.com/DeepRank/pssmgen) | 9 | | [3. 
Community Registry](https://fair-software.nl/recommendations/registry) | [![Research Software Directory](https://img.shields.io/badge/RSD-PSSMGen-red)](https://research-software.nl/software/pssmgen) | 10 | |   | [![PyPI](https://img.shields.io/pypi/v/pssmgen)](https://pypi.org/project/pssmgen/) | 11 | | [4. Enable Citation](https://fair-software.nl/recommendations/citation) | [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3635711.svg)](https://doi.org/10.5281/zenodo.3635711) | 12 | | [5. Code Quality Checklist](https://fair-software.nl/recommendations/checklist) | [![CII best practices](https://bestpractices.coreinfrastructure.org/projects/3759/badge)](https://bestpractices.coreinfrastructure.org/projects/3759) | 13 | | Code Analysis | [![Codacy Badge](https://app.codacy.com/project/badge/Grade/0fa16bbe7f104c9791dfbdfdd1744227)](https://www.codacy.com/gh/DeepRank/PSSMGen/dashboard?utm_source=github.com&utm_medium=referral&utm_content=DeepRank/PSSMGen&utm_campaign=Badge_Grade) 14 | 15 | 33 | ----- 34 | 35 | PSSMGen: Generates Consistent PSSM and/or PDB Files for Protein-Protein Complexes 36 | 37 | ## Install 38 | 39 | 1. Make sure BLAST is installed and its database is available on your machine. Otherwise, install BLAST and download its databases by following the [BLAST guide](https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=BlastDocs&DOC_TYPE=Download). To calculate PSSM, the recommended database is the non-redundant protein sequences `nr` (i.e. `nr.*.tar.gz` files from the [ftp site](https://ftp.ncbi.nlm.nih.gov/blast/db/)). 40 | 2. Install the PSSMgen by `pip install PSSMGen`. 41 | 42 | 43 | ## Requirements for file structures and names 44 | 45 | `PSSMGen` is geared toward computing the pssm files for all models of a particular protein-protein complex. 
46 | 47 | ### File structures 48 | This tool assumes your files have the following structure: 49 | 50 | ``` 51 | workdir 52 | |_ pdb 53 | |_ fasta 54 | |_ pssm_raw 55 | |_ pssm 56 | |_ pdb_nonmatch 57 | ``` 58 | 59 | - `workdir` is your working directory for one specific protein-protein complex. 60 | - `pdb` folder contains the PDB files (consistent PDB files) 61 | - `fasta` folder contains the protein sequence [FASTA](https://en.wikipedia.org/wiki/FASTA_format) files. The code can generate the FASTA files by extracting sequences from the `pdb` file, or you can manually create this folder and put customised FASTA files there. 62 | - `pssm_raw` folder stores the PSSM files. The code can automatically generate them, or you can manually create this folder and put customised PSSM files there. 63 | - `pssm` folder stores consistent PSSM files, whose sequences are aligned with those of PDB files. This folder and its files are created automatically. 64 | - `pdb_nonmatch` folder stores the inconsistent PDB files, while the related consistent PDB files are in the `pdb` folder. This folder and its files are created automatically. 65 | 66 | ### File names 67 | The code assumes you follow the naming rules for different file types: 68 | - PDB files: caseID_*.chainID.pdb 69 | - FASTA files: caseID.chainID.fasta 70 | - PSSM files: caseID.chainID.pssm, caseID_*.chainID.pdb.pssm 71 | 72 | 73 | ## Examples 74 | 75 | Here are some examples for the complex `7CEI`. 
76 | The file structure and input files should look like 77 | ``` 78 | 7CEI 79 | ├── pdb 80 | │   ├── 7CEI_1w.pdb 81 | │   ├── 7CEI_2w.pdb 82 | │   └── 7CEI_3w.pdb 83 | └── fasta 84 | ├── 7CEI.A.fasta 85 | └── 7CEI.B.fasta 86 | ``` 87 | 88 | ### Calculate PSSM with given FASTA files 89 | 90 | ```python 91 | from pssmgen import PSSM 92 | 93 | # initiate the PSSM object 94 | gen = PSSM(work_dir='7CEI') 95 | 96 | # set psiblast executable, database and other psiblast parameters (the defaults are shown here) 97 | gen.configure(blast_exe='/home/software/blast/bin/psiblast', 98 | database='/data/DBs/blast_dbs/nr_v20180204/nr', 99 | num_threads = 4, evalue=0.0001, comp_based_stats='T', 100 | max_target_seqs=2000, num_iterations=3, outfmt=7, 101 | save_each_pssm=True, save_pssm_after_last_round=True) 102 | 103 | # generates raw PSSM files by running BLAST with fasta files 104 | gen.get_pssm(fasta_dir='fasta', out_dir='pssm_raw', run=True, save_all_psiblast_output=True) 105 | ``` 106 | 107 | The code will automatically create the `pssm_raw` folder to store the generated PSSM files. 108 | 109 | 110 | ### Map PSSM files to PDB files to get consistent PSSM and PDB files 111 | 112 | After getting the raw PSSMs from the last example, we can map them to PDB files to 113 | get consistent PSSM and PDB files as follows: 114 | 115 | ```python 116 | # map PSSM and PDB to get consistent/mapped PSSM files 117 | gen.map_pssm(pssm_dir='pssm_raw', pdb_dir='pdb', out_dir='pssm', chain=('A','B')) 118 | 119 | # write consistent/mapped PDB files and move inconsistent ones to another folder for backup 120 | gen.get_mapped_pdb(pdbpssm_dir='pssm', pdb_dir='pdb', pdbnonmatch_dir='pdb_nonmatch') 121 | ``` 122 | 123 | The code will automatically create `pssm` and `pdb_nonmatch` folders and related files. 124 | 125 | 126 | ### Extract FASTA files from PDB file 127 | 128 | If the FASTA files are not provided, you can also generate them from the PDB file. 
129 | 130 | The file structure and input files should look like 131 | ``` 132 | 7CEI 133 | └── pdb 134 | ├── 7CEI_1w.pdb 135 | ├── 7CEI_2w.pdb 136 | └── 7CEI_3w.pdb 137 | ``` 138 | 139 | ```python 140 | # initiate the PSSM object 141 | gen = PSSM('7CEI') 142 | 143 | # extract FASTA file from the reference pdb file. 144 | # if `pdbref` is not set, the code will randomly select one pdb as reference. 145 | gen.get_fasta(pdb_dir='pdb', pdbref='7CEI_1w.pdb', chain=('A','B'), out_dir='fasta') 146 | ``` 147 | The code will automatically create `fasta` and `pssm_raw` folders for fasta files and raw pssm files, respectively. 148 | 149 | 150 | ### Use existing PSSM files to get consistent PSSM and PDB files 151 | 152 | You can provide raw PSSM files instead of calculating them. 153 | 154 | The file structure and input files should look like 155 | ``` 156 | 7CEI 157 | ├── pdb 158 | │   ├── 7CEI_1w.pdb 159 | │   ├── 7CEI_2w.pdb 160 | │   └── 7CEI_3w.pdb 161 | └── pssm_raw 162 | ├── 7CEI.A.pssm 163 | └── 7CEI.B.pssm 164 | ``` 165 | 166 | ```python 167 | from pssmgen import PSSM 168 | 169 | # initiate the PSSM object 170 | gen = PSSM('7CEI') 171 | 172 | # map PSSM and PDB to get consistent files 173 | gen.map_pssm() 174 | 175 | # write consistent PDB files and move inconsistent ones to another folder for backup 176 | gen.get_mapped_pdb() 177 | ``` -------------------------------------------------------------------------------- /makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean clean-build clean-pyc clean-test release dist 2 | 3 | help: 4 | @echo "clean - remove all build, test, coverage and Python artifacts" 5 | @echo "clean-build - remove build artifacts" 6 | @echo "clean-pyc - remove Python file artifacts" 7 | @echo "clean-test - remove test and coverage artifacts" 8 | @echo "release - package and upload a release" 9 | @echo "dist - package" 10 | 11 | clean: clean-build clean-pyc clean-test 12 | 13 | clean-build: 14 | rm -fr build/ 15 | rm -fr dist/ 16 | rm -fr .eggs/ 
17 | find . -name '*.egg-info' -exec rm -fr {} + 18 | find . -name '*.egg' -exec rm -f {} + 19 | 20 | clean-pyc: 21 | find . -name '*.pyc' -exec rm -f {} + 22 | find . -name '*.pyo' -exec rm -f {} + 23 | find . -name '*~' -exec rm -f {} + 24 | find . -name '__pycache__' -exec rm -fr {} + 25 | find . -name '*_cache' -exec rm -fr {} + 26 | 27 | clean-test: 28 | rm -f .coverage 29 | 30 | dist: clean 31 | python setup.py sdist bdist_wheel 32 | ls -l dist 33 | 34 | release: 35 | python -m twine upload dist/* 36 | -------------------------------------------------------------------------------- /pssmgen/__init__.py: -------------------------------------------------------------------------------- 1 | from .pssm import PSSM 2 | from .__version__ import __version__ 3 | import logging 4 | 5 | LOG_FORMAT = "%(message)s" 6 | logging.basicConfig(level=logging.DEBUG, format=LOG_FORMAT) -------------------------------------------------------------------------------- /pssmgen/__version__.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.1.2' -------------------------------------------------------------------------------- /pssmgen/map_pssm2pdb.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Map the the sequence of PDB file to that of PSSM file to get consistent sequence(no gap, no residue X), 5 | and output mapped PSSM file and/or PDB file with the consisitent sequence. 
def check_input(args):
    """Validate the command-line arguments.

    Arguments:
        args {tuple} -- user input arguments; exactly four are expected

    Exits the interpreter with the usage text when the count is wrong.
    """
    if len(args) != 4:
        sys.exit(USAGE)


def get_seq_resi_from_pdb(fpdb, chainID):
    """Get FASTA sequence and residue ID from PDB file.

    Each residue of the requested chain is reported once, in file order.
    A residue is identified by (residue name, chain, residue number,
    insertion code), so repeated atoms of one residue are collapsed.

    Arguments:
        fpdb {str} -- input pdb file
        chainID {str} -- the chain to get the sequence and residue number
            (upper-cased before comparison)

    Raises:
        ValueError -- ChainID not exist in the pdb file

    Returns:
        list - one-letter sequence of the specific chain; residue names
            without a known one-letter code are reported as 'X'
        list - residue ID of the specific chain, as the raw 4-character
            field read from the file (not stripped)
    """

    # Keys must be exactly 3 characters wide because they are compared with
    # the 3-character residue-name slice line[17:20]; nucleotide names are
    # therefore right-justified with two leading blanks.
    res_codes = [
        # 20 canonical amino acids
        ('CYS', 'C'), ('ASP', 'D'), ('SER', 'S'), ('GLN', 'Q'),
        ('LYS', 'K'), ('ILE', 'I'), ('PRO', 'P'), ('THR', 'T'),
        ('PHE', 'F'), ('ASN', 'N'), ('GLY', 'G'), ('HIS', 'H'),
        ('LEU', 'L'), ('ARG', 'R'), ('TRP', 'W'), ('ALA', 'A'),
        ('VAL', 'V'), ('GLU', 'E'), ('TYR', 'Y'), ('MET', 'M'),
        # Non-canonical amino acids
        ('ASX', 'B'), ('SEC', 'U'), ('GLX', 'Z'),
        # ('MSE', 'M'), ('SOC', 'C'),
        # Canonical xNA
        ('  U', 'U'), ('  A', 'A'), ('  G', 'G'), ('  C', 'C'),
        ('  T', 'T'),
    ]

    three_to_one = dict(res_codes)
    # Record names are compared with the 6-character slice line[0:6], so
    # 'ATOM' has to be padded to six characters to ever match.
    _records = {'ATOM  ', 'HETATM'}

    chainID = chainID.upper()
    sequence = []
    resID = []
    chains = set()
    read = set()
    with open(fpdb, "r") as f:
        for line in f:
            line = line.strip()
            if line[0:6] not in _records:
                continue
            resn = line[17:20]
            chain = line[21]
            resi = line[22:26]
            # slice instead of index: the stripped line may end right after
            # the residue number, and a missing insertion code must not crash
            icode = line[26:27]
            chains.add(chain)
            if chain != chainID:
                continue
            r_uid = (resn, chain, resi, icode)
            if r_uid in read:
                # further atoms of a residue that was already recorded
                continue
            read.add(r_uid)
            sequence.append(three_to_one.get(resn, 'X'))
            resID.append(resi)
    if chainID not in chains:
        raise ValueError(
            "Chain `{}` NOT exist in PDB file '{}'".format(chainID, fpdb))

    return sequence, resID


def get_pssm(fpssm):
    """Get the content of PSSM file.

    Only lines whose first non-blank character is a digit are treated as
    PSSM rows; every other line (headers, footers, blanks) is ignored.

    Arguments:
        fpssm {str} -- input pssm file

    Raises:
        ValueError -- a PSSM row does not split into exactly 44 columns

    Returns:
        [2D list] -- pssm, one list of 44 string fields per row
    """
    digits = tuple("0123456789")
    pssm = []
    with open(fpssm, "r") as f:
        for line_raw in f:
            stripped = line_raw.strip()
            # only select lines that contain pssm values
            if not stripped.startswith(digits):
                continue
            # TODO parse pssm based on column index
            # A normal PSSM line has 44 columns; fewer columns appear when
            # adjacent numbers are written without a separating blank.
            fields = stripped.split()
            if len(fields) != 44:
                raise ValueError(
                    "Wrong format of the following line in PSSM file {}:\n{}".format(fpssm, line_raw))
            pssm.append(fields)
    return pssm


def get_aligned_sequences(seq1, seq2):
    """Align two sequences using global alignment and return aligned sequences.
    Parameters of global alignment:
        match: 1
        mismatch: 0
        gap open: -2
        gap extend: -1

    Arguments:
        seq1 {str} -- 1st sequence.
        seq2 {str} -- 2nd sequence.

    Returns:
        numpy array -- seq1_ali, aligned sequence for seq1
        numpy array -- seq2_ali, aligned sequence for seq2
    """

    alignments = pairwise2.align.globalxs(seq1, seq2, -2, -1)
    # only the first reported alignment is used
    first = alignments[0]
    seq1_ali = np.array(list(first[0]))
    seq2_ali = np.array(list(first[1]))

    return seq1_ali, seq2_ali
140 | 141 | Returns: 142 | numpy array -- seq1_ali, aligned sequence for seq1 143 | numpy array -- seq2_ali, aligned sequence for seq1 144 | """ 145 | 146 | ali = pairwise2.align.globalxs(seq1, seq2, -2, -1) 147 | seq1_ali = np.array([i for i in ali[0][0]]) 148 | seq2_ali = np.array([i for i in ali[0][1]]) 149 | 150 | return seq1_ali, seq2_ali 151 | 152 | 153 | def write_pdb_remove_residue(fipdb, fopdb, chainID, resID): 154 | """Write PDB file with removing some residues. 155 | 156 | Arguments: 157 | fipdb {str} -- Input PDB file. 158 | fopdb {str} -- Output PDB file. 159 | chainID {str} -- The ID of the chain that the to-be-removed residues locates. 160 | resID {list} -- A list of residue ID to remove. 161 | """ 162 | 163 | fout = open(fopdb, "w") 164 | resID = [str(i) for i in resID] 165 | _records = set(['ATOM ', 'HETATM']) 166 | with open(fipdb, "r") as f: 167 | for line in f: 168 | if line[0:6] in _records: 169 | chain = line[21] 170 | resi = line[22:26].strip() 171 | if chain == chainID and resi in resID: 172 | continue 173 | else: 174 | fout.write(line) 175 | else: 176 | fout.write(line) 177 | fout.close() 178 | 179 | 180 | def write_mapped_pssm_pdb(fpssm, fpdb, chainID, outdir): 181 | """Map PDB sequence to PSSM sequence to get the consistent sequence, 182 | and output mapped PSSM and/or PDB file with consistent sequence. 183 | 184 | Arguments: 185 | fpssm {str} -- input PSSM file 186 | fpdb {str} -- input PDB file 187 | chainID {str} -- the specific chain of PDB used to be mapped with PSSM, e.g. A or a 188 | outdir {str} -- path for output pssm file, e.g. /home/tes 189 | 190 | Output: 191 | mapped PSSM file: pdbfilename.chainID.pdb.pssm 192 | mapped PDB file: pdbfilename.chainID.pssm.pdb, only output when sequence of input PDB longer than consistent sequence. 
193 | """ 194 | # get pssm and pdb file name 195 | pdbname = os.path.basename(fpdb) 196 | pssmname = os.path.basename(fpssm) 197 | # get pdb sequence and residue numbers 198 | pdb_seq, pdb_resn = get_seq_resi_from_pdb(fpdb, chainID) 199 | pdb_seq_str = "".join(pdb_seq) 200 | # get pssm content and sequnce 201 | pssm = np.array(get_pssm(fpssm)) 202 | pssm_seq_str = "".join(pssm[:, 1]) 203 | 204 | # get aligned seqeuences 205 | pdb_seq_align, pssm_seq_align = get_aligned_sequences(pdb_seq_str, pssm_seq_str) 206 | 207 | # get indexes for matched and mismatched residues. 208 | index_match = pdb_seq_align == pssm_seq_align 209 | index_mismatch = np.logical_not(index_match) 210 | 211 | # make a gap sequence (only "-") and X sequence (only "X") that have same length as pdb/pssm_seq_align 212 | seqlen = len(pdb_seq_align) 213 | gap_seq = np.array(["-"] * seqlen) 214 | resX_seq = np.array(["X"] * seqlen) 215 | 216 | # get index of gap and residue X 217 | index_gappdb = gap_seq == pdb_seq_align 218 | index_resXpdb = resX_seq == pdb_seq_align 219 | index_gappssm = gap_seq == pssm_seq_align 220 | index_resXpssm = resX_seq == pssm_seq_align 221 | # get index of normal residues (not gap, not res X) for each sequence 222 | index_norm_pdb = np.logical_not(np.logical_or(index_gappdb, index_resXpdb)) 223 | index_norm_pssm = np.logical_not(np.logical_or(index_gappssm, index_resXpssm)) 224 | # get index of normal residues for both sequences 225 | index_norm = np.logical_and(index_norm_pdb, index_norm_pssm) 226 | # get index of mutated normal residues 227 | index_mut = np.logical_and(index_mismatch, index_norm) 228 | 229 | # raise warning for mutated normal residues 230 | if len(set(index_mut)) > 1: 231 | mut_seq = [] 232 | for i in index_mut: 233 | if i: 234 | mut_seq.append("^") 235 | else: 236 | mut_seq.append("_") 237 | 238 | logger.warning("\nWarning: Mutations exist in following sequences:") 239 | logger.warning(f'Warning: >{pdbname}_{chainID}:') 240 | logger.warning(f'Warning: 
{"".join(pdb_seq_align)}') 241 | logger.warning(f'Warning: >{pssmname}:') 242 | logger.warning(f'Warning: {"".join(pssm_seq_align)}') 243 | logger.warning(f'Warning: {"".join(mut_seq)}\n') 244 | 245 | # get pssm with index of normal residues for both sequences, this is the mapped pssm. 246 | index_norm_nogappssm = index_norm[np.logical_not(index_gappssm)] 247 | pssm_norm = pssm[index_norm_nogappssm] 248 | 249 | # add the residue number and name of PDB file to the mapped pssm 250 | # for pssm content, only keep the scoring matrix and information content 251 | header = ["pdbresi", "pdbresn", "seqresi", "seqresn", "A", "R", "N", 252 | "D", "C", "Q", "E", "G", "H", "I", "L", "K", "M", "F", "P", 253 | "S", "T", "W", "Y", "V", "IC"] 254 | header = np.transpose(np.array([[i] for i in header])) 255 | pdb_resn = [[i] for i in pdb_resn] 256 | pdb_seq = [[i] for i in pdb_seq] 257 | index_norm_nogappdb = index_norm[np.logical_not(index_gappdb)] 258 | resi_pdb = np.array(pdb_resn)[index_norm_nogappdb] 259 | resn_pdb = np.array(pdb_seq)[index_norm_nogappdb] 260 | pssm_out = np.concatenate((resi_pdb, resn_pdb, pssm_norm[:, :22], pssm_norm[:, -2:-1]), axis=1) 261 | pssm_out = np.concatenate((header, pssm_out)) 262 | 263 | # write mapped pssm to file which is named with input PDB file name, chain ID and ".pdb.pssm" 264 | fopssm = os.path.join(outdir, os.path.splitext(pdbname)[0] + "." 
+ chainID.upper() + ".pdb.pssm") 265 | with open(fopssm, "w") as f: 266 | for i in pssm_out: 267 | tmp1 = ["{:>7s}".format(j) for j in i[:4]] 268 | tmp2 = ["{:>4s}".format(j) for j in i[4:]] 269 | f.write(" ".join(tmp1+tmp2) + "\n") 270 | logger.info(f' {fopssm}') 271 | 272 | # write mapped PDB file if some residues not exist in the mapped PSSM file 273 | index_toremove = np.logical_or(np.logical_or(index_gappssm, index_resXpssm), index_resXpdb) 274 | index_gappdb_resXpssm = np.logical_and(index_gappdb, index_resXpssm) 275 | index_toremove = np.logical_and(index_toremove, np.logical_not(index_gappdb_resXpssm)) 276 | fopdb = os.path.join(outdir, os.path.splitext(pdbname)[0] + "." + chainID.upper() + ".pssm.pdb") 277 | if len(np.unique(index_toremove)) == 2: 278 | # write PDB with removing some resdiues 279 | index_toremove_nogappdb = index_toremove[np.logical_not(index_gappdb)] 280 | resn_pdb_remove = np.array(pdb_resn)[index_toremove_nogappdb] 281 | resn_pdb_remove = [ i.strip() for i in resn_pdb_remove[:,0].tolist() ] 282 | write_pdb_remove_residue(fpdb, fopdb, chainID, resn_pdb_remove) 283 | 284 | 285 | if __name__ == "__main__": 286 | check_input(sys.argv[1:]) 287 | fpssm, fpdb, chainID, outdir = sys.argv[1:] 288 | write_mapped_pssm_pdb(fpssm, fpdb, chainID, outdir) 289 | -------------------------------------------------------------------------------- /pssmgen/pssm.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import shutil 4 | import re 5 | import logging 6 | from Bio.Blast.Applications import NcbipsiblastCommandline 7 | from pdb2sql import pdb2sql 8 | 9 | from pssmgen.map_pssm2pdb import write_mapped_pssm_pdb 10 | 11 | logger = logging.getLogger() 12 | 13 | class PSSM(): 14 | 15 | def __init__(self, work_dir='.'): 16 | """Compute PSSM and map the sequence for a series of decoys. 
17 | 18 | Args: 19 | work_dir (str, optional): the working directory that contains the 20 | pdb directory and/or fasta directory. 21 | Defaults to '.'. 22 | """ 23 | 24 | self.work_dir = work_dir 25 | 26 | self.One2ThreeDict = { 27 | 'A' : 'ALA', 'R' : 'ARG', 'N' : 'ASN', 'D' : 'ASP', 'C' : 'CYS', 'E' : 'GLU', 'Q' : 'GLN', 28 | 'G' : 'GLY', 'H' : 'HIS', 'I' : 'ILE', 'L' : 'LEU', 'K' : 'LYS', 'M' : 'MET', 'F' : 'PHE', 29 | 'P' : 'PRO', 'S' : 'SER', 'T' : 'THR', 'W' : 'TRP', 'Y' : 'TYR', 'V' : 'VAL', 30 | 'B' : 'ASX', 'U' : 'SEC', 'Z' : 'GLX' 31 | } 32 | 33 | self.Three2OneDict = {v: k for k, v in self.One2ThreeDict.items()} 34 | 35 | self.psiblast_parameter = { 36 | 0 : { 'wordSize':2, 'gapOpen':9, 'gapExtend':1, 'scoringMatrix':'PAM30' }, 37 | 1 : { 'wordSize':3, 'gapOpen':9, 'gapExtend':1, 'scoringMatrix':'PAM30' }, 38 | 2 : { 'wordSize':3, 'gapOpen':10, 'gapExtend':1, 'scoringMatrix':'PAM70' }, 39 | 3 : { 'wordSize':3, 'gapOpen':10, 'gapExtend':1, 'scoringMatrix':'BLOSUM80'}, 40 | 4 : { 'wordSize':3, 'gapOpen':11, 'gapExtend':1, 'scoringMatrix':'BLOSUM62'} 41 | } 42 | 43 | 44 | def get_fasta(self, pdb_dir='pdb', pdbref=None, chain=('A','B'), out_dir='fasta'): 45 | """Extract the sequence of the chains and writes a fasta query file for each. 46 | 47 | Args: 48 | pdb_dir (str, optional): path for pdb files. 49 | Defaults to 'pdb'. 50 | pdbref (str, optional): filename of the reference pdb. 51 | Defaults to None, i.e. randomly select one. 52 | chain (tuple, optional): Name of the chains in the pdbs. 53 | Defaults to ('A','B'). 54 | out_dir (str, optional): output path for fasta files. 55 | Defaults to 'fasta'. 
56 | """ 57 | out_dir = os.path.join(self.work_dir,out_dir) 58 | if not os.path.isdir(out_dir): 59 | os.mkdir(out_dir) 60 | 61 | if pdbref: 62 | pdb = os.path.join(self.work_dir, pdb_dir, pdbref) 63 | else: 64 | pdbs = os.listdir(os.path.join(self.work_dir, pdb_dir)) 65 | pdbs = list(filter(lambda x: x.endswith('.pdb'), pdbs)) 66 | pdb = os.path.join(self.work_dir, pdb_dir, pdbs[0]) 67 | 68 | sqldb = pdb2sql(pdb) 69 | print('Generating FASTA files...') 70 | logger.info('Output FASTA files:') 71 | for c in chain: 72 | 73 | # get the unique residues 74 | res = sqldb.get_residues(chainID=c) 75 | 76 | # get the one letter resiude 77 | seq = '' 78 | count = 0 79 | for r in res: 80 | seq += self.Three2OneDict[r[1]] 81 | count += 1 82 | if count == 79: 83 | seq += '\n' 84 | count = 0 85 | 86 | # write the file 87 | caseID = re.split('_|\.', os.path.basename(pdb))[0] 88 | fname = os.path.join(out_dir, caseID + '.%s' %c + '.fasta') 89 | with open(fname,'w') as f: 90 | f.write('>%s' %caseID + '.%s\n' %c) 91 | f.write(seq) 92 | logger.info(f' {fname}') 93 | 94 | print(f'FASTA files generated in {out_dir}.\n') 95 | 96 | def configure(self, blast_exe=None, database=None, 97 | num_threads = 4, evalue=0.0001, comp_based_stats='T', 98 | max_target_seqs=2000, num_iterations=3, outfmt=7, outspecifiers = None, 99 | save_each_pssm=True, save_pssm_after_last_round=True): 100 | """Configure the blast executable, database and psiblast parameters. 101 | 102 | Notes: 103 | For more details about psiblast paramters, check 'psiblast -help'. 104 | 105 | Args: 106 | blast_exe (str): Path to the psiblast executable 107 | database (str) : Path to the Blast database 108 | num_threads (int): Number of threads (CPUs) to use in the BLAST search. 109 | Defaults to 4. 110 | evalue (float): Expectation value (E) threshold for saving hits. 111 | Defaults to 0.0001. 
112 | comp_based_stats (str, int): Use composition-based statistics, 113 | 0, F or f: no composition-based statistics 114 | 2, T or t, D or d : Composition-based score adjustment 115 | as in Bioinformatics 21:902-911, 2005, conditioned on 116 | sequence properties 117 | Defaults to 'T'. 118 | max_target_seqs (int): Maximum number of aligned sequences to keep, 119 | not applicable for outfmt <= 4. 120 | Defaults to 2000. 121 | num_iterations (int): Number of iterations to perform, 122 | 0 means run until convergence. 123 | Defaults to 3. 124 | outfmt (int): Format for output alignment view. Valid values are 0-18. 125 | Default format 7 is "tabular with comment lines". 126 | Check `psiblast -help` for details. 127 | outspecifiers (str): Output format 6, 7 and 10 can be additionally 128 | configured to produce a custom format specified by space 129 | delimited format specifiers. By default using all sepcifiers. 130 | Check `psiblast -help` for details. 131 | save_each_pssm (bool): Save PSSM after each iteration. 132 | Defaults to True. 133 | save_pssm_after_last_round (bool): Save PSSM after the last database search. 134 | Defaults to True. 135 | """ 136 | # output format sepcifiers for format 6, 7 and 10. See psiblast for details. 
137 | specifiers = 'qseqid qgi qacc qaccver qlen sseqid sallseqid sgi sallgi \ 138 | sacc saccver sallacc slen qstart qend sstart send qseq sseq \ 139 | evalue bitscore score length pident nident mismatch positive \ 140 | gapopen gaps ppos frames qframe sframe btop staxids stitle \ 141 | salltitles sstrand qcovs qcovhsp qcovus' 142 | 143 | outfmt = str(outfmt) 144 | if outfmt in ['6', '7', '10']: 145 | if outspecifiers is not None: 146 | outfmt = outfmt + ' ' + outspecifiers 147 | else: 148 | outfmt = outfmt + ' ' + specifiers 149 | 150 | self.blast_exe = blast_exe 151 | self.blast_config = { 152 | 'db': database, 153 | 'num_threads': num_threads, 154 | 'evalue': evalue, 155 | 'comp_based_stats': comp_based_stats, 156 | 'max_target_seqs': max_target_seqs, 157 | 'num_iterations': num_iterations, 158 | 'save_each_pssm': save_each_pssm, 159 | 'save_pssm_after_last_round': save_pssm_after_last_round, 160 | 'outfmt': outfmt, 161 | } 162 | 163 | def get_pssm(self, fasta_dir='fasta', 164 | out_dir='pssm_raw', 165 | run=True, 166 | save_all_psiblast_output=False): 167 | """Compute the PSSMs by running BLAST 168 | 169 | Args: 170 | fasta_dir (str, optional): path of fasta files. 171 | Defaults to 'fasta'. 172 | out_dir (str, optional): output path for pssm files. 173 | Defaults to 'pssm_raw'. 174 | run (bool, optional): run blast. 175 | Defaults to True. 176 | save_all_psiblast_output (bool, optional): 177 | save all output from psiblast, including pssm files of 178 | each round and details of homologs, etc. 179 | Defaults to False. 180 | 181 | Raises: 182 | FileNotFoundError: BLAST failed to find homologs. 
183 | """ 184 | fasta_dir = os.path.join(self.work_dir,fasta_dir) 185 | out_dir = os.path.join(self.work_dir,out_dir) 186 | if not os.path.isdir(out_dir): 187 | os.mkdir(out_dir) 188 | 189 | for q in os.listdir(fasta_dir): 190 | 191 | # get the fasta quey 192 | query = os.path.join(fasta_dir,q) 193 | name = os.path.splitext(os.path.basename(query))[0] 194 | 195 | # set up the output names 196 | out_ascii_pssm = os.path.join(out_dir,name + '.pssm') 197 | out_pssm = os.path.join(out_dir,name + '.cptpssm') 198 | out_homologs = os.path.join(out_dir,name + '.homologs') 199 | 200 | # get the parameters 201 | blast_param = self._get_psiblast_parameters(query) 202 | 203 | # set up the psiblast calculation 204 | psi_cline = NcbipsiblastCommandline( 205 | cmd = self.blast_exe, 206 | query = query, 207 | word_size = blast_param['wordSize'], 208 | gapopen = blast_param['gapOpen'], 209 | gapextend = blast_param['gapExtend'], 210 | matrix = blast_param['scoringMatrix'], 211 | out_ascii_pssm = out_ascii_pssm, 212 | out_pssm = out_pssm, 213 | out = out_homologs, 214 | **self.blast_config 215 | ) 216 | 217 | # check that it's correct 218 | psi_cline._validate() 219 | 220 | if run: 221 | # run the blast query 222 | print('Generatinng raw PSSMs with BLAST...') 223 | psi_cline() 224 | 225 | # copy the pssm of last exiting iteration to its final name 226 | logger.info('Generated raw PSSM files:') 227 | for i in reversed(range(self.blast_config['num_iterations'])): 228 | fpssm = out_ascii_pssm + '.' + str(i+1) 229 | if os.path.isfile(fpssm): 230 | shutil.copy2(fpssm, out_ascii_pssm) 231 | logger.info(f' {out_ascii_pssm}') 232 | break 233 | elif i==0: 234 | raise FileNotFoundError(f'Not found {fpssm}. 
\ 235 | PSIBlast may fail to find homologs for given fasta') 236 | 237 | if not save_all_psiblast_output: 238 | # remove all the other files 239 | for filename in glob.glob(out_pssm+'.*'): 240 | os.remove(filename) 241 | for filename in glob.glob(out_ascii_pssm+'.*'): 242 | os.remove(filename) 243 | os.remove(out_homologs) 244 | else: 245 | logger.info(f'Other intermediate output files are in {out_dir}') 246 | print(f'Raw PSSM files generated in {out_dir}.\n') 247 | 248 | 249 | def _get_psiblast_parameters(self, fasta_query): 250 | 251 | f = open(fasta_query) 252 | data =f.readlines() 253 | f.close() 254 | 255 | seq = 0 256 | for l in data[1:]: 257 | seq += len(l) 258 | 259 | if seq < 30: 260 | p = self.psiblast_parameter[0] 261 | elif seq < 35: 262 | p = self.psiblast_parameter[1] 263 | elif seq < 50: 264 | p = self.psiblast_parameter[2] 265 | elif seq < 85: 266 | p = self.psiblast_parameter[3] 267 | else: 268 | p = self.psiblast_parameter[4] 269 | 270 | return p 271 | 272 | def map_pssm(self, pssm_dir='pssm_raw', pdb_dir='pdb', out_dir='pssm', chain=('A','B')): 273 | """Map the raw pssm files to the pdb files of the decoys 274 | 275 | Args: 276 | pssm_dir (str, optional): path of raw pssm files. 277 | Defaults to 'pssm_raw'. 278 | pdb_dir (str, optional): path of pdb files. 279 | Defaults to 'pdb'. 280 | out_dir (str, optional): output path for mapped pssm files. 281 | Defaults to 'pssm'. 282 | chain (tuple, optional): chain names. Defaults to ('A','B'). 
283 | """ 284 | pssm_dir = os.path.join(self.work_dir,pssm_dir) 285 | out_dir = os.path.join(self.work_dir,out_dir) 286 | if not os.path.isdir(out_dir): 287 | os.mkdir(out_dir) 288 | 289 | # get list of pssm files 290 | pf = os.listdir(pssm_dir) 291 | pssm_files = {} 292 | for c in chain: 293 | pssm_files[c] = list(filter(lambda x: x.endswith(c+'.pssm'),pf))[0] 294 | 295 | # get list of pdb files 296 | pdbs = os.listdir(os.path.join(self.work_dir, pdb_dir)) 297 | pdbs = list(filter(lambda x: x.endswith('.pdb'), pdbs)) 298 | 299 | # map pssm and pdb 300 | print('Generatinng mapped PSSMs...') 301 | logger.info('\nOutput mapped PSSM files:') 302 | for p in pdbs: 303 | pdb = os.path.join(os.path.join(self.work_dir, pdb_dir), p) 304 | for c in chain: 305 | pssm = os.path.join(pssm_dir,pssm_files[c]) 306 | write_mapped_pssm_pdb(pssm, pdb, c, out_dir) 307 | print(f'Mapped PSSM files generated in {out_dir}.\n') 308 | 309 | def get_mapped_pdb(self, pdbpssm_dir='pssm', pdb_dir='pdb', 310 | pdbnonmatch_dir='pdb_nonmatch'): 311 | """Write mapped pdb to working folder, by default 'pdb'. 312 | 313 | Args: 314 | pdbpssm_dir (str, optional): path of mapped pssm files. 315 | Defaults to 'pssm'. 316 | pdb_dir (str, optional): output path of mapped pdb files. 317 | Defaults to 'pdb'. 318 | pdbnonmatch_dir (str, optional): output path of nonconsistent pdb files. 319 | Defaults to 'pdb_nonmatch'. 
320 | """ 321 | pdbpssm_dir = os.path.join(self.work_dir, pdbpssm_dir) 322 | pdbnonmatch_dir = os.path.join(self.work_dir, pdbnonmatch_dir) 323 | 324 | pdb_files = [f for f in os.listdir(pdbpssm_dir) if f.endswith('.pdb')] 325 | 326 | if pdb_files: 327 | print('Generatinng mapped PDB files...') 328 | if not os.path.isdir(pdbnonmatch_dir): 329 | os.mkdir(pdbnonmatch_dir) 330 | 331 | pdb_dict = {} 332 | for pdb in pdb_files: 333 | caseid, chain, _, _ = pdb.split('.') 334 | if caseid not in pdb_dict: 335 | pdb_dict[caseid] = [] 336 | pdb_dict[caseid].append(chain) 337 | 338 | for caseid, chains in pdb_dict.items(): 339 | pdb_wd = os.path.join(self.work_dir, pdb_dir, caseid+".pdb") 340 | pdb_raw = os.path.join(pdbnonmatch_dir, caseid+".pdb") 341 | os.rename(pdb_wd, pdb_raw) 342 | if len(chains) == 1: 343 | pdb_new = os.path.join(pdbpssm_dir, caseid + "." + chains[0] + '.pssm.pdb') 344 | os.rename(pdb_new, pdb_wd) 345 | else: 346 | with open(pdb_wd, 'w') as f: 347 | # write REMARK 348 | pdb_new = os.path.join(pdbpssm_dir, caseid + "." + chains[0] + '.pssm.pdb') 349 | with open(pdb_new, 'r') as fpdb: 350 | lines = [line for line in fpdb if line.startswith('REMARK')] 351 | f.writelines(lines) 352 | 353 | # write each chain 354 | for chain in chains: 355 | pdb_new = os.path.join(pdbpssm_dir, caseid + "." 
+ chain + '.pssm.pdb') 356 | with open(pdb_new, 'r') as fpdb: 357 | lines = [line for line in fpdb 358 | if line.startswith('ATOM') and 359 | line[21:22] == chain] 360 | f.writelines(lines) 361 | os.remove(pdb_new) 362 | f.write('END\n') 363 | print(f'Inconsistent raw PDB files moved to {pdbnonmatch_dir}') 364 | print(f'Mapped PDB files generated in {os.path.join(self.work_dir, pdb_dir)}.\n') 365 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup 3 | 4 | cwd = os.path.abspath(os.path.dirname(__file__)) 5 | 6 | # To update the package version number, edit pssmgen/__version__.py 7 | version = {} 8 | with open(os.path.join(cwd, 'pssmgen', '__version__.py')) as f: 9 | exec(f.read(), version) 10 | 11 | with open('README.md') as readme_file: 12 | readme = readme_file.read() 13 | 14 | setup( 15 | name='PSSMGen', 16 | version=version['__version__'], 17 | description='Generates consistent PSSM and/or PDB files for protein-protein complexes', 18 | long_description=readme + '\n\n', 19 | long_description_content_type='text/markdown', 20 | author='Nicolas Renaud, Cunliang Geng', 21 | author_email='n.renaud@esciencecenter.nl, c.geng@esciencecenter.nl', 22 | url='https://github.com/DeepRank/PSSMGen', 23 | packages=['pssmgen'], 24 | license="Apache Software License 2.0", 25 | keywords='pssmgen', 26 | 27 | install_requires=[ 28 | 'numpy >= 1.13', 29 | 'scipy', 30 | 'biopython', 31 | 'pdb2sql >= 0.5'], 32 | 33 | extras_require= { 34 | 'test': ['nose', 'coverage', 'pytest', 35 | 'pytest-cov','codacy-coverage','coveralls'], 36 | } 37 | ) 38 | -------------------------------------------------------------------------------- /test/7CEI/pssm_raw/7CEI.A.pssm: -------------------------------------------------------------------------------- 1 | 2 | Last position-specific scoring matrix computed, weighted observed 
percentages rounded down, information per position, and relative weight of gapless real matches to pseudocounts 3 | A R N D C Q E G H I L K M F P S T W Y V A R N D C Q E G H I L K M F P S T W Y V 4 | 1 M -6 -7 -7 -8 -7 -6 -7 -8 -7 1 -3 -7 11 -5 -8 -7 -6 -7 -6 -4 0 0 0 0 0 0 0 0 0 7 0 0 93 0 0 0 0 0 0 0 3.60 1.20 5 | 2 E -2 -1 3 0 -3 0 4 -3 -3 1 -4 3 -3 -4 -4 0 1 -5 -4 -2 2 2 16 5 0 2 26 1 0 9 1 19 0 0 0 5 10 0 0 2 0.50 0.35 6 | 3 L -4 -4 -1 -3 -5 -2 -3 -6 -1 0 4 -3 7 3 -4 -3 -3 -4 -3 -2 1 1 3 2 0 1 1 0 1 5 39 1 25 14 1 2 1 0 0 2 0.96 0.49 7 | 4 K -4 0 -1 -5 -8 -2 -2 -5 -3 -7 -5 8 -6 -6 -3 -3 -5 -8 -5 -6 1 3 3 0 0 1 2 1 1 0 1 82 0 0 1 1 0 0 0 0 2.08 0.91 8 | 5 N -1 -1 4 2 -5 1 1 -2 1 -4 -4 3 -2 -3 -1 1 0 -5 -2 -4 4 2 22 9 0 7 8 2 3 1 1 18 1 1 4 10 6 0 1 0 0.45 0.31 9 | 6 S -2 2 2 -2 -1 0 -2 -4 0 -3 -4 4 -3 -5 -4 3 2 -6 -4 -5 3 10 9 2 1 3 2 1 2 1 1 30 1 0 0 21 12 0 0 0 0.64 0.42 10 | 7 I -5 -6 -7 -7 -5 -6 -7 -7 -6 6 4 -6 -1 3 -7 -6 -4 -5 1 -1 0 0 0 0 0 0 0 0 0 37 40 0 1 13 0 0 1 0 4 3 1.39 0.61 11 | 8 S 0 -3 1 -2 -2 1 4 -4 -3 -4 -5 -1 -4 -5 -3 4 0 -6 -2 -5 6 0 5 1 1 7 28 1 0 1 0 3 0 0 1 35 6 0 1 0 0.80 0.52 12 | 9 D -4 -6 0 7 -4 -1 4 -5 -1 -8 -8 -3 -7 -4 -6 -4 -5 -8 -3 -7 1 0 3 63 1 2 23 1 2 0 0 1 0 1 0 1 0 0 1 0 1.87 0.83 13 | 10 Y -7 -7 -8 -6 -3 -7 -8 -9 0 -2 -4 -8 2 2 -5 -7 -7 -4 10 -7 0 0 0 1 1 0 0 0 2 2 2 0 4 6 1 0 0 0 83 0 3.08 1.20 14 | 11 T -3 -4 -4 -4 -6 -6 -4 -7 -7 -6 -6 -5 -6 -5 -7 1 8 -8 -7 -6 2 1 1 1 0 0 1 0 0 0 1 1 0 1 0 10 84 0 0 0 2.37 1.09 15 | 12 E -3 0 -6 -3 -3 -1 8 -7 -4 -8 -8 -2 -7 -5 -5 -4 -5 -8 -2 -6 2 4 0 0 1 2 84 0 0 0 0 2 0 1 1 1 0 0 2 0 2.26 1.04 16 | 13 A 3 0 0 0 -3 1 1 -2 1 -2 -3 0 -2 -3 -1 2 0 -5 -3 -2 26 4 3 6 0 8 9 2 4 2 2 4 1 1 2 19 5 0 0 2 0.32 0.29 17 | 14 E -4 -4 -4 0 -8 -1 8 -5 -5 -8 -6 -3 -7 -3 -6 -4 -4 -7 -3 -7 1 1 1 5 0 3 83 1 0 0 1 1 0 1 0 1 1 0 1 0 2.20 1.01 18 | 15 F -4 -8 -8 -8 -8 -8 -8 -8 -7 -6 -2 -8 0 9 -8 -7 -7 -5 1 -6 2 0 0 0 0 0 0 0 0 0 4 0 2 88 0 0 0 0 3 0 3.12 1.18 19 | 16 V -4 -1 -4 -6 -2 -2 -3 -5 -3 4 4 -1 1 0 -6 
-3 -2 -5 -2 1 1 3 1 0 1 2 1 1 0 21 46 4 3 3 0 1 2 0 1 7 0.79 0.46 20 | 17 Q 2 -1 1 2 -3 1 4 -2 0 -3 -3 0 -3 -4 -4 1 -1 -6 -3 -3 16 3 6 9 0 6 28 3 2 2 2 5 0 0 0 11 3 0 1 2 0.44 0.36 21 | 18 L -5 -7 -8 -8 -1 -7 -4 -8 -7 0 4 -7 -1 7 -4 -5 -4 -1 0 -2 1 0 0 0 1 0 1 0 0 5 42 0 1 41 1 1 1 1 2 2 1.56 0.74 22 | 19 L -3 -7 -8 -8 -6 -7 -7 -8 -7 4 4 -7 2 -1 -7 -6 -3 -7 -5 4 2 0 0 0 0 0 0 0 0 19 40 0 5 2 0 0 1 0 0 30 1.29 0.65 23 | 20 K -2 3 3 0 -3 2 2 -1 0 -2 -3 2 -2 -5 -4 0 0 -2 -1 -3 2 14 12 5 1 10 15 4 2 2 2 15 1 0 0 6 6 0 2 2 0.40 0.33 24 | 21 E -1 2 1 1 -3 0 4 0 -1 -4 -4 3 -2 -3 -5 -1 -2 -1 -3 -3 5 10 5 7 1 3 28 7 1 1 1 18 1 1 0 5 2 1 1 2 0.52 0.39 25 | 22 I -6 -7 -4 -7 -6 -7 -7 -8 -7 7 3 -7 -1 4 -4 -4 -4 -6 -5 -1 0 0 1 0 0 0 0 0 0 53 22 0 1 16 1 1 1 0 0 3 1.58 0.77 26 | 23 E -3 1 -2 -3 5 -5 -2 -4 -2 2 -1 -3 -2 5 -5 -1 -2 4 3 -2 2 7 2 1 13 0 3 1 1 10 6 1 1 27 0 5 3 6 9 3 0.72 0.43 27 | 24 K 0 0 3 2 -2 0 2 -1 0 -3 -3 1 -2 -3 -2 1 0 -5 -1 -2 10 5 17 12 1 4 12 4 2 2 2 7 1 1 1 9 6 0 2 3 0.28 0.24 28 | 25 E 3 -3 2 0 -2 -1 1 0 -1 -1 -2 -2 -2 -2 0 0 -1 -5 -2 2 23 1 10 5 1 2 11 7 2 2 3 1 1 2 5 6 3 0 1 14 0.22 0.24 29 | 26 N -2 -1 4 4 -2 -1 2 -2 2 -3 -5 -1 -4 -3 1 -1 2 -5 -4 -5 3 2 19 23 1 2 12 3 5 2 0 4 0 1 6 4 11 0 0 0 0.57 0.41 30 | 27 V -1 -1 0 0 2 -1 0 2 0 -1 -1 0 -1 0 -1 0 0 0 2 -1 5 3 4 7 5 1 8 16 2 3 5 7 2 4 2 8 5 1 8 5 0.08 0.10 31 | 28 A 1 0 1 2 -1 0 1 -1 -1 -3 -3 1 -1 -2 1 1 0 -4 -1 -2 12 5 6 15 2 3 10 4 1 0 2 10 2 2 6 10 5 0 2 2 0.20 0.19 32 | 29 A 0 -1 0 0 -3 0 2 1 -2 -1 0 0 0 -2 0 0 2 -3 -1 -2 7 2 3 5 0 2 18 13 0 2 9 7 3 2 4 5 13 0 2 2 0.14 0.17 33 | 30 T -1 -1 -1 -1 -4 -1 2 -2 0 -2 -1 -1 -3 1 0 2 3 -4 -1 -2 2 3 3 3 0 2 18 3 2 2 7 3 0 7 4 15 21 0 2 3 0.25 0.28 34 | 31 D -2 0 0 3 -6 1 5 -1 -4 -1 -5 0 -5 -5 -1 -1 -1 -6 0 -2 4 4 4 14 0 5 37 5 0 4 0 4 0 0 4 5 4 0 3 4 0.61 0.43 35 | 32 D 0 -1 -1 2 0 2 4 -2 1 -2 -2 1 0 -1 -1 -1 -1 -4 0 -2 6 3 3 10 3 8 25 3 3 3 3 10 3 3 3 3 3 0 3 3 0.27 0.24 36 | 33 V -1 -1 -2 2 -5 1 3 -3 4 0 -2 -1 -2 -1 -4 -2 -1 0 4 -1 6 3 1 13 0 5 21 1 10 
5 3 3 1 3 0 2 3 1 15 4 0.44 0.35 37 | 34 L -2 -1 -1 0 -1 1 -1 -2 5 2 1 -3 2 0 -4 -2 -1 2 -1 0 3 3 3 6 1 8 3 3 15 13 15 0 6 4 0 2 3 3 2 5 0.26 0.25 38 | 35 D -2 -1 2 6 -6 -1 0 -1 -1 -2 -4 0 -1 -6 -5 0 -1 -6 -2 -2 3 3 10 44 0 2 5 4 2 3 2 5 2 0 0 7 4 0 2 3 0.80 0.49 39 | 36 V 1 1 1 1 0 0 1 -2 0 -2 -2 1 0 -1 -1 0 -1 3 0 -2 14 10 7 9 2 4 12 2 2 2 3 7 3 2 2 4 3 4 4 3 0.13 0.15 40 | 37 L -1 0 -1 -1 -3 -1 -3 -2 1 0 3 -3 1 0 -1 -1 -1 3 1 2 5 5 3 3 0 3 0 3 3 5 31 0 3 3 3 3 3 4 5 14 0.20 0.20 41 | 38 L 0 1 -1 1 -3 1 1 -3 2 0 0 -1 2 1 -3 0 0 -3 -1 0 8 8 0 8 0 8 8 0 8 8 8 0 8 8 0 8 8 0 0 8 0.09 0.13 42 | 39 E 0 0 0 3 -2 -1 3 -3 1 -2 -1 -1 1 0 -2 -1 -2 -4 1 -3 9 5 4 20 1 2 24 1 3 1 7 2 4 4 1 2 2 0 4 1 0.33 0.28 43 | 40 H -2 0 2 -2 -5 1 0 -4 8 -3 -3 -1 -1 -1 -2 -1 -2 0 0 -2 4 5 9 2 0 5 6 0 43 2 2 3 1 2 2 4 2 2 2 3 1.04 0.50 44 | 41 F -5 -6 -7 -7 -6 -6 -7 -7 -5 3 0 -6 -3 8 -7 -6 -5 4 2 0 0 0 0 0 0 0 0 0 0 14 8 0 0 60 0 0 0 6 6 7 1.74 0.65 45 | 42 V -1 0 1 1 1 0 2 -2 0 1 -1 0 0 -3 -3 -1 0 -4 -3 2 5 5 6 8 4 2 18 2 2 11 3 5 2 0 0 3 7 0 0 17 0.15 0.15 46 | 43 K 0 0 1 0 2 1 0 0 1 0 -1 0 1 -2 1 0 0 -3 -2 0 6 6 6 6 6 6 6 6 6 6 6 6 6 0 6 6 6 0 0 6 0.06 0.09 47 | 44 I 0 -2 0 -2 -2 -2 -2 0 -2 2 2 -1 1 1 -2 -1 0 -2 1 1 5 0 6 0 0 0 1 8 0 16 20 5 5 4 0 4 5 0 7 12 0.09 0.10 48 | 45 T 0 1 -1 -2 3 1 -1 0 -2 0 0 -1 -1 -2 2 0 1 -3 -2 0 9 9 0 0 8 9 1 9 0 9 9 0 0 0 9 9 9 0 0 9 0.10 0.10 49 | 46 E 0 0 1 0 -4 1 3 1 1 -3 -2 1 -2 -1 1 0 -2 -4 -2 -1 6 4 7 5 0 5 28 10 4 0 4 8 0 4 7 4 0 0 0 5 0.24 0.19 50 | 47 H -3 -1 0 0 2 -2 0 -4 9 -5 -3 -3 -4 -3 -1 -3 1 -4 1 -4 0 3 4 5 5 0 4 0 59 0 3 0 0 0 3 0 11 0 4 0 1.34 0.50 51 | 48 P 1 1 -1 -1 -2 -1 1 1 -2 -1 0 -1 -1 -2 2 1 1 -3 -2 0 11 11 0 0 0 0 11 11 0 0 11 0 0 0 11 11 11 0 0 11 0.14 0.09 52 | 49 D 1 1 1 3 -4 0 2 -1 3 -3 -2 1 -3 -3 -3 0 -1 -4 -2 -3 16 7 6 18 0 3 15 3 7 1 3 10 0 1 0 6 1 0 1 1 0.31 0.20 53 | 50 G -1 -1 -3 -3 -1 0 -2 6 -4 -1 -4 1 -1 -5 -3 -2 -3 -5 -5 -2 5 4 0 1 1 3 2 59 0 5 1 9 2 0 1 2 1 0 0 3 0.94 0.40 54 | 51 T 0 0 2 -1 1 -2 -2 -1 -1 -2 -1 -1 -1 0 
-2 4 2 2 -1 -3 4 6 9 1 3 0 1 3 1 2 6 1 2 3 1 37 12 3 2 0 0.32 0.22 55 | 52 D -1 -2 1 6 -4 1 1 1 0 -3 -2 -2 -1 -4 -3 -1 -1 -4 -3 -2 3 0 3 58 0 6 3 10 3 0 3 0 3 0 0 3 3 0 0 3 0.66 0.21 56 | 53 L -2 -3 -4 -4 -2 -3 -3 -4 -2 2 3 -3 2 2 -3 -3 -2 4 2 1 0 0 0 0 0 0 0 0 0 13 45 0 7 8 0 0 0 7 10 10 0.41 0.09 57 | 54 I -1 -2 -3 -3 -2 -2 -3 -3 -3 2 2 -2 3 3 -3 -1 1 -2 0 2 0 0 0 0 0 0 0 0 0 17 17 0 17 17 0 0 17 0 0 17 0.25 0.07 58 | 55 Y -2 -3 -3 -2 1 -3 -3 -4 1 0 0 -3 0 5 -4 -2 -1 -1 6 0 3 0 0 3 4 0 0 0 3 3 5 0 3 30 0 3 5 0 33 6 0.70 0.27 59 | 56 Y -2 1 1 1 3 1 1 -3 3 -3 -3 1 -2 2 -3 -2 -2 5 2 -3 0 9 9 9 9 9 9 0 9 0 0 9 0 9 0 0 0 9 9 0 0.36 0.20 60 | 57 P -1 -4 2 -3 1 -3 -3 -4 1 -1 -2 -3 -4 -5 7 -1 0 -5 -4 -4 5 0 11 0 4 0 0 0 4 5 4 0 0 0 54 4 7 0 0 0 1.23 0.50 61 | 58 S 0 0 0 0 -2 1 0 0 1 0 -1 0 1 0 1 0 0 -2 1 0 6 6 5 6 0 5 6 6 5 6 6 6 5 5 6 6 6 0 5 6 0.03 0.06 62 | 59 D 0 0 1 0 -3 1 0 0 2 0 -1 0 1 -2 1 0 0 -2 1 0 6 6 6 6 0 6 6 6 6 6 6 6 6 0 6 6 6 0 6 6 0.05 0.08 63 | 60 N -1 -1 1 2 1 0 0 4 1 -3 -2 0 0 -1 1 -1 -1 -4 -3 -2 4 3 5 11 3 4 6 30 3 0 3 4 3 3 6 4 3 0 0 4 0.27 0.24 64 | 61 R 1 1 1 0 -1 1 1 0 -1 0 -2 1 -1 -1 -1 0 -1 0 -1 -1 12 8 7 5 2 5 12 6 1 7 3 9 2 3 2 6 3 1 2 4 0.07 0.10 65 | 62 D 0 0 0 4 -4 2 2 -1 -3 -1 -2 -1 -3 -4 0 0 -1 -4 0 -1 10 4 4 25 0 9 12 4 0 4 4 4 0 0 4 6 4 0 4 4 0.29 0.26 66 | 63 D -1 0 2 3 2 0 -1 1 1 -1 -2 -1 -3 -1 0 0 0 -4 0 -1 4 4 12 18 5 3 3 12 3 3 4 3 0 3 4 6 5 0 3 5 0.16 0.17 67 | 64 S 0 -1 0 0 2 -2 0 -1 -3 -4 -2 -1 0 -4 -1 3 4 -5 -4 -3 5 3 3 5 4 0 6 4 0 0 3 4 3 0 3 28 28 0 0 0 0.48 0.40 68 | 65 P 0 0 1 1 -2 -1 -1 -2 2 0 0 -1 -1 1 1 0 1 -3 -1 0 8 8 8 8 0 0 0 1 8 8 8 0 0 8 8 8 8 0 0 8 0.07 0.10 69 | 66 E -1 0 0 1 -5 1 4 0 1 -1 -2 0 0 -4 -4 -1 -1 2 0 0 4 4 3 6 0 6 35 7 3 3 3 4 3 0 0 3 3 3 3 7 0.35 0.29 70 | 67 G 1 -1 0 -1 -4 -1 -1 5 0 -2 -3 -1 -3 -2 -4 1 -1 -4 -1 -2 10 4 5 3 0 3 4 41 3 2 3 3 0 2 0 10 2 0 2 2 0.47 0.31 71 | 68 I -1 -6 0 -6 -4 -5 -6 -6 -6 6 -1 -5 2 0 -6 -5 -3 -6 -4 5 5 0 6 0 0 0 0 0 0 43 4 0 4 4 0 0 0 0 0 35 1.13 0.49 72 | 69 V -1 -5 -2 
-5 -4 -4 -5 -5 -5 4 2 -5 2 0 -5 -3 2 -5 -3 4 4 0 2 0 0 0 0 0 0 20 16 0 4 3 0 0 14 0 0 37 0.75 0.37 73 | 70 K 0 2 1 1 -4 2 2 -1 1 -3 -3 3 -2 -2 -3 -1 -1 -4 0 -4 6 14 6 8 0 7 14 5 3 1 2 23 1 1 0 3 2 0 4 0 0.35 0.22 74 | 71 E -1 0 -2 -2 -4 0 3 -3 1 3 -2 0 -2 0 -3 -1 2 -4 0 0 5 6 1 1 0 2 24 1 3 18 2 5 0 5 0 2 17 0 3 5 0.29 0.25 75 | 72 I -2 -3 -6 -6 -4 -5 -5 -6 -6 5 1 -5 0 -1 -5 -5 -3 -5 -4 5 2 2 0 0 0 0 0 0 0 31 10 0 2 3 0 0 0 0 0 49 1.16 0.46 76 | 73 K -3 2 -1 -4 -6 -1 -1 -4 -2 -2 -2 7 -1 -4 -2 -2 -3 -6 -4 -2 1 9 2 0 0 2 2 1 1 3 4 66 1 1 2 2 1 0 0 2 1.17 0.53 77 | 74 E 1 1 0 1 -4 2 4 -3 -1 -3 -3 2 -2 -5 -1 0 -1 -5 -4 -3 13 7 5 6 0 8 31 1 1 1 1 13 1 0 3 5 4 0 0 1 0.46 0.28 78 | 75 W -4 -6 -4 -7 -2 -6 -4 -2 -6 -6 -3 -6 -5 -2 -7 -4 -6 12 2 -6 1 0 1 0 1 0 1 3 0 0 3 0 0 1 0 1 0 80 5 0 3.66 1.05 79 | 76 R -3 7 -3 -5 2 -2 -4 -6 4 -5 -4 -2 -3 -3 -6 -4 -5 -1 2 -6 2 71 1 0 4 1 0 0 9 1 2 0 1 1 0 1 0 1 6 0 1.72 0.69 80 | 77 A 4 2 0 -3 -3 1 0 -2 1 -3 -2 2 -2 -3 -2 0 -1 -4 -1 -2 41 10 3 1 0 6 5 1 4 1 3 11 1 1 1 4 2 0 2 2 0.40 0.30 81 | 78 A 3 -1 -1 0 -3 1 1 -1 -1 -3 -2 1 -3 -3 -2 2 0 -4 -3 -2 29 2 2 5 0 7 11 2 2 1 5 10 0 1 1 18 5 0 0 1 0.33 0.27 82 | 79 N -4 0 6 -3 -6 5 -1 -4 3 -6 -5 1 -4 -5 -5 -2 -3 -2 -1 -4 1 4 47 0 0 25 3 0 6 0 1 7 0 0 0 2 1 1 2 1 1.26 0.59 83 | 80 G -3 -2 -1 -1 -6 -3 -3 7 -2 -7 -7 -3 -6 -6 -5 -2 -5 -2 -1 -6 1 2 3 4 0 1 1 81 1 0 0 1 0 0 0 2 0 1 3 0 1.77 0.67 84 | 81 K -3 0 -4 -4 -4 -1 -3 -4 -2 -2 4 5 0 -2 -3 -3 -2 -5 -4 -2 1 5 0 0 0 2 1 1 1 1 39 41 1 1 1 0 2 0 0 2 0.80 0.45 85 | 82 P 1 -3 -4 -4 -6 -3 -2 -4 -4 -2 -4 -4 -5 -5 7 1 -1 -7 -6 -3 10 1 0 1 0 1 2 1 0 2 2 0 0 0 62 10 4 0 0 2 1.59 0.66 86 | 83 G 1 -5 -4 -2 4 -4 -3 6 -5 -4 0 -4 -4 -5 -1 -1 -4 -5 -5 -3 13 0 0 3 9 0 1 56 0 1 10 0 0 0 4 3 0 0 0 2 1.02 0.47 87 | 84 F -6 -7 -7 -8 -6 -7 -7 -7 -5 -3 -1 -7 -4 9 -8 -4 -6 -3 0 -5 0 0 0 0 0 0 0 0 0 1 4 0 0 92 0 1 0 0 2 0 2.85 0.90 88 | 85 K -3 3 -4 -4 -7 -1 -3 -5 -4 -6 -6 7 -5 -7 -5 -4 -4 -7 -5 -6 1 14 0 0 0 2 0 0 0 0 0 83 0 0 0 0 0 0 0 0 2.02 0.73 89 | 86 Q -1 2 -1 
0 -4 4 4 -1 -2 -3 -4 2 -3 -4 -1 -1 -1 -4 -3 -2 2 9 2 3 0 23 34 4 0 1 0 10 0 0 3 3 2 0 1 4 0.54 0.24 90 | 87 G 4 -3 -2 -3 -3 -3 -3 5 -3 -4 -4 -3 -3 -4 -3 -1 -2 -4 -4 -3 41 0 0 0 0 0 0 59 0 0 0 0 0 0 0 0 0 0 0 0 1.04 0.23 91 | 92 | K Lambda 93 | Standard Ungapped 0.1340 0.3110 94 | Standard Gapped 0.0536 0.2670 95 | PSI Ungapped 0.1751 0.3187 96 | PSI Gapped 0.0536 0.2670 97 | -------------------------------------------------------------------------------- /test/7CEI/pssm_raw/7CEI.B.pssm: -------------------------------------------------------------------------------- 1 | 2 | Last position-specific scoring matrix computed, weighted observed percentages rounded down, information per position, and relative weight of gapless real matches to pseudocounts 3 | A R N D C Q E G H I L K M F P S T W Y V A R N D C Q E G H I L K M F P S T W Y V 4 | 1 E -2 0 0 3 -4 2 5 -3 -1 -4 -4 2 -3 -4 -2 -1 -1 -4 -3 -3 0 0 0 20 0 9 59 0 0 0 0 11 0 0 0 0 0 0 0 0 0.75 0.13 5 | 2 R -2 5 -1 -2 -4 3 0 -3 0 -3 -3 2 -2 -4 -3 -1 0 -4 -3 -1 0 57 0 0 0 19 0 0 1 0 0 11 0 0 0 0 8 0 0 4 0.75 0.20 6 | 3 F -1 -4 -4 -5 -4 -4 -4 -4 -2 0 0 -4 -2 7 -5 -3 -3 0 5 -2 6 0 0 0 0 0 0 0 0 6 4 0 0 56 0 0 0 0 28 0 1.19 0.33 7 | 4 A 5 -1 -3 -3 -3 -2 1 -2 -3 1 -3 -2 -2 -4 -3 -1 1 -4 -3 1 60 3 0 0 0 0 9 0 0 9 0 0 0 0 0 0 8 0 0 10 0.64 0.38 8 | 5 R 1 2 0 0 -4 0 -1 -3 6 -3 -3 -2 -3 2 -2 -2 1 3 0 -1 11 16 3 4 0 4 2 0 24 0 0 0 0 10 1 0 11 4 2 6 0.43 0.31 9 | 6 E 2 -2 2 5 -4 -2 0 -1 -3 -2 -4 -3 -4 -5 0 0 1 -5 -4 -1 18 2 10 40 0 0 4 3 0 2 0 0 0 0 5 4 8 0 0 5 0.63 0.39 10 | 7 P -3 -6 -6 -5 -7 -3 -3 -1 -1 -4 -3 -5 -2 -7 8 -5 -5 -7 -6 -3 1 0 0 0 0 1 2 4 2 1 3 0 1 0 82 0 0 0 0 2 2.43 1.00 11 | 8 M 1 -2 0 3 -4 -1 1 2 -3 -3 -3 -2 2 -4 -1 1 1 -5 -4 -1 13 2 2 22 0 2 12 14 0 1 1 0 6 0 2 12 7 0 0 5 0.35 0.31 12 | 9 A 1 -1 0 2 -3 -1 1 1 1 -2 -3 -1 -3 -4 2 1 -1 -4 0 -1 15 2 4 15 0 2 9 12 3 1 1 2 0 0 11 13 3 0 3 4 0.26 0.23 13 | 10 A -1 -4 0 1 -5 -2 -1 2 -4 -5 -4 -1 -5 -6 6 2 1 -6 -5 -4 4 0 5 9 0 1 2 14 0 0 1 3 0 0 38 13 9 0 0 1 0.89 0.53 14 
| 11 G -1 -3 -2 0 -5 -2 -1 5 -4 -1 -5 -4 -4 -5 1 -1 2 -1 -5 -3 4 1 1 4 0 1 5 50 0 4 0 0 0 0 8 4 15 1 0 2 0.83 0.52 15 | 12 H 0 1 1 2 -4 -1 1 -1 3 -2 -2 0 -3 -3 1 0 1 -4 1 -2 6 8 8 12 0 1 12 3 10 2 3 5 0 0 6 4 11 0 5 3 0.22 0.22 16 | 13 R 0 1 -1 0 -3 0 0 0 -2 1 0 0 0 -1 0 0 0 -3 -2 1 7 12 2 6 0 4 4 9 0 8 6 5 2 2 5 7 5 0 0 15 0.06 0.10 17 | 14 M -1 -4 -1 0 -3 -3 -1 -1 -2 1 0 -3 2 -3 4 1 0 -5 -3 2 4 0 2 5 0 0 3 4 1 9 9 0 6 0 21 11 5 0 1 19 0.35 0.31 18 | 15 W 0 -4 -1 -3 -4 -1 -4 -3 1 -1 1 -3 -2 1 2 -1 1 8 2 0 7 0 2 1 0 2 0 1 3 3 15 1 0 6 11 4 9 21 7 8 0.60 0.39 19 | 16 Q -1 -3 -2 -1 -1 3 -1 0 -1 -2 1 -1 -2 -2 1 -1 3 0 -3 0 4 0 1 3 2 14 4 6 2 1 15 4 0 1 8 3 25 1 0 5 0.25 0.30 20 | 17 M -1 -3 -1 0 -4 -1 -1 1 0 0 -1 -3 2 -4 3 0 3 -4 -2 -2 2 0 3 5 0 2 3 14 3 5 6 0 6 0 15 7 24 0 1 2 0.35 0.33 21 | 18 A 1 -4 -2 -4 -4 -3 -1 3 0 -1 -4 -2 -4 -5 4 0 2 -5 -4 -1 14 0 2 0 0 0 3 25 3 4 0 2 0 0 23 6 15 0 0 3 0.65 0.45 22 | 19 G 2 -2 1 0 -4 1 0 3 -2 -2 -3 -2 -3 -3 0 1 0 -5 -4 0 22 2 6 4 0 7 5 24 1 1 2 1 0 1 4 11 5 0 0 6 0.33 0.30 23 | 20 L 0 0 0 -1 -3 -1 -2 0 2 0 2 -2 -1 0 0 -1 -1 -3 2 1 6 6 5 3 0 2 1 6 7 6 24 0 0 4 4 4 4 0 8 9 0.11 0.17 24 | 21 K -3 0 2 1 -5 1 1 -2 0 -2 -4 1 -4 -3 4 -1 0 -4 1 -4 1 4 10 9 0 5 10 2 2 2 1 9 0 1 25 4 7 0 6 0 0.51 0.38 25 | 22 A 2 -2 -1 1 -3 0 1 0 -3 -1 0 -1 -1 -1 0 0 1 1 -3 0 21 1 2 9 0 3 10 8 0 2 11 2 1 2 5 4 11 2 0 5 0.14 0.19 26 | 23 Q 1 -1 1 1 -3 1 2 3 -2 -2 -3 -1 -3 -3 2 0 0 -4 -3 -1 10 3 6 6 0 8 14 20 0 3 1 3 0 1 9 6 5 0 1 4 0.26 0.23 27 | 24 R 0 1 0 0 -3 0 0 -2 -2 -1 -2 1 -1 0 2 1 1 0 -1 1 6 8 5 4 0 3 7 1 1 3 2 8 2 5 12 13 7 2 1 11 0.13 0.15 28 | 25 A 2 -2 0 -3 -4 1 0 0 -3 0 -1 -2 -3 -2 4 2 0 -5 -4 -2 16 1 4 0 0 6 7 8 0 6 5 1 0 2 22 15 5 0 0 1 0.39 0.34 29 | 26 Q 0 -2 -1 -2 -3 1 -1 1 0 2 1 -1 0 -1 -1 -1 0 -3 -2 1 6 0 2 2 0 9 2 10 3 13 17 4 1 2 3 4 7 0 0 12 0.11 0.16 30 | 27 T -2 -2 2 2 -4 -1 1 -2 -1 -2 -3 -1 -4 -3 4 1 2 -5 -2 -2 1 1 10 14 0 3 11 3 1 2 3 3 0 1 19 13 13 0 1 2 0.46 0.38 31 | 28 D 0 -2 0 1 -4 1 0 -1 1 0 -2 -2 -3 -1 4 0 0 -4 -2 0 10 2 
4 10 0 8 6 4 4 6 4 1 0 3 20 5 5 0 1 6 0.27 0.25 32 | 29 V 0 -4 -2 -3 -3 -3 -2 -2 0 3 -1 -4 1 -1 1 -1 1 -4 2 3 6 0 1 1 0 1 3 3 3 19 5 0 3 2 7 3 10 0 7 25 0.36 0.32 33 | 30 N -1 0 3 0 -4 -1 1 0 0 -3 0 1 -1 -2 0 0 1 3 -1 -1 4 6 17 6 0 2 10 6 2 0 9 7 1 1 5 6 9 5 2 3 0.18 0.20 34 | 31 N 0 0 0 0 -3 1 1 -1 1 1 -1 0 -1 -1 -1 0 0 1 -1 1 7 4 5 4 0 6 9 6 4 13 4 6 0 2 3 4 5 2 2 14 0.05 0.11 35 | 32 K 0 -1 -2 -2 -3 0 -2 0 0 -1 1 0 0 -1 3 -1 1 2 2 0 6 3 1 2 0 4 2 7 2 1 18 7 2 3 13 3 9 3 7 8 0.15 0.19 36 | 33 K -1 0 -3 -1 -4 2 -2 -3 -1 1 -2 0 -1 -2 5 -2 -1 -4 1 1 4 6 1 3 0 11 2 1 1 7 3 5 2 2 27 3 3 0 6 12 0.46 0.37 37 | 34 A 0 0 0 -1 -3 0 0 -2 0 1 -1 0 -2 -1 3 0 1 -4 -1 0 10 5 4 3 0 3 5 1 2 8 6 5 0 3 18 8 10 0 2 9 0.18 0.18 38 | 35 A 2 0 0 -2 -4 0 0 -2 2 -1 -2 -1 -3 -1 3 -1 1 1 0 -1 19 5 4 2 0 5 7 2 6 3 4 2 0 3 17 2 9 2 3 5 0.26 0.25 39 | 36 F -1 -3 0 0 -3 -1 0 -2 -3 2 1 -3 1 1 1 0 0 -3 1 0 4 0 4 5 0 3 6 3 0 17 17 0 3 7 8 8 6 0 5 5 0.14 0.18 40 | 37 D -1 -1 1 3 -5 1 2 -2 0 0 -1 -1 -3 -4 3 -2 0 -5 0 -1 4 3 6 20 0 6 14 2 2 5 8 4 0 0 13 2 6 0 3 3 0.30 0.29 41 | 38 A 1 -2 -1 2 -3 0 1 2 1 -2 -2 -1 -3 -2 1 0 0 0 0 -2 15 2 2 11 0 3 9 17 3 2 4 3 0 2 7 5 7 1 4 2 0.17 0.19 42 | 39 A 0 0 0 1 -3 0 0 1 1 0 -1 -1 -1 -1 2 0 1 -3 -1 0 10 7 3 8 0 5 3 10 4 6 6 2 2 2 9 5 11 0 2 5 0.08 0.10 43 | 40 A -1 0 0 1 -3 2 1 0 0 1 -1 0 0 -3 0 0 0 0 -3 0 5 6 4 8 0 11 10 7 2 12 6 4 2 0 4 4 6 2 0 7 0.09 0.13 44 | 41 K -1 0 1 2 -3 2 0 -1 0 1 -1 1 1 -3 -1 0 1 -4 -3 0 4 4 7 11 0 10 5 3 2 10 6 8 4 0 3 5 11 0 0 7 0.11 0.16 45 | 42 E 0 -1 0 0 -3 0 3 -1 -2 0 0 0 0 0 -1 0 0 -3 0 0 8 3 4 4 0 4 22 5 0 5 7 5 3 4 3 5 7 0 3 6 0.10 0.15 46 | 43 K 0 0 0 0 -2 1 0 0 1 0 -1 0 1 0 1 0 0 -2 1 0 6 6 5 6 0 5 6 6 5 6 6 6 5 5 6 6 6 0 5 6 0.03 0.06 47 | 44 S 1 -2 1 -1 -3 -1 0 -1 0 -1 -1 0 -1 -1 2 1 2 -4 -1 0 12 1 9 2 0 1 7 4 2 4 5 7 1 3 11 9 14 0 2 6 0.15 0.19 48 | 45 D -1 -2 0 2 -4 1 1 -2 -1 0 -2 -1 -1 -1 4 -1 1 -4 -2 -2 5 1 5 11 0 7 10 3 2 6 3 3 1 3 21 4 10 0 1 3 0.33 0.29 49 | 46 A 3 -1 -2 -1 0 -2 -2 0 -4 0 -1 -3 -1 -1 4 0 -1 -4 -1 
1 25 3 1 3 2 2 1 7 0 5 6 0 1 2 19 6 4 0 2 10 0.34 0.31 50 | 47 D -1 -1 0 3 -4 1 2 0 0 -2 -2 -1 -3 -2 1 -1 1 1 1 -1 5 3 4 19 0 7 15 6 3 2 4 2 0 1 8 3 8 2 7 4 0.26 0.27 51 | 48 V 0 0 0 1 -3 0 1 -1 1 0 -1 0 -1 1 1 0 0 -2 2 0 8 4 4 7 0 2 8 4 3 8 5 4 1 9 9 4 5 0 7 6 0.07 0.10 52 | 49 A 1 0 -1 1 -4 -1 0 -1 -3 0 -2 0 -2 -2 4 -1 1 -5 -1 -1 13 5 2 8 0 3 6 5 0 7 3 6 1 1 23 2 8 0 2 4 0.32 0.29 53 | 50 L 0 -2 -1 0 -3 -1 0 -2 0 1 2 -1 1 1 1 -1 0 -3 1 1 7 2 3 6 0 3 5 2 2 7 22 3 3 6 7 5 4 0 4 9 0.08 0.12 54 | 51 S 1 -2 0 3 -4 0 1 -2 2 -2 -3 -2 -3 2 3 0 1 -4 1 -1 11 2 5 15 0 4 8 2 5 2 1 2 0 8 13 8 8 0 4 3 0.26 0.27 55 | 52 S 0 -2 0 2 -3 -1 0 -1 1 -1 0 -1 0 0 2 -1 0 2 2 0 6 2 4 12 0 2 6 6 3 2 9 2 2 5 11 5 4 3 9 6 0.13 0.16 56 | 53 A 2 -1 0 2 -4 0 1 1 -1 -2 -3 -1 0 -1 2 -1 -2 -4 -1 -2 21 4 3 15 0 4 8 13 2 2 2 3 3 3 10 2 2 0 2 2 0.26 0.26 57 | 54 L 0 0 0 2 0 0 1 -1 0 0 0 0 0 0 1 -1 -1 -3 0 0 7 4 3 14 3 4 9 3 3 6 7 6 3 4 6 3 4 0 5 8 0.06 0.08 58 | 55 E -1 -3 0 2 1 0 2 -1 1 2 -1 -1 -2 0 1 -1 -3 -4 1 0 5 0 4 12 3 4 13 6 4 16 6 4 0 4 6 4 0 0 5 5 0.16 0.20 59 | 56 R -2 -1 0 1 -3 0 1 -3 -3 3 1 -1 0 0 0 0 0 -2 -2 0 0 3 4 8 0 4 10 1 0 20 16 2 1 3 4 9 5 1 1 6 0.15 0.20 60 | 57 R -1 3 -1 -1 -1 0 0 -2 -1 1 -1 1 -2 -2 1 0 0 -1 -1 1 2 21 3 3 1 3 4 2 1 8 6 7 0 1 9 6 6 1 2 14 0.18 0.20 61 | 58 K -1 -1 0 2 -4 1 0 -2 0 -1 -1 2 -2 3 0 -2 0 3 -1 -2 4 2 4 15 0 6 6 2 2 5 4 17 0 15 5 1 5 4 1 3 0.21 0.23 62 | 59 Q -2 -2 -1 0 -4 1 -1 -2 -3 1 -2 0 -1 4 4 -2 -3 4 0 -1 2 2 4 7 0 7 4 3 0 8 3 7 1 21 19 1 0 6 3 3 0.42 0.33 63 | 60 K 2 1 0 0 -3 -1 -1 -2 -1 -1 -1 1 -2 -1 4 -1 -1 -4 -3 -1 17 7 6 5 0 2 3 3 1 4 6 9 1 4 22 3 3 0 0 5 0.29 0.23 64 | 61 E 1 -1 -2 5 0 0 3 -2 -3 -3 -4 -2 -4 -3 1 -2 0 -5 -2 -3 12 3 1 34 2 4 20 2 0 2 0 1 0 1 7 2 5 0 1 2 0.62 0.43 65 | 62 N 2 -3 1 3 -1 0 -1 0 -3 -2 -2 -1 -3 0 -1 1 1 -4 0 -2 17 0 6 17 1 4 3 8 0 2 3 3 0 5 2 14 10 0 4 2 0.21 0.24 66 | 63 K -1 -1 -1 0 -4 -1 -1 2 -3 1 -2 2 -1 -1 1 0 0 -3 2 -1 5 2 3 5 0 2 2 20 0 12 1 14 1 2 6 7 5 0 9 4 0.18 0.22 67 | 64 E 0 -1 -2 -2 2 -1 0 1 -2 2 0 
0 0 -1 -1 0 -1 -3 1 0 9 2 1 1 4 1 6 14 0 17 11 7 3 2 2 7 2 0 5 6 0.08 0.14 68 | 65 K -1 0 -1 0 -1 0 0 1 -1 0 0 1 -2 -2 2 -1 -1 5 -2 0 4 4 3 5 1 4 4 11 1 7 9 12 0 1 9 4 3 9 0 7 0.15 0.16 69 | 66 D -1 -1 -1 1 -3 0 1 -1 0 -1 -1 1 -1 2 3 1 -1 -3 -1 -1 4 2 2 7 0 3 9 4 3 4 6 9 1 13 14 10 2 0 1 4 0.16 0.16 70 | 67 A 2 -3 -1 -2 -1 -2 -1 0 -3 0 0 0 -2 -3 5 0 -1 -4 -3 -1 18 0 2 2 1 0 4 8 0 7 8 5 0 0 27 7 3 0 0 4 0.42 0.28 71 | 68 K -1 -3 -3 -1 -4 -1 1 -1 -3 1 -1 1 -1 -1 4 0 -1 -3 3 -1 4 0 0 3 0 2 9 4 0 9 5 11 1 2 24 7 4 0 11 4 0.36 0.30 72 | 69 A 0 0 0 0 -3 0 -1 0 0 1 0 -1 1 -1 -1 0 1 -2 2 1 5 5 4 7 0 5 4 6 2 9 5 2 3 0 2 5 10 0 11 14 0.06 0.13 73 | 70 K -1 -1 -1 -1 -3 -1 -2 -2 -2 -1 -1 2 1 0 2 0 -1 1 4 0 6 3 3 4 0 2 0 3 0 3 6 14 4 3 13 6 3 2 17 8 0.22 0.23 74 | 71 L 0 0 -2 -2 -2 0 -1 -1 -2 1 1 0 1 0 0 -1 1 -2 0 1 5 7 0 0 0 4 4 4 0 8 16 7 3 5 4 4 8 0 4 16 0.07 0.11 75 | 72 D 0 -1 0 0 -2 1 0 0 -1 0 0 0 1 1 1 0 0 -2 1 0 6 0 6 6 0 6 6 6 0 6 6 6 6 6 6 6 6 0 6 6 0.04 0.08 76 | 73 K 0 0 0 0 -2 -1 0 -2 -2 0 0 0 -1 1 1 0 0 4 1 0 7 7 7 7 0 0 7 0 0 7 7 7 0 6 7 7 7 6 6 7 0.09 0.11 77 | 74 E 0 1 1 -1 -2 0 2 -1 -1 -1 -2 2 1 -2 -3 0 0 1 -1 0 7 9 7 3 1 3 15 3 1 4 2 18 5 1 0 5 7 2 2 6 0.16 0.17 78 | 75 S -2 -1 3 2 -4 0 0 -1 -2 -2 -1 1 -3 -4 1 2 0 -4 -1 0 2 2 18 15 0 4 4 3 0 1 6 8 0 0 6 15 7 0 2 7 0.26 0.23 79 | 76 K 1 2 1 0 -4 -1 -1 -2 -1 -2 -2 2 -1 -1 4 -1 -2 -4 -3 -1 12 13 7 4 0 1 4 2 1 2 3 12 1 2 25 3 2 0 0 6 0.42 0.28 80 | 77 R -2 7 -4 -5 0 -3 -4 -5 -4 -2 -4 1 2 -5 1 -2 -4 2 -4 -1 3 58 0 0 2 0 0 0 0 3 0 8 5 0 8 3 0 3 0 5 1.26 0.61 81 | 78 N -1 1 3 2 0 0 0 -1 3 -4 -2 0 0 -1 2 0 -2 -4 0 -4 4 7 18 14 2 3 7 3 7 0 5 5 2 2 11 5 0 0 4 0 0.29 0.25 82 | 79 K -1 1 1 0 0 1 1 1 1 -1 -2 1 0 -1 -3 0 -1 -3 0 -1 5 7 7 6 3 8 9 13 3 3 3 10 3 3 0 7 3 0 3 3 0.12 0.13 83 | 80 P 0 -1 0 -4 -5 -4 -1 -1 -4 -4 -2 0 -4 0 6 0 0 -5 0 -1 9 4 4 0 0 0 4 4 0 0 4 6 0 4 42 6 5 0 4 4 0.80 0.46 84 | 81 G -3 -4 -3 -4 -5 0 -1 6 -4 -5 -1 -4 -4 -5 -5 0 0 -5 -5 -5 0 0 0 0 0 5 4 72 0 0 7 0 0 0 0 6 6 0 0 0 1.35 0.47 85 | 82 K -1 
-1 -2 -3 -3 0 0 -2 1 1 -2 2 0 -3 -1 0 2 -4 -3 2 4 3 0 0 0 3 6 3 4 9 0 17 3 0 3 4 19 0 0 22 0.25 0.24 86 | 83 A 3 -4 -3 1 -3 -3 -3 0 2 1 -3 -4 2 -4 -4 1 0 -5 -4 3 24 0 0 8 0 0 0 7 6 9 0 0 6 0 0 11 6 0 0 24 0.43 0.30 87 | 84 T -1 -3 0 -3 1 -3 -1 -2 1 -1 -2 -1 0 1 0 1 3 3 3 -1 5 0 4 0 3 0 4 3 4 3 3 3 3 6 5 10 25 4 11 3 0.28 0.23 88 | 85 G -2 -2 -4 -1 -6 -1 -2 7 -6 -7 -5 -5 -6 -7 -6 0 -2 -7 -7 -7 2 2 0 3 0 3 2 78 0 0 1 0 0 0 0 7 3 0 0 0 1.78 0.77 89 | 86 K -1 1 2 -1 -4 2 -1 1 3 -2 -3 2 -2 0 -3 1 -1 -1 2 -1 2 8 11 2 0 8 3 9 7 2 1 13 1 3 0 14 3 1 7 5 0.22 0.21 90 | 87 G -1 -3 1 0 -5 -3 -3 6 1 -6 -6 -2 -5 1 -5 -2 -1 -5 -2 -4 4 1 7 5 0 1 1 61 3 0 0 2 0 6 0 2 3 0 1 1 1.14 0.54 91 | 88 K -1 0 1 -1 -4 4 3 -2 0 -1 -3 2 -3 -4 -2 0 -2 -4 0 0 5 3 6 1 0 20 20 3 2 3 2 11 0 0 2 8 2 0 3 9 0.35 0.32 92 | 89 P -1 -1 -1 3 -4 0 -1 -2 0 -1 1 0 1 -2 3 0 -1 1 -3 0 5 2 3 17 0 3 3 3 3 4 13 6 3 1 16 5 4 2 0 6 0.21 0.20 93 | 90 V 0 -3 -1 0 -3 0 0 -2 -3 2 -1 -1 0 -1 2 -1 0 -4 -3 3 6 0 3 5 0 5 5 2 0 12 4 3 2 2 13 5 5 0 0 29 0.27 0.26 94 | 91 N 0 -1 1 0 -3 0 1 2 0 -2 -2 -1 -1 -1 0 1 2 -4 -1 -2 9 2 8 3 0 2 10 15 2 2 2 3 2 2 5 9 18 0 3 2 0.17 0.18 95 | 92 N -1 -1 0 1 -4 2 1 4 -3 -4 -2 0 -3 -4 0 -1 0 2 -4 -2 4 3 4 8 0 9 12 34 0 0 3 5 0 0 4 4 5 3 0 3 0.43 0.31 96 | 93 K -2 2 2 1 -4 0 1 -1 0 2 -2 3 -1 -4 -2 -1 0 -5 -4 0 1 9 10 7 0 3 8 5 2 13 3 21 1 0 1 3 7 0 0 6 0.24 0.23 97 | 94 W -6 -2 -7 -7 -6 -6 -2 -2 -6 1 -3 0 1 1 -7 -6 -1 11 -3 -2 0 2 0 0 0 0 3 3 0 7 3 5 3 5 0 0 5 59 0 2 2.56 1.01 98 | 95 L -2 0 -6 -6 -5 -1 0 -3 -5 -1 5 -1 2 -1 -6 -2 -2 4 -4 -1 2 5 0 0 0 3 6 3 0 2 55 4 6 2 0 3 3 5 0 2 0.80 0.53 99 | 96 N 0 0 1 2 -4 0 1 1 1 -1 -2 0 2 -1 -1 1 -1 -4 0 -2 7 6 6 13 0 3 12 10 3 3 3 4 6 3 3 9 4 0 3 3 0.13 0.16 100 | 97 N 0 0 0 2 0 0 0 2 0 -1 -1 0 0 -1 -1 0 0 -3 1 -1 10 5 5 11 3 3 4 17 3 3 5 5 3 3 3 4 6 0 5 3 0.09 0.12 101 | 98 A 5 -2 1 -3 0 -1 -2 0 -1 -2 -3 -2 -2 -5 -1 1 0 -5 -4 -2 50 2 6 1 2 3 1 6 1 2 2 2 1 0 3 8 6 0 0 2 0.63 0.47 102 | 99 G 0 0 1 1 -3 0 0 2 1 -2 -2 -1 -1 -1 -2 2 1 -3 -2 -1 9 5 7 6 0 3 
5 17 4 2 3 3 2 3 0 19 8 0 0 3 0.17 0.17 103 | 100 K 0 1 0 0 -2 1 0 1 0 0 -1 1 0 -2 0 0 1 -3 -2 -1 8 9 4 3 0 8 5 10 3 7 6 9 4 0 3 8 8 0 0 4 0.08 0.09 104 | 101 D 0 0 1 0 -3 0 0 3 -2 -1 -2 0 -2 -3 1 1 0 -4 -3 -1 6 4 5 6 0 4 6 30 0 4 4 5 0 0 6 9 5 0 0 4 0.24 0.19 105 | 102 L 0 -1 1 0 -3 2 2 -2 -1 1 1 0 1 -2 -2 0 0 -3 -2 0 10 1 7 3 0 10 15 2 1 10 15 4 6 1 1 5 4 0 0 5 0.11 0.12 106 | 103 G 0 -5 -3 -4 -5 -4 -5 7 -5 -4 -6 -2 -5 -6 -1 -2 -3 -5 -5 -5 6 0 0 0 0 0 0 84 0 1 0 2 0 0 3 2 1 0 0 0 1.78 0.58 107 | 104 S 4 0 -3 -3 -2 -1 -3 -1 -4 1 -1 -2 -2 -3 -3 1 -1 -4 -3 2 47 6 0 1 1 2 0 2 0 9 4 1 0 0 0 10 2 0 0 16 0.51 0.32 108 | 105 P 2 -2 -3 -3 -3 -2 -2 -2 -1 -3 -2 -1 -3 -1 7 -2 -2 0 -2 -2 15 2 1 1 0 1 2 2 1 1 4 3 0 3 54 3 1 1 2 3 1.16 0.38 109 | 106 V -3 -5 -6 -6 -4 -5 -6 -6 -6 7 0 -5 1 -1 -5 -4 0 -5 -4 2 1 0 0 0 0 0 0 0 0 66 6 0 4 2 0 0 7 0 0 14 1.35 0.52 110 | 107 P -2 -6 -6 -5 -7 -5 -5 -3 -6 -7 -7 -5 -6 -8 8 -2 -5 -8 -7 -6 2 0 0 0 0 0 0 2 0 0 0 0 0 0 93 2 0 0 0 0 3.00 0.94 111 | 108 D 2 -1 -1 0 -3 1 1 -1 -3 -3 -3 2 -1 -3 -2 2 2 -1 -3 -1 18 1 1 4 0 8 9 4 0 1 1 13 1 1 1 21 13 1 0 3 0.30 0.25 112 | 109 R -3 2 -1 -1 -5 6 2 -3 3 -5 -4 -1 -4 -1 -3 0 -2 -2 -1 -3 1 12 2 2 0 48 13 1 6 0 1 1 0 2 1 5 1 1 2 1 0.99 0.53 113 | 110 I -2 -5 -1 -5 -4 -5 -5 -2 -5 6 0 -5 0 0 -5 -4 -2 -5 -3 4 1 0 4 0 0 0 0 3 0 57 3 0 2 4 0 0 1 0 0 25 1.07 0.48 114 | 111 A 5 -1 -3 -4 -3 -3 -3 2 -4 -2 -4 -2 -3 -4 -2 -1 -2 -5 -4 -2 66 4 0 0 0 0 0 16 0 3 0 2 0 0 2 2 2 0 0 2 0.96 0.49 115 | 112 N -2 0 3 6 -4 0 2 -1 0 -4 -4 0 -4 -4 -3 -1 -2 -5 -4 -3 2 6 13 46 0 3 11 5 2 0 2 4 0 0 0 2 2 0 0 1 0.80 0.37 116 | 113 K -1 1 0 -3 -1 4 -1 -1 0 -4 -1 4 -3 -4 -2 -1 -2 -4 -3 -2 4 6 4 0 1 24 2 5 2 0 7 36 0 0 2 2 2 0 0 3 0.57 0.36 117 | 114 L -3 -1 -4 -5 -3 -3 -4 -5 -4 0 5 -1 3 0 -4 -4 -3 -3 1 -2 1 3 0 0 0 0 0 1 0 4 70 5 6 3 0 1 0 0 4 1 0.97 0.45 118 | 115 R -1 6 -1 -3 -1 2 -1 -3 0 -2 -3 0 -1 -4 -4 -1 -2 -4 -3 -2 4 58 3 0 1 8 4 2 2 2 2 3 1 0 0 4 2 0 0 2 0.95 0.45 119 | 116 D -1 -1 0 0 -4 -3 -1 6 0 -5 -3 -1 -4 -4 -1 -1 -3 2 -1 -4 
3 3 3 5 0 0 5 59 3 0 3 3 0 0 3 3 0 3 3 0 0.88 0.44 120 | 117 K -3 5 0 -3 -5 2 0 -1 -3 -1 -1 3 2 -4 -4 -1 -3 -5 -4 -1 1 30 4 0 0 10 5 5 0 5 5 17 6 0 0 5 0 0 0 5 0.58 0.36 121 | 118 E -1 1 0 0 -4 1 3 -2 1 -1 -2 1 0 -3 0 1 0 2 0 -1 3 7 5 3 0 5 26 3 3 3 3 7 3 0 4 9 5 3 4 3 0.23 0.20 122 | 119 F -4 -4 -4 -5 -4 -4 -4 -4 -3 1 -1 -4 -2 8 -4 -4 -3 -1 3 -1 1 0 0 0 0 0 1 1 0 6 3 1 0 71 0 1 1 0 9 3 1.54 0.51 123 | 120 K 1 1 1 -1 -3 0 0 0 0 -3 -3 3 -2 -3 -1 2 1 -3 1 -2 13 7 6 3 0 2 3 6 1 0 1 22 0 0 2 20 6 0 6 2 0.24 0.19 124 | 121 S -1 1 4 1 -3 0 0 -1 0 -2 -3 -1 -3 -4 -3 4 1 -4 -3 -2 1 7 22 7 0 3 4 4 2 2 1 2 0 0 0 36 7 0 0 2 0.47 0.30 125 | 122 F -3 -4 -4 -5 -1 -2 -4 -4 -3 -1 -1 -4 1 8 -1 -4 -2 5 1 -3 2 0 0 0 2 2 0 1 0 2 2 0 4 67 3 1 2 9 2 0 1.46 0.55 126 | 123 D 0 2 1 5 -4 -1 0 0 1 -3 -2 0 0 -4 -1 0 -2 -4 -3 -3 7 12 4 40 0 0 3 7 4 0 3 4 3 0 3 7 0 0 0 0 0.48 0.27 127 | 124 D 0 2 0 1 -3 0 1 -1 1 -2 -2 1 -2 -1 -2 2 0 -3 0 -1 7 12 4 6 0 4 11 4 3 0 4 9 0 3 0 21 4 0 3 4 0.16 0.14 128 | 125 F 0 -2 -2 -2 3 1 -1 -2 -1 0 0 -1 2 2 -2 0 1 -1 2 0 10 0 0 0 9 9 1 1 0 0 10 1 9 9 0 10 10 0 9 10 0.13 0.12 129 | 126 R 0 1 1 1 3 1 1 0 2 -3 -3 1 -2 -3 -2 1 1 -3 -2 -2 8 8 8 8 8 8 8 8 8 0 1 8 0 0 0 8 8 0 0 0 0.19 0.13 130 | 127 K -1 1 0 1 -3 2 2 0 1 -1 -2 1 0 -3 -2 1 0 -3 -2 -1 4 9 4 6 0 14 15 7 3 4 1 10 3 0 0 9 6 0 0 4 0.20 0.15 131 | 128 K 4 -1 -1 -1 -1 -1 2 -1 -2 -3 -3 0 -1 -2 -2 0 0 -3 -2 -1 51 2 2 2 1 2 15 2 0 0 1 4 1 2 0 6 4 0 1 2 0.48 0.29 132 | 129 F -3 -3 -4 -4 -3 -3 -4 -4 -3 2 2 -3 1 6 -4 -2 -1 -2 1 0 1 1 1 1 0 0 1 1 0 11 17 1 3 48 1 4 4 0 3 4 0.80 0.29 133 | 130 W -4 -4 -4 -4 -4 -4 1 0 -4 -4 -1 -4 -3 -2 -5 -4 -4 11 -1 -4 1 1 1 1 0 0 9 7 0 1 6 1 0 0 1 1 1 70 0 1 2.73 0.64 134 | 131 E 0 1 0 0 -3 1 1 0 -2 -1 -1 3 1 -1 -2 -1 0 -3 0 -1 5 5 3 3 0 4 11 7 0 4 6 26 4 3 0 3 6 0 3 5 0.14 0.16 135 | 132 E 0 -2 -1 1 3 1 0 -2 -1 1 0 -1 2 -1 -2 0 1 -2 1 0 8 0 0 8 8 8 8 1 0 8 8 0 8 0 0 8 8 0 8 8 0.09 0.12 136 | 133 V 0 0 -3 -3 -2 -2 -3 -3 -3 2 0 -2 2 1 -3 -2 0 -2 1 4 6 6 0 0 0 0 1 1 0 12 6 0 6 7 0 1 7 0 
5 42 0.37 0.22 137 | 134 S 2 0 -2 -2 1 0 0 1 -2 -1 -2 -1 -2 0 2 2 0 -3 0 -1 22 4 0 0 4 4 4 8 0 4 1 0 0 4 12 19 4 0 4 4 0.20 0.15 138 | 135 K 0 0 4 1 -4 1 2 -1 0 -3 -3 3 -1 -4 -3 0 -1 -5 -2 -3 9 4 23 8 0 6 13 3 1 1 1 18 1 0 0 6 2 0 1 1 0.42 0.29 139 | 136 D -2 -2 0 6 0 -1 1 -2 0 -3 -5 -2 -4 -2 -4 1 -2 -5 1 -3 2 2 2 56 2 2 7 2 2 2 0 2 0 2 0 9 2 0 4 2 1.01 0.54 140 | 137 P 1 -1 -1 -1 -4 -1 0 0 0 -2 -3 -1 -3 -2 5 2 0 -5 -4 -2 13 3 2 2 0 3 8 7 2 2 2 3 0 2 27 16 6 0 0 2 0.46 0.32 141 | 138 E 1 -1 -1 1 -3 1 2 -1 0 0 -1 0 0 -1 -3 1 0 -3 -1 0 12 2 3 7 0 8 20 3 2 8 3 5 2 3 0 9 3 0 2 6 0.14 0.16 142 | 139 L 0 -2 -2 0 3 -1 0 -3 2 1 0 -2 2 1 1 -1 -1 -2 2 0 8 0 0 8 8 0 8 1 8 8 8 0 8 8 8 1 0 0 8 8 0.12 0.13 143 | 140 S 1 0 0 -1 1 0 -1 -1 0 0 0 0 2 0 -3 2 0 2 -2 -1 14 5 4 3 3 3 3 3 3 5 7 4 7 5 0 19 3 3 0 4 0.11 0.17 144 | 141 K 0 0 1 1 -3 1 1 0 1 -1 -3 2 2 -1 -3 1 0 -4 -3 -1 10 5 9 7 0 5 7 8 3 4 1 13 6 3 0 10 5 0 0 4 0.14 0.17 145 | 142 Q 0 1 1 1 -3 1 0 0 2 -3 -1 0 -2 -2 1 0 0 4 1 -3 7 7 7 7 0 7 7 7 7 0 7 7 0 0 7 7 7 7 7 0 0.17 0.16 146 | 143 F -6 -7 -7 -7 -6 -7 -7 -1 -5 0 0 -7 1 8 -7 -6 -1 3 2 -1 0 0 0 0 0 0 0 6 0 5 8 0 4 61 0 0 4 4 6 4 1.73 0.71 147 | 144 S -1 0 2 1 1 0 -1 -1 1 0 -2 1 0 -3 1 1 1 -4 0 -1 3 4 10 7 3 3 3 5 3 8 3 13 3 0 6 12 7 0 3 4 0.11 0.16 148 | 145 R -1 0 0 0 -4 0 0 -1 1 -1 -1 1 0 -1 3 0 0 -4 1 -1 4 5 4 5 0 4 7 4 4 3 4 12 3 3 17 8 5 0 6 3 0.19 0.18 149 | 146 N 1 0 0 0 -3 1 -1 0 0 -1 -1 1 0 0 -1 1 0 0 -1 -1 13 6 5 7 0 6 2 7 3 4 6 9 2 5 2 15 4 1 2 3 0.07 0.11 150 | 147 N 0 1 1 1 -3 1 1 1 -1 0 -2 1 -2 -3 -2 1 1 -3 -2 -2 9 9 9 9 0 9 9 9 0 9 0 9 0 0 0 9 9 0 0 0 0.17 0.11 151 | 148 N -1 2 0 -3 -3 2 -1 -1 1 1 1 1 1 0 -3 -1 -1 -3 0 0 5 11 4 0 0 13 3 3 3 10 15 11 4 3 0 3 3 0 3 5 0.13 0.18 152 | 149 D 0 0 0 1 -3 0 1 1 0 -1 -1 1 0 -1 -3 0 1 -3 0 -1 5 4 5 9 0 4 8 12 3 4 4 11 3 3 0 5 10 0 3 6 0.08 0.11 153 | 150 R -1 4 1 0 0 1 1 -1 1 -1 0 0 0 -1 -4 -1 -1 1 -3 -2 3 29 8 5 2 5 8 4 4 3 10 3 2 3 0 3 3 2 0 3 0.28 0.27 154 | 151 M 0 -4 -5 -5 -4 -4 -5 -5 2 3 2 0 7 -3 -5 -4 0 -5 -4 
2 6 0 0 0 0 0 0 0 6 16 15 7 29 0 0 0 6 0 0 13 0.84 0.40 155 | 152 K 0 0 0 0 -3 1 0 0 -1 0 0 0 1 0 1 0 0 -2 1 0 6 6 6 6 0 6 6 6 0 6 6 6 6 6 6 6 6 0 6 6 0.04 0.07 156 | 153 V -1 1 3 0 -5 3 1 0 1 -2 -3 2 -1 -5 -4 0 -1 -5 -1 -2 4 6 18 6 0 17 6 8 3 2 2 16 2 0 0 5 2 0 2 2 0.39 0.25 157 | 154 G -2 -1 -4 -5 -6 -1 -2 7 0 -7 -6 -1 -6 -6 -1 -2 -5 -6 -1 -6 3 3 0 0 0 2 3 73 2 0 0 4 0 0 4 3 0 0 2 0 1.55 0.58 158 | 155 K -1 3 1 0 -1 -1 -1 -2 1 -2 1 2 -1 1 -4 -2 -2 3 2 -2 3 16 8 6 1 2 2 3 3 2 17 14 1 5 0 2 2 4 8 1 0.22 0.18 159 | 156 A 1 -2 -1 -2 -2 -2 -2 1 -2 1 -1 -2 -1 -2 3 1 1 -3 -2 1 14 0 0 0 0 0 0 14 0 14 0 0 0 0 14 14 14 0 0 14 0.23 0.07 160 | 157 P 0 -4 -4 -4 -5 1 -4 -5 -5 -5 0 1 -4 -6 7 0 0 -6 -5 -4 8 0 0 0 0 7 0 0 0 0 9 7 0 0 54 7 7 0 0 0 1.34 0.53 161 | 158 K 0 0 0 -2 -3 0 -2 -3 2 0 0 0 1 1 1 0 0 3 1 0 6 6 6 0 0 6 0 0 6 6 6 6 6 6 6 6 6 6 6 6 0.07 0.16 162 | 159 T 3 -4 -4 -4 2 -4 -1 -4 -5 -3 -1 -4 -3 -5 3 1 2 -5 -4 3 25 0 0 0 5 0 4 0 0 0 5 0 0 0 16 10 13 0 0 21 0.60 0.42 163 | 160 R -2 4 -1 1 0 0 -1 -2 0 0 -1 0 -1 -2 3 0 0 -4 -3 -1 2 24 2 8 2 3 3 2 2 8 4 4 2 2 16 6 5 0 0 4 0.29 0.23 164 | 161 T -1 1 -1 0 -4 0 1 -2 -1 -1 0 2 -1 0 2 -1 0 0 -1 -1 4 6 3 4 0 3 12 3 2 2 8 17 2 5 12 4 6 1 2 4 0.17 0.16 165 | 162 Q 0 1 1 1 -1 1 1 -1 -1 -1 -1 1 -1 -4 -1 2 1 -4 -2 -2 7 6 7 7 1 8 9 3 1 5 5 9 1 0 3 19 7 0 1 2 0.16 0.17 166 | 163 D -2 -4 -2 5 -6 3 4 1 -1 -6 -4 -3 -5 -6 -5 -2 -2 1 0 -6 2 0 2 29 0 12 31 9 2 0 2 0 0 0 0 3 2 2 4 0 0.96 0.56 167 | 164 V 0 0 0 -2 -3 1 0 0 2 0 0 0 1 1 -3 0 0 3 1 0 6 6 6 0 0 6 6 6 6 6 6 6 6 6 0 6 6 6 6 6 0.08 0.12 168 | 165 S 0 -1 0 -1 1 -2 -1 -1 3 0 -1 0 1 -1 -3 -1 -1 2 4 1 8 4 4 3 4 0 5 4 10 6 4 6 4 0 0 3 3 3 16 12 0.18 0.21 169 | 166 G -1 0 0 -1 -4 -3 -3 4 1 -1 -2 0 0 0 0 0 -3 -3 1 -1 5 4 5 4 0 0 0 39 3 4 4 5 3 3 4 6 0 0 6 4 0.38 0.29 170 | 167 K -1 0 1 0 -4 0 0 3 -3 -2 -2 2 0 -1 -1 0 -1 -4 -3 -2 3 4 6 5 0 3 6 28 0 2 4 18 3 3 3 7 2 0 0 3 0.28 0.25 171 | 168 R -1 4 1 -3 0 1 -2 -1 2 -1 -1 1 -1 -4 2 -1 -2 -4 -1 -1 4 31 7 0 2 8 0 4 5 3 6 10 2 0 9 3 2 0 2 4 
0.37 0.32 172 | 169 T 0 2 1 0 -3 0 0 1 0 0 -2 2 0 -1 -2 0 0 0 -1 0 5 12 6 3 0 2 7 10 2 5 1 15 4 2 1 6 6 1 2 7 0.10 0.14 173 | 170 S 0 0 1 0 2 1 0 0 2 -2 -1 0 -2 -3 1 0 0 3 -2 -1 6 6 6 6 6 6 6 6 6 0 6 6 0 0 6 6 6 6 0 6 0.11 0.13 174 | 171 F 0 -2 1 -2 3 -2 -2 -2 2 1 0 -2 2 1 -2 0 1 -2 2 0 8 0 8 0 8 0 0 1 8 8 8 0 8 8 0 8 8 0 8 8 0.12 0.13 175 | 172 E -1 0 -1 1 -4 2 5 -2 3 -1 -2 -1 -3 -4 -3 -1 -2 -4 -3 -2 3 3 2 5 0 9 54 2 9 4 4 0 0 0 0 2 0 0 0 3 0.69 0.33 176 | 173 L -3 -4 -4 -5 1 -4 -4 -5 -4 4 4 -4 2 1 -4 -3 -1 -3 1 1 0 0 0 0 3 0 0 0 0 27 46 0 4 4 0 0 4 0 5 8 0.63 0.28 177 | 174 H -3 3 -1 -1 1 -2 -2 -2 8 -5 -2 -2 -3 0 -2 -3 -4 -4 2 -4 0 19 2 2 4 0 0 4 55 0 4 0 0 3 2 0 0 0 5 0 1.26 0.49 178 | 175 H -1 0 -1 -3 -5 0 -2 -4 9 -3 -3 -3 -3 -1 -2 -3 -2 -4 1 -3 5 4 2 0 0 4 2 0 71 2 2 0 0 2 2 0 2 0 3 2 1.69 0.58 179 | 176 E -1 0 -1 -1 0 -1 -1 -3 0 2 0 2 0 -1 -2 -1 -1 -4 -1 3 3 4 2 3 2 3 3 2 2 12 10 14 2 3 2 4 2 0 2 25 0.19 0.20 180 | 177 K -2 -1 1 2 -4 1 1 -2 3 1 -2 2 0 -4 -4 -1 0 -5 -3 2 3 3 6 10 0 6 11 3 8 10 3 13 3 0 0 3 6 0 0 14 0.25 0.25 181 | 178 P 0 0 -1 -1 -5 0 2 -2 0 -4 -2 0 -1 -1 5 0 -1 2 0 -3 10 6 2 3 0 3 15 2 3 0 4 5 2 3 26 6 3 3 3 2 0.44 0.34 182 | 179 I -1 -6 -6 -6 0 -6 -6 -6 -6 7 0 -6 0 -4 -1 -2 -2 -6 -1 3 4 0 0 0 2 0 0 0 0 57 7 0 2 0 4 3 3 0 3 15 1.28 0.61 183 | 180 S 1 -1 0 -1 -4 2 1 -1 1 -2 -1 2 -1 -2 -3 2 -1 0 0 -1 13 2 3 2 0 9 11 3 3 1 5 12 1 1 0 20 3 2 3 4 0.22 0.24 184 | 181 Q -2 -1 2 3 -1 2 2 -3 3 -2 -2 1 -2 -2 -2 -1 -3 0 1 -2 3 2 10 18 1 10 14 1 8 2 4 10 1 2 1 4 1 2 5 2 0.38 0.29 185 | 182 N -3 -2 0 0 -7 -3 -1 6 -2 -8 -3 -3 -7 -7 -6 -1 -2 1 -7 -7 2 2 4 5 0 1 4 66 1 0 3 1 0 0 0 5 3 2 0 0 1.44 0.70 186 | 183 G -1 -2 -1 -6 -7 -6 -2 7 -7 -2 -8 -6 -7 -8 -2 -2 -6 -8 -8 -7 4 2 2 0 0 0 3 79 0 3 0 0 0 0 3 3 0 0 0 0 1.98 0.82 187 | 184 G 3 -1 0 2 -4 -1 2 2 -3 -3 -3 -1 -2 -5 -1 -1 -2 -5 -3 -3 29 4 4 12 0 2 17 16 0 1 2 2 1 0 3 4 1 0 1 1 0.42 0.29 188 | 185 V -4 -2 -6 -7 -5 -6 -6 -5 -6 1 0 -6 1 0 1 -3 -2 -6 -5 6 0 2 0 0 0 0 0 1 0 7 6 0 3 4 7 2 2 0 0 65 1.30 0.58 189 
| 186 Y -7 -7 -7 -8 0 -7 -7 -8 -4 -6 -6 -7 1 4 -8 -7 -7 -3 9 -6 0 0 0 0 2 0 0 0 0 0 0 0 3 14 0 0 0 0 81 0 3.11 1.04 190 | 187 D -2 -3 5 7 -7 -4 -2 -2 -1 -7 -7 -2 -6 -4 -5 -3 -4 -7 -3 -7 3 1 26 60 0 0 1 3 2 0 0 2 0 1 0 1 0 0 1 0 1.74 0.66 191 | 188 M 0 -5 -5 -6 -4 -4 -5 -6 -2 4 3 -5 5 0 -5 -3 -2 -5 -4 3 7 0 0 0 0 0 0 0 1 23 31 0 14 3 0 2 1 0 0 19 0.82 0.38 192 | 189 D -4 -5 -1 8 -7 -4 1 -3 -5 -7 -7 -3 -6 -7 -5 -3 -5 -7 -3 -7 1 0 2 83 0 0 8 1 0 0 0 1 0 0 0 1 0 0 1 0 2.17 0.71 193 | 190 N -5 -1 8 -2 -7 1 -2 -4 0 -7 -7 -2 -6 -7 -6 -3 -2 -7 -6 -7 0 4 83 1 0 5 2 0 2 0 0 2 0 0 0 1 1 0 0 0 2.23 0.76 194 | 191 I -4 -5 -6 -6 -4 -5 -6 -6 -6 4 5 -5 4 0 -3 -5 -4 -5 -4 0 0 0 0 0 0 0 0 0 0 29 52 0 13 3 1 0 0 0 0 2 1.22 0.46 195 | 192 S -2 6 0 -4 -1 -1 -3 -1 -1 0 -2 0 0 -2 -4 1 -1 -2 -1 -1 2 44 3 0 1 1 0 5 1 6 3 5 3 1 0 14 3 0 2 5 0.60 0.33 196 | 193 V -1 -6 -6 -6 -4 -5 -6 -4 -3 6 0 -5 1 -2 -6 -5 -3 -6 -4 5 5 0 0 0 0 0 0 1 1 48 4 0 3 1 0 0 0 0 0 38 1.31 0.48 197 | 194 V 0 -2 1 -4 -1 -3 -3 -4 1 1 1 -2 4 -2 -4 -1 2 -4 -2 3 7 2 7 0 1 0 0 0 4 4 12 2 16 0 0 3 13 0 0 27 0.41 0.27 198 | 195 T 0 -1 -3 -4 -4 -4 -4 -5 -5 -3 -2 -2 -4 -4 -3 2 7 -6 -5 -4 7 3 0 0 0 0 0 0 0 1 3 2 0 1 1 11 72 0 0 0 1.47 0.60 199 | 196 P -1 -6 -6 -5 -6 -3 -5 -6 -6 -6 -6 -2 -6 -7 8 -3 -4 -7 -7 -4 4 0 0 0 0 1 0 0 0 0 0 2 0 0 90 1 1 0 0 1 2.80 0.78 200 | 197 K -2 3 -1 -2 -5 2 -2 -4 -2 -5 -4 6 1 -4 -4 -1 -3 -5 -2 -5 1 16 2 2 0 7 0 0 1 0 1 59 4 0 0 5 1 0 1 0 1.11 0.44 201 | 198 R -1 5 3 -1 -3 2 -2 -4 1 -2 0 0 1 -1 -4 -1 -2 -3 -1 -2 5 35 16 3 0 9 1 0 3 1 8 1 3 3 0 5 2 0 2 2 0.46 0.26 202 | 199 H -5 -1 -3 -5 -7 -3 -4 -6 10 -7 -5 -4 -5 -5 -6 -1 -5 -6 2 -7 0 3 0 0 0 0 0 0 86 0 1 0 0 0 0 4 0 0 6 0 3.08 0.78 203 | 200 I -1 -3 -2 -1 -4 1 -3 -3 2 6 -1 -1 -1 -3 -4 -2 -3 -3 -2 1 5 1 2 3 0 7 1 2 6 57 2 5 1 0 0 2 0 0 1 6 0.73 0.33 204 | 201 D -1 -1 2 3 -5 2 4 0 -1 -3 -4 1 0 -4 -3 -1 -2 -5 -3 -4 3 3 8 19 0 9 31 7 1 1 1 8 3 1 0 3 2 0 0 0 0.55 0.25 205 | 202 I -1 -5 -3 -5 -4 -3 -4 -5 0 6 1 -2 -1 -2 -5 -3 0 -5 -3 2 4 0 2 0 0 1 1 0 3 
60 8 3 1 1 0 0 6 0 0 11 0.97 0.37 206 | 203 H -5 -1 -2 -4 -7 -1 -4 -3 10 -7 -5 -4 -5 -5 -6 -1 -4 -6 -2 -7 0 3 1 1 0 3 0 2 84 0 1 0 0 0 0 5 1 0 0 0 2.86 0.74 207 | 204 R -1 4 -2 -2 -4 1 -2 -3 -1 -4 -4 3 -3 0 -4 2 -2 -4 2 -4 3 28 1 1 0 7 0 1 1 0 0 24 0 4 0 19 1 0 9 0 0.64 0.30 208 | 205 G -2 2 2 -2 -5 -2 -1 5 -3 -5 -5 3 -4 -5 -3 -2 -3 -5 -4 -4 1 11 12 0 0 1 4 45 0 0 0 22 0 0 1 1 1 0 0 1 0.85 0.31 209 | 206 K -3 4 -2 -3 -5 2 2 -4 -3 -5 -5 6 -3 -5 -3 -2 -3 -5 -4 -5 0 21 0 0 0 8 11 0 0 0 0 59 0 0 0 0 0 0 0 0 1.20 0.35 210 | 211 | K Lambda 212 | Standard Ungapped 0.1267 0.3101 213 | Standard Gapped 0.0344 0.2670 214 | PSI Ungapped 0.1124 0.3179 215 | PSI Gapped 0.0344 0.2670 216 | -------------------------------------------------------------------------------- /test/test_pssm.py: -------------------------------------------------------------------------------- 1 | from pssmgen.pssm import PSSM 2 | 3 | # initiate the PSSM object 4 | gen = PSSM(work_dir='7CEI') 5 | 6 | # set psiblast executable, database and other psiblast parameters (here shows the defaults) 7 | gen.configure(blast_exe='/home/software/blast/bin/psiblast', 8 | database='/data/DBs/blast_dbs/nr_v20180204/nr', 9 | num_threads = 4, evalue=0.0001, comp_based_stats='T', 10 | max_target_seqs=2000, num_iterations=3, outfmt=7, 11 | save_each_pssm=True, save_pssm_after_last_round=True) 12 | 13 | # generates FASTA files 14 | gen.get_fasta(pdb_dir='pdb', chain=('A','B'), out_dir='fasta') 15 | 16 | # generates PSSM 17 | gen.get_pssm(fasta_dir='fasta', out_dir='pssm_raw', run=True) 18 | 19 | # map PSSM and PDB to get consistent files 20 | gen.map_pssm(pssm_dir='pssm_raw', pdb_dir='pdb', out_dir='pssm', chain=('A','B')) 21 | 22 | # write consistent files and move 23 | gen.get_mapped_pdb(pdbpssm_dir='pssm', pdb_dir='pdb', pdbnonmatch_dir='pdb_nonmatch') --------------------------------------------------------------------------------