├── .github ├── ISSUE_TEMPLATE │ └── bug-report.md └── workflows │ ├── github_action_test_dummy.yml │ ├── pip_installation.yml │ └── publish_and_release.yml ├── .gitignore ├── HISTORY.md ├── LICENSE.txt ├── MANIFEST.in ├── README.md ├── data └── test_files │ ├── O15552.cif │ ├── Q7Z6M3.cif │ ├── Q7Z6M3.pdb │ ├── kinase_motifs.csv │ ├── pae_O15552.hdf │ ├── pae_Q7Z6M3.hdf │ ├── ptm_file.csv │ └── test_alphafold_annotation.csv ├── misc ├── CLA.md ├── bumpversion.cfg ├── check_version.sh ├── loose_pip_install.sh └── stable_pip_install.sh ├── nbs └── tutorial.ipynb ├── release ├── logos │ ├── alpha_logo.icns │ ├── alpha_logo.ico │ └── alpha_logo.png ├── one_click_linux_gui │ ├── control │ └── create_installer_linux.sh ├── one_click_macos_gui │ ├── Info.plist │ ├── Resources │ │ ├── conclusion.html │ │ └── welcome.html │ ├── create_installer_macos.sh │ ├── distribution.xml │ ├── scripts │ │ ├── postinstall │ │ └── preinstall │ └── structuremap_terminal ├── one_click_windows_gui │ ├── create_installer_windows.sh │ └── structuremap_innoinstaller.iss ├── pyinstaller │ ├── structuremap.spec │ └── structuremap_pyinstaller.py └── pypi │ ├── install_pypi_wheel.sh │ ├── install_test_pypi_wheel.sh │ └── prepare_pypi_wheel.sh ├── requirements ├── requirements.txt └── requirements_development.txt ├── setup.py ├── structuremap ├── __init__.py ├── cli.py ├── gui.py ├── plotting.py ├── processing.py └── utils.py └── tests ├── __init__.py ├── run_tests.sh ├── test_cli.py ├── test_gui.py └── test_processing.py /.github/ISSUE_TEMPLATE/bug-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | Make sure your bug is not addressed in the [troubleshooting section](https://github.com/MannLabs/structuremap#troubleshooting) or in [previous 
issues](https://github.com/MannLabs/structuremap/issues?q=is%3Aissue). If not, provide a clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Logs** 24 | Please provide the log (see the structuremap terminal on where to find it). 25 | 26 | **Screenshots** 27 | If applicable, add screenshots to help explain your problem. 28 | 29 | **Version (please complete the following information):** 30 | - Installation Type [e.g. One-Click Installer / Pip / Developer] 31 | - If no log is available, provide the following: 32 | - Platform information 33 | - system [e.g. Darwin] 34 | - release [e.g. 19.6.0] 35 | - version [e.g. 10.15.7] 36 | - machine [e.g. x86_64] 37 | - processor [e.g. i386] 38 | - cpu count [e.g. 8] 39 | - Python information: 40 | - structuremap version [e.g. 0.1.2] 41 | - [other packages] 42 | 43 | **Additional context** 44 | Add any other context about the problem here. Attach log files or upload data files if possible. 
45 | -------------------------------------------------------------------------------- /.github/workflows/github_action_test_dummy.yml: -------------------------------------------------------------------------------- 1 | on: 2 | workflow_dispatch: 3 | 4 | name: Test new GitHub action workflow 5 | 6 | 7 | jobs: 8 | Version_bumped: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Checkout code 12 | uses: actions/checkout@v2 13 | -------------------------------------------------------------------------------- /.github/workflows/pip_installation.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: [ main ] 4 | pull_request: 5 | branches: [ main, development ] 6 | workflow_dispatch: 7 | 8 | name: Default installation and tests 9 | 10 | jobs: 11 | stable_installation: 12 | name: Test stable pip installation on ${{ matrix.os }} 13 | runs-on: ${{ matrix.os }} 14 | strategy: 15 | matrix: 16 | os: [ubuntu-latest, macOS-latest, windows-latest] 17 | steps: 18 | - uses: actions/checkout@v2 19 | - uses: conda-incubator/setup-miniconda@v2 20 | with: 21 | auto-update-conda: true 22 | python-version: ${{ matrix.python-version }} 23 | - name: Conda info 24 | shell: bash -l {0} 25 | run: conda info 26 | - name: Test pip installation with all stable dependencies 27 | shell: bash -l {0} 28 | run: | 29 | cd misc 30 | . ./stable_pip_install.sh 31 | - name: Unittests 32 | shell: bash -l {0} 33 | run: | 34 | cd tests 35 | . 
./run_tests.sh 36 | loose_installation: 37 | name: Test loose pip installation on ${{ matrix.os }} 38 | runs-on: ${{ matrix.os }} 39 | strategy: 40 | matrix: 41 | os: [ubuntu-latest, macOS-latest, windows-latest] 42 | steps: 43 | - uses: actions/checkout@v2 44 | - uses: conda-incubator/setup-miniconda@v2 45 | with: 46 | auto-update-conda: true 47 | python-version: ${{ matrix.python-version }} 48 | - name: Conda info 49 | shell: bash -l {0} 50 | run: conda info 51 | - name: Test pip installation with all loose dependencies 52 | shell: bash -l {0} 53 | run: | 54 | cd misc 55 | . ./loose_pip_install.sh 56 | - name: Unittests 57 | shell: bash -l {0} 58 | run: | 59 | cd tests 60 | . ./run_tests.sh 61 | -------------------------------------------------------------------------------- /.github/workflows/publish_and_release.yml: -------------------------------------------------------------------------------- 1 | on: 2 | # push: 3 | # branches: [ main ] 4 | workflow_dispatch: 5 | 6 | 7 | name: Publish on PyPi and release on GitHub 8 | 9 | jobs: 10 | Version_Bumped: 11 | runs-on: ubuntu-latest 12 | outputs: 13 | version: ${{ steps.master_version_bumped.outputs.version }} 14 | steps: 15 | - name: Checkout code 16 | uses: actions/checkout@v2 17 | - uses: conda-incubator/setup-miniconda@v2 18 | with: 19 | auto-update-conda: true 20 | python-version: ${{ matrix.python-version }} 21 | - name: Master version bumped 22 | id: master_version_bumped 23 | shell: bash -l {0} 24 | run: | 25 | cd misc 26 | . 
./check_version.sh 27 | echo ::set-output name=version::$current_version 28 | # Create_Draft_On_GitHub: 29 | # runs-on: ubuntu-latest 30 | # needs: Version_Bumped 31 | # outputs: 32 | # upload_url: ${{ steps.draft_release.outputs.upload_url }} 33 | # steps: 34 | # - name: Draft Release 35 | # id: draft_release 36 | # uses: actions/create-release@v1 37 | # env: 38 | # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # This token is provided by Actions, you do not need to create your own token 39 | # with: 40 | # tag_name: ${{ needs.Version_Bumped.outputs.version }} 41 | # release_name: Release version ${{ needs.Version_Bumped.outputs.version }} 42 | # draft: false 43 | # prerelease: false 44 | # Create_Linux_Release: 45 | # runs-on: ubuntu-latest 46 | # needs: Create_Draft_On_GitHub 47 | # steps: 48 | # - name: Checkout code 49 | # uses: actions/checkout@v2 50 | # - uses: conda-incubator/setup-miniconda@v2 51 | # with: 52 | # auto-update-conda: true 53 | # python-version: ${{ matrix.python-version }} 54 | # - name: Conda info 55 | # shell: bash -l {0} 56 | # run: conda info 57 | # - name: Creating installer for Linux 58 | # shell: bash -l {0} 59 | # run: | 60 | # cd release/one_click_linux_gui 61 | # . 
./create_installer_linux.sh 62 | # - name: Test installer for Linux 63 | # shell: bash -l {0} 64 | # run: | 65 | # sudo dpkg -i release/one_click_linux_gui/dist/structuremap_gui_installer_linux.deb 66 | # - name: Upload Linux Installer 67 | # id: upload-release-asset 68 | # uses: actions/upload-release-asset@v1 69 | # env: 70 | # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 71 | # with: 72 | # upload_url: ${{ needs.Create_Draft_On_GitHub.outputs.upload_url }} 73 | # asset_path: release/one_click_linux_gui/dist/structuremap_gui_installer_linux.deb 74 | # asset_name: structuremap_gui_installer_linux.deb 75 | # asset_content_type: application/octet-stream 76 | # Create_MacOS_Release: 77 | # runs-on: macos-latest 78 | # needs: Create_Draft_On_GitHub 79 | # steps: 80 | # - name: Checkout code 81 | # uses: actions/checkout@v2 82 | # - uses: conda-incubator/setup-miniconda@v2 83 | # with: 84 | # auto-update-conda: true 85 | # python-version: ${{ matrix.python-version }} 86 | # - name: Conda info 87 | # shell: bash -l {0} 88 | # run: conda info 89 | # - name: Creating installer for MacOS 90 | # shell: bash -l {0} 91 | # run: | 92 | # cd release/one_click_macos_gui 93 | # . 
./create_installer_macos.sh 94 | # - name: Test installer for MacOS 95 | # shell: bash -l {0} 96 | # run: | 97 | # sudo installer -pkg release/one_click_macos_gui/dist/structuremap_gui_installer_macos.pkg -target / 98 | # - name: Upload MacOS Installer 99 | # id: upload-release-asset 100 | # uses: actions/upload-release-asset@v1 101 | # env: 102 | # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 103 | # with: 104 | # upload_url: ${{ needs.Create_Draft_On_GitHub.outputs.upload_url }} 105 | # asset_path: release/one_click_macos_gui/dist/structuremap_gui_installer_macos.pkg 106 | # asset_name: structuremap_gui_installer_macos.pkg 107 | # asset_content_type: application/octet-stream 108 | # Create_Windows_Release: 109 | # runs-on: windows-latest 110 | # needs: Create_Draft_On_GitHub 111 | # steps: 112 | # - name: Checkout code 113 | # uses: actions/checkout@v2 114 | # - uses: conda-incubator/setup-miniconda@v2 115 | # with: 116 | # auto-update-conda: true 117 | # python-version: ${{ matrix.python-version }} 118 | # - name: Conda info 119 | # shell: bash -l {0} 120 | # run: conda info 121 | # - name: Creating installer for Windows 122 | # shell: bash -l {0} 123 | # run: | 124 | # cd release/one_click_windows_gui 125 | # . ./create_installer_windows.sh 126 | # - name: Test installer for Windows 127 | # shell: bash -l {0} 128 | # run: | 129 | # cd release/one_click_windows_gui/dist/ 130 | # echo "TODO, this test seems to freeze the runner..." 
131 | # # ./structuremap_gui_installer_windows.exe //verysilent //log=log.txt //noicons //tasks= //portable=1 132 | # # cat log.txt 133 | # - name: Upload Windows Installer 134 | # id: upload-release-asset 135 | # uses: actions/upload-release-asset@v1 136 | # env: 137 | # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 138 | # with: 139 | # upload_url: ${{ needs.Create_Draft_On_GitHub.outputs.upload_url }} 140 | # asset_path: release/one_click_windows_gui/dist/structuremap_gui_installer_windows.exe 141 | # asset_name: structuremap_gui_installer_windows.exe 142 | # asset_content_type: application/octet-stream 143 | Create_PyPi_Release: 144 | runs-on: ubuntu-latest 145 | needs: Version_Bumped 146 | steps: 147 | - name: Checkout code 148 | uses: actions/checkout@v2 149 | - uses: conda-incubator/setup-miniconda@v2 150 | with: 151 | auto-update-conda: true 152 | python-version: ${{ matrix.python-version }} 153 | - name: Conda info 154 | shell: bash -l {0} 155 | run: conda info 156 | - name: Prepare distribution 157 | shell: bash -l {0} 158 | run: | 159 | cd release/pypi 160 | . ./prepare_pypi_wheel.sh 161 | - name: Publish distribution to Test PyPI 162 | uses: pypa/gh-action-pypi-publish@master 163 | with: 164 | password: ${{ secrets.TEST_PYPI_API_TOKEN }} 165 | repository_url: https://test.pypi.org/legacy/ 166 | - name: Test PyPI test release 167 | shell: bash -l {0} 168 | run: | 169 | cd release/pypi 170 | . 
./install_test_pypi_wheel.sh 171 | - name: Publish distribution to PyPI 172 | uses: pypa/gh-action-pypi-publish@master 173 | with: 174 | password: ${{ secrets.PYPI_API_TOKEN }} 175 | Test_PyPi_Release: 176 | name: Test_PyPi_version_on_${{ matrix.os }} 177 | runs-on: ${{ matrix.os }} 178 | needs: Create_PyPi_Release 179 | strategy: 180 | matrix: 181 | os: [ubuntu-latest, macOS-latest, windows-latest] 182 | steps: 183 | - uses: actions/checkout@v2 184 | - uses: conda-incubator/setup-miniconda@v2 185 | with: 186 | auto-update-conda: true 187 | python-version: ${{ matrix.python-version }} 188 | - name: Conda info 189 | shell: bash -l {0} 190 | run: conda info 191 | - name: Test pip installation from PyPi 192 | shell: bash -l {0} 193 | run: | 194 | cd release/pypi 195 | . ./install_pypi_wheel.sh 196 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | # lib/ 18 | # lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | # *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # User defined: 132 | structuremap/logs 133 | *.DS_Store 134 | *sandbox* 135 | -------------------------------------------------------------------------------- /HISTORY.md: -------------------------------------------------------------------------------- 1 | ## Changelog 2 | 3 | ### 0.0.1 4 | 5 | * FEAT: Initial creation of structuremap. 6 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2020 MannLabs 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include structuremap * 2 | include LICENSE.txt 3 | include README.md 4 | recursive-exclude structuremap/logs * 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![Pip installation](https://github.com/MannLabs/structuremap/workflows/Default%20installation%20and%20tests/badge.svg) 2 | ![GUI and PyPi releases](https://github.com/MannLabs/structuremap/workflows/Publish%20on%20PyPi%20and%20release%20on%20GitHub/badge.svg) 3 | [![Downloads](https://pepy.tech/badge/structuremap)](https://pepy.tech/project/structuremap) 4 | [![Downloads](https://pepy.tech/badge/structuremap/month)](https://pepy.tech/project/structuremap) 5 | [![Downloads](https://pepy.tech/badge/structuremap/week)](https://pepy.tech/project/structuremap) 6 | 7 | 8 | # structuremap 9 | An open-source Python package for integrating information from predicted protein 
structures deposited in the [AlphaFold database](https://alphafold.ebi.ac.uk/) with proteomics data and specifically with post-translational modifications (PTMs). PTMs on the 3D protein structures can be visualised by [AlphaMap](https://github.com/MannLabs/alphamap). To enable all hyperlinks in this document, please view it at [GitHub](https://github.com/MannLabs/structuremap). 10 | 11 | * [**About**](#about) 12 | * [**License**](#license) 13 | * [**Installation**](#installation) 14 | * [**Pip installer**](#pip) 15 | * [**Developer installer**](#developer) 16 | * [**Usage**](#usage) 17 | * [**Python and jupyter notebooks**](#python-and-jupyter-notebooks) 18 | * [**Troubleshooting**](#troubleshooting) 19 | * [**Citing structuremap**](#citing-structuremap) 20 | * [**How to contribute**](#how-to-contribute) 21 | * [**Changelog**](#changelog) 22 | 23 | --- 24 | ## About 25 | 26 | An open-source Python package for integrating information from predicted protein structures deposited in the [AlphaFold database](https://alphafold.ebi.ac.uk/) with proteomics data and specifically with post-translational modifications (PTMs). You can find a detailed description of the tool and its capabilities to generate biological insights in ["The structural context of PTMs at a proteome wide scale" by Bludau et al. (2022)](https://doi.org/10.1371/journal.pbio.3001636). The complete analysis workflow of this study performed with structuremap can be found [here](https://github.com/MannLabs/structuremap_analysis). 27 | 28 | --- 29 | ## License 30 | 31 | structuremap was developed by the [Mann Labs at the Max Planck Institute of Biochemistry](https://www.biochem.mpg.de/mann) and the [University of Copenhagen](https://www.cpr.ku.dk/research/proteomics/mann/) and is freely available with an [Apache License](LICENSE.txt). External Python packages (available in the [requirements](requirements) folder) have their own licenses, which can be consulted on their respective websites. 
32 | 33 | --- 34 | ## Installation 35 | 36 | structuremap can be installed and used on all major operating systems (Windows, macOS and Linux). 37 | There are two different types of installation possible: 38 | 39 | * [**Pip installer:**](#pip) Choose this installation if you want to use structuremap as a Python package in an existing Python 3.8 environment (e.g. a Jupyter notebook). If needed, the GUI and CLI can be installed with pip as well. 40 | * [**Developer installer:**](#developer) Choose this installation if you are familiar with CLI tools, [conda](https://docs.conda.io/en/latest/) and Python. This installation allows access to all available features of structuremap and even allows to modify its source code directly. Generally, the developer version of structuremap outperforms the precompiled versions which makes this the installation of choice for high-throughput experiments. 41 | 42 | ### Pip 43 | 44 | structuremap can be installed in an existing Python 3.8 environment with a single `bash` command. *This `bash` command can also be run directly from within a Jupyter notebook by prepending it with a `!`*: 45 | 46 | ```bash 47 | pip install structuremap 48 | ``` 49 | 50 | Installing structuremap like this avoids conflicts when integrating it in other tools, as this does not enforce strict versioning of dependencies. However, if new versions of dependencies are released, they are not guaranteed to be fully compatible with structuremap. While this should only occur in rare cases where dependencies are not backwards compatible, you can always force structuremap to use dependency versions which are known to be compatible with: 51 | 52 | ```bash 53 | pip install "structuremap[stable]" 54 | ``` 55 | 56 | NOTE: You might need to run `pip install pip==21.0` before installing structuremap like this. Also note the double quotes `"`. 57 | 58 | For those who are really adventurous, it is also possible to directly install any branch (e.g. 
`@development`) with any extras (e.g. `#egg=structuremap[stable,development-stable]`) from GitHub with e.g. 59 | 60 | ```bash 61 | pip install "git+https://github.com/MannLabs/structuremap.git@development#egg=structuremap[stable,development-stable]" 62 | ``` 63 | 64 | ### Developer 65 | 66 | structuremap can also be installed in editable (i.e. developer) mode with a few `bash` commands. This allows to fully customize the software and even modify the source code to your specific needs. When an editable Python package is installed, its source code is stored in a transparent location of your choice. While optional, it is advised to first (create and) navigate to e.g. a general software folder: 67 | 68 | ```bash 69 | mkdir ~/folder/where/to/install/software 70 | cd ~/folder/where/to/install/software 71 | ``` 72 | 73 | ***The following commands assume you do not perform any additional `cd` commands anymore***. 74 | 75 | Next, download the structuremap repository from GitHub either directly or with a `git` command. This creates a new structuremap subfolder in your current directory. 76 | 77 | ```bash 78 | git clone https://github.com/MannLabs/structuremap.git 79 | ``` 80 | 81 | For any Python package, it is highly recommended to use a separate [conda virtual environment](https://docs.conda.io/en/latest/), as otherwise *dependency conflicts can occur with already existing packages*. 82 | 83 | ```bash 84 | conda create --name structuremap python=3.8 -y 85 | conda activate structuremap 86 | ``` 87 | 88 | Finally, structuremap and all its [dependencies](requirements) need to be installed. 
To take advantage of all features and allow development (with the `-e` flag), this is best done by also installing the [development dependencies](requirements/requirements_development.txt) instead of only the [core dependencies](requirements/requirements.txt): 89 | 90 | ```bash 91 | pip install -e "./structuremap[development]" 92 | ``` 93 | 94 | By default this installs loose dependencies (no explicit versioning), although it is also possible to use stable dependencies (e.g. `pip install -e "./structuremap[stable,development-stable]"`). 95 | 96 | ***By using the editable flag `-e`, all modifications to the [structuremap source code folder](structuremap) are directly reflected when running structuremap. Note that the structuremap folder cannot be moved and/or renamed if an editable version is installed. In case of confusion, you can always retrieve the location of any Python module with e.g. the command `import module` followed by `module.__file__`.*** 97 | 98 | --- 99 | ## Usage 100 | 101 | ### Python and Jupyter notebooks 102 | 103 | structuremap can be imported as a Python package into any Python script or notebook with the command `import structuremap`. 104 | 105 | A brief [Jupyter notebook tutorial](nbs/tutorial.ipynb) on how to use the API is also present in the [nbs folder](nbs). 106 | 107 | --- 108 | ## Troubleshooting 109 | 110 | In case of issues, check out the following: 111 | 112 | * [Issues](https://github.com/MannLabs/structuremap/issues): Try a few different search terms to find out if a similar problem has been encountered before 113 | * [Discussions](https://github.com/MannLabs/structuremap/discussions): Check if your problem or feature request has been discussed before. 114 | 115 | --- 116 | ## Citing structuremap 117 | 118 | If you use structuremap for your work, please cite our publication: 119 | 120 | Bludau I, et al. (2022) The structural context of posttranslational modifications at a proteome-wide scale. PLOS Biology 20(5): e3001636. 
https://doi.org/10.1371/journal.pbio.3001636 121 | 122 | --- 123 | ## How to contribute 124 | 125 | If you like this software, you can give us a [star](https://github.com/MannLabs/structuremap/stargazers) to boost our visibility! All direct contributions are also welcome. Feel free to post a new [issue](https://github.com/MannLabs/structuremap/issues) or clone the repository and create a [pull request](https://github.com/MannLabs/structuremap/pulls) with a new branch. For an even more interactive participation, check out the [discussions](https://github.com/MannLabs/structuremap/discussions) and the [Contributors License Agreement](misc/CLA.md). 126 | 127 | --- 128 | ## Changelog 129 | 130 | See the [HISTORY.md](HISTORY.md) for a full overview of the changes made in each version. 131 | -------------------------------------------------------------------------------- /data/test_files/kinase_motifs.csv: -------------------------------------------------------------------------------- 1 | enzyme motif mod_pos 2 | Akt kinase substrate motif [R][A-Z][R][A-Z][A-Z][ST][FL] 5 3 | Akt kinase substrate motif [R][A-Z][R][A-Z][A-Z][ST] 5 4 | Akt kinase substrate motif [G][R][A][R][T][ST][S][FAE] 6 5 | Akt kinase substrate motif [RQK][RKNQPH][RK][RST][NKQHDP][S][FWIMNS][STH][RSK][STPQ] 5 6 | Akt kinase substrate motif [RK][A-Z][RK][ST][A-Z][S] 5 7 | AMP-activated protein kinase substrate motif [MVLIF][RKH][A-Z][A-Z][A-Z][ST][A-Z][A-Z][A-Z][MVLIF] 5 8 | AMP-activated protein kinase substrate motif [MVLI][A-Z][A-Z][RKH][A-Z][ST][A-Z][A-Z][A-Z][MVLI] 5 9 | AMP-activated protein kinase substrate motif [MVLIF][RKH][A-Z][A-Z][ST][A-Z][A-Z][A-Z][MVLIF] 4 10 | AMP-activated protein kinase 2 substrate motif [RK][A-Z][R][A-Z][A-Z][S][A-Z][A-Z][A-Z][RK] 5 11 | ATM kinase substrate motif [PLIM][A-Z][LIDE][S][Q] 3 12 | ATM kinase substrate motif [L][S][Q][E] 1 13 | ATM kinase substrate motif [S][Q] 0 14 | Aurora-A kinase substrate motif [RKN][R][A-Z][ST][MLVI] 3 15 | b-Adrenergic Receptor 
kinase substrate motif [DE][ST][A-Z][A-Z][A-Z] 1 16 | Branched chain alpha-ketoacid dehydrogenase kinase substrate motif [H][S][T][S][D][D] 1 17 | Branched chain alpha-ketoacid dehydrogenase kinase substrate motif [Y][R][S][V][D][E] 2 18 | Calmodulin-dependent protein kinase I substrate motif [MVLIF][A-Z][R][A-Z][A-Z][ST][A-Z][A-Z][A-Z][MVLIF] 5 19 | Calmodulin-dependent protein kinase II alpha substrate motif [MILVFY][A-Z][R][A-Z][A-Z][ST][MILVFY] 5 20 | Calmodulin-dependent protein kinase II substrate motif [R][A-Z][A-Z][ST] 3 21 | Calmodulin-dependent protein kinase II substrate motif [KF][RK][QM][QMKLF][S][FIMLV][DEI][LMKI][FK] 4 22 | Calmodulin-dependent protein kinase II substrate motif [MVLIF][A-Z][RK][A-Z][A-Z][ST][A-Z][A-Z] 5 23 | Calmodulin-dependent protein kinase II substrate motif [R][A-Z][A-Z][S] 3 24 | Calmodulin-dependent protein kinase IV substrate motif [V][P][G][K][A][R][K][K][S][S][C][Q][L][L] 8 25 | Calmodulin-dependent protein kinase IV substrate motif [P][L][A][R][T][L][S][V][A][G][L][P] 6 26 | Calmodulin-dependent protein kinase IV substrate motif [MILVFY][A-Z][R][A-Z][A-Z][ST] 5 27 | Casein kinase I delta substrate motif [E][FE][D][TAG][G][S][I][IFYG][IGF][FG][FPL] 5 28 | Casein kinase I gamma substrate motif [Y][YE][DY][AD][AG][S][I][IYFG][IGF][FG][FPL] 5 29 | Casein kinase I substrate motif [DE][A-Z][A-Z][ST] 3 30 | Casein kinase II substrate motif [EDA][DE][ED][ED][S][EDA][DEA][ED][ED] 4 31 | Casein kinase II substrate motif [S][A-Z][EST] 0 32 | Casein kinase II substrate motif [S][A-Z][A-Z][EST] 0 33 | Casein kinase II substrate motif [ST][A-Z][A-Z][ED] 0 34 | Casein kinase II substrate motif [S][D][A-Z][E] 0 35 | Casein kinase II substrate motif [S][A-Z][A-Z][ED] 0 36 | Casein kinase II substrate motif [S][DE][A-Z][DE][A-Z][DE] 0 37 | Casein kinase II substrate motif [DE][S][DE][A-Z][DE] 1 38 | Casein kinase II substrate motif [S][DE][DE][DE] 0 39 | Casein kinase II substrate motif [ST][A-Z][A-Z][DE] 0 40 | Casein kinase II substrate 
motif [ST][A-Z][A-Z][EDSY] 0 41 | Casein kinase II substrate motif [SEPG][DSNEP][EDGQW][YEDSWT][WED][S][DE][DEWN][ED][EDNQ] 5 42 | Cdc2 kinase substrate motif [RK][S][P][RP][RKH] 1 43 | Cdc2 kinase substrate motif [ST][P][A-Z][RK] 0 44 | Cdc2 kinase substrate motif [H][H][H][RK][S][P][R][RK][R] 4 45 | Cdc2 like protein kinase substrate motif [P][A-Z][ST][P][K][K][A-Z][K][K] 2 46 | CDK1,2,4,6 kinase substrate motif [ST][P][A-Z][RK] 0 47 | CDK kinase substrate motif [S][P][A-Z][RK][A-Z] 0 48 | CDK4 kinase substrate motif [P][L][ST][P][I][P][KRH] 2 49 | CDK4 kinase substrate motif [P][L][ST][P][A-Z][KRH] 2 50 | CDK5 kinase substrate motif [T][P][A-Z][K] 0 51 | CDK5 kinase substrate motif [KHG][H][HP][KGH][S][P][RK][HRK][RHK] 4 52 | CDK5 kinase substrate motif [ST][P][G][ST][P][G][T][P] 3 53 | Chk1 kinase substrate motif [MILV][A-Z][RK][A-Z][A-Z][ST] 5 54 | CLK1 kinase substrate motif [R][A-Z][A-Z][ST][A-Z][A-Z][R] 3 55 | CLK1,2 kinase substrate motif [RK][A-Z][RK][A-Z][RK][A-Z][S][A-Z][A-Z][R] 6 56 | CLK2 kinase substrate motif [R][RH][RH][RE][R][E][RH][S][R][RD][L] 7 57 | DMPK1 kinase substrate motif [K][K][A-Z][R][R][T][LV][A-Z] 5 58 | DMPK1 kinase substrate motif [K][K][R][A-Z][R][T][LV][A-Z] 5 59 | DMPK1 kinase substrate motif [RK][A-Z][R][R][A-Z][ST][LV][A-Z] 5 60 | DMPK1,2 kinase substrate motif [R][A-Z][A-Z][ST][LV][R] 3 61 | DNA dependent Protein kinase substrate motif [A-Z][S][Q] 1 62 | DNA dependent Protein kinase substrate motif [P][ST][A-Z] 1 63 | DOA/CDC-like kinase 2 substrate motif [R][RK][R][ER][R][EA][HR][S][R][R][RD][LE] 7 64 | Doublecortin kinase-1 kinase substrate motif [ILVFM][R][R][A-Z][A-Z][ST][ILMVF] 5 65 | elF2 alpha kinase substrate motif [E][A-Z][S][A-Z][R][A-Z][A-Z][R] 2 66 | ERK1 kinase substrate motif [TPS][GPEY][PLI][LMP][S][P][GPF][PFGY][FYI] 4 67 | ERK1 kinase substrate motif [T][E][Y] 0 68 | ERK1,2 kinase substrate motif [P][A-Z][ST][P][P] 2 69 | ERK1,2 kinase substrate motif [A-Z][A-Z][P][A-Z][ST][P][P][P][A-Z] 4 70 | ERK1,2 kinase 
substrate motif [P][A-Z][ST][P] 2 71 | ERK1,2 kinase substrate motif [S][P] 0 72 | ERK1, ERK2, SAPK, CDK5 and GSK3 kinase substrate motif [K][S][P][P] 1 73 | ERK2 kinase substrate motif [DYWE][C][PSCE][PCSLTV][LMT][S][PA][TSGRCF][WPS][WF] 5 74 | G protein-coupled receptor kinase 1 substrate motif [A-Z][A-Z][ST][E] 2 75 | G protein-coupled receptor kinase 1 substrate motif [A-Z][ST][A-Z][A-Z][A-Z][APST] 1 76 | Growth associated histone HI kinase substrate motif [ST][ST][P][A-Z][KR] 1 77 | Growth associated histone HI kinase substrate motif [KR][ST][P] 1 78 | Growth associated histone HI kinase substrate motif [ST][P][KR] 0 79 | GSK3 kinase substrate motif [S][A-Z][A-Z][A-Z][S] 0 80 | GSK3, Erk1, Erk2 and CDK5 kinase motif [P][A-Z][T][P] 2 81 | GSK-3, ERK1, ERK2, CDK5 substrate motif [R][A-Z][A-Z][S][P][V] 3 82 | GSK-3, ERK1, ERK2, CDK5 substrate motif [K][ST][P][A-Z][K] 1 83 | GSK-3, ERK1, ERK2, CDK5 substrate motif [K][S][P][A-Z][A-Z][A-Z][K] 1 84 | GSK-3, ERK1, ERK2, CDK5 substrate motif [K][S][P][A-Z][A-Z][K] 1 85 | GSK-3, ERK1, ERK2, CDK5 substrate motif [K][S][P][A-Z][A-Z][A-Z][A-Z][K] 1 86 | GSK-3, ERK1, ERK2, CDK5 substrate motif [K][T][P][A][K][E][E] 1 87 | GSK-3, ERK1, ERK2, CDK5 substrate motif [P][A-Z][S][P] 2 88 | GSK-3, ERK1, ERK2, CDK5 substrate motif [A-Z][ST][P] 1 89 | GSK-3, ERK1, ERK2, CDK5 substrate motif [A-Z][A-Z][S][P] 2 90 | HMGCoA Reductase kinase substrate motif [MLVIF][RKH][A-Z][A-Z][S][A-Z][A-Z][A-Z][MLVIF] 4 91 | JNK1 kinase substrate motif [G][P][QM][S][P][I] 3 92 | LKB1 kinase substrate motif [L][R][T] 2 93 | MAPKAPK1 kinase substrate motif [RK][A-Z][R][A-Z][A-Z][S] 5 94 | MAPKAPK1 kinase substrate motif [R][R][R][A-Z][S] 4 95 | MAPKAPK2 kinase substrate motif [LFI][A-Z][A-Z][A-Z][R][QST][L][ST][MLIV] 7 96 | MAPKAPK2 kinase substrate motif [A-Z][A-Z][ND][A-Z][R][A-Z][A-Z][S][A-Z][A-Z] 7 97 | MAPKAPK2 kinase substrate motif [S][A-Z][A-Z][A-Z][ST] 4 98 | MAPK 11,13,14 kinase substrate motif [T][GPE][Y] 0 99 | MEKK kinase substrate motif 
[R][R][F][G][S][ND][R][R][F] 4 100 | MEKK kinase substrate motif [R][R][F][G][S][MLVIF][R][R][MLVIF] 4 101 | MLCK kinase substrate motif [K][K][R][A-Z][A-Z][S][A-Z][RK][RK] 5 102 | mTOR kinase substrate motif [F][T][Y] 1 103 | Nek 2 kinase substrate motif [I][R][R][L][S][T][R][R][R] 4 104 | NIMA kinase substrate motif [RN][FLM][RK][RK][S][RIVM][RIMV][MIFV][IFM] 4 105 | NIMA kinase substrate motif [F][R][A-Z][ST] 3 106 | NIMA kinase substrate motif [R][F][RK][RK][S][RI][RI][M][I] 4 107 | p70 Ribosomal S6 kinase substrate motif [RK][A-Z][R][A-Z][A-Z][ST][MLVI] 5 108 | p70 Ribosomal S6 kinase substrate motif [V][F][L][G][F][T][Y][V][A][P] 5 109 | PAK1 kinase substrate motif [A][K][R][R][R][L][S][S][S][L][R][A] 8 110 | PAK1 kinase substrate motif [V][R][K][R][T][L][R][R][L] 4 111 | PAK2 kinase substrate motif [RK][RA-Z][A-Z][ST] 3 112 | PDK1 kinase substrate motif [F][A-Z][A-Z][F][ST][FY] 4 113 | Phosphorylase kinase substrate motif [K][R][K][Q][I][S][V][R] 5 114 | Phosphorylase kinase substrate motif [FMK][RK][MRQF][MFLI][S][S][FIML][FRK][LI][FLI] 5 115 | Phosphorylase kinase substrate motif [KR][A-Z][A-Z][S][VI] 3 116 | Pim1 kinase substrate sequence [RK][RK][RK][A-Z][ST][A-Z] 4 117 | Pim2 kinase substrate sequence [RK][RKAQP][RK][RQHNY][PHK][S][GST][PSGQHST][SPQGD][TSPG] 5 118 | PKA kinase substrate motif [R][R][A-Z][S][MILVFY] 3 119 | PKA kinase substrate motif [R][A-Z][S] 2 120 | PKA kinase substrate motif [K][R][A-Z][A-Z][S] 4 121 | PKA kinase substrate motif [R][A-Z][A-Z][S] 3 122 | PKA kinase substrate motif [RK][A-Z][ST] 2 123 | PKA kinase substrate motif [K][A-Z][A-Z][ST] 3 124 | PKA kinase substrate motif [RK][RK][A-Z][ST] 3 125 | PKA kinase substrate motif [K][A-Z][A-Z][A-Z][ST] 4 126 | PKA kinase substrate motif [ST][A-Z][RK] 0 127 | PKA kinase substrate motif [R][R][R][R][S][I][I][F][I] 4 128 | PKA kinase substrate motif [R][R][A-Z][S] 3 129 | PKA kinase substrate motif [R][RK][A-Z][ST][ILVFY][DCA-Z][A-Z][D] 3 130 | PKA kinase substrate motif 
[R][R][A-Z][S] 3 131 | PKA kinase substrate motif [R][R][R][RN][S][I][I][FD] 4 132 | PKA kinase substrate motif [RCPK][RAP][RK][RKS][NLSMP][S][ILVC][SPHQ][SWQ][SLG] 5 133 | PKA, PKG kinase substrate motif [R][RK][A-Z][ST][ND] 3 134 | PKC alpha kinase substrate motif [A][R][K][G][S][L][R][Q] 4 135 | PKC alpha kinase substrate motif [R][RF][R][R][RK][G][S][F][RK][RK] 6 136 | PKC beta kinase substrate motif [LRF][RK][R][KQ][G][S][FM][K][K][A-Z][A] 5 137 | PKC delta kinase substrate motif [R][A-Z][R][K][G][S][F] 5 138 | PKC epsilon kinase substrate motif [K][R][Q][G][S][V][R][R] 4 139 | PKC epsilon kinase substrate motif [R][KER][A-Z][S] 3 140 | PKC eta kinase substrate motif [A][R][A-Z][A-Z][R][RK][R][S][F][R][R] 7 141 | PKC family kinase substrate motif [F][A-Z][A-Z][F][ST][ST][FY] 5 142 | PKC gamma kinase substrate motif [R][R][R][K][GK][S][F][RK][RK][K][A] 5 143 | PKC kinase substrate motif [A-Z][R][A-Z][A-Z][S][A-Z][R][A-Z] 4 144 | PKC kinase substrate motif [ST][A-Z][RK] 0 145 | PKC kinase substrate motif [RK][A-Z][A-Z][ST] 3 146 | PKC kinase substrate motif [RK][A-Z][A-Z][ST][A-Z][RK] 3 147 | PKC kinase substrate motif [KR][A-Z][ST] 2 148 | PKC kinase substrate motif [RK][A-Z][ST][A-Z][RK] 2 149 | PKC mu kinase substrate motif [LV][VLA][R][QKE][M][S] 5 150 | PKC theta kinase substrate motif [RFWM][WAKS][RSKH][RHSQ][RKNPGQ][S][IFRVKSLM][KMRST][RSKW][RKG] 5 151 | PKC zeta kinase substrate motif [F][A-Z][R][A-Z][A-Z][S][FM][FM] 5 152 | PKD kinase substrate motif [LVI][RKQ][RK][RKTQM][NKRLMH][S][FWIMLV][SN][RSPYW][SRNL] 5 153 | PKG kinase substrate motif [R][RK][A-Z][ST][ND] 3 154 | PKR kinase substrate motif [R][A-Z][A-Z][ST][A-Z][R][A-Z][A-Z][R] 3 155 | Plk1 kinase substrate motif [DE][A-Z][ST][ILVM][A-Z][DE] 2 156 | Pyruvate dehydrogenase kinase substrate motif [A-Z][S][A-Z][A-Z][D][A-Z][A-Z] 1 157 | RAF1 kinase substrate motif [P][L][T][L][P] 2 158 | RAF1 kinase substrate motif [P][L][L][T][P] 3 159 | RAF1 kinase substrate motif [P][L][T][P] 2 160 | RAF1 kinase 
substrate motif [P][T][L][P] 1 161 | RAF1 kinase substrate motif [P][L][T][L][P] 2 162 | RAF1 kinase substrate motif [P][T][L][P] 1 163 | RAF1 kinase substrate motif [L][T][P] 1 164 | TGF beta receptor kinase substrate motif [K][K][K][K][K][K][ST][A-Z][A-Z][A-Z] 6 165 | TGF beta receptor kinase substrate motif [RKQN][MCW][RTSN][EDSN][RKEDN][S][SDE][SD][RN][NHSRC] 5 166 | ZIP kinase substrate motif [R][R][A-Z][A-Z][S] 4 167 | ZIP kinase substrate motif [K][R][A-Z][R][S] 4 168 | ZIP kinase substrate motif [K][R][R][A-Z][T] 4 169 | Dual specificity protein phosphatase 1 substrate motif [T][E][Y] 0 170 | Dual specificity protein phosphatase 6 substrate motif [T][A-Z][Y] 0 171 | PP2A, PP2C substrate motif [R][R][A][ST][V][A] 3 172 | PP2B substrate motif [A-Z][R][A-Z][A-Z][S][V][A] 4 173 | PP2C delta substrate motif [A-Z][T][A-Z][Y][A-Z] 1 174 | 14-3-3 domain binding motif [K][C][S][T][W][P] 3 175 | 14-3-3 domain binding motif [R][A-Z][A-Z][S] 3 176 | 14-3-3 domain binding motif [R][A-Z][R][A-Z][A-Z][S][A-Z][P] 5 177 | 14-3-3 domain binding motif [Y][T][V] 1 178 | 14-3-3 domain binding motif [R][S][A-Z][ST][A-Z][P] 3 179 | 14-3-3 domain binding motif [R][A-Z][YF][A-Z][S][A-Z][P] 4 180 | 14-3-3 domain binding motif [R][P][V][S][S][A][A][S][V][Y] 7 181 | BARD1 BRCT domain binding motif [S][DE][DE][E] 0 182 | Beta-TrCP1 domain binding motif [D][S][G][A-Z][A-Z][S] 5 183 | BRCA1 BRCT domain binding motif [S][FYH][VFY][FY] 0 184 | CDC4 WD40 domain binding motif [IL][ILP][T][P][RK] 2 185 | Chk2 FHA domain binding motif [H][F][D][T][Y][L][I] 3 186 | FHA domain binding motif [RDH][LY][LM][KA][T][QLMEV][KLIR] 4 187 | MDC1 BRCT domain binding motif [S][ST][A-Z] 1 188 | Plk1 PBD domain binding motif [S][ST][A-Z] 1 189 | RAD9 BRCT domain binding motif [S][Y][I][I] 0 190 | WW domain binding motif [ST][P] 0 191 | Abl kinase substrate motif [Y][M][A-Z][M] 0 192 | Abl kinase substrate motif [E][D][A][I][Y] 4 193 | Abl kinase substrate motif [A-Z][V][I][Y][A][A][P][F] 3 194 | Abl kinase 
substrate motif [E][A][I][Y][A][A][P][F] 3 195 | Abl kinase substrate motif [E][E][I][Y][E][E][Y] 6 196 | Abl kinase substrate motif [E][E][I][Y][E][E][Y] 3 197 | Abl kinase substrate motif [E][A-Z][I][Y][A-Z][A-Z][P][A-Z] 3 198 | Abl kinase substrate motif [E][E][I][Y][Y][Y][V][H] 3 199 | Abl kinase substrate motif [E][R][I][Y][A][R][T][K] 3 200 | Abl kinase substrate motif [A][E][V][IVLF][Y][A][A][PF][F] 4 201 | ALK kinase substrate motif [Y][A-Z][A-Z][Y][Y] 0 202 | ALK kinase substrate motif [Y][DE][A-Z][ILVM] 0 203 | ALK kinase substrate motif [DE][A-Z][A-Z][Y] 3 204 | ALK kinase substrate motif [Y][A-Z][A-Z][A-Z][A-Z][FY] 0 205 | CSK kinase substrate motif [E][E][DE][I][Y][F][F][F][F] 4 206 | CSK kinase substrate motif [A-Z][A-Z][A-Z][I][Y][MIF][F][F][F] 4 207 | EGFR kinase substrate motif [E][E][E][E][Y][F][E][L][V] 4 208 | EGFR kinase substrate motif [EDRA][DE][DE][EDI][Y][FVIE][EFD][LIFV][V] 4 209 | EGFR kinase substrate motif [A-Z][DE][Y][A-Z] 2 210 | EGFR kinase substrate motif [Y][I][P][P] 0 211 | EGFR kinase substrate motif [A-Z][DE][Y][ILV] 2 212 | Fes kinase substrate motif [E][E][E][I][Y][E][E][I][E] 4 213 | Fes kinase substrate motif [EAD][EA][EA][IEV][Y][DE][DE][IVE][EIV] 4 214 | FGFR kinase substrate motif [E][E][E][Y][F][F][L][F] 3 215 | FGFR kinase substrate motif [A][EA][E][E][Y][FV][F][LFMIV][F] 4 216 | Fgr kinase substrate motif [M][E][EN][IV][Y][GE][I][F][F] 4 217 | IGF1 receptor kinase substrate motif [K][K][K][S][P][G][E][Y][V][N][I][E][F][G] 7 218 | Insulin receptor kinase substrate motif [Y][M][A-Z][M] 0 219 | Insulin receptor kinase substrate motif [E][E][END][Y][MF][MF][MFIE][MF] 3 220 | Insulin receptor kinase substrate motif [A-Z][E][E][E][Y][M][M][M][M] 4 221 | Insulin receptor kinase substrate motif [K][K][S][R][G][D][Y][M][T][M][Q][I][G] 6 222 | Insulin receptor kinase substrate motif [K][K][K][L][P][A][T][G][D][Y][M][N][M][S][P][V][G][D] 9 223 | JAK2 kinase substrate motif [Y][A-Z][A-Z][LIV] 0 224 | JNK kinase substrate motif 
[T][P][Y] 2 225 | Lck kinase substrate motif [A-Z][E][A-Z][I][Y][G][V][L][F] 4 226 | Lck kinase substrate motif [E][A-Z][IVLF][Y][GA][V][LVFI][FLVI] 3 227 | Lyn kinase substrate motif [D][E][E][I][Y][EG][E][L][A-Z] 4 228 | Lyn kinase substrate motif [DE][A-Z][A-Z][A-Z][A-Z][A-Z][A-Z][A-Z][DE][A-Z][A-Z][Y][A-Z][A-Z][L][A-Z][A-Z][A-Z][A-Z][A-Z][A-Z][A-Z][Y][A-Z][A-Z][LI] 11 229 | PDGFR kinase substrate motif [E][E][E][E][Y][V][F][I][A-Z] 4 230 | PDGFR kinase substrate motif [LN][RI][T][Y] 3 231 | PDGFR kinase substrate motif [DE][DE][DE][DE][Y][VEI][F][IVF] 4 232 | Src family kinase substrate motif [DE][A-Z][A-Z][A-Z][A-Z][A-Z][A-Z][A-Z][DE][A-Z][A-Z][Y][A-Z][A-Z][L][A-Z][A-Z][A-Z][A-Z][A-Z][A-Z][A-Z][Y][A-Z][A-Z][LI] 11 233 | Src family kinase substrate motif [IVLS][A-Z][Y][A-Z][A-Z][LI] 2 234 | Src kinase substrate motif [Y][M][A-Z][M] 0 235 | Src kinase substrate motif [Y][I][Y][G][S][F][K] 2 236 | Src kinase substrate motif [E][E][E][I][Y][GE][E][F][D] 4 237 | Src kinase substrate motif [D][DE][EDG][IVL][Y][GE][E][FI][F] 4 238 | Src kinase substrate motif [DE][A-Z][A-Z][A-Z][A-Z][A-Z][A-Z][A-Z][DE][A-Z][A-Z][Y][A-Z][A-Z][L][A-Z][A-Z][A-Z][A-Z][A-Z][A-Z][A-Z][Y][A-Z][A-Z][LI] 11 239 | Src kinase substrate motif [DE][DE][EDG][IVL][Y][GED][E][FILV][DE] 4 240 | Src kinase substrate motif [Y][AGSTDE] 0 241 | Syk kinase substrate motif [EDTY][A-Z][Y][E][E] 2 242 | PTP1B phosphatase substrate motif [DE][Y][Y][RK] 2 243 | PTP1B phosphatase substrate motif [DE][Y][Y][RK] 1 244 | PTP1B phosphatase substrate motif [E][F][Y][GA][T][Y][GA] 2 245 | PTP1B phosphatase substrate motif [E][YFD][Y][M] 2 246 | PTP1B phosphatase substrate motif [EP][MLIVF][Y][GA][A-Z][MLIVFY][A] 2 247 | PTP1B phosphatase substrate motif [R][D][A-Z][Y][A-Z][T][D][Y][Y][R] 8 248 | PTP1B phosphatase substrate motif [E][FDY][Y] 2 249 | PTP1B, TC-PTP phosphatase substrate motif [D][Y][Y][R] 2 250 | PTP1B, TC-PTP phosphatase substrate motif [D][Y][Y][R] 1 251 | PTPRH phosphatase substrate motif 
[DE][F][Y][GA][FY][AG] 2 252 | PTPRJ phosphatase substrate motif [F][MLVI][Y] 2 253 | SHP1 phosphatase substrate motif [DE][A-Z][LIV][A-Z][Y][A-Z][A-Z][LIV] 4 254 | SHP1 phosphatase substrate motif [DE][A-Z][LIV][A-Z][A-Z][Y][A-Z][A-Z][LIV] 5 255 | SHP1 phosphatase substrate motif [DE][DE][DE][L][A-Z][Y][A-Z][A-Z][FMLVI][DE] 5 256 | SHP1 phosphatase substrate motif [DE][A-Z][Y] 2 257 | SHP1 phosphatase substrate motif [EP][FIL][Y][Y][A][A-Z][FILV] 3 258 | SHP2 phosphatase substrate motif [Y][I][D][L] 0 259 | SHP2 phosphatase substrate motif [Y][A][S][I] 0 260 | SHP2 phosphatase substrate motif [E][F][Y][A][A-Z][VI][G][RKH][S] 2 261 | TC-PTP phosphatase substrate motif [DE][DE][A-Z][A-Z][A-Z][Y][V][A] 5 262 | TC-PTP phosphatase substrate motif [EDY][Y] 1 263 | 3BP2 SH2 domain binding motif [Y][EMV][NVI] 0 264 | Abl SH2 domain binding motif [Y][E][N][P] 0 265 | Crk SH2 domain binding motif [Y][A-Z][A-Z][P] 0 266 | Crk SH2 domain binding motif [Y][D][H][P] 0 267 | Csk SH2 domain binding motif [Y][TAS][KRQN][MIVR] 0 268 | Grb2 SH2 domain binding motif [Y][YIV][N][FLIV] 0 269 | Fes SH2 domain binding motif [Y][E][A-Z][VI] 0 270 | Fgr SH2 domain binding motif [Y][E][E][IV] 0 271 | Fyn SH2 domain binding motif [Y][E][D][P] 0 272 | GRB2, 3BP2, Csk, Fes, Syk C-terminal SH2 domain binding motif [Y][Y][MILV][A-Z][MILV] 1 273 | Grb2 SH2 domain binding motif [Y][QYV][N][YQF] 0 274 | Grb2 SH2 domain binding motif [Y][A-Z][N] 0 275 | GRB7, GRB10 SH2 domain binding motif [FY][Y][ETYS][N][ILVPTYS] 1 276 | HCP SH2 domain binding motif [Y][F][A-Z][FPLY] 0 277 | Itk SH2 domain binding motif [Y][AEV][YFESNV][PFIH] 0 278 | Lck and Src SH2 domain binding motif [Y][D][Y][V] 0 279 | Nck SH2 domain binding motif [Y][D][E][P] 0 280 | PI3 kinase p85 SH2 domain binding motif [Y][M][A-Z][M] 0 281 | PI3 kinase p85 SH2 domain binding motif [Y][A-Z][A-Z][M] 0 282 | PI3 kinase p85 SH2 domain binding motif [Y][M][P][M][S] 0 283 | PLCgamma C and N-terminal SH2 domain binding motif [Y][LIV][E][LIV] 0 
284 | RasGAP C-terminal SH2 domain binding motif [Y][A-Z][A-Z][P] 0 285 | RasGAP N-terminal SH2 domain binding motif [Y][I][L][V][A-Z][MLIVP] 0 286 | SAP and EAT2 SH2 domain binding motif [T][I][Y][A-Z][A-Z][VI] 2 287 | Sem5 SH2 domain binding motif [Y][LV][N][VP] 0 288 | Shb SH2 domain binding motif [Y][TVI][A-Z][L] 0 289 | SHC SH2 domain binding motif [Y][IEYL][A-Z][ILM] 0 290 | SHIP2 SH2 domain binding motif [IVLS][A-Z][Y][A-Z][A-Z][LI] 2 291 | SHP1 C-terminal SH2 domain binding motif [VIL][A-Z][Y][A][A-Z][LV] 2 292 | SHP1 C-terminal SH2 domain binding motif [A-Z][A-Z][Y][Y][M][KR] 2 293 | SHP1 N-terminal SH2 domain binding motif [L][YH][Y][MF][A-Z][FM] 2 294 | SHP1 N-terminal SH2 domain binding motif [L][A-Z][Y][A][A-Z][L] 2 295 | SHP1 SH2 domain binding motif [IV][A-Z][Y][A-Z][A-Z][LV] 2 296 | SHP1, SHP2 SH2 domain binding motif [VIL][A-Z][Y][MLF][A-Z][P] 2 297 | SHP2 CSH2 domain binding motif [TVIY][A-Z][Y][ASTV][A-Z][IVL] 2 298 | SHP2 C-terminal SH2 domain binding motif [ILV][ILV][ILVFTY][Y][TILV][IL][ILVP] 3 299 | SHP2 N-terminal SH2 domain binding motif [HF][A-Z][V][A-Z][TSA][Y] 5 300 | SHP2 N-terminal SH2 domain binding motif [IVL][A-Z][Y][FM][A-Z][P] 2 301 | SHP2 N-terminal SH2 domain binding motif [Y][IV][A-Z][IV] 0 302 | SHP2 N-terminal SH2 domain binding motif [ILVM][A-Z][Y][TVA][A-Z][IVLF] 2 303 | SHP2 N-terminal SH2 domain binding motif [IV][A-Z][Y][LMT][Y][APT][S][G] 2 304 | SHP2 N-terminal SH2 domain binding motif [W][MTV][Y][YR][IL][A-Z] 2 305 | SHP2, PLCgamma SH2 domain binding motifs [Y][I][P][P] 0 306 | Src and Abl SH2 domain binding motif [Y][M][A-Z][M] 0 307 | Src, Fyn, Lck, Fgr, Abl, Crk, Nck SH2 domain binding motif [Y][RKHQED][RKHQED][IP] 0 308 | Src, Fyn,Csk, Nck and SHC SH2 domain binding motif [P][P][A-Z][Y] 3 309 | Src,Lck and Fyn SH2 domains binding motif [Y][E][E][I] 0 310 | STAT1 SH2 domain binding motif [Y][DE][PR][RPQ] 0 311 | STAT3 SH2 domain binding motif [Y][A-Z][A-Z][Q] 0 312 | STAT3 SH2 domain binding motif 
[Y][MLVIF][PRKH][Q] 0 313 | Syk C-terminal SH2 domain binding motif [Y][QTE][EQ][LI] 0 314 | Syk N-terminal SH2 domain binding motif [Y][T][T][ILM] 0 315 | Syk, ZAP-70, Shc, Lyn SH2 domain binding motif [DE][A-Z][A-Z][A-Z][A-Z][A-Z][A-Z][A-Z][DE][A-Z][A-Z][Y][A-Z][A-Z][L][A-Z][A-Z][A-Z][A-Z][A-Z][A-Z][A-Z][Y][A-Z][A-Z][LI] 11 316 | Tensin SH2 domain binding motif [Y][E][N][FIV] 0 317 | Vav SH2 domain binding motif [Y][MLE][E][P] 0 318 | Vav SH2 domain binding motif [Y][E][S][P] 0 319 | Cbl PTB domain binding motif [D][ND][A-Z][Y] 3 320 | Dok1 PTB domain binding motif [N][A-Z][L][Y] 3 321 | FRIP PTB domain binding motif [N][A-Z][A-Z][Y] 3 322 | Shc PTB domain binding motif [N][P][A-Z][Y] 3 323 | Shb PTB domain binding motif [D][D][A-Z][Y] 3 324 | ShcA PTB domain binding motif [N][P][A-Z][Y][F][A-Z][R] 3 325 | ShcC PTB domain binding motif [H][N][MLVI][MLVIN][N][P][ST][Y] 7 326 | -------------------------------------------------------------------------------- /data/test_files/pae_O15552.hdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MannLabs/structuremap/f14b4325e30f16394ea819af2e29f9c68f786ee4/data/test_files/pae_O15552.hdf -------------------------------------------------------------------------------- /data/test_files/pae_Q7Z6M3.hdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MannLabs/structuremap/f14b4325e30f16394ea819af2e29f9c68f786ee4/data/test_files/pae_Q7Z6M3.hdf -------------------------------------------------------------------------------- /data/test_files/ptm_file.csv: -------------------------------------------------------------------------------- 1 | ,protein_id,AA,position,ac,ac_reg,ga,gl,gl_reg,m,m_reg,p,p_reg,sm,sm_reg,ub,ub_reg 2 | 0,O43353,K,17,0,0,0,0,0,0,0,0,0,0,0,1,0 3 | 1,O43353,K,182,0,0,0,0,0,0,0,0,0,0,0,1,0 4 | 2,O43353,K,203,0,0,0,0,0,0,0,0,0,0,0,1,0 5 | 3,O43353,K,209,0,0,0,0,0,0,0,0,0,0,0,1,1 6 | 
4,O43353,K,384,0,0,0,0,0,1,0,0,0,0,0,0,0 7 | 5,O43353,K,410,0,0,0,0,0,0,0,0,0,0,0,1,0 8 | 6,O43353,K,480,0,0,0,0,0,0,0,0,0,0,0,1,0 9 | 7,O43353,K,508,0,0,0,0,0,0,0,0,0,0,0,1,0 10 | 8,O43353,K,538,0,0,0,0,0,0,0,0,0,0,0,1,0 11 | 9,O43353,R,26,0,0,0,0,0,1,0,0,0,0,0,0,0 12 | 10,O43353,S,168,0,0,0,0,0,0,0,1,0,0,0,0,0 13 | 11,O43353,S,174,0,0,0,0,0,0,0,1,0,0,0,0,0 14 | 12,O43353,S,176,0,0,0,0,0,0,0,1,1,0,0,0,0 15 | 13,O43353,S,178,0,0,0,0,0,0,0,1,0,0,0,0,0 16 | 14,O43353,S,180,0,0,0,0,0,0,0,1,0,0,0,0,0 17 | 15,O43353,S,181,0,0,0,0,0,0,0,1,0,0,0,0,0 18 | 16,O43353,S,319,0,0,0,0,0,0,0,1,0,0,0,0,0 19 | 17,O43353,S,345,0,0,0,0,0,0,0,1,0,0,0,0,0 20 | 18,O43353,S,357,0,0,0,0,0,0,0,1,0,0,0,0,0 21 | 19,O43353,S,363,0,0,0,0,0,0,0,1,0,0,0,0,0 22 | 20,O43353,S,374,0,0,0,0,0,0,0,1,0,0,0,0,0 23 | 21,O43353,S,393,0,0,0,0,0,0,0,1,0,0,0,0,0 24 | 22,O43353,S,399,0,0,0,0,0,0,0,1,0,0,0,0,0 25 | 23,O43353,S,428,0,0,0,0,0,0,0,1,0,0,0,0,0 26 | 24,O43353,S,527,0,0,0,0,0,0,0,1,0,0,0,0,0 27 | 25,O43353,S,529,0,0,0,0,0,0,0,1,0,0,0,0,0 28 | 26,O43353,S,531,0,0,0,0,0,0,0,1,0,0,0,0,0 29 | 27,O43353,S,539,0,0,0,0,0,0,0,1,0,0,0,0,0 30 | 28,O43353,T,411,0,0,0,0,0,0,0,1,0,0,0,0,0 31 | 29,O43353,T,412,0,0,0,0,0,0,0,1,0,0,0,0,0 32 | 30,O43353,Y,23,0,0,0,0,0,0,0,1,0,0,0,0,0 33 | 31,O43353,Y,381,0,0,0,0,0,0,0,1,0,0,0,0,0 34 | 32,O43353,Y,474,0,0,0,0,0,0,0,1,1,0,0,0,0 35 | 33,O96017,K,119,0,0,0,0,0,0,0,0,0,0,0,1,0 36 | 34,O96017,K,131,0,0,0,0,0,0,0,0,0,0,0,1,0 37 | 35,O96017,K,224,0,0,0,0,0,0,0,0,0,0,0,1,0 38 | 36,O96017,K,235,1,0,0,0,0,0,0,0,0,0,0,0,0 39 | 37,O96017,K,279,0,0,0,0,0,0,0,0,0,0,0,1,0 40 | 38,O96017,K,287,0,0,0,0,0,0,0,0,0,0,0,1,0 41 | 39,O96017,K,373,0,0,0,0,0,0,0,0,0,0,0,1,0 42 | 40,O96017,K,437,0,0,0,0,0,0,0,0,0,0,0,1,0 43 | 41,O96017,K,444,0,0,0,0,0,0,0,0,0,0,0,1,0 44 | 42,O96017,K,458,0,0,0,0,0,0,0,0,0,0,0,1,0 45 | 43,O96017,K,472,0,0,0,0,0,1,0,0,0,0,0,1,0 46 | 44,O96017,K,494,0,0,0,0,0,0,0,0,0,0,0,1,0 47 | 45,O96017,S,12,0,0,0,0,0,0,0,1,0,0,0,0,0 48 | 
46,O96017,S,120,0,0,0,0,0,0,0,1,0,0,0,0,0 49 | 47,O96017,S,140,0,0,0,0,0,0,0,1,1,0,0,0,0 50 | 48,O96017,S,15,0,0,0,0,0,0,0,1,0,0,0,0,0 51 | 49,O96017,S,164,0,0,0,0,0,0,0,1,1,0,0,0,0 52 | 50,O96017,S,19,0,0,0,0,0,0,0,1,1,0,0,0,0 53 | 51,O96017,S,210,0,0,0,0,0,0,0,1,1,0,0,0,0 54 | 52,O96017,S,24,0,0,0,0,0,0,0,1,0,0,0,0,0 55 | 53,O96017,S,260,0,0,0,0,0,0,0,1,0,0,0,0,0 56 | 54,O96017,S,28,0,0,0,0,0,0,0,1,1,0,0,0,0 57 | 55,O96017,S,33,0,0,0,0,0,0,0,1,1,0,0,0,0 58 | 56,O96017,S,35,0,0,0,0,0,0,0,1,1,0,0,0,0 59 | 57,O96017,S,372,0,0,0,0,0,0,0,1,1,0,0,0,0 60 | 58,O96017,S,379,0,0,0,0,0,0,0,1,1,0,0,0,0 61 | 59,O96017,S,39,0,0,0,0,0,0,0,1,0,0,0,0,0 62 | 60,O96017,S,40,0,0,0,0,0,0,0,1,0,0,0,0,0 63 | 61,O96017,S,41,0,0,0,0,0,0,0,1,0,0,0,0,0 64 | 62,O96017,S,42,0,0,0,0,0,0,0,1,0,0,0,0,0 65 | 63,O96017,S,435,0,0,0,0,0,0,0,1,0,0,0,0,0 66 | 64,O96017,S,44,0,0,0,0,0,0,0,1,0,0,0,0,0 67 | 65,O96017,S,456,0,0,0,0,0,0,0,1,1,0,0,0,0 68 | 66,O96017,S,50,0,0,0,0,0,0,0,1,0,0,0,0,0 69 | 67,O96017,S,516,0,0,0,0,0,0,0,1,1,0,0,0,0 70 | 68,O96017,S,52,0,0,0,0,0,0,0,1,0,0,0,0,0 71 | 69,O96017,S,55,0,0,0,0,0,0,0,1,0,0,0,0,0 72 | 70,O96017,S,62,0,0,0,0,0,0,0,1,0,0,0,0,0 73 | 71,O96017,S,67,0,0,0,0,0,0,0,1,0,0,0,0,0 74 | 72,O96017,S,73,0,0,0,0,0,0,0,1,1,0,0,0,0 75 | 73,O96017,T,205,0,0,0,0,0,0,0,1,1,0,0,0,0 76 | 74,O96017,T,225,0,0,0,0,0,0,0,1,0,0,0,0,0 77 | 75,O96017,T,26,0,0,0,0,0,0,0,1,1,0,0,0,0 78 | 76,O96017,T,378,0,0,0,1,0,0,0,1,1,0,0,0,0 79 | 77,O96017,T,383,0,0,0,0,0,0,0,1,1,0,0,0,0 80 | 78,O96017,T,387,0,0,0,0,0,0,0,1,1,0,0,0,0 81 | 79,O96017,T,389,0,0,0,0,0,0,0,1,1,0,0,0,0 82 | 80,O96017,T,43,0,0,0,0,0,0,0,1,0,0,0,0,0 83 | 81,O96017,T,432,0,0,0,0,0,0,0,1,0,0,0,0,0 84 | 82,O96017,T,45,0,0,0,0,0,0,0,1,0,0,0,0,0 85 | 83,O96017,T,517,0,0,0,0,0,0,0,1,1,0,0,0,0 86 | 84,O96017,T,65,0,0,0,0,0,0,0,1,0,0,0,0,0 87 | 85,O96017,T,68,0,0,0,0,0,0,0,1,1,0,0,0,0 88 | 86,O96017,Y,390,0,0,0,0,0,0,0,1,1,0,0,0,0 89 | 87,P02730,K,757,0,0,0,0,0,0,0,0,0,0,0,1,0 90 | 88,P02730,S,162,0,0,0,1,0,0,0,0,0,0,0,0,0 91 | 
89,P02730,S,181,0,0,0,0,0,0,0,1,0,0,0,0,0 92 | 90,P02730,S,194,0,0,0,0,0,0,0,1,0,0,0,0,0 93 | 91,P02730,S,224,0,0,0,1,0,0,0,0,0,0,0,0,0 94 | 92,P02730,S,29,0,0,0,0,0,0,0,1,0,0,0,0,0 95 | 93,P02730,S,303,0,0,0,0,0,0,0,1,0,0,0,0,0 96 | 94,P02730,S,349,0,0,0,0,0,0,0,1,0,0,0,0,0 97 | 95,P02730,S,356,0,0,0,0,0,0,0,1,0,0,0,0,0 98 | 96,P02730,S,357,0,0,0,0,0,0,0,1,0,0,0,0,0 99 | 97,P02730,S,50,0,0,0,0,0,0,0,1,0,0,0,0,0 100 | 98,P02730,S,525,0,0,0,0,0,0,0,1,0,0,0,0,0 101 | 99,P02730,S,745,0,0,0,1,0,0,0,0,0,0,0,0,0 102 | 100,P02730,S,781,0,0,0,0,0,0,0,1,0,0,0,0,0 103 | 101,P02730,T,39,0,0,0,0,0,0,0,1,0,0,0,0,0 104 | 102,P02730,T,42,0,0,0,0,0,0,0,1,0,0,0,0,0 105 | 103,P02730,T,44,0,0,0,0,0,0,0,1,0,0,0,0,0 106 | 104,P02730,T,48,0,0,0,0,0,0,0,1,0,0,0,0,0 107 | 105,P02730,T,49,0,0,0,0,0,0,0,1,0,0,0,0,0 108 | 106,P02730,T,54,0,0,0,0,0,0,0,1,0,0,0,0,0 109 | 107,P02730,T,894,0,0,0,0,0,0,0,1,0,0,0,0,0 110 | 108,P02730,Y,21,0,0,0,0,0,0,0,1,1,0,0,0,0 111 | 109,P02730,Y,347,0,0,0,0,0,0,0,1,0,0,0,0,0 112 | 110,P02730,Y,359,0,0,0,0,0,0,0,1,1,0,0,0,0 113 | 111,P02730,Y,46,0,0,0,0,0,0,0,1,0,0,0,0,0 114 | 112,P02730,Y,8,0,0,0,0,0,0,0,1,1,0,0,0,0 115 | 113,P02730,Y,818,0,0,0,0,0,0,0,1,0,0,0,0,0 116 | 114,P02730,Y,904,0,0,0,0,0,0,0,1,1,0,0,0,0 117 | 115,P08559,K,18,1,0,0,0,0,0,0,0,0,0,0,1,0 118 | 116,P08559,K,244,1,0,0,0,0,0,0,0,0,0,0,1,0 119 | 117,P08559,K,277,1,0,0,0,0,0,0,0,0,0,0,0,0 120 | 118,P08559,K,313,1,0,0,0,0,0,0,0,0,0,0,0,0 121 | 119,P08559,K,321,1,1,0,0,0,0,0,0,0,0,0,1,0 122 | 120,P08559,K,336,1,0,0,0,0,0,0,0,0,0,0,0,0 123 | 121,P08559,K,344,0,0,0,0,0,0,0,0,0,0,0,1,0 124 | 122,P08559,K,385,0,0,0,0,0,1,0,0,0,0,0,0,0 125 | 123,P08559,K,39,1,0,0,0,0,0,0,0,0,0,0,1,0 126 | 124,P08559,K,63,1,0,0,0,0,0,0,0,0,0,0,1,0 127 | 125,P08559,K,77,1,0,0,0,0,0,0,0,0,0,0,0,0 128 | 126,P08559,K,83,1,0,0,0,0,0,0,0,0,0,0,1,0 129 | 127,P08559,R,245,0,0,0,0,0,1,0,0,0,0,0,0,0 130 | 128,P08559,S,152,0,0,0,0,0,0,0,1,0,0,0,0,0 131 | 129,P08559,S,232,0,0,0,0,0,0,0,1,1,0,0,0,0 132 | 
130,P08559,S,239,0,0,0,0,0,0,0,1,1,0,0,0,0 133 | 131,P08559,S,293,0,0,0,0,0,0,0,1,1,0,0,0,0 134 | 132,P08559,S,295,0,0,0,0,0,0,0,1,1,0,0,0,0 135 | 133,P08559,S,300,0,0,0,0,0,0,0,1,1,0,0,0,0 136 | 134,P08559,S,314,0,0,0,0,0,0,0,1,1,0,0,0,0 137 | 135,P08559,T,116,0,0,0,0,0,0,0,1,0,0,0,0,0 138 | 136,P08559,T,139,0,0,0,0,0,0,0,1,0,0,0,0,0 139 | 137,P08559,T,231,0,0,0,0,0,0,0,1,0,0,0,0,0 140 | 138,P08559,T,240,0,0,0,0,0,0,0,1,0,0,0,0,0 141 | 139,P08559,T,303,0,0,0,0,0,0,0,1,0,0,0,0,0 142 | 140,P08559,Y,118,0,0,0,0,0,0,0,1,0,0,0,0,0 143 | 141,P08559,Y,227,0,0,0,0,0,0,0,1,0,0,0,0,0 144 | 142,P08559,Y,242,0,0,0,0,0,0,0,1,0,0,0,0,0 145 | 143,P08559,Y,243,0,0,0,0,0,0,0,1,0,0,0,0,0 146 | 144,P08559,Y,272,0,0,0,0,0,0,0,1,0,0,0,0,0 147 | 145,P08559,Y,289,0,0,0,0,0,0,0,1,1,0,0,0,0 148 | 146,P08559,Y,301,0,0,0,0,0,0,0,1,0,0,0,0,0 149 | 147,P08559,Y,366,0,0,0,0,0,0,0,1,0,0,0,0,0 150 | 148,P08559,Y,369,0,0,0,0,0,0,0,1,0,0,0,0,0 151 | 149,P15121,K,117,1,0,0,0,0,0,0,0,0,0,0,0,0 152 | 150,P15121,K,12,1,0,0,0,0,0,0,0,0,0,0,1,0 153 | 151,P15121,K,179,0,0,0,0,0,0,0,0,0,0,0,1,0 154 | 152,P15121,K,195,0,0,0,0,0,0,0,0,0,0,0,1,0 155 | 153,P15121,K,22,0,0,0,0,0,0,0,0,0,0,0,1,0 156 | 154,P15121,K,222,1,0,0,0,0,1,0,0,0,0,0,1,0 157 | 155,P15121,K,240,1,0,0,0,0,0,0,0,0,0,0,1,0 158 | 156,P15121,K,243,1,0,0,0,0,0,0,0,0,0,0,1,0 159 | 157,P15121,K,263,1,0,0,0,0,0,0,0,0,0,0,1,0 160 | 158,P15121,K,308,1,0,0,0,0,0,0,0,0,0,0,1,0 161 | 159,P15121,K,62,0,0,0,0,0,0,0,0,0,0,0,1,0 162 | 160,P15121,K,69,0,0,0,0,0,0,0,0,0,0,0,1,0 163 | 161,P15121,K,86,0,0,0,0,0,0,0,0,0,0,0,1,0 164 | 162,P15121,K,90,0,0,0,0,0,0,0,0,0,0,0,1,0 165 | 163,P15121,K,95,1,0,0,0,0,0,0,0,0,0,0,1,0 166 | 164,P15121,R,218,0,0,0,0,0,1,0,0,0,0,0,0,0 167 | 165,P15121,R,233,0,0,0,0,0,1,0,0,0,0,0,0,0 168 | 166,P15121,S,211,0,0,0,0,0,0,0,1,0,0,0,0,0 169 | 167,P15121,S,215,0,0,0,0,0,0,0,1,0,0,0,0,0 170 | 168,P15121,S,23,0,0,0,0,0,0,0,1,0,0,0,0,0 171 | 169,P15121,S,264,0,0,0,0,0,0,0,1,0,0,0,0,0 172 | 170,P15121,S,3,0,0,0,0,0,0,0,1,0,0,0,0,0 173 | 
171,P15121,S,77,0,0,0,0,0,0,0,1,0,0,0,0,0 174 | 172,P15121,S,98,0,0,0,0,0,0,0,1,0,0,0,0,0 175 | 173,P15121,T,192,0,0,0,0,0,0,0,1,0,0,0,0,0 176 | 174,P15121,T,20,0,0,0,0,0,0,0,1,0,0,0,0,0 177 | 175,P15121,T,266,0,0,0,0,0,0,0,1,0,0,0,0,0 178 | 176,P15121,Y,104,0,0,0,0,0,0,0,1,0,0,0,0,0 179 | 177,P15121,Y,190,0,0,0,0,0,0,0,1,0,0,0,0,0 180 | 178,P15121,Y,199,0,0,0,0,0,0,0,1,0,0,0,0,0 181 | 179,P15121,Y,40,0,0,0,0,0,0,0,1,0,0,0,0,0 182 | 180,P15121,Y,49,0,0,0,0,0,0,0,1,0,0,0,0,0 183 | 181,P15121,Y,83,0,0,0,0,0,0,0,1,0,0,0,0,0 184 | 182,P24941,K,129,0,0,0,0,0,0,0,0,0,0,0,1,0 185 | 183,P24941,K,142,0,0,0,0,0,0,0,0,0,0,0,1,0 186 | 184,P24941,K,20,0,0,0,0,0,0,0,0,0,0,0,1,0 187 | 185,P24941,K,237,0,0,0,0,0,0,0,0,0,0,0,1,0 188 | 186,P24941,K,24,0,0,0,0,0,0,0,0,0,0,0,1,0 189 | 187,P24941,K,250,0,0,0,0,0,0,0,0,0,0,0,1,0 190 | 188,P24941,K,273,0,0,0,0,0,0,0,0,0,0,0,1,0 191 | 189,P24941,K,278,0,0,0,0,0,0,0,0,0,0,0,1,0 192 | 190,P24941,K,291,0,0,0,0,0,0,0,0,0,0,0,1,0 193 | 191,P24941,K,33,0,0,0,0,0,0,0,0,0,1,0,1,0 194 | 192,P24941,K,56,0,0,0,0,0,0,0,0,0,0,0,1,0 195 | 193,P24941,K,6,1,0,0,0,0,0,0,0,0,1,0,1,0 196 | 194,P24941,K,65,0,0,0,0,0,0,0,0,0,0,0,1,0 197 | 195,P24941,K,9,0,0,0,0,0,0,0,0,0,0,0,1,0 198 | 196,P24941,R,297,0,0,0,0,0,1,0,0,0,0,0,0,0 199 | 197,P24941,S,46,0,0,0,0,0,0,0,1,0,0,0,0,0 200 | 198,P24941,T,137,0,0,0,0,0,0,0,1,0,0,0,0,0 201 | 199,P24941,T,14,0,0,0,0,0,0,0,1,1,0,0,0,0 202 | 200,P24941,T,158,0,0,0,0,0,0,0,1,0,0,0,0,0 203 | 201,P24941,T,160,0,0,0,0,0,0,0,1,1,0,0,0,0 204 | 202,P24941,T,165,0,0,0,0,0,0,0,1,0,0,0,0,0 205 | 203,P24941,T,39,0,0,0,0,0,0,0,1,1,0,0,0,0 206 | 204,P24941,Y,15,0,0,0,0,0,0,0,1,1,0,0,0,0 207 | 205,P24941,Y,159,0,0,0,0,0,0,0,1,0,0,0,0,0 208 | 206,P24941,Y,168,0,0,0,0,0,0,0,1,0,0,0,0,0 209 | 207,P24941,Y,19,0,0,0,0,0,0,0,1,0,0,0,0,0 210 | 208,P28482,K,138,0,0,0,0,0,0,0,0,0,0,0,1,0 211 | 209,P28482,K,151,0,0,0,0,0,0,0,0,0,0,0,1,0 212 | 210,P28482,K,164,0,0,0,0,0,0,0,0,0,0,0,1,0 213 | 211,P28482,K,203,0,0,0,0,0,0,0,0,0,0,0,1,0 214 | 
212,P28482,K,259,0,0,0,0,0,0,0,0,0,0,0,1,0 215 | 213,P28482,K,270,0,0,0,0,0,0,0,0,0,0,0,1,0 216 | 214,P28482,K,272,0,0,0,0,0,0,0,0,0,0,0,1,0 217 | 215,P28482,K,285,0,0,0,0,0,0,0,0,0,0,0,1,0 218 | 216,P28482,K,292,0,0,0,0,0,0,0,0,0,0,0,1,0 219 | 217,P28482,K,300,0,0,0,0,0,0,0,0,0,0,0,1,0 220 | 218,P28482,K,330,0,0,0,0,0,0,0,0,0,0,0,1,0 221 | 219,P28482,K,340,0,0,0,0,0,0,0,0,0,0,0,1,0 222 | 220,P28482,K,344,0,0,0,0,0,0,0,0,0,0,0,1,0 223 | 221,P28482,K,55,0,0,0,0,0,0,0,0,0,0,0,1,0 224 | 222,P28482,K,99,0,0,0,0,0,0,0,0,0,0,0,1,0 225 | 223,P28482,R,194,0,0,0,0,0,1,0,0,0,0,0,0,0 226 | 224,P28482,S,142,0,0,0,0,0,0,0,1,0,0,0,0,0 227 | 225,P28482,S,202,0,0,0,0,0,0,0,1,0,0,0,0,0 228 | 226,P28482,S,246,0,0,0,0,0,0,0,1,1,0,0,0,0 229 | 227,P28482,S,248,0,0,0,0,0,0,0,1,1,0,0,0,0 230 | 228,P28482,S,284,0,0,0,0,0,0,0,1,0,0,0,0,0 231 | 229,P28482,S,29,0,0,0,0,0,0,0,1,1,0,0,0,0 232 | 230,P28482,S,360,0,0,0,0,0,0,0,1,0,0,0,0,0 233 | 231,P28482,T,181,0,0,0,0,0,0,0,1,0,0,0,0,0 234 | 232,P28482,T,185,0,0,0,0,0,0,0,1,1,0,0,0,0 235 | 233,P28482,T,190,0,0,0,0,0,0,0,1,1,0,0,0,0 236 | 234,P28482,T,206,0,0,0,0,0,0,0,1,0,0,0,0,0 237 | 235,P28482,T,295,0,0,0,0,0,0,0,1,0,0,0,0,0 238 | 236,P28482,T,63,0,0,0,0,0,0,0,1,0,0,0,0,0 239 | 237,P28482,Y,113,0,0,0,0,0,0,0,1,0,0,0,0,0 240 | 238,P28482,Y,187,0,0,0,0,0,0,0,1,1,0,0,0,0 241 | 239,P28482,Y,193,0,0,0,0,0,0,0,1,0,0,0,0,0 242 | 240,P28482,Y,205,0,0,0,0,0,0,0,1,0,0,0,0,0 243 | 241,P28482,Y,25,0,0,0,0,0,0,0,1,0,0,0,0,0 244 | 242,P28482,Y,263,0,0,0,0,0,0,0,1,0,0,0,0,0 245 | 243,P28482,Y,36,0,0,0,0,0,0,0,1,0,0,0,0,0 246 | 244,P28482,Y,43,0,0,0,0,0,0,0,1,0,0,0,0,0 247 | 245,P29320,K,625,0,0,0,0,0,0,0,0,0,0,0,1,0 248 | 246,P29320,K,656,0,0,0,0,0,0,0,0,0,0,0,1,0 249 | 247,P29320,S,294,0,0,0,0,0,0,0,1,0,0,0,0,0 250 | 248,P29320,S,497,0,0,0,0,0,0,0,1,0,0,0,0,0 251 | 249,P29320,S,498,0,0,0,0,0,0,0,1,0,0,0,0,0 252 | 250,P29320,S,768,0,0,0,0,0,0,0,1,1,0,0,0,0 253 | 251,P29320,S,976,0,0,0,0,0,0,0,1,0,0,0,0,0 254 | 252,P29320,T,432,0,0,0,0,0,0,0,1,0,0,0,0,0 255 
| 253,P29320,T,442,0,0,0,0,0,0,0,1,0,0,0,0,0 256 | 254,P29320,T,485,0,0,0,0,0,0,0,1,0,0,0,0,0 257 | 255,P29320,T,595,0,0,0,0,0,0,0,1,0,0,0,0,0 258 | 256,P29320,T,601,0,0,0,0,0,0,0,1,0,0,0,0,0 259 | 257,P29320,T,654,0,0,0,0,0,0,0,1,0,0,0,0,0 260 | 258,P29320,T,781,0,0,0,0,0,0,0,1,0,0,0,0,0 261 | 259,P29320,T,974,0,0,0,0,0,0,0,1,0,0,0,0,0 262 | 260,P29320,Y,561,0,0,0,0,0,0,0,1,0,0,0,0,0 263 | 261,P29320,Y,570,0,0,0,0,0,0,0,1,0,0,0,0,0 264 | 262,P29320,Y,596,0,0,0,0,0,0,0,1,1,0,0,0,0 265 | 263,P29320,Y,602,0,0,0,0,0,0,0,1,1,0,0,0,0 266 | 264,P29320,Y,659,0,0,0,0,0,0,0,1,0,0,0,0,0 267 | 265,P29320,Y,701,0,0,0,0,0,0,0,1,0,0,0,0,0 268 | 266,P29320,Y,736,0,0,0,0,0,0,0,1,0,0,0,0,0 269 | 267,P29320,Y,742,0,0,0,0,0,0,0,1,1,0,0,0,0 270 | 268,P29320,Y,779,0,0,0,0,0,0,0,1,1,0,0,0,0 271 | 269,P29320,Y,937,0,0,0,0,0,0,0,1,0,0,0,0,0 272 | 270,P45984,K,153,0,0,0,0,0,0,0,0,0,0,0,1,0 273 | 271,P45984,K,160,0,0,0,0,0,0,0,0,0,0,0,1,0 274 | 272,P45984,K,166,0,0,0,0,0,0,0,0,0,0,0,1,0 275 | 273,P45984,K,250,1,0,0,0,0,0,0,0,0,0,0,1,0 276 | 274,P45984,K,251,0,0,0,0,0,0,0,0,0,0,0,1,0 277 | 275,P45984,K,300,0,0,0,0,0,0,0,0,0,0,0,1,0 278 | 276,P45984,K,353,0,0,0,0,0,0,0,0,0,0,0,1,0 279 | 277,P45984,K,56,0,0,0,0,0,0,0,0,0,0,0,1,0 280 | 278,P45984,K,68,0,0,0,0,0,0,0,0,0,0,0,1,0 281 | 279,P45984,S,144,0,0,0,0,0,0,0,1,0,0,0,0,0 282 | 280,P45984,S,155,0,0,0,0,0,0,0,1,0,0,0,0,0 283 | 281,P45984,S,292,0,0,0,0,0,0,0,1,0,0,0,0,0 284 | 282,P45984,S,311,0,0,0,0,0,0,0,1,0,0,0,0,0 285 | 283,P45984,S,407,0,0,0,0,0,0,0,1,0,0,0,0,0 286 | 284,P45984,T,178,0,0,0,0,0,0,0,1,0,0,0,0,0 287 | 285,P45984,T,183,0,0,0,0,0,0,0,1,0,0,0,0,0 288 | 286,P45984,T,188,0,0,0,0,0,0,0,1,0,0,0,0,0 289 | 287,P45984,T,404,0,0,0,0,0,0,0,1,0,0,0,0,0 290 | 288,P45984,Y,185,0,0,0,0,0,0,0,1,0,0,0,0,0 291 | 289,P45984,Y,357,0,0,0,0,0,0,0,1,0,0,0,0,0 292 | 290,Q13546,K,105,0,0,0,0,0,0,0,0,0,0,0,1,0 293 | 291,Q13546,K,115,0,0,0,0,0,0,0,0,0,0,0,1,1 294 | 292,Q13546,K,13,0,0,0,0,0,0,0,0,0,0,0,1,0 295 | 
293,Q13546,K,137,0,0,0,0,0,0,0,0,0,0,0,1,0 296 | 294,Q13546,K,140,0,0,0,0,0,0,0,0,0,0,0,1,0 297 | 295,Q13546,K,153,0,0,0,0,0,0,0,0,0,0,0,1,0 298 | 296,Q13546,K,163,0,0,0,0,0,0,0,0,0,0,0,1,0 299 | 297,Q13546,K,167,0,0,0,0,0,0,0,0,0,0,0,1,0 300 | 298,Q13546,K,184,0,0,0,0,0,0,0,0,0,0,0,1,0 301 | 299,Q13546,K,185,0,0,0,0,0,0,0,0,0,0,0,1,0 302 | 300,Q13546,K,284,0,0,0,0,0,0,0,0,0,0,0,1,0 303 | 301,Q13546,K,302,0,0,0,0,0,0,0,0,0,0,0,1,0 304 | 302,Q13546,K,306,0,0,0,0,0,0,0,0,0,0,0,1,0 305 | 303,Q13546,K,316,0,0,0,0,0,0,0,0,0,0,0,1,0 306 | 304,Q13546,K,377,0,0,0,0,0,0,0,0,0,0,0,1,1 307 | 305,Q13546,K,49,0,0,0,0,0,0,0,0,0,0,0,1,0 308 | 306,Q13546,K,530,1,1,0,0,0,0,0,0,0,0,0,0,0 309 | 307,Q13546,K,571,0,0,0,0,0,0,0,0,0,0,0,1,0 310 | 308,Q13546,K,585,0,0,0,0,0,0,0,0,0,0,0,1,0 311 | 309,Q13546,K,596,0,0,0,0,0,0,0,0,0,0,0,1,0 312 | 310,Q13546,K,604,0,0,0,0,0,0,0,0,0,0,0,1,0 313 | 311,Q13546,K,627,0,0,0,0,0,0,0,0,0,0,0,1,0 314 | 312,Q13546,K,642,1,0,0,0,0,0,0,0,0,0,0,1,0 315 | 313,Q13546,K,648,1,0,0,0,0,0,0,0,0,0,0,0,0 316 | 314,Q13546,R,477,0,0,0,0,0,1,0,0,0,0,0,0,0 317 | 315,Q13546,R,487,0,0,0,0,0,1,0,0,0,0,0,0,0 318 | 316,Q13546,S,14,0,0,0,0,0,0,0,1,0,0,0,0,0 319 | 317,Q13546,S,15,0,0,0,0,0,0,0,1,0,0,0,0,0 320 | 318,Q13546,S,161,0,0,0,0,0,0,0,1,1,0,0,0,0 321 | 319,Q13546,S,166,0,0,0,0,0,0,0,1,1,0,0,0,0 322 | 320,Q13546,S,20,0,0,0,0,0,0,0,1,0,0,0,0,0 323 | 321,Q13546,S,25,0,0,0,0,0,0,0,1,0,0,0,0,0 324 | 322,Q13546,S,262,0,0,0,0,0,0,0,1,0,0,0,0,0 325 | 323,Q13546,S,291,0,0,0,0,0,0,0,1,0,0,0,0,0 326 | 324,Q13546,S,296,0,0,0,0,0,0,0,1,0,0,0,0,0 327 | 325,Q13546,S,303,0,0,0,0,0,0,0,1,0,0,0,0,0 328 | 326,Q13546,S,309,0,0,0,0,0,0,0,1,0,0,0,0,0 329 | 327,Q13546,S,32,0,0,0,0,0,0,0,1,0,0,0,0,0 330 | 328,Q13546,S,320,0,0,0,0,0,0,0,1,1,0,0,0,0 331 | 329,Q13546,S,330,0,0,0,0,0,0,0,1,0,0,0,0,0 332 | 330,Q13546,S,331,0,0,0,0,0,0,0,1,0,0,0,0,0 333 | 331,Q13546,S,333,0,0,0,0,0,0,0,1,0,0,0,0,0 334 | 332,Q13546,S,335,0,0,0,0,0,0,0,1,0,0,0,0,0 335 | 333,Q13546,S,345,0,0,0,0,0,0,0,1,0,0,0,0,0 
336 | 334,Q13546,S,346,0,0,0,0,0,0,0,1,0,0,0,0,0 337 | 335,Q13546,S,357,0,0,0,0,0,0,0,1,1,0,0,0,0 338 | 336,Q13546,S,389,0,0,0,0,0,0,0,1,0,0,0,0,0 339 | 337,Q13546,S,416,0,0,0,0,0,0,0,1,0,0,0,0,0 340 | 338,Q13546,S,470,0,0,0,0,0,0,0,1,0,0,0,0,0 341 | 339,Q13546,S,471,0,0,0,0,0,0,0,1,0,0,0,0,0 342 | 340,Q13546,S,6,0,0,0,0,0,0,0,1,0,0,0,0,0 343 | 341,Q13546,S,610,0,0,0,0,0,0,0,1,0,0,0,0,0 344 | 342,Q13546,S,664,0,0,0,0,0,0,0,1,0,0,0,0,0 345 | 343,Q13546,T,337,0,0,0,0,0,0,0,1,0,0,0,0,0 346 | 344,Q13546,T,38,0,0,0,0,0,0,0,1,0,0,0,0,0 347 | 345,Q13546,T,483,0,0,0,0,0,0,0,1,0,0,0,0,0 348 | 346,Q13546,Y,384,0,0,0,0,0,0,0,1,0,0,0,0,0 349 | 347,Q13546,Y,387,0,0,0,0,0,0,0,1,0,0,0,0,0 350 | 348,Q13546,Y,426,0,0,0,0,0,0,0,1,0,0,0,0,0 351 | 349,Q13546,Y,463,0,0,0,0,0,0,0,1,0,0,0,0,0 352 | 350,Q13546,Y,469,0,0,0,0,0,0,0,1,0,0,0,0,0 353 | 351,Q13546,Y,490,0,0,0,0,0,0,0,1,0,0,0,0,0 354 | 352,Q8NB16,K,157,0,0,0,0,0,0,0,0,0,0,0,1,0 355 | 353,Q8NB16,K,173,0,0,0,0,0,0,0,0,0,0,0,1,0 356 | 354,Q8NB16,K,183,0,0,0,0,0,0,0,0,0,0,0,1,0 357 | 355,Q8NB16,K,198,0,0,0,0,0,0,0,0,0,0,0,1,0 358 | 356,Q8NB16,K,219,0,0,0,0,0,0,0,0,0,0,0,1,0 359 | 357,Q8NB16,K,230,0,0,0,0,0,0,0,0,0,0,0,1,0 360 | 358,Q8NB16,K,249,0,0,0,0,0,0,0,0,0,0,0,1,0 361 | 359,Q8NB16,K,331,0,0,0,0,0,0,0,0,0,0,0,1,0 362 | 360,Q8NB16,K,354,1,0,0,0,0,0,0,0,0,0,0,1,0 363 | 361,Q8NB16,K,372,0,0,0,0,0,0,0,0,0,0,0,1,0 364 | 362,Q8NB16,K,40,0,0,0,0,0,0,0,0,0,0,0,1,0 365 | 363,Q8NB16,K,50,0,0,0,0,0,0,0,0,0,0,0,1,0 366 | 364,Q8NB16,K,57,0,0,0,0,0,0,0,0,0,0,0,1,0 367 | 365,Q8NB16,K,66,0,0,0,0,0,0,0,0,0,0,0,1,0 368 | 366,Q8NB16,K,78,0,0,0,0,0,0,0,0,0,0,0,1,0 369 | 367,Q8NB16,S,106,0,0,0,0,0,0,0,1,0,0,0,0,0 370 | 368,Q8NB16,S,125,0,0,0,0,0,0,0,1,0,0,0,0,0 371 | 369,Q8NB16,S,128,0,0,0,0,0,0,0,1,0,0,0,0,0 372 | 370,Q8NB16,S,161,0,0,0,0,0,0,0,1,0,0,0,0,0 373 | 371,Q8NB16,S,334,0,0,0,0,0,0,0,1,0,0,0,0,0 374 | 372,Q8NB16,S,358,0,0,0,0,0,0,0,1,1,0,0,0,0 375 | 373,Q8NB16,S,373,0,0,0,0,0,0,0,1,0,0,0,0,0 376 | 
374,Q8NB16,S,393,0,0,0,0,0,0,0,1,0,0,0,0,0 377 | 375,Q8NB16,S,417,0,0,0,0,0,0,0,1,0,0,0,0,0 378 | 376,Q8NB16,S,467,0,0,0,0,0,0,0,1,0,0,0,0,0 379 | 377,Q8NB16,S,52,0,0,0,0,0,0,0,1,0,0,0,0,0 380 | 378,Q8NB16,S,92,0,0,0,0,0,0,0,1,0,0,0,0,0 381 | 379,Q8NB16,T,246,0,0,0,0,0,0,0,1,0,0,0,0,0 382 | 380,Q8NB16,T,302,0,0,0,0,0,0,0,1,0,0,0,0,0 383 | 381,Q8NB16,T,357,0,0,0,0,0,0,0,1,1,0,0,0,0 384 | 382,Q8NB16,T,364,0,0,0,0,0,0,0,1,0,0,0,0,0 385 | 383,Q8NB16,T,374,0,0,0,0,0,0,0,1,0,0,0,0,0 386 | 384,Q8NB16,T,59,0,0,0,0,0,0,0,1,0,0,0,0,0 387 | 385,Q8NB16,Y,376,0,0,0,0,0,0,0,1,1,0,0,0,0 388 | 386,Q92918,K,296,0,0,0,0,0,0,0,0,0,0,0,1,0 389 | 387,Q92918,K,33,0,0,0,0,0,0,0,0,0,0,0,1,0 390 | 388,Q92918,K,37,0,0,0,0,0,0,0,0,0,0,0,1,0 391 | 389,Q92918,K,46,0,0,0,0,0,0,0,0,0,0,0,1,0 392 | 390,Q92918,K,49,0,0,0,0,0,0,0,0,0,0,0,1,0 393 | 391,Q92918,K,594,0,0,0,0,0,0,0,0,0,0,0,1,0 394 | 392,Q92918,K,600,0,0,0,0,0,0,0,0,0,0,0,1,0 395 | 393,Q92918,S,171,0,0,0,0,0,0,0,1,1,0,0,0,0 396 | 394,Q92918,S,230,0,0,0,0,0,0,0,1,0,0,0,0,0 397 | 395,Q92918,S,258,0,0,0,0,0,0,0,1,0,0,0,0,0 398 | 396,Q92918,S,320,0,0,0,0,0,0,0,1,0,0,0,0,0 399 | 397,Q92918,S,324,0,0,0,0,0,0,0,1,0,0,0,0,0 400 | 398,Q92918,S,325,0,0,0,0,0,0,0,1,0,0,0,0,0 401 | 399,Q92918,S,326,0,0,0,0,0,0,0,1,0,0,0,0,0 402 | 400,Q92918,S,366,0,0,0,0,0,0,0,1,0,0,0,0,0 403 | 401,Q92918,S,368,0,0,0,0,0,0,0,1,0,0,0,0,0 404 | 402,Q92918,S,374,0,0,0,0,0,0,0,1,0,0,0,0,0 405 | 403,Q92918,S,376,0,0,0,0,0,0,0,1,0,0,0,0,0 406 | 404,Q92918,S,377,0,0,0,0,0,0,0,1,0,0,0,0,0 407 | 405,Q92918,S,405,0,0,0,0,0,0,0,1,0,0,0,0,0 408 | 406,Q92918,S,407,0,0,0,0,0,0,0,1,0,0,0,0,0 409 | 407,Q92918,S,413,0,0,0,0,0,0,0,1,0,0,0,0,0 410 | 408,Q92918,S,421,0,0,0,0,0,0,0,1,0,0,0,0,0 411 | 409,Q92918,S,430,0,0,0,0,0,0,0,1,0,0,0,0,0 412 | 410,Q92918,S,436,0,0,0,0,0,0,0,1,0,0,0,0,0 413 | 411,Q92918,S,444,0,0,0,0,0,0,0,1,0,0,0,0,0 414 | 412,Q92918,S,446,0,0,0,0,0,0,0,1,0,0,0,0,0 415 | 413,Q92918,S,447,0,0,0,0,0,0,0,1,0,0,0,0,0 416 | 414,Q92918,S,454,0,0,0,0,0,0,0,1,0,0,0,0,0 417 
| 415,Q92918,S,586,0,0,0,0,0,0,0,1,0,0,0,0,0 418 | 416,Q92918,S,598,0,0,0,0,0,0,0,1,0,0,0,0,0 419 | 417,Q92918,S,737,0,0,0,0,0,0,0,1,0,0,0,0,0 420 | 418,Q92918,T,165,0,0,0,0,0,0,0,1,1,0,0,0,0 421 | 419,Q92918,T,175,0,0,0,0,0,0,0,1,1,0,0,0,0 422 | 420,Q92918,T,349,0,0,0,0,0,0,0,1,0,0,0,0,0 423 | 421,Q92918,T,355,0,0,0,0,0,0,0,1,1,0,0,0,0 424 | 422,Q92918,T,451,0,0,0,0,0,0,0,1,0,0,0,0,0 425 | 423,Q92918,T,599,0,0,0,0,0,0,0,1,0,0,0,0,0 426 | 424,Q92918,Y,177,0,0,0,0,0,0,0,1,0,0,0,0,0 427 | 425,Q92918,Y,28,0,0,0,0,0,0,0,1,0,0,0,0,0 428 | 426,Q92918,Y,381,0,0,0,0,0,0,0,1,0,0,0,0,0 429 | -------------------------------------------------------------------------------- /misc/CLA.md: -------------------------------------------------------------------------------- 1 | ### MannLabs Individual Contributor License Agreement 2 | 3 | Thank you for your interest in contributing to open source software projects (“Projects”) made available by MannLabs or its affiliates (“MannLabs”). This Individual Contributor License Agreement (“Agreement”) sets out the terms governing any source code, object code, bug fixes, configuration changes, tools, specifications, documentation, data, materials, feedback, information or other works of authorship that you submit or have submitted, in any form and in any manner, to MannLabs in respect of any of the Projects (collectively “Contributions”). If you have any questions respecting this Agreement, please contact opensource@alphapept.com. 4 | 5 | 6 | You agree that the following terms apply to all of your past, present and future Contributions. Except for the licenses granted in this Agreement, you retain all of your right, title and interest in and to your Contributions. 
7 | 8 | 9 | **Copyright License.** You hereby grant, and agree to grant, to MannLabs a non-exclusive, perpetual, irrevocable, worldwide, fully-paid, royalty-free, transferable copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, and distribute your Contributions and such derivative works, with the right to sublicense the foregoing rights through multiple tiers of sublicensees. 10 | 11 | 12 | **Patent License.** You hereby grant, and agree to grant, to MannLabs a non-exclusive, perpetual, irrevocable, 13 | worldwide, fully-paid, royalty-free, transferable patent license to make, have made, use, offer to sell, sell, 14 | import, and otherwise transfer your Contributions, where such license applies only to those patent claims 15 | licensable by you that are necessarily infringed by your Contributions alone or by combination of your 16 | Contributions with the Project to which such Contributions were submitted, with the right to sublicense the 17 | foregoing rights through multiple tiers of sublicensees. 18 | 19 | 20 | **Moral Rights.** To the fullest extent permitted under applicable law, you hereby waive, and agree not to 21 | assert, all of your “moral rights” in or relating to your Contributions for the benefit of MannLabs, its assigns, and 22 | their respective direct and indirect sublicensees. 
23 | 24 | 25 | **Third Party Content/Rights.** If your Contribution includes or is based on any source code, object code, bug 26 | fixes, configuration changes, tools, specifications, documentation, data, materials, feedback, information or 27 | other works of authorship that were not authored by you (“Third Party Content”) or if you are aware of any 28 | third party intellectual property or proprietary rights associated with your Contribution (“Third Party Rights”), 29 | then you agree to include with the submission of your Contribution full details respecting such Third Party 30 | Content and Third Party Rights, including, without limitation, identification of which aspects of your 31 | Contribution contain Third Party Content or are associated with Third Party Rights, the owner/author of the 32 | Third Party Content and Third Party Rights, where you obtained the Third Party Content, and any applicable 33 | third party license terms or restrictions respecting the Third Party Content and Third Party Rights. For greater 34 | certainty, the foregoing obligations respecting the identification of Third Party Content and Third Party Rights 35 | do not apply to any portion of a Project that is incorporated into your Contribution to that same Project. 36 | 37 | 38 | **Representations.** You represent that, other than the Third Party Content and Third Party Rights identified by 39 | you in accordance with this Agreement, you are the sole author of your Contributions and are legally entitled 40 | to grant the foregoing licenses and waivers in respect of your Contributions. If your Contributions were 41 | created in the course of your employment with your past or present employer(s), you represent that such 42 | employer(s) has authorized you to make your Contributions on behalf of such employer(s) or such employer 43 | (s) has waived all of their right, title or interest in or to your Contributions. 
44 | 45 | 46 | **Disclaimer.** To the fullest extent permitted under applicable law, your Contributions are provided on an "as is" 47 | basis, without any warranties or conditions, express or implied, including, without limitation, any implied 48 | warranties or conditions of non-infringement, merchantability or fitness for a particular purpose. You are not 49 | required to provide support for your Contributions, except to the extent you desire to provide support. 50 | 51 | 52 | **No Obligation.** You acknowledge that MannLabs is under no obligation to use or incorporate your Contributions 53 | into any of the Projects. The decision to use or incorporate your Contributions into any of the Projects will be 54 | made at the sole discretion of MannLabs or its authorized delegates .. 55 | 56 | 57 | **Disputes.** This Agreement shall be governed by and construed in accordance with the laws of the State of 58 | New York, United States of America, without giving effect to its principles or rules regarding conflicts of laws, 59 | other than such principles directing application of New York law. The parties hereby submit to venue in, and 60 | jurisdiction of the courts located in New York, New York for purposes relating to this Agreement. In the event 61 | that any of the provisions of this Agreement shall be held by a court or other tribunal of competent jurisdiction 62 | to be unenforceable, the remaining portions hereof shall remain in full force and effect. 63 | 64 | 65 | **Assignment.** You agree that MannLabs may assign this Agreement, and all of its rights, obligations and licenses 66 | hereunder 67 | -------------------------------------------------------------------------------- /misc/bumpversion.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.0.10 3 | commit = True 4 | tag = False 5 | parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-(?P[a-z]+)(?P\d+))? 
6 | serialize = 7 | {major}.{minor}.{patch} 8 | {major}.{minor}.{patch} 9 | 10 | [bumpversion:part:release] 11 | 12 | [bumpversion:part:build] 13 | 14 | [bumpversion:file:../structuremap/__init__.py] 15 | 16 | [bumpversion:file:../release/one_click_linux_gui/control] 17 | 18 | [bumpversion:file:../release/one_click_linux_gui/create_installer_linux.sh] 19 | 20 | [bumpversion:file:../release/one_click_macos_gui/distribution.xml] 21 | 22 | [bumpversion:file:../release/one_click_macos_gui/Info.plist] 23 | 24 | [bumpversion:file:../release/one_click_macos_gui/create_installer_macos.sh] 25 | 26 | [bumpversion:file:../release/one_click_windows_gui/create_installer_windows.sh] 27 | 28 | [bumpversion:file:../release/one_click_windows_gui/structuremap_innoinstaller.iss] 29 | search = {current_version} 30 | replace = {new_version} 31 | -------------------------------------------------------------------------------- /misc/check_version.sh: -------------------------------------------------------------------------------- 1 | current_version=$(grep "__version__" ../structuremap/__init__.py | cut -f3 -d ' ' | sed 's/"//g') 2 | current_version_as_regex=$(echo $current_version | sed 's/\./\\./g') 3 | conda create -n version_check python=3.8 pip=20.1 -y 4 | conda activate version_check 5 | set +e 6 | already_on_pypi=$(pip install structuremap== 2>&1 | grep -c "$current_version_as_regex") 7 | set -e 8 | conda deactivate 9 | if [ $already_on_pypi -ne 0 ]; then 10 | echo "Version is already on PyPi" 11 | exit 1 12 | fi 13 | -------------------------------------------------------------------------------- /misc/loose_pip_install.sh: -------------------------------------------------------------------------------- 1 | conda create -n structuremap python=3.8 -y 2 | conda activate structuremap 3 | pip install -e '../.[development]' 4 | structuremap 5 | conda deactivate 6 | -------------------------------------------------------------------------------- /misc/stable_pip_install.sh: 
-------------------------------------------------------------------------------- 1 | conda create -n structuremap python=3.8 -y 2 | conda activate structuremap 3 | pip install -e '../.[stable,development-stable]' 4 | structuremap 5 | conda deactivate 6 | -------------------------------------------------------------------------------- /release/logos/alpha_logo.icns: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MannLabs/structuremap/f14b4325e30f16394ea819af2e29f9c68f786ee4/release/logos/alpha_logo.icns -------------------------------------------------------------------------------- /release/logos/alpha_logo.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MannLabs/structuremap/f14b4325e30f16394ea819af2e29f9c68f786ee4/release/logos/alpha_logo.ico -------------------------------------------------------------------------------- /release/logos/alpha_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MannLabs/structuremap/f14b4325e30f16394ea819af2e29f9c68f786ee4/release/logos/alpha_logo.png -------------------------------------------------------------------------------- /release/one_click_linux_gui/control: -------------------------------------------------------------------------------- 1 | Package: structuremap 2 | Version: 0.0.10 3 | Architecture: all 4 | Maintainer: Mann Labs 5 | Description: structuremap 6 | structuremap is an open-source Python package in the AlphaPept ecosystem. 7 | structuremap was developed by the Mann Labs at the Max Planck Institute of Biochemistry and University of Copenhagen and is freely available with an Apache License. Additional third-party licenses are applicable for external Python packages (see https://github.com/MannLabs/structuremap for more details.). 
8 | -------------------------------------------------------------------------------- /release/one_click_linux_gui/create_installer_linux.sh: -------------------------------------------------------------------------------- 1 | #!bash 2 | 3 | # Initial cleanup 4 | rm -rf dist 5 | rm -rf build 6 | cd ../.. 7 | rm -rf dist 8 | rm -rf build 9 | 10 | # Creating a conda environment 11 | conda create -n structuremap_installer python=3.8 -y 12 | conda activate structuremap_installer 13 | 14 | # Creating the wheel 15 | python setup.py sdist bdist_wheel 16 | 17 | # Setting up the local package 18 | cd release/one_click_linux_gui 19 | # Make sure you include the required extra packages and always use the stable or very-stable options! 20 | pip install "../../dist/structuremap-0.0.10-py3-none-any.whl[stable]" 21 | 22 | # Creating the stand-alone pyinstaller folder 23 | pip install pyinstaller==4.2 24 | pyinstaller ../pyinstaller/structuremap.spec -y 25 | conda deactivate 26 | 27 | # If needed, include additional source such as e.g.: 28 | # cp ../../structuremap/data/*.fasta dist/structuremap/data 29 | # WARNING: this probably does not work!!!! 
30 | 31 | # Wrapping the pyinstaller folder in a .deb package 32 | mkdir -p dist/structuremap_gui_installer_linux/usr/local/bin 33 | mv dist/structuremap dist/structuremap_gui_installer_linux/usr/local/bin/structuremap 34 | mkdir dist/structuremap_gui_installer_linux/DEBIAN 35 | cp control dist/structuremap_gui_installer_linux/DEBIAN 36 | dpkg-deb --build --root-owner-group dist/structuremap_gui_installer_linux/ 37 | -------------------------------------------------------------------------------- /release/one_click_macos_gui/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDisplayName 6 | structuremap 7 | CFBundleExecutable 8 | MacOS/structuremap_terminal 9 | CFBundleIconFile 10 | alpha_logo.icns 11 | CFBundleIdentifier 12 | structuremap.0.0.10 13 | CFBundleShortVersionString 14 | 0.0.10 15 | CFBundleInfoDictionaryVersion 16 | 6.0 17 | CFBundleName 18 | structuremap 19 | CFBundlePackageType 20 | APPL 21 | LSBackgroundOnly 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /release/one_click_macos_gui/Resources/conclusion.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |

structuremap

10 |

Thank you for installing structuremap.

11 |
12 | 13 | 14 | -------------------------------------------------------------------------------- /release/one_click_macos_gui/Resources/welcome.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 |
8 |

structuremap

9 |

structuremap is an open-source Python package of the AlphaPept ecosystem.

10 |

structuremap was developed by the Mann Labs at the Max Planck Institute of Biochemistry and the University of Copenhagen and is freely available with an Apache License. Since structuremap uses external Python packages, additional third-party licenses are applicable.

11 |
12 | 13 | 14 | -------------------------------------------------------------------------------- /release/one_click_macos_gui/create_installer_macos.sh: -------------------------------------------------------------------------------- 1 | #!bash 2 | 3 | # Initial cleanup 4 | rm -rf dist 5 | rm -rf build 6 | FILE=structuremap.pkg 7 | if test -f "$FILE"; then 8 | rm structuremap.pkg 9 | fi 10 | cd ../.. 11 | rm -rf dist 12 | rm -rf build 13 | 14 | # Creating a conda environment 15 | conda create -n structuremapinstaller python=3.8 -y 16 | conda activate structuremapinstaller 17 | 18 | # Creating the wheel 19 | python setup.py sdist bdist_wheel 20 | 21 | # Setting up the local package 22 | cd release/one_click_macos_gui 23 | pip install "../../dist/structuremap-0.0.10-py3-none-any.whl[stable]" 24 | 25 | # Creating the stand-alone pyinstaller folder 26 | pip install pyinstaller==4.2 27 | pyinstaller ../pyinstaller/structuremap.spec -y 28 | conda deactivate 29 | 30 | # If needed, include additional source such as e.g.: 31 | # cp ../../structuremap/data/*.fasta dist/structuremap/data 32 | 33 | # Wrapping the pyinstaller folder in a .pkg package 34 | mkdir -p dist/structuremap/Contents/Resources 35 | cp ../logos/alpha_logo.icns dist/structuremap/Contents/Resources 36 | mv dist/structuremap_gui dist/structuremap/Contents/MacOS 37 | cp Info.plist dist/structuremap/Contents 38 | cp structuremap_terminal dist/structuremap/Contents/MacOS 39 | cp ../../LICENSE.txt Resources/LICENSE.txt 40 | cp ../logos/alpha_logo.png Resources/alpha_logo.png 41 | chmod 777 scripts/* 42 | 43 | pkgbuild --root dist/structuremap --identifier de.mpg.biochem.structuremap.app --version 0.0.10 --install-location /Applications/structuremap.app --scripts scripts structuremap.pkg 44 | productbuild --distribution distribution.xml --resources Resources --package-path structuremap.pkg dist/structuremap_gui_installer_macos.pkg 45 | 
-------------------------------------------------------------------------------- /release/one_click_macos_gui/distribution.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | structuremap 0.0.10 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | structuremap.pkg 17 | 18 | -------------------------------------------------------------------------------- /release/one_click_macos_gui/scripts/postinstall: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # make sure this file itself is executable 4 | xattr -dr com.apple.quarantine /Applications/structuremap.app 5 | chmod -R 577 /Applications/structuremap.app 6 | echo "Postinstall finished" 7 | -------------------------------------------------------------------------------- /release/one_click_macos_gui/scripts/preinstall: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # make sure this file itself is executable 4 | rm -rf /Applications/structuremap.app 5 | echo "Preinstall finished" 6 | -------------------------------------------------------------------------------- /release/one_click_macos_gui/structuremap_terminal: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | open -a Terminal "${BASH_SOURCE%/*}/structuremap_gui" 4 | -------------------------------------------------------------------------------- /release/one_click_windows_gui/create_installer_windows.sh: -------------------------------------------------------------------------------- 1 | #!bash 2 | 3 | # Initial cleanup 4 | rm -rf dist 5 | rm -rf build 6 | cd ../.. 
7 | rm -rf dist 8 | rm -rf build 9 | 10 | # Creating a conda environment 11 | conda create -n structuremap_installer python=3.8 -y 12 | conda activate structuremap_installer 13 | 14 | # Creating the wheel 15 | python setup.py sdist bdist_wheel 16 | 17 | # Setting up the local package 18 | cd release/one_click_windows_gui 19 | # Make sure you include the required extra packages and always use the stable or very-stable options! 20 | pip install "../../dist/structuremap-0.0.10-py3-none-any.whl[stable]" 21 | 22 | # Creating the stand-alone pyinstaller folder 23 | pip install pyinstaller==4.2 24 | pyinstaller ../pyinstaller/structuremap.spec -y 25 | conda deactivate 26 | 27 | # If needed, include additional source such as e.g.: 28 | # cp ../../structuremap/data/*.fasta dist/structuremap/data 29 | 30 | # Wrapping the pyinstaller folder in a .exe package 31 | "C:\Program Files (x86)\Inno Setup 6\ISCC.exe" structuremap_innoinstaller.iss 32 | # WARNING: this assumes a static location for innosetup 33 | -------------------------------------------------------------------------------- /release/one_click_windows_gui/structuremap_innoinstaller.iss: -------------------------------------------------------------------------------- 1 | ; Script generated by the Inno Setup Script Wizard. 2 | ; SEE THE DOCUMENTATION FOR DETAILS ON CREATING INNO SETUP SCRIPT FILES! 3 | 4 | #define MyAppName "structuremap" 5 | #define MyAppVersion "0.0.10" 6 | #define MyAppPublisher "Max Planck Institute of Biochemistry and the University of Copenhagen, Mann Labs" 7 | #define MyAppURL "https://github.com/MannLabs/structuremap" 8 | #define MyAppExeName "structuremap_gui.exe" 9 | 10 | [Setup] 11 | ; NOTE: The value of AppId uniquely identifies this application. Do not use the same AppId value in installers for other applications. 12 | ; (To generate a new GUID, click Tools | Generate GUID inside the IDE.) 
13 | AppId={{structuremap_Mann_Labs_MPI_CPR} 14 | AppName={#MyAppName} 15 | AppVersion={#MyAppVersion} 16 | ;AppVerName={#MyAppName} {#MyAppVersion} 17 | AppPublisher={#MyAppPublisher} 18 | AppPublisherURL={#MyAppURL} 19 | AppSupportURL={#MyAppURL} 20 | AppUpdatesURL={#MyAppURL} 21 | DefaultDirName={autopf}\{#MyAppName} 22 | DisableProgramGroupPage=yes 23 | LicenseFile=..\..\LICENSE.txt 24 | ; Uncomment the following line to run in non administrative install mode (install for current user only.) 25 | PrivilegesRequired=lowest 26 | PrivilegesRequiredOverridesAllowed=dialog 27 | OutputDir=dist 28 | OutputBaseFilename=structuremap_gui_installer_windows 29 | SetupIconFile=..\logos\alpha_logo.ico 30 | Compression=lzma 31 | SolidCompression=yes 32 | WizardStyle=modern 33 | 34 | [Languages] 35 | Name: "english"; MessagesFile: "compiler:Default.isl" 36 | 37 | [Tasks] 38 | Name: "desktopicon"; Description: "{cm:CreateDesktopIcon}"; GroupDescription: "{cm:AdditionalIcons}"; Flags: unchecked 39 | 40 | [Files] 41 | Source: "dist\structuremap_gui\{#MyAppExeName}"; DestDir: "{app}"; Flags: ignoreversion 42 | Source: "dist\structuremap_gui\*"; DestDir: "{app}"; Flags: ignoreversion recursesubdirs createallsubdirs 43 | ; NOTE: Don't use "Flags: ignoreversion" on any shared system files 44 | 45 | [Icons] 46 | Name: "{autoprograms}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}" 47 | Name: "{autodesktop}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; Tasks: desktopicon 48 | 49 | [Run] 50 | Filename: "{app}\{#MyAppExeName}"; Description: "{cm:LaunchProgram,{#StringChange(MyAppName, '&', '&&')}}"; Flags: nowait postinstall skipifsilent 51 | -------------------------------------------------------------------------------- /release/pyinstaller/structuremap.spec: -------------------------------------------------------------------------------- 1 | # -*- mode: python ; coding: utf-8 -*- 2 | 3 | import pkgutil 4 | import os 5 | import sys 6 | from PyInstaller.building.build_main import 
from PyInstaller.building.build_main import Analysis, PYZ, EXE, COLLECT, BUNDLE, TOC
import PyInstaller.utils.hooks
import pkg_resources
import importlib.metadata
import structuremap


##################### User definitions
exe_name = 'structuremap_gui'
script_name = 'structuremap_pyinstaller.py'
# Pick the platform-appropriate icon format (.icns on macOS, .ico elsewhere).
if sys.platform[:6] == "darwin":
    icon = '../logos/alpha_logo.icns'
else:
    icon = '../logos/alpha_logo.ico'
block_cipher = None
location = os.getcwd()
project = "structuremap"
remove_tests = True
bundle_name = "structuremap"
#####################


# Transitively walk the project's dependency tree so that PyInstaller
# bundles the data files and hidden imports of every (sub)dependency.
requirements = {
    req.split()[0] for req in importlib.metadata.requires(project)
}
requirements.add(project)
requirements.add("distributed")
hidden_imports = set()
datas = []
binaries = []
checked = set()
while requirements:
    requirement = requirements.pop()
    checked.add(requirement)
    # pywin32 is skipped explicitly; collect_all is not run on it.
    if requirement in ["pywin32"]:
        continue
    try:
        module_version = importlib.metadata.version(requirement)
    except (
        importlib.metadata.PackageNotFoundError,
        ModuleNotFoundError,
        ImportError
    ):
        # Name is not an installed distribution (e.g. a bare module name
        # reported as a hidden import) - nothing to collect for it.
        continue
    try:
        datas_, binaries_, hidden_imports_ = PyInstaller.utils.hooks.collect_all(
            requirement,
            include_py_files=True
        )
    except ImportError:
        continue
    datas += datas_
    # binaries += binaries_
    hidden_imports_ = set(hidden_imports_)
    # collect_all can emit empty-string/None entries; drop them before recursing.
    if "" in hidden_imports_:
        hidden_imports_.remove("")
    if None in hidden_imports_:
        hidden_imports_.remove(None)
    # Recurse only into names that have not been processed yet.
    requirements |= hidden_imports_ - checked
    hidden_imports |= hidden_imports_

if remove_tests:
    # Drop "tests" subpackages to keep the bundle small.
    hidden_imports = sorted(
        [h for h in hidden_imports if "tests" not in h.split(".")]
    )
else:
    hidden_imports = sorted(hidden_imports)


hidden_imports = [h for h in hidden_imports if "__pycache__" not in h]
# Exclude caches and installer-only resource folders from the bundled data.
datas = [d for d in datas if ("__pycache__" not in d[0]) and (d[1] not in [".", "Resources", "scripts"])]

if sys.platform[:5] == "win32":
    # Some conda installs keep the OpenSSL DLLs under Library/bin instead of
    # DLLs; bundle them manually when they are missing from DLLs.
    base_path = os.path.dirname(sys.executable)
    library_path = os.path.join(base_path, "Library", "bin")
    dll_path = os.path.join(base_path, "DLLs")
    libcrypto_dll_path = os.path.join(dll_path, "libcrypto-1_1-x64.dll")
    libssl_dll_path = os.path.join(dll_path, "libssl-1_1-x64.dll")
    libcrypto_lib_path = os.path.join(library_path, "libcrypto-1_1-x64.dll")
    libssl_lib_path = os.path.join(library_path, "libssl-1_1-x64.dll")
    if not os.path.exists(libcrypto_dll_path):
        datas.append((libcrypto_lib_path, "."))
    if not os.path.exists(libssl_dll_path):
        datas.append((libssl_lib_path, "."))

a = Analysis(
    [script_name],
    pathex=[location],
    binaries=binaries,
    datas=datas,
    hiddenimports=hidden_imports,
    hookspath=[],
    runtime_hooks=[],
    # datashader is deliberately excluded from the frozen build.
    excludes=[h for h in hidden_imports if "datashader" in h],
    win_no_prefer_redirects=False,
    win_private_assemblies=False,
    cipher=block_cipher,
    noarchive=False
)
pyz = PYZ(
    a.pure,
    a.zipped_data,
    cipher=block_cipher
)

if sys.platform[:5] == "linux":
    # On Linux everything is packed into a single one-file executable.
    exe = EXE(
        pyz,
        a.scripts,
        a.binaries,
        a.zipfiles,
        a.datas,
        name=bundle_name,
        debug=False,
        bootloader_ignore_signals=False,
        strip=False,
        upx=True,
        console=True,
        upx_exclude=[],
        icon=icon
    )
else:
    # On macOS/Windows a one-folder build is produced (EXE + COLLECT).
    exe = EXE(
        pyz,
        a.scripts,
        # a.binaries,
        a.zipfiles,
        # a.datas,
        exclude_binaries=True,
        name=exe_name,
        debug=False,
        bootloader_ignore_signals=False,
        strip=False,
        upx=True,
        console=True,
        icon=icon
    )
    coll = COLLECT(
        exe,
        a.binaries,
        # a.zipfiles,
        a.datas,
        strip=False,
        upx=True,
        upx_exclude=[],
        name=exe_name
    )
if __name__ == "__main__":
    # Entry point of the frozen (PyInstaller) executable: start the GUI and
    # keep the console open on failure so the user can read the traceback.
    try:
        import structuremap.gui
        import multiprocessing
        # Required for multiprocessing inside a frozen Windows executable.
        multiprocessing.freeze_support()
        structuremap.gui.run()
    # FIX: was "except e:", which raised a NameError ("e" undefined) the
    # moment any exception occurred and thereby masked the original error.
    except Exception:
        import traceback
        import sys
        exc_info = sys.exc_info()
        # Display the *original* exception
        traceback.print_exception(*exc_info)
        # Block so the console window does not close before it can be read.
        input("Something went wrong, press any key to continue...")
def get_requirements():
    """
    Parse the base and extra requirement files declared by the package.

    Reads ``requirements/<file>`` for the base requirements and for every
    extra listed in ``package2install.__extra_requirements__``. For each
    extra, two variants are produced: ``<extra>-stable`` (fully pinned
    specifiers, as written in the file) and ``<extra>`` (package names only).

    Returns
    -------
    : (list, dict)
        The loose base requirements and a dict mapping each extra name
        (and its "-stable" variant) to its list of requirements.
    """
    extra_requirements = {}
    requirement_file_names = package2install.__extra_requirements__
    # The empty-string key denotes the base (non-extra) requirements.
    requirement_file_names[""] = "requirements.txt"
    for extra, requirement_file_name in requirement_file_names.items():
        full_requirement_file_name = os.path.join(
            "requirements",
            requirement_file_name,
        )
        with open(full_requirement_file_name) as requirements_file:
            if extra != "":
                extra_stable = f"{extra}-stable"
            else:
                extra_stable = "stable"
            extra_requirements[extra_stable] = []
            extra_requirements[extra] = []
            for line in requirements_file:
                # The "stable" variant keeps the pinned specifier verbatim.
                extra_requirements[extra_stable].append(line)
                # The loose variant keeps only the package name.
                requirement, *comparison = re.split("[><=~!]", line)
                # FIX: was "requirement == requirement.strip()" - a no-op
                # comparison, so lines without a version specifier kept
                # their trailing newline in the loose requirement list.
                requirement = requirement.strip()
                extra_requirements[extra].append(requirement)
    requirements = extra_requirements.pop("")
    return requirements, extra_requirements
64 | "pywin32==225; sys_platform=='win32'" 65 | ], 66 | extras_require=extra_requirements, 67 | python_requires=package2install.__python_version__, 68 | ) 69 | 70 | 71 | if __name__ == "__main__": 72 | create_pip_wheel() 73 | -------------------------------------------------------------------------------- /structuremap/__init__.py: -------------------------------------------------------------------------------- 1 | #!python 2 | 3 | 4 | __project__ = "structuremap" 5 | __version__ = "0.0.10" 6 | __license__ = "Apache" 7 | __description__ = "An open-source Python package of the AlphaPept ecosystem" 8 | __author__ = "Isabell Bludau & Mann Labs" 9 | __author_email__ = "opensource@alphapept.com" 10 | __github__ = "https://github.com/MannLabs/structuremap" 11 | __keywords__ = [ 12 | "bioinformatics", 13 | "software", 14 | "AlphaPept ecosystem", 15 | ] 16 | __python_version__ = ">=3.8" 17 | __classifiers__ = [ 18 | "Development Status :: 1 - Planning", 19 | # "Development Status :: 2 - Pre-Alpha", 20 | # "Development Status :: 3 - Alpha", 21 | # "Development Status :: 4 - Beta", 22 | # "Development Status :: 5 - Production/Stable", 23 | # "Development Status :: 6 - Mature", 24 | # "Development Status :: 7 - Inactive" 25 | "Intended Audience :: Science/Research", 26 | "License :: OSI Approved :: Apache Software License", 27 | "Operating System :: OS Independent", 28 | "Programming Language :: Python :: 3", 29 | "Topic :: Scientific/Engineering :: Bio-Informatics", 30 | ] 31 | __console_scripts__ = [ 32 | "structuremap=structuremap.cli:run", 33 | ] 34 | __urls__ = { 35 | "Mann Labs at MPIB": "https://www.biochem.mpg.de/mann", 36 | "Mann Labs at CPR": "https://www.cpr.ku.dk/research/proteomics/mann/", 37 | "GitHub": __github__, 38 | # "ReadTheDocs": None, 39 | # "PyPi": None, 40 | # "Scientific paper": None, 41 | } 42 | __extra_requirements__ = { 43 | "development": "requirements_development.txt", 44 | } 45 | 
def scale_pvals(
    pvals: Union[list, np.ndarray],
) -> list:
    """
    Function to scale p-values that are already negative log10 transformed.
    In this context, scaling refers to assigning the p-values to a specific
    significance bin. The resulting significance bins are formatted as string
    for plotting purposes.

    Parameters
    ----------
    pvals : list or np.ndarray of numbers
        List (or any other iterable) of p-values that are already
        negative log10 transformed.

    Returns
    -------
    : list
        The list of significance bins as strings (e.g. '> 50';
        values below the smallest threshold are binned as '> 0').
    """
    # NOTE: annotation fixed from np.array (a function, not a type) to
    # np.ndarray; behavior is unchanged.
    steps = [1000, 100, 50, 10, 5, 2]
    binned = []
    for pval in pvals:
        # Assign the largest threshold the value reaches; 0 if none.
        threshold = max((step for step in steps if pval >= step), default=0)
        binned.append(f'> {threshold}')
    return binned
75 | """ 76 | df = data.copy(deep=True) 77 | df['ptm'] = [re.sub('_', ' ', p) for p in df['ptm']] 78 | category_dict = {} 79 | if ptm_select is not None: 80 | ptm_select = [re.sub('_', ' ', p) for p in ptm_select] 81 | df = df[df.ptm.isin(ptm_select)] 82 | category_dict['ptm'] = ptm_select 83 | if roi_select is not None: 84 | df = df[df.roi.isin(roi_select)] 85 | category_dict['roi'] = roi_select 86 | df['log_odds_ratio'] = np.log(df['oddsr']) 87 | df['neg_log_adj_p'] = -np.log10(df.p_adj_bh) 88 | df['neg_log_adj_p_round'] = scale_pvals(df.neg_log_adj_p) 89 | category_dict['neg_log_adj_p_round'] = list(reversed([ 90 | '> 1000', '> 100', '> 50', '> 10', '> 5', '> 2', '> 0'])) 91 | color_dict = {'> 1000': 'rgb(120,0,0)', 92 | '> 100': 'rgb(177, 63, 100)', 93 | '> 50': 'rgb(221, 104, 108)', 94 | '> 10': 'rgb(241, 156, 124)', 95 | '> 5': 'rgb(245, 183, 142)', 96 | '> 2': 'rgb(246, 210, 169)', 97 | '> 0': 'grey'} 98 | fig = px.bar(df, 99 | x='ptm', 100 | y='log_odds_ratio', 101 | labels=dict({'ptm': 'PTM', 102 | 'log_odds_ratio': 'log odds ratio', 103 | 'neg_log_adj_p_round': '-log10 (adj. p-value)'}), 104 | color='neg_log_adj_p_round', 105 | facet_col='roi', 106 | hover_data=['oddsr', 'p_adj_bh'], 107 | category_orders=category_dict, 108 | color_discrete_map=color_dict, 109 | template="simple_white", 110 | ) 111 | if plot_width is None: 112 | p_width = 400+(len(df.ptm.unique())*20) 113 | elif plot_width > 0: 114 | p_width = plot_width 115 | else: 116 | raise ValueError( 117 | f"{plot_width} is not a valid parameter for plot_width. plot_width needs to be a positive integer.") 118 | if plot_height is None: 119 | p_height = 500 120 | elif plot_height > 0: 121 | p_height = plot_height 122 | else: 123 | raise ValueError( 124 | f"{plot_height} is not a valid parameter for plot_height. 
def plot_ptm_colocalization(
    df,
    name='Fraction of modified acceptor residues',
    context=None,
    plot_width: int = None,
    plot_height: int = None,
):
    """
    Plot PTMs co-localization.

    Parameters
    ----------
    df : pd.DataFrame
        Dataframe with results from
        structuremap.processing.evaluate_ptm_colocalization.
    name : str
        Name of the resulting plot.
        Default is 'Fraction of modified acceptor residues'.
    context : str
        Either '3D', '1D' or None.
        Default is None, which shows both 1D and 3D results.
    plot_width : int
        Integer specifying plot width. Default is None.
    plot_height : int
        Integer specifying plot height. Default is None.

    Raises
    ------
    ValueError
        If plot_width or plot_height is not a positive integer,
        or if context is not '1D', '3D' or None.

    Returns
    -------
    : plot
        Figure showing PTMs co-localization across distance bins.
    """
    # FIX: operate on a copy so the caller's dataframe is not mutated by the
    # 'variable_sig' assignment below (consistent with plot_enrichment,
    # which deep-copies its input).
    df = df.copy()
    # Default dimensions depend on whether a single context is shown
    # (wide single-row layout) or both contexts (tall faceted layout).
    if plot_width is None:
        if context in ['1D', '3D']:
            p_width = 1100
        else:
            p_width = 1000
    elif plot_width > 0:
        p_width = plot_width
    else:
        raise ValueError(
            f"{plot_width} is not a valid parameter for plot_width. plot_width needs to be a positive integer.")
    if plot_height is None:
        if context in ['1D', '3D']:
            p_height = 350
        else:
            p_height = 1800
    elif plot_height > 0:
        p_height = plot_height
    else:
        raise ValueError(
            f"{plot_height} is not a valid parameter for plot_height. plot_height needs to be a positive integer.")
    # Mark observed points that reach significance for separate coloring.
    df['variable_sig'] = np.where(((df['pvalue']<=0.01) & (df['variable']=='Observed')), 'Observed (p <= 0.01)', df['variable'])
    if context in ['1D', '3D']:
        df = df[df.context == context]
        fig = px.scatter(
            df,
            x="cutoff",
            y="value",
            error_y="std_random_fraction",
            color="variable_sig",
            facet_col="ptm_types",
            facet_col_spacing=0.05,
            labels={"value": "Fraction of modified acceptors",
                    "cutoff": "distance bin",
                    "ptm_types": "",
                    "variable_sig": ""},
            color_discrete_sequence=['rgb(177, 63, 100)', '#FA8072', 'grey'])
        # Decouple the y-axes of all facet columns (previously seven
        # copy-pasted update_yaxes calls, one per column).
        for facet_col in range(1, 8):
            fig = fig.update_yaxes(
                matches=None, showticklabels=True, col=facet_col)
    elif context is None:
        fig = px.scatter(
            df,
            x="cutoff",
            y="value",
            error_y="std_random_fraction",
            color="variable_sig",
            facet_row="ptm_types",
            facet_col="context",
            labels={"value": "Fraction of modified acceptors",
                    "cutoff": "distance bin",
                    "ptm_types": "",
                    "variable_sig": ""},
            color_discrete_sequence=['rgb(177, 63, 100)', '#FA8072', 'grey'])
        fig = fig.update_yaxes(matches=None)
    else:
        raise ValueError(f"{context} is not a valid context")
    fig = fig.update_layout(width=p_width, height=p_height)
    fig = fig.update_layout(title=name,
                            template="simple_white")
    # Configure the modebar download button to export SVG.
    config = {'toImageButtonOptions': {'format': 'svg', 'filename': name}}
    return fig.show(config=config)
def download_alphafold_cif(
    proteins: list,
    out_folder: str,
    out_format: str = "{}.cif",
    alphafold_cif_url: str = 'https://alphafold.ebi.ac.uk/files/AF-{protein}-F1-model_v{version}.cif',
    timeout: int = 60,
    verbose_log: bool = False,
) -> tuple:
    """
    Function to download .cif files of protein structures predicted by AlphaFold.

    Parameters
    ----------
    proteins : list
        List (or any other iterable) of UniProt protein accessions for which to
        download the structures.
    out_folder : str
        Path to the output folder.
    out_format : str
        The default file name of the cif files to be saved.
        The brackets {} are replaced by a protein name from the proteins list.
        Default is '{}.cif'.
    alphafold_cif_url : str
        The base link from where to download cif files.
        The {protein} and {version} placeholders are replaced by a protein
        accession and an AlphaFold model version, respectively.
        Default is
        'https://alphafold.ebi.ac.uk/files/AF-{protein}-F1-model_v{version}.cif'.
    timeout : int
        Time to wait for reconnection of downloads.
        Default is 60.
    verbose_log: bool
        Whether to write verbose logging information.
        Default is False.

    Returns
    -------
    : (list, list, list)
        The lists of valid, invalid and existing protein accessions.
    """
    socket.setdefaulttimeout(timeout)
    valid_proteins = []
    invalid_proteins = []
    existing_proteins = []
    # Newest-first list of AlphaFold DB model versions to probe.
    AFversions = [9, 8, 7, 6, 5, 4, 3, 2, 1] #Dirty fix, but should hold up for the foreseeable future

    if not os.path.exists(out_folder):
        os.makedirs(out_folder)
    for protein in tqdm.tqdm(proteins):
        name_out = os.path.join(
            out_folder,
            out_format.format(protein)
        )
        if os.path.isfile(name_out):
            # Skip proteins whose structure file was already downloaded.
            existing_proteins.append(protein)
        else:
            # Probe versions newest-first until one responds with HTTP 200.
            # NOTE(review): requests.get fetches the full file body just to
            # check the status, and urlretrieve below downloads it again -
            # consider stream=True or reusing the response content.
            for AFversion in AFversions:
                response = requests.get(alphafold_cif_url.format(protein=protein,version=AFversion))
                if response.status_code == 200:
                    latest_AFversion = AFversion
                    break
            else:
                # No version found; 404 is used as a sentinel so the URL
                # below fails with HTTPError and the protein is marked invalid.
                latest_AFversion = 404
            name_in = alphafold_cif_url.format(protein=protein,version=latest_AFversion)
            try:
                urllib.request.urlretrieve(name_in, name_out)
                valid_proteins.append(protein)
            except urllib.error.HTTPError:
                if verbose_log:
                    logging.info(f"Protein {protein} not available for CIF download.")
                invalid_proteins.append(protein)
    logging.info(f"Valid proteins: {len(valid_proteins)}")
    logging.info(f"Invalid proteins: {len(invalid_proteins)}")
    logging.info(f"Existing proteins: {len(existing_proteins)}")
    return(valid_proteins, invalid_proteins, existing_proteins)
def download_alphafold_pae(
    proteins: list,
    out_folder: str,
    out_format: str = "pae_{}.hdf",
    alphafold_pae_url: str = 'https://alphafold.ebi.ac.uk/files/AF-{protein}-F1-predicted_aligned_error_v{version}.json',
    timeout: int = 60,
    verbose_log: bool = False,
) -> tuple:
    """
    Function to download paired aligned errors (pae) for protein structures
    predicted by AlphaFold.

    Parameters
    ----------
    proteins : list
        List (or any other iterable) of UniProt protein accessions for which to
        download the structures.
    out_folder : str
        Path to the output folder.
    out_format : str
        The default file name of the pae files to be saved.
        The brackets {} are replaced by a protein name from the proteins list.
        Default is 'pae_{}.hdf'.
    alphafold_pae_url : str
        The base link from where to download pae files.
        The {protein} and {version} placeholders are replaced by a protein
        accession and an AlphaFold model version, respectively.
        Default is
        'https://alphafold.ebi.ac.uk/files/AF-{protein}-F1-predicted_aligned_error_v{version}.json'.
    timeout : int
        Time to wait for reconnection of downloads.
        Default is 60.
    verbose_log: bool
        Whether to write verbose logging information.
        Default is False.

    Returns
    -------
    : (list, list, list)
        The valid, invalid and existing proteins.
    """
    socket.setdefaulttimeout(timeout)
    valid_proteins = []
    invalid_proteins = []
    existing_proteins = []
    # Newest-first list of AlphaFold DB model versions to probe.
    AFversions = [9, 8, 7, 6, 5, 4, 3, 2, 1]  # Dirty fix, but should hold up for the foreseeable future
    if not os.path.exists(out_folder):
        os.makedirs(out_folder)
    for protein in tqdm.tqdm(proteins):
        name_out = os.path.join(
            out_folder,
            out_format.format(protein)
        )
        if os.path.isfile(name_out):
            # Skip proteins whose PAE file was already downloaded.
            existing_proteins.append(protein)
            continue
        try:
            # Probe versions newest-first until one responds with HTTP 200.
            for AFversion in AFversions:
                response = requests.get(alphafold_pae_url.format(protein=protein, version=AFversion))
                if response.status_code == 200:
                    latest_AFversion = AFversion
                    break
            else:
                # No version found; 404 is a sentinel so urlretrieve below
                # raises HTTPError and the protein is marked invalid.
                latest_AFversion = 404
            name_in = alphafold_pae_url.format(protein=protein, version=latest_AFversion)
            with tempfile.TemporaryDirectory() as tmp_pae_dir:
                # FIX: the f-prefix was missing, so the literal placeholder
                # "{protein}" ended up in the temporary file name.
                tmp_pae_file_name = os.path.join(
                    tmp_pae_dir,
                    f"pae_{protein}.json"
                )
                urllib.request.urlretrieve(name_in, tmp_pae_file_name)
                with open(tmp_pae_file_name) as tmp_pae_file:
                    data = json.loads(tmp_pae_file.read())
            # The JSON schema changed with model version 3.
            if latest_AFversion < 3:
                dist = np.array(data[0]['distance'])
            else:
                dist = [item for sublist in data[0]["predicted_aligned_error"] for item in sublist]
            data_list = [('dist', dist)]
            # FIX: the loop variable no longer shadows the json payload
            # variable "data" above.
            if getattr(sys, 'frozen', False):
                # Frozen builds ship an h5py without lzf; fall back to gzip.
                print('Using frozen h5py w/ gzip compression')
                with h5py.File(name_out, 'w') as hdf_root:
                    for key, values in data_list:
                        print(f'h5py {key}')
                        hdf_root.create_dataset(
                            name=key,
                            data=values,
                            compression="gzip",
                            shuffle=True,
                        )
                print('Done')
            else:
                with h5py.File(name_out, 'w') as hdf_root:
                    for key, values in data_list:
                        hdf_root.create_dataset(
                            name=key,
                            data=values,
                            compression="lzf",
                            shuffle=True,
                        )
            valid_proteins.append(protein)
        except urllib.error.HTTPError:
            if verbose_log:
                logging.info(f"Protein {protein} not available for PAE download.")
            # TODO: HDF IO errors should probably be handled separately.
            invalid_proteins.append(protein)
    logging.info(f"Valid proteins: {len(valid_proteins)}")
    logging.info(f"Invalid proteins: {len(invalid_proteins)}")
    logging.info(f"Existing proteins: {len(existing_proteins)}")
    return(valid_proteins, invalid_proteins, existing_proteins)
def format_alphafold_data(
    directory: str,
    protein_ids: list,
) -> pd.DataFrame:
    """
    Function to import structure files and format them into a combined dataframe.

    Parameters
    ----------
    directory : str
        Path to the folder with all .cif files.
    protein_ids : list
        List of UniProt protein accessions to create an annotation table.
        If an empty list is provided, all proteins in the provided directory
        are used to create the annotation table.

    Returns
    -------
    : pd.DataFrame
        A dataframe with structural information presented in following columns:
        ['protein_id', 'protein_number', 'AA', 'position', 'quality',
        'x_coord_c', 'x_coord_ca', 'x_coord_cb', 'x_coord_n', 'y_coord_c',
        'y_coord_ca', 'y_coord_cb', 'y_coord_n', 'z_coord_c', 'z_coord_ca',
        'z_coord_cb', 'z_coord_n', 'secondary_structure', 'structure_group',
        'BEND', 'HELX', 'STRN', 'TURN', 'unstructured']
    """

    alphafold_annotation_l = []
    protein_number = 0

    for file in tqdm.tqdm(sorted(os.listdir(directory))):

        # FIX: require the actual ".cif" extension (was endswith("cif"),
        # which also matched names merely ending in the letters "cif").
        if file.endswith(".cif"):
            filepath = os.path.join(directory, file)

            # FIX: strip only the terminal ".cif". The previous pattern
            # r'.cif' had an unescaped dot and no anchor, so it could also
            # delete an arbitrary character followed by "cif" occurring
            # inside the accession itself.
            protein_id = re.sub(r'\.cif$', '', file)

            if ((protein_id in protein_ids) or (len(protein_ids) == 0)):

                protein_number += 1

                structure = Bio.PDB.MMCIF2Dict.MMCIF2Dict(filepath)

                # One row per atom record; restricted to backbone atoms below.
                df = pd.DataFrame({'protein_id': structure['_atom_site.pdbx_sifts_xref_db_acc'],
                                   'protein_number': protein_number,
                                   'AA': structure['_atom_site.pdbx_sifts_xref_db_res'],
                                   'position': structure['_atom_site.label_seq_id'],
                                   'quality': structure['_atom_site.B_iso_or_equiv'],
                                   'atom_id': structure['_atom_site.label_atom_id'],
                                   'x_coord': structure['_atom_site.Cartn_x'],
                                   'y_coord': structure['_atom_site.Cartn_y'],
                                   'z_coord': structure['_atom_site.Cartn_z']})

                df = df[df.atom_id.isin(['CA', 'CB', 'C', 'N'])].reset_index(drop=True)
                # Pivot to one row per residue with one coordinate column
                # per (axis, atom) combination.
                df = df.pivot(index=['protein_id',
                                     'protein_number',
                                     'AA', 'position',
                                     'quality'],
                              columns="atom_id")
                df = pd.DataFrame(df.to_records())

                df = df.rename(columns={"('x_coord', 'CA')": "x_coord_ca",
                                        "('y_coord', 'CA')": "y_coord_ca",
                                        "('z_coord', 'CA')": "z_coord_ca",
                                        "('x_coord', 'CB')": "x_coord_cb",
                                        "('y_coord', 'CB')": "y_coord_cb",
                                        "('z_coord', 'CB')": "z_coord_cb",
                                        "('x_coord', 'C')": "x_coord_c",
                                        "('y_coord', 'C')": "y_coord_c",
                                        "('z_coord', 'C')": "z_coord_c",
                                        "('x_coord', 'N')": "x_coord_n",
                                        "('y_coord', 'N')": "y_coord_n",
                                        "('z_coord', 'N')": "z_coord_n"})

                # Coordinates/positions arrive as strings from the cif parser.
                df = df.apply(pd.to_numeric, errors='ignore')

                df['secondary_structure'] = 'unstructured'

                # Annotate residues that fall into any secondary-structure
                # segment declared in the cif file.
                if '_struct_conf.conf_type_id' in structure.keys():
                    start_idx = [int(i) for i in structure['_struct_conf.beg_label_seq_id']]
                    end_idx = [int(i) for i in structure['_struct_conf.end_label_seq_id']]
                    note = structure['_struct_conf.conf_type_id']

                    for i in np.arange(0, len(start_idx)):
                        df['secondary_structure'] = np.where(
                            df['position'].between(
                                start_idx[i],
                                end_idx[i]),
                            note[i],
                            df['secondary_structure'])

                alphafold_annotation_l.append(df)

    alphafold_annotation = pd.concat(alphafold_annotation_l)
    alphafold_annotation = alphafold_annotation.sort_values(
        by=['protein_number', 'position']).reset_index(drop=True)

    # Collapse e.g. "HELX_P" into its coarse group "HELX" and one-hot encode.
    alphafold_annotation['structure_group'] = [re.sub('_.*', '', i)
                                               for i in alphafold_annotation[
                                               'secondary_structure']]
    str_oh = pd.get_dummies(alphafold_annotation['structure_group'],
                            dtype='int64')
    alphafold_annotation = alphafold_annotation.join(str_oh)

    return(alphafold_annotation)
@numba.njit
def get_3d_dist(
    coordinate_array_1: np.ndarray,
    coordinate_array_2: np.ndarray,
    idx_1: int,
    idx_2: int
) -> float:
    """
    Function to get the distance between two coordinates in 3D space.
    Input are two coordinate arrays and two respective indices that specify
    for which points in the coordinate arrays the distance should be calculated.

    Parameters
    ----------
    coordinate_array_1 : np.ndarray
        Array of 3D coordinates.
        Must be 3d, e.g. np.float64[:,3]
    coordinate_array_2 : np.ndarray
        Array of 3D coordinates.
        Must be 3d, e.g. np.float64[:,3]
    idx_1 : int
        Integer to select an index in coordinate_array_1.
    idx_2 : int
        Integer to select an index in coordinate_array_2.

    Returns
    -------
    : float
        Distance between the two selected 3D coordinates.
    """
    # Componentwise differences between the two selected points.
    delta_x = coordinate_array_1[idx_1, 0] - coordinate_array_2[idx_2, 0]
    delta_y = coordinate_array_1[idx_1, 1] - coordinate_array_2[idx_2, 1]
    delta_z = coordinate_array_1[idx_1, 2] - coordinate_array_2[idx_2, 2]
    # Euclidean norm of the difference vector.
    return np.sqrt(delta_x**2 + delta_y**2 + delta_z**2)
389 | """ 390 | theta = np.radians(theta) 391 | axis = axis / np.linalg.norm(axis) 392 | a = np.cos(theta / 2.0) 393 | b, c, d = -axis * np.sin(theta / 2.0) 394 | aa, bb, cc, dd = a * a, b * b, c * c, d * d 395 | bc, ad, ac, ab, bd, cd = b * c, a * d, a * c, a * b, b * d, c * d 396 | rotation_matrix = np.array( 397 | [[aa + bb - cc - dd, 2 * (bc + ad), 2 * (bd - ac)], 398 | [2 * (bc - ad), aa + cc - bb - dd, 2 * (cd + ab)], 399 | [2 * (bd + ac), 2 * (cd - ab), aa + dd - bb - cc]]) 400 | rotated_vector = np.dot(rotation_matrix, vector) 401 | return rotated_vector 402 | 403 | 404 | @numba.njit 405 | def get_gly_vector( 406 | coord_a: np.ndarray, 407 | coord_c: np.ndarray, 408 | coord_n: np.ndarray, 409 | idx_1: int, 410 | theta: float = -120 411 | ) -> np.ndarray: 412 | """ 413 | Return a pseudo vector Ca -> Cb for a Glycine residue. 414 | The pseudo vector is centered at the origin and the 415 | Ccoord=N coord rotated over -120 degrees 416 | along the CA-C axis (see Bio.PDB package). 417 | 418 | Parameters 419 | ---------- 420 | coord_a : np.ndarray 421 | Array of 3D coordinates of alpha carbon atoms across different 422 | amino acids. 423 | coord_c : np.ndarray 424 | Array of 3D coordinates of carboxy carbon atoms across different 425 | amino acids. 426 | coord_n : np.ndarray 427 | Array of 3D coordinates of amino nitrogen atoms across different 428 | amino acids. 429 | idx_1 : int 430 | Integer to select a specific amino acid in the coordinate arrays. 431 | theta : float 432 | The theta for the rotation. 433 | Default is -120. 434 | 435 | Returns 436 | ------- 437 | : np.ndarray 438 | Pseudo vector Ca -> Cb for a Glycine residue. 
439 | """ 440 | # get unit vectors 441 | uv_n = (coord_n[idx_1] - coord_a[idx_1]) / get_3d_dist(coord_n, coord_a, idx_1, idx_1) 442 | uv_c = (coord_c[idx_1] - coord_a[idx_1]) / get_3d_dist(coord_c, coord_a, idx_1, idx_1) 443 | # rotation of uv_n around uv_c over -120 deg 444 | uv_b = rotate_vector_around_axis(vector=uv_n, axis=uv_c, theta=theta) 445 | return uv_b 446 | 447 | @numba.njit 448 | def get_angle( 449 | coord_a: np.ndarray, 450 | coord_b: np.ndarray, 451 | coord_c: np.ndarray, 452 | coord_n: np.ndarray, 453 | idx_1: int, 454 | idx_2: int 455 | ) -> float: 456 | """ 457 | Calculate the angle between the vector of the target amino acid's 458 | side chain (Ca1 -> Cb1) and the vector pointing from the target 459 | amino acid's alpha carbon atom to a different amino acid's 460 | alpha carbon atom (Ca1 -> Ca2). 461 | 462 | Parameters 463 | ---------- 464 | coord_a : np.ndarray 465 | Array of 3D coordinates of alpha carbon atoms across different 466 | amino acids. 467 | coord_b : np.ndarray 468 | Array of 3D coordinates of beta carbon atoms across different 469 | amino acids. 470 | coord_c : np.ndarray 471 | Array of 3D coordinates of carboxy carbon atoms across different 472 | amino acids. 473 | coord_n : np.ndarray 474 | Array of 3D coordinates of amino nitrogen atoms across different 475 | amino acids. 476 | idx_1 : int 477 | Integer to select a first amino acid in the coordinate arrays. 478 | idx_2 : int 479 | Integer to select a second amino acid in the coordinate arrays. 480 | 481 | Returns 482 | ------- 483 | : float 484 | Angle between the side chain of the first amino acid and a second 485 | amino acid. 486 | """ 487 | if np.isnan(coord_b[idx_1, 0]): 488 | # Get pseudo vector Ca -> Cb for a Gly residue. 
489 | uv_1 = get_gly_vector(coord_a, 490 | coord_c, 491 | coord_n, 492 | idx_1) 493 | else: 494 | # Calculate unit vector for Ca1 -> Cb1 495 | uv_1 = (coord_b[idx_1] - coord_a[idx_1]) / get_3d_dist(coord_b, coord_a, idx_1, idx_1) 496 | # Calculate unit vector for Ca1 -> Ca2 497 | uv_d = (coord_a[idx_2] - coord_a[idx_1]) / get_3d_dist(coord_a, coord_a, idx_1, idx_2) 498 | # Calculate the angle between the two unit vectors 499 | dot_p = np.dot(uv_1, uv_d) 500 | # angle = np.arccos(np.clip(dot_p, -1.0, 1.0)) 501 | angle = np.arccos(dot_p) 502 | # Convert radians in degrees 503 | angle_deg = np.rad2deg(angle) 504 | return(angle_deg) 505 | 506 | 507 | @numba.njit 508 | def get_paired_error( 509 | position: np.ndarray, 510 | error_dist: np.ndarray, 511 | idx_1: int, 512 | idx_2: int 513 | ) -> float: 514 | """ 515 | Extract paired aligned error of AlphaFold from a complete 516 | error matrix (error_dist) at specific sequence positions. 517 | 518 | Parameters 519 | ---------- 520 | position : np.ndarray 521 | Array of amino acid positions from which to choose specific indeces. 522 | error_dist : np.ndarray 523 | Matrix of paired aligned errors of AlphaFold across all amino acids 524 | in a protein qequence. 525 | idx_1 : int 526 | Integer to select a first amino acid in the position array. 527 | idx_2 : int 528 | Integer to select a second amino acid in the position array. 529 | 530 | Returns 531 | ------- 532 | : float 533 | Paired aligned error of the first amino acid and a second amino acid. 534 | """ 535 | pos1 = position[idx_1] 536 | pos2 = position[idx_2] 537 | err = error_dist[pos1 - 1, pos2 - 1] 538 | return(err) 539 | 540 | 541 | @numba.njit 542 | def get_neighbors( 543 | idx_list: np.ndarray, # Technically this is not a list and it could/should be renamed. 
544 | coord_a: np.ndarray, 545 | coord_b: np.ndarray, 546 | coord_c: np.ndarray, 547 | coord_n: np.ndarray, 548 | position: np.ndarray, 549 | error_dist: np.ndarray, 550 | max_dist: float, 551 | max_angle: float 552 | ) -> np.ndarray: 553 | """ 554 | Get the number of amino acids within the specified distance and angle 555 | relative to the target amino acid. 556 | 557 | Parameters 558 | ---------- 559 | idx_list : np.ndarray 560 | Array of amino acid indeces. 561 | coord_a : np.ndarray 562 | Array of 3D coordinates of alpha carbon atoms across different 563 | amino acids. 564 | coord_b : np.ndarray 565 | Array of 3D coordinates of beta carbon atoms across different 566 | amino acids. 567 | coord_c : np.ndarray 568 | Array of 3D coordinates of carboxy carbon atoms across different 569 | amino acids. 570 | coord_n : np.ndarray 571 | Array of 3D coordinates of amino nitrogen atoms across different 572 | amino acids. 573 | position : np.ndarray 574 | Array of amino acid positions. 575 | error_dist: : np.ndarray 576 | Matrix of paired aligned errors of AlphaFold across all amino acids 577 | in a protein qequence. 578 | max_dist : float 579 | Float specifying the maximum distance between two amino acids. 580 | max_angle : float 581 | Float specifying the maximum angle (in degrees) between two 582 | amino acids. 583 | 584 | Returns 585 | ------- 586 | : np.ndarray 587 | Number of amino acids within the specified distance and angle. 
588 | """ 589 | res = [] 590 | for x1 in idx_list: 591 | n_neighbors = 0 592 | for x2 in idx_list: 593 | if x1 != x2: 594 | paired_error = get_paired_error( 595 | position=position, 596 | error_dist=error_dist, 597 | idx_1=x1, 598 | idx_2=x2) 599 | if (paired_error <= max_dist): 600 | dist = get_3d_dist( 601 | coordinate_array_1=coord_a, 602 | coordinate_array_2=coord_a, 603 | idx_1=x1, 604 | idx_2=x2) 605 | if (dist + paired_error <= max_dist): 606 | angle = get_angle( 607 | coord_a=coord_a, 608 | coord_b=coord_b, 609 | coord_c=coord_c, 610 | coord_n=coord_n, 611 | idx_1=x1, 612 | idx_2=x2) 613 | if angle <= max_angle: 614 | n_neighbors += 1 615 | res.append(n_neighbors) 616 | return(np.array(res)) 617 | 618 | 619 | @numba.njit 620 | def find_end( 621 | label: int, 622 | start_index: int, 623 | values: np.ndarray 624 | ) -> int: 625 | """Find when the label changes. 626 | 627 | This assumes a sorted values array. 628 | 629 | Parameters 630 | ---------- 631 | label : int 632 | The label of interest. 633 | start_index : int 634 | The previous endindex index of the previous label, 635 | which normally is the start_index for the current label. 636 | values : int 637 | An array with values. 638 | 639 | Returns 640 | ------- 641 | int 642 | The end_index index of the label in values. 643 | """ 644 | while values[start_index] == label: 645 | start_index += 1 646 | if start_index == len(values): 647 | break 648 | return start_index 649 | 650 | 651 | def partition_df_by_prots( 652 | df: pd.DataFrame, 653 | ) -> pd.DataFrame: 654 | """ 655 | Generator function to split a dataframe into seperate proteins. 656 | 657 | NOTE: This function is significantly faster if the input df is already 658 | sorted by protein_number! 659 | 660 | Parameters 661 | ---------- 662 | df : pd.DataFrame 663 | pd.DataFrame of formatted AlphaFold data across various proteins. 664 | 665 | Yields 666 | ------- 667 | : pd.DataFrame 668 | Subset of the input dataframe only containing a single protein. 
669 | """ 670 | df = df.astype({'position': 'int64'}) 671 | if not df.protein_number.is_monotonic_increasing: 672 | df = df.sort_values(by='protein_number').reset_index(drop=True) 673 | unique_proteins = df.protein_number.unique() 674 | end = 0 675 | for protein_i in tqdm.tqdm(unique_proteins): 676 | start = end 677 | end = find_end(protein_i, end, df.protein_number.values) 678 | prot_df = df[start:end] 679 | if not prot_df.position.is_monotonic_increasing: 680 | prot_df.sort_values(by='position', inplace=True) 681 | yield prot_df.reset_index(drop=True) 682 | 683 | 684 | def annotate_accessibility( 685 | df: pd.DataFrame, 686 | max_dist: float, 687 | max_angle: float, 688 | error_dir: str, 689 | filename_format: str = "pae_{}.hdf", 690 | ) -> pd.DataFrame: 691 | """ 692 | Half sphere exposure as calculated in 693 | https://onlinelibrary.wiley.com/doi/10.1002/prot.20379 694 | but with paired aligned error metric included. 695 | 696 | Parameters 697 | ---------- 698 | df : pd.DataFrame 699 | pd.DataFrame of formatted AlphaFold data across various proteins. 700 | Such a dataframe is gerated by format_alphafold_data. 701 | max_dist : float 702 | Float specifying the maximum distance between two amino acids. 703 | max_angle : float 704 | Float specifying the maximum angle (in degrees) between two 705 | amino acids. 706 | error_dir: : str 707 | Path to the directory where the hdf files containing the matrices of 708 | paired aligned errors of AlphaFold are stored. This should correspond 709 | to the out_folder used in download_alphafold_pae. 710 | filename_format : str 711 | The file name of the pae files saved by download_alphafold_pae. 712 | The brackets {} are replaced by a protein name from the proteins list. 713 | Default is 'pae_{}.hdf'. 714 | 715 | Returns 716 | ------- 717 | : pd.DataFrame 718 | Dataframe repporting the number of neighboring amino acids at the 719 | specified maximum distance and angle per protein, amino acid and 720 | position. 
721 | """ 722 | proteins = list() 723 | AA = list() 724 | AA_p = list() 725 | a_AA = list() 726 | for df_prot in partition_df_by_prots(df): 727 | protein_accession = df_prot.protein_id.values[0] 728 | if error_dir is not None: 729 | with h5py.File(os.path.join( 730 | error_dir, 731 | filename_format.format(protein_accession)) 732 | ) as hdf_root: 733 | error_dist = hdf_root['dist'][...] 734 | size = int(np.sqrt(len(error_dist))) 735 | error_dist = error_dist.reshape(size, size) 736 | use_pae = 'pae' 737 | else: 738 | error_dist = np.zeros((df_prot.shape[0], df_prot.shape[0])) 739 | use_pae = 'nopae' 740 | idx_list = np.arange(0, df_prot.shape[0]) 741 | res_a = get_neighbors( 742 | idx_list=idx_list, 743 | coord_a=np.vstack([df_prot.x_coord_ca.values, 744 | df_prot.y_coord_ca.values, 745 | df_prot.z_coord_ca.values]).T, 746 | coord_b=np.vstack([df_prot.x_coord_cb.values, 747 | df_prot.y_coord_cb.values, 748 | df_prot.z_coord_cb.values]).T, 749 | coord_c=np.vstack([df_prot.x_coord_c.values, 750 | df_prot.y_coord_c.values, 751 | df_prot.z_coord_c.values]).T, 752 | coord_n=np.vstack([df_prot.x_coord_n.values, 753 | df_prot.y_coord_n.values, 754 | df_prot.z_coord_n.values]).T, 755 | # If this step is slow, consider avoiding the vstack to create new arrays 756 | # Alternatively, it might be faster to use e.g. df[["x", "y", "z"]].values 757 | # as pandas might force this into a view rather than a new array 758 | position=df_prot.position.values, 759 | error_dist=error_dist, 760 | max_dist=max_dist, 761 | max_angle=max_angle) 762 | proteins.append(df_prot.protein_id.values) 763 | # using numeracal prot_numbers might be better. 764 | # In general it is good practice to reduce strings/objects in arrays/dfs 765 | # as much possible. Especially try to avoid repetetion of such types and 766 | # just use indices and a reference array. Rarely do you need this actual 767 | # values anyways. 
768 | AA.append(df_prot.AA.values) 769 | AA_p.append(df_prot.position.values) 770 | a_AA.append(res_a) 771 | proteins = np.concatenate(proteins) 772 | AA = np.concatenate(AA) 773 | AA_p = np.concatenate(AA_p) 774 | a_AA = np.concatenate(a_AA) 775 | accessibility_df = pd.DataFrame({'protein_id': proteins, 776 | 'AA': AA, 'position': AA_p}) 777 | accessibility_df[f'nAA_{max_dist}_{max_angle}_{use_pae}'] = a_AA 778 | return(accessibility_df) 779 | 780 | 781 | @numba.njit 782 | def smooth_score(score: np.ndarray, 783 | half_window: int 784 | ) -> np.ndarray: 785 | """ 786 | Get an average value for each position in a score array, considering all 787 | values within a window that spans up to half_window positions before and 788 | after a given target position. 789 | 790 | Parameters 791 | ---------- 792 | score : np.ndarray 793 | Array of numeric score values. 794 | half_window : int 795 | Integer specifying the number of positions to consider both before and 796 | after a given target position. 797 | 798 | Returns 799 | ------- 800 | : np.ndarray 801 | Array of smoothed score values. 802 | """ 803 | smooth_score = [] 804 | for i in range(len(score)): 805 | low_window_bound = i - half_window 806 | if low_window_bound < 0: 807 | low_window_bound = 0 808 | high_window_bound = i + half_window 809 | if high_window_bound > len(score): 810 | high_window_bound = len(score) 811 | window_score = score[low_window_bound: high_window_bound + 1] 812 | window_mean = np.mean(window_score) 813 | smooth_score.append(window_mean) 814 | return np.array(smooth_score) 815 | 816 | 817 | def get_smooth_score(df: pd.DataFrame, 818 | scores: np.ndarray, 819 | half_windows: list, 820 | ) -> pd.DataFrame: 821 | """ 822 | Select columns in a dataframe and smooth the values per protein based on a 823 | provided window. 824 | 825 | Parameters 826 | ---------- 827 | df : pd.DataFrame 828 | Dataframe with AlphaFold annotations, as generated by 829 | format_alphafold_data. 
    scores : np.ndarray
        Array of column names in the dataframe that should be smoothed.
    half_windows : list
        List of one or more integers specifying the number of positions
        to consider both before and after a given target position.

    Returns
    -------
    : pd.DataFrame
        Copy of the input dataframe with additional columns containing the
        smoothed scores.
    """
    df_out = []
    # One new column per (score, window) combination, computed per protein so
    # windows never bleed across protein boundaries.
    for df_prot in partition_df_by_prots(df):
        for score in scores:
            for w in half_windows:
                df_prot[f"{score}_smooth{w}"] = smooth_score(
                    score=df_prot[score].values,
                    half_window=w)
        df_out.append(df_prot)
    df_out = pd.concat(df_out)
    return df_out


@numba.njit
def get_avg_3d_dist(idx_list: np.ndarray,  # as before, technically not a list but an array. Rename?
                    coord: np.ndarray,
                    position: np.ndarray,
                    error_dist: np.ndarray,
                    metric: str = 'mean',
                    error_operation: str = 'minus',
                    average_aa_size: float = 3.5,
                    ) -> float:
    """
    Get average 3D distance between a group of amino acids.

    Parameters
    ----------
    idx_list : np.ndarray
        Array of amino acid indices.
    coord : np.ndarray
        Array of 3D coordinates of alpha carbon atoms across different
        amino acids.
    position : np.ndarray
        Array of amino acid positions.
    error_dist : np.ndarray
        Matrix of paired aligned errors of AlphaFold across all amino acids in
        a protein sequence.
    metric : str
        Metric to aggregate distances across all pairs for a given amino acid.
        'mean' or 'min' can be chosen. Default is 'mean'.
    error_operation : str
        Metric to include paired aligned error in the distance calculation.
        'minus' or 'plus' can be chosen. Default is 'minus'.
    average_aa_size : float
        Average size of an AA.
        Default is 3.5 Å

    Raises
    ------
    ValueError
        If metric or error_operation are outside their accepted choices.
    """
    if metric not in ['mean', 'min']:
        raise ValueError('Select mean or min as metric.')
    if error_operation not in ['minus', 'plus']:
        raise ValueError('Select minus or plus as error_operation.')
    metric_dist = []
    # For each selected residue, aggregate its error-adjusted distance to all
    # other selected residues; the per-residue aggregates are then averaged.
    for x1 in idx_list:
        all_dist = []
        for x2 in idx_list:
            if x1 != x2:
                dist_i = get_3d_dist(
                    coordinate_array_1=coord,
                    coordinate_array_2=coord,
                    idx_1=x1,
                    idx_2=x2)
                error_i = get_paired_error(
                    position=position,
                    error_dist=error_dist,
                    idx_1=x1,
                    idx_2=x2)
                if error_operation == 'minus':
                    # Optimistic estimate: subtract the PAE, but never go
                    # below the physical size of one amino acid.
                    dist_error_i = dist_i - error_i
                    if dist_error_i < average_aa_size:
                        dist_error_i = average_aa_size
                    all_dist.append(dist_error_i)
                elif error_operation == 'plus':
                    # Pessimistic estimate: add the PAE, but cap at the
                    # maximal 1D (sequence-walk) distance between the pair.
                    dist_error_i = dist_i + error_i
                    nAA_diff = abs(position[x1] - position[x2])
                    nAA_dist = nAA_diff * average_aa_size
                    if dist_error_i > nAA_dist:
                        all_dist.append(nAA_dist)
                    else:
                        all_dist.append(dist_error_i)
        # Probably the 5 lines below can be optimized, but likely not worth
        # the speed improvement?
        all_dist_d = np.array(all_dist)
        if metric == 'mean':
            m_d = np.mean(all_dist_d)
        elif metric == 'min':
            m_d = np.min(all_dist_d)
        metric_dist.append(m_d)
    metric_dist = np.array(metric_dist)
    avg_metric_dist = np.mean(metric_dist)
    return(avg_metric_dist)


@numba.njit
def get_avg_1d_dist(idx_list: np.ndarray,
                    position: np.ndarray,
                    metric: str = 'mean'
                    ) -> float:
    """
    Get average 1D distance between a group of amino acids.

    Parameters
    ----------
    idx_list : np.ndarray
        Array of amino acid indices.
    position : np.ndarray
        Array of amino acid positions.
952 | metric : str 953 | Metric to aggregate distances across all pairs for a given amino acid. 954 | 'mean' or 'min' can be chosen. Default is 'mean'. 955 | 956 | Returns 957 | ------- 958 | : float 959 | Average 1D distance between all selected amino acids. 960 | """ 961 | 962 | if metric not in ['mean', 'min']: 963 | raise ValueError('Select mean or min as metric.') 964 | metric_dist = [] 965 | for x1 in idx_list: 966 | all_dist = [] 967 | for x2 in idx_list: 968 | if x1 != x2: 969 | all_dist.append(abs(position[x1] - position[x2])) 970 | all_dist_d = np.array(all_dist) 971 | if metric == 'mean': 972 | m_d = np.mean(all_dist_d) 973 | elif metric == 'min': 974 | m_d = np.min(all_dist_d) 975 | metric_dist.append(m_d) 976 | metric_dist = np.array(metric_dist) 977 | avg_min_dist = np.mean(metric_dist) 978 | return(avg_min_dist) 979 | 980 | 981 | def get_proximity_pvals(df: pd.DataFrame, 982 | ptm_types: np.ndarray, 983 | ptm_site_dict: dict, 984 | error_dir: str, 985 | filename_format: str = "pae_{}.hdf", 986 | per_site_metric: str = 'mean', 987 | error_operation: str = 'minus', 988 | n_random: int = 10000, 989 | random_seed: int = 44 # should obviously be 42;) Might mess up your testing though 990 | ) -> pd.DataFrame: 991 | """ 992 | Get proximity p-values for selected PTMs. 993 | 994 | Parameters 995 | ---------- 996 | df : pd.DataFrame 997 | pd.DataFrame of formatted AlphaFold data across various proteins. 998 | ptm_types: np.ndarray 999 | Array of PTM modifications for which to perform the proximity analysis. 1000 | ptm_site_dict : dict 1001 | Dictionary containing the possible amino acid sites for each PTM. 1002 | error_dir : str 1003 | Path to the directory where the hdf files containing the matrices of 1004 | paired aligned errors of AlphaFold are stored. 1005 | filename_format : str 1006 | The file name of the pae files saved by download_alphafold_pae. 1007 | The brackets {} are replaced by a protein name from the proteins list. 
1008 | Default is 'pae_{}.hdf'. 1009 | per_site_metric : str 1010 | Metric to aggregate distances across all pairs for a given amino acid. 1011 | 'mean' or 'min' can be chosen. Default is 'mean'. 1012 | error_operation : str 1013 | Metric to include paired aligned error in the distance calculation. 1014 | 'minus' or 'plus' can be chosen. Default is 'minus'. 1015 | n_random : int 1016 | Number of random permutations to perform. Default is 10'000. 1017 | The higher the number of permutations, the more confidence the analysis 1018 | can achieve. However, a very high number of permutations increases 1019 | processing time. No fewer than 1'000 permutations should be used. 1020 | random_seed : int 1021 | Random seed for the analysis. Default is 44. 1022 | 1023 | Returns 1024 | ------- 1025 | : pd.DataFrame 1026 | Dataframe reporting 3D and 1D proximity p-values for each protein and 1027 | selected PTM. 1028 | """ 1029 | random.seed(random_seed) 1030 | proteins = list() 1031 | ptm_type = list() 1032 | n_ptms = list() 1033 | pvals_3d = list() 1034 | pvals_1d = list() 1035 | for df_prot in partition_df_by_prots(df): 1036 | protein_accession = df_prot.protein_id.values[0] 1037 | for ptm_i in ptm_types: 1038 | acc_aa = ptm_site_dict[ptm_i] 1039 | df_ptm_prot = df_prot[df_prot.AA.isin(acc_aa)].reset_index(drop=True) 1040 | n_aa_mod = np.sum(df_ptm_prot[ptm_i]) 1041 | n_aa_all = df_ptm_prot.shape[0] 1042 | if ((n_aa_mod >= 2) & (n_aa_mod < n_aa_all)): 1043 | with h5py.File(os.path.join( 1044 | error_dir, 1045 | filename_format.format(protein_accession))) as hdf_root: 1046 | error_dist = hdf_root['dist'][...] 
1047 | size = int(np.sqrt(len(error_dist))) 1048 | error_dist = error_dist.reshape(size, size) 1049 | # subset to ptm possible positions 1050 | # calculate real distance 1051 | real_idx = df_ptm_prot.index[df_ptm_prot[ptm_i] == 1].tolist() 1052 | avg_dist_3d = get_avg_3d_dist( 1053 | idx_list=np.array(real_idx), 1054 | coord=np.vstack([ 1055 | df_ptm_prot["x_coord_ca"].values, 1056 | df_ptm_prot["y_coord_ca"].values, 1057 | df_ptm_prot["z_coord_ca"].values]).T, 1058 | position=df_ptm_prot["position"].values, 1059 | error_dist=error_dist, 1060 | metric=per_site_metric, 1061 | error_operation=error_operation) 1062 | avg_dist_1d = get_avg_1d_dist( 1063 | idx_list=np.array(real_idx), 1064 | position=df_ptm_prot["position"].values, 1065 | metric=per_site_metric) 1066 | # get background distribution 1067 | rand_idx_list = [np.array(random.sample( 1068 | range(n_aa_all), len(real_idx))) for i in range(n_random)] 1069 | rand_avg_dist_3d = [get_avg_3d_dist( 1070 | idx_list=idx_l, 1071 | coord=np.vstack([ 1072 | df_ptm_prot["x_coord_ca"].values, 1073 | df_ptm_prot["y_coord_ca"].values, 1074 | df_ptm_prot["z_coord_ca"].values]).T, 1075 | position=df_ptm_prot["position"].values, 1076 | error_dist=error_dist, 1077 | metric=per_site_metric, 1078 | error_operation=error_operation) for idx_l in rand_idx_list] 1079 | rand_avg_dist_1d = [get_avg_1d_dist( 1080 | idx_list=idx_l, 1081 | position=df_ptm_prot["position"].values, 1082 | metric=per_site_metric) for idx_l in rand_idx_list] 1083 | # get empirical p-values 1084 | pvalue_3d = np.sum(np.array(rand_avg_dist_3d) <= avg_dist_3d)/n_random 1085 | pvalue_1d = np.sum(np.array(rand_avg_dist_1d) <= avg_dist_1d)/n_random 1086 | # If this is a slow step, there are several ways to still 1087 | # optimize this I think. 1088 | # Especially the creation of 10000 elements in both a list and 1089 | # array seem concerning to me. 1090 | # Probably a >> 10 fold is still possible here. 
1091 | else: 1092 | pvalue_3d = np.nan 1093 | pvalue_1d = np.nan 1094 | pvals_3d.append(pvalue_3d) 1095 | pvals_1d.append(pvalue_1d) 1096 | n_ptms.append(n_aa_mod) 1097 | proteins.append(protein_accession) 1098 | ptm_type.append(ptm_i) 1099 | res_df = pd.DataFrame({'protein_id': proteins, 1100 | 'ptm': ptm_type, 1101 | 'n_ptms': n_ptms, 1102 | 'pvalue_1d': pvals_1d, 1103 | 'pvalue_3d': pvals_3d}) 1104 | res_df_noNan = res_df.dropna(subset=['pvalue_3d','pvalue_1d']).reset_index(drop=True) 1105 | # Why are these then stored explicitly above? # This was to know which IDs these are. 1106 | res_df_noNan['pvalue_1d_adj_bh'] = statsmodels.stats.multitest.multipletests(pvals=res_df_noNan.pvalue_1d, alpha=0.1, method='fdr_bh')[1] 1107 | res_df_noNan['pvalue_3d_adj_bh'] = statsmodels.stats.multitest.multipletests(pvals=res_df_noNan.pvalue_3d, alpha=0.1, method='fdr_bh')[1] 1108 | return(res_df_noNan) 1109 | 1110 | 1111 | def perform_enrichment_analysis(df: pd.DataFrame, 1112 | ptm_types: list, 1113 | rois: list, 1114 | quality_cutoffs: list, 1115 | ptm_site_dict: dict, 1116 | multiple_testing: bool = True) -> pd.DataFrame: 1117 | """ 1118 | Get enrichment p-values for selected PTMs acros regions of interest (ROIs). 1119 | 1120 | Parameters 1121 | ---------- 1122 | df : pd.DataFrame 1123 | pd.DataFrame of formatted AlphaFold data across various proteins. 1124 | ptm_types: list 1125 | List of PTM modifications for which to perform the enrichment analysis. 1126 | rois : list 1127 | List of regions of interest (one hot encoded columns in df) for which 1128 | to perform the enrichment analysis. 1129 | quality_cutoffs : list 1130 | List of quality cutoffs (AlphaFold pLDDT values) to filter for. 1131 | ptm_site_dict : dict 1132 | Dictionary containing the possible amino acid sites for each PTM. 1133 | multiple_testing : bool 1134 | Bool if multiple hypothesis testing correction should be performed. 1135 | Default is 'True'. 
1136 | 1137 | Returns 1138 | ------- 1139 | : pd.DataFrame 1140 | Dataframe reporting p-values for the enrichment of all selected 1141 | ptm_types across selected rois. 1142 | """ 1143 | 1144 | enrichment = [] 1145 | for q_cut in quality_cutoffs: 1146 | # Is quality_cutoffs expected to be a big list? 1147 | # If so, we can still optimize the function below reasonably I think... 1148 | seq_ann_qcut = df[df.quality >= q_cut] 1149 | for ptm in ptm_types: 1150 | seq_ann_qcut_aa = seq_ann_qcut[seq_ann_qcut.AA.isin(ptm_site_dict[ptm])] 1151 | for roi in rois: 1152 | seq_ann_qcut_aa_roi1 = seq_ann_qcut_aa[roi] == 1 1153 | seq_ann_qcut_aa_roi0 = seq_ann_qcut_aa[roi] == 0 1154 | seq_ann_qcut_aa_ptm1 = seq_ann_qcut_aa[ptm] == 1 1155 | seq_ann_qcut_aa_ptm0 = seq_ann_qcut_aa[ptm] == 0 1156 | n_ptm_in_roi = seq_ann_qcut_aa[seq_ann_qcut_aa_roi1 & seq_ann_qcut_aa_ptm1].shape[0] 1157 | n_ptm_not_in_roi = seq_ann_qcut_aa[seq_ann_qcut_aa_roi0 & seq_ann_qcut_aa_ptm1].shape[0] 1158 | n_naked_in_roi = seq_ann_qcut_aa[seq_ann_qcut_aa_roi1 & seq_ann_qcut_aa_ptm0].shape[0] 1159 | n_naked_not_in_roi = seq_ann_qcut_aa[seq_ann_qcut_aa_roi0 & seq_ann_qcut_aa_ptm0].shape[0] 1160 | fisher_table = np.array([[n_ptm_in_roi, n_naked_in_roi], [n_ptm_not_in_roi, n_naked_not_in_roi]]) 1161 | oddsr, p = scipy.stats.fisher_exact(fisher_table, 1162 | alternative='two-sided') 1163 | res = pd.DataFrame({'quality_cutoff': [q_cut], 1164 | 'ptm': [ptm], 1165 | 'roi': [roi], 1166 | 'n_aa_ptm': np.sum(seq_ann_qcut_aa_ptm1), 1167 | 'n_aa_roi': np.sum(seq_ann_qcut_aa_roi1), 1168 | 'n_ptm_in_roi': n_ptm_in_roi, 1169 | 'n_ptm_not_in_roi': n_ptm_not_in_roi, 1170 | 'n_naked_in_roi': n_naked_in_roi, 1171 | 'n_naked_not_in_roi': n_naked_not_in_roi, 1172 | 'oddsr': [oddsr], 1173 | 'p': [p]}) 1174 | enrichment.append(res) 1175 | enrichment_df = pd.concat(enrichment) 1176 | if multiple_testing: 1177 | enrichment_df['p_adj_bf'] = statsmodels.stats.multitest.multipletests( 1178 | pvals=enrichment_df.p, alpha=0.01, 
method='bonferroni')[1] 1179 | enrichment_df['p_adj_bh'] = statsmodels.stats.multitest.multipletests( 1180 | pvals=enrichment_df.p, alpha=0.01, method='fdr_bh')[1] 1181 | return(enrichment_df) 1182 | 1183 | 1184 | def perform_enrichment_analysis_per_protein( 1185 | df: pd.DataFrame, 1186 | ptm_types: list, 1187 | rois: list, 1188 | quality_cutoffs: list, 1189 | ptm_site_dict: dict 1190 | ) -> pd.DataFrame: 1191 | """ 1192 | Get per protein enrichment p-values for selected PTMs acros regions of 1193 | interest (ROIs). 1194 | 1195 | Parameters 1196 | ---------- 1197 | df : pd.DataFrame 1198 | pd.DataFrame of formatted AlphaFold data across various proteins. 1199 | ptm_types: list 1200 | List of PTM modifications for which to perform the enrichment analysis. 1201 | rois : list 1202 | List of regions of interest (one hot encoded columns in df) for which 1203 | to perform the enrichment analysis. 1204 | quality_cutoffs : list 1205 | List of quality cutoffs (AlphaFold pLDDT values) to filter for. 1206 | ptm_site_dict : dict 1207 | Dictionary containing the possible amino acid sites for each PTM. 1208 | 1209 | Returns 1210 | ------- 1211 | : pd.DataFrame 1212 | Dataframe reporting p-values for the enrichment of all selected 1213 | ptm_types across selected rois on a per protein basis. 
1214 | """ 1215 | enrichment_list = list() 1216 | for df_prot in partition_df_by_prots(df): 1217 | protein_accession = df_prot.protein_id.values[0] 1218 | res = perform_enrichment_analysis(df=df_prot, 1219 | ptm_types=ptm_types, 1220 | rois=rois, 1221 | quality_cutoffs=quality_cutoffs, 1222 | ptm_site_dict=ptm_site_dict, 1223 | multiple_testing=False) 1224 | res.insert(loc=0, column='protein_id', value=np.repeat( 1225 | protein_accession, res.shape[0])) 1226 | enrichment_list.append(res) 1227 | enrichment_per_protein = pd.concat(enrichment_list) 1228 | enrichment_per_protein = enrichment_per_protein[(enrichment_per_protein.n_aa_ptm >= 2) & (enrichment_per_protein.n_aa_roi >= enrichment_per_protein.n_aa_ptm)] 1229 | enrichment_per_protein.reset_index(drop=True, inplace=True) 1230 | enrichment_per_protein['p_adj_bf'] = statsmodels.stats.multitest.multipletests( 1231 | pvals=enrichment_per_protein.p, alpha=0.01, method='bonferroni')[1] 1232 | enrichment_per_protein['p_adj_bh'] = statsmodels.stats.multitest.multipletests( 1233 | pvals=enrichment_per_protein.p, alpha=0.01, method='fdr_bh')[1] 1234 | return enrichment_per_protein 1235 | 1236 | 1237 | def find_idr_pattern( 1238 | idr_list: list, 1239 | min_structured_length: int = 100, 1240 | max_unstructured_length: int = 30 1241 | ) -> tuple: 1242 | """ 1243 | Find short intrinsically disordered regions. 1244 | 1245 | Parameters 1246 | ---------- 1247 | idr_list : list 1248 | Nested list specifying the binary IDR condition and its length. 1249 | For example: [[1,10],[0,30],[1,5]]. 1250 | min_structured_length : int 1251 | Integer specifying the minimum number of amino acids in flanking 1252 | structured regions. 1253 | max_unstructured_length : int 1254 | Integer specifying the maximum number of amino acids in the short 1255 | intrinsically unstructured regions. 1256 | 1257 | Returns 1258 | ------- 1259 | : tuple 1260 | (bool, list) If a pattern was found and the list of start end end 1261 | positions of short IDRs. 
1262 | """ 1263 | window = np.array([0, 1, 2]) 1264 | i = 0 1265 | pattern = False 1266 | pos_list = list() 1267 | while i < (len(idr_list) - 2): 1268 | window_i = window + i 1269 | if idr_list[window_i[0]][0] == 0: 1270 | if idr_list[window_i[0]][1] >= min_structured_length: 1271 | if idr_list[window_i[1]][1] <= max_unstructured_length: 1272 | if idr_list[window_i[2]][1] >= min_structured_length: 1273 | pattern = True 1274 | idr_start = np.sum([x[1] for x in idr_list[0: i + 1]]) + 1 1275 | idr_end = idr_start + idr_list[i + 1][1] - 1 1276 | pos_list.append([idr_start, idr_end]) 1277 | i += 1 1278 | return pattern, pos_list 1279 | 1280 | 1281 | def annotate_proteins_with_idr_pattern( 1282 | df: pd.DataFrame, 1283 | min_structured_length: int = 100, 1284 | max_unstructured_length: int = 30 1285 | ) -> pd.DataFrame: 1286 | """ 1287 | Find short intrinsically disordered regions. 1288 | 1289 | Parameters 1290 | ---------- 1291 | df : pd.DataFrame 1292 | Dataframe with AlphaFold annotations. 1293 | min_structured_length : int 1294 | Integer specifying the minimum number of amino acids in flanking 1295 | structured regions. 1296 | max_unstructured_length : int 1297 | Integer specifying the maximum number of amino acids in the short 1298 | intrinsically unstructured regions. 1299 | 1300 | Returns 1301 | ------- 1302 | : pd.DataFrame 1303 | Input dataframe with an additional column 'flexible_pattern'. 
    """

    res_out = list()
    proteins = list()
    loop_pattern = list()
    pattern_position = list()
    for df_prot in partition_df_by_prots(df):
        df_prot['flexible_pattern'] = 0
        protein_accession = df_prot.protein_id.values[0]
        # Run-length encode the per-residue IDR flags into
        # [value, run length] pairs, e.g. [[1, 10], [0, 30], [1, 5]].
        idr_list = [[k, len(list(g))] for k, g in groupby(df_prot.IDR.values)]
        pattern, pos_list = find_idr_pattern(
            idr_list,
            min_structured_length=min_structured_length,
            max_unstructured_length=max_unstructured_length)
        pattern_position_list = list()
        if pattern:
            proteins.append(protein_accession)
            loop_pattern.append(pattern)
            pattern_position.append(pos_list)

            # Expand each [start, end] range into the explicit list of
            # sequence positions it covers.
            pattern_position_list = pattern_position_list + [list(np.arange(p[0], p[1] + 1)) for p in pos_list]
            pattern_position_list = [item for sublist in pattern_position_list for item in sublist]

            selected_locations = np.flatnonzero(df_prot.position.isin(
                pattern_position_list))
            # NOTE(review): np.flatnonzero yields positional indices but .loc
            # expects labels; this assumes partition_df_by_prots hands out
            # each protein with a fresh RangeIndex — TODO confirm.
            df_prot.loc[selected_locations, 'flexible_pattern'] = 1
        res_out.append(df_prot)
    res_out = pd.concat(res_out)
    return res_out


@numba.njit
def extend_flexible_pattern(
    pattern: np.ndarray,
    window: int
) -> np.ndarray:
    """
    This function takes an array of binary values (0 or 1) and extends
    stretches of 1s to either side by the provided window.

    Parameters
    ----------
    pattern : np.ndarray
        Array of binary pattern values.
    window : int
        Integer specifying the number of positions to consider both before
        and after the provided pattern.

    Returns
    -------
    : np.ndarray
        Array with of binary values, where stretches of 1s in the input array
        were extended to both sides.
    """
    extended_pattern = []
    for i in range(len(pattern)):
        # Clamp the lower bound at the start of the array.
        low_window_bound = i - window
        if low_window_bound < 0:
            low_window_bound = 0
        high_window_bound = i + window
        if high_window_bound > len(pattern):
            high_window_bound = len(pattern)
        # The slice end is exclusive, so +1 makes the window inclusive on
        # both sides; slicing past the end of the array is safe.
        window_patterns = pattern[low_window_bound: high_window_bound + 1]
        window_max = np.max(window_patterns)
        extended_pattern.append(window_max)
    return np.array(extended_pattern)


def get_extended_flexible_pattern(
    df: pd.DataFrame,
    patterns: np.ndarray,
    windows: list,
) -> pd.DataFrame:
    """
    Select columns in a dataframe for which to extend the pattern by the
    provided window.

    Parameters
    ----------
    df : pd.DataFrame
        Dataframe with AlphaFold annotations.
    patterns : np.ndarray
        Array of column names in the dataframe with binary values that should
        be extended.
    windows : list
        List of one or more integers specifying the number of positions
        to consider both before and after a pattern.

    Returns
    -------
    : pd.DataFrame
        Input dataframe with additional columns containing the extended
        patterns.
1397 | """ 1398 | df_out = [] 1399 | for df_prot in partition_df_by_prots(df): 1400 | for pattern in patterns: 1401 | for w in windows: 1402 | df_prot[f'{pattern}_extended_{w}'] = extend_flexible_pattern( 1403 | pattern=df_prot[pattern].values, 1404 | window=w) 1405 | df_out.append(df_prot) 1406 | df_out = pd.concat(df_out) 1407 | return df_out 1408 | 1409 | 1410 | # This function could be numba compatible 1411 | def calculate_distances_between_ptms( 1412 | background_idx_list: list, 1413 | target_aa_idx: np.ndarray, 1414 | coords: np.ndarray, 1415 | positions: np.ndarray, 1416 | error_dist: np.ndarray 1417 | ) -> [list, list]: 1418 | """ 1419 | Calculate the distances from a target amino acid to a list of background 1420 | amino acids. 1421 | 1422 | Parameters 1423 | ---------- 1424 | background_idx_list : list 1425 | List of amino acid indices that make up the background. 1426 | target_aa_idx : np.ndarray 1427 | Array of target amino acid indices. 1428 | coords : np.ndarray 1429 | Array of 3D coordinates of alpha carbon atoms across different 1430 | amino acids. 1431 | positions : np.ndarray 1432 | Array of amino acid positions from which to choose the specific indeces. 1433 | error_dist: : np.ndarray 1434 | Matrix of paired aligned errors of AlphaFold across all amino acids 1435 | in a protein qequence. 
1436 | 1437 | Returns 1438 | ------- 1439 | : [list, list] 1440 | List of 3D distance results and list of 1D distance results 1441 | """ 1442 | distance_res = list() 1443 | distance_1D_res = list() 1444 | for idx_list in background_idx_list: 1445 | aa_dist_list = list() 1446 | aa_1D_dist_list = list() 1447 | for i in idx_list: 1448 | aa_dist = list() 1449 | aa_1D_dist = list() 1450 | for aa_i in target_aa_idx: 1451 | aa_dist_i = get_3d_dist( 1452 | coordinate_array_1=coords, 1453 | coordinate_array_2=coords, 1454 | idx_1=i, 1455 | idx_2=aa_i) 1456 | aa_error_i = get_paired_error( 1457 | position=positions, 1458 | error_dist=error_dist, 1459 | idx_1=i, 1460 | idx_2=aa_i) 1461 | aa_dist.append(aa_dist_i+aa_error_i) 1462 | aa_1D_dist.append(abs(positions[i]-positions[aa_i])) 1463 | aa_dist_list.append(aa_dist) 1464 | aa_1D_dist_list.append(aa_1D_dist) 1465 | distance_res.append(aa_dist_list) 1466 | distance_1D_res.append(aa_1D_dist_list) 1467 | return distance_res, distance_1D_res 1468 | 1469 | 1470 | def get_ptm_distance_list( 1471 | df: pd.DataFrame, 1472 | ptm_target: str, 1473 | ptm_background: str, 1474 | ptm_dict: dict, 1475 | error_dir: str, 1476 | filename_format: str = "pae_{}.hdf", 1477 | n_random: int = 10000, 1478 | random_seed: int = 44, 1479 | ) -> [list, list, list]: 1480 | """ 1481 | Extract a lists of 3D and 1D distances between target amino acids and a 1482 | random background. 1483 | 1484 | Parameters 1485 | ---------- 1486 | df : pd.DataFrame 1487 | Dataframe with AlphaFold annotations. 1488 | ptm_target : str 1489 | String specifying the PTM type for which you want to evaluate if it 1490 | is in colocalizing with the background. 1491 | ptm_background : str 1492 | String specifying the PTM type that is used as background. 1493 | ptm_dict : dict 1494 | Dictionary containing the possible amino acid sites for each PTM. 
1495 | error_dir : str 1496 | Path to the directory where the hdf files containing the matrices of 1497 | paired aligned errors of AlphaFold are stored. 1498 | filename_format : str 1499 | The file name of the pae files saved by download_alphafold_pae. 1500 | The brackets {} are replaced by a protein name from the proteins list. 1501 | Default is 'pae_{}.hdf'. 1502 | n_random : int 1503 | Number of random permutations to perform. Default is 10'000. 1504 | The higher the number of permutations, the more confidence the analysis 1505 | can achieve. However, a very high number of permutations increases 1506 | processing time. No fewer than 1'000 permutations should be used. 1507 | random_seed : int 1508 | Random seed for the analysis. Default is 44. 1509 | 1510 | Returns 1511 | ------- 1512 | : [list, list, list] 1513 | List of 3D distances, list of 1D distances and 1514 | list of modified indices. 1515 | """ 1516 | random.seed(random_seed) 1517 | prot_distances = list() 1518 | prot_distances_1D = list() 1519 | prot_mod_idx = list() 1520 | for df_prot in partition_df_by_prots(df): 1521 | protein_accession = df_prot.protein_id.values[0] 1522 | if error_dir is not None: 1523 | with h5py.File( 1524 | os.path.join( 1525 | error_dir, 1526 | filename_format.format(protein_accession)) 1527 | ) as hdf_root: 1528 | error_dist = hdf_root['dist'][...] 
1529 | size = int(np.sqrt(len(error_dist))) 1530 | error_dist = error_dist.reshape(size, size) 1531 | else: 1532 | error_dist = np.zeros((df_prot.shape[0], df_prot.shape[0])) 1533 | # amino acid residues of background PTM 1534 | background_aa = ptm_dict[ptm_background] 1535 | # indices of background_aa 1536 | background_idx = list(np.flatnonzero(df_prot.AA.isin(background_aa))) 1537 | # number of observed background modifications 1538 | n_aa_background_mod = np.sum(df_prot[ptm_background] == 1) 1539 | if n_aa_background_mod >= 1: 1540 | # indices of observed background PTMs 1541 | real_background_idx = df_prot.index[df_prot[ptm_background] == 1].tolist() 1542 | # list of random index lists for background PTMs 1543 | # @TODO: probably slowish due to making lists of 10000 elements, 1544 | # perhaps this can be avoided 1545 | background_idx_list = [random.sample( 1546 | background_idx, 1547 | len(real_background_idx)) for i in np.arange(0, n_random)] 1548 | # Combine real and random backround list with the real indices at 1549 | # position 0 1550 | background_idx_list.insert(0,real_background_idx) 1551 | # amino acid residues of target PTM 1552 | target_aa = ptm_dict[ptm_target] 1553 | # indices of target_aa 1554 | target_aa_idx = list(np.flatnonzero(df_prot.AA.isin(target_aa))) 1555 | # indices of observed target PTMs 1556 | target_mod_idx = df_prot.index[df_prot[ptm_target] == 1].tolist() 1557 | # index of observed PTMs within index list of all target_aa 1558 | target_aa_idx_mod_idx = [i for i in np.arange(len(target_aa_idx)) if target_aa_idx[i] in target_mod_idx] 1559 | distance_res, distance_1D_res = calculate_distances_between_ptms( 1560 | background_idx_list=np.array(background_idx_list), 1561 | target_aa_idx=np.array(target_aa_idx), 1562 | coords=np.vstack([ 1563 | df_prot.x_coord_ca.values, 1564 | df_prot.y_coord_ca.values, 1565 | df_prot.z_coord_ca.values]).T, 1566 | positions=df_prot.position.values, 1567 | error_dist=error_dist) 1568 | 
            prot_distances.append(distance_res)
            prot_distances_1D.append(distance_1D_res)
            prot_mod_idx.append(target_aa_idx_mod_idx)
    return prot_distances, prot_distances_1D, prot_mod_idx


# This function could be numba compatible
def get_mod_ptm_fraction(
    distances: list,
    mod_idx: list,
    min_dist: int,
    max_dist: int
) -> list:
    """
    Calculate the fraction of modified PTM acceptor residues within
    a distance range.

    Parameters
    ----------
    distances: list
        List of 1D or 3D distances.
    mod_idx: lists
        List of indices to select which distances to consider.
    min_dist: int
        Minimum distance of the bin.
    max_dist: int
        Maximum distance of the bin.

    Returns
    -------
    : list
        Per background sample (the observed background first, random samples
        after it), the fraction of modified PTM acceptor residues within
        the provided distance range.
    """
    # One counter per background sample (index 0 = observed background).
    n_aa = [0]*len(distances[0])
    n_aa_mod = [0]*len(distances[0])
    for idx, p in enumerate(distances):
        rand_count = 0
        for rand in p:
            for back in rand:
                # Count residue pairs falling into the (min_dist, max_dist]
                # bin, over all target residues ...
                n_aa[rand_count] += len([i for i in back if ((i > min_dist) & (i <= max_dist))])
                # ... and over the modified target residues only.
                mod_back = [back[i] for i in mod_idx[idx]]
                n_aa_mod[rand_count] += len([i for i in mod_back if ((i > min_dist) & (i <= max_dist))])
            rand_count += 1
    mod_fraction = [mod/aa if aa>0 else np.nan for aa,mod in zip(n_aa, n_aa_mod)]
    return mod_fraction


def evaluate_ptm_colocalization(
    df: pd.DataFrame,
    ptm_target: str,
    ptm_types: list,
    ptm_dict: dict,
    pae_dir: str,
    filename_format: str = "pae_{}.hdf",
    n_random: int = 5,
    random_seed: int = 44,
    min_dist: float = -0.01,
    max_dist: float = 35,
    dist_step: float = 5
) -> pd.DataFrame:
    """
    Evaluate for a given target PTM type if modifications preferentially occur
1631 | closer to the provided background PTM types than expected by chance or at 1632 | distance bins that are further away. 1633 | 1634 | Parameters 1635 | ---------- 1636 | df : pd.DataFrame 1637 | Dataframe with AlphaFold annotations. 1638 | ptm_target : str 1639 | String specifying the PTM type for which you want to evaluate if it 1640 | is in colocalizing with the background. 1641 | ptm_types : list of strings 1642 | List of strings specifying the PTM types that should be used as 1643 | background. If "self" is included, this means that the ptm_target 1644 | is used also as backround modification. 1645 | ptm_dict : dict 1646 | Dictionary containing the possible amino acid sites for each PTM. 1647 | pae_dir : str 1648 | Path to the directory where the hdf files containing the matrices of 1649 | paired aligned errors of AlphaFold are stored. 1650 | filename_format : str 1651 | The file name of the pae files saved by download_alphafold_pae. 1652 | The brackets {} are replaced by a protein name from the proteins list. 1653 | Default is 'pae_{}.hdf'. 1654 | n_random : int 1655 | Number of random permutations to perform. Default is 10'000. 1656 | The higher the number of permutations, the more confidence the analysis 1657 | can achieve. However, a very high number of permutations increases 1658 | processing time. No fewer than 1'000 permutations should be used. 1659 | random_seed : int 1660 | Random seed for the analysis. Default is 44. 1661 | min_dist : float 1662 | Minimum distance to consider. 1663 | Default is 0, meaning that the target amino acid is included itself. 1664 | max_dist : float 1665 | Maximum distance to consider. 1666 | Default is 35. 1667 | dist_step : float 1668 | Stepsize for distance bins between min_dist and max_dist. 1669 | Default is 5. 
1670 | 1671 | Returns 1672 | ------- 1673 | : pd.DataFrame 1674 | Dataframe with following columns: 'context', 'ptm_types', 'cutoff', 1675 | 'std_random_fraction', 'variable', 'value' 1676 | """ 1677 | distance_cutoffs = np.arange(min_dist, max_dist, dist_step) 1678 | # might want to change to np.linspace above 1679 | cutoff_list = list() 1680 | ptm_list = list() 1681 | real_fraction_3D = list() 1682 | mean_random_fraction_3D = list() 1683 | std_random_fraction_3D = list() 1684 | ttest_pval_3D = list() 1685 | real_fraction_1D = list() 1686 | mean_random_fraction_1D = list() 1687 | std_random_fraction_1D = list() 1688 | ttest_pval_1D = list() 1689 | for ptm_type in ptm_types: 1690 | if ptm_target == 'self': 1691 | ptm_target = ptm_type 1692 | distances_3D, distances_1D, mod_idx = get_ptm_distance_list( 1693 | df=df, 1694 | ptm_target=ptm_target, 1695 | ptm_background=ptm_type, 1696 | ptm_dict=ptm_dict, 1697 | error_dir=pae_dir, 1698 | filename_format=filename_format, 1699 | n_random=n_random, 1700 | random_seed=random_seed 1701 | ) 1702 | dist_i = 0 1703 | for dist_cut in distance_cutoffs: 1704 | ptm_list.append(ptm_type) 1705 | cutoff_list.append(dist_cut+dist_step) 1706 | if dist_i == 0: 1707 | # make sure that the minimum is incuded 1708 | dist_step_mod = 0.001 1709 | else: 1710 | dist_step_mod = 0 1711 | mod_fraction_3D = get_mod_ptm_fraction( 1712 | distances_3D, 1713 | mod_idx, 1714 | min_dist=dist_cut-dist_step_mod, 1715 | max_dist=dist_cut+dist_step) 1716 | real_fraction_3D.append(mod_fraction_3D[0]) 1717 | mean_random_fraction_3D.append(np.mean(mod_fraction_3D[1:])) 1718 | std_random_fraction_3D.append(np.std(mod_fraction_3D[1:])) 1719 | ttest_pval_3D.append(scipy.stats.ttest_1samp(mod_fraction_3D[1:], mod_fraction_3D[0]).pvalue) 1720 | mod_fraction_1D = get_mod_ptm_fraction( 1721 | distances_1D, 1722 | mod_idx, 1723 | min_dist=dist_cut-dist_step_mod, 1724 | max_dist=dist_cut+dist_step) 1725 | real_fraction_1D.append(mod_fraction_1D[0]) 1726 | 
mean_random_fraction_1D.append(np.mean(mod_fraction_1D[1:])) 1727 | std_random_fraction_1D.append(np.std(mod_fraction_1D[1:])) 1728 | ttest_pval_1D.append(scipy.stats.ttest_1samp(mod_fraction_1D[1:], mod_fraction_1D[0]).pvalue) 1729 | dist_i += 1 1730 | res_df_3D = pd.DataFrame({ 1731 | 'context': np.repeat('3D', len(cutoff_list)), 1732 | 'cutoff': cutoff_list, 1733 | 'ptm_types': ptm_list, 1734 | 'Observed': real_fraction_3D, 1735 | 'Random sampling': mean_random_fraction_3D, 1736 | 'std_random_fraction': std_random_fraction_3D, 1737 | 'pvalue': ttest_pval_3D}) 1738 | res_df_1D = pd.DataFrame({ 1739 | 'context': np.repeat('1D', len(cutoff_list)), 1740 | 'cutoff': cutoff_list, 1741 | 'ptm_types': ptm_list, 1742 | 'Observed': real_fraction_1D, 1743 | 'Random sampling': mean_random_fraction_1D, 1744 | 'std_random_fraction': std_random_fraction_1D, 1745 | 'pvalue': ttest_pval_1D}) 1746 | res_df_3D = res_df_3D.melt( 1747 | id_vars=["context", "ptm_types", "cutoff", "std_random_fraction","pvalue"]) 1748 | res_df_1D = res_df_1D.melt( 1749 | id_vars=["context", "ptm_types", "cutoff", "std_random_fraction","pvalue"]) 1750 | res_df = pd.concat([res_df_3D, res_df_1D]) 1751 | res_df['std_random_fraction'] = np.where( 1752 | res_df.variable == 'Observed', 0, res_df.std_random_fraction) 1753 | return res_df 1754 | 1755 | 1756 | def extract_motifs_in_proteome( 1757 | alphafold_df: pd.DataFrame, 1758 | motif_df: pd.DataFrame 1759 | ): 1760 | """ 1761 | Function to find occurences of short linear motifs in the proteome. 1762 | 1763 | Parameters 1764 | ---------- 1765 | alphafold_df : pd.DataFrame 1766 | Dataframe with AlphaFold annotations. 1767 | motif_df : pd.DataFrame 1768 | Dataframe with following columns: 'enzyme', 'motif', 'mod_pos'. 1769 | 1770 | Returns 1771 | ------- 1772 | : pd.DataFrame 1773 | Dataframe containing information about short linear motifs in the 1774 | proteome. 
Following columns are privided: 'protein_id', 'enzyme', 1775 | 'motif','position','AA','motif_start','motif_end','sequence_window' 1776 | """ 1777 | proteins = list() 1778 | enzyme_list = list() 1779 | motif_list = list() 1780 | site_list = list() 1781 | start_list = list() 1782 | end_list = list() 1783 | AA_list = list() 1784 | sequence_window_list = list() 1785 | for df_prot in partition_df_by_prots(alphafold_df): 1786 | df_prot['flexible_pattern'] = 0 1787 | protein_accession = df_prot.protein_id.values[0] 1788 | sequence = ''.join(df_prot.AA) 1789 | for i in np.arange(0, motif_df.shape[0]): 1790 | for j in re.finditer(motif_df.motif.values[i], sequence): 1791 | proteins.append(protein_accession) 1792 | enzyme_list.append(motif_df.enzyme.values[i]) 1793 | motif_list.append(motif_df.motif.values[i]) 1794 | site_list.append(j.start() + motif_df.mod_pos.values[i] + 1) 1795 | start_list.append(j.start() + 1) 1796 | end_list.append(j.end()) 1797 | AA_list.append(sequence[j.start() + motif_df.mod_pos.values[i]]) 1798 | sequence_window_list.append(sequence[(j.start() + motif_df.mod_pos.values[i] - 10): (j.start() + motif_df.mod_pos.values[i] + 10)]) 1799 | motif_res = pd.DataFrame({ 1800 | 'protein_id': proteins, 1801 | 'enzyme': enzyme_list, 1802 | 'motif': motif_list, 1803 | 'position': site_list, 1804 | 'AA': AA_list, 1805 | 'motif_start': start_list, 1806 | 'motif_end': end_list, 1807 | 'sequence_window': sequence_window_list}) 1808 | return motif_res 1809 | 1810 | 1811 | def import_ptms_for_structuremap( 1812 | file: str, 1813 | organism: str 1814 | ) -> pd.DataFrame: 1815 | """ 1816 | Function to import PTM datasets. 1817 | 1818 | Parameters 1819 | ---------- 1820 | file : str 1821 | Path to the PTM dataset to load. 1822 | This can be processed by AlphaPept, Spectronaut, MaxQuant, DIA-NN or 1823 | FragPipe. 1824 | organism : str 1825 | Organism for which a fasta file should be imported. 
1826 | 1827 | Returns 1828 | ------- 1829 | : pd.DataFrame 1830 | Dataframe with PTM information. It contains following columns: 1831 | protein_id: a unique UniProt identifier; 1832 | AA: the one letter amino acid abbreviation of the PTM acceptor; 1833 | position: the sequence position of the PTM acceptor 1834 | (the first amino acid has position 1); 1835 | : N columns for N different PTM types where 1 indicates that 1836 | the PTM is present at the given amino acid postition 1837 | and 0 indicates no modification 1838 | """ 1839 | try: 1840 | from alphamap.organisms_data import import_fasta 1841 | from alphamap.importing import import_data 1842 | from alphamap.preprocessing import format_input_data 1843 | except ModuleNotFoundError: 1844 | raise ModuleNotFoundError(f"Please install alphamap. Subsequently install pandas==1.4.0.") 1845 | fasta_in = import_fasta(organism) 1846 | df = import_data(file) 1847 | df = format_input_data(df=df, 1848 | fasta=fasta_in, 1849 | modification_exp=r'\[.*?\]') 1850 | ptm_df = df.explode(['PTMsites', 'PTMtypes']) 1851 | ptm_df = ptm_df.dropna(subset=['PTMsites', 'PTMtypes']) 1852 | ptm_df = ptm_df.astype({'PTMsites': 'int32'}) 1853 | ptm_df["AA"] = ptm_df.apply( 1854 | lambda x: x["naked_sequence"][x["PTMsites"]], 1855 | axis=1) 1856 | ptm_df["position"] = ptm_df.apply( 1857 | lambda x: x["start"]+x["PTMsites"]+1, 1858 | axis=1) 1859 | ptm_df = ptm_df[["unique_protein_id", "AA", "position", "PTMtypes"]] 1860 | ptm_df = pd.get_dummies( 1861 | ptm_df, prefix="", prefix_sep='', columns=["PTMtypes"]) 1862 | ptm_df = ptm_df.rename(columns={"unique_protein_id": "protein_id"}) 1863 | ptm_df = ptm_df.groupby(['protein_id', 'AA', 'position']) 1864 | ptm_df = ptm_df.max() 1865 | ptm_df = ptm_df.reset_index() 1866 | ptm_df = ptm_df.drop_duplicates() 1867 | ptm_df = ptm_df.reset_index(drop=True) 1868 | return ptm_df 1869 | 1870 | 1871 | def format_for_3Dviz( 1872 | df: pd.DataFrame, 1873 | ptm_dataset: str 1874 | ) -> pd.DataFrame: 1875 | 
    """
    Format a PTM dataframe for 3D visualization.

    Parameters
    ----------
    df : pd.DataFrame
        Dataframe with PTM annotations (columns 'protein_id', 'AA',
        'position' and one binary column per PTM type).
    ptm_dataset : str
        Name of the binary PTM column to visualize.

    Returns
    -------
    : pd.DataFrame
        Dataframe restricted to the modified positions of the selected PTM
        type, renamed to the column layout of the visualization tooling
        (presumably AlphaMap's sequence plot — TODO confirm).
    """
    df_mod = df[["protein_id", "AA", "position", ptm_dataset]]
    df_mod = df_mod.rename(columns={"protein_id": "unique_protein_id",
                                    "AA": "modified_sequence",
                                    "position": "start"})
    # Make each row's sequence label unique by appending its row number.
    df_mod["modified_sequence"] = [mod+"_"+str(i) for i,mod in enumerate(df_mod["modified_sequence"])]
    df_mod["all_protein_ids"] = df_mod["unique_protein_id"]
    df_mod["PTMsites"] = 0
    # Convert 1-based positions to 0-based start/end coordinates.
    df_mod["start"] = df_mod["start"]-1
    df_mod["end"] = df_mod["start"]
    df_mod["PTMsites"] = [[i] for i in df_mod["PTMsites"]]
    df_mod = df_mod[df_mod[ptm_dataset] == 1]
    df_mod["marker_symbol"] = 1
    df_mod["PTMtypes"] = [[ptm_dataset] for i in df_mod["PTMsites"]]
    df_mod = df_mod.dropna(subset=['PTMtypes']).reset_index(drop=True)
    return df_mod
--------------------------------------------------------------------------------
/structuremap/utils.py:
--------------------------------------------------------------------------------
#!python
"""This module provides generic utilities.
These utilities primarily focus on:
    - logging
"""

# builtin
import logging
import os
import sys

PROGRESS_CALLBACK = True


def set_logger(
    *,
    stream: bool = True,
    log_level: int = logging.INFO,
):
    """Set the log stream and file.
    All previously set handlers will be disabled with this command.
    Parameters
    ----------
    stream : bool
        If False, no log data is sent to stream.
        If True, all logging can be tracked with stdout stream.
        Default is True.
    log_level : int
        The logging level. Usable values are defined in Python's "logging"
        module.
        Default is logging.INFO.
    """
    # NOTE(review): the 'time' import and the global declaration below appear
    # unused in this function — candidates for removal.
    import time
    global PROGRESS_CALLBACK
    root = logging.getLogger()
    formatter = logging.Formatter(
        '%(asctime)s> %(message)s', "%Y-%m-%d %H:%M:%S"
    )
    root.setLevel(log_level)
    # Remove all previously registered handlers so logging is reconfigured
    # from scratch.
    while root.hasHandlers():
        root.removeHandler(root.handlers[0])
    if stream:
        stream_handler = logging.StreamHandler(sys.stdout)
        stream_handler.setLevel(log_level)
        stream_handler.setFormatter(formatter)
        root.addHandler(stream_handler)
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MannLabs/structuremap/f14b4325e30f16394ea819af2e29f9c68f786ee4/tests/__init__.py
--------------------------------------------------------------------------------
/tests/run_tests.sh:
--------------------------------------------------------------------------------
conda activate structuremap
python -m unittest test_cli
python -m unittest test_gui
python -m unittest test_processing
jupyter nbconvert --execute --inplace --to notebook --NotebookClient.kernel_name="python" ../nbs/tutorial.ipynb
conda deactivate
--------------------------------------------------------------------------------
/tests/test_cli.py:
--------------------------------------------------------------------------------
#!python -m unittest tests.test_utils
"""This module provides unit tests for structuremap.cli."""

# builtin
import unittest

# local
import structuremap.cli

if __name__ == "__main__":
    unittest.main()
--------------------------------------------------------------------------------
/tests/test_gui.py:
--------------------------------------------------------------------------------
#!python -m unittest tests.test_utils
"""This module provides unit tests for structuremap.gui."""

# builtin
import unittest

# local
import structuremap.gui

if __name__ == "__main__":
    unittest.main()
--------------------------------------------------------------------------------
/tests/test_processing.py:
--------------------------------------------------------------------------------
#!python -m unittest tests.test_processing
import numba
import numpy as np
import pandas as pd
import tqdm
import h5py
import random
import statsmodels.stats.multitest
import urllib.request, json
import os
import socket
import re
import Bio.PDB.MMCIF2Dict
from itertools import groupby
import unittest
from scipy.spatial.transform import Rotation as R
from Bio import PDB

from structuremap.processing import download_alphafold_cif, \
    download_alphafold_pae, \
    format_alphafold_data, \
    get_3d_dist, \
    rotate_vector_around_axis, \
    get_angle, \
    get_paired_error, \
    get_neighbors, \
    annotate_accessibility, \
    smooth_score, \
    get_smooth_score, \
    get_avg_3d_dist, \
    get_avg_1d_dist, \
    find_idr_pattern, \
    annotate_proteins_with_idr_pattern, \
    extend_flexible_pattern, \
    get_extended_flexible_pattern, \
    get_mod_ptm_fraction


# Test fixtures live in <repo root>/data/test_files.
THIS_FOLDER = os.path.dirname(__file__)
TEST_FOLDER = os.path.join(
    f"{os.path.dirname(THIS_FOLDER)}",
    "data",
    "test_files",
)

class TestProcessing(unittest.TestCase):
    def test_download_alphafold_cif(self, ):
        # Requires network access to the AlphaFold download server.
        valid, invalid, existing = download_alphafold_cif(
            proteins=['O15552','Q5VSL9','Q7Z6M3','O15552yy'],
            out_folder=TEST_FOLDER)

        np.testing.assert_equal(valid, np.array(['Q5VSL9']))
        np.testing.assert_equal(invalid, np.array(['O15552yy']))
        np.testing.assert_equal(existing, np.array(['O15552','Q7Z6M3']))

        os.remove(
            os.path.join(
                TEST_FOLDER,
                'Q5VSL9.cif'
            )
        )

    def test_download_alphafold_pae(self, ):
        # Requires network access to the AlphaFold download server.
        valid, invalid, existing = download_alphafold_pae(
            proteins=['O15552','Q5VSL9','Q7Z6M3','O15552yy'],
            out_folder=TEST_FOLDER)

        np.testing.assert_equal(valid, np.array(['Q5VSL9']))
        np.testing.assert_equal(invalid, np.array(['O15552yy']))
        np.testing.assert_equal(existing, np.array(['O15552','Q7Z6M3']))

        os.remove(
            os.path.join(
                TEST_FOLDER,
                'pae_Q5VSL9.hdf'
            )
        )

    def test_format_alphafold_data(self, ):
        alphafold_formatted = format_alphafold_data(
            directory=TEST_FOLDER, protein_ids=["Q7Z6M3","O15552"])

        alphafold_formatted_ini = pd.read_csv(
            os.path.join(
                TEST_FOLDER,
                'test_alphafold_annotation.csv'
            )
        )
        pd.testing.assert_frame_equal(alphafold_formatted, alphafold_formatted_ini, check_dtype=False)

    def test_get_3d_dist(self, ):
        x = np.array([1.1,1.1,1.1,1.1,5.1])
        y = np.array([1.1,2.1,3.1,1.1,10.1])
        z = np.array([1.1,3.1,5.1,1.1,4.1])
        coordinate_array = np.vstack([x,y,z]).T
        np.testing.assert_equal(2.236068, np.round(get_3d_dist(coordinate_array, coordinate_array, 0, 1), decimals=6))
        np.testing.assert_equal(4.472136, np.round(get_3d_dist(coordinate_array, coordinate_array, 0, 2), decimals=6))
        # Distance is symmetric in its two indices.
        np.testing.assert_equal(4.472136, np.round(get_3d_dist(coordinate_array, coordinate_array, 2, 0), decimals=6))

    def rotate_vector_around_axis_scipy(self, vector, axis, theta):
        # Reference implementation used to cross-check the project's own
        # rotation routine against scipy.
        theta = np.radians(theta)
        axis_norm = axis / np.linalg.norm(axis)
        r = R.from_rotvec(theta * axis_norm)
        return(r.apply(vector))

    def test_rotate_vector_around_axis(self, ):
        v = np.array([3.0, 5.0, 0.0])
        a = np.array([4.0, 4.0, 1.0])
        t = 90

        res_real = rotate_vector_around_axis(v, a, t)
        res_scipy = self.rotate_vector_around_axis_scipy(v, a, t)

        np.testing.assert_almost_equal(res_real, res_scipy, decimal=10)

def test_get_angle(self, ): 117 | x_a = np.array([1.1,1.1,1.1]) 118 | y_a = np.array([1.1,2.1,-3.1]) 119 | z_a = np.array([1.1,3.1,5.1]) 120 | x_b = np.array([1.5,np.nan,1.5]) 121 | y_b = np.array([1.5,2.5,3.5]) 122 | z_b = np.array([1.5,3.5,5.5]) 123 | x_c = np.array([1.5,1.5,10.6]) 124 | y_c = np.array([1.5,2.5,11.6]) 125 | z_c = np.array([1.5,3.5,5.6]) 126 | x_n = np.array([4.5,1.8,1.5]) 127 | y_n = np.array([40.5,7.8,3.5]) 128 | z_n = np.array([3.5,3.8,5.5]) 129 | 130 | coordinate_array_a = np.vstack([x_a,y_a,z_a]).T 131 | coordinate_array_b = np.vstack([x_b,y_b,z_b]).T 132 | coordinate_array_c = np.vstack([x_c,y_c,z_c]).T 133 | coordinate_array_n = np.vstack([x_n,y_n,z_n]).T 134 | 135 | np.testing.assert_equal(39.231520, 136 | np.round(get_angle(coordinate_array_a, coordinate_array_b, 137 | coordinate_array_c, coordinate_array_n, 138 | 0, 1), decimals=6)) 139 | np.testing.assert_equal(91.140756, 140 | np.round(get_angle(coordinate_array_a, coordinate_array_b, 141 | coordinate_array_c, coordinate_array_n, 142 | 0, 2), decimals=6)) 143 | np.testing.assert_equal(47.168228, 144 | np.round(get_angle(coordinate_array_a, coordinate_array_b, 145 | coordinate_array_c, coordinate_array_n, 146 | 2, 0), decimals=6)) 147 | 148 | # test gly 149 | np.testing.assert_equal(93.985035, 150 | np.round(get_angle(coordinate_array_a, coordinate_array_b, 151 | coordinate_array_c, coordinate_array_n, 152 | 1, 2), decimals=6)) 153 | 154 | def test_get_paired_error(self, ): 155 | pos = np.array([1,2,3]) 156 | error = np.array([[0,2,10],[1,0,5],[10,4,0]]) 157 | np.testing.assert_equal(2, get_paired_error(pos, error, 0,1)) 158 | np.testing.assert_equal(0, get_paired_error(pos, error, 2,2)) 159 | 160 | pos = np.array([1,3]) 161 | np.testing.assert_equal(10, get_paired_error(pos, error, 0,1)) 162 | 163 | def test_get_neighbors(self, ): 164 | idxl = np.array([0,1,2]) 165 | x_a = np.array([1.1,1.1,1.1]) 166 | y_a = np.array([1.1,2.1,-3.1]) 167 | z_a = np.array([1.1,3.1,5.1]) 168 | x_b = 
np.array([1.5,np.nan,1.5]) 169 | y_b = np.array([1.5,2.5,3.5]) 170 | z_b = np.array([1.5,3.5,5.5]) 171 | x_c = np.array([1.5,1.5,10.6]) 172 | y_c = np.array([1.5,2.5,11.6]) 173 | z_c = np.array([1.5,3.5,5.6]) 174 | x_n = np.array([4.5,1.8,1.5]) 175 | y_n = np.array([40.5,7.8,3.5]) 176 | z_n = np.array([3.5,3.8,5.5]) 177 | 178 | coordinate_array_a = np.vstack([x_a,y_a,z_a]).T 179 | coordinate_array_b = np.vstack([x_b,y_b,z_b]).T 180 | coordinate_array_c = np.vstack([x_c,y_c,z_c]).T 181 | coordinate_array_n = np.vstack([x_n,y_n,z_n]).T 182 | 183 | pos=np.array([1,2,3]) 184 | error = np.array([[0,2,10],[1,0,5],[10,4,0]]) 185 | 186 | np.testing.assert_equal(np.array([1, 0, 0]), 187 | get_neighbors(idxl, coordinate_array_a, coordinate_array_b, 188 | coordinate_array_c, coordinate_array_n, 189 | pos, error, 5, 40)) 190 | np.testing.assert_equal(np.array([1, 1, 0]), 191 | get_neighbors(idxl, coordinate_array_a, coordinate_array_b, 192 | coordinate_array_c, coordinate_array_n, 193 | pos, error, 5, 150)) 194 | np.testing.assert_equal(np.array([2, 2, 2]), 195 | get_neighbors(idxl, coordinate_array_a, coordinate_array_b, 196 | coordinate_array_c, coordinate_array_n, 197 | pos, error, 50, 140)) 198 | 199 | def test_annotate_accessibility(self, ): 200 | radius = 12.0 201 | 202 | alphafold_annotation = pd.read_csv( 203 | os.path.join( 204 | TEST_FOLDER, 205 | 'test_alphafold_annotation.csv' 206 | ) 207 | ) 208 | 209 | res_accessability = annotate_accessibility( 210 | df=alphafold_annotation[alphafold_annotation.protein_id=="Q7Z6M3"], 211 | max_dist=12, 212 | max_angle=90, 213 | error_dir=None) 214 | 215 | # comparison to https://biopython.org/docs/dev/api/Bio.PDB.HSExposure.html#Bio.PDB.HSExposure.HSExposureCB 216 | with open( 217 | os.path.join( 218 | TEST_FOLDER, 219 | 'Q7Z6M3.pdb' 220 | ) 221 | ) as pdbfile: 222 | p=PDB.PDBParser() 223 | s=p.get_structure('X', pdbfile) 224 | m=s[0] 225 | hse=PDB.HSExposureCB(m, radius) 226 | residue_list=PDB.Selection.unfold_entities(m,'R') 
227 | res_hse = [] 228 | for r in residue_list: 229 | res_hse.append(r.xtra['EXP_HSE_B_U']) 230 | 231 | np.testing.assert_equal(np.array(res_hse), res_accessability.nAA_12_90_nopae.values) 232 | 233 | # @ToDo: test with actual error_dir 234 | 235 | def test_smooth_score(self, ): 236 | np.testing.assert_equal(np.array([1.5, 2. , 3. , 4. , 4.5]),smooth_score(score=np.array([1,2,3,4,5]), half_window=1)) 237 | 238 | def test_get_smooth_score(self, ): 239 | testdata = pd.DataFrame({'protein_id':[1,1,1,1,1,1,2,2,2,2,2,2], 240 | 'protein_number':[1,1,1,1,1,1,2,2,2,2,2,2], 241 | 'position':[1,2,3,4,5,6,1,2,3,4,5,6], 242 | 'score':[1,2,3,4,5,6,7,8,9,10,11,12], 243 | 'score_2':[10,20,30,40,50,60,70,80,90,100,110,120]}) 244 | test_res = get_smooth_score(testdata, np.array(['score','score_2']), [1]) 245 | np.testing.assert_equal([1.5,2,3,4,5,5.5,7.5,8,9,10,11,11.5], test_res.score_smooth1.values) 246 | np.testing.assert_equal([15,20,30,40,50,55,75,80,90,100,110,115], test_res.score_2_smooth1.values) 247 | 248 | def test_get_avg_3d_dist(self, ): 249 | x = np.array([1.1,1.1,1.1,1.1,1.1,1.1]) 250 | y = np.array([1.1,2.1,3.1,1.1,10.1,20.1]) 251 | z = np.array([1.1,3.1,5.1,10.1,11.1,12.1]) 252 | pos = np.array([1,2,3,4,5,6]) 253 | error = np.array([[0,2,10,2,3,4],[1,0,5,3,2,9],[10,4,0,3,6,7],[10,4,5,0,6,7],[10,4,5,3,0,7],[10,4,0,3,6,0]]) 254 | 255 | coordinate_array = np.vstack([x,y,z]).T 256 | 257 | np.testing.assert_equal(6.976812, np.round(get_avg_3d_dist(np.array([0,4]), coordinate_array, pos, error), decimals=6)) 258 | np.testing.assert_equal(3.5, np.round(get_avg_3d_dist(np.array([0,2]), coordinate_array, pos, error), decimals=6)) 259 | 260 | np.testing.assert_equal(5.668168, np.round(get_avg_3d_dist(np.array([0,3,4]), coordinate_array, pos, error), decimals=6)) 261 | np.testing.assert_equal(4.666667, np.round(get_avg_3d_dist(np.array([0,3,4]), coordinate_array, pos, error, metric='min'), decimals=6)) 262 | 263 | np.testing.assert_equal(14, 
np.round(get_avg_3d_dist(np.array([0,4]), coordinate_array, pos, error, error_operation='plus'), decimals=6)) 264 | error = 0.1*error 265 | np.testing.assert_equal(13.876812, np.round(get_avg_3d_dist(np.array([0,4]), coordinate_array, pos, error, error_operation='plus'), decimals=6)) 266 | 267 | x = np.array([1.1,1.1,1.1,1.1]) 268 | y = np.array([1.1,1.1,10.1,20.1]) 269 | z = np.array([1.1,10.1,11.1,12.1]) 270 | pos = np.array([1,4,5,6]) 271 | error = np.array([[0,2,10,2,3,4],[1,0,5,3,2,9],[10,4,0,3,6,7],[10,4,5,0,6,7],[10,4,5,3,0,7],[10,4,0,3,6,0]]) 272 | 273 | coordinate_array = np.vstack([x,y,z]).T 274 | 275 | np.testing.assert_equal(6.976812, np.round(get_avg_3d_dist(np.array([0,2]), coordinate_array, pos, error), decimals=6)) 276 | 277 | def test_get_avg_1d_dist(self, ): 278 | pos = np.array([1,2,3,4,5,6]) 279 | np.testing.assert_equal(4, np.round(get_avg_1d_dist(np.array([0,4]), pos), decimals=6)) 280 | np.testing.assert_equal(2.666667, np.round(get_avg_1d_dist(np.array([0,3,4]), pos), decimals=6)) 281 | np.testing.assert_equal(1.666667, np.round(get_avg_1d_dist(np.array([0,3,4]), pos, metric='min'), decimals=6)) 282 | 283 | pos = np.array([1,4,5,6]) 284 | np.testing.assert_equal(4, np.round(get_avg_1d_dist(np.array([0,2]), pos), decimals=6)) 285 | np.testing.assert_equal(2.666667, np.round(get_avg_1d_dist(np.array([0,1,2]), pos), decimals=6)) 286 | 287 | def test_find_idr_pattern(self, ): 288 | assert find_idr_pattern(idr_list = [[0,300],[1,10],[0,500],[1,500]])[0] == True 289 | assert find_idr_pattern(idr_list = [[0,300],[1,50],[0,500]])[0] == False 290 | assert find_idr_pattern(idr_list = [[0,50],[0,50],[1,50],[0,500]])[0] == False 291 | assert find_idr_pattern(idr_list = [[0,30],[0,300],[1,50],[0,50]])[0] == False 292 | assert find_idr_pattern(idr_list = [[0,30]])[0] == False 293 | 294 | assert find_idr_pattern(idr_list = [[0,300],[1,10],[0,500],[1,500]])[1][0][0] == [301] 295 | assert find_idr_pattern(idr_list = [[0,300],[1,10],[0,500],[1,500]])[1][0][1] 
== [310] 296 | assert find_idr_pattern(idr_list = [[1,10],[0,300],[1,10],[0,500],[1,500]])[1][0][0] == [311] 297 | assert find_idr_pattern(idr_list = [[1,10],[0,300],[1,10],[0,500],[1,500]])[1][0][1] == [320] 298 | 299 | def test_annotate_proteins_with_idr_pattern(self, ): 300 | testdata = pd.DataFrame({'protein_id':[1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2], 301 | 'protein_number':[1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2], 302 | 'position':[1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6], 303 | 'IDR':[0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0]}) 304 | test_res = annotate_proteins_with_idr_pattern(testdata, 3, 3) 305 | np.testing.assert_equal([0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 306 | list(test_res.flexible_pattern.values)) 307 | 308 | def test_extend_flexible_pattern(self, ): 309 | np.testing.assert_equal(np.array([1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0]), 310 | extend_flexible_pattern(np.array([1,1,1,0,0,0,0,1,1,0,0,0,0]),1)) 311 | 312 | def test_get_extended_flexible_pattern(self, ): 313 | testdata = pd.DataFrame({'protein_id':[1,1,1,1,1,1,2,2,2,2,2,2], 314 | 'protein_number':[1,1,1,1,1,1,2,2,2,2,2,2], 315 | 'position':[1,2,3,4,5,6,1,2,3,4,5,6], 316 | 'score':[1,1,0,0,0,1,1,1,0,0,0,0], 317 | 'score_2':[0,0,0,0,0,0,0,0,0,0,0,1]}) 318 | test_res = get_extended_flexible_pattern(testdata, np.array(['score','score_2']), [1]) 319 | np.testing.assert_equal([1,1,1,0,1,1,1,1,1,0,0,0], test_res.score_extended_1.values) 320 | test_res = get_extended_flexible_pattern(testdata, np.array(['score','score_2']), [2]) 321 | np.testing.assert_equal([1,1,1,1,1,1,1,1,1,1,0,0], test_res.score_extended_2.values) 322 | np.testing.assert_equal([0,0,0,0,0,0,0,0,0,1,1,1], test_res.score_2_extended_2.values) 323 | 324 | def test_get_mod_ptm_fraction(self, ): 325 | # Example with 2 proteins and 2 randomizations 326 | # 1st protein with 3 modified lysines and 3 STY sites > 1 phospho 327 | # 2nd protein with 2 modified lysines and 4 STY sites > 2 phospho 328 | distances = [ 329 | [[[10, 20, 30], 
[2, 10, 20], [5, 8, 30]], # protein 1 > real 330 | [[30, 20, 50], [20, 10, 20], [50, 10, 30]], # protein 1 > random 1 331 | [[20, 50, 10], [50, 40, 10], [50, 20, 30]]], # protein 1 > random 2 332 | [[[10, 10, 30, 50], [50, 10, 5, 50]], # protein 2 > real 333 | [[50, 20, 30, 40], [20, 20, 10, 80]], # protein 2 > random 1 334 | [[15, 10, 30, 10], [10, 10, 20, 20]]]] # protein 2 > random 2 335 | mod_idx = [[0], # protein 1 336 | [1, 2]] # protein 2 337 | modidied_fraction = get_mod_ptm_fraction( 338 | distances, mod_idx, min_dist=0, max_dist=10) 339 | # Real: 340 | # n_aa: 1,2,2,2,2 341 | # n_mod: 1,1,1,1,2 342 | # final: 9,6 343 | 344 | # Random 1: 345 | # n_aa: 0,1,1,0,1 346 | # n_mod: 0,0,0,0,1 347 | # final: 3,1 348 | 349 | # Random 2: 350 | # n_aa: 1,1,0,2,2 351 | # n_mod: 0,0,0,1,1 352 | # final: 6,2 353 | 354 | # Fractions: 0.66, 0.33, 0.33 355 | 356 | np.testing.assert_almost_equal( 357 | modidied_fraction, 358 | [0.66666666, 0.33333333, 0.33333333]) 359 | modidied_fraction = get_mod_ptm_fraction( 360 | distances, mod_idx, min_dist=5, max_dist=10) 361 | np.testing.assert_almost_equal( 362 | modidied_fraction, 363 | [0.5, 0.33333333, 0.33333333]) 364 | 365 | 366 | if __name__ == "__main__": 367 | unittest.main() 368 | --------------------------------------------------------------------------------