├── .github ├── ISSUE_TEMPLATE │ └── bug-report.md └── workflows │ ├── github_action_test_dummy.yml │ ├── pip_installation.yml │ └── publish_and_release.yml ├── .gitignore ├── HISTORY.md ├── LICENSE.txt ├── MANIFEST.in ├── README.md ├── data └── test_files │ ├── O15552.cif │ ├── Q7Z6M3.cif │ ├── Q7Z6M3.pdb │ ├── kinase_motifs.csv │ ├── pae_O15552.hdf │ ├── pae_Q7Z6M3.hdf │ ├── ptm_file.csv │ └── test_alphafold_annotation.csv ├── misc ├── CLA.md ├── bumpversion.cfg ├── check_version.sh ├── loose_pip_install.sh └── stable_pip_install.sh ├── nbs └── tutorial.ipynb ├── release ├── logos │ ├── alpha_logo.icns │ ├── alpha_logo.ico │ └── alpha_logo.png ├── one_click_linux_gui │ ├── control │ └── create_installer_linux.sh ├── one_click_macos_gui │ ├── Info.plist │ ├── Resources │ │ ├── conclusion.html │ │ └── welcome.html │ ├── create_installer_macos.sh │ ├── distribution.xml │ ├── scripts │ │ ├── postinstall │ │ └── preinstall │ └── structuremap_terminal ├── one_click_windows_gui │ ├── create_installer_windows.sh │ └── structuremap_innoinstaller.iss ├── pyinstaller │ ├── structuremap.spec │ └── structuremap_pyinstaller.py └── pypi │ ├── install_pypi_wheel.sh │ ├── install_test_pypi_wheel.sh │ └── prepare_pypi_wheel.sh ├── requirements ├── requirements.txt └── requirements_development.txt ├── setup.py ├── structuremap ├── __init__.py ├── cli.py ├── gui.py ├── plotting.py ├── processing.py └── utils.py └── tests ├── __init__.py ├── run_tests.sh ├── test_cli.py ├── test_gui.py └── test_processing.py /.github/ISSUE_TEMPLATE/bug-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | Make sure your bug is not addressed in the [troubleshooting section](https://github.com/MannLabs/structuremap#troubleshooting) or in [previous 
issues](https://github.com/MannLabs/structuremap/issues?q=is%3Aissue). If not, provide a clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Logs** 24 | Please provide the log (see the structuremap terminal on where to find it). 25 | 26 | **Screenshots** 27 | If applicable, add screenshots to help explain your problem. 28 | 29 | **Version (please complete the following information):** 30 | - Installation Type [e.g. One-Click Installer / Pip / Developer] 31 | - If no log is available, provide the following: 32 | - Platform information 33 | - system [e.g. Darwin] 34 | - release [e.g. 19.6.0] 35 | - version [e.g. 10.15.7] 36 | - machine [e.g. x86_64] 37 | - processor [e.g. i386] 38 | - cpu count [e.g. 8] 39 | - Python information: 40 | - structuremap version [e.g. 0.1.2] 41 | - [other packages] 42 | 43 | **Additional context** 44 | Add any other context about the problem here. Attach log files or upload data files if possible. 
45 | -------------------------------------------------------------------------------- /.github/workflows/github_action_test_dummy.yml: -------------------------------------------------------------------------------- 1 | on: 2 | workflow_dispatch: 3 | 4 | name: Test new GitHub action workflow 5 | 6 | 7 | jobs: 8 | Version_bumped: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Checkout code 12 | uses: actions/checkout@v2 13 | -------------------------------------------------------------------------------- /.github/workflows/pip_installation.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: [ main ] 4 | pull_request: 5 | branches: [ main, development ] 6 | workflow_dispatch: 7 | 8 | name: Default installation and tests 9 | 10 | jobs: 11 | stable_installation: 12 | name: Test stable pip installation on ${{ matrix.os }} 13 | runs-on: ${{ matrix.os }} 14 | strategy: 15 | matrix: 16 | os: [ubuntu-latest, macOS-latest, windows-latest] 17 | steps: 18 | - uses: actions/checkout@v2 19 | - uses: conda-incubator/setup-miniconda@v2 20 | with: 21 | auto-update-conda: true 22 | python-version: ${{ matrix.python-version }} 23 | - name: Conda info 24 | shell: bash -l {0} 25 | run: conda info 26 | - name: Test pip installation with all stable dependencies 27 | shell: bash -l {0} 28 | run: | 29 | cd misc 30 | . ./stable_pip_install.sh 31 | - name: Unittests 32 | shell: bash -l {0} 33 | run: | 34 | cd tests 35 | . 
./run_tests.sh 36 | loose_installation: 37 | name: Test loose pip installation on ${{ matrix.os }} 38 | runs-on: ${{ matrix.os }} 39 | strategy: 40 | matrix: 41 | os: [ubuntu-latest, macOS-latest, windows-latest] 42 | steps: 43 | - uses: actions/checkout@v2 44 | - uses: conda-incubator/setup-miniconda@v2 45 | with: 46 | auto-update-conda: true 47 | python-version: ${{ matrix.python-version }} 48 | - name: Conda info 49 | shell: bash -l {0} 50 | run: conda info 51 | - name: Test pip installation with all loose dependencies 52 | shell: bash -l {0} 53 | run: | 54 | cd misc 55 | . ./loose_pip_install.sh 56 | - name: Unittests 57 | shell: bash -l {0} 58 | run: | 59 | cd tests 60 | . ./run_tests.sh 61 | -------------------------------------------------------------------------------- /.github/workflows/publish_and_release.yml: -------------------------------------------------------------------------------- 1 | on: 2 | # push: 3 | # branches: [ main ] 4 | workflow_dispatch: 5 | 6 | 7 | name: Publish on PyPi and release on GitHub 8 | 9 | jobs: 10 | Version_Bumped: 11 | runs-on: ubuntu-latest 12 | outputs: 13 | version: ${{ steps.master_version_bumped.outputs.version }} 14 | steps: 15 | - name: Checkout code 16 | uses: actions/checkout@v2 17 | - uses: conda-incubator/setup-miniconda@v2 18 | with: 19 | auto-update-conda: true 20 | python-version: ${{ matrix.python-version }} 21 | - name: Master version bumped 22 | id: master_version_bumped 23 | shell: bash -l {0} 24 | run: | 25 | cd misc 26 | . 
./check_version.sh 27 | echo ::set-output name=version::$current_version 28 | # Create_Draft_On_GitHub: 29 | # runs-on: ubuntu-latest 30 | # needs: Version_Bumped 31 | # outputs: 32 | # upload_url: ${{ steps.draft_release.outputs.upload_url }} 33 | # steps: 34 | # - name: Draft Release 35 | # id: draft_release 36 | # uses: actions/create-release@v1 37 | # env: 38 | # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # This token is provided by Actions, you do not need to create your own token 39 | # with: 40 | # tag_name: ${{ needs.Version_Bumped.outputs.version }} 41 | # release_name: Release version ${{ needs.Version_Bumped.outputs.version }} 42 | # draft: false 43 | # prerelease: false 44 | # Create_Linux_Release: 45 | # runs-on: ubuntu-latest 46 | # needs: Create_Draft_On_GitHub 47 | # steps: 48 | # - name: Checkout code 49 | # uses: actions/checkout@v2 50 | # - uses: conda-incubator/setup-miniconda@v2 51 | # with: 52 | # auto-update-conda: true 53 | # python-version: ${{ matrix.python-version }} 54 | # - name: Conda info 55 | # shell: bash -l {0} 56 | # run: conda info 57 | # - name: Creating installer for Linux 58 | # shell: bash -l {0} 59 | # run: | 60 | # cd release/one_click_linux_gui 61 | # . 
./create_installer_linux.sh 62 | # - name: Test installer for Linux 63 | # shell: bash -l {0} 64 | # run: | 65 | # sudo dpkg -i release/one_click_linux_gui/dist/structuremap_gui_installer_linux.deb 66 | # - name: Upload Linux Installer 67 | # id: upload-release-asset 68 | # uses: actions/upload-release-asset@v1 69 | # env: 70 | # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 71 | # with: 72 | # upload_url: ${{ needs.Create_Draft_On_GitHub.outputs.upload_url }} 73 | # asset_path: release/one_click_linux_gui/dist/structuremap_gui_installer_linux.deb 74 | # asset_name: structuremap_gui_installer_linux.deb 75 | # asset_content_type: application/octet-stream 76 | # Create_MacOS_Release: 77 | # runs-on: macos-latest 78 | # needs: Create_Draft_On_GitHub 79 | # steps: 80 | # - name: Checkout code 81 | # uses: actions/checkout@v2 82 | # - uses: conda-incubator/setup-miniconda@v2 83 | # with: 84 | # auto-update-conda: true 85 | # python-version: ${{ matrix.python-version }} 86 | # - name: Conda info 87 | # shell: bash -l {0} 88 | # run: conda info 89 | # - name: Creating installer for MacOS 90 | # shell: bash -l {0} 91 | # run: | 92 | # cd release/one_click_macos_gui 93 | # . 
./create_installer_macos.sh 94 | # - name: Test installer for MacOS 95 | # shell: bash -l {0} 96 | # run: | 97 | # sudo installer -pkg release/one_click_macos_gui/dist/structuremap_gui_installer_macos.pkg -target / 98 | # - name: Upload MacOS Installer 99 | # id: upload-release-asset 100 | # uses: actions/upload-release-asset@v1 101 | # env: 102 | # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 103 | # with: 104 | # upload_url: ${{ needs.Create_Draft_On_GitHub.outputs.upload_url }} 105 | # asset_path: release/one_click_macos_gui/dist/structuremap_gui_installer_macos.pkg 106 | # asset_name: structuremap_gui_installer_macos.pkg 107 | # asset_content_type: application/octet-stream 108 | # Create_Windows_Release: 109 | # runs-on: windows-latest 110 | # needs: Create_Draft_On_GitHub 111 | # steps: 112 | # - name: Checkout code 113 | # uses: actions/checkout@v2 114 | # - uses: conda-incubator/setup-miniconda@v2 115 | # with: 116 | # auto-update-conda: true 117 | # python-version: ${{ matrix.python-version }} 118 | # - name: Conda info 119 | # shell: bash -l {0} 120 | # run: conda info 121 | # - name: Creating installer for Windows 122 | # shell: bash -l {0} 123 | # run: | 124 | # cd release/one_click_windows_gui 125 | # . ./create_installer_windows.sh 126 | # - name: Test installer for Windows 127 | # shell: bash -l {0} 128 | # run: | 129 | # cd release/one_click_windows_gui/dist/ 130 | # echo "TODO, this test seems to freeze the runner..." 
131 | # # ./structuremap_gui_installer_windows.exe //verysilent //log=log.txt //noicons //tasks= //portable=1 132 | # # cat log.txt 133 | # - name: Upload Windows Installer 134 | # id: upload-release-asset 135 | # uses: actions/upload-release-asset@v1 136 | # env: 137 | # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 138 | # with: 139 | # upload_url: ${{ needs.Create_Draft_On_GitHub.outputs.upload_url }} 140 | # asset_path: release/one_click_windows_gui/dist/structuremap_gui_installer_windows.exe 141 | # asset_name: structuremap_gui_installer_windows.exe 142 | # asset_content_type: application/octet-stream 143 | Create_PyPi_Release: 144 | runs-on: ubuntu-latest 145 | needs: Version_Bumped 146 | steps: 147 | - name: Checkout code 148 | uses: actions/checkout@v2 149 | - uses: conda-incubator/setup-miniconda@v2 150 | with: 151 | auto-update-conda: true 152 | python-version: ${{ matrix.python-version }} 153 | - name: Conda info 154 | shell: bash -l {0} 155 | run: conda info 156 | - name: Prepare distribution 157 | shell: bash -l {0} 158 | run: | 159 | cd release/pypi 160 | . ./prepare_pypi_wheel.sh 161 | - name: Publish distribution to Test PyPI 162 | uses: pypa/gh-action-pypi-publish@master 163 | with: 164 | password: ${{ secrets.TEST_PYPI_API_TOKEN }} 165 | repository_url: https://test.pypi.org/legacy/ 166 | - name: Test PyPI test release 167 | shell: bash -l {0} 168 | run: | 169 | cd release/pypi 170 | . 
./install_test_pypi_wheel.sh 171 | - name: Publish distribution to PyPI 172 | uses: pypa/gh-action-pypi-publish@master 173 | with: 174 | password: ${{ secrets.PYPI_API_TOKEN }} 175 | Test_PyPi_Release: 176 | name: Test_PyPi_version_on_${{ matrix.os }} 177 | runs-on: ${{ matrix.os }} 178 | needs: Create_PyPi_Release 179 | strategy: 180 | matrix: 181 | os: [ubuntu-latest, macOS-latest, windows-latest] 182 | steps: 183 | - uses: actions/checkout@v2 184 | - uses: conda-incubator/setup-miniconda@v2 185 | with: 186 | auto-update-conda: true 187 | python-version: ${{ matrix.python-version }} 188 | - name: Conda info 189 | shell: bash -l {0} 190 | run: conda info 191 | - name: Test pip installation from PyPi 192 | shell: bash -l {0} 193 | run: | 194 | cd release/pypi 195 | . ./install_pypi_wheel.sh 196 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | # lib/ 18 | # lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | # *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # User defined: 132 | structuremap/logs 133 | *.DS_Store 134 | *sandbox* 135 | -------------------------------------------------------------------------------- /HISTORY.md: -------------------------------------------------------------------------------- 1 | ## Changelog 2 | 3 | ### 0.0.1 4 | 5 | * FEAT: Initial creation of structuremap. 6 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2020 MannLabs 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include structuremap * 2 | include LICENSE.txt 3 | include README.md 4 | recursive-exclude structuremap/logs * 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![Pip installation](https://github.com/MannLabs/structuremap/workflows/Default%20installation%20and%20tests/badge.svg) 2 | ![GUI and PyPi releases](https://github.com/MannLabs/structuremap/workflows/Publish%20on%20PyPi%20and%20release%20on%20GitHub/badge.svg) 3 | [![Downloads](https://pepy.tech/badge/structuremap)](https://pepy.tech/project/structuremap) 4 | [![Downloads](https://pepy.tech/badge/structuremap/month)](https://pepy.tech/project/structuremap) 5 | [![Downloads](https://pepy.tech/badge/structuremap/week)](https://pepy.tech/project/structuremap) 6 | 7 | 8 | # structuremap 9 | An open-source Python package for integrating information from predicted protein 
structures deposited in the [AlphaFold database](https://alphafold.ebi.ac.uk/) with proteomics data and specifically with post-translational modifications (PTMs). PTMs on the 3D protein structures can be visualised by [AlphaMap](https://github.com/MannLabs/alphamap). To enable all hyperlinks in this document, please view it at [GitHub](https://github.com/MannLabs/structuremap). 10 | 11 | * [**About**](#about) 12 | * [**License**](#license) 13 | * [**Installation**](#installation) 14 | * [**Pip installer**](#pip) 15 | * [**Developer installer**](#developer) 16 | * [**Usage**](#usage) 17 | * [**Python and jupyter notebooks**](#python-and-jupyter-notebooks) 18 | * [**Troubleshooting**](#troubleshooting) 19 | * [**Citing structuremap**](#citing-structuremap) 20 | * [**How to contribute**](#how-to-contribute) 21 | * [**Changelog**](#changelog) 22 | 23 | --- 24 | ## About 25 | 26 | An open-source Python package for integrating information from predicted protein structures deposited in the [AlphaFold database](https://alphafold.ebi.ac.uk/) with proteomics data and specifically with post-translational modifications (PTMs). You can find a detailed description of the tool and its capabilities to generate biological insights in ["The structural context of PTMs at a proteome wide scale" by Bludau et al. (2022)](https://doi.org/10.1371/journal.pbio.3001636). The complete analysis workflow of this study performed with structuremap can be found [here](https://github.com/MannLabs/structuremap_analysis). 27 | 28 | --- 29 | ## License 30 | 31 | structuremap was developed by the [Mann Labs at the Max Planck Institute of Biochemistry](https://www.biochem.mpg.de/mann) and the [University of Copenhagen](https://www.cpr.ku.dk/research/proteomics/mann/) and is freely available with an [Apache License](LICENSE.txt). External Python packages (available in the [requirements](requirements) folder) have their own licenses, which can be consulted on their respective websites. 
32 | 33 | --- 34 | ## Installation 35 | 36 | structuremap can be installed and used on all major operating systems (Windows, macOS and Linux). 37 | There are two different types of installation possible: 38 | 39 | * [**Pip installer:**](#pip) Choose this installation if you want to use structuremap as a Python package in an existing Python 3.8 environment (e.g. a Jupyter notebook). If needed, the GUI and CLI can be installed with pip as well. 40 | * [**Developer installer:**](#developer) Choose this installation if you are familiar with CLI tools, [conda](https://docs.conda.io/en/latest/) and Python. This installation allows access to all available features of structuremap and even allows to modify its source code directly. Generally, the developer version of structuremap outperforms the precompiled versions which makes this the installation of choice for high-throughput experiments. 41 | 42 | ### Pip 43 | 44 | structuremap can be installed in an existing Python 3.8 environment with a single `bash` command. *This `bash` command can also be run directly from within a Jupyter notebook by prepending it with a `!`*: 45 | 46 | ```bash 47 | pip install structuremap 48 | ``` 49 | 50 | Installing structuremap like this avoids conflicts when integrating it in other tools, as this does not enforce strict versioning of dependencies. However, if new versions of dependencies are released, they are not guaranteed to be fully compatible with structuremap. While this should only occur in rare cases where dependencies are not backwards compatible, you can always force structuremap to use dependency versions which are known to be compatible with: 51 | 52 | ```bash 53 | pip install "structuremap[stable]" 54 | ``` 55 | 56 | NOTE: You might need to run `pip install pip==21.0` before installing structuremap like this. Also note the double quotes `"`. 57 | 58 | For those who are really adventurous, it is also possible to directly install any branch (e.g. 
`@development`) with any extras (e.g. `#egg=structuremap[stable,development-stable]`) from GitHub with e.g. 59 | 60 | ```bash 61 | pip install "git+https://github.com/MannLabs/structuremap.git@development#egg=structuremap[stable,development-stable]" 62 | ``` 63 | 64 | ### Developer 65 | 66 | structuremap can also be installed in editable (i.e. developer) mode with a few `bash` commands. This allows to fully customize the software and even modify the source code to your specific needs. When an editable Python package is installed, its source code is stored in a transparent location of your choice. While optional, it is advised to first (create and) navigate to e.g. a general software folder: 67 | 68 | ```bash 69 | mkdir ~/folder/where/to/install/software 70 | cd ~/folder/where/to/install/software 71 | ``` 72 | 73 | ***The following commands assume you do not perform any additional `cd` commands anymore***. 74 | 75 | Next, download the structuremap repository from GitHub either directly or with a `git` command. This creates a new structuremap subfolder in your current directory. 76 | 77 | ```bash 78 | git clone https://github.com/MannLabs/structuremap.git 79 | ``` 80 | 81 | For any Python package, it is highly recommended to use a separate [conda virtual environment](https://docs.conda.io/en/latest/), as otherwise *dependency conflicts can occur with already existing packages*. 82 | 83 | ```bash 84 | conda create --name structuremap python=3.8 -y 85 | conda activate structuremap 86 | ``` 87 | 88 | Finally, structuremap and all its [dependencies](requirements) need to be installed. 
To take advantage of all features and allow development (with the `-e` flag), this is best done by also installing the [development dependencies](requirements/requirements_development.txt) instead of only the [core dependencies](requirements/requirements.txt): 89 | 90 | ```bash 91 | pip install -e "./structuremap[development]" 92 | ``` 93 | 94 | By default this installs loose dependencies (no explicit versioning), although it is also possible to use stable dependencies (e.g. `pip install -e "./structuremap[stable,development-stable]"`). 95 | 96 | ***By using the editable flag `-e`, all modifications to the [structuremap source code folder](structuremap) are directly reflected when running structuremap. Note that the structuremap folder cannot be moved and/or renamed if an editable version is installed. In case of confusion, you can always retrieve the location of any Python module with e.g. the command `import module` followed by `module.__file__`.*** 97 | 98 | --- 99 | ## Usage 100 | 101 | ### Python and Jupyter notebooks 102 | 103 | structuremap can be imported as a Python package into any Python script or notebook with the command `import structuremap`. 104 | 105 | A brief [Jupyter notebook tutorial](nbs/tutorial.ipynb) on how to use the API is also present in the [nbs folder](nbs). 106 | 107 | --- 108 | ## Troubleshooting 109 | 110 | In case of issues, check out the following: 111 | 112 | * [Issues](https://github.com/MannLabs/structuremap/issues): Try a few different search terms to find out if a similar problem has been encountered before 113 | * [Discussions](https://github.com/MannLabs/structuremap/discussions): Check if your problem or feature request has been discussed before. 114 | 115 | --- 116 | ## Citing structuremap 117 | 118 | If you use structuremap for your work, please cite our publication: 119 | 120 | Bludau I, et al. (2022) The structural context of posttranslational modifications at a proteome-wide scale. PLOS Biology 20(5): e3001636. 
https://doi.org/10.1371/journal.pbio.3001636 121 | 122 | --- 123 | ## How to contribute 124 | 125 | If you like this software, you can give us a [star](https://github.com/MannLabs/structuremap/stargazers) to boost our visibility! All direct contributions are also welcome. Feel free to post a new [issue](https://github.com/MannLabs/structuremap/issues) or clone the repository and create a [pull request](https://github.com/MannLabs/structuremap/pulls) with a new branch. For an even more interactive participation, check out the [discussions](https://github.com/MannLabs/structuremap/discussions) and the [Contributors License Agreement](misc/CLA.md). 126 | 127 | --- 128 | ## Changelog 129 | 130 | See the [HISTORY.md](HISTORY.md) for a full overview of the changes made in each version. 131 | -------------------------------------------------------------------------------- /data/test_files/kinase_motifs.csv: -------------------------------------------------------------------------------- 1 | enzyme motif mod_pos 2 | Akt kinase substrate motif [R][A-Z][R][A-Z][A-Z][ST][FL] 5 3 | Akt kinase substrate motif [R][A-Z][R][A-Z][A-Z][ST] 5 4 | Akt kinase substrate motif [G][R][A][R][T][ST][S][FAE] 6 5 | Akt kinase substrate motif [RQK][RKNQPH][RK][RST][NKQHDP][S][FWIMNS][STH][RSK][STPQ] 5 6 | Akt kinase substrate motif [RK][A-Z][RK][ST][A-Z][S] 5 7 | AMP-activated protein kinase substrate motif [MVLIF][RKH][A-Z][A-Z][A-Z][ST][A-Z][A-Z][A-Z][MVLIF] 5 8 | AMP-activated protein kinase substrate motif [MVLI][A-Z][A-Z][RKH][A-Z][ST][A-Z][A-Z][A-Z][MVLI] 5 9 | AMP-activated protein kinase substrate motif [MVLIF][RKH][A-Z][A-Z][ST][A-Z][A-Z][A-Z][MVLIF] 4 10 | AMP-activated protein kinase 2 substrate motif [RK][A-Z][R][A-Z][A-Z][S][A-Z][A-Z][A-Z][RK] 5 11 | ATM kinase substrate motif [PLIM][A-Z][LIDE][S][Q] 3 12 | ATM kinase substrate motif [L][S][Q][E] 1 13 | ATM kinase substrate motif [S][Q] 0 14 | Aurora-A kinase substrate motif [RKN][R][A-Z][ST][MLVI] 3 15 | b-Adrenergic Receptor 
kinase substrate motif [DE][ST][A-Z][A-Z][A-Z] 1 16 | Branched chain alpha-ketoacid dehydrogenase kinase substrate motif [H][S][T][S][D][D] 1 17 | Branched chain alpha-ketoacid dehydrogenase kinase substrate motif [Y][R][S][V][D][E] 2 18 | Calmodulin-dependent protein kinase I substrate motif [MVLIF][A-Z][R][A-Z][A-Z][ST][A-Z][A-Z][A-Z][MVLIF] 5 19 | Calmodulin-dependent protein kinase II alpha substrate motif [MILVFY][A-Z][R][A-Z][A-Z][ST][MILVFY] 5 20 | Calmodulin-dependent protein kinase II substrate motif [R][A-Z][A-Z][ST] 3 21 | Calmodulin-dependent protein kinase II substrate motif [KF][RK][QM][QMKLF][S][FIMLV][DEI][LMKI][FK] 4 22 | Calmodulin-dependent protein kinase II substrate motif [MVLIF][A-Z][RK][A-Z][A-Z][ST][A-Z][A-Z] 5 23 | Calmodulin-dependent protein kinase II substrate motif [R][A-Z][A-Z][S] 3 24 | Calmodulin-dependent protein kinase IV substrate motif [V][P][G][K][A][R][K][K][S][S][C][Q][L][L] 8 25 | Calmodulin-dependent protein kinase IV substrate motif [P][L][A][R][T][L][S][V][A][G][L][P] 6 26 | Calmodulin-dependent protein kinase IV substrate motif [MILVFY][A-Z][R][A-Z][A-Z][ST] 5 27 | Casein kinase I delta substrate motif [E][FE][D][TAG][G][S][I][IFYG][IGF][FG][FPL] 5 28 | Casein kinase I gamma substrate motif [Y][YE][DY][AD][AG][S][I][IYFG][IGF][FG][FPL] 5 29 | Casein kinase I substrate motif [DE][A-Z][A-Z][ST] 3 30 | Casein kinase II substrate motif [EDA][DE][ED][ED][S][EDA][DEA][ED][ED] 4 31 | Casein kinase II substrate motif [S][A-Z][EST] 0 32 | Casein kinase II substrate motif [S][A-Z][A-Z][EST] 0 33 | Casein kinase II substrate motif [ST][A-Z][A-Z][ED] 0 34 | Casein kinase II substrate motif [S][D][A-Z][E] 0 35 | Casein kinase II substrate motif [S][A-Z][A-Z][ED] 0 36 | Casein kinase II substrate motif [S][DE][A-Z][DE][A-Z][DE] 0 37 | Casein kinase II substrate motif [DE][S][DE][A-Z][DE] 1 38 | Casein kinase II substrate motif [S][DE][DE][DE] 0 39 | Casein kinase II substrate motif [ST][A-Z][A-Z][DE] 0 40 | Casein kinase II substrate 
motif [ST][A-Z][A-Z][EDSY] 0 41 | Casein kinase II substrate motif [SEPG][DSNEP][EDGQW][YEDSWT][WED][S][DE][DEWN][ED][EDNQ] 5 42 | Cdc2 kinase substrate motif [RK][S][P][RP][RKH] 1 43 | Cdc2 kinase substrate motif [ST][P][A-Z][RK] 0 44 | Cdc2 kinase substrate motif [H][H][H][RK][S][P][R][RK][R] 4 45 | Cdc2 like protein kinase substrate motif [P][A-Z][ST][P][K][K][A-Z][K][K] 2 46 | CDK1,2,4,6 kinase substrate motif [ST][P][A-Z][RK] 0 47 | CDK kinase substrate motif [S][P][A-Z][RK][A-Z] 0 48 | CDK4 kinase substrate motif [P][L][ST][P][I][P][KRH] 2 49 | CDK4 kinase substrate motif [P][L][ST][P][A-Z][KRH] 2 50 | CDK5 kinase substrate motif [T][P][A-Z][K] 0 51 | CDK5 kinase substrate motif [KHG][H][HP][KGH][S][P][RK][HRK][RHK] 4 52 | CDK5 kinase substrate motif [ST][P][G][ST][P][G][T][P] 3 53 | Chk1 kinase substrate motif [MILV][A-Z][RK][A-Z][A-Z][ST] 5 54 | CLK1 kinase substrate motif [R][A-Z][A-Z][ST][A-Z][A-Z][R] 3 55 | CLK1,2 kinase substrate motif [RK][A-Z][RK][A-Z][RK][A-Z][S][A-Z][A-Z][R] 6 56 | CLK2 kinase substrate motif [R][RH][RH][RE][R][E][RH][S][R][RD][L] 7 57 | DMPK1 kinase substrate motif [K][K][A-Z][R][R][T][LV][A-Z] 5 58 | DMPK1 kinase substrate motif [K][K][R][A-Z][R][T][LV][A-Z] 5 59 | DMPK1 kinase substrate motif [RK][A-Z][R][R][A-Z][ST][LV][A-Z] 5 60 | DMPK1,2 kinase substrate motif [R][A-Z][A-Z][ST][LV][R] 3 61 | DNA dependent Protein kinase substrate motif [A-Z][S][Q] 1 62 | DNA dependent Protein kinase substrate motif [P][ST][A-Z] 1 63 | DOA/CDC-like kinase 2 substrate motif [R][RK][R][ER][R][EA][HR][S][R][R][RD][LE] 7 64 | Doublecortin kinase-1 kinase substrate motif [ILVFM][R][R][A-Z][A-Z][ST][ILMVF] 5 65 | elF2 alpha kinase substrate motif [E][A-Z][S][A-Z][R][A-Z][A-Z][R] 2 66 | ERK1 kinase substrate motif [TPS][GPEY][PLI][LMP][S][P][GPF][PFGY][FYI] 4 67 | ERK1 kinase substrate motif [T][E][Y] 0 68 | ERK1,2 kinase substrate motif [P][A-Z][ST][P][P] 2 69 | ERK1,2 kinase substrate motif [A-Z][A-Z][P][A-Z][ST][P][P][P][A-Z] 4 70 | ERK1,2 kinase 
substrate motif [P][A-Z][ST][P] 2 71 | ERK1,2 kinase substrate motif [S][P] 0 72 | ERK1, ERK2, SAPK, CDK5 and GSK3 kinase substrate motif [K][S][P][P] 1 73 | ERK2 kinase substrate motif [DYWE][C][PSCE][PCSLTV][LMT][S][PA][TSGRCF][WPS][WF] 5 74 | G protein-coupled receptor kinase 1 substrate motif [A-Z][A-Z][ST][E] 2 75 | G protein-coupled receptor kinase 1 substrate motif [A-Z][ST][A-Z][A-Z][A-Z][APST] 1 76 | Growth associated histone HI kinase substrate motif [ST][ST][P][A-Z][KR] 1 77 | Growth associated histone HI kinase substrate motif [KR][ST][P] 1 78 | Growth associated histone HI kinase substrate motif [ST][P][KR] 0 79 | GSK3 kinase substrate motif [S][A-Z][A-Z][A-Z][S] 0 80 | GSK3, Erk1, Erk2 and CDK5 kinase motif [P][A-Z][T][P] 2 81 | GSK-3, ERK1, ERK2, CDK5 substrate motif [R][A-Z][A-Z][S][P][V] 3 82 | GSK-3, ERK1, ERK2, CDK5 substrate motif [K][ST][P][A-Z][K] 1 83 | GSK-3, ERK1, ERK2, CDK5 substrate motif [K][S][P][A-Z][A-Z][A-Z][K] 1 84 | GSK-3, ERK1, ERK2, CDK5 substrate motif [K][S][P][A-Z][A-Z][K] 1 85 | GSK-3, ERK1, ERK2, CDK5 substrate motif [K][S][P][A-Z][A-Z][A-Z][A-Z][K] 1 86 | GSK-3, ERK1, ERK2, CDK5 substrate motif [K][T][P][A][K][E][E] 1 87 | GSK-3, ERK1, ERK2, CDK5 substrate motif [P][A-Z][S][P] 2 88 | GSK-3, ERK1, ERK2, CDK5 substrate motif [A-Z][ST][P] 1 89 | GSK-3, ERK1, ERK2, CDK5 substrate motif [A-Z][A-Z][S][P] 2 90 | HMGCoA Reductase kinase substrate motif [MLVIF][RKH][A-Z][A-Z][S][A-Z][A-Z][A-Z][MLVIF] 4 91 | JNK1 kinase substrate motif [G][P][QM][S][P][I] 3 92 | LKB1 kinase substrate motif [L][R][T] 2 93 | MAPKAPK1 kinase substrate motif [RK][A-Z][R][A-Z][A-Z][S] 5 94 | MAPKAPK1 kinase substrate motif [R][R][R][A-Z][S] 4 95 | MAPKAPK2 kinase substrate motif [LFI][A-Z][A-Z][A-Z][R][QST][L][ST][MLIV] 7 96 | MAPKAPK2 kinase substrate motif [A-Z][A-Z][ND][A-Z][R][A-Z][A-Z][S][A-Z][A-Z] 7 97 | MAPKAPK2 kinase substrate motif [S][A-Z][A-Z][A-Z][ST] 4 98 | MAPK 11,13,14 kinase substrate motif [T][GPE][Y] 0 99 | MEKK kinase substrate motif 
[R][R][F][G][S][ND][R][R][F] 4 100 | MEKK kinase substrate motif [R][R][F][G][S][MLVIF][R][R][MLVIF] 4 101 | MLCK kinase substrate motif [K][K][R][A-Z][A-Z][S][A-Z][RK][RK] 5 102 | mTOR kinase substrate motif [F][T][Y] 1 103 | Nek 2 kinase substrate motif [I][R][R][L][S][T][R][R][R] 4 104 | NIMA kinase substrate motif [RN][FLM][RK][RK][S][RIVM][RIMV][MIFV][IFM] 4 105 | NIMA kinase substrate motif [F][R][A-Z][ST] 3 106 | NIMA kinase substrate motif [R][F][RK][RK][S][RI][RI][M][I] 4 107 | p70 Ribosomal S6 kinase substrate motif [RK][A-Z][R][A-Z][A-Z][ST][MLVI] 5 108 | p70 Ribosomal S6 kinase substrate motif [V][F][L][G][F][T][Y][V][A][P] 5 109 | PAK1 kinase substrate motif [A][K][R][R][R][L][S][S][S][L][R][A] 8 110 | PAK1 kinase substrate motif [V][R][K][R][T][L][R][R][L] 4 111 | PAK2 kinase substrate motif [RK][RA-Z][A-Z][ST] 3 112 | PDK1 kinase substrate motif [F][A-Z][A-Z][F][ST][FY] 4 113 | Phosphorylase kinase substrate motif [K][R][K][Q][I][S][V][R] 5 114 | Phosphorylase kinase substrate motif [FMK][RK][MRQF][MFLI][S][S][FIML][FRK][LI][FLI] 5 115 | Phosphorylase kinase substrate motif [KR][A-Z][A-Z][S][VI] 3 116 | Pim1 kinase substrate sequence [RK][RK][RK][A-Z][ST][A-Z] 4 117 | Pim2 kinase substrate sequence [RK][RKAQP][RK][RQHNY][PHK][S][GST][PSGQHST][SPQGD][TSPG] 5 118 | PKA kinase substrate motif [R][R][A-Z][S][MILVFY] 3 119 | PKA kinase substrate motif [R][A-Z][S] 2 120 | PKA kinase substrate motif [K][R][A-Z][A-Z][S] 4 121 | PKA kinase substrate motif [R][A-Z][A-Z][S] 3 122 | PKA kinase substrate motif [RK][A-Z][ST] 2 123 | PKA kinase substrate motif [K][A-Z][A-Z][ST] 3 124 | PKA kinase substrate motif [RK][RK][A-Z][ST] 3 125 | PKA kinase substrate motif [K][A-Z][A-Z][A-Z][ST] 4 126 | PKA kinase substrate motif [ST][A-Z][RK] 0 127 | PKA kinase substrate motif [R][R][R][R][S][I][I][F][I] 4 128 | PKA kinase substrate motif [R][R][A-Z][S] 3 129 | PKA kinase substrate motif [R][RK][A-Z][ST][ILVFY][DCA-Z][A-Z][D] 3 130 | PKA kinase substrate motif 
[R][R][A-Z][S] 3 131 | PKA kinase substrate motif [R][R][R][RN][S][I][I][FD] 4 132 | PKA kinase substrate motif [RCPK][RAP][RK][RKS][NLSMP][S][ILVC][SPHQ][SWQ][SLG] 5 133 | PKA, PKG kinase substrate motif [R][RK][A-Z][ST][ND] 3 134 | PKC alpha kinase substrate motif [A][R][K][G][S][L][R][Q] 4 135 | PKC alpha kinase substrate motif [R][RF][R][R][RK][G][S][F][RK][RK] 6 136 | PKC beta kinase substrate motif [LRF][RK][R][KQ][G][S][FM][K][K][A-Z][A] 5 137 | PKC delta kinase substrate motif [R][A-Z][R][K][G][S][F] 5 138 | PKC epsilon kinase substrate motif [K][R][Q][G][S][V][R][R] 4 139 | PKC epsilon kinase substrate motif [R][KER][A-Z][S] 3 140 | PKC eta kinase substrate motif [A][R][A-Z][A-Z][R][RK][R][S][F][R][R] 7 141 | PKC family kinase substrate motif [F][A-Z][A-Z][F][ST][ST][FY] 5 142 | PKC gamma kinase substrate motif [R][R][R][K][GK][S][F][RK][RK][K][A] 5 143 | PKC kinase substrate motif [A-Z][R][A-Z][A-Z][S][A-Z][R][A-Z] 4 144 | PKC kinase substrate motif [ST][A-Z][RK] 0 145 | PKC kinase substrate motif [RK][A-Z][A-Z][ST] 3 146 | PKC kinase substrate motif [RK][A-Z][A-Z][ST][A-Z][RK] 3 147 | PKC kinase substrate motif [KR][A-Z][ST] 2 148 | PKC kinase substrate motif [RK][A-Z][ST][A-Z][RK] 2 149 | PKC mu kinase substrate motif [LV][VLA][R][QKE][M][S] 5 150 | PKC theta kinase substrate motif [RFWM][WAKS][RSKH][RHSQ][RKNPGQ][S][IFRVKSLM][KMRST][RSKW][RKG] 5 151 | PKC zeta kinase substrate motif [F][A-Z][R][A-Z][A-Z][S][FM][FM] 5 152 | PKD kinase substrate motif [LVI][RKQ][RK][RKTQM][NKRLMH][S][FWIMLV][SN][RSPYW][SRNL] 5 153 | PKG kinase substrate motif [R][RK][A-Z][ST][ND] 3 154 | PKR kinase substrate motif [R][A-Z][A-Z][ST][A-Z][R][A-Z][A-Z][R] 3 155 | Plk1 kinase substrate motif [DE][A-Z][ST][ILVM][A-Z][DE] 2 156 | Pyruvate dehydrogenase kinase substrate motif [A-Z][S][A-Z][A-Z][D][A-Z][A-Z] 1 157 | RAF1 kinase substrate motif [P][L][T][L][P] 2 158 | RAF1 kinase substrate motif [P][L][L][T][P] 3 159 | RAF1 kinase substrate motif [P][L][T][P] 2 160 | RAF1 kinase 
substrate motif [P][T][L][P] 1 161 | RAF1 kinase substrate motif [P][L][T][L][P] 2 162 | RAF1 kinase substrate motif [P][T][L][P] 1 163 | RAF1 kinase substrate motif [L][T][P] 1 164 | TGF beta receptor kinase substrate motif [K][K][K][K][K][K][ST][A-Z][A-Z][A-Z] 6 165 | TGF beta receptor kinase substrate motif [RKQN][MCW][RTSN][EDSN][RKEDN][S][SDE][SD][RN][NHSRC] 5 166 | ZIP kinase substrate motif [R][R][A-Z][A-Z][S] 4 167 | ZIP kinase substrate motif [K][R][A-Z][R][S] 4 168 | ZIP kinase substrate motif [K][R][R][A-Z][T] 4 169 | Dual specificity protein phosphatase 1 substrate motif [T][E][Y] 0 170 | Dual specificity protein phosphatase 6 substrate motif [T][A-Z][Y] 0 171 | PP2A, PP2C substrate motif [R][R][A][ST][V][A] 3 172 | PP2B substrate motif [A-Z][R][A-Z][A-Z][S][V][A] 4 173 | PP2C delta substrate motif [A-Z][T][A-Z][Y][A-Z] 1 174 | 14-3-3 domain binding motif [K][C][S][T][W][P] 3 175 | 14-3-3 domain binding motif [R][A-Z][A-Z][S] 3 176 | 14-3-3 domain binding motif [R][A-Z][R][A-Z][A-Z][S][A-Z][P] 5 177 | 14-3-3 domain binding motif [Y][T][V] 1 178 | 14-3-3 domain binding motif [R][S][A-Z][ST][A-Z][P] 3 179 | 14-3-3 domain binding motif [R][A-Z][YF][A-Z][S][A-Z][P] 4 180 | 14-3-3 domain binding motif [R][P][V][S][S][A][A][S][V][Y] 7 181 | BARD1 BRCT domain binding motif [S][DE][DE][E] 0 182 | Beta-TrCP1 domain binding motif [D][S][G][A-Z][A-Z][S] 5 183 | BRCA1 BRCT domain binding motif [S][FYH][VFY][FY] 0 184 | CDC4 WD40 domain binding motif [IL][ILP][T][P][RK] 2 185 | Chk2 FHA domain binding motif [H][F][D][T][Y][L][I] 3 186 | FHA domain binding motif [RDH][LY][LM][KA][T][QLMEV][KLIR] 4 187 | MDC1 BRCT domain binding motif [S][ST][A-Z] 1 188 | Plk1 PBD domain binding motif [S][ST][A-Z] 1 189 | RAD9 BRCT domain binding motif [S][Y][I][I] 0 190 | WW domain binding motif [ST][P] 0 191 | Abl kinase substrate motif [Y][M][A-Z][M] 0 192 | Abl kinase substrate motif [E][D][A][I][Y] 4 193 | Abl kinase substrate motif [A-Z][V][I][Y][A][A][P][F] 3 194 | Abl kinase 
substrate motif [E][A][I][Y][A][A][P][F] 3 195 | Abl kinase substrate motif [E][E][I][Y][E][E][Y] 6 196 | Abl kinase substrate motif [E][E][I][Y][E][E][Y] 3 197 | Abl kinase substrate motif [E][A-Z][I][Y][A-Z][A-Z][P][A-Z] 3 198 | Abl kinase substrate motif [E][E][I][Y][Y][Y][V][H] 3 199 | Abl kinase substrate motif [E][R][I][Y][A][R][T][K] 3 200 | Abl kinase substrate motif [A][E][V][IVLF][Y][A][A][PF][F] 4 201 | ALK kinase substrate motif [Y][A-Z][A-Z][Y][Y] 0 202 | ALK kinase substrate motif [Y][DE][A-Z][ILVM] 0 203 | ALK kinase substrate motif [DE][A-Z][A-Z][Y] 3 204 | ALK kinase substrate motif [Y][A-Z][A-Z][A-Z][A-Z][FY] 0 205 | CSK kinase substrate motif [E][E][DE][I][Y][F][F][F][F] 4 206 | CSK kinase substrate motif [A-Z][A-Z][A-Z][I][Y][MIF][F][F][F] 4 207 | EGFR kinase substrate motif [E][E][E][E][Y][F][E][L][V] 4 208 | EGFR kinase substrate motif [EDRA][DE][DE][EDI][Y][FVIE][EFD][LIFV][V] 4 209 | EGFR kinase substrate motif [A-Z][DE][Y][A-Z] 2 210 | EGFR kinase substrate motif [Y][I][P][P] 0 211 | EGFR kinase substrate motif [A-Z][DE][Y][ILV] 2 212 | Fes kinase substrate motif [E][E][E][I][Y][E][E][I][E] 4 213 | Fes kinase substrate motif [EAD][EA][EA][IEV][Y][DE][DE][IVE][EIV] 4 214 | FGFR kinase substrate motif [E][E][E][Y][F][F][L][F] 3 215 | FGFR kinase substrate motif [A][EA][E][E][Y][FV][F][LFMIV][F] 4 216 | Fgr kinase substrate motif [M][E][EN][IV][Y][GE][I][F][F] 4 217 | IGF1 receptor kinase substrate motif [K][K][K][S][P][G][E][Y][V][N][I][E][F][G] 7 218 | Insulin receptor kinase substrate motif [Y][M][A-Z][M] 0 219 | Insulin receptor kinase substrate motif [E][E][END][Y][MF][MF][MFIE][MF] 3 220 | Insulin receptor kinase substrate motif [A-Z][E][E][E][Y][M][M][M][M] 4 221 | Insulin receptor kinase substrate motif [K][K][S][R][G][D][Y][M][T][M][Q][I][G] 6 222 | Insulin receptor kinase substrate motif [K][K][K][L][P][A][T][G][D][Y][M][N][M][S][P][V][G][D] 9 223 | JAK2 kinase substrate motif [Y][A-Z][A-Z][LIV] 0 224 | JNK kinase substrate motif 
[T][P][Y] 2 225 | Lck kinase substrate motif [A-Z][E][A-Z][I][Y][G][V][L][F] 4 226 | Lck kinase substrate motif [E][A-Z][IVLF][Y][GA][V][LVFI][FLVI] 3 227 | Lyn kinase substrate motif [D][E][E][I][Y][EG][E][L][A-Z] 4 228 | Lyn kinase substrate motif [DE][A-Z][A-Z][A-Z][A-Z][A-Z][A-Z][A-Z][DE][A-Z][A-Z][Y][A-Z][A-Z][L][A-Z][A-Z][A-Z][A-Z][A-Z][A-Z][A-Z][Y][A-Z][A-Z][LI] 11 229 | PDGFR kinase substrate motif [E][E][E][E][Y][V][F][I][A-Z] 4 230 | PDGFR kinase substrate motif [LN][RI][T][Y] 3 231 | PDGFR kinase substrate motif [DE][DE][DE][DE][Y][VEI][F][IVF] 4 232 | Src family kinase substrate motif [DE][A-Z][A-Z][A-Z][A-Z][A-Z][A-Z][A-Z][DE][A-Z][A-Z][Y][A-Z][A-Z][L][A-Z][A-Z][A-Z][A-Z][A-Z][A-Z][A-Z][Y][A-Z][A-Z][LI] 11 233 | Src family kinase substrate motif [IVLS][A-Z][Y][A-Z][A-Z][LI] 2 234 | Src kinase substrate motif [Y][M][A-Z][M] 0 235 | Src kinase substrate motif [Y][I][Y][G][S][F][K] 2 236 | Src kinase substrate motif [E][E][E][I][Y][GE][E][F][D] 4 237 | Src kinase substrate motif [D][DE][EDG][IVL][Y][GE][E][FI][F] 4 238 | Src kinase substrate motif [DE][A-Z][A-Z][A-Z][A-Z][A-Z][A-Z][A-Z][DE][A-Z][A-Z][Y][A-Z][A-Z][L][A-Z][A-Z][A-Z][A-Z][A-Z][A-Z][A-Z][Y][A-Z][A-Z][LI] 11 239 | Src kinase substrate motif [DE][DE][EDG][IVL][Y][GED][E][FILV][DE] 4 240 | Src kinase substrate motif [Y][AGSTDE] 0 241 | Syk kinase substrate motif [EDTY][A-Z][Y][E][E] 2 242 | PTP1B phosphatase substrate motif [DE][Y][Y][RK] 2 243 | PTP1B phosphatase substrate motif [DE][Y][Y][RK] 1 244 | PTP1B phosphatase substrate motif [E][F][Y][GA][T][Y][GA] 2 245 | PTP1B phosphatase substrate motif [E][YFD][Y][M] 2 246 | PTP1B phosphatase substrate motif [EP][MLIVF][Y][GA][A-Z][MLIVFY][A] 2 247 | PTP1B phosphatase substrate motif [R][D][A-Z][Y][A-Z][T][D][Y][Y][R] 8 248 | PTP1B phosphatase substrate motif [E][FDY][Y] 2 249 | PTP1B, TC-PTP phosphatase substrate motif [D][Y][Y][R] 2 250 | PTP1B, TC-PTP phosphatase substrate motif [D][Y][Y][R] 1 251 | PTPRH phosphatase substrate motif 
[DE][F][Y][GA][FY][AG] 2 252 | PTPRJ phosphatase substrate motif [F][MLVI][Y] 2 253 | SHP1 phosphatase substrate motif [DE][A-Z][LIV][A-Z][Y][A-Z][A-Z][LIV] 4 254 | SHP1 phosphatase substrate motif [DE][A-Z][LIV][A-Z][A-Z][Y][A-Z][A-Z][LIV] 5 255 | SHP1 phosphatase substrate motif [DE][DE][DE][L][A-Z][Y][A-Z][A-Z][FMLVI][DE] 5 256 | SHP1 phosphatase substrate motif [DE][A-Z][Y] 2 257 | SHP1 phosphatase substrate motif [EP][FIL][Y][Y][A][A-Z][FILV] 3 258 | SHP2 phosphatase substrate motif [Y][I][D][L] 0 259 | SHP2 phosphatase substrate motif [Y][A][S][I] 0 260 | SHP2 phosphatase substrate motif [E][F][Y][A][A-Z][VI][G][RKH][S] 2 261 | TC-PTP phosphatase substrate motif [DE][DE][A-Z][A-Z][A-Z][Y][V][A] 5 262 | TC-PTP phosphatase substrate motif [EDY][Y] 1 263 | 3BP2 SH2 domain binding motif [Y][EMV][NVI] 0 264 | Abl SH2 domain binding motif [Y][E][N][P] 0 265 | Crk SH2 domain binding motif [Y][A-Z][A-Z][P] 0 266 | Crk SH2 domain binding motif [Y][D][H][P] 0 267 | Csk SH2 domain binding motif [Y][TAS][KRQN][MIVR] 0 268 | Grb2 SH2 domain binding motif [Y][YIV][N][FLIV] 0 269 | Fes SH2 domain binding motif [Y][E][A-Z][VI] 0 270 | Fgr SH2 domain binding motif [Y][E][E][IV] 0 271 | Fyn SH2 domain binding motif [Y][E][D][P] 0 272 | GRB2, 3BP2, Csk, Fes, Syk C-terminal SH2 domain binding motif [Y][Y][MILV][A-Z][MILV] 1 273 | Grb2 SH2 domain binding motif [Y][QYV][N][YQF] 0 274 | Grb2 SH2 domain binding motif [Y][A-Z][N] 0 275 | GRB7, GRB10 SH2 domain binding motif [FY][Y][ETYS][N][ILVPTYS] 1 276 | HCP SH2 domain binding motif [Y][F][A-Z][FPLY] 0 277 | Itk SH2 domain binding motif [Y][AEV][YFESNV][PFIH] 0 278 | Lck and Src SH2 domain binding motif [Y][D][Y][V] 0 279 | Nck SH2 domain binding motif [Y][D][E][P] 0 280 | PI3 kinase p85 SH2 domain binding motif [Y][M][A-Z][M] 0 281 | PI3 kinase p85 SH2 domain binding motif [Y][A-Z][A-Z][M] 0 282 | PI3 kinase p85 SH2 domain binding motif [Y][M][P][M][S] 0 283 | PLCgamma C and N-terminal SH2 domain binding motif [Y][LIV][E][LIV] 0 
284 | RasGAP C-terminal SH2 domain binding motif [Y][A-Z][A-Z][P] 0 285 | RasGAP N-terminal SH2 domain binding motif [Y][I][L][V][A-Z][MLIVP] 0 286 | SAP and EAT2 SH2 domain binding motif [T][I][Y][A-Z][A-Z][VI] 2 287 | Sem5 SH2 domain binding motif [Y][LV][N][VP] 0 288 | Shb SH2 domain binding motif [Y][TVI][A-Z][L] 0 289 | SHC SH2 domain binding motif [Y][IEYL][A-Z][ILM] 0 290 | SHIP2 SH2 domain binding motif [IVLS][A-Z][Y][A-Z][A-Z][LI] 2 291 | SHP1 C-terminal SH2 domain binding motif [VIL][A-Z][Y][A][A-Z][LV] 2 292 | SHP1 C-terminal SH2 domain binding motif [A-Z][A-Z][Y][Y][M][KR] 2 293 | SHP1 N-terminal SH2 domain binding motif [L][YH][Y][MF][A-Z][FM] 2 294 | SHP1 N-terminal SH2 domain binding motif [L][A-Z][Y][A][A-Z][L] 2 295 | SHP1 SH2 domain binding motif [IV][A-Z][Y][A-Z][A-Z][LV] 2 296 | SHP1, SHP2 SH2 domain binding motif [VIL][A-Z][Y][MLF][A-Z][P] 2 297 | SHP2 CSH2 domain binding motif [TVIY][A-Z][Y][ASTV][A-Z][IVL] 2 298 | SHP2 C-terminal SH2 domain binding motif [ILV][ILV][ILVFTY][Y][TILV][IL][ILVP] 3 299 | SHP2 N-terminal SH2 domain binding motif [HF][A-Z][V][A-Z][TSA][Y] 5 300 | SHP2 N-terminal SH2 domain binding motif [IVL][A-Z][Y][FM][A-Z][P] 2 301 | SHP2 N-terminal SH2 domain binding motif [Y][IV][A-Z][IV] 0 302 | SHP2 N-terminal SH2 domain binding motif [ILVM][A-Z][Y][TVA][A-Z][IVLF] 2 303 | SHP2 N-terminal SH2 domain binding motif [IV][A-Z][Y][LMT][Y][APT][S][G] 2 304 | SHP2 N-terminal SH2 domain binding motif [W][MTV][Y][YR][IL][A-Z] 2 305 | SHP2, PLCgamma SH2 domain binding motifs [Y][I][P][P] 0 306 | Src and Abl SH2 domain binding motif [Y][M][A-Z][M] 0 307 | Src, Fyn, Lck, Fgr, Abl, Crk, Nck SH2 domain binding motif [Y][RKHQED][RKHQED][IP] 0 308 | Src, Fyn,Csk, Nck and SHC SH2 domain binding motif [P][P][A-Z][Y] 3 309 | Src,Lck and Fyn SH2 domains binding motif [Y][E][E][I] 0 310 | STAT1 SH2 domain binding motif [Y][DE][PR][RPQ] 0 311 | STAT3 SH2 domain binding motif [Y][A-Z][A-Z][Q] 0 312 | STAT3 SH2 domain binding motif 
[Y][MLVIF][PRKH][Q] 0 313 | Syk C-terminal SH2 domain binding motif [Y][QTE][EQ][LI] 0 314 | Syk N-terminal SH2 domain binding motif [Y][T][T][ILM] 0 315 | Syk, ZAP-70, Shc, Lyn SH2 domain binding motif [DE][A-Z][A-Z][A-Z][A-Z][A-Z][A-Z][A-Z][DE][A-Z][A-Z][Y][A-Z][A-Z][L][A-Z][A-Z][A-Z][A-Z][A-Z][A-Z][A-Z][Y][A-Z][A-Z][LI] 11 316 | Tensin SH2 domain binding motif [Y][E][N][FIV] 0 317 | Vav SH2 domain binding motif [Y][MLE][E][P] 0 318 | Vav SH2 domain binding motif [Y][E][S][P] 0 319 | Cbl PTB domain binding motif [D][ND][A-Z][Y] 3 320 | Dok1 PTB domain binding motif [N][A-Z][L][Y] 3 321 | FRIP PTB domain binding motif [N][A-Z][A-Z][Y] 3 322 | Shc PTB domain binding motif [N][P][A-Z][Y] 3 323 | Shb PTB domain binding motif [D][D][A-Z][Y] 3 324 | ShcA PTB domain binding motif [N][P][A-Z][Y][F][A-Z][R] 3 325 | ShcC PTB domain binding motif [H][N][MLVI][MLVIN][N][P][ST][Y] 7 326 | -------------------------------------------------------------------------------- /data/test_files/pae_O15552.hdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MannLabs/structuremap/f14b4325e30f16394ea819af2e29f9c68f786ee4/data/test_files/pae_O15552.hdf -------------------------------------------------------------------------------- /data/test_files/pae_Q7Z6M3.hdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MannLabs/structuremap/f14b4325e30f16394ea819af2e29f9c68f786ee4/data/test_files/pae_Q7Z6M3.hdf -------------------------------------------------------------------------------- /data/test_files/ptm_file.csv: -------------------------------------------------------------------------------- 1 | ,protein_id,AA,position,ac,ac_reg,ga,gl,gl_reg,m,m_reg,p,p_reg,sm,sm_reg,ub,ub_reg 2 | 0,O43353,K,17,0,0,0,0,0,0,0,0,0,0,0,1,0 3 | 1,O43353,K,182,0,0,0,0,0,0,0,0,0,0,0,1,0 4 | 2,O43353,K,203,0,0,0,0,0,0,0,0,0,0,0,1,0 5 | 3,O43353,K,209,0,0,0,0,0,0,0,0,0,0,0,1,1 6 | 
4,O43353,K,384,0,0,0,0,0,1,0,0,0,0,0,0,0 7 | 5,O43353,K,410,0,0,0,0,0,0,0,0,0,0,0,1,0 8 | 6,O43353,K,480,0,0,0,0,0,0,0,0,0,0,0,1,0 9 | 7,O43353,K,508,0,0,0,0,0,0,0,0,0,0,0,1,0 10 | 8,O43353,K,538,0,0,0,0,0,0,0,0,0,0,0,1,0 11 | 9,O43353,R,26,0,0,0,0,0,1,0,0,0,0,0,0,0 12 | 10,O43353,S,168,0,0,0,0,0,0,0,1,0,0,0,0,0 13 | 11,O43353,S,174,0,0,0,0,0,0,0,1,0,0,0,0,0 14 | 12,O43353,S,176,0,0,0,0,0,0,0,1,1,0,0,0,0 15 | 13,O43353,S,178,0,0,0,0,0,0,0,1,0,0,0,0,0 16 | 14,O43353,S,180,0,0,0,0,0,0,0,1,0,0,0,0,0 17 | 15,O43353,S,181,0,0,0,0,0,0,0,1,0,0,0,0,0 18 | 16,O43353,S,319,0,0,0,0,0,0,0,1,0,0,0,0,0 19 | 17,O43353,S,345,0,0,0,0,0,0,0,1,0,0,0,0,0 20 | 18,O43353,S,357,0,0,0,0,0,0,0,1,0,0,0,0,0 21 | 19,O43353,S,363,0,0,0,0,0,0,0,1,0,0,0,0,0 22 | 20,O43353,S,374,0,0,0,0,0,0,0,1,0,0,0,0,0 23 | 21,O43353,S,393,0,0,0,0,0,0,0,1,0,0,0,0,0 24 | 22,O43353,S,399,0,0,0,0,0,0,0,1,0,0,0,0,0 25 | 23,O43353,S,428,0,0,0,0,0,0,0,1,0,0,0,0,0 26 | 24,O43353,S,527,0,0,0,0,0,0,0,1,0,0,0,0,0 27 | 25,O43353,S,529,0,0,0,0,0,0,0,1,0,0,0,0,0 28 | 26,O43353,S,531,0,0,0,0,0,0,0,1,0,0,0,0,0 29 | 27,O43353,S,539,0,0,0,0,0,0,0,1,0,0,0,0,0 30 | 28,O43353,T,411,0,0,0,0,0,0,0,1,0,0,0,0,0 31 | 29,O43353,T,412,0,0,0,0,0,0,0,1,0,0,0,0,0 32 | 30,O43353,Y,23,0,0,0,0,0,0,0,1,0,0,0,0,0 33 | 31,O43353,Y,381,0,0,0,0,0,0,0,1,0,0,0,0,0 34 | 32,O43353,Y,474,0,0,0,0,0,0,0,1,1,0,0,0,0 35 | 33,O96017,K,119,0,0,0,0,0,0,0,0,0,0,0,1,0 36 | 34,O96017,K,131,0,0,0,0,0,0,0,0,0,0,0,1,0 37 | 35,O96017,K,224,0,0,0,0,0,0,0,0,0,0,0,1,0 38 | 36,O96017,K,235,1,0,0,0,0,0,0,0,0,0,0,0,0 39 | 37,O96017,K,279,0,0,0,0,0,0,0,0,0,0,0,1,0 40 | 38,O96017,K,287,0,0,0,0,0,0,0,0,0,0,0,1,0 41 | 39,O96017,K,373,0,0,0,0,0,0,0,0,0,0,0,1,0 42 | 40,O96017,K,437,0,0,0,0,0,0,0,0,0,0,0,1,0 43 | 41,O96017,K,444,0,0,0,0,0,0,0,0,0,0,0,1,0 44 | 42,O96017,K,458,0,0,0,0,0,0,0,0,0,0,0,1,0 45 | 43,O96017,K,472,0,0,0,0,0,1,0,0,0,0,0,1,0 46 | 44,O96017,K,494,0,0,0,0,0,0,0,0,0,0,0,1,0 47 | 45,O96017,S,12,0,0,0,0,0,0,0,1,0,0,0,0,0 48 | 
46,O96017,S,120,0,0,0,0,0,0,0,1,0,0,0,0,0 49 | 47,O96017,S,140,0,0,0,0,0,0,0,1,1,0,0,0,0 50 | 48,O96017,S,15,0,0,0,0,0,0,0,1,0,0,0,0,0 51 | 49,O96017,S,164,0,0,0,0,0,0,0,1,1,0,0,0,0 52 | 50,O96017,S,19,0,0,0,0,0,0,0,1,1,0,0,0,0 53 | 51,O96017,S,210,0,0,0,0,0,0,0,1,1,0,0,0,0 54 | 52,O96017,S,24,0,0,0,0,0,0,0,1,0,0,0,0,0 55 | 53,O96017,S,260,0,0,0,0,0,0,0,1,0,0,0,0,0 56 | 54,O96017,S,28,0,0,0,0,0,0,0,1,1,0,0,0,0 57 | 55,O96017,S,33,0,0,0,0,0,0,0,1,1,0,0,0,0 58 | 56,O96017,S,35,0,0,0,0,0,0,0,1,1,0,0,0,0 59 | 57,O96017,S,372,0,0,0,0,0,0,0,1,1,0,0,0,0 60 | 58,O96017,S,379,0,0,0,0,0,0,0,1,1,0,0,0,0 61 | 59,O96017,S,39,0,0,0,0,0,0,0,1,0,0,0,0,0 62 | 60,O96017,S,40,0,0,0,0,0,0,0,1,0,0,0,0,0 63 | 61,O96017,S,41,0,0,0,0,0,0,0,1,0,0,0,0,0 64 | 62,O96017,S,42,0,0,0,0,0,0,0,1,0,0,0,0,0 65 | 63,O96017,S,435,0,0,0,0,0,0,0,1,0,0,0,0,0 66 | 64,O96017,S,44,0,0,0,0,0,0,0,1,0,0,0,0,0 67 | 65,O96017,S,456,0,0,0,0,0,0,0,1,1,0,0,0,0 68 | 66,O96017,S,50,0,0,0,0,0,0,0,1,0,0,0,0,0 69 | 67,O96017,S,516,0,0,0,0,0,0,0,1,1,0,0,0,0 70 | 68,O96017,S,52,0,0,0,0,0,0,0,1,0,0,0,0,0 71 | 69,O96017,S,55,0,0,0,0,0,0,0,1,0,0,0,0,0 72 | 70,O96017,S,62,0,0,0,0,0,0,0,1,0,0,0,0,0 73 | 71,O96017,S,67,0,0,0,0,0,0,0,1,0,0,0,0,0 74 | 72,O96017,S,73,0,0,0,0,0,0,0,1,1,0,0,0,0 75 | 73,O96017,T,205,0,0,0,0,0,0,0,1,1,0,0,0,0 76 | 74,O96017,T,225,0,0,0,0,0,0,0,1,0,0,0,0,0 77 | 75,O96017,T,26,0,0,0,0,0,0,0,1,1,0,0,0,0 78 | 76,O96017,T,378,0,0,0,1,0,0,0,1,1,0,0,0,0 79 | 77,O96017,T,383,0,0,0,0,0,0,0,1,1,0,0,0,0 80 | 78,O96017,T,387,0,0,0,0,0,0,0,1,1,0,0,0,0 81 | 79,O96017,T,389,0,0,0,0,0,0,0,1,1,0,0,0,0 82 | 80,O96017,T,43,0,0,0,0,0,0,0,1,0,0,0,0,0 83 | 81,O96017,T,432,0,0,0,0,0,0,0,1,0,0,0,0,0 84 | 82,O96017,T,45,0,0,0,0,0,0,0,1,0,0,0,0,0 85 | 83,O96017,T,517,0,0,0,0,0,0,0,1,1,0,0,0,0 86 | 84,O96017,T,65,0,0,0,0,0,0,0,1,0,0,0,0,0 87 | 85,O96017,T,68,0,0,0,0,0,0,0,1,1,0,0,0,0 88 | 86,O96017,Y,390,0,0,0,0,0,0,0,1,1,0,0,0,0 89 | 87,P02730,K,757,0,0,0,0,0,0,0,0,0,0,0,1,0 90 | 88,P02730,S,162,0,0,0,1,0,0,0,0,0,0,0,0,0 91 | 
89,P02730,S,181,0,0,0,0,0,0,0,1,0,0,0,0,0 92 | 90,P02730,S,194,0,0,0,0,0,0,0,1,0,0,0,0,0 93 | 91,P02730,S,224,0,0,0,1,0,0,0,0,0,0,0,0,0 94 | 92,P02730,S,29,0,0,0,0,0,0,0,1,0,0,0,0,0 95 | 93,P02730,S,303,0,0,0,0,0,0,0,1,0,0,0,0,0 96 | 94,P02730,S,349,0,0,0,0,0,0,0,1,0,0,0,0,0 97 | 95,P02730,S,356,0,0,0,0,0,0,0,1,0,0,0,0,0 98 | 96,P02730,S,357,0,0,0,0,0,0,0,1,0,0,0,0,0 99 | 97,P02730,S,50,0,0,0,0,0,0,0,1,0,0,0,0,0 100 | 98,P02730,S,525,0,0,0,0,0,0,0,1,0,0,0,0,0 101 | 99,P02730,S,745,0,0,0,1,0,0,0,0,0,0,0,0,0 102 | 100,P02730,S,781,0,0,0,0,0,0,0,1,0,0,0,0,0 103 | 101,P02730,T,39,0,0,0,0,0,0,0,1,0,0,0,0,0 104 | 102,P02730,T,42,0,0,0,0,0,0,0,1,0,0,0,0,0 105 | 103,P02730,T,44,0,0,0,0,0,0,0,1,0,0,0,0,0 106 | 104,P02730,T,48,0,0,0,0,0,0,0,1,0,0,0,0,0 107 | 105,P02730,T,49,0,0,0,0,0,0,0,1,0,0,0,0,0 108 | 106,P02730,T,54,0,0,0,0,0,0,0,1,0,0,0,0,0 109 | 107,P02730,T,894,0,0,0,0,0,0,0,1,0,0,0,0,0 110 | 108,P02730,Y,21,0,0,0,0,0,0,0,1,1,0,0,0,0 111 | 109,P02730,Y,347,0,0,0,0,0,0,0,1,0,0,0,0,0 112 | 110,P02730,Y,359,0,0,0,0,0,0,0,1,1,0,0,0,0 113 | 111,P02730,Y,46,0,0,0,0,0,0,0,1,0,0,0,0,0 114 | 112,P02730,Y,8,0,0,0,0,0,0,0,1,1,0,0,0,0 115 | 113,P02730,Y,818,0,0,0,0,0,0,0,1,0,0,0,0,0 116 | 114,P02730,Y,904,0,0,0,0,0,0,0,1,1,0,0,0,0 117 | 115,P08559,K,18,1,0,0,0,0,0,0,0,0,0,0,1,0 118 | 116,P08559,K,244,1,0,0,0,0,0,0,0,0,0,0,1,0 119 | 117,P08559,K,277,1,0,0,0,0,0,0,0,0,0,0,0,0 120 | 118,P08559,K,313,1,0,0,0,0,0,0,0,0,0,0,0,0 121 | 119,P08559,K,321,1,1,0,0,0,0,0,0,0,0,0,1,0 122 | 120,P08559,K,336,1,0,0,0,0,0,0,0,0,0,0,0,0 123 | 121,P08559,K,344,0,0,0,0,0,0,0,0,0,0,0,1,0 124 | 122,P08559,K,385,0,0,0,0,0,1,0,0,0,0,0,0,0 125 | 123,P08559,K,39,1,0,0,0,0,0,0,0,0,0,0,1,0 126 | 124,P08559,K,63,1,0,0,0,0,0,0,0,0,0,0,1,0 127 | 125,P08559,K,77,1,0,0,0,0,0,0,0,0,0,0,0,0 128 | 126,P08559,K,83,1,0,0,0,0,0,0,0,0,0,0,1,0 129 | 127,P08559,R,245,0,0,0,0,0,1,0,0,0,0,0,0,0 130 | 128,P08559,S,152,0,0,0,0,0,0,0,1,0,0,0,0,0 131 | 129,P08559,S,232,0,0,0,0,0,0,0,1,1,0,0,0,0 132 | 
130,P08559,S,239,0,0,0,0,0,0,0,1,1,0,0,0,0 133 | 131,P08559,S,293,0,0,0,0,0,0,0,1,1,0,0,0,0 134 | 132,P08559,S,295,0,0,0,0,0,0,0,1,1,0,0,0,0 135 | 133,P08559,S,300,0,0,0,0,0,0,0,1,1,0,0,0,0 136 | 134,P08559,S,314,0,0,0,0,0,0,0,1,1,0,0,0,0 137 | 135,P08559,T,116,0,0,0,0,0,0,0,1,0,0,0,0,0 138 | 136,P08559,T,139,0,0,0,0,0,0,0,1,0,0,0,0,0 139 | 137,P08559,T,231,0,0,0,0,0,0,0,1,0,0,0,0,0 140 | 138,P08559,T,240,0,0,0,0,0,0,0,1,0,0,0,0,0 141 | 139,P08559,T,303,0,0,0,0,0,0,0,1,0,0,0,0,0 142 | 140,P08559,Y,118,0,0,0,0,0,0,0,1,0,0,0,0,0 143 | 141,P08559,Y,227,0,0,0,0,0,0,0,1,0,0,0,0,0 144 | 142,P08559,Y,242,0,0,0,0,0,0,0,1,0,0,0,0,0 145 | 143,P08559,Y,243,0,0,0,0,0,0,0,1,0,0,0,0,0 146 | 144,P08559,Y,272,0,0,0,0,0,0,0,1,0,0,0,0,0 147 | 145,P08559,Y,289,0,0,0,0,0,0,0,1,1,0,0,0,0 148 | 146,P08559,Y,301,0,0,0,0,0,0,0,1,0,0,0,0,0 149 | 147,P08559,Y,366,0,0,0,0,0,0,0,1,0,0,0,0,0 150 | 148,P08559,Y,369,0,0,0,0,0,0,0,1,0,0,0,0,0 151 | 149,P15121,K,117,1,0,0,0,0,0,0,0,0,0,0,0,0 152 | 150,P15121,K,12,1,0,0,0,0,0,0,0,0,0,0,1,0 153 | 151,P15121,K,179,0,0,0,0,0,0,0,0,0,0,0,1,0 154 | 152,P15121,K,195,0,0,0,0,0,0,0,0,0,0,0,1,0 155 | 153,P15121,K,22,0,0,0,0,0,0,0,0,0,0,0,1,0 156 | 154,P15121,K,222,1,0,0,0,0,1,0,0,0,0,0,1,0 157 | 155,P15121,K,240,1,0,0,0,0,0,0,0,0,0,0,1,0 158 | 156,P15121,K,243,1,0,0,0,0,0,0,0,0,0,0,1,0 159 | 157,P15121,K,263,1,0,0,0,0,0,0,0,0,0,0,1,0 160 | 158,P15121,K,308,1,0,0,0,0,0,0,0,0,0,0,1,0 161 | 159,P15121,K,62,0,0,0,0,0,0,0,0,0,0,0,1,0 162 | 160,P15121,K,69,0,0,0,0,0,0,0,0,0,0,0,1,0 163 | 161,P15121,K,86,0,0,0,0,0,0,0,0,0,0,0,1,0 164 | 162,P15121,K,90,0,0,0,0,0,0,0,0,0,0,0,1,0 165 | 163,P15121,K,95,1,0,0,0,0,0,0,0,0,0,0,1,0 166 | 164,P15121,R,218,0,0,0,0,0,1,0,0,0,0,0,0,0 167 | 165,P15121,R,233,0,0,0,0,0,1,0,0,0,0,0,0,0 168 | 166,P15121,S,211,0,0,0,0,0,0,0,1,0,0,0,0,0 169 | 167,P15121,S,215,0,0,0,0,0,0,0,1,0,0,0,0,0 170 | 168,P15121,S,23,0,0,0,0,0,0,0,1,0,0,0,0,0 171 | 169,P15121,S,264,0,0,0,0,0,0,0,1,0,0,0,0,0 172 | 170,P15121,S,3,0,0,0,0,0,0,0,1,0,0,0,0,0 173 | 
171,P15121,S,77,0,0,0,0,0,0,0,1,0,0,0,0,0 174 | 172,P15121,S,98,0,0,0,0,0,0,0,1,0,0,0,0,0 175 | 173,P15121,T,192,0,0,0,0,0,0,0,1,0,0,0,0,0 176 | 174,P15121,T,20,0,0,0,0,0,0,0,1,0,0,0,0,0 177 | 175,P15121,T,266,0,0,0,0,0,0,0,1,0,0,0,0,0 178 | 176,P15121,Y,104,0,0,0,0,0,0,0,1,0,0,0,0,0 179 | 177,P15121,Y,190,0,0,0,0,0,0,0,1,0,0,0,0,0 180 | 178,P15121,Y,199,0,0,0,0,0,0,0,1,0,0,0,0,0 181 | 179,P15121,Y,40,0,0,0,0,0,0,0,1,0,0,0,0,0 182 | 180,P15121,Y,49,0,0,0,0,0,0,0,1,0,0,0,0,0 183 | 181,P15121,Y,83,0,0,0,0,0,0,0,1,0,0,0,0,0 184 | 182,P24941,K,129,0,0,0,0,0,0,0,0,0,0,0,1,0 185 | 183,P24941,K,142,0,0,0,0,0,0,0,0,0,0,0,1,0 186 | 184,P24941,K,20,0,0,0,0,0,0,0,0,0,0,0,1,0 187 | 185,P24941,K,237,0,0,0,0,0,0,0,0,0,0,0,1,0 188 | 186,P24941,K,24,0,0,0,0,0,0,0,0,0,0,0,1,0 189 | 187,P24941,K,250,0,0,0,0,0,0,0,0,0,0,0,1,0 190 | 188,P24941,K,273,0,0,0,0,0,0,0,0,0,0,0,1,0 191 | 189,P24941,K,278,0,0,0,0,0,0,0,0,0,0,0,1,0 192 | 190,P24941,K,291,0,0,0,0,0,0,0,0,0,0,0,1,0 193 | 191,P24941,K,33,0,0,0,0,0,0,0,0,0,1,0,1,0 194 | 192,P24941,K,56,0,0,0,0,0,0,0,0,0,0,0,1,0 195 | 193,P24941,K,6,1,0,0,0,0,0,0,0,0,1,0,1,0 196 | 194,P24941,K,65,0,0,0,0,0,0,0,0,0,0,0,1,0 197 | 195,P24941,K,9,0,0,0,0,0,0,0,0,0,0,0,1,0 198 | 196,P24941,R,297,0,0,0,0,0,1,0,0,0,0,0,0,0 199 | 197,P24941,S,46,0,0,0,0,0,0,0,1,0,0,0,0,0 200 | 198,P24941,T,137,0,0,0,0,0,0,0,1,0,0,0,0,0 201 | 199,P24941,T,14,0,0,0,0,0,0,0,1,1,0,0,0,0 202 | 200,P24941,T,158,0,0,0,0,0,0,0,1,0,0,0,0,0 203 | 201,P24941,T,160,0,0,0,0,0,0,0,1,1,0,0,0,0 204 | 202,P24941,T,165,0,0,0,0,0,0,0,1,0,0,0,0,0 205 | 203,P24941,T,39,0,0,0,0,0,0,0,1,1,0,0,0,0 206 | 204,P24941,Y,15,0,0,0,0,0,0,0,1,1,0,0,0,0 207 | 205,P24941,Y,159,0,0,0,0,0,0,0,1,0,0,0,0,0 208 | 206,P24941,Y,168,0,0,0,0,0,0,0,1,0,0,0,0,0 209 | 207,P24941,Y,19,0,0,0,0,0,0,0,1,0,0,0,0,0 210 | 208,P28482,K,138,0,0,0,0,0,0,0,0,0,0,0,1,0 211 | 209,P28482,K,151,0,0,0,0,0,0,0,0,0,0,0,1,0 212 | 210,P28482,K,164,0,0,0,0,0,0,0,0,0,0,0,1,0 213 | 211,P28482,K,203,0,0,0,0,0,0,0,0,0,0,0,1,0 214 | 
212,P28482,K,259,0,0,0,0,0,0,0,0,0,0,0,1,0 215 | 213,P28482,K,270,0,0,0,0,0,0,0,0,0,0,0,1,0 216 | 214,P28482,K,272,0,0,0,0,0,0,0,0,0,0,0,1,0 217 | 215,P28482,K,285,0,0,0,0,0,0,0,0,0,0,0,1,0 218 | 216,P28482,K,292,0,0,0,0,0,0,0,0,0,0,0,1,0 219 | 217,P28482,K,300,0,0,0,0,0,0,0,0,0,0,0,1,0 220 | 218,P28482,K,330,0,0,0,0,0,0,0,0,0,0,0,1,0 221 | 219,P28482,K,340,0,0,0,0,0,0,0,0,0,0,0,1,0 222 | 220,P28482,K,344,0,0,0,0,0,0,0,0,0,0,0,1,0 223 | 221,P28482,K,55,0,0,0,0,0,0,0,0,0,0,0,1,0 224 | 222,P28482,K,99,0,0,0,0,0,0,0,0,0,0,0,1,0 225 | 223,P28482,R,194,0,0,0,0,0,1,0,0,0,0,0,0,0 226 | 224,P28482,S,142,0,0,0,0,0,0,0,1,0,0,0,0,0 227 | 225,P28482,S,202,0,0,0,0,0,0,0,1,0,0,0,0,0 228 | 226,P28482,S,246,0,0,0,0,0,0,0,1,1,0,0,0,0 229 | 227,P28482,S,248,0,0,0,0,0,0,0,1,1,0,0,0,0 230 | 228,P28482,S,284,0,0,0,0,0,0,0,1,0,0,0,0,0 231 | 229,P28482,S,29,0,0,0,0,0,0,0,1,1,0,0,0,0 232 | 230,P28482,S,360,0,0,0,0,0,0,0,1,0,0,0,0,0 233 | 231,P28482,T,181,0,0,0,0,0,0,0,1,0,0,0,0,0 234 | 232,P28482,T,185,0,0,0,0,0,0,0,1,1,0,0,0,0 235 | 233,P28482,T,190,0,0,0,0,0,0,0,1,1,0,0,0,0 236 | 234,P28482,T,206,0,0,0,0,0,0,0,1,0,0,0,0,0 237 | 235,P28482,T,295,0,0,0,0,0,0,0,1,0,0,0,0,0 238 | 236,P28482,T,63,0,0,0,0,0,0,0,1,0,0,0,0,0 239 | 237,P28482,Y,113,0,0,0,0,0,0,0,1,0,0,0,0,0 240 | 238,P28482,Y,187,0,0,0,0,0,0,0,1,1,0,0,0,0 241 | 239,P28482,Y,193,0,0,0,0,0,0,0,1,0,0,0,0,0 242 | 240,P28482,Y,205,0,0,0,0,0,0,0,1,0,0,0,0,0 243 | 241,P28482,Y,25,0,0,0,0,0,0,0,1,0,0,0,0,0 244 | 242,P28482,Y,263,0,0,0,0,0,0,0,1,0,0,0,0,0 245 | 243,P28482,Y,36,0,0,0,0,0,0,0,1,0,0,0,0,0 246 | 244,P28482,Y,43,0,0,0,0,0,0,0,1,0,0,0,0,0 247 | 245,P29320,K,625,0,0,0,0,0,0,0,0,0,0,0,1,0 248 | 246,P29320,K,656,0,0,0,0,0,0,0,0,0,0,0,1,0 249 | 247,P29320,S,294,0,0,0,0,0,0,0,1,0,0,0,0,0 250 | 248,P29320,S,497,0,0,0,0,0,0,0,1,0,0,0,0,0 251 | 249,P29320,S,498,0,0,0,0,0,0,0,1,0,0,0,0,0 252 | 250,P29320,S,768,0,0,0,0,0,0,0,1,1,0,0,0,0 253 | 251,P29320,S,976,0,0,0,0,0,0,0,1,0,0,0,0,0 254 | 252,P29320,T,432,0,0,0,0,0,0,0,1,0,0,0,0,0 255 
| 253,P29320,T,442,0,0,0,0,0,0,0,1,0,0,0,0,0 256 | 254,P29320,T,485,0,0,0,0,0,0,0,1,0,0,0,0,0 257 | 255,P29320,T,595,0,0,0,0,0,0,0,1,0,0,0,0,0 258 | 256,P29320,T,601,0,0,0,0,0,0,0,1,0,0,0,0,0 259 | 257,P29320,T,654,0,0,0,0,0,0,0,1,0,0,0,0,0 260 | 258,P29320,T,781,0,0,0,0,0,0,0,1,0,0,0,0,0 261 | 259,P29320,T,974,0,0,0,0,0,0,0,1,0,0,0,0,0 262 | 260,P29320,Y,561,0,0,0,0,0,0,0,1,0,0,0,0,0 263 | 261,P29320,Y,570,0,0,0,0,0,0,0,1,0,0,0,0,0 264 | 262,P29320,Y,596,0,0,0,0,0,0,0,1,1,0,0,0,0 265 | 263,P29320,Y,602,0,0,0,0,0,0,0,1,1,0,0,0,0 266 | 264,P29320,Y,659,0,0,0,0,0,0,0,1,0,0,0,0,0 267 | 265,P29320,Y,701,0,0,0,0,0,0,0,1,0,0,0,0,0 268 | 266,P29320,Y,736,0,0,0,0,0,0,0,1,0,0,0,0,0 269 | 267,P29320,Y,742,0,0,0,0,0,0,0,1,1,0,0,0,0 270 | 268,P29320,Y,779,0,0,0,0,0,0,0,1,1,0,0,0,0 271 | 269,P29320,Y,937,0,0,0,0,0,0,0,1,0,0,0,0,0 272 | 270,P45984,K,153,0,0,0,0,0,0,0,0,0,0,0,1,0 273 | 271,P45984,K,160,0,0,0,0,0,0,0,0,0,0,0,1,0 274 | 272,P45984,K,166,0,0,0,0,0,0,0,0,0,0,0,1,0 275 | 273,P45984,K,250,1,0,0,0,0,0,0,0,0,0,0,1,0 276 | 274,P45984,K,251,0,0,0,0,0,0,0,0,0,0,0,1,0 277 | 275,P45984,K,300,0,0,0,0,0,0,0,0,0,0,0,1,0 278 | 276,P45984,K,353,0,0,0,0,0,0,0,0,0,0,0,1,0 279 | 277,P45984,K,56,0,0,0,0,0,0,0,0,0,0,0,1,0 280 | 278,P45984,K,68,0,0,0,0,0,0,0,0,0,0,0,1,0 281 | 279,P45984,S,144,0,0,0,0,0,0,0,1,0,0,0,0,0 282 | 280,P45984,S,155,0,0,0,0,0,0,0,1,0,0,0,0,0 283 | 281,P45984,S,292,0,0,0,0,0,0,0,1,0,0,0,0,0 284 | 282,P45984,S,311,0,0,0,0,0,0,0,1,0,0,0,0,0 285 | 283,P45984,S,407,0,0,0,0,0,0,0,1,0,0,0,0,0 286 | 284,P45984,T,178,0,0,0,0,0,0,0,1,0,0,0,0,0 287 | 285,P45984,T,183,0,0,0,0,0,0,0,1,0,0,0,0,0 288 | 286,P45984,T,188,0,0,0,0,0,0,0,1,0,0,0,0,0 289 | 287,P45984,T,404,0,0,0,0,0,0,0,1,0,0,0,0,0 290 | 288,P45984,Y,185,0,0,0,0,0,0,0,1,0,0,0,0,0 291 | 289,P45984,Y,357,0,0,0,0,0,0,0,1,0,0,0,0,0 292 | 290,Q13546,K,105,0,0,0,0,0,0,0,0,0,0,0,1,0 293 | 291,Q13546,K,115,0,0,0,0,0,0,0,0,0,0,0,1,1 294 | 292,Q13546,K,13,0,0,0,0,0,0,0,0,0,0,0,1,0 295 | 
293,Q13546,K,137,0,0,0,0,0,0,0,0,0,0,0,1,0 296 | 294,Q13546,K,140,0,0,0,0,0,0,0,0,0,0,0,1,0 297 | 295,Q13546,K,153,0,0,0,0,0,0,0,0,0,0,0,1,0 298 | 296,Q13546,K,163,0,0,0,0,0,0,0,0,0,0,0,1,0 299 | 297,Q13546,K,167,0,0,0,0,0,0,0,0,0,0,0,1,0 300 | 298,Q13546,K,184,0,0,0,0,0,0,0,0,0,0,0,1,0 301 | 299,Q13546,K,185,0,0,0,0,0,0,0,0,0,0,0,1,0 302 | 300,Q13546,K,284,0,0,0,0,0,0,0,0,0,0,0,1,0 303 | 301,Q13546,K,302,0,0,0,0,0,0,0,0,0,0,0,1,0 304 | 302,Q13546,K,306,0,0,0,0,0,0,0,0,0,0,0,1,0 305 | 303,Q13546,K,316,0,0,0,0,0,0,0,0,0,0,0,1,0 306 | 304,Q13546,K,377,0,0,0,0,0,0,0,0,0,0,0,1,1 307 | 305,Q13546,K,49,0,0,0,0,0,0,0,0,0,0,0,1,0 308 | 306,Q13546,K,530,1,1,0,0,0,0,0,0,0,0,0,0,0 309 | 307,Q13546,K,571,0,0,0,0,0,0,0,0,0,0,0,1,0 310 | 308,Q13546,K,585,0,0,0,0,0,0,0,0,0,0,0,1,0 311 | 309,Q13546,K,596,0,0,0,0,0,0,0,0,0,0,0,1,0 312 | 310,Q13546,K,604,0,0,0,0,0,0,0,0,0,0,0,1,0 313 | 311,Q13546,K,627,0,0,0,0,0,0,0,0,0,0,0,1,0 314 | 312,Q13546,K,642,1,0,0,0,0,0,0,0,0,0,0,1,0 315 | 313,Q13546,K,648,1,0,0,0,0,0,0,0,0,0,0,0,0 316 | 314,Q13546,R,477,0,0,0,0,0,1,0,0,0,0,0,0,0 317 | 315,Q13546,R,487,0,0,0,0,0,1,0,0,0,0,0,0,0 318 | 316,Q13546,S,14,0,0,0,0,0,0,0,1,0,0,0,0,0 319 | 317,Q13546,S,15,0,0,0,0,0,0,0,1,0,0,0,0,0 320 | 318,Q13546,S,161,0,0,0,0,0,0,0,1,1,0,0,0,0 321 | 319,Q13546,S,166,0,0,0,0,0,0,0,1,1,0,0,0,0 322 | 320,Q13546,S,20,0,0,0,0,0,0,0,1,0,0,0,0,0 323 | 321,Q13546,S,25,0,0,0,0,0,0,0,1,0,0,0,0,0 324 | 322,Q13546,S,262,0,0,0,0,0,0,0,1,0,0,0,0,0 325 | 323,Q13546,S,291,0,0,0,0,0,0,0,1,0,0,0,0,0 326 | 324,Q13546,S,296,0,0,0,0,0,0,0,1,0,0,0,0,0 327 | 325,Q13546,S,303,0,0,0,0,0,0,0,1,0,0,0,0,0 328 | 326,Q13546,S,309,0,0,0,0,0,0,0,1,0,0,0,0,0 329 | 327,Q13546,S,32,0,0,0,0,0,0,0,1,0,0,0,0,0 330 | 328,Q13546,S,320,0,0,0,0,0,0,0,1,1,0,0,0,0 331 | 329,Q13546,S,330,0,0,0,0,0,0,0,1,0,0,0,0,0 332 | 330,Q13546,S,331,0,0,0,0,0,0,0,1,0,0,0,0,0 333 | 331,Q13546,S,333,0,0,0,0,0,0,0,1,0,0,0,0,0 334 | 332,Q13546,S,335,0,0,0,0,0,0,0,1,0,0,0,0,0 335 | 333,Q13546,S,345,0,0,0,0,0,0,0,1,0,0,0,0,0 
336 | 334,Q13546,S,346,0,0,0,0,0,0,0,1,0,0,0,0,0 337 | 335,Q13546,S,357,0,0,0,0,0,0,0,1,1,0,0,0,0 338 | 336,Q13546,S,389,0,0,0,0,0,0,0,1,0,0,0,0,0 339 | 337,Q13546,S,416,0,0,0,0,0,0,0,1,0,0,0,0,0 340 | 338,Q13546,S,470,0,0,0,0,0,0,0,1,0,0,0,0,0 341 | 339,Q13546,S,471,0,0,0,0,0,0,0,1,0,0,0,0,0 342 | 340,Q13546,S,6,0,0,0,0,0,0,0,1,0,0,0,0,0 343 | 341,Q13546,S,610,0,0,0,0,0,0,0,1,0,0,0,0,0 344 | 342,Q13546,S,664,0,0,0,0,0,0,0,1,0,0,0,0,0 345 | 343,Q13546,T,337,0,0,0,0,0,0,0,1,0,0,0,0,0 346 | 344,Q13546,T,38,0,0,0,0,0,0,0,1,0,0,0,0,0 347 | 345,Q13546,T,483,0,0,0,0,0,0,0,1,0,0,0,0,0 348 | 346,Q13546,Y,384,0,0,0,0,0,0,0,1,0,0,0,0,0 349 | 347,Q13546,Y,387,0,0,0,0,0,0,0,1,0,0,0,0,0 350 | 348,Q13546,Y,426,0,0,0,0,0,0,0,1,0,0,0,0,0 351 | 349,Q13546,Y,463,0,0,0,0,0,0,0,1,0,0,0,0,0 352 | 350,Q13546,Y,469,0,0,0,0,0,0,0,1,0,0,0,0,0 353 | 351,Q13546,Y,490,0,0,0,0,0,0,0,1,0,0,0,0,0 354 | 352,Q8NB16,K,157,0,0,0,0,0,0,0,0,0,0,0,1,0 355 | 353,Q8NB16,K,173,0,0,0,0,0,0,0,0,0,0,0,1,0 356 | 354,Q8NB16,K,183,0,0,0,0,0,0,0,0,0,0,0,1,0 357 | 355,Q8NB16,K,198,0,0,0,0,0,0,0,0,0,0,0,1,0 358 | 356,Q8NB16,K,219,0,0,0,0,0,0,0,0,0,0,0,1,0 359 | 357,Q8NB16,K,230,0,0,0,0,0,0,0,0,0,0,0,1,0 360 | 358,Q8NB16,K,249,0,0,0,0,0,0,0,0,0,0,0,1,0 361 | 359,Q8NB16,K,331,0,0,0,0,0,0,0,0,0,0,0,1,0 362 | 360,Q8NB16,K,354,1,0,0,0,0,0,0,0,0,0,0,1,0 363 | 361,Q8NB16,K,372,0,0,0,0,0,0,0,0,0,0,0,1,0 364 | 362,Q8NB16,K,40,0,0,0,0,0,0,0,0,0,0,0,1,0 365 | 363,Q8NB16,K,50,0,0,0,0,0,0,0,0,0,0,0,1,0 366 | 364,Q8NB16,K,57,0,0,0,0,0,0,0,0,0,0,0,1,0 367 | 365,Q8NB16,K,66,0,0,0,0,0,0,0,0,0,0,0,1,0 368 | 366,Q8NB16,K,78,0,0,0,0,0,0,0,0,0,0,0,1,0 369 | 367,Q8NB16,S,106,0,0,0,0,0,0,0,1,0,0,0,0,0 370 | 368,Q8NB16,S,125,0,0,0,0,0,0,0,1,0,0,0,0,0 371 | 369,Q8NB16,S,128,0,0,0,0,0,0,0,1,0,0,0,0,0 372 | 370,Q8NB16,S,161,0,0,0,0,0,0,0,1,0,0,0,0,0 373 | 371,Q8NB16,S,334,0,0,0,0,0,0,0,1,0,0,0,0,0 374 | 372,Q8NB16,S,358,0,0,0,0,0,0,0,1,1,0,0,0,0 375 | 373,Q8NB16,S,373,0,0,0,0,0,0,0,1,0,0,0,0,0 376 | 
374,Q8NB16,S,393,0,0,0,0,0,0,0,1,0,0,0,0,0 377 | 375,Q8NB16,S,417,0,0,0,0,0,0,0,1,0,0,0,0,0 378 | 376,Q8NB16,S,467,0,0,0,0,0,0,0,1,0,0,0,0,0 379 | 377,Q8NB16,S,52,0,0,0,0,0,0,0,1,0,0,0,0,0 380 | 378,Q8NB16,S,92,0,0,0,0,0,0,0,1,0,0,0,0,0 381 | 379,Q8NB16,T,246,0,0,0,0,0,0,0,1,0,0,0,0,0 382 | 380,Q8NB16,T,302,0,0,0,0,0,0,0,1,0,0,0,0,0 383 | 381,Q8NB16,T,357,0,0,0,0,0,0,0,1,1,0,0,0,0 384 | 382,Q8NB16,T,364,0,0,0,0,0,0,0,1,0,0,0,0,0 385 | 383,Q8NB16,T,374,0,0,0,0,0,0,0,1,0,0,0,0,0 386 | 384,Q8NB16,T,59,0,0,0,0,0,0,0,1,0,0,0,0,0 387 | 385,Q8NB16,Y,376,0,0,0,0,0,0,0,1,1,0,0,0,0 388 | 386,Q92918,K,296,0,0,0,0,0,0,0,0,0,0,0,1,0 389 | 387,Q92918,K,33,0,0,0,0,0,0,0,0,0,0,0,1,0 390 | 388,Q92918,K,37,0,0,0,0,0,0,0,0,0,0,0,1,0 391 | 389,Q92918,K,46,0,0,0,0,0,0,0,0,0,0,0,1,0 392 | 390,Q92918,K,49,0,0,0,0,0,0,0,0,0,0,0,1,0 393 | 391,Q92918,K,594,0,0,0,0,0,0,0,0,0,0,0,1,0 394 | 392,Q92918,K,600,0,0,0,0,0,0,0,0,0,0,0,1,0 395 | 393,Q92918,S,171,0,0,0,0,0,0,0,1,1,0,0,0,0 396 | 394,Q92918,S,230,0,0,0,0,0,0,0,1,0,0,0,0,0 397 | 395,Q92918,S,258,0,0,0,0,0,0,0,1,0,0,0,0,0 398 | 396,Q92918,S,320,0,0,0,0,0,0,0,1,0,0,0,0,0 399 | 397,Q92918,S,324,0,0,0,0,0,0,0,1,0,0,0,0,0 400 | 398,Q92918,S,325,0,0,0,0,0,0,0,1,0,0,0,0,0 401 | 399,Q92918,S,326,0,0,0,0,0,0,0,1,0,0,0,0,0 402 | 400,Q92918,S,366,0,0,0,0,0,0,0,1,0,0,0,0,0 403 | 401,Q92918,S,368,0,0,0,0,0,0,0,1,0,0,0,0,0 404 | 402,Q92918,S,374,0,0,0,0,0,0,0,1,0,0,0,0,0 405 | 403,Q92918,S,376,0,0,0,0,0,0,0,1,0,0,0,0,0 406 | 404,Q92918,S,377,0,0,0,0,0,0,0,1,0,0,0,0,0 407 | 405,Q92918,S,405,0,0,0,0,0,0,0,1,0,0,0,0,0 408 | 406,Q92918,S,407,0,0,0,0,0,0,0,1,0,0,0,0,0 409 | 407,Q92918,S,413,0,0,0,0,0,0,0,1,0,0,0,0,0 410 | 408,Q92918,S,421,0,0,0,0,0,0,0,1,0,0,0,0,0 411 | 409,Q92918,S,430,0,0,0,0,0,0,0,1,0,0,0,0,0 412 | 410,Q92918,S,436,0,0,0,0,0,0,0,1,0,0,0,0,0 413 | 411,Q92918,S,444,0,0,0,0,0,0,0,1,0,0,0,0,0 414 | 412,Q92918,S,446,0,0,0,0,0,0,0,1,0,0,0,0,0 415 | 413,Q92918,S,447,0,0,0,0,0,0,0,1,0,0,0,0,0 416 | 414,Q92918,S,454,0,0,0,0,0,0,0,1,0,0,0,0,0 417 
| 415,Q92918,S,586,0,0,0,0,0,0,0,1,0,0,0,0,0 418 | 416,Q92918,S,598,0,0,0,0,0,0,0,1,0,0,0,0,0 419 | 417,Q92918,S,737,0,0,0,0,0,0,0,1,0,0,0,0,0 420 | 418,Q92918,T,165,0,0,0,0,0,0,0,1,1,0,0,0,0 421 | 419,Q92918,T,175,0,0,0,0,0,0,0,1,1,0,0,0,0 422 | 420,Q92918,T,349,0,0,0,0,0,0,0,1,0,0,0,0,0 423 | 421,Q92918,T,355,0,0,0,0,0,0,0,1,1,0,0,0,0 424 | 422,Q92918,T,451,0,0,0,0,0,0,0,1,0,0,0,0,0 425 | 423,Q92918,T,599,0,0,0,0,0,0,0,1,0,0,0,0,0 426 | 424,Q92918,Y,177,0,0,0,0,0,0,0,1,0,0,0,0,0 427 | 425,Q92918,Y,28,0,0,0,0,0,0,0,1,0,0,0,0,0 428 | 426,Q92918,Y,381,0,0,0,0,0,0,0,1,0,0,0,0,0 429 | -------------------------------------------------------------------------------- /misc/CLA.md: -------------------------------------------------------------------------------- 1 | ### MannLabs Individual Contributor License Agreement 2 | 3 | Thank you for your interest in contributing to open source software projects (“Projects”) made available by MannLabs or its affiliates (“MannLabs”). This Individual Contributor License Agreement (“Agreement”) sets out the terms governing any source code, object code, bug fixes, configuration changes, tools, specifications, documentation, data, materials, feedback, information or other works of authorship that you submit or have submitted, in any form and in any manner, to MannLabs in respect of any of the Projects (collectively “Contributions”). If you have any questions respecting this Agreement, please contact opensource@alphapept.com. 4 | 5 | 6 | You agree that the following terms apply to all of your past, present and future Contributions. Except for the licenses granted in this Agreement, you retain all of your right, title and interest in and to your Contributions. 
7 | 8 | 9 | **Copyright License.** You hereby grant, and agree to grant, to MannLabs a non-exclusive, perpetual, irrevocable, worldwide, fully-paid, royalty-free, transferable copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, and distribute your Contributions and such derivative works, with the right to sublicense the foregoing rights through multiple tiers of sublicensees. 10 | 11 | 12 | **Patent License.** You hereby grant, and agree to grant, to MannLabs a non-exclusive, perpetual, irrevocable, 13 | worldwide, fully-paid, royalty-free, transferable patent license to make, have made, use, offer to sell, sell, 14 | import, and otherwise transfer your Contributions, where such license applies only to those patent claims 15 | licensable by you that are necessarily infringed by your Contributions alone or by combination of your 16 | Contributions with the Project to which such Contributions were submitted, with the right to sublicense the 17 | foregoing rights through multiple tiers of sublicensees. 18 | 19 | 20 | **Moral Rights.** To the fullest extent permitted under applicable law, you hereby waive, and agree not to 21 | assert, all of your “moral rights” in or relating to your Contributions for the benefit of MannLabs, its assigns, and 22 | their respective direct and indirect sublicensees. 
23 | 24 | 25 | **Third Party Content/Rights.** If your Contribution includes or is based on any source code, object code, bug 26 | fixes, configuration changes, tools, specifications, documentation, data, materials, feedback, information or 27 | other works of authorship that were not authored by you (“Third Party Content”) or if you are aware of any 28 | third party intellectual property or proprietary rights associated with your Contribution (“Third Party Rights”), 29 | then you agree to include with the submission of your Contribution full details respecting such Third Party 30 | Content and Third Party Rights, including, without limitation, identification of which aspects of your 31 | Contribution contain Third Party Content or are associated with Third Party Rights, the owner/author of the 32 | Third Party Content and Third Party Rights, where you obtained the Third Party Content, and any applicable 33 | third party license terms or restrictions respecting the Third Party Content and Third Party Rights. For greater 34 | certainty, the foregoing obligations respecting the identification of Third Party Content and Third Party Rights 35 | do not apply to any portion of a Project that is incorporated into your Contribution to that same Project. 36 | 37 | 38 | **Representations.** You represent that, other than the Third Party Content and Third Party Rights identified by 39 | you in accordance with this Agreement, you are the sole author of your Contributions and are legally entitled 40 | to grant the foregoing licenses and waivers in respect of your Contributions. If your Contributions were 41 | created in the course of your employment with your past or present employer(s), you represent that such 42 | employer(s) has authorized you to make your Contributions on behalf of such employer(s) or such employer 43 | (s) has waived all of their right, title or interest in or to your Contributions. 
44 | 45 | 46 | **Disclaimer.** To the fullest extent permitted under applicable law, your Contributions are provided on an "as is" 47 | basis, without any warranties or conditions, express or implied, including, without limitation, any implied 48 | warranties or conditions of non-infringement, merchantability or fitness for a particular purpose. You are not 49 | required to provide support for your Contributions, except to the extent you desire to provide support. 50 | 51 | 52 | **No Obligation.** You acknowledge that MannLabs is under no obligation to use or incorporate your Contributions 53 | into any of the Projects. The decision to use or incorporate your Contributions into any of the Projects will be 54 | made at the sole discretion of MannLabs or its authorized delegates .. 55 | 56 | 57 | **Disputes.** This Agreement shall be governed by and construed in accordance with the laws of the State of 58 | New York, United States of America, without giving effect to its principles or rules regarding conflicts of laws, 59 | other than such principles directing application of New York law. The parties hereby submit to venue in, and 60 | jurisdiction of the courts located in New York, New York for purposes relating to this Agreement. In the event 61 | that any of the provisions of this Agreement shall be held by a court or other tribunal of competent jurisdiction 62 | to be unenforceable, the remaining portions hereof shall remain in full force and effect. 63 | 64 | 65 | **Assignment.** You agree that MannLabs may assign this Agreement, and all of its rights, obligations and licenses 66 | hereunder 67 | -------------------------------------------------------------------------------- /misc/bumpversion.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.0.10 3 | commit = True 4 | tag = False 5 | parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-(?P[a-z]+)(?P\d+))? 
6 | serialize = 7 | {major}.{minor}.{patch} 8 | {major}.{minor}.{patch} 9 | 10 | [bumpversion:part:release] 11 | 12 | [bumpversion:part:build] 13 | 14 | [bumpversion:file:../structuremap/__init__.py] 15 | 16 | [bumpversion:file:../release/one_click_linux_gui/control] 17 | 18 | [bumpversion:file:../release/one_click_linux_gui/create_installer_linux.sh] 19 | 20 | [bumpversion:file:../release/one_click_macos_gui/distribution.xml] 21 | 22 | [bumpversion:file:../release/one_click_macos_gui/Info.plist] 23 | 24 | [bumpversion:file:../release/one_click_macos_gui/create_installer_macos.sh] 25 | 26 | [bumpversion:file:../release/one_click_windows_gui/create_installer_windows.sh] 27 | 28 | [bumpversion:file:../release/one_click_windows_gui/structuremap_innoinstaller.iss] 29 | search = {current_version} 30 | replace = {new_version} 31 | -------------------------------------------------------------------------------- /misc/check_version.sh: -------------------------------------------------------------------------------- 1 | current_version=$(grep "__version__" ../structuremap/__init__.py | cut -f3 -d ' ' | sed 's/"//g') 2 | current_version_as_regex=$(echo $current_version | sed 's/\./\\./g') 3 | conda create -n version_check python=3.8 pip=20.1 -y 4 | conda activate version_check 5 | set +e 6 | already_on_pypi=$(pip install structuremap== 2>&1 | grep -c "$current_version_as_regex") 7 | set -e 8 | conda deactivate 9 | if [ $already_on_pypi -ne 0 ]; then 10 | echo "Version is already on PyPi" 11 | exit 1 12 | fi 13 | -------------------------------------------------------------------------------- /misc/loose_pip_install.sh: -------------------------------------------------------------------------------- 1 | conda create -n structuremap python=3.8 -y 2 | conda activate structuremap 3 | pip install -e '../.[development]' 4 | structuremap 5 | conda deactivate 6 | -------------------------------------------------------------------------------- /misc/stable_pip_install.sh: 
-------------------------------------------------------------------------------- 1 | conda create -n structuremap python=3.8 -y 2 | conda activate structuremap 3 | pip install -e '../.[stable,development-stable]' 4 | structuremap 5 | conda deactivate 6 | -------------------------------------------------------------------------------- /release/logos/alpha_logo.icns: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MannLabs/structuremap/f14b4325e30f16394ea819af2e29f9c68f786ee4/release/logos/alpha_logo.icns -------------------------------------------------------------------------------- /release/logos/alpha_logo.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MannLabs/structuremap/f14b4325e30f16394ea819af2e29f9c68f786ee4/release/logos/alpha_logo.ico -------------------------------------------------------------------------------- /release/logos/alpha_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MannLabs/structuremap/f14b4325e30f16394ea819af2e29f9c68f786ee4/release/logos/alpha_logo.png -------------------------------------------------------------------------------- /release/one_click_linux_gui/control: -------------------------------------------------------------------------------- 1 | Package: structuremap 2 | Version: 0.0.10 3 | Architecture: all 4 | Maintainer: Mann Labs 5 | Description: structuremap 6 | structuremap is an open-source Python package in the AlphaPept ecosystem. 7 | structuremap was developed by the Mann Labs at the Max Planck Institute of Biochemistry and University of Copenhagen and is freely available with an Apache License. Additional third-party licenses are applicable for external Python packages (see https://github.com/MannLabs/structuremap for more details.). 
8 | -------------------------------------------------------------------------------- /release/one_click_linux_gui/create_installer_linux.sh: -------------------------------------------------------------------------------- 1 | #!bash 2 | 3 | # Initial cleanup 4 | rm -rf dist 5 | rm -rf build 6 | cd ../.. 7 | rm -rf dist 8 | rm -rf build 9 | 10 | # Creating a conda environment 11 | conda create -n structuremap_installer python=3.8 -y 12 | conda activate structuremap_installer 13 | 14 | # Creating the wheel 15 | python setup.py sdist bdist_wheel 16 | 17 | # Setting up the local package 18 | cd release/one_click_linux_gui 19 | # Make sure you include the required extra packages and always use the stable or very-stable options! 20 | pip install "../../dist/structuremap-0.0.10-py3-none-any.whl[stable]" 21 | 22 | # Creating the stand-alone pyinstaller folder 23 | pip install pyinstaller==4.2 24 | pyinstaller ../pyinstaller/structuremap.spec -y 25 | conda deactivate 26 | 27 | # If needed, include additional source such as e.g.: 28 | # cp ../../structuremap/data/*.fasta dist/structuremap/data 29 | # WARNING: this probably does not work!!!! 
30 | 31 | # Wrapping the pyinstaller folder in a .deb package 32 | mkdir -p dist/structuremap_gui_installer_linux/usr/local/bin 33 | mv dist/structuremap dist/structuremap_gui_installer_linux/usr/local/bin/structuremap 34 | mkdir dist/structuremap_gui_installer_linux/DEBIAN 35 | cp control dist/structuremap_gui_installer_linux/DEBIAN 36 | dpkg-deb --build --root-owner-group dist/structuremap_gui_installer_linux/ 37 | -------------------------------------------------------------------------------- /release/one_click_macos_gui/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDisplayName 6 | structuremap 7 | CFBundleExecutable 8 | MacOS/structuremap_terminal 9 | CFBundleIconFile 10 | alpha_logo.icns 11 | CFBundleIdentifier 12 | structuremap.0.0.10 13 | CFBundleShortVersionString 14 | 0.0.10 15 | CFBundleInfoDictionaryVersion 16 | 6.0 17 | CFBundleName 18 | structuremap 19 | CFBundlePackageType 20 | APPL 21 | LSBackgroundOnly 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /release/one_click_macos_gui/Resources/conclusion.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |

structuremap

10 |

Thank you for installing structuremap.

11 |
12 | 13 | 14 | -------------------------------------------------------------------------------- /release/one_click_macos_gui/Resources/welcome.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 |
8 |

structuremap

9 |

structuremap is an open-source Python package of the AlphaPept ecosystem.

10 |

structuremap was developed by the Mann Labs at the Max Planck Institute of Biochemistry and the University of Copenhagen and is freely available with an Apache License. Since structuremap uses external Python packages, additional third-party licenses are applicable.

11 |
12 | 13 | 14 | -------------------------------------------------------------------------------- /release/one_click_macos_gui/create_installer_macos.sh: -------------------------------------------------------------------------------- 1 | #!bash 2 | 3 | # Initial cleanup 4 | rm -rf dist 5 | rm -rf build 6 | FILE=structuremap.pkg 7 | if test -f "$FILE"; then 8 | rm structuremap.pkg 9 | fi 10 | cd ../.. 11 | rm -rf dist 12 | rm -rf build 13 | 14 | # Creating a conda environment 15 | conda create -n structuremapinstaller python=3.8 -y 16 | conda activate structuremapinstaller 17 | 18 | # Creating the wheel 19 | python setup.py sdist bdist_wheel 20 | 21 | # Setting up the local package 22 | cd release/one_click_macos_gui 23 | pip install "../../dist/structuremap-0.0.10-py3-none-any.whl[stable]" 24 | 25 | # Creating the stand-alone pyinstaller folder 26 | pip install pyinstaller==4.2 27 | pyinstaller ../pyinstaller/structuremap.spec -y 28 | conda deactivate 29 | 30 | # If needed, include additional source such as e.g.: 31 | # cp ../../structuremap/data/*.fasta dist/structuremap/data 32 | 33 | # Wrapping the pyinstaller folder in a .pkg package 34 | mkdir -p dist/structuremap/Contents/Resources 35 | cp ../logos/alpha_logo.icns dist/structuremap/Contents/Resources 36 | mv dist/structuremap_gui dist/structuremap/Contents/MacOS 37 | cp Info.plist dist/structuremap/Contents 38 | cp structuremap_terminal dist/structuremap/Contents/MacOS 39 | cp ../../LICENSE.txt Resources/LICENSE.txt 40 | cp ../logos/alpha_logo.png Resources/alpha_logo.png 41 | chmod 777 scripts/* 42 | 43 | pkgbuild --root dist/structuremap --identifier de.mpg.biochem.structuremap.app --version 0.0.10 --install-location /Applications/structuremap.app --scripts scripts structuremap.pkg 44 | productbuild --distribution distribution.xml --resources Resources --package-path structuremap.pkg dist/structuremap_gui_installer_macos.pkg 45 | 
-------------------------------------------------------------------------------- /release/one_click_macos_gui/distribution.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | structuremap 0.0.10 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | structuremap.pkg 17 | 18 | -------------------------------------------------------------------------------- /release/one_click_macos_gui/scripts/postinstall: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # make sure this file itself is executable 4 | xattr -dr com.apple.quarantine /Applications/structuremap.app 5 | chmod -R 577 /Applications/structuremap.app 6 | echo "Postinstall finished" 7 | -------------------------------------------------------------------------------- /release/one_click_macos_gui/scripts/preinstall: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # make sure this file itself is executable 4 | rm -rf /Applications/structuremap.app 5 | echo "Preinstall finished" 6 | -------------------------------------------------------------------------------- /release/one_click_macos_gui/structuremap_terminal: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | open -a Terminal "${BASH_SOURCE%/*}/structuremap_gui" 4 | -------------------------------------------------------------------------------- /release/one_click_windows_gui/create_installer_windows.sh: -------------------------------------------------------------------------------- 1 | #!bash 2 | 3 | # Initial cleanup 4 | rm -rf dist 5 | rm -rf build 6 | cd ../.. 
7 | rm -rf dist 8 | rm -rf build 9 | 10 | # Creating a conda environment 11 | conda create -n structuremap_installer python=3.8 -y 12 | conda activate structuremap_installer 13 | 14 | # Creating the wheel 15 | python setup.py sdist bdist_wheel 16 | 17 | # Setting up the local package 18 | cd release/one_click_windows_gui 19 | # Make sure you include the required extra packages and always use the stable or very-stable options! 20 | pip install "../../dist/structuremap-0.0.10-py3-none-any.whl[stable]" 21 | 22 | # Creating the stand-alone pyinstaller folder 23 | pip install pyinstaller==4.2 24 | pyinstaller ../pyinstaller/structuremap.spec -y 25 | conda deactivate 26 | 27 | # If needed, include additional source such as e.g.: 28 | # cp ../../structuremap/data/*.fasta dist/structuremap/data 29 | 30 | # Wrapping the pyinstaller folder in a .exe package 31 | "C:\Program Files (x86)\Inno Setup 6\ISCC.exe" structuremap_innoinstaller.iss 32 | # WARNING: this assumes a static location for innosetup 33 | -------------------------------------------------------------------------------- /release/one_click_windows_gui/structuremap_innoinstaller.iss: -------------------------------------------------------------------------------- 1 | ; Script generated by the Inno Setup Script Wizard. 2 | ; SEE THE DOCUMENTATION FOR DETAILS ON CREATING INNO SETUP SCRIPT FILES! 3 | 4 | #define MyAppName "structuremap" 5 | #define MyAppVersion "0.0.10" 6 | #define MyAppPublisher "Max Planck Institute of Biochemistry and the University of Copenhagen, Mann Labs" 7 | #define MyAppURL "https://github.com/MannLabs/structuremap" 8 | #define MyAppExeName "structuremap_gui.exe" 9 | 10 | [Setup] 11 | ; NOTE: The value of AppId uniquely identifies this application. Do not use the same AppId value in installers for other applications. 12 | ; (To generate a new GUID, click Tools | Generate GUID inside the IDE.) 
13 | AppId={{structuremap_Mann_Labs_MPI_CPR} 14 | AppName={#MyAppName} 15 | AppVersion={#MyAppVersion} 16 | ;AppVerName={#MyAppName} {#MyAppVersion} 17 | AppPublisher={#MyAppPublisher} 18 | AppPublisherURL={#MyAppURL} 19 | AppSupportURL={#MyAppURL} 20 | AppUpdatesURL={#MyAppURL} 21 | DefaultDirName={autopf}\{#MyAppName} 22 | DisableProgramGroupPage=yes 23 | LicenseFile=..\..\LICENSE.txt 24 | ; Uncomment the following line to run in non administrative install mode (install for current user only.) 25 | PrivilegesRequired=lowest 26 | PrivilegesRequiredOverridesAllowed=dialog 27 | OutputDir=dist 28 | OutputBaseFilename=structuremap_gui_installer_windows 29 | SetupIconFile=..\logos\alpha_logo.ico 30 | Compression=lzma 31 | SolidCompression=yes 32 | WizardStyle=modern 33 | 34 | [Languages] 35 | Name: "english"; MessagesFile: "compiler:Default.isl" 36 | 37 | [Tasks] 38 | Name: "desktopicon"; Description: "{cm:CreateDesktopIcon}"; GroupDescription: "{cm:AdditionalIcons}"; Flags: unchecked 39 | 40 | [Files] 41 | Source: "dist\structuremap_gui\{#MyAppExeName}"; DestDir: "{app}"; Flags: ignoreversion 42 | Source: "dist\structuremap_gui\*"; DestDir: "{app}"; Flags: ignoreversion recursesubdirs createallsubdirs 43 | ; NOTE: Don't use "Flags: ignoreversion" on any shared system files 44 | 45 | [Icons] 46 | Name: "{autoprograms}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}" 47 | Name: "{autodesktop}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; Tasks: desktopicon 48 | 49 | [Run] 50 | Filename: "{app}\{#MyAppExeName}"; Description: "{cm:LaunchProgram,{#StringChange(MyAppName, '&', '&&')}}"; Flags: nowait postinstall skipifsilent 51 | -------------------------------------------------------------------------------- /release/pyinstaller/structuremap.spec: -------------------------------------------------------------------------------- 1 | # -*- mode: python ; coding: utf-8 -*- 2 | 3 | import pkgutil 4 | import os 5 | import sys 6 | from PyInstaller.building.build_main import 
from PyInstaller.building.build_main import Analysis, PYZ, EXE, COLLECT, BUNDLE, TOC
import PyInstaller.utils.hooks
import pkg_resources
import importlib.metadata
import structuremap


##################### User definitions
exe_name = 'structuremap_gui'
script_name = 'structuremap_pyinstaller.py'
# Pick the platform-appropriate icon format (.icns on macOS, .ico elsewhere).
if sys.platform[:6] == "darwin":
    icon = '../logos/alpha_logo.icns'
else:
    icon = '../logos/alpha_logo.ico'
block_cipher = None
location = os.getcwd()
project = "structuremap"
remove_tests = True
bundle_name = "structuremap"
#####################


# Transitively walk the project's dependency tree so that PyInstaller
# bundles the data files and hidden imports of every (sub)dependency.
requirements = {
    req.split()[0] for req in importlib.metadata.requires(project)
}
requirements.add(project)
requirements.add("distributed")
hidden_imports = set()
datas = []
binaries = []
checked = set()
while requirements:
    requirement = requirements.pop()
    checked.add(requirement)
    # pywin32 is skipped explicitly; collect_all is not run on it.
    if requirement in ["pywin32"]:
        continue
    try:
        module_version = importlib.metadata.version(requirement)
    except (
        importlib.metadata.PackageNotFoundError,
        ModuleNotFoundError,
        ImportError
    ):
        # Name is not an installed distribution (e.g. a bare module name
        # reported as a hidden import) - nothing to collect for it.
        continue
    try:
        datas_, binaries_, hidden_imports_ = PyInstaller.utils.hooks.collect_all(
            requirement,
            include_py_files=True
        )
    except ImportError:
        continue
    datas += datas_
    # binaries += binaries_
    hidden_imports_ = set(hidden_imports_)
    # collect_all can emit empty-string/None entries; drop them before recursing.
    if "" in hidden_imports_:
        hidden_imports_.remove("")
    if None in hidden_imports_:
        hidden_imports_.remove(None)
    # Recurse only into names that have not been processed yet.
    requirements |= hidden_imports_ - checked
    hidden_imports |= hidden_imports_

if remove_tests:
    # Drop "tests" subpackages to keep the bundle small.
    hidden_imports = sorted(
        [h for h in hidden_imports if "tests" not in h.split(".")]
    )
else:
    hidden_imports = sorted(hidden_imports)


hidden_imports = [h for h in hidden_imports if "__pycache__" not in h]
# Exclude caches and installer-only resource folders from the bundled data.
datas = [d for d in datas if ("__pycache__" not in d[0]) and (d[1] not in [".", "Resources", "scripts"])]

if sys.platform[:5] == "win32":
    # Some conda installs keep the OpenSSL DLLs under Library/bin instead of
    # DLLs; bundle them manually when they are missing from DLLs.
    base_path = os.path.dirname(sys.executable)
    library_path = os.path.join(base_path, "Library", "bin")
    dll_path = os.path.join(base_path, "DLLs")
    libcrypto_dll_path = os.path.join(dll_path, "libcrypto-1_1-x64.dll")
    libssl_dll_path = os.path.join(dll_path, "libssl-1_1-x64.dll")
    libcrypto_lib_path = os.path.join(library_path, "libcrypto-1_1-x64.dll")
    libssl_lib_path = os.path.join(library_path, "libssl-1_1-x64.dll")
    if not os.path.exists(libcrypto_dll_path):
        datas.append((libcrypto_lib_path, "."))
    if not os.path.exists(libssl_dll_path):
        datas.append((libssl_lib_path, "."))

a = Analysis(
    [script_name],
    pathex=[location],
    binaries=binaries,
    datas=datas,
    hiddenimports=hidden_imports,
    hookspath=[],
    runtime_hooks=[],
    # datashader is deliberately excluded from the frozen build.
    excludes=[h for h in hidden_imports if "datashader" in h],
    win_no_prefer_redirects=False,
    win_private_assemblies=False,
    cipher=block_cipher,
    noarchive=False
)
pyz = PYZ(
    a.pure,
    a.zipped_data,
    cipher=block_cipher
)

if sys.platform[:5] == "linux":
    # On Linux everything is packed into a single one-file executable.
    exe = EXE(
        pyz,
        a.scripts,
        a.binaries,
        a.zipfiles,
        a.datas,
        name=bundle_name,
        debug=False,
        bootloader_ignore_signals=False,
        strip=False,
        upx=True,
        console=True,
        upx_exclude=[],
        icon=icon
    )
else:
    # On macOS/Windows a one-folder build is produced (EXE + COLLECT).
    exe = EXE(
        pyz,
        a.scripts,
        # a.binaries,
        a.zipfiles,
        # a.datas,
        exclude_binaries=True,
        name=exe_name,
        debug=False,
        bootloader_ignore_signals=False,
        strip=False,
        upx=True,
        console=True,
        icon=icon
    )
    coll = COLLECT(
        exe,
        a.binaries,
        # a.zipfiles,
        a.datas,
        strip=False,
        upx=True,
        upx_exclude=[],
        name=exe_name
    )
if __name__ == "__main__":
    # Entry point of the frozen (PyInstaller) executable: start the GUI and
    # keep the console open on failure so the user can read the traceback.
    try:
        import structuremap.gui
        import multiprocessing
        # Required for multiprocessing inside a frozen Windows executable.
        multiprocessing.freeze_support()
        structuremap.gui.run()
    # FIX: was "except e:", which raised a NameError ("e" undefined) the
    # moment any exception occurred and thereby masked the original error.
    except Exception:
        import traceback
        import sys
        exc_info = sys.exc_info()
        # Display the *original* exception
        traceback.print_exception(*exc_info)
        # Block so the console window does not close before it can be read.
        input("Something went wrong, press any key to continue...")
def get_requirements():
    """
    Parse the base and extra requirement files declared by the package.

    Reads ``requirements/<file>`` for the base requirements and for every
    extra listed in ``package2install.__extra_requirements__``. For each
    extra, two variants are produced: ``<extra>-stable`` (fully pinned
    specifiers, as written in the file) and ``<extra>`` (package names only).

    Returns
    -------
    : (list, dict)
        The loose base requirements and a dict mapping each extra name
        (and its "-stable" variant) to its list of requirements.
    """
    extra_requirements = {}
    requirement_file_names = package2install.__extra_requirements__
    # The empty-string key denotes the base (non-extra) requirements.
    requirement_file_names[""] = "requirements.txt"
    for extra, requirement_file_name in requirement_file_names.items():
        full_requirement_file_name = os.path.join(
            "requirements",
            requirement_file_name,
        )
        with open(full_requirement_file_name) as requirements_file:
            if extra != "":
                extra_stable = f"{extra}-stable"
            else:
                extra_stable = "stable"
            extra_requirements[extra_stable] = []
            extra_requirements[extra] = []
            for line in requirements_file:
                # The "stable" variant keeps the pinned specifier verbatim.
                extra_requirements[extra_stable].append(line)
                # The loose variant keeps only the package name.
                requirement, *comparison = re.split("[><=~!]", line)
                # FIX: was "requirement == requirement.strip()" - a no-op
                # comparison, so lines without a version specifier kept
                # their trailing newline in the loose requirement list.
                requirement = requirement.strip()
                extra_requirements[extra].append(requirement)
    requirements = extra_requirements.pop("")
    return requirements, extra_requirements
64 | "pywin32==225; sys_platform=='win32'" 65 | ], 66 | extras_require=extra_requirements, 67 | python_requires=package2install.__python_version__, 68 | ) 69 | 70 | 71 | if __name__ == "__main__": 72 | create_pip_wheel() 73 | -------------------------------------------------------------------------------- /structuremap/__init__.py: -------------------------------------------------------------------------------- 1 | #!python 2 | 3 | 4 | __project__ = "structuremap" 5 | __version__ = "0.0.10" 6 | __license__ = "Apache" 7 | __description__ = "An open-source Python package of the AlphaPept ecosystem" 8 | __author__ = "Isabell Bludau & Mann Labs" 9 | __author_email__ = "opensource@alphapept.com" 10 | __github__ = "https://github.com/MannLabs/structuremap" 11 | __keywords__ = [ 12 | "bioinformatics", 13 | "software", 14 | "AlphaPept ecosystem", 15 | ] 16 | __python_version__ = ">=3.8" 17 | __classifiers__ = [ 18 | "Development Status :: 1 - Planning", 19 | # "Development Status :: 2 - Pre-Alpha", 20 | # "Development Status :: 3 - Alpha", 21 | # "Development Status :: 4 - Beta", 22 | # "Development Status :: 5 - Production/Stable", 23 | # "Development Status :: 6 - Mature", 24 | # "Development Status :: 7 - Inactive" 25 | "Intended Audience :: Science/Research", 26 | "License :: OSI Approved :: Apache Software License", 27 | "Operating System :: OS Independent", 28 | "Programming Language :: Python :: 3", 29 | "Topic :: Scientific/Engineering :: Bio-Informatics", 30 | ] 31 | __console_scripts__ = [ 32 | "structuremap=structuremap.cli:run", 33 | ] 34 | __urls__ = { 35 | "Mann Labs at MPIB": "https://www.biochem.mpg.de/mann", 36 | "Mann Labs at CPR": "https://www.cpr.ku.dk/research/proteomics/mann/", 37 | "GitHub": __github__, 38 | # "ReadTheDocs": None, 39 | # "PyPi": None, 40 | # "Scientific paper": None, 41 | } 42 | __extra_requirements__ = { 43 | "development": "requirements_development.txt", 44 | } 45 | 
def scale_pvals(
    pvals: Union[list, np.ndarray],
) -> list:
    """
    Function to scale p-values that are already negative log10 transformed.
    In this context, scaling refers to assigning the p-values to a specific
    significance bin. The resulting significance bins are formatted as string
    for plotting purposes.

    Parameters
    ----------
    pvals : list or np.ndarray of numbers
        List (or any other iterable) of p-values that are already
        negative log10 transformed.

    Returns
    -------
    : list
        The list of significance bins as strings (e.g. '> 50';
        values below the smallest threshold are binned as '> 0').
    """
    # NOTE: annotation fixed from np.array (a function, not a type) to
    # np.ndarray; behavior is unchanged.
    steps = [1000, 100, 50, 10, 5, 2]
    binned = []
    for pval in pvals:
        # Assign the largest threshold the value reaches; 0 if none.
        threshold = max((step for step in steps if pval >= step), default=0)
        binned.append(f'> {threshold}')
    return binned
75 | """ 76 | df = data.copy(deep=True) 77 | df['ptm'] = [re.sub('_', ' ', p) for p in df['ptm']] 78 | category_dict = {} 79 | if ptm_select is not None: 80 | ptm_select = [re.sub('_', ' ', p) for p in ptm_select] 81 | df = df[df.ptm.isin(ptm_select)] 82 | category_dict['ptm'] = ptm_select 83 | if roi_select is not None: 84 | df = df[df.roi.isin(roi_select)] 85 | category_dict['roi'] = roi_select 86 | df['log_odds_ratio'] = np.log(df['oddsr']) 87 | df['neg_log_adj_p'] = -np.log10(df.p_adj_bh) 88 | df['neg_log_adj_p_round'] = scale_pvals(df.neg_log_adj_p) 89 | category_dict['neg_log_adj_p_round'] = list(reversed([ 90 | '> 1000', '> 100', '> 50', '> 10', '> 5', '> 2', '> 0'])) 91 | color_dict = {'> 1000': 'rgb(120,0,0)', 92 | '> 100': 'rgb(177, 63, 100)', 93 | '> 50': 'rgb(221, 104, 108)', 94 | '> 10': 'rgb(241, 156, 124)', 95 | '> 5': 'rgb(245, 183, 142)', 96 | '> 2': 'rgb(246, 210, 169)', 97 | '> 0': 'grey'} 98 | fig = px.bar(df, 99 | x='ptm', 100 | y='log_odds_ratio', 101 | labels=dict({'ptm': 'PTM', 102 | 'log_odds_ratio': 'log odds ratio', 103 | 'neg_log_adj_p_round': '-log10 (adj. p-value)'}), 104 | color='neg_log_adj_p_round', 105 | facet_col='roi', 106 | hover_data=['oddsr', 'p_adj_bh'], 107 | category_orders=category_dict, 108 | color_discrete_map=color_dict, 109 | template="simple_white", 110 | ) 111 | if plot_width is None: 112 | p_width = 400+(len(df.ptm.unique())*20) 113 | elif plot_width > 0: 114 | p_width = plot_width 115 | else: 116 | raise ValueError( 117 | f"{plot_width} is not a valid parameter for plot_width. plot_width needs to be a positive integer.") 118 | if plot_height is None: 119 | p_height = 500 120 | elif plot_height > 0: 121 | p_height = plot_height 122 | else: 123 | raise ValueError( 124 | f"{plot_height} is not a valid parameter for plot_height. 
def plot_ptm_colocalization(
    df,
    name='Fraction of modified acceptor residues',
    context=None,
    plot_width: int = None,
    plot_height: int = None,
):
    """
    Plot PTMs co-localization.

    Parameters
    ----------
    df : pd.DataFrame
        Dataframe with results from
        structuremap.processing.evaluate_ptm_colocalization.
    name : str
        Name of the resulting plot.
        Default is 'Fraction of modified acceptor residues'.
    context : str
        Either '3D', '1D' or None.
        Default is None, which shows both 1D and 3D results.
    plot_width : int
        Integer specifying plot width. Default is None.
    plot_height : int
        Integer specifying plot height. Default is None.

    Raises
    ------
    ValueError
        If plot_width or plot_height is not a positive integer,
        or if context is not '1D', '3D' or None.

    Returns
    -------
    : plot
        Figure showing PTMs co-localization across distance bins.
    """
    # FIX: operate on a copy so the caller's dataframe is not mutated by the
    # 'variable_sig' assignment below (consistent with plot_enrichment,
    # which deep-copies its input).
    df = df.copy()
    # Default dimensions depend on whether a single context is shown
    # (wide single-row layout) or both contexts (tall faceted layout).
    if plot_width is None:
        if context in ['1D', '3D']:
            p_width = 1100
        else:
            p_width = 1000
    elif plot_width > 0:
        p_width = plot_width
    else:
        raise ValueError(
            f"{plot_width} is not a valid parameter for plot_width. plot_width needs to be a positive integer.")
    if plot_height is None:
        if context in ['1D', '3D']:
            p_height = 350
        else:
            p_height = 1800
    elif plot_height > 0:
        p_height = plot_height
    else:
        raise ValueError(
            f"{plot_height} is not a valid parameter for plot_height. plot_height needs to be a positive integer.")
    # Mark observed points that reach significance for separate coloring.
    df['variable_sig'] = np.where(((df['pvalue']<=0.01) & (df['variable']=='Observed')), 'Observed (p <= 0.01)', df['variable'])
    if context in ['1D', '3D']:
        df = df[df.context == context]
        fig = px.scatter(
            df,
            x="cutoff",
            y="value",
            error_y="std_random_fraction",
            color="variable_sig",
            facet_col="ptm_types",
            facet_col_spacing=0.05,
            labels={"value": "Fraction of modified acceptors",
                    "cutoff": "distance bin",
                    "ptm_types": "",
                    "variable_sig": ""},
            color_discrete_sequence=['rgb(177, 63, 100)', '#FA8072', 'grey'])
        # Decouple the y-axes of all facet columns (previously seven
        # copy-pasted update_yaxes calls, one per column).
        for facet_col in range(1, 8):
            fig = fig.update_yaxes(
                matches=None, showticklabels=True, col=facet_col)
    elif context is None:
        fig = px.scatter(
            df,
            x="cutoff",
            y="value",
            error_y="std_random_fraction",
            color="variable_sig",
            facet_row="ptm_types",
            facet_col="context",
            labels={"value": "Fraction of modified acceptors",
                    "cutoff": "distance bin",
                    "ptm_types": "",
                    "variable_sig": ""},
            color_discrete_sequence=['rgb(177, 63, 100)', '#FA8072', 'grey'])
        fig = fig.update_yaxes(matches=None)
    else:
        raise ValueError(f"{context} is not a valid context")
    fig = fig.update_layout(width=p_width, height=p_height)
    fig = fig.update_layout(title=name,
                            template="simple_white")
    # Configure the modebar download button to export SVG.
    config = {'toImageButtonOptions': {'format': 'svg', 'filename': name}}
    return fig.show(config=config)
def download_alphafold_cif(
    proteins: list,
    out_folder: str,
    out_format: str = "{}.cif",
    alphafold_cif_url: str = 'https://alphafold.ebi.ac.uk/files/AF-{protein}-F1-model_v{version}.cif',
    timeout: int = 60,
    verbose_log: bool = False,
) -> tuple:
    """
    Function to download .cif files of protein structures predicted by AlphaFold.

    Parameters
    ----------
    proteins : list
        List (or any other iterable) of UniProt protein accessions for which to
        download the structures.
    out_folder : str
        Path to the output folder.
    out_format : str
        The default file name of the cif files to be saved.
        The brackets {} are replaced by a protein name from the proteins list.
        Default is '{}.cif'.
    alphafold_cif_url : str
        The base link from where to download cif files.
        The {protein} and {version} placeholders are replaced by a protein
        accession and an AlphaFold model version, respectively.
        Default is
        'https://alphafold.ebi.ac.uk/files/AF-{protein}-F1-model_v{version}.cif'.
    timeout : int
        Time to wait for reconnection of downloads.
        Default is 60.
    verbose_log: bool
        Whether to write verbose logging information.
        Default is False.

    Returns
    -------
    : (list, list, list)
        The lists of valid, invalid and existing protein accessions.
    """
    socket.setdefaulttimeout(timeout)
    valid_proteins = []
    invalid_proteins = []
    existing_proteins = []
    # Newest-first list of AlphaFold DB model versions to probe.
    AFversions = [9, 8, 7, 6, 5, 4, 3, 2, 1] #Dirty fix, but should hold up for the foreseeable future

    if not os.path.exists(out_folder):
        os.makedirs(out_folder)
    for protein in tqdm.tqdm(proteins):
        name_out = os.path.join(
            out_folder,
            out_format.format(protein)
        )
        if os.path.isfile(name_out):
            # Skip proteins whose structure file was already downloaded.
            existing_proteins.append(protein)
        else:
            # Probe versions newest-first until one responds with HTTP 200.
            # NOTE(review): requests.get fetches the full file body just to
            # check the status, and urlretrieve below downloads it again -
            # consider stream=True or reusing the response content.
            for AFversion in AFversions:
                response = requests.get(alphafold_cif_url.format(protein=protein,version=AFversion))
                if response.status_code == 200:
                    latest_AFversion = AFversion
                    break
            else:
                # No version found; 404 is used as a sentinel so the URL
                # below fails with HTTPError and the protein is marked invalid.
                latest_AFversion = 404
            name_in = alphafold_cif_url.format(protein=protein,version=latest_AFversion)
            try:
                urllib.request.urlretrieve(name_in, name_out)
                valid_proteins.append(protein)
            except urllib.error.HTTPError:
                if verbose_log:
                    logging.info(f"Protein {protein} not available for CIF download.")
                invalid_proteins.append(protein)
    logging.info(f"Valid proteins: {len(valid_proteins)}")
    logging.info(f"Invalid proteins: {len(invalid_proteins)}")
    logging.info(f"Existing proteins: {len(existing_proteins)}")
    return(valid_proteins, invalid_proteins, existing_proteins)
def download_alphafold_pae(
    proteins: list,
    out_folder: str,
    out_format: str = "pae_{}.hdf",
    alphafold_pae_url: str = 'https://alphafold.ebi.ac.uk/files/AF-{protein}-F1-predicted_aligned_error_v{version}.json',
    timeout: int = 60,
    verbose_log: bool = False,
) -> tuple:
    """
    Function to download paired aligned errors (pae) for protein structures
    predicted by AlphaFold.

    Parameters
    ----------
    proteins : list
        List (or any other iterable) of UniProt protein accessions for which to
        download the structures.
    out_folder : str
        Path to the output folder.
    out_format : str
        The default file name of the pae files to be saved.
        The brackets {} are replaced by a protein name from the proteins list.
        Default is 'pae_{}.hdf'.
    alphafold_pae_url : str
        The base link from where to download pae files.
        The {protein} and {version} placeholders are replaced by a protein
        accession and an AlphaFold model version, respectively.
        Default is
        'https://alphafold.ebi.ac.uk/files/AF-{protein}-F1-predicted_aligned_error_v{version}.json'.
    timeout : int
        Time to wait for reconnection of downloads.
        Default is 60.
    verbose_log: bool
        Whether to write verbose logging information.
        Default is False.

    Returns
    -------
    : (list, list, list)
        The valid, invalid and existing proteins.
    """
    socket.setdefaulttimeout(timeout)
    valid_proteins = []
    invalid_proteins = []
    existing_proteins = []
    # Newest-first list of AlphaFold DB model versions to probe.
    AFversions = [9, 8, 7, 6, 5, 4, 3, 2, 1]  # Dirty fix, but should hold up for the foreseeable future
    if not os.path.exists(out_folder):
        os.makedirs(out_folder)
    for protein in tqdm.tqdm(proteins):
        name_out = os.path.join(
            out_folder,
            out_format.format(protein)
        )
        if os.path.isfile(name_out):
            # Skip proteins whose PAE file was already downloaded.
            existing_proteins.append(protein)
            continue
        try:
            # Probe versions newest-first until one responds with HTTP 200.
            for AFversion in AFversions:
                response = requests.get(alphafold_pae_url.format(protein=protein, version=AFversion))
                if response.status_code == 200:
                    latest_AFversion = AFversion
                    break
            else:
                # No version found; 404 is a sentinel so urlretrieve below
                # raises HTTPError and the protein is marked invalid.
                latest_AFversion = 404
            name_in = alphafold_pae_url.format(protein=protein, version=latest_AFversion)
            with tempfile.TemporaryDirectory() as tmp_pae_dir:
                # FIX: the f-prefix was missing, so the literal placeholder
                # "{protein}" ended up in the temporary file name.
                tmp_pae_file_name = os.path.join(
                    tmp_pae_dir,
                    f"pae_{protein}.json"
                )
                urllib.request.urlretrieve(name_in, tmp_pae_file_name)
                with open(tmp_pae_file_name) as tmp_pae_file:
                    data = json.loads(tmp_pae_file.read())
            # The JSON schema changed with model version 3.
            if latest_AFversion < 3:
                dist = np.array(data[0]['distance'])
            else:
                dist = [item for sublist in data[0]["predicted_aligned_error"] for item in sublist]
            data_list = [('dist', dist)]
            # FIX: the loop variable no longer shadows the json payload
            # variable "data" above.
            if getattr(sys, 'frozen', False):
                # Frozen builds ship an h5py without lzf; fall back to gzip.
                print('Using frozen h5py w/ gzip compression')
                with h5py.File(name_out, 'w') as hdf_root:
                    for key, values in data_list:
                        print(f'h5py {key}')
                        hdf_root.create_dataset(
                            name=key,
                            data=values,
                            compression="gzip",
                            shuffle=True,
                        )
                print('Done')
            else:
                with h5py.File(name_out, 'w') as hdf_root:
                    for key, values in data_list:
                        hdf_root.create_dataset(
                            name=key,
                            data=values,
                            compression="lzf",
                            shuffle=True,
                        )
            valid_proteins.append(protein)
        except urllib.error.HTTPError:
            if verbose_log:
                logging.info(f"Protein {protein} not available for PAE download.")
            # TODO: HDF IO errors should probably be handled separately.
            invalid_proteins.append(protein)
    logging.info(f"Valid proteins: {len(valid_proteins)}")
    logging.info(f"Invalid proteins: {len(invalid_proteins)}")
    logging.info(f"Existing proteins: {len(existing_proteins)}")
    return(valid_proteins, invalid_proteins, existing_proteins)
def format_alphafold_data(
    directory: str,
    protein_ids: list,
) -> pd.DataFrame:
    """
    Function to import structure files and format them into a combined dataframe.

    Parameters
    ----------
    directory : str
        Path to the folder with all .cif files.
    protein_ids : list
        List of UniProt protein accessions to create an annotation table.
        If an empty list is provided, all proteins in the provided directory
        are used to create the annotation table.

    Returns
    -------
    : pd.DataFrame
        A dataframe with structural information presented in following columns:
        ['protein_id', 'protein_number', 'AA', 'position', 'quality',
        'x_coord_c', 'x_coord_ca', 'x_coord_cb', 'x_coord_n', 'y_coord_c',
        'y_coord_ca', 'y_coord_cb', 'y_coord_n', 'z_coord_c', 'z_coord_ca',
        'z_coord_cb', 'z_coord_n', 'secondary_structure', 'structure_group',
        'BEND', 'HELX', 'STRN', 'TURN', 'unstructured']
    """

    alphafold_annotation_l = []
    protein_number = 0

    for file in tqdm.tqdm(sorted(os.listdir(directory))):

        # FIX: require the actual ".cif" extension (was endswith("cif"),
        # which also matched names merely ending in the letters "cif").
        if file.endswith(".cif"):
            filepath = os.path.join(directory, file)

            # FIX: strip only the terminal ".cif". The previous pattern
            # r'.cif' had an unescaped dot and no anchor, so it could also
            # delete an arbitrary character followed by "cif" occurring
            # inside the accession itself.
            protein_id = re.sub(r'\.cif$', '', file)

            if ((protein_id in protein_ids) or (len(protein_ids) == 0)):

                protein_number += 1

                structure = Bio.PDB.MMCIF2Dict.MMCIF2Dict(filepath)

                # One row per atom record; restricted to backbone atoms below.
                df = pd.DataFrame({'protein_id': structure['_atom_site.pdbx_sifts_xref_db_acc'],
                                   'protein_number': protein_number,
                                   'AA': structure['_atom_site.pdbx_sifts_xref_db_res'],
                                   'position': structure['_atom_site.label_seq_id'],
                                   'quality': structure['_atom_site.B_iso_or_equiv'],
                                   'atom_id': structure['_atom_site.label_atom_id'],
                                   'x_coord': structure['_atom_site.Cartn_x'],
                                   'y_coord': structure['_atom_site.Cartn_y'],
                                   'z_coord': structure['_atom_site.Cartn_z']})

                df = df[df.atom_id.isin(['CA', 'CB', 'C', 'N'])].reset_index(drop=True)
                # Pivot to one row per residue with one coordinate column
                # per (axis, atom) combination.
                df = df.pivot(index=['protein_id',
                                     'protein_number',
                                     'AA', 'position',
                                     'quality'],
                              columns="atom_id")
                df = pd.DataFrame(df.to_records())

                df = df.rename(columns={"('x_coord', 'CA')": "x_coord_ca",
                                        "('y_coord', 'CA')": "y_coord_ca",
                                        "('z_coord', 'CA')": "z_coord_ca",
                                        "('x_coord', 'CB')": "x_coord_cb",
                                        "('y_coord', 'CB')": "y_coord_cb",
                                        "('z_coord', 'CB')": "z_coord_cb",
                                        "('x_coord', 'C')": "x_coord_c",
                                        "('y_coord', 'C')": "y_coord_c",
                                        "('z_coord', 'C')": "z_coord_c",
                                        "('x_coord', 'N')": "x_coord_n",
                                        "('y_coord', 'N')": "y_coord_n",
                                        "('z_coord', 'N')": "z_coord_n"})

                # Coordinates/positions arrive as strings from the cif parser.
                df = df.apply(pd.to_numeric, errors='ignore')

                df['secondary_structure'] = 'unstructured'

                # Annotate residues that fall into any secondary-structure
                # segment declared in the cif file.
                if '_struct_conf.conf_type_id' in structure.keys():
                    start_idx = [int(i) for i in structure['_struct_conf.beg_label_seq_id']]
                    end_idx = [int(i) for i in structure['_struct_conf.end_label_seq_id']]
                    note = structure['_struct_conf.conf_type_id']

                    for i in np.arange(0, len(start_idx)):
                        df['secondary_structure'] = np.where(
                            df['position'].between(
                                start_idx[i],
                                end_idx[i]),
                            note[i],
                            df['secondary_structure'])

                alphafold_annotation_l.append(df)

    alphafold_annotation = pd.concat(alphafold_annotation_l)
    alphafold_annotation = alphafold_annotation.sort_values(
        by=['protein_number', 'position']).reset_index(drop=True)

    # Collapse e.g. "HELX_P" into its coarse group "HELX" and one-hot encode.
    alphafold_annotation['structure_group'] = [re.sub('_.*', '', i)
                                               for i in alphafold_annotation[
                                               'secondary_structure']]
    str_oh = pd.get_dummies(alphafold_annotation['structure_group'],
                            dtype='int64')
    alphafold_annotation = alphafold_annotation.join(str_oh)

    return(alphafold_annotation)
@numba.njit
def get_3d_dist(
    coordinate_array_1: np.ndarray,
    coordinate_array_2: np.ndarray,
    idx_1: int,
    idx_2: int
) -> float:
    """
    Function to get the distance between two coordinates in 3D space.
    Input are two coordinate arrays and two respective indices that specify
    for which points in the coordinate arrays the distance should be calculated.

    Parameters
    ----------
    coordinate_array_1 : np.ndarray
        Array of 3D coordinates.
        Must be 3d, e.g. np.float64[:,3]
    coordinate_array_2 : np.ndarray
        Array of 3D coordinates.
        Must be 3d, e.g. np.float64[:,3]
    idx_1 : int
        Integer to select an index in coordinate_array_1.
    idx_2 : int
        Integer to select an index in coordinate_array_2.

    Returns
    -------
    : float
        Distance between the two selected 3D coordinates.
    """
    # Componentwise differences between the two selected points.
    delta_x = coordinate_array_1[idx_1, 0] - coordinate_array_2[idx_2, 0]
    delta_y = coordinate_array_1[idx_1, 1] - coordinate_array_2[idx_2, 1]
    delta_z = coordinate_array_1[idx_1, 2] - coordinate_array_2[idx_2, 2]
    # Euclidean norm of the difference vector.
    return np.sqrt(delta_x**2 + delta_y**2 + delta_z**2)
389 | """ 390 | theta = np.radians(theta) 391 | axis = axis / np.linalg.norm(axis) 392 | a = np.cos(theta / 2.0) 393 | b, c, d = -axis * np.sin(theta / 2.0) 394 | aa, bb, cc, dd = a * a, b * b, c * c, d * d 395 | bc, ad, ac, ab, bd, cd = b * c, a * d, a * c, a * b, b * d, c * d 396 | rotation_matrix = np.array( 397 | [[aa + bb - cc - dd, 2 * (bc + ad), 2 * (bd - ac)], 398 | [2 * (bc - ad), aa + cc - bb - dd, 2 * (cd + ab)], 399 | [2 * (bd + ac), 2 * (cd - ab), aa + dd - bb - cc]]) 400 | rotated_vector = np.dot(rotation_matrix, vector) 401 | return rotated_vector 402 | 403 | 404 | @numba.njit 405 | def get_gly_vector( 406 | coord_a: np.ndarray, 407 | coord_c: np.ndarray, 408 | coord_n: np.ndarray, 409 | idx_1: int, 410 | theta: float = -120 411 | ) -> np.ndarray: 412 | """ 413 | Return a pseudo vector Ca -> Cb for a Glycine residue. 414 | The pseudo vector is centered at the origin and the 415 | Ccoord=N coord rotated over -120 degrees 416 | along the CA-C axis (see Bio.PDB package). 417 | 418 | Parameters 419 | ---------- 420 | coord_a : np.ndarray 421 | Array of 3D coordinates of alpha carbon atoms across different 422 | amino acids. 423 | coord_c : np.ndarray 424 | Array of 3D coordinates of carboxy carbon atoms across different 425 | amino acids. 426 | coord_n : np.ndarray 427 | Array of 3D coordinates of amino nitrogen atoms across different 428 | amino acids. 429 | idx_1 : int 430 | Integer to select a specific amino acid in the coordinate arrays. 431 | theta : float 432 | The theta for the rotation. 433 | Default is -120. 434 | 435 | Returns 436 | ------- 437 | : np.ndarray 438 | Pseudo vector Ca -> Cb for a Glycine residue. 
439 | """ 440 | # get unit vectors 441 | uv_n = (coord_n[idx_1] - coord_a[idx_1]) / get_3d_dist(coord_n, coord_a, idx_1, idx_1) 442 | uv_c = (coord_c[idx_1] - coord_a[idx_1]) / get_3d_dist(coord_c, coord_a, idx_1, idx_1) 443 | # rotation of uv_n around uv_c over -120 deg 444 | uv_b = rotate_vector_around_axis(vector=uv_n, axis=uv_c, theta=theta) 445 | return uv_b 446 | 447 | @numba.njit 448 | def get_angle( 449 | coord_a: np.ndarray, 450 | coord_b: np.ndarray, 451 | coord_c: np.ndarray, 452 | coord_n: np.ndarray, 453 | idx_1: int, 454 | idx_2: int 455 | ) -> float: 456 | """ 457 | Calculate the angle between the vector of the target amino acid's 458 | side chain (Ca1 -> Cb1) and the vector pointing from the target 459 | amino acid's alpha carbon atom to a different amino acid's 460 | alpha carbon atom (Ca1 -> Ca2). 461 | 462 | Parameters 463 | ---------- 464 | coord_a : np.ndarray 465 | Array of 3D coordinates of alpha carbon atoms across different 466 | amino acids. 467 | coord_b : np.ndarray 468 | Array of 3D coordinates of beta carbon atoms across different 469 | amino acids. 470 | coord_c : np.ndarray 471 | Array of 3D coordinates of carboxy carbon atoms across different 472 | amino acids. 473 | coord_n : np.ndarray 474 | Array of 3D coordinates of amino nitrogen atoms across different 475 | amino acids. 476 | idx_1 : int 477 | Integer to select a first amino acid in the coordinate arrays. 478 | idx_2 : int 479 | Integer to select a second amino acid in the coordinate arrays. 480 | 481 | Returns 482 | ------- 483 | : float 484 | Angle between the side chain of the first amino acid and a second 485 | amino acid. 486 | """ 487 | if np.isnan(coord_b[idx_1, 0]): 488 | # Get pseudo vector Ca -> Cb for a Gly residue. 
489 | uv_1 = get_gly_vector(coord_a, 490 | coord_c, 491 | coord_n, 492 | idx_1) 493 | else: 494 | # Calculate unit vector for Ca1 -> Cb1 495 | uv_1 = (coord_b[idx_1] - coord_a[idx_1]) / get_3d_dist(coord_b, coord_a, idx_1, idx_1) 496 | # Calculate unit vector for Ca1 -> Ca2 497 | uv_d = (coord_a[idx_2] - coord_a[idx_1]) / get_3d_dist(coord_a, coord_a, idx_1, idx_2) 498 | # Calculate the angle between the two unit vectors 499 | dot_p = np.dot(uv_1, uv_d) 500 | # angle = np.arccos(np.clip(dot_p, -1.0, 1.0)) 501 | angle = np.arccos(dot_p) 502 | # Convert radians in degrees 503 | angle_deg = np.rad2deg(angle) 504 | return(angle_deg) 505 | 506 | 507 | @numba.njit 508 | def get_paired_error( 509 | position: np.ndarray, 510 | error_dist: np.ndarray, 511 | idx_1: int, 512 | idx_2: int 513 | ) -> float: 514 | """ 515 | Extract paired aligned error of AlphaFold from a complete 516 | error matrix (error_dist) at specific sequence positions. 517 | 518 | Parameters 519 | ---------- 520 | position : np.ndarray 521 | Array of amino acid positions from which to choose specific indeces. 522 | error_dist : np.ndarray 523 | Matrix of paired aligned errors of AlphaFold across all amino acids 524 | in a protein qequence. 525 | idx_1 : int 526 | Integer to select a first amino acid in the position array. 527 | idx_2 : int 528 | Integer to select a second amino acid in the position array. 529 | 530 | Returns 531 | ------- 532 | : float 533 | Paired aligned error of the first amino acid and a second amino acid. 534 | """ 535 | pos1 = position[idx_1] 536 | pos2 = position[idx_2] 537 | err = error_dist[pos1 - 1, pos2 - 1] 538 | return(err) 539 | 540 | 541 | @numba.njit 542 | def get_neighbors( 543 | idx_list: np.ndarray, # Technically this is not a list and it could/should be renamed. 
544 | coord_a: np.ndarray, 545 | coord_b: np.ndarray, 546 | coord_c: np.ndarray, 547 | coord_n: np.ndarray, 548 | position: np.ndarray, 549 | error_dist: np.ndarray, 550 | max_dist: float, 551 | max_angle: float 552 | ) -> np.ndarray: 553 | """ 554 | Get the number of amino acids within the specified distance and angle 555 | relative to the target amino acid. 556 | 557 | Parameters 558 | ---------- 559 | idx_list : np.ndarray 560 | Array of amino acid indeces. 561 | coord_a : np.ndarray 562 | Array of 3D coordinates of alpha carbon atoms across different 563 | amino acids. 564 | coord_b : np.ndarray 565 | Array of 3D coordinates of beta carbon atoms across different 566 | amino acids. 567 | coord_c : np.ndarray 568 | Array of 3D coordinates of carboxy carbon atoms across different 569 | amino acids. 570 | coord_n : np.ndarray 571 | Array of 3D coordinates of amino nitrogen atoms across different 572 | amino acids. 573 | position : np.ndarray 574 | Array of amino acid positions. 575 | error_dist: : np.ndarray 576 | Matrix of paired aligned errors of AlphaFold across all amino acids 577 | in a protein qequence. 578 | max_dist : float 579 | Float specifying the maximum distance between two amino acids. 580 | max_angle : float 581 | Float specifying the maximum angle (in degrees) between two 582 | amino acids. 583 | 584 | Returns 585 | ------- 586 | : np.ndarray 587 | Number of amino acids within the specified distance and angle. 
588 | """ 589 | res = [] 590 | for x1 in idx_list: 591 | n_neighbors = 0 592 | for x2 in idx_list: 593 | if x1 != x2: 594 | paired_error = get_paired_error( 595 | position=position, 596 | error_dist=error_dist, 597 | idx_1=x1, 598 | idx_2=x2) 599 | if (paired_error <= max_dist): 600 | dist = get_3d_dist( 601 | coordinate_array_1=coord_a, 602 | coordinate_array_2=coord_a, 603 | idx_1=x1, 604 | idx_2=x2) 605 | if (dist + paired_error <= max_dist): 606 | angle = get_angle( 607 | coord_a=coord_a, 608 | coord_b=coord_b, 609 | coord_c=coord_c, 610 | coord_n=coord_n, 611 | idx_1=x1, 612 | idx_2=x2) 613 | if angle <= max_angle: 614 | n_neighbors += 1 615 | res.append(n_neighbors) 616 | return(np.array(res)) 617 | 618 | 619 | @numba.njit 620 | def find_end( 621 | label: int, 622 | start_index: int, 623 | values: np.ndarray 624 | ) -> int: 625 | """Find when the label changes. 626 | 627 | This assumes a sorted values array. 628 | 629 | Parameters 630 | ---------- 631 | label : int 632 | The label of interest. 633 | start_index : int 634 | The previous endindex index of the previous label, 635 | which normally is the start_index for the current label. 636 | values : int 637 | An array with values. 638 | 639 | Returns 640 | ------- 641 | int 642 | The end_index index of the label in values. 643 | """ 644 | while values[start_index] == label: 645 | start_index += 1 646 | if start_index == len(values): 647 | break 648 | return start_index 649 | 650 | 651 | def partition_df_by_prots( 652 | df: pd.DataFrame, 653 | ) -> pd.DataFrame: 654 | """ 655 | Generator function to split a dataframe into seperate proteins. 656 | 657 | NOTE: This function is significantly faster if the input df is already 658 | sorted by protein_number! 659 | 660 | Parameters 661 | ---------- 662 | df : pd.DataFrame 663 | pd.DataFrame of formatted AlphaFold data across various proteins. 664 | 665 | Yields 666 | ------- 667 | : pd.DataFrame 668 | Subset of the input dataframe only containing a single protein. 
669 | """ 670 | df = df.astype({'position': 'int64'}) 671 | if not df.protein_number.is_monotonic_increasing: 672 | df = df.sort_values(by='protein_number').reset_index(drop=True) 673 | unique_proteins = df.protein_number.unique() 674 | end = 0 675 | for protein_i in tqdm.tqdm(unique_proteins): 676 | start = end 677 | end = find_end(protein_i, end, df.protein_number.values) 678 | prot_df = df[start:end] 679 | if not prot_df.position.is_monotonic_increasing: 680 | prot_df.sort_values(by='position', inplace=True) 681 | yield prot_df.reset_index(drop=True) 682 | 683 | 684 | def annotate_accessibility( 685 | df: pd.DataFrame, 686 | max_dist: float, 687 | max_angle: float, 688 | error_dir: str, 689 | filename_format: str = "pae_{}.hdf", 690 | ) -> pd.DataFrame: 691 | """ 692 | Half sphere exposure as calculated in 693 | https://onlinelibrary.wiley.com/doi/10.1002/prot.20379 694 | but with paired aligned error metric included. 695 | 696 | Parameters 697 | ---------- 698 | df : pd.DataFrame 699 | pd.DataFrame of formatted AlphaFold data across various proteins. 700 | Such a dataframe is gerated by format_alphafold_data. 701 | max_dist : float 702 | Float specifying the maximum distance between two amino acids. 703 | max_angle : float 704 | Float specifying the maximum angle (in degrees) between two 705 | amino acids. 706 | error_dir: : str 707 | Path to the directory where the hdf files containing the matrices of 708 | paired aligned errors of AlphaFold are stored. This should correspond 709 | to the out_folder used in download_alphafold_pae. 710 | filename_format : str 711 | The file name of the pae files saved by download_alphafold_pae. 712 | The brackets {} are replaced by a protein name from the proteins list. 713 | Default is 'pae_{}.hdf'. 714 | 715 | Returns 716 | ------- 717 | : pd.DataFrame 718 | Dataframe repporting the number of neighboring amino acids at the 719 | specified maximum distance and angle per protein, amino acid and 720 | position. 
721 | """ 722 | proteins = list() 723 | AA = list() 724 | AA_p = list() 725 | a_AA = list() 726 | for df_prot in partition_df_by_prots(df): 727 | protein_accession = df_prot.protein_id.values[0] 728 | if error_dir is not None: 729 | with h5py.File(os.path.join( 730 | error_dir, 731 | filename_format.format(protein_accession)) 732 | ) as hdf_root: 733 | error_dist = hdf_root['dist'][...] 734 | size = int(np.sqrt(len(error_dist))) 735 | error_dist = error_dist.reshape(size, size) 736 | use_pae = 'pae' 737 | else: 738 | error_dist = np.zeros((df_prot.shape[0], df_prot.shape[0])) 739 | use_pae = 'nopae' 740 | idx_list = np.arange(0, df_prot.shape[0]) 741 | res_a = get_neighbors( 742 | idx_list=idx_list, 743 | coord_a=np.vstack([df_prot.x_coord_ca.values, 744 | df_prot.y_coord_ca.values, 745 | df_prot.z_coord_ca.values]).T, 746 | coord_b=np.vstack([df_prot.x_coord_cb.values, 747 | df_prot.y_coord_cb.values, 748 | df_prot.z_coord_cb.values]).T, 749 | coord_c=np.vstack([df_prot.x_coord_c.values, 750 | df_prot.y_coord_c.values, 751 | df_prot.z_coord_c.values]).T, 752 | coord_n=np.vstack([df_prot.x_coord_n.values, 753 | df_prot.y_coord_n.values, 754 | df_prot.z_coord_n.values]).T, 755 | # If this step is slow, consider avoiding the vstack to create new arrays 756 | # Alternatively, it might be faster to use e.g. df[["x", "y", "z"]].values 757 | # as pandas might force this into a view rather than a new array 758 | position=df_prot.position.values, 759 | error_dist=error_dist, 760 | max_dist=max_dist, 761 | max_angle=max_angle) 762 | proteins.append(df_prot.protein_id.values) 763 | # using numeracal prot_numbers might be better. 764 | # In general it is good practice to reduce strings/objects in arrays/dfs 765 | # as much possible. Especially try to avoid repetetion of such types and 766 | # just use indices and a reference array. Rarely do you need this actual 767 | # values anyways. 
768 | AA.append(df_prot.AA.values) 769 | AA_p.append(df_prot.position.values) 770 | a_AA.append(res_a) 771 | proteins = np.concatenate(proteins) 772 | AA = np.concatenate(AA) 773 | AA_p = np.concatenate(AA_p) 774 | a_AA = np.concatenate(a_AA) 775 | accessibility_df = pd.DataFrame({'protein_id': proteins, 776 | 'AA': AA, 'position': AA_p}) 777 | accessibility_df[f'nAA_{max_dist}_{max_angle}_{use_pae}'] = a_AA 778 | return(accessibility_df) 779 | 780 | 781 | @numba.njit 782 | def smooth_score(score: np.ndarray, 783 | half_window: int 784 | ) -> np.ndarray: 785 | """ 786 | Get an average value for each position in a score array, considering all 787 | values within a window that spans up to half_window positions before and 788 | after a given target position. 789 | 790 | Parameters 791 | ---------- 792 | score : np.ndarray 793 | Array of numeric score values. 794 | half_window : int 795 | Integer specifying the number of positions to consider both before and 796 | after a given target position. 797 | 798 | Returns 799 | ------- 800 | : np.ndarray 801 | Array of smoothed score values. 802 | """ 803 | smooth_score = [] 804 | for i in range(len(score)): 805 | low_window_bound = i - half_window 806 | if low_window_bound < 0: 807 | low_window_bound = 0 808 | high_window_bound = i + half_window 809 | if high_window_bound > len(score): 810 | high_window_bound = len(score) 811 | window_score = score[low_window_bound: high_window_bound + 1] 812 | window_mean = np.mean(window_score) 813 | smooth_score.append(window_mean) 814 | return np.array(smooth_score) 815 | 816 | 817 | def get_smooth_score(df: pd.DataFrame, 818 | scores: np.ndarray, 819 | half_windows: list, 820 | ) -> pd.DataFrame: 821 | """ 822 | Select columns in a dataframe and smooth the values per protein based on a 823 | provided window. 824 | 825 | Parameters 826 | ---------- 827 | df : pd.DataFrame 828 | Dataframe with AlphaFold annotations, as generated by 829 | format_alphafold_data. 
    scores : np.ndarray
        Array of column names in the dataframe that should be smoothed.
    half_windows : list
        List of one or more integers specifying the number of positions
        to consider both before and after a given target position.

    Returns
    -------
    : pd.DataFrame
        Copy of the input dataframe with additional columns containing the
        smoothed scores.
    """
    df_out = []
    # One new column per (score, window) combination, computed per protein so
    # windows never bleed across protein boundaries.
    for df_prot in partition_df_by_prots(df):
        for score in scores:
            for w in half_windows:
                df_prot[f"{score}_smooth{w}"] = smooth_score(
                    score=df_prot[score].values,
                    half_window=w)
        df_out.append(df_prot)
    df_out = pd.concat(df_out)
    return df_out


@numba.njit
def get_avg_3d_dist(idx_list: np.ndarray,  # as before, technically not a list but an array. Rename?
                    coord: np.ndarray,
                    position: np.ndarray,
                    error_dist: np.ndarray,
                    metric: str = 'mean',
                    error_operation: str = 'minus',
                    average_aa_size: float = 3.5,
                    ) -> float:
    """
    Get average 3D distance between a group of amino acids.

    Parameters
    ----------
    idx_list : np.ndarray
        Array of amino acid indices.
    coord : np.ndarray
        Array of 3D coordinates of alpha carbon atoms across different
        amino acids.
    position : np.ndarray
        Array of amino acid positions.
    error_dist : np.ndarray
        Matrix of paired aligned errors of AlphaFold across all amino acids in
        a protein sequence.
    metric : str
        Metric to aggregate distances across all pairs for a given amino acid.
        'mean' or 'min' can be chosen. Default is 'mean'.
    error_operation : str
        Metric to include paired aligned error in the distance calculation.
        'minus' or 'plus' can be chosen. Default is 'minus'.
    average_aa_size : float
        Average size of an AA.
        Default is 3.5 Å

    Raises
    ------
    ValueError
        If metric or error_operation are outside their accepted choices.
    """
    if metric not in ['mean', 'min']:
        raise ValueError('Select mean or min as metric.')
    if error_operation not in ['minus', 'plus']:
        raise ValueError('Select minus or plus as error_operation.')
    metric_dist = []
    # For each selected residue, aggregate its error-adjusted distance to all
    # other selected residues; the per-residue aggregates are then averaged.
    for x1 in idx_list:
        all_dist = []
        for x2 in idx_list:
            if x1 != x2:
                dist_i = get_3d_dist(
                    coordinate_array_1=coord,
                    coordinate_array_2=coord,
                    idx_1=x1,
                    idx_2=x2)
                error_i = get_paired_error(
                    position=position,
                    error_dist=error_dist,
                    idx_1=x1,
                    idx_2=x2)
                if error_operation == 'minus':
                    # Optimistic estimate: subtract the PAE, but never go
                    # below the physical size of one amino acid.
                    dist_error_i = dist_i - error_i
                    if dist_error_i < average_aa_size:
                        dist_error_i = average_aa_size
                    all_dist.append(dist_error_i)
                elif error_operation == 'plus':
                    # Pessimistic estimate: add the PAE, but cap at the
                    # maximal 1D (sequence-walk) distance between the pair.
                    dist_error_i = dist_i + error_i
                    nAA_diff = abs(position[x1] - position[x2])
                    nAA_dist = nAA_diff * average_aa_size
                    if dist_error_i > nAA_dist:
                        all_dist.append(nAA_dist)
                    else:
                        all_dist.append(dist_error_i)
        # Probably the 5 lines below can be optimized, but likely not worth
        # the speed improvement?
        all_dist_d = np.array(all_dist)
        if metric == 'mean':
            m_d = np.mean(all_dist_d)
        elif metric == 'min':
            m_d = np.min(all_dist_d)
        metric_dist.append(m_d)
    metric_dist = np.array(metric_dist)
    avg_metric_dist = np.mean(metric_dist)
    return(avg_metric_dist)


@numba.njit
def get_avg_1d_dist(idx_list: np.ndarray,
                    position: np.ndarray,
                    metric: str = 'mean'
                    ) -> float:
    """
    Get average 1D distance between a group of amino acids.

    Parameters
    ----------
    idx_list : np.ndarray
        Array of amino acid indices.
    position : np.ndarray
        Array of amino acid positions.
952 | metric : str 953 | Metric to aggregate distances across all pairs for a given amino acid. 954 | 'mean' or 'min' can be chosen. Default is 'mean'. 955 | 956 | Returns 957 | ------- 958 | : float 959 | Average 1D distance between all selected amino acids. 960 | """ 961 | 962 | if metric not in ['mean', 'min']: 963 | raise ValueError('Select mean or min as metric.') 964 | metric_dist = [] 965 | for x1 in idx_list: 966 | all_dist = [] 967 | for x2 in idx_list: 968 | if x1 != x2: 969 | all_dist.append(abs(position[x1] - position[x2])) 970 | all_dist_d = np.array(all_dist) 971 | if metric == 'mean': 972 | m_d = np.mean(all_dist_d) 973 | elif metric == 'min': 974 | m_d = np.min(all_dist_d) 975 | metric_dist.append(m_d) 976 | metric_dist = np.array(metric_dist) 977 | avg_min_dist = np.mean(metric_dist) 978 | return(avg_min_dist) 979 | 980 | 981 | def get_proximity_pvals(df: pd.DataFrame, 982 | ptm_types: np.ndarray, 983 | ptm_site_dict: dict, 984 | error_dir: str, 985 | filename_format: str = "pae_{}.hdf", 986 | per_site_metric: str = 'mean', 987 | error_operation: str = 'minus', 988 | n_random: int = 10000, 989 | random_seed: int = 44 # should obviously be 42;) Might mess up your testing though 990 | ) -> pd.DataFrame: 991 | """ 992 | Get proximity p-values for selected PTMs. 993 | 994 | Parameters 995 | ---------- 996 | df : pd.DataFrame 997 | pd.DataFrame of formatted AlphaFold data across various proteins. 998 | ptm_types: np.ndarray 999 | Array of PTM modifications for which to perform the proximity analysis. 1000 | ptm_site_dict : dict 1001 | Dictionary containing the possible amino acid sites for each PTM. 1002 | error_dir : str 1003 | Path to the directory where the hdf files containing the matrices of 1004 | paired aligned errors of AlphaFold are stored. 1005 | filename_format : str 1006 | The file name of the pae files saved by download_alphafold_pae. 1007 | The brackets {} are replaced by a protein name from the proteins list. 
1008 | Default is 'pae_{}.hdf'. 1009 | per_site_metric : str 1010 | Metric to aggregate distances across all pairs for a given amino acid. 1011 | 'mean' or 'min' can be chosen. Default is 'mean'. 1012 | error_operation : str 1013 | Metric to include paired aligned error in the distance calculation. 1014 | 'minus' or 'plus' can be chosen. Default is 'minus'. 1015 | n_random : int 1016 | Number of random permutations to perform. Default is 10'000. 1017 | The higher the number of permutations, the more confidence the analysis 1018 | can achieve. However, a very high number of permutations increases 1019 | processing time. No fewer than 1'000 permutations should be used. 1020 | random_seed : int 1021 | Random seed for the analysis. Default is 44. 1022 | 1023 | Returns 1024 | ------- 1025 | : pd.DataFrame 1026 | Dataframe reporting 3D and 1D proximity p-values for each protein and 1027 | selected PTM. 1028 | """ 1029 | random.seed(random_seed) 1030 | proteins = list() 1031 | ptm_type = list() 1032 | n_ptms = list() 1033 | pvals_3d = list() 1034 | pvals_1d = list() 1035 | for df_prot in partition_df_by_prots(df): 1036 | protein_accession = df_prot.protein_id.values[0] 1037 | for ptm_i in ptm_types: 1038 | acc_aa = ptm_site_dict[ptm_i] 1039 | df_ptm_prot = df_prot[df_prot.AA.isin(acc_aa)].reset_index(drop=True) 1040 | n_aa_mod = np.sum(df_ptm_prot[ptm_i]) 1041 | n_aa_all = df_ptm_prot.shape[0] 1042 | if ((n_aa_mod >= 2) & (n_aa_mod < n_aa_all)): 1043 | with h5py.File(os.path.join( 1044 | error_dir, 1045 | filename_format.format(protein_accession))) as hdf_root: 1046 | error_dist = hdf_root['dist'][...] 
1047 | size = int(np.sqrt(len(error_dist))) 1048 | error_dist = error_dist.reshape(size, size) 1049 | # subset to ptm possible positions 1050 | # calculate real distance 1051 | real_idx = df_ptm_prot.index[df_ptm_prot[ptm_i] == 1].tolist() 1052 | avg_dist_3d = get_avg_3d_dist( 1053 | idx_list=np.array(real_idx), 1054 | coord=np.vstack([ 1055 | df_ptm_prot["x_coord_ca"].values, 1056 | df_ptm_prot["y_coord_ca"].values, 1057 | df_ptm_prot["z_coord_ca"].values]).T, 1058 | position=df_ptm_prot["position"].values, 1059 | error_dist=error_dist, 1060 | metric=per_site_metric, 1061 | error_operation=error_operation) 1062 | avg_dist_1d = get_avg_1d_dist( 1063 | idx_list=np.array(real_idx), 1064 | position=df_ptm_prot["position"].values, 1065 | metric=per_site_metric) 1066 | # get background distribution 1067 | rand_idx_list = [np.array(random.sample( 1068 | range(n_aa_all), len(real_idx))) for i in range(n_random)] 1069 | rand_avg_dist_3d = [get_avg_3d_dist( 1070 | idx_list=idx_l, 1071 | coord=np.vstack([ 1072 | df_ptm_prot["x_coord_ca"].values, 1073 | df_ptm_prot["y_coord_ca"].values, 1074 | df_ptm_prot["z_coord_ca"].values]).T, 1075 | position=df_ptm_prot["position"].values, 1076 | error_dist=error_dist, 1077 | metric=per_site_metric, 1078 | error_operation=error_operation) for idx_l in rand_idx_list] 1079 | rand_avg_dist_1d = [get_avg_1d_dist( 1080 | idx_list=idx_l, 1081 | position=df_ptm_prot["position"].values, 1082 | metric=per_site_metric) for idx_l in rand_idx_list] 1083 | # get empirical p-values 1084 | pvalue_3d = np.sum(np.array(rand_avg_dist_3d) <= avg_dist_3d)/n_random 1085 | pvalue_1d = np.sum(np.array(rand_avg_dist_1d) <= avg_dist_1d)/n_random 1086 | # If this is a slow step, there are several ways to still 1087 | # optimize this I think. 1088 | # Especially the creation of 10000 elements in both a list and 1089 | # array seem concerning to me. 1090 | # Probably a >> 10 fold is still possible here. 
1091 | else: 1092 | pvalue_3d = np.nan 1093 | pvalue_1d = np.nan 1094 | pvals_3d.append(pvalue_3d) 1095 | pvals_1d.append(pvalue_1d) 1096 | n_ptms.append(n_aa_mod) 1097 | proteins.append(protein_accession) 1098 | ptm_type.append(ptm_i) 1099 | res_df = pd.DataFrame({'protein_id': proteins, 1100 | 'ptm': ptm_type, 1101 | 'n_ptms': n_ptms, 1102 | 'pvalue_1d': pvals_1d, 1103 | 'pvalue_3d': pvals_3d}) 1104 | res_df_noNan = res_df.dropna(subset=['pvalue_3d','pvalue_1d']).reset_index(drop=True) 1105 | # Why are these then stored explicitly above? # This was to know which IDs these are. 1106 | res_df_noNan['pvalue_1d_adj_bh'] = statsmodels.stats.multitest.multipletests(pvals=res_df_noNan.pvalue_1d, alpha=0.1, method='fdr_bh')[1] 1107 | res_df_noNan['pvalue_3d_adj_bh'] = statsmodels.stats.multitest.multipletests(pvals=res_df_noNan.pvalue_3d, alpha=0.1, method='fdr_bh')[1] 1108 | return(res_df_noNan) 1109 | 1110 | 1111 | def perform_enrichment_analysis(df: pd.DataFrame, 1112 | ptm_types: list, 1113 | rois: list, 1114 | quality_cutoffs: list, 1115 | ptm_site_dict: dict, 1116 | multiple_testing: bool = True) -> pd.DataFrame: 1117 | """ 1118 | Get enrichment p-values for selected PTMs acros regions of interest (ROIs). 1119 | 1120 | Parameters 1121 | ---------- 1122 | df : pd.DataFrame 1123 | pd.DataFrame of formatted AlphaFold data across various proteins. 1124 | ptm_types: list 1125 | List of PTM modifications for which to perform the enrichment analysis. 1126 | rois : list 1127 | List of regions of interest (one hot encoded columns in df) for which 1128 | to perform the enrichment analysis. 1129 | quality_cutoffs : list 1130 | List of quality cutoffs (AlphaFold pLDDT values) to filter for. 1131 | ptm_site_dict : dict 1132 | Dictionary containing the possible amino acid sites for each PTM. 1133 | multiple_testing : bool 1134 | Bool if multiple hypothesis testing correction should be performed. 1135 | Default is 'True'. 
1136 | 1137 | Returns 1138 | ------- 1139 | : pd.DataFrame 1140 | Dataframe reporting p-values for the enrichment of all selected 1141 | ptm_types across selected rois. 1142 | """ 1143 | 1144 | enrichment = [] 1145 | for q_cut in quality_cutoffs: 1146 | # Is quality_cutoffs expected to be a big list? 1147 | # If so, we can still optimize the function below reasonably I think... 1148 | seq_ann_qcut = df[df.quality >= q_cut] 1149 | for ptm in ptm_types: 1150 | seq_ann_qcut_aa = seq_ann_qcut[seq_ann_qcut.AA.isin(ptm_site_dict[ptm])] 1151 | for roi in rois: 1152 | seq_ann_qcut_aa_roi1 = seq_ann_qcut_aa[roi] == 1 1153 | seq_ann_qcut_aa_roi0 = seq_ann_qcut_aa[roi] == 0 1154 | seq_ann_qcut_aa_ptm1 = seq_ann_qcut_aa[ptm] == 1 1155 | seq_ann_qcut_aa_ptm0 = seq_ann_qcut_aa[ptm] == 0 1156 | n_ptm_in_roi = seq_ann_qcut_aa[seq_ann_qcut_aa_roi1 & seq_ann_qcut_aa_ptm1].shape[0] 1157 | n_ptm_not_in_roi = seq_ann_qcut_aa[seq_ann_qcut_aa_roi0 & seq_ann_qcut_aa_ptm1].shape[0] 1158 | n_naked_in_roi = seq_ann_qcut_aa[seq_ann_qcut_aa_roi1 & seq_ann_qcut_aa_ptm0].shape[0] 1159 | n_naked_not_in_roi = seq_ann_qcut_aa[seq_ann_qcut_aa_roi0 & seq_ann_qcut_aa_ptm0].shape[0] 1160 | fisher_table = np.array([[n_ptm_in_roi, n_naked_in_roi], [n_ptm_not_in_roi, n_naked_not_in_roi]]) 1161 | oddsr, p = scipy.stats.fisher_exact(fisher_table, 1162 | alternative='two-sided') 1163 | res = pd.DataFrame({'quality_cutoff': [q_cut], 1164 | 'ptm': [ptm], 1165 | 'roi': [roi], 1166 | 'n_aa_ptm': np.sum(seq_ann_qcut_aa_ptm1), 1167 | 'n_aa_roi': np.sum(seq_ann_qcut_aa_roi1), 1168 | 'n_ptm_in_roi': n_ptm_in_roi, 1169 | 'n_ptm_not_in_roi': n_ptm_not_in_roi, 1170 | 'n_naked_in_roi': n_naked_in_roi, 1171 | 'n_naked_not_in_roi': n_naked_not_in_roi, 1172 | 'oddsr': [oddsr], 1173 | 'p': [p]}) 1174 | enrichment.append(res) 1175 | enrichment_df = pd.concat(enrichment) 1176 | if multiple_testing: 1177 | enrichment_df['p_adj_bf'] = statsmodels.stats.multitest.multipletests( 1178 | pvals=enrichment_df.p, alpha=0.01, 
method='bonferroni')[1] 1179 | enrichment_df['p_adj_bh'] = statsmodels.stats.multitest.multipletests( 1180 | pvals=enrichment_df.p, alpha=0.01, method='fdr_bh')[1] 1181 | return(enrichment_df) 1182 | 1183 | 1184 | def perform_enrichment_analysis_per_protein( 1185 | df: pd.DataFrame, 1186 | ptm_types: list, 1187 | rois: list, 1188 | quality_cutoffs: list, 1189 | ptm_site_dict: dict 1190 | ) -> pd.DataFrame: 1191 | """ 1192 | Get per protein enrichment p-values for selected PTMs acros regions of 1193 | interest (ROIs). 1194 | 1195 | Parameters 1196 | ---------- 1197 | df : pd.DataFrame 1198 | pd.DataFrame of formatted AlphaFold data across various proteins. 1199 | ptm_types: list 1200 | List of PTM modifications for which to perform the enrichment analysis. 1201 | rois : list 1202 | List of regions of interest (one hot encoded columns in df) for which 1203 | to perform the enrichment analysis. 1204 | quality_cutoffs : list 1205 | List of quality cutoffs (AlphaFold pLDDT values) to filter for. 1206 | ptm_site_dict : dict 1207 | Dictionary containing the possible amino acid sites for each PTM. 1208 | 1209 | Returns 1210 | ------- 1211 | : pd.DataFrame 1212 | Dataframe reporting p-values for the enrichment of all selected 1213 | ptm_types across selected rois on a per protein basis. 
1214 | """ 1215 | enrichment_list = list() 1216 | for df_prot in partition_df_by_prots(df): 1217 | protein_accession = df_prot.protein_id.values[0] 1218 | res = perform_enrichment_analysis(df=df_prot, 1219 | ptm_types=ptm_types, 1220 | rois=rois, 1221 | quality_cutoffs=quality_cutoffs, 1222 | ptm_site_dict=ptm_site_dict, 1223 | multiple_testing=False) 1224 | res.insert(loc=0, column='protein_id', value=np.repeat( 1225 | protein_accession, res.shape[0])) 1226 | enrichment_list.append(res) 1227 | enrichment_per_protein = pd.concat(enrichment_list) 1228 | enrichment_per_protein = enrichment_per_protein[(enrichment_per_protein.n_aa_ptm >= 2) & (enrichment_per_protein.n_aa_roi >= enrichment_per_protein.n_aa_ptm)] 1229 | enrichment_per_protein.reset_index(drop=True, inplace=True) 1230 | enrichment_per_protein['p_adj_bf'] = statsmodels.stats.multitest.multipletests( 1231 | pvals=enrichment_per_protein.p, alpha=0.01, method='bonferroni')[1] 1232 | enrichment_per_protein['p_adj_bh'] = statsmodels.stats.multitest.multipletests( 1233 | pvals=enrichment_per_protein.p, alpha=0.01, method='fdr_bh')[1] 1234 | return enrichment_per_protein 1235 | 1236 | 1237 | def find_idr_pattern( 1238 | idr_list: list, 1239 | min_structured_length: int = 100, 1240 | max_unstructured_length: int = 30 1241 | ) -> tuple: 1242 | """ 1243 | Find short intrinsically disordered regions. 1244 | 1245 | Parameters 1246 | ---------- 1247 | idr_list : list 1248 | Nested list specifying the binary IDR condition and its length. 1249 | For example: [[1,10],[0,30],[1,5]]. 1250 | min_structured_length : int 1251 | Integer specifying the minimum number of amino acids in flanking 1252 | structured regions. 1253 | max_unstructured_length : int 1254 | Integer specifying the maximum number of amino acids in the short 1255 | intrinsically unstructured regions. 1256 | 1257 | Returns 1258 | ------- 1259 | : tuple 1260 | (bool, list) If a pattern was found and the list of start end end 1261 | positions of short IDRs. 
1262 | """ 1263 | window = np.array([0, 1, 2]) 1264 | i = 0 1265 | pattern = False 1266 | pos_list = list() 1267 | while i < (len(idr_list) - 2): 1268 | window_i = window + i 1269 | if idr_list[window_i[0]][0] == 0: 1270 | if idr_list[window_i[0]][1] >= min_structured_length: 1271 | if idr_list[window_i[1]][1] <= max_unstructured_length: 1272 | if idr_list[window_i[2]][1] >= min_structured_length: 1273 | pattern = True 1274 | idr_start = np.sum([x[1] for x in idr_list[0: i + 1]]) + 1 1275 | idr_end = idr_start + idr_list[i + 1][1] - 1 1276 | pos_list.append([idr_start, idr_end]) 1277 | i += 1 1278 | return pattern, pos_list 1279 | 1280 | 1281 | def annotate_proteins_with_idr_pattern( 1282 | df: pd.DataFrame, 1283 | min_structured_length: int = 100, 1284 | max_unstructured_length: int = 30 1285 | ) -> pd.DataFrame: 1286 | """ 1287 | Find short intrinsically disordered regions. 1288 | 1289 | Parameters 1290 | ---------- 1291 | df : pd.DataFrame 1292 | Dataframe with AlphaFold annotations. 1293 | min_structured_length : int 1294 | Integer specifying the minimum number of amino acids in flanking 1295 | structured regions. 1296 | max_unstructured_length : int 1297 | Integer specifying the maximum number of amino acids in the short 1298 | intrinsically unstructured regions. 1299 | 1300 | Returns 1301 | ------- 1302 | : pd.DataFrame 1303 | Input dataframe with an additional column 'flexible_pattern'. 
    """

    res_out = list()
    proteins = list()
    loop_pattern = list()
    pattern_position = list()
    for df_prot in partition_df_by_prots(df):
        df_prot['flexible_pattern'] = 0
        protein_accession = df_prot.protein_id.values[0]
        # Run-length encode the per-residue IDR flags into
        # [value, run length] pairs, e.g. [[1, 10], [0, 30], [1, 5]].
        idr_list = [[k, len(list(g))] for k, g in groupby(df_prot.IDR.values)]
        pattern, pos_list = find_idr_pattern(
            idr_list,
            min_structured_length=min_structured_length,
            max_unstructured_length=max_unstructured_length)
        pattern_position_list = list()
        if pattern:
            proteins.append(protein_accession)
            loop_pattern.append(pattern)
            pattern_position.append(pos_list)

            # Expand each [start, end] range into the explicit list of
            # sequence positions it covers.
            pattern_position_list = pattern_position_list + [list(np.arange(p[0], p[1] + 1)) for p in pos_list]
            pattern_position_list = [item for sublist in pattern_position_list for item in sublist]

            selected_locations = np.flatnonzero(df_prot.position.isin(
                pattern_position_list))
            # NOTE(review): np.flatnonzero yields positional indices but .loc
            # expects labels; this assumes partition_df_by_prots hands out
            # each protein with a fresh RangeIndex — TODO confirm.
            df_prot.loc[selected_locations, 'flexible_pattern'] = 1
        res_out.append(df_prot)
    res_out = pd.concat(res_out)
    return res_out


@numba.njit
def extend_flexible_pattern(
    pattern: np.ndarray,
    window: int
) -> np.ndarray:
    """
    This function takes an array of binary values (0 or 1) and extends
    stretches of 1s to either side by the provided window.

    Parameters
    ----------
    pattern : np.ndarray
        Array of binary pattern values.
    window : int
        Integer specifying the number of positions to consider both before
        and after the provided pattern.

    Returns
    -------
    : np.ndarray
        Array with of binary values, where stretches of 1s in the input array
        were extended to both sides.
    """
    extended_pattern = []
    for i in range(len(pattern)):
        # Clamp the lower bound at the start of the array.
        low_window_bound = i - window
        if low_window_bound < 0:
            low_window_bound = 0
        high_window_bound = i + window
        if high_window_bound > len(pattern):
            high_window_bound = len(pattern)
        # The slice end is exclusive, so +1 makes the window inclusive on
        # both sides; slicing past the end of the array is safe.
        window_patterns = pattern[low_window_bound: high_window_bound + 1]
        window_max = np.max(window_patterns)
        extended_pattern.append(window_max)
    return np.array(extended_pattern)


def get_extended_flexible_pattern(
    df: pd.DataFrame,
    patterns: np.ndarray,
    windows: list,
) -> pd.DataFrame:
    """
    Select columns in a dataframe for which to extend the pattern by the
    provided window.

    Parameters
    ----------
    df : pd.DataFrame
        Dataframe with AlphaFold annotations.
    patterns : np.ndarray
        Array of column names in the dataframe with binary values that should
        be extended.
    windows : list
        List of one or more integers specifying the number of positions
        to consider both before and after a pattern.

    Returns
    -------
    : pd.DataFrame
        Input dataframe with additional columns containing the extended
        patterns.
1397 | """ 1398 | df_out = [] 1399 | for df_prot in partition_df_by_prots(df): 1400 | for pattern in patterns: 1401 | for w in windows: 1402 | df_prot[f'{pattern}_extended_{w}'] = extend_flexible_pattern( 1403 | pattern=df_prot[pattern].values, 1404 | window=w) 1405 | df_out.append(df_prot) 1406 | df_out = pd.concat(df_out) 1407 | return df_out 1408 | 1409 | 1410 | # This function could be numba compatible 1411 | def calculate_distances_between_ptms( 1412 | background_idx_list: list, 1413 | target_aa_idx: np.ndarray, 1414 | coords: np.ndarray, 1415 | positions: np.ndarray, 1416 | error_dist: np.ndarray 1417 | ) -> [list, list]: 1418 | """ 1419 | Calculate the distances from a target amino acid to a list of background 1420 | amino acids. 1421 | 1422 | Parameters 1423 | ---------- 1424 | background_idx_list : list 1425 | List of amino acid indices that make up the background. 1426 | target_aa_idx : np.ndarray 1427 | Array of target amino acid indices. 1428 | coords : np.ndarray 1429 | Array of 3D coordinates of alpha carbon atoms across different 1430 | amino acids. 1431 | positions : np.ndarray 1432 | Array of amino acid positions from which to choose the specific indeces. 1433 | error_dist: : np.ndarray 1434 | Matrix of paired aligned errors of AlphaFold across all amino acids 1435 | in a protein qequence. 
1436 | 1437 | Returns 1438 | ------- 1439 | : [list, list] 1440 | List of 3D distance results and list of 1D distance results 1441 | """ 1442 | distance_res = list() 1443 | distance_1D_res = list() 1444 | for idx_list in background_idx_list: 1445 | aa_dist_list = list() 1446 | aa_1D_dist_list = list() 1447 | for i in idx_list: 1448 | aa_dist = list() 1449 | aa_1D_dist = list() 1450 | for aa_i in target_aa_idx: 1451 | aa_dist_i = get_3d_dist( 1452 | coordinate_array_1=coords, 1453 | coordinate_array_2=coords, 1454 | idx_1=i, 1455 | idx_2=aa_i) 1456 | aa_error_i = get_paired_error( 1457 | position=positions, 1458 | error_dist=error_dist, 1459 | idx_1=i, 1460 | idx_2=aa_i) 1461 | aa_dist.append(aa_dist_i+aa_error_i) 1462 | aa_1D_dist.append(abs(positions[i]-positions[aa_i])) 1463 | aa_dist_list.append(aa_dist) 1464 | aa_1D_dist_list.append(aa_1D_dist) 1465 | distance_res.append(aa_dist_list) 1466 | distance_1D_res.append(aa_1D_dist_list) 1467 | return distance_res, distance_1D_res 1468 | 1469 | 1470 | def get_ptm_distance_list( 1471 | df: pd.DataFrame, 1472 | ptm_target: str, 1473 | ptm_background: str, 1474 | ptm_dict: dict, 1475 | error_dir: str, 1476 | filename_format: str = "pae_{}.hdf", 1477 | n_random: int = 10000, 1478 | random_seed: int = 44, 1479 | ) -> [list, list, list]: 1480 | """ 1481 | Extract a lists of 3D and 1D distances between target amino acids and a 1482 | random background. 1483 | 1484 | Parameters 1485 | ---------- 1486 | df : pd.DataFrame 1487 | Dataframe with AlphaFold annotations. 1488 | ptm_target : str 1489 | String specifying the PTM type for which you want to evaluate if it 1490 | is in colocalizing with the background. 1491 | ptm_background : str 1492 | String specifying the PTM type that is used as background. 1493 | ptm_dict : dict 1494 | Dictionary containing the possible amino acid sites for each PTM. 
1495 | error_dir : str 1496 | Path to the directory where the hdf files containing the matrices of 1497 | paired aligned errors of AlphaFold are stored. 1498 | filename_format : str 1499 | The file name of the pae files saved by download_alphafold_pae. 1500 | The brackets {} are replaced by a protein name from the proteins list. 1501 | Default is 'pae_{}.hdf'. 1502 | n_random : int 1503 | Number of random permutations to perform. Default is 10'000. 1504 | The higher the number of permutations, the more confidence the analysis 1505 | can achieve. However, a very high number of permutations increases 1506 | processing time. No fewer than 1'000 permutations should be used. 1507 | random_seed : int 1508 | Random seed for the analysis. Default is 44. 1509 | 1510 | Returns 1511 | ------- 1512 | : [list, list, list] 1513 | List of 3D distances, list of 1D distances and 1514 | list of modified indices. 1515 | """ 1516 | random.seed(random_seed) 1517 | prot_distances = list() 1518 | prot_distances_1D = list() 1519 | prot_mod_idx = list() 1520 | for df_prot in partition_df_by_prots(df): 1521 | protein_accession = df_prot.protein_id.values[0] 1522 | if error_dir is not None: 1523 | with h5py.File( 1524 | os.path.join( 1525 | error_dir, 1526 | filename_format.format(protein_accession)) 1527 | ) as hdf_root: 1528 | error_dist = hdf_root['dist'][...] 
1529 | size = int(np.sqrt(len(error_dist))) 1530 | error_dist = error_dist.reshape(size, size) 1531 | else: 1532 | error_dist = np.zeros((df_prot.shape[0], df_prot.shape[0])) 1533 | # amino acid residues of background PTM 1534 | background_aa = ptm_dict[ptm_background] 1535 | # indices of background_aa 1536 | background_idx = list(np.flatnonzero(df_prot.AA.isin(background_aa))) 1537 | # number of observed background modifications 1538 | n_aa_background_mod = np.sum(df_prot[ptm_background] == 1) 1539 | if n_aa_background_mod >= 1: 1540 | # indices of observed background PTMs 1541 | real_background_idx = df_prot.index[df_prot[ptm_background] == 1].tolist() 1542 | # list of random index lists for background PTMs 1543 | # @TODO: probably slowish due to making lists of 10000 elements, 1544 | # perhaps this can be avoided 1545 | background_idx_list = [random.sample( 1546 | background_idx, 1547 | len(real_background_idx)) for i in np.arange(0, n_random)] 1548 | # Combine real and random backround list with the real indices at 1549 | # position 0 1550 | background_idx_list.insert(0,real_background_idx) 1551 | # amino acid residues of target PTM 1552 | target_aa = ptm_dict[ptm_target] 1553 | # indices of target_aa 1554 | target_aa_idx = list(np.flatnonzero(df_prot.AA.isin(target_aa))) 1555 | # indices of observed target PTMs 1556 | target_mod_idx = df_prot.index[df_prot[ptm_target] == 1].tolist() 1557 | # index of observed PTMs within index list of all target_aa 1558 | target_aa_idx_mod_idx = [i for i in np.arange(len(target_aa_idx)) if target_aa_idx[i] in target_mod_idx] 1559 | distance_res, distance_1D_res = calculate_distances_between_ptms( 1560 | background_idx_list=np.array(background_idx_list), 1561 | target_aa_idx=np.array(target_aa_idx), 1562 | coords=np.vstack([ 1563 | df_prot.x_coord_ca.values, 1564 | df_prot.y_coord_ca.values, 1565 | df_prot.z_coord_ca.values]).T, 1566 | positions=df_prot.position.values, 1567 | error_dist=error_dist) 1568 | 
            prot_distances.append(distance_res)
            prot_distances_1D.append(distance_1D_res)
            prot_mod_idx.append(target_aa_idx_mod_idx)
    return prot_distances, prot_distances_1D, prot_mod_idx


# This function could be numba compatible
def get_mod_ptm_fraction(
    distances: list,
    mod_idx: list,
    min_dist: int,
    max_dist: int
) -> list:
    """
    Calculate the fraction of modified PTM acceptor residues within
    a distance range.

    Parameters
    ----------
    distances: list
        List of 1D or 3D distances.
    mod_idx: lists
        List of indices to select which distances to consider.
    min_dist: int
        Minimum distance of the bin.
    max_dist: int
        Maximum distance of the bin.

    Returns
    -------
    : list
        Per background sample (the observed background first, random samples
        after it), the fraction of modified PTM acceptor residues within
        the provided distance range.
    """
    # One counter per background sample (index 0 = observed background).
    n_aa = [0]*len(distances[0])
    n_aa_mod = [0]*len(distances[0])
    for idx, p in enumerate(distances):
        rand_count = 0
        for rand in p:
            for back in rand:
                # Count residue pairs falling into the (min_dist, max_dist]
                # bin, over all target residues ...
                n_aa[rand_count] += len([i for i in back if ((i > min_dist) & (i <= max_dist))])
                # ... and over the modified target residues only.
                mod_back = [back[i] for i in mod_idx[idx]]
                n_aa_mod[rand_count] += len([i for i in mod_back if ((i > min_dist) & (i <= max_dist))])
            rand_count += 1
    mod_fraction = [mod/aa if aa>0 else np.nan for aa,mod in zip(n_aa, n_aa_mod)]
    return mod_fraction


def evaluate_ptm_colocalization(
    df: pd.DataFrame,
    ptm_target: str,
    ptm_types: list,
    ptm_dict: dict,
    pae_dir: str,
    filename_format: str = "pae_{}.hdf",
    n_random: int = 5,
    random_seed: int = 44,
    min_dist: float = -0.01,
    max_dist: float = 35,
    dist_step: float = 5
) -> pd.DataFrame:
    """
    Evaluate for a given target PTM type if modifications preferentially occur
1631 | closer to the provided background PTM types than expected by chance or at 1632 | distance bins that are further away. 1633 | 1634 | Parameters 1635 | ---------- 1636 | df : pd.DataFrame 1637 | Dataframe with AlphaFold annotations. 1638 | ptm_target : str 1639 | String specifying the PTM type for which you want to evaluate if it 1640 | is in colocalizing with the background. 1641 | ptm_types : list of strings 1642 | List of strings specifying the PTM types that should be used as 1643 | background. If "self" is included, this means that the ptm_target 1644 | is used also as backround modification. 1645 | ptm_dict : dict 1646 | Dictionary containing the possible amino acid sites for each PTM. 1647 | pae_dir : str 1648 | Path to the directory where the hdf files containing the matrices of 1649 | paired aligned errors of AlphaFold are stored. 1650 | filename_format : str 1651 | The file name of the pae files saved by download_alphafold_pae. 1652 | The brackets {} are replaced by a protein name from the proteins list. 1653 | Default is 'pae_{}.hdf'. 1654 | n_random : int 1655 | Number of random permutations to perform. Default is 10'000. 1656 | The higher the number of permutations, the more confidence the analysis 1657 | can achieve. However, a very high number of permutations increases 1658 | processing time. No fewer than 1'000 permutations should be used. 1659 | random_seed : int 1660 | Random seed for the analysis. Default is 44. 1661 | min_dist : float 1662 | Minimum distance to consider. 1663 | Default is 0, meaning that the target amino acid is included itself. 1664 | max_dist : float 1665 | Maximum distance to consider. 1666 | Default is 35. 1667 | dist_step : float 1668 | Stepsize for distance bins between min_dist and max_dist. 1669 | Default is 5. 
1670 | 1671 | Returns 1672 | ------- 1673 | : pd.DataFrame 1674 | Dataframe with following columns: 'context', 'ptm_types', 'cutoff', 1675 | 'std_random_fraction', 'variable', 'value' 1676 | """ 1677 | distance_cutoffs = np.arange(min_dist, max_dist, dist_step) 1678 | # might want to change to np.linspace above 1679 | cutoff_list = list() 1680 | ptm_list = list() 1681 | real_fraction_3D = list() 1682 | mean_random_fraction_3D = list() 1683 | std_random_fraction_3D = list() 1684 | ttest_pval_3D = list() 1685 | real_fraction_1D = list() 1686 | mean_random_fraction_1D = list() 1687 | std_random_fraction_1D = list() 1688 | ttest_pval_1D = list() 1689 | for ptm_type in ptm_types: 1690 | if ptm_target == 'self': 1691 | ptm_target = ptm_type 1692 | distances_3D, distances_1D, mod_idx = get_ptm_distance_list( 1693 | df=df, 1694 | ptm_target=ptm_target, 1695 | ptm_background=ptm_type, 1696 | ptm_dict=ptm_dict, 1697 | error_dir=pae_dir, 1698 | filename_format=filename_format, 1699 | n_random=n_random, 1700 | random_seed=random_seed 1701 | ) 1702 | dist_i = 0 1703 | for dist_cut in distance_cutoffs: 1704 | ptm_list.append(ptm_type) 1705 | cutoff_list.append(dist_cut+dist_step) 1706 | if dist_i == 0: 1707 | # make sure that the minimum is incuded 1708 | dist_step_mod = 0.001 1709 | else: 1710 | dist_step_mod = 0 1711 | mod_fraction_3D = get_mod_ptm_fraction( 1712 | distances_3D, 1713 | mod_idx, 1714 | min_dist=dist_cut-dist_step_mod, 1715 | max_dist=dist_cut+dist_step) 1716 | real_fraction_3D.append(mod_fraction_3D[0]) 1717 | mean_random_fraction_3D.append(np.mean(mod_fraction_3D[1:])) 1718 | std_random_fraction_3D.append(np.std(mod_fraction_3D[1:])) 1719 | ttest_pval_3D.append(scipy.stats.ttest_1samp(mod_fraction_3D[1:], mod_fraction_3D[0]).pvalue) 1720 | mod_fraction_1D = get_mod_ptm_fraction( 1721 | distances_1D, 1722 | mod_idx, 1723 | min_dist=dist_cut-dist_step_mod, 1724 | max_dist=dist_cut+dist_step) 1725 | real_fraction_1D.append(mod_fraction_1D[0]) 1726 | 
mean_random_fraction_1D.append(np.mean(mod_fraction_1D[1:])) 1727 | std_random_fraction_1D.append(np.std(mod_fraction_1D[1:])) 1728 | ttest_pval_1D.append(scipy.stats.ttest_1samp(mod_fraction_1D[1:], mod_fraction_1D[0]).pvalue) 1729 | dist_i += 1 1730 | res_df_3D = pd.DataFrame({ 1731 | 'context': np.repeat('3D', len(cutoff_list)), 1732 | 'cutoff': cutoff_list, 1733 | 'ptm_types': ptm_list, 1734 | 'Observed': real_fraction_3D, 1735 | 'Random sampling': mean_random_fraction_3D, 1736 | 'std_random_fraction': std_random_fraction_3D, 1737 | 'pvalue': ttest_pval_3D}) 1738 | res_df_1D = pd.DataFrame({ 1739 | 'context': np.repeat('1D', len(cutoff_list)), 1740 | 'cutoff': cutoff_list, 1741 | 'ptm_types': ptm_list, 1742 | 'Observed': real_fraction_1D, 1743 | 'Random sampling': mean_random_fraction_1D, 1744 | 'std_random_fraction': std_random_fraction_1D, 1745 | 'pvalue': ttest_pval_1D}) 1746 | res_df_3D = res_df_3D.melt( 1747 | id_vars=["context", "ptm_types", "cutoff", "std_random_fraction","pvalue"]) 1748 | res_df_1D = res_df_1D.melt( 1749 | id_vars=["context", "ptm_types", "cutoff", "std_random_fraction","pvalue"]) 1750 | res_df = pd.concat([res_df_3D, res_df_1D]) 1751 | res_df['std_random_fraction'] = np.where( 1752 | res_df.variable == 'Observed', 0, res_df.std_random_fraction) 1753 | return res_df 1754 | 1755 | 1756 | def extract_motifs_in_proteome( 1757 | alphafold_df: pd.DataFrame, 1758 | motif_df: pd.DataFrame 1759 | ): 1760 | """ 1761 | Function to find occurences of short linear motifs in the proteome. 1762 | 1763 | Parameters 1764 | ---------- 1765 | alphafold_df : pd.DataFrame 1766 | Dataframe with AlphaFold annotations. 1767 | motif_df : pd.DataFrame 1768 | Dataframe with following columns: 'enzyme', 'motif', 'mod_pos'. 1769 | 1770 | Returns 1771 | ------- 1772 | : pd.DataFrame 1773 | Dataframe containing information about short linear motifs in the 1774 | proteome. 
Following columns are privided: 'protein_id', 'enzyme', 1775 | 'motif','position','AA','motif_start','motif_end','sequence_window' 1776 | """ 1777 | proteins = list() 1778 | enzyme_list = list() 1779 | motif_list = list() 1780 | site_list = list() 1781 | start_list = list() 1782 | end_list = list() 1783 | AA_list = list() 1784 | sequence_window_list = list() 1785 | for df_prot in partition_df_by_prots(alphafold_df): 1786 | df_prot['flexible_pattern'] = 0 1787 | protein_accession = df_prot.protein_id.values[0] 1788 | sequence = ''.join(df_prot.AA) 1789 | for i in np.arange(0, motif_df.shape[0]): 1790 | for j in re.finditer(motif_df.motif.values[i], sequence): 1791 | proteins.append(protein_accession) 1792 | enzyme_list.append(motif_df.enzyme.values[i]) 1793 | motif_list.append(motif_df.motif.values[i]) 1794 | site_list.append(j.start() + motif_df.mod_pos.values[i] + 1) 1795 | start_list.append(j.start() + 1) 1796 | end_list.append(j.end()) 1797 | AA_list.append(sequence[j.start() + motif_df.mod_pos.values[i]]) 1798 | sequence_window_list.append(sequence[(j.start() + motif_df.mod_pos.values[i] - 10): (j.start() + motif_df.mod_pos.values[i] + 10)]) 1799 | motif_res = pd.DataFrame({ 1800 | 'protein_id': proteins, 1801 | 'enzyme': enzyme_list, 1802 | 'motif': motif_list, 1803 | 'position': site_list, 1804 | 'AA': AA_list, 1805 | 'motif_start': start_list, 1806 | 'motif_end': end_list, 1807 | 'sequence_window': sequence_window_list}) 1808 | return motif_res 1809 | 1810 | 1811 | def import_ptms_for_structuremap( 1812 | file: str, 1813 | organism: str 1814 | ) -> pd.DataFrame: 1815 | """ 1816 | Function to import PTM datasets. 1817 | 1818 | Parameters 1819 | ---------- 1820 | file : str 1821 | Path to the PTM dataset to load. 1822 | This can be processed by AlphaPept, Spectronaut, MaxQuant, DIA-NN or 1823 | FragPipe. 1824 | organism : str 1825 | Organism for which a fasta file should be imported. 
1826 | 1827 | Returns 1828 | ------- 1829 | : pd.DataFrame 1830 | Dataframe with PTM information. It contains following columns: 1831 | protein_id: a unique UniProt identifier; 1832 | AA: the one letter amino acid abbreviation of the PTM acceptor; 1833 | position: the sequence position of the PTM acceptor 1834 | (the first amino acid has position 1); 1835 | : N columns for N different PTM types where 1 indicates that 1836 | the PTM is present at the given amino acid postition 1837 | and 0 indicates no modification 1838 | """ 1839 | try: 1840 | from alphamap.organisms_data import import_fasta 1841 | from alphamap.importing import import_data 1842 | from alphamap.preprocessing import format_input_data 1843 | except ModuleNotFoundError: 1844 | raise ModuleNotFoundError(f"Please install alphamap. Subsequently install pandas==1.4.0.") 1845 | fasta_in = import_fasta(organism) 1846 | df = import_data(file) 1847 | df = format_input_data(df=df, 1848 | fasta=fasta_in, 1849 | modification_exp=r'\[.*?\]') 1850 | ptm_df = df.explode(['PTMsites', 'PTMtypes']) 1851 | ptm_df = ptm_df.dropna(subset=['PTMsites', 'PTMtypes']) 1852 | ptm_df = ptm_df.astype({'PTMsites': 'int32'}) 1853 | ptm_df["AA"] = ptm_df.apply( 1854 | lambda x: x["naked_sequence"][x["PTMsites"]], 1855 | axis=1) 1856 | ptm_df["position"] = ptm_df.apply( 1857 | lambda x: x["start"]+x["PTMsites"]+1, 1858 | axis=1) 1859 | ptm_df = ptm_df[["unique_protein_id", "AA", "position", "PTMtypes"]] 1860 | ptm_df = pd.get_dummies( 1861 | ptm_df, prefix="", prefix_sep='', columns=["PTMtypes"]) 1862 | ptm_df = ptm_df.rename(columns={"unique_protein_id": "protein_id"}) 1863 | ptm_df = ptm_df.groupby(['protein_id', 'AA', 'position']) 1864 | ptm_df = ptm_df.max() 1865 | ptm_df = ptm_df.reset_index() 1866 | ptm_df = ptm_df.drop_duplicates() 1867 | ptm_df = ptm_df.reset_index(drop=True) 1868 | return ptm_df 1869 | 1870 | 1871 | def format_for_3Dviz( 1872 | df: pd.DataFrame, 1873 | ptm_dataset: str 1874 | ) -> pd.DataFrame: 1875 | 
    """
    Format a PTM dataframe for 3D visualization.

    Parameters
    ----------
    df : pd.DataFrame
        Dataframe with PTM annotations (columns 'protein_id', 'AA',
        'position' and one binary column per PTM type).
    ptm_dataset : str
        Name of the binary PTM column to visualize.

    Returns
    -------
    : pd.DataFrame
        Dataframe restricted to the modified positions of the selected PTM
        type, renamed to the column layout of the visualization tooling
        (presumably AlphaMap's sequence plot — TODO confirm).
    """
    df_mod = df[["protein_id", "AA", "position", ptm_dataset]]
    df_mod = df_mod.rename(columns={"protein_id": "unique_protein_id",
                                    "AA": "modified_sequence",
                                    "position": "start"})
    # Make each row's sequence label unique by appending its row number.
    df_mod["modified_sequence"] = [mod+"_"+str(i) for i,mod in enumerate(df_mod["modified_sequence"])]
    df_mod["all_protein_ids"] = df_mod["unique_protein_id"]
    df_mod["PTMsites"] = 0
    # Convert 1-based positions to 0-based start/end coordinates.
    df_mod["start"] = df_mod["start"]-1
    df_mod["end"] = df_mod["start"]
    df_mod["PTMsites"] = [[i] for i in df_mod["PTMsites"]]
    df_mod = df_mod[df_mod[ptm_dataset] == 1]
    df_mod["marker_symbol"] = 1
    df_mod["PTMtypes"] = [[ptm_dataset] for i in df_mod["PTMsites"]]
    df_mod = df_mod.dropna(subset=['PTMtypes']).reset_index(drop=True)
    return df_mod
--------------------------------------------------------------------------------
/structuremap/utils.py:
--------------------------------------------------------------------------------
#!python
"""This module provides generic utilities.
These utilities primarily focus on:
    - logging
"""

# builtin
import logging
import os
import sys

PROGRESS_CALLBACK = True


def set_logger(
    *,
    stream: bool = True,
    log_level: int = logging.INFO,
):
    """Set the log stream and file.
    All previously set handlers will be disabled with this command.
    Parameters
    ----------
    stream : bool
        If False, no log data is sent to stream.
        If True, all logging can be tracked with stdout stream.
        Default is True.
    log_level : int
        The logging level. Usable values are defined in Python's "logging"
        module.
        Default is logging.INFO.
    """
    # NOTE(review): the 'time' import and the global declaration below appear
    # unused in this function — candidates for removal.
    import time
    global PROGRESS_CALLBACK
    root = logging.getLogger()
    formatter = logging.Formatter(
        '%(asctime)s> %(message)s', "%Y-%m-%d %H:%M:%S"
    )
    root.setLevel(log_level)
    # Remove all previously registered handlers so logging is reconfigured
    # from scratch.
    while root.hasHandlers():
        root.removeHandler(root.handlers[0])
    if stream:
        stream_handler = logging.StreamHandler(sys.stdout)
        stream_handler.setLevel(log_level)
        stream_handler.setFormatter(formatter)
        root.addHandler(stream_handler)
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MannLabs/structuremap/f14b4325e30f16394ea819af2e29f9c68f786ee4/tests/__init__.py
--------------------------------------------------------------------------------
/tests/run_tests.sh:
--------------------------------------------------------------------------------
conda activate structuremap
python -m unittest test_cli
python -m unittest test_gui
python -m unittest test_processing
jupyter nbconvert --execute --inplace --to notebook --NotebookClient.kernel_name="python" ../nbs/tutorial.ipynb
conda deactivate
--------------------------------------------------------------------------------
/tests/test_cli.py:
--------------------------------------------------------------------------------
#!python -m unittest tests.test_utils
"""This module provides unit tests for structuremap.cli."""

# builtin
import unittest

# local
import structuremap.cli

if __name__ == "__main__":
    unittest.main()
--------------------------------------------------------------------------------
/tests/test_gui.py:
--------------------------------------------------------------------------------
#!python -m unittest tests.test_utils
"""This module provides unit tests for structuremap.gui."""

# builtin
import unittest

# local
import structuremap.gui

if __name__ == "__main__":
    unittest.main()
--------------------------------------------------------------------------------
/tests/test_processing.py:
--------------------------------------------------------------------------------
#!python -m unittest tests.test_processing
import numba
import numpy as np
import pandas as pd
import tqdm
import h5py
import random
import statsmodels.stats.multitest
import urllib.request, json
import os
import socket
import re
import Bio.PDB.MMCIF2Dict
from itertools import groupby
import unittest
from scipy.spatial.transform import Rotation as R
from Bio import PDB

from structuremap.processing import download_alphafold_cif, \
    download_alphafold_pae, \
    format_alphafold_data, \
    get_3d_dist, \
    rotate_vector_around_axis, \
    get_angle, \
    get_paired_error, \
    get_neighbors, \
    annotate_accessibility, \
    smooth_score, \
    get_smooth_score, \
    get_avg_3d_dist, \
    get_avg_1d_dist, \
    find_idr_pattern, \
    annotate_proteins_with_idr_pattern, \
    extend_flexible_pattern, \
    get_extended_flexible_pattern, \
    get_mod_ptm_fraction


# Test fixtures live in <repo root>/data/test_files.
THIS_FOLDER = os.path.dirname(__file__)
TEST_FOLDER = os.path.join(
    f"{os.path.dirname(THIS_FOLDER)}",
    "data",
    "test_files",
)

class TestProcessing(unittest.TestCase):
    def test_download_alphafold_cif(self, ):
        # Requires network access to the AlphaFold download server.
        valid, invalid, existing = download_alphafold_cif(
            proteins=['O15552','Q5VSL9','Q7Z6M3','O15552yy'],
            out_folder=TEST_FOLDER)

        np.testing.assert_equal(valid, np.array(['Q5VSL9']))
        np.testing.assert_equal(invalid, np.array(['O15552yy']))
        np.testing.assert_equal(existing, np.array(['O15552','Q7Z6M3']))

        os.remove(
            os.path.join(
                TEST_FOLDER,
                'Q5VSL9.cif'
            )
        )

    def test_download_alphafold_pae(self, ):
        # Requires network access to the AlphaFold download server.
        valid, invalid, existing = download_alphafold_pae(
            proteins=['O15552','Q5VSL9','Q7Z6M3','O15552yy'],
            out_folder=TEST_FOLDER)

        np.testing.assert_equal(valid, np.array(['Q5VSL9']))
        np.testing.assert_equal(invalid, np.array(['O15552yy']))
        np.testing.assert_equal(existing, np.array(['O15552','Q7Z6M3']))

        os.remove(
            os.path.join(
                TEST_FOLDER,
                'pae_Q5VSL9.hdf'
            )
        )

    def test_format_alphafold_data(self, ):
        alphafold_formatted = format_alphafold_data(
            directory=TEST_FOLDER, protein_ids=["Q7Z6M3","O15552"])

        alphafold_formatted_ini = pd.read_csv(
            os.path.join(
                TEST_FOLDER,
                'test_alphafold_annotation.csv'
            )
        )
        pd.testing.assert_frame_equal(alphafold_formatted, alphafold_formatted_ini, check_dtype=False)

    def test_get_3d_dist(self, ):
        x = np.array([1.1,1.1,1.1,1.1,5.1])
        y = np.array([1.1,2.1,3.1,1.1,10.1])
        z = np.array([1.1,3.1,5.1,1.1,4.1])
        coordinate_array = np.vstack([x,y,z]).T
        np.testing.assert_equal(2.236068, np.round(get_3d_dist(coordinate_array, coordinate_array, 0, 1), decimals=6))
        np.testing.assert_equal(4.472136, np.round(get_3d_dist(coordinate_array, coordinate_array, 0, 2), decimals=6))
        # Distance is symmetric in its two indices.
        np.testing.assert_equal(4.472136, np.round(get_3d_dist(coordinate_array, coordinate_array, 2, 0), decimals=6))

    def rotate_vector_around_axis_scipy(self, vector, axis, theta):
        # Reference implementation used to cross-check the project's own
        # rotation routine against scipy.
        theta = np.radians(theta)
        axis_norm = axis / np.linalg.norm(axis)
        r = R.from_rotvec(theta * axis_norm)
        return(r.apply(vector))

    def test_rotate_vector_around_axis(self, ):
        v = np.array([3.0, 5.0, 0.0])
        a = np.array([4.0, 4.0, 1.0])
        t = 90

        res_real = rotate_vector_around_axis(v, a, t)
        res_scipy = self.rotate_vector_around_axis_scipy(v, a, t)

        np.testing.assert_almost_equal(res_real, res_scipy, decimal=10)

def test_get_angle(self, ): 117 | x_a = np.array([1.1,1.1,1.1]) 118 | y_a = np.array([1.1,2.1,-3.1]) 119 | z_a = np.array([1.1,3.1,5.1]) 120 | x_b = np.array([1.5,np.nan,1.5]) 121 | y_b = np.array([1.5,2.5,3.5]) 122 | z_b = np.array([1.5,3.5,5.5]) 123 | x_c = np.array([1.5,1.5,10.6]) 124 | y_c = np.array([1.5,2.5,11.6]) 125 | z_c = np.array([1.5,3.5,5.6]) 126 | x_n = np.array([4.5,1.8,1.5]) 127 | y_n = np.array([40.5,7.8,3.5]) 128 | z_n = np.array([3.5,3.8,5.5]) 129 | 130 | coordinate_array_a = np.vstack([x_a,y_a,z_a]).T 131 | coordinate_array_b = np.vstack([x_b,y_b,z_b]).T 132 | coordinate_array_c = np.vstack([x_c,y_c,z_c]).T 133 | coordinate_array_n = np.vstack([x_n,y_n,z_n]).T 134 | 135 | np.testing.assert_equal(39.231520, 136 | np.round(get_angle(coordinate_array_a, coordinate_array_b, 137 | coordinate_array_c, coordinate_array_n, 138 | 0, 1), decimals=6)) 139 | np.testing.assert_equal(91.140756, 140 | np.round(get_angle(coordinate_array_a, coordinate_array_b, 141 | coordinate_array_c, coordinate_array_n, 142 | 0, 2), decimals=6)) 143 | np.testing.assert_equal(47.168228, 144 | np.round(get_angle(coordinate_array_a, coordinate_array_b, 145 | coordinate_array_c, coordinate_array_n, 146 | 2, 0), decimals=6)) 147 | 148 | # test gly 149 | np.testing.assert_equal(93.985035, 150 | np.round(get_angle(coordinate_array_a, coordinate_array_b, 151 | coordinate_array_c, coordinate_array_n, 152 | 1, 2), decimals=6)) 153 | 154 | def test_get_paired_error(self, ): 155 | pos = np.array([1,2,3]) 156 | error = np.array([[0,2,10],[1,0,5],[10,4,0]]) 157 | np.testing.assert_equal(2, get_paired_error(pos, error, 0,1)) 158 | np.testing.assert_equal(0, get_paired_error(pos, error, 2,2)) 159 | 160 | pos = np.array([1,3]) 161 | np.testing.assert_equal(10, get_paired_error(pos, error, 0,1)) 162 | 163 | def test_get_neighbors(self, ): 164 | idxl = np.array([0,1,2]) 165 | x_a = np.array([1.1,1.1,1.1]) 166 | y_a = np.array([1.1,2.1,-3.1]) 167 | z_a = np.array([1.1,3.1,5.1]) 168 | x_b = 
np.array([1.5,np.nan,1.5]) 169 | y_b = np.array([1.5,2.5,3.5]) 170 | z_b = np.array([1.5,3.5,5.5]) 171 | x_c = np.array([1.5,1.5,10.6]) 172 | y_c = np.array([1.5,2.5,11.6]) 173 | z_c = np.array([1.5,3.5,5.6]) 174 | x_n = np.array([4.5,1.8,1.5]) 175 | y_n = np.array([40.5,7.8,3.5]) 176 | z_n = np.array([3.5,3.8,5.5]) 177 | 178 | coordinate_array_a = np.vstack([x_a,y_a,z_a]).T 179 | coordinate_array_b = np.vstack([x_b,y_b,z_b]).T 180 | coordinate_array_c = np.vstack([x_c,y_c,z_c]).T 181 | coordinate_array_n = np.vstack([x_n,y_n,z_n]).T 182 | 183 | pos=np.array([1,2,3]) 184 | error = np.array([[0,2,10],[1,0,5],[10,4,0]]) 185 | 186 | np.testing.assert_equal(np.array([1, 0, 0]), 187 | get_neighbors(idxl, coordinate_array_a, coordinate_array_b, 188 | coordinate_array_c, coordinate_array_n, 189 | pos, error, 5, 40)) 190 | np.testing.assert_equal(np.array([1, 1, 0]), 191 | get_neighbors(idxl, coordinate_array_a, coordinate_array_b, 192 | coordinate_array_c, coordinate_array_n, 193 | pos, error, 5, 150)) 194 | np.testing.assert_equal(np.array([2, 2, 2]), 195 | get_neighbors(idxl, coordinate_array_a, coordinate_array_b, 196 | coordinate_array_c, coordinate_array_n, 197 | pos, error, 50, 140)) 198 | 199 | def test_annotate_accessibility(self, ): 200 | radius = 12.0 201 | 202 | alphafold_annotation = pd.read_csv( 203 | os.path.join( 204 | TEST_FOLDER, 205 | 'test_alphafold_annotation.csv' 206 | ) 207 | ) 208 | 209 | res_accessability = annotate_accessibility( 210 | df=alphafold_annotation[alphafold_annotation.protein_id=="Q7Z6M3"], 211 | max_dist=12, 212 | max_angle=90, 213 | error_dir=None) 214 | 215 | # comparison to https://biopython.org/docs/dev/api/Bio.PDB.HSExposure.html#Bio.PDB.HSExposure.HSExposureCB 216 | with open( 217 | os.path.join( 218 | TEST_FOLDER, 219 | 'Q7Z6M3.pdb' 220 | ) 221 | ) as pdbfile: 222 | p=PDB.PDBParser() 223 | s=p.get_structure('X', pdbfile) 224 | m=s[0] 225 | hse=PDB.HSExposureCB(m, radius) 226 | residue_list=PDB.Selection.unfold_entities(m,'R') 
227 | res_hse = [] 228 | for r in residue_list: 229 | res_hse.append(r.xtra['EXP_HSE_B_U']) 230 | 231 | np.testing.assert_equal(np.array(res_hse), res_accessability.nAA_12_90_nopae.values) 232 | 233 | # @ToDo: test with actual error_dir 234 | 235 | def test_smooth_score(self, ): 236 | np.testing.assert_equal(np.array([1.5, 2. , 3. , 4. , 4.5]),smooth_score(score=np.array([1,2,3,4,5]), half_window=1)) 237 | 238 | def test_get_smooth_score(self, ): 239 | testdata = pd.DataFrame({'protein_id':[1,1,1,1,1,1,2,2,2,2,2,2], 240 | 'protein_number':[1,1,1,1,1,1,2,2,2,2,2,2], 241 | 'position':[1,2,3,4,5,6,1,2,3,4,5,6], 242 | 'score':[1,2,3,4,5,6,7,8,9,10,11,12], 243 | 'score_2':[10,20,30,40,50,60,70,80,90,100,110,120]}) 244 | test_res = get_smooth_score(testdata, np.array(['score','score_2']), [1]) 245 | np.testing.assert_equal([1.5,2,3,4,5,5.5,7.5,8,9,10,11,11.5], test_res.score_smooth1.values) 246 | np.testing.assert_equal([15,20,30,40,50,55,75,80,90,100,110,115], test_res.score_2_smooth1.values) 247 | 248 | def test_get_avg_3d_dist(self, ): 249 | x = np.array([1.1,1.1,1.1,1.1,1.1,1.1]) 250 | y = np.array([1.1,2.1,3.1,1.1,10.1,20.1]) 251 | z = np.array([1.1,3.1,5.1,10.1,11.1,12.1]) 252 | pos = np.array([1,2,3,4,5,6]) 253 | error = np.array([[0,2,10,2,3,4],[1,0,5,3,2,9],[10,4,0,3,6,7],[10,4,5,0,6,7],[10,4,5,3,0,7],[10,4,0,3,6,0]]) 254 | 255 | coordinate_array = np.vstack([x,y,z]).T 256 | 257 | np.testing.assert_equal(6.976812, np.round(get_avg_3d_dist(np.array([0,4]), coordinate_array, pos, error), decimals=6)) 258 | np.testing.assert_equal(3.5, np.round(get_avg_3d_dist(np.array([0,2]), coordinate_array, pos, error), decimals=6)) 259 | 260 | np.testing.assert_equal(5.668168, np.round(get_avg_3d_dist(np.array([0,3,4]), coordinate_array, pos, error), decimals=6)) 261 | np.testing.assert_equal(4.666667, np.round(get_avg_3d_dist(np.array([0,3,4]), coordinate_array, pos, error, metric='min'), decimals=6)) 262 | 263 | np.testing.assert_equal(14, 
np.round(get_avg_3d_dist(np.array([0,4]), coordinate_array, pos, error, error_operation='plus'), decimals=6)) 264 | error = 0.1*error 265 | np.testing.assert_equal(13.876812, np.round(get_avg_3d_dist(np.array([0,4]), coordinate_array, pos, error, error_operation='plus'), decimals=6)) 266 | 267 | x = np.array([1.1,1.1,1.1,1.1]) 268 | y = np.array([1.1,1.1,10.1,20.1]) 269 | z = np.array([1.1,10.1,11.1,12.1]) 270 | pos = np.array([1,4,5,6]) 271 | error = np.array([[0,2,10,2,3,4],[1,0,5,3,2,9],[10,4,0,3,6,7],[10,4,5,0,6,7],[10,4,5,3,0,7],[10,4,0,3,6,0]]) 272 | 273 | coordinate_array = np.vstack([x,y,z]).T 274 | 275 | np.testing.assert_equal(6.976812, np.round(get_avg_3d_dist(np.array([0,2]), coordinate_array, pos, error), decimals=6)) 276 | 277 | def test_get_avg_1d_dist(self, ): 278 | pos = np.array([1,2,3,4,5,6]) 279 | np.testing.assert_equal(4, np.round(get_avg_1d_dist(np.array([0,4]), pos), decimals=6)) 280 | np.testing.assert_equal(2.666667, np.round(get_avg_1d_dist(np.array([0,3,4]), pos), decimals=6)) 281 | np.testing.assert_equal(1.666667, np.round(get_avg_1d_dist(np.array([0,3,4]), pos, metric='min'), decimals=6)) 282 | 283 | pos = np.array([1,4,5,6]) 284 | np.testing.assert_equal(4, np.round(get_avg_1d_dist(np.array([0,2]), pos), decimals=6)) 285 | np.testing.assert_equal(2.666667, np.round(get_avg_1d_dist(np.array([0,1,2]), pos), decimals=6)) 286 | 287 | def test_find_idr_pattern(self, ): 288 | assert find_idr_pattern(idr_list = [[0,300],[1,10],[0,500],[1,500]])[0] == True 289 | assert find_idr_pattern(idr_list = [[0,300],[1,50],[0,500]])[0] == False 290 | assert find_idr_pattern(idr_list = [[0,50],[0,50],[1,50],[0,500]])[0] == False 291 | assert find_idr_pattern(idr_list = [[0,30],[0,300],[1,50],[0,50]])[0] == False 292 | assert find_idr_pattern(idr_list = [[0,30]])[0] == False 293 | 294 | assert find_idr_pattern(idr_list = [[0,300],[1,10],[0,500],[1,500]])[1][0][0] == [301] 295 | assert find_idr_pattern(idr_list = [[0,300],[1,10],[0,500],[1,500]])[1][0][1] 
== [310] 296 | assert find_idr_pattern(idr_list = [[1,10],[0,300],[1,10],[0,500],[1,500]])[1][0][0] == [311] 297 | assert find_idr_pattern(idr_list = [[1,10],[0,300],[1,10],[0,500],[1,500]])[1][0][1] == [320] 298 | 299 | def test_annotate_proteins_with_idr_pattern(self, ): 300 | testdata = pd.DataFrame({'protein_id':[1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2], 301 | 'protein_number':[1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2], 302 | 'position':[1,2,3,4,5,6,7,8,9,10,11,12,1,2,3,4,5,6], 303 | 'IDR':[0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0]}) 304 | test_res = annotate_proteins_with_idr_pattern(testdata, 3, 3) 305 | np.testing.assert_equal([0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 306 | list(test_res.flexible_pattern.values)) 307 | 308 | def test_extend_flexible_pattern(self, ): 309 | np.testing.assert_equal(np.array([1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0]), 310 | extend_flexible_pattern(np.array([1,1,1,0,0,0,0,1,1,0,0,0,0]),1)) 311 | 312 | def test_get_extended_flexible_pattern(self, ): 313 | testdata = pd.DataFrame({'protein_id':[1,1,1,1,1,1,2,2,2,2,2,2], 314 | 'protein_number':[1,1,1,1,1,1,2,2,2,2,2,2], 315 | 'position':[1,2,3,4,5,6,1,2,3,4,5,6], 316 | 'score':[1,1,0,0,0,1,1,1,0,0,0,0], 317 | 'score_2':[0,0,0,0,0,0,0,0,0,0,0,1]}) 318 | test_res = get_extended_flexible_pattern(testdata, np.array(['score','score_2']), [1]) 319 | np.testing.assert_equal([1,1,1,0,1,1,1,1,1,0,0,0], test_res.score_extended_1.values) 320 | test_res = get_extended_flexible_pattern(testdata, np.array(['score','score_2']), [2]) 321 | np.testing.assert_equal([1,1,1,1,1,1,1,1,1,1,0,0], test_res.score_extended_2.values) 322 | np.testing.assert_equal([0,0,0,0,0,0,0,0,0,1,1,1], test_res.score_2_extended_2.values) 323 | 324 | def test_get_mod_ptm_fraction(self, ): 325 | # Example with 2 proteins and 2 randomizations 326 | # 1st protein with 3 modified lysines and 3 STY sites > 1 phospho 327 | # 2nd protein with 2 modified lysines and 4 STY sites > 2 phospho 328 | distances = [ 329 | [[[10, 20, 30], 
[2, 10, 20], [5, 8, 30]], # protein 1 > real 330 | [[30, 20, 50], [20, 10, 20], [50, 10, 30]], # protein 1 > random 1 331 | [[20, 50, 10], [50, 40, 10], [50, 20, 30]]], # protein 1 > random 2 332 | [[[10, 10, 30, 50], [50, 10, 5, 50]], # protein 2 > real 333 | [[50, 20, 30, 40], [20, 20, 10, 80]], # protein 2 > random 1 334 | [[15, 10, 30, 10], [10, 10, 20, 20]]]] # protein 2 > random 2 335 | mod_idx = [[0], # protein 1 336 | [1, 2]] # protein 2 337 | modidied_fraction = get_mod_ptm_fraction( 338 | distances, mod_idx, min_dist=0, max_dist=10) 339 | # Real: 340 | # n_aa: 1,2,2,2,2 341 | # n_mod: 1,1,1,1,2 342 | # final: 9,6 343 | 344 | # Random 1: 345 | # n_aa: 0,1,1,0,1 346 | # n_mod: 0,0,0,0,1 347 | # final: 3,1 348 | 349 | # Random 2: 350 | # n_aa: 1,1,0,2,2 351 | # n_mod: 0,0,0,1,1 352 | # final: 6,2 353 | 354 | # Fractions: 0.66, 0.33, 0.33 355 | 356 | np.testing.assert_almost_equal( 357 | modidied_fraction, 358 | [0.66666666, 0.33333333, 0.33333333]) 359 | modidied_fraction = get_mod_ptm_fraction( 360 | distances, mod_idx, min_dist=5, max_dist=10) 361 | np.testing.assert_almost_equal( 362 | modidied_fraction, 363 | [0.5, 0.33333333, 0.33333333]) 364 | 365 | 366 | if __name__ == "__main__": 367 | unittest.main() 368 | --------------------------------------------------------------------------------