├── .github └── workflows │ ├── python-package.yml │ └── python-publish.yml ├── .gitignore ├── LICENSE ├── README.md ├── mp_nerf ├── __init__.py ├── kb_proteins.py ├── massive_pnerf.py ├── ml_utils.py ├── proteins.py └── utils.py ├── notebooks ├── experiments │ ├── [131, 150]_info.joblib │ ├── [200, 250]_info.joblib │ ├── [331, 351]_info.joblib │ ├── [400, 450]_info.joblib │ ├── [500, 550]_info.joblib │ ├── [600, 650]_info.joblib │ ├── [700, 780]_info.joblib │ ├── [800, 900]_info.joblib │ ├── [905, 1070]_info.joblib │ ├── [905, 970]_info.joblib │ ├── logs_experiment.txt │ ├── logs_experiment_scn_various_lengths.txt │ └── profile_csv ├── experiments_manual │ ├── analyzed_prots.joblib │ ├── error_evolution.png │ ├── histogram_errors.png │ ├── profiler_capture.png │ └── rclab_data │ │ ├── 1000_ala.pdb │ │ ├── 500_ala.pdb │ │ ├── 5rsa_ribonuclease.pdb │ │ ├── LICENSE │ │ ├── il10_lactate_dh.pdb │ │ └── other_prots.csv ├── extend_measures.ipynb ├── integrated_alanines.py ├── integrated_test.py ├── preds │ ├── labels.pdb │ └── predicted.pdb ├── test_implementation_loop.ipynb ├── test_implementation_speed.ipynb └── xtension │ └── plots │ ├── A_plot_hists.png │ ├── C_plot_hists.png │ ├── D_plot_hists.png │ ├── E_plot_hists.png │ ├── F_plot_hists.png │ ├── G_plot_hists.png │ ├── H_plot_hists.png │ ├── I_plot_hists.png │ ├── K_plot_hists.png │ ├── L_plot_hists.png │ ├── M_plot_hists.png │ ├── N_plot_hists.png │ ├── P_plot_hists.png │ ├── Q_plot_hists.png │ ├── R_plot_hists.png │ ├── S_plot_hists.png │ ├── T_plot_hists.png │ ├── V_plot_hists.png │ ├── W_plot_hists.png │ ├── Y_plot_hists.png │ └── __plot_hists.png ├── setup.cfg ├── setup.py └── tests ├── test_main.py └── test_ml_utils.py /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: 
https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | 4 | name: Python package 5 | 6 | on: 7 | push: 8 | branches: [ main ] 9 | pull_request: 10 | branches: [ main ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | strategy: 17 | matrix: 18 | python-version: [3.7, 3.8] 19 | 20 | steps: 21 | - uses: actions/checkout@v2 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v2 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | - name: Install dependencies 27 | run: | 28 | python -m pip install --upgrade pip 29 | python -m pip install pytest 30 | python -m pip install -U proDy requests 31 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 32 | - name: Test with pytest 33 | run: | 34 | python setup.py test 35 | -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries 3 | 4 | name: Upload Python Package 5 | 6 | on: 7 | release: 8 | types: [created] 9 | 10 | jobs: 11 | deploy: 12 | 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v2 17 | - name: Set up Python 18 | uses: actions/setup-python@v2 19 | with: 20 | python-version: '3.x' 21 | - name: Install dependencies 22 | run: | 23 | python -m pip install --upgrade pip 24 | pip install setuptools wheel twine 25 | - name: Build and publish 26 | env: 27 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 28 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 29 | run: | 30 | python setup.py sdist bdist_wheel 31 | twine upload dist/* 32 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | */__pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | 7 | # to ignore 8 | *.DS_Store 9 | *sidechainnet_data/* 10 | *.pkl 11 | 12 | # C extensions 13 | *.so 14 | 15 | # Distribution / packaging 16 | .Python 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | wheels/ 29 | pip-wheel-metadata/ 30 | share/python-wheels/ 31 | *.egg-info/ 32 | .installed.cfg 33 | *.egg 34 | MANIFEST 35 | 36 | # PyInstaller 37 | # Usually these files are written by a python script from a template 38 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 39 | *.manifest 40 | *.spec 41 | 42 | # Installer logs 43 | pip-log.txt 44 | pip-delete-this-directory.txt 45 | 46 | # Unit test / coverage reports 47 | htmlcov/ 48 | .tox/ 49 | .nox/ 50 | .coverage 51 | .coverage.* 52 | .cache 53 | nosetests.xml 54 | coverage.xml 55 | *.cover 56 | *.py,cover 57 | .hypothesis/ 58 | .pytest_cache/ 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | local_settings.py 67 | db.sqlite3 68 | db.sqlite3-journal 69 | 70 | # Flask stuff: 71 | instance/ 72 | .webassets-cache 73 | 74 | # Scrapy stuff: 75 | .scrapy 76 | 77 | # Sphinx documentation 78 | docs/_build/ 79 | 80 | # PyBuilder 81 | target/ 82 | 83 | # Jupyter Notebook 84 | .ipynb_checkpoints 85 | 86 | # IPython 87 | profile_default/ 88 | ipython_config.py 89 | 90 | # pyenv 91 | .python-version 92 | 93 | # pipenv 94 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
95 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 96 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 97 | # install all needed dependencies. 98 | #Pipfile.lock 99 | 100 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 101 | __pypackages__/ 102 | 103 | # Celery stuff 104 | celerybeat-schedule 105 | celerybeat.pid 106 | 107 | # SageMath parsed files 108 | *.sage.py 109 | 110 | # Environments 111 | .env 112 | .venv 113 | env/ 114 | venv/ 115 | ENV/ 116 | env.bak/ 117 | venv.bak/ 118 | 119 | # Spyder project settings 120 | .spyderproject 121 | .spyproject 122 | 123 | # Rope project settings 124 | .ropeproject 125 | 126 | # mkdocs documentation 127 | /site 128 | 129 | # mypy 130 | .mypy_cache/ 131 | .dmypy.json 132 | dmypy.json 133 | 134 | # Pyre type checker 135 | .pyre/ 136 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Copyright (c) 2021, Eric Alcaide 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | 1. Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 2. Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote 16 | products derived from this software without specific prior written permission. 
17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MP-NeRF: Massively Parallel Natural Extension of Reference Frame 2 | 3 | This is the code for the paper "[MP-NeRF: A Massively Parallel Method for Accelerating Protein Structure Reconstruction from Internal Coordinates](https://www.biorxiv.org/content/10.1101/2021.06.08.446214v1)" 4 | 5 | The code can be installed via `pip` with 6 | 7 | ```bash 8 | $ pip install mp-nerf 9 | ``` 10 | 11 | ## Abstract 12 | 13 | The conversion of polymers between internal and Cartesian coordinates is a limiting step in many pipelines, such as molecular dynamics simulations and training of machine learning models. This conversion is typically carried out by sequential or parallel applications of the Natural extension of Reference Frame (NeRF) algorithm. 
14 | 15 | This work proposes a massively parallel NeRF implementation, which, depending on the polymer length, achieves speedups between 400-1200x over the most recent parallel NeRF implementation by dividing the conversion into three main phases: a parallel composition of the minimal repeated structure, the assembly of backbone subunits and the parallel elongation of sidechains. 16 | 17 | Special emphasis is placed on reusability and ease of use within diverse pipelines. We open source the code (available at https://github.com/EleutherAI/mp_nerf) and provide a corresponding Python package. 18 | 19 | 20 | ## Results: 21 | 22 | * **Tests**: in an intel i5 @ 2.4 ghz (cpu) and (intel i7-6700k @ 4GHz + Nvidia 1060GTX 6gb) (gpu) 23 | 24 | length | sota | **us (cpu)** | Nx | us (gpu) | us (hybrid) | 25 | ---------|--------|--------------|-------|----------|-------------| 26 | ~114 | 2.4s | **5.3ms** | ~446 | 21.1ms | 18.9ms | 27 | ~300 | 3.5s | **8.5ms** | ~400 | 26.2ms | 22.3ms | 28 | ~500 | 7.5s | **9.1ms** | ~651 | 29.2ms | 26.3ms | 29 | ~1000 | 18.66s | **15.3ms** | ~1200 | 43.3ms | 30.1ms | 30 | 31 | * **Profiler Trace (CPU)**: 32 | 
33 |
34 |
35 | 36 | #### Considerations 37 | 38 | * In the GPU algorithm, much of the time is spent in data transfers, and the sequential loop is very inefficient on the GPU. 39 | * About half of the time is spent in memory-access patterns and the sequential `for` loop, so ideally a further 2x speedup would be possible by optimizing it or running the sequential loop in Cython / Numba / similar. 40 | * Total profiler time should be multiplied by 0.5-0.63 to estimate the real time (see the execution above without the profiler), because profiling slows down the code. 41 | 42 | 43 | ## Installation: 44 | 45 | Just clone the repo 46 | 47 | You'll need: 48 | * torch > 1.6 49 | * numpy 50 | * einops 51 | 52 | Plus, if you want to run the experiments / work with data: 53 | * joblib 54 | * sidechainnet: https://github.com/jonathanking/sidechainnet#installation 55 | * manually install `ProDY`, `py3Dmol`, `snakeviz`: 56 | * `pip install proDy` 57 | * `pip install py3Dmol` 58 | * `pip install snakeviz` 59 | * any other package: `pip install package_name` 60 | 61 | 62 | * matplotlib (to do diagnostic plots) 63 | 64 | ## Citations: 65 | 66 | ```bibtex 67 | @article{Parsons2005PracticalCF, 68 | title={Practical conversion from torsion space to Cartesian space for in silico protein synthesis}, 69 | author={Jerod Parsons and J. B. Holmes and J. M. Rojas and J. Tsai and C. Strauss}, 70 | journal={Journal of Computational Chemistry}, 71 | year={2005}, 72 | volume={26} 73 | } 74 | ``` 75 | 76 | ```bibtex 77 | @article{AlQuraishi2018pNeRFPC, 78 | title={pNeRF: Parallelized Conversion from Internal to Cartesian Coordinates}, 79 | author={Mohammed AlQuraishi}, 80 | journal={bioRxiv}, 81 | year={2018} 82 | } 83 | ``` 84 | 85 | ```bibtex 86 | @article{Bayati2020HighperformanceTO, 87 | title={High‐performance transformation of protein structure representation from internal to Cartesian coordinates}, 88 | author={M. Bayati and M. Leeser and J. 
Bardhan}, 89 | journal={Journal of Computational Chemistry}, 90 | year={2020}, 91 | volume={41}, 92 | pages={2104 - 2114} 93 | } 94 | ``` 95 | 96 | -------------------------------------------------------------------------------- /mp_nerf/__init__.py: -------------------------------------------------------------------------------- 1 | from mp_nerf.massive_pnerf import * 2 | from mp_nerf.proteins import * -------------------------------------------------------------------------------- /mp_nerf/kb_proteins.py: -------------------------------------------------------------------------------- 1 | # Author: Eric Alcaide 2 | 3 | # A substantial part has been borrowed from 4 | # https://github.com/jonathanking/sidechainnet 5 | # 6 | # Here's the License for it: 7 | # 8 | # Copyright 2020 Jonathan King 9 | # Redistribution and use in source and binary forms, with or without modification, are permitted provided that the 10 | # following conditions are met: 11 | # 12 | # 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 13 | # 14 | # 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following 15 | # disclaimer in the documentation and/or other materials provided with the distribution. 16 | # 17 | # 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote 18 | # products derived from this software without specific prior written permission. 19 | # 20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 21 | # INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 25 | # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 | # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | 28 | import numpy as np 29 | 30 | ######################### 31 | ### FROM SIDECHAINNET ### 32 | ######################### 33 | 34 | # modified by considering rigid bodies in sidechains (remove extra torsions) 35 | 36 | SC_BUILD_INFO = { 37 | 'A': { 38 | 'angles-names': ['N-CA-CB'], 39 | 'angles-types': ['N -CX-CT'], 40 | 'angles-vals': [1.9146261894377796], 41 | 'atom-names': ['CB'], 42 | 'bonds-names': ['CA-CB'], 43 | 'bonds-types': ['CX-CT'], 44 | 'bonds-vals': [1.526], 45 | 'torsion-names': ['C-N-CA-CB'], 46 | 'torsion-types': ['C -N -CX-CT'], 47 | 'torsion-vals': ['p'], 48 | 'rigid-frames-idxs': [[0,1,2], [0,1,4]], 49 | }, 50 | 51 | 'R': { 52 | 'angles-names': [ 53 | 'N-CA-CB', 'CA-CB-CG', 'CB-CG-CD', 'CG-CD-NE', 'CD-NE-CZ', 'NE-CZ-NH1', 54 | 'NE-CZ-NH2' 55 | ], 56 | 'angles-types': [ 57 | 'N -CX-C8', 'CX-C8-C8', 'C8-C8-C8', 'C8-C8-N2', 'C8-N2-CA', 'N2-CA-N2', 58 | 'N2-CA-N2' 59 | ], 60 | 'angles-vals': [ 61 | 1.9146261894377796, 1.911135530933791, 1.911135530933791, 1.9408061282176945, 62 | 2.150245638457014, 2.0943951023931953, 2.0943951023931953 63 | ], 64 | 'atom-names': ['CB', 'CG', 'CD', 'NE', 'CZ', 'NH1', 'NH2'], 65 | 'bonds-names': ['CA-CB', 'CB-CG', 'CG-CD', 'CD-NE', 'NE-CZ', 'CZ-NH1', 'CZ-NH2'], 66 | 'bonds-types': ['CX-C8', 'C8-C8', 'C8-C8', 'C8-N2', 'N2-CA', 'CA-N2', 'CA-N2'], 67 | 'bonds-vals': [1.526, 1.526, 1.526, 1.463, 1.34, 1.34, 1.34], 68 | 'torsion-names': [ 69 | 'C-N-CA-CB', 'N-CA-CB-CG', 'CA-CB-CG-CD', 
'CB-CG-CD-NE', 'CG-CD-NE-CZ', 70 | 'CD-NE-CZ-NH1', 'CD-NE-CZ-NH2' 71 | ], 72 | 'torsion-types': [ 73 | 'C -N -CX-C8', 'N -CX-C8-C8', 'CX-C8-C8-C8', 'C8-C8-C8-N2', 'C8-C8-N2-CA', 74 | 'C8-N2-CA-N2', 'C8-N2-CA-N2' 75 | ], 76 | 'torsion-vals': ['p', 'p', 'p', 'p', 'p', 0., 3.141592], 77 | 'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5], [4,5,6], [5,6,7], [6,7,8]], 78 | }, 79 | 80 | 'N': { 81 | 'angles-names': ['N-CA-CB', 'CA-CB-CG', 'CB-CG-OD1', 'CB-CG-ND2'], 82 | 'angles-types': ['N -CX-2C', 'CX-2C-C ', '2C-C -O ', '2C-C -N '], 83 | 'angles-vals': [ 84 | 1.9146261894377796, 1.9390607989657, 2.101376419401173, 2.035053907825388 85 | ], 86 | 'atom-names': ['CB', 'CG', 'OD1', 'ND2'], 87 | 'bonds-names': ['CA-CB', 'CB-CG', 'CG-OD1', 'CG-ND2'], 88 | 'bonds-types': ['CX-2C', '2C-C ', 'C -O ', 'C -N '], 89 | 'bonds-vals': [1.526, 1.522, 1.229, 1.335], 90 | 'torsion-names': ['C-N-CA-CB', 'N-CA-CB-CG', 'CA-CB-CG-OD1', 'CA-CB-CG-ND2'], 91 | 'torsion-types': ['C -N -CX-2C', 'N -CX-2C-C ', 'CX-2C-C -O ', 'CX-2C-C -N '], 92 | 'torsion-vals': ['p', 'p', 'p', 'i'], 93 | 'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5], [4,5,6]], 94 | }, 95 | 96 | 'D': { 97 | 'angles-names': ['N-CA-CB', 'CA-CB-CG', 'CB-CG-OD1', 'CB-CG-OD2'], 98 | 'angles-types': ['N -CX-2C', 'CX-2C-CO', '2C-CO-O2', '2C-CO-O2'], 99 | 'angles-vals': [ 100 | 1.9146261894377796, 1.9390607989657, 2.0420352248333655, 2.0420352248333655 101 | ], 102 | 'atom-names': ['CB', 'CG', 'OD1', 'OD2'], 103 | 'bonds-names': ['CA-CB', 'CB-CG', 'CG-OD1', 'CG-OD2'], 104 | 'bonds-types': ['CX-2C', '2C-CO', 'CO-O2', 'CO-O2'], 105 | 'bonds-vals': [1.526, 1.522, 1.25, 1.25], 106 | 'torsion-names': ['C-N-CA-CB', 'N-CA-CB-CG', 'CA-CB-CG-OD1', 'CA-CB-CG-OD2'], 107 | 'torsion-types': ['C -N -CX-2C', 'N -CX-2C-CO', 'CX-2C-CO-O2', 'CX-2C-CO-O2'], 108 | 'torsion-vals': ['p', 'p', 'p', 'i'], 109 | 'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5], [4,5,6]], 110 | }, 111 | 112 | 'C': { 113 | 'angles-names': ['N-CA-CB', 'CA-CB-SG'], 114 | 
'angles-types': ['N -CX-2C', 'CX-2C-SH'], 115 | 'angles-vals': [1.9146261894377796, 1.8954275676658419], 116 | 'atom-names': ['CB', 'SG'], 117 | 'bonds-names': ['CA-CB', 'CB-SG'], 118 | 'bonds-types': ['CX-2C', '2C-SH'], 119 | 'bonds-vals': [1.526, 1.81], 120 | 'torsion-names': ['C-N-CA-CB', 'N-CA-CB-SG'], 121 | 'torsion-types': ['C -N -CX-2C', 'N -CX-2C-SH'], 122 | 'torsion-vals': ['p', 'p'], 123 | 'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5]], 124 | }, 125 | 126 | 'Q': { 127 | 'angles-names': ['N-CA-CB', 'CA-CB-CG', 'CB-CG-CD', 'CG-CD-OE1', 'CG-CD-NE2'], 128 | 'angles-types': ['N -CX-2C', 'CX-2C-2C', '2C-2C-C ', '2C-C -O ', '2C-C -N '], 129 | 'angles-vals': [ 130 | 1.9146261894377796, 1.911135530933791, 1.9390607989657, 2.101376419401173, 131 | 2.035053907825388 132 | ], 133 | 'atom-names': ['CB', 'CG', 'CD', 'OE1', 'NE2'], 134 | 'bonds-names': ['CA-CB', 'CB-CG', 'CG-CD', 'CD-OE1', 'CD-NE2'], 135 | 'bonds-types': ['CX-2C', '2C-2C', '2C-C ', 'C -O ', 'C -N '], 136 | 'bonds-vals': [1.526, 1.526, 1.522, 1.229, 1.335], 137 | 'torsion-names': [ 138 | 'C-N-CA-CB', 'N-CA-CB-CG', 'CA-CB-CG-CD', 'CB-CG-CD-OE1', 'CB-CG-CD-NE2' 139 | ], 140 | 'torsion-types': [ 141 | 'C -N -CX-2C', 'N -CX-2C-2C', 'CX-2C-2C-C ', '2C-2C-C -O ', '2C-2C-C -N ' 142 | ], 143 | 'torsion-vals': ['p', 'p', 'p', 'p', 'i'], 144 | 'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5], [4,5,6], [5,6,7]], 145 | }, 146 | 147 | 'E': { 148 | 'angles-names': ['N-CA-CB', 'CA-CB-CG', 'CB-CG-CD', 'CG-CD-OE1', 'CG-CD-OE2'], 149 | 'angles-types': ['N -CX-2C', 'CX-2C-2C', '2C-2C-CO', '2C-CO-O2', '2C-CO-O2'], 150 | 'angles-vals': [ 151 | 1.9146261894377796, 1.911135530933791, 1.9390607989657, 2.0420352248333655, 152 | 2.0420352248333655 153 | ], 154 | 'atom-names': ['CB', 'CG', 'CD', 'OE1', 'OE2'], 155 | 'bonds-names': ['CA-CB', 'CB-CG', 'CG-CD', 'CD-OE1', 'CD-OE2'], 156 | 'bonds-types': ['CX-2C', '2C-2C', '2C-CO', 'CO-O2', 'CO-O2'], 157 | 'bonds-vals': [1.526, 1.526, 1.522, 1.25, 1.25], 158 | 'torsion-names': [ 159 | 
'C-N-CA-CB', 'N-CA-CB-CG', 'CA-CB-CG-CD', 'CB-CG-CD-OE1', 'CB-CG-CD-OE2' 160 | ], 161 | 'torsion-types': [ 162 | 'C -N -CX-2C', 'N -CX-2C-2C', 'CX-2C-2C-CO', '2C-2C-CO-O2', '2C-2C-CO-O2' 163 | ], 164 | 'torsion-vals': ['p', 'p', 'p', 'p', 'i'], 165 | 'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5], [4,5,6], [5,6,7]], 166 | }, 167 | 168 | 'G': { 169 | 'angles-names': [], 170 | 'angles-types': [], 171 | 'angles-vals': [], 172 | 'atom-names': [], 173 | 'bonds-names': [], 174 | 'bonds-types': [], 175 | 'bonds-vals': [], 176 | 'torsion-names': [], 177 | 'torsion-types': [], 178 | 'torsion-vals': [], 179 | 'rigid-frames-idxs': [[0,1,2]], 180 | }, 181 | 182 | 'H': { 183 | 'angles-names': [ 184 | 'N-CA-CB', 'CA-CB-CG', 'CB-CG-ND1', 'CG-ND1-CE1', 'ND1-CE1-NE2', 'CE1-NE2-CD2' 185 | ], 186 | 'angles-types': [ 187 | 'N -CX-CT', 'CX-CT-CC', 'CT-CC-NA', 'CC-NA-CR', 'NA-CR-NB', 'CR-NB-CV' 188 | ], 189 | 'angles-vals': [ 190 | 1.9146261894377796, 1.9739673840055867, 2.0943951023931953, 191 | 1.8849555921538759, 1.8849555921538759, 1.8849555921538759 192 | ], 193 | 'atom-names': ['CB', 'CG', 'ND1', 'CE1', 'NE2', 'CD2'], 194 | 'bonds-names': ['CA-CB', 'CB-CG', 'CG-ND1', 'ND1-CE1', 'CE1-NE2', 'NE2-CD2'], 195 | 'bonds-types': ['CX-CT', 'CT-CC', 'CC-NA', 'NA-CR', 'CR-NB', 'NB-CV'], 196 | 'bonds-vals': [1.526, 1.504, 1.385, 1.343, 1.335, 1.394], 197 | 'torsion-names': [ 198 | 'C-N-CA-CB', 'N-CA-CB-CG', 'CA-CB-CG-ND1', 'CB-CG-ND1-CE1', 'CG-ND1-CE1-NE2', 199 | 'ND1-CE1-NE2-CD2' 200 | ], 201 | 'torsion-types': [ 202 | 'C -N -CX-CT', 'N -CX-CT-CC', 'CX-CT-CC-NA', 'CT-CC-NA-CR', 'CC-NA-CR-NB', 203 | 'NA-CR-NB-CV' 204 | ], 205 | 'torsion-vals': ['p', 'p', 'p', 3.141592653589793, 0.0, 0.0], 206 | 'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5], [4,5,6]], 207 | }, 208 | 209 | 'I': { 210 | 'angles-names': ['N-CA-CB', 'CA-CB-CG1', 'CB-CG1-CD1', 'CA-CB-CG2'], 211 | 'angles-types': ['N -CX-3C', 'CX-3C-2C', '3C-2C-CT', 'CX-3C-CT'], 212 | 'angles-vals': [ 213 | 1.9146261894377796, 1.911135530933791, 
1.911135530933791, 1.911135530933791 214 | ], 215 | 'atom-names': ['CB', 'CG1', 'CD1', 'CG2'], 216 | 'bonds-names': ['CA-CB', 'CB-CG1', 'CG1-CD1', 'CB-CG2'], 217 | 'bonds-types': ['CX-3C', '3C-2C', '2C-CT', '3C-CT'], 218 | 'bonds-vals': [1.526, 1.526, 1.526, 1.526], 219 | 'torsion-names': ['C-N-CA-CB', 'N-CA-CB-CG1', 'CA-CB-CG1-CD1', 'N-CA-CB-CG2'], 220 | 'torsion-types': ['C -N -CX-3C', 'N -CX-3C-2C', 'CX-3C-2C-CT', 'N -CX-3C-CT'], 221 | 'torsion-vals': ['p', 'p', 'p', -2.1315], # last one was 'p' in the original - but cg1-cg2 = "2.133" 222 | 'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5], [4,5,7]], 223 | }, 224 | 225 | 'L': { 226 | 'angles-names': ['N-CA-CB', 'CA-CB-CG', 'CB-CG-CD1', 'CB-CG-CD2'], 227 | 'angles-types': ['N -CX-2C', 'CX-2C-3C', '2C-3C-CT', '2C-3C-CT'], 228 | 'angles-vals': [ 229 | 1.9146261894377796, 1.911135530933791, 1.911135530933791, 1.911135530933791 230 | ], 231 | 'atom-names': ['CB', 'CG', 'CD1', 'CD2'], 232 | 'bonds-names': ['CA-CB', 'CB-CG', 'CG-CD1', 'CG-CD2'], 233 | 'bonds-types': ['CX-2C', '2C-3C', '3C-CT', '3C-CT'], 234 | 'bonds-vals': [1.526, 1.526, 1.526, 1.526], 235 | 'torsion-names': ['C-N-CA-CB', 'N-CA-CB-CG', 'CA-CB-CG-CD1', 'CA-CB-CG-CD2'], 236 | 'torsion-types': ['C -N -CX-2C', 'N -CX-2C-3C', 'CX-2C-3C-CT', 'CX-2C-3C-CT'], 237 | # extra torsion is in negative bc in mask construction, previous angle is summed. 
238 | 'torsion-vals': ['p', 'p', 'p', 2.1315], # last one was 'p' in the original - but cd1-cd2 = "-2.130" 239 | 'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5], [4,5,6]], 240 | }, 241 | 242 | 'K': { 243 | 'angles-names': ['N-CA-CB', 'CA-CB-CG', 'CB-CG-CD', 'CG-CD-CE', 'CD-CE-NZ'], 244 | 'angles-types': ['N -CX-C8', 'CX-C8-C8', 'C8-C8-C8', 'C8-C8-C8', 'C8-C8-N3'], 245 | 'angles-vals': [ 246 | 1.9146261894377796, 1.911135530933791, 1.911135530933791, 1.911135530933791, 247 | 1.9408061282176945 248 | ], 249 | 'atom-names': ['CB', 'CG', 'CD', 'CE', 'NZ'], 250 | 'bonds-names': ['CA-CB', 'CB-CG', 'CG-CD', 'CD-CE', 'CE-NZ'], 251 | 'bonds-types': ['CX-C8', 'C8-C8', 'C8-C8', 'C8-C8', 'C8-N3'], 252 | 'bonds-vals': [1.526, 1.526, 1.526, 1.526, 1.471], 253 | 'torsion-names': [ 254 | 'C-N-CA-CB', 'N-CA-CB-CG', 'CA-CB-CG-CD', 'CB-CG-CD-CE', 'CG-CD-CE-NZ' 255 | ], 256 | 'torsion-types': [ 257 | 'C -N -CX-C8', 'N -CX-C8-C8', 'CX-C8-C8-C8', 'C8-C8-C8-C8', 'C8-C8-C8-N3' 258 | ], 259 | 'torsion-vals': ['p', 'p', 'p', 'p', 'p'], 260 | 'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5], [4,5,6], [5,6,7], [6,7,8]], 261 | }, 262 | 263 | 'M': { 264 | 'angles-names': ['N-CA-CB', 'CA-CB-CG', 'CB-CG-SD', 'CG-SD-CE'], 265 | 'angles-types': ['N -CX-2C', 'CX-2C-2C', '2C-2C-S ', '2C-S -CT'], 266 | 'angles-vals': [ 267 | 1.9146261894377796, 1.911135530933791, 2.0018926520374962, 1.726130630222392 268 | ], 269 | 'atom-names': ['CB', 'CG', 'SD', 'CE'], 270 | 'bonds-names': ['CA-CB', 'CB-CG', 'CG-SD', 'SD-CE'], 271 | 'bonds-types': ['CX-2C', '2C-2C', '2C-S ', 'S -CT'], 272 | 'bonds-vals': [1.526, 1.526, 1.81, 1.81], 273 | 'torsion-names': ['C-N-CA-CB', 'N-CA-CB-CG', 'CA-CB-CG-SD', 'CB-CG-SD-CE'], 274 | 'torsion-types': ['C -N -CX-2C', 'N -CX-2C-2C', 'CX-2C-2C-S ', '2C-2C-S -CT'], 275 | 'torsion-vals': ['p', 'p', 'p', 'p'], 276 | 'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5], [4,5,6], [5,6,7]], 277 | }, 278 | 279 | 'F': { 280 | 'angles-names': [ 281 | 'N-CA-CB', 'CA-CB-CG', 'CB-CG-CD1', 'CG-CD1-CE1', 
'CD1-CE1-CZ', 'CE1-CZ-CE2', 282 | 'CZ-CE2-CD2' 283 | ], 284 | 'angles-types': [ 285 | 'N -CX-CT', 'CX-CT-CA', 'CT-CA-CA', 'CA-CA-CA', 'CA-CA-CA', 'CA-CA-CA', 286 | 'CA-CA-CA' 287 | ], 288 | 'angles-vals': [ 289 | 1.9146261894377796, 1.9896753472735358, 2.0943951023931953, 290 | 2.0943951023931953, 2.0943951023931953, 2.0943951023931953, 2.0943951023931953 291 | ], 292 | 'atom-names': ['CB', 'CG', 'CD1', 'CE1', 'CZ', 'CE2', 'CD2'], 293 | 'bonds-names': [ 294 | 'CA-CB', 'CB-CG', 'CG-CD1', 'CD1-CE1', 'CE1-CZ', 'CZ-CE2', 'CE2-CD2' 295 | ], 296 | 'bonds-types': ['CX-CT', 'CT-CA', 'CA-CA', 'CA-CA', 'CA-CA', 'CA-CA', 'CA-CA'], 297 | 'bonds-vals': [1.526, 1.51, 1.4, 1.4, 1.4, 1.4, 1.4], 298 | 'torsion-names': [ 299 | 'C-N-CA-CB', 'N-CA-CB-CG', 'CA-CB-CG-CD1', 'CB-CG-CD1-CE1', 'CG-CD1-CE1-CZ', 300 | 'CD1-CE1-CZ-CE2', 'CE1-CZ-CE2-CD2' 301 | ], 302 | 'torsion-types': [ 303 | 'C -N -CX-CT', 'N -CX-CT-CA', 'CX-CT-CA-CA', 'CT-CA-CA-CA', 'CA-CA-CA-CA', 304 | 'CA-CA-CA-CA', 'CA-CA-CA-CA' 305 | ], 306 | 'torsion-vals': ['p', 'p', 'p', 3.141592653589793, 0.0, 0.0, 0.0], 307 | 'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5], [4,5,6]], 308 | }, 309 | 310 | 'P': { 311 | 'angles-names': ['N-CA-CB', 'CA-CB-CG', 'CB-CG-CD'], 312 | 'angles-types': ['N -CX-CT', 'CX-CT-CT', 'CT-CT-CT'], 313 | 'angles-vals': [1.9146261894377796, 1.911135530933791, 1.911135530933791], 314 | 'atom-names': ['CB', 'CG', 'CD'], 315 | 'bonds-names': ['CA-CB', 'CB-CG', 'CG-CD'], 316 | 'bonds-types': ['CX-CT', 'CT-CT', 'CT-CT'], 317 | 'bonds-vals': [1.526, 1.526, 1.526], 318 | 'torsion-names': ['C-N-CA-CB', 'N-CA-CB-CG', 'CA-CB-CG-CD'], 319 | 'torsion-types': ['C -N -CX-CT', 'N -CX-CT-CT', 'CX-CT-CT-CT'], 320 | 'torsion-vals': ['p', 'p', 'p'], 321 | 'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5], [4,5,6]], 322 | }, 323 | 324 | 'S': { 325 | 'angles-names': ['N-CA-CB', 'CA-CB-OG'], 326 | 'angles-types': ['N -CX-2C', 'CX-2C-OH'], 327 | 'angles-vals': [1.9146261894377796, 1.911135530933791], 328 | 'atom-names': ['CB', 
'OG'], 329 | 'bonds-names': ['CA-CB', 'CB-OG'], 330 | 'bonds-types': ['CX-2C', '2C-OH'], 331 | 'bonds-vals': [1.526, 1.41], 332 | 'torsion-names': ['C-N-CA-CB', 'N-CA-CB-OG'], 333 | 'torsion-types': ['C -N -CX-2C', 'N -CX-2C-OH'], 334 | 'torsion-vals': ['p', 'p'], 335 | 'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5]], 336 | }, 337 | 338 | 'T': { 339 | 'angles-names': ['N-CA-CB', 'CA-CB-OG1', 'CA-CB-CG2'], 340 | 'angles-types': ['N -CX-3C', 'CX-3C-OH', 'CX-3C-CT'], 341 | 'angles-vals': [1.9146261894377796, 1.911135530933791, 1.911135530933791], 342 | 'atom-names': ['CB', 'OG1', 'CG2'], 343 | 'bonds-names': ['CA-CB', 'CB-OG1', 'CB-CG2'], 344 | 'bonds-types': ['CX-3C', '3C-OH', '3C-CT'], 345 | 'bonds-vals': [1.526, 1.41, 1.526], 346 | 'torsion-names': ['C-N-CA-CB', 'N-CA-CB-OG1', 'N-CA-CB-CG2'], 347 | 'torsion-types': ['C -N -CX-3C', 'N -CX-3C-OH', 'N -CX-3C-CT'], 348 | 'torsion-vals': ['p', 'p', 'p'], 349 | 'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5]], 350 | }, 351 | 352 | 'W': { 353 | 'angles-names': [ 354 | 'N-CA-CB', 'CA-CB-CG', 'CB-CG-CD1', 'CG-CD1-NE1', 'CD1-NE1-CE2', 355 | 'NE1-CE2-CZ2', 'CE2-CZ2-CH2', 'CZ2-CH2-CZ3', 'CH2-CZ3-CE3', 'CZ3-CE3-CD2' 356 | ], 357 | 'angles-types': [ 358 | 'N -CX-CT', 'CX-CT-C*', 'CT-C*-CW', 'C*-CW-NA', 'CW-NA-CN', 'NA-CN-CA', 359 | 'CN-CA-CA', 'CA-CA-CA', 'CA-CA-CA', 'CA-CA-CB' 360 | ], 361 | 'angles-vals': [ 362 | 1.9146261894377796, 2.0176006153054447, 2.181661564992912, 1.8971728969178363, 363 | 1.9477874452256716, 2.3177972466484698, 2.0943951023931953, 364 | 2.0943951023931953, 2.0943951023931953, 2.0943951023931953 365 | ], 366 | 'atom-names': [ 367 | 'CB', 'CG', 'CD1', 'NE1', 'CE2', 'CZ2', 'CH2', 'CZ3', 'CE3', 'CD2' 368 | ], 369 | 'bonds-names': [ 370 | 'CA-CB', 'CB-CG', 'CG-CD1', 'CD1-NE1', 'NE1-CE2', 'CE2-CZ2', 'CZ2-CH2', 371 | 'CH2-CZ3', 'CZ3-CE3', 'CE3-CD2' 372 | ], 373 | 'bonds-types': [ 374 | 'CX-CT', 'CT-C*', 'C*-CW', 'CW-NA', 'NA-CN', 'CN-CA', 'CA-CA', 'CA-CA', 375 | 'CA-CA', 'CA-CB' 376 | ], 377 | 'bonds-vals': 
[1.526, 1.495, 1.352, 1.381, 1.38, 1.4, 1.4, 1.4, 1.4, 1.404], 378 | 'torsion-names': [ 379 | 'C-N-CA-CB', 'N-CA-CB-CG', 'CA-CB-CG-CD1', 'CB-CG-CD1-NE1', 'CG-CD1-NE1-CE2', 380 | 'CD1-NE1-CE2-CZ2', 'NE1-CE2-CZ2-CH2', 'CE2-CZ2-CH2-CZ3', 'CZ2-CH2-CZ3-CE3', 381 | 'CH2-CZ3-CE3-CD2' 382 | ], 383 | 'torsion-types': [ 384 | 'C -N -CX-CT', 'N -CX-CT-C*', 'CX-CT-C*-CW', 'CT-C*-CW-NA', 'C*-CW-NA-CN', 385 | 'CW-NA-CN-CA', 'NA-CN-CA-CA', 'CN-CA-CA-CA', 'CA-CA-CA-CA', 'CA-CA-CA-CB' 386 | ], 387 | 'torsion-vals': [ 388 | 'p', 'p', 'p', 3.141592653589793, 0.0, 3.141592653589793, 3.141592653589793, 389 | 0.0, 0.0, 0.0 390 | ], 391 | 'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5], [4,5,6]] 392 | }, 393 | 394 | 'Y': { 395 | 'angles-names': [ 396 | 'N-CA-CB', 'CA-CB-CG', 'CB-CG-CD1', 'CG-CD1-CE1', 'CD1-CE1-CZ', 'CE1-CZ-OH', 397 | 'CE1-CZ-CE2', 'CZ-CE2-CD2' 398 | ], 399 | 'angles-types': [ 400 | 'N -CX-CT', 'CX-CT-CA', 'CT-CA-CA', 'CA-CA-CA', 'CA-CA-C ', 'CA-C -OH', 401 | 'CA-C -CA', 'C -CA-CA' 402 | ], 403 | 'angles-vals': [ 404 | 1.9146261894377796, 1.9896753472735358, 2.0943951023931953, 405 | 2.0943951023931953, 2.0943951023931953, 2.0943951023931953, 406 | 2.0943951023931953, 2.0943951023931953 407 | ], 408 | 'atom-names': ['CB', 'CG', 'CD1', 'CE1', 'CZ', 'OH', 'CE2', 'CD2'], 409 | 'bonds-names': [ 410 | 'CA-CB', 'CB-CG', 'CG-CD1', 'CD1-CE1', 'CE1-CZ', 'CZ-OH', 'CZ-CE2', 'CE2-CD2' 411 | ], 412 | 'bonds-types': [ 413 | 'CX-CT', 'CT-CA', 'CA-CA', 'CA-CA', 'CA-C ', 'C -OH', 'C -CA', 'CA-CA' 414 | ], 415 | 'bonds-vals': [1.526, 1.51, 1.4, 1.4, 1.409, 1.364, 1.409, 1.4], 416 | 'torsion-names': [ 417 | 'C-N-CA-CB', 'N-CA-CB-CG', 'CA-CB-CG-CD1', 'CB-CG-CD1-CE1', 'CG-CD1-CE1-CZ', 418 | 'CD1-CE1-CZ-OH', 'CD1-CE1-CZ-CE2', 'CE1-CZ-CE2-CD2' 419 | ], 420 | 'torsion-types': [ 421 | 'C -N -CX-CT', 'N -CX-CT-CA', 'CX-CT-CA-CA', 'CT-CA-CA-CA', 'CA-CA-CA-C ', 422 | 'CA-CA-C -OH', 'CA-CA-C -CA', 'CA-C -CA-CA' 423 | ], 424 | 'torsion-vals': [ 425 | 'p', 'p', 'p', 3.141592653589793, 0.0, 
# Backbone build constants: bond lengths, bond angles and default torsions
# used to place the N, CA, C and O atoms.
# Angles/torsions are in radians (e.g. 2.0944 ~= 2*pi/3); lengths are
# presumably in Angstrom -- TODO confirm against the data source.
BB_BUILD_INFO = {
    "BONDLENS": {
        # the updated is according to crystal data from 1DPE_1_A and validated with other structures
        # the commented is the sidechainnet one
        'n-ca': 1.4664931, # 1.442,
        'ca-c': 1.524119, # 1.498,
        'c-n': 1.3289373, # 1.379,
        'c-o': 1.229, # From parm10.dat || huge variability according to structures
        # we get 1.3389416 from 1DPE_1_A but also 1.2289 from 2F2H_d2f2hf1
        'c-oh': 1.364 # From parm10.dat, for OXT
    },
    # For placing oxygens
    "BONDANGS": {
        'ca-c-o': 2.0944, # Approximated to be 2pi / 3; parm10.dat says 2.0350539
        'ca-c-oh': 2.0944, # Equal to 'ca-c-o', for OXT
        'ca-c-n': 2.03,
        'n-ca-c': 1.94,
        'c-n-ca': 2.08,
    },
    # Default torsions: a simple approximation, not meant to be exact.
    "BONDTORSIONS": {
        'n-ca-c-n': -0.785398163, # psi (about -45 deg, bimodal distro, pick one)
        'c-n-ca-c': -1.3962634015954636, # phi (-80 deg, bimodal distro, pick one)
        'ca-n-c-ca': 3.141592, # omega (180 deg - https://doi.org/10.1016/j.jmb.2005.01.065)
        'n-ca-c-o': -2.406 # oxygen
    }
}
# AA substitution matrix (BLOSUM62 scores plus one padding row/column).
#
# The 21 rows below are listed in the standard BLOSUM62 residue order
# ("ARNDCQEGHILKMFPSTWYV" followed by the padding token "_"), and the 21
# columns of every row follow that same order.
#
# NOTE: these rows used to be keyed alphabetically ("A", "C", "D", ...) while
# still being listed in the order above, so most residues were mapped to
# another residue's row (e.g. "C" held arginine's row and "V" held
# tryptophan's row with its diagonal of 11). Each row is now attached to the
# residue it actually belongs to. The numeric content of the rows and the
# column layout are unchanged.
# NOTE(review): if a consumer indexes the *columns* with an alphabetical
# residue index, the columns need the same permutation -- confirm with callers.
_BLOSUM_ORDER = "ARNDCQEGHILKMFPSTWYV_"
_BLOSUM_ROWS = [
    [4.0, -1.0, -2.0, -2.0, 0.0, -1.0, -1.0, 0.0, -2.0, -1.0, -1.0, -1.0, -1.0, -2.0, -1.0, 1.0, 0.0, -3.0, -2.0, 0.0, 0.0],      # A
    [-1.0, 5.0, 0.0, -2.0, -3.0, 1.0, 0.0, -2.0, 0.0, -3.0, -2.0, 2.0, -1.0, -3.0, -2.0, -1.0, -1.0, -3.0, -2.0, -3.0, 0.0],     # R
    [-2.0, 0.0, 6.0, 1.0, -3.0, 0.0, 0.0, 0.0, 1.0, -3.0, -3.0, 0.0, -2.0, -3.0, -2.0, 1.0, 0.0, -4.0, -2.0, -3.0, 0.0],         # N
    [-2.0, -2.0, 1.0, 6.0, -3.0, 0.0, 2.0, -1.0, -1.0, -3.0, -4.0, -1.0, -3.0, -3.0, -1.0, 0.0, -1.0, -4.0, -3.0, -3.0, 0.0],    # D
    [0.0, -3.0, -3.0, -3.0, 9.0, -3.0, -4.0, -3.0, -3.0, -1.0, -1.0, -3.0, -1.0, -2.0, -3.0, -1.0, -1.0, -2.0, -2.0, -1.0, 0.0],  # C
    [-1.0, 1.0, 0.0, 0.0, -3.0, 5.0, 2.0, -2.0, 0.0, -3.0, -2.0, 1.0, 0.0, -3.0, -1.0, 0.0, -1.0, -2.0, -1.0, -2.0, 0.0],        # Q
    [-1.0, 0.0, 0.0, 2.0, -4.0, 2.0, 5.0, -2.0, 0.0, -3.0, -3.0, 1.0, -2.0, -3.0, -1.0, 0.0, -1.0, -3.0, -2.0, -2.0, 0.0],       # E
    [0.0, -2.0, 0.0, -1.0, -3.0, -2.0, -2.0, 6.0, -2.0, -4.0, -4.0, -2.0, -3.0, -3.0, -2.0, 0.0, -2.0, -2.0, -3.0, -3.0, 0.0],   # G
    [-2.0, 0.0, 1.0, -1.0, -3.0, 0.0, 0.0, -2.0, 8.0, -3.0, -3.0, -1.0, -2.0, -1.0, -2.0, -1.0, -2.0, -2.0, 2.0, -3.0, 0.0],     # H
    [-1.0, -3.0, -3.0, -3.0, -1.0, -3.0, -3.0, -4.0, -3.0, 4.0, 2.0, -3.0, 1.0, 0.0, -3.0, -2.0, -1.0, -3.0, -1.0, 3.0, 0.0],    # I
    [-1.0, -2.0, -3.0, -4.0, -1.0, -2.0, -3.0, -4.0, -3.0, 2.0, 4.0, -2.0, 2.0, 0.0, -3.0, -2.0, -1.0, -2.0, -1.0, 1.0, 0.0],    # L
    [-1.0, 2.0, 0.0, -1.0, -3.0, 1.0, 1.0, -2.0, -1.0, -3.0, -2.0, 5.0, -1.0, -3.0, -1.0, 0.0, -1.0, -3.0, -2.0, -2.0, 0.0],     # K
    [-1.0, -1.0, -2.0, -3.0, -1.0, 0.0, -2.0, -3.0, -2.0, 1.0, 2.0, -1.0, 5.0, 0.0, -2.0, -1.0, -1.0, -1.0, -1.0, 1.0, 0.0],     # M
    [-2.0, -3.0, -3.0, -3.0, -2.0, -3.0, -3.0, -3.0, -1.0, 0.0, 0.0, -3.0, 0.0, 6.0, -4.0, -2.0, -2.0, 1.0, 3.0, -1.0, 0.0],     # F
    [-1.0, -2.0, -2.0, -1.0, -3.0, -1.0, -1.0, -2.0, -2.0, -3.0, -3.0, -1.0, -2.0, -4.0, 7.0, -1.0, -1.0, -4.0, -3.0, -2.0, 0.0],  # P
    [1.0, -1.0, 1.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, -2.0, -2.0, 0.0, -1.0, -2.0, -1.0, 4.0, 1.0, -3.0, -2.0, -2.0, 0.0],        # S
    [0.0, -1.0, 0.0, -1.0, -1.0, -1.0, -1.0, -2.0, -2.0, -1.0, -1.0, -1.0, -1.0, -2.0, -1.0, 1.0, 5.0, -2.0, -2.0, 0.0, 0.0],    # T
    [-3.0, -3.0, -4.0, -4.0, -2.0, -2.0, -3.0, -2.0, -2.0, -3.0, -2.0, -3.0, -1.0, 1.0, -4.0, -3.0, -2.0, 11.0, 2.0, -3.0, 0.0],  # W
    [-2.0, -2.0, -2.0, -3.0, -2.0, -1.0, -2.0, -3.0, 2.0, -1.0, -1.0, -2.0, -1.0, 3.0, -3.0, -2.0, -2.0, 2.0, 7.0, -1.0, 0.0],   # Y
    [0.0, -3.0, -3.0, -3.0, -1.0, -2.0, -2.0, -3.0, -3.0, 3.0, 1.0, -2.0, 1.0, -1.0, -2.0, -2.0, 0.0, -3.0, -1.0, 4.0, 0.0],     # V
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0],                   # _ (padding)
]
# Keys are inserted alphabetically (padding last), exactly as before, so code
# that iterates the dict sees the same key order; only the row each key maps
# to has been corrected.
BLOSUM = {
    aa: row
    for aa, row in sorted(zip(_BLOSUM_ORDER, _BLOSUM_ROWS), key=lambda pair: pair[0])
}
}, 622 | 'R': {'CB': {'bond_lens': 1.5260003, 'bond_angs': 1.9146265, 'bond_dihedral': 2.6976738}, 623 | 'CG': {'bond_lens': 1.5260003, 'bond_angs': 1.9111352, 'bond_dihedral': -1.2}, 624 | 'CD': {'bond_lens': 1.5260003, 'bond_angs': 1.9111352, 'bond_dihedral': -3.141592}, 625 | 'NE': {'bond_lens': 1.463, 'bond_angs': 1.9408059, 'bond_dihedral': -3.141592}, 626 | 'CZ': {'bond_lens': 1.34, 'bond_angs': 2.1502457, 'bond_dihedral': -3.141592}, 627 | 'NH1': {'bond_lens': 1.34, 'bond_angs': 2.094395, 'bond_dihedral': 0.}, 628 | 'NH2': {'bond_lens': 1.34, 'bond_angs': 2.094395, 'bond_dihedral': -3.141592} 629 | }, 630 | 'N': {'CB': {'bond_lens': 1.5260003, 'bond_angs': 1.9146265, 'bond_dihedral': 2.8416245}, 631 | 'CG': {'bond_lens': 1.5219998, 'bond_angs': 1.9390607, 'bond_dihedral': -1.15}, 632 | 'OD1': {'bond_lens': 1.229, 'bond_angs': 2.101376, 'bond_dihedral': -1.}, # spread out w/ mean at -1 633 | 'ND2': {'bond_lens': 1.3349999, 'bond_angs': 2.0350537, 'bond_dihedral': 2.14} # spread out with mean at -4 634 | }, 635 | 'D': {'CB': {'bond_lens': 1.526, 'bond_angs': 1.9146265, 'bond_dihedral': 2.7741134}, 636 | 'CG': {'bond_lens': 1.522, 'bond_angs': 1.9390608, 'bond_dihedral': -1.07}, 637 | 'OD1': {'bond_lens': 1.25, 'bond_angs': 2.0420356, 'bond_dihedral': -0.2678593}, 638 | 'OD2': {'bond_lens': 1.25, 'bond_angs': 2.0420356, 'bond_dihedral': 2.95} 639 | }, 640 | 'C': {'CB': {'bond_lens': 1.5259998, 'bond_angs': 1.9146262, 'bond_dihedral': 2.553627}, 641 | 'SG': {'bond_lens': 1.8099997, 'bond_angs': 1.8954275, 'bond_dihedral': -1.07} 642 | }, 643 | 'Q': {'CB': {'bond_lens': 1.5260003, 'bond_angs': 1.9146266, 'bond_dihedral': 2.7262106}, 644 | 'CG': {'bond_lens': 1.5260003, 'bond_angs': 1.9111353, 'bond_dihedral': -1.075}, 645 | 'CD': {'bond_lens': 1.5219998, 'bond_angs': 1.9390606, 'bond_dihedral': -3.141592}, 646 | 'OE1': {'bond_lens': 1.229, 'bond_angs': 2.101376, 'bond_dihedral': -1}, # bimodal at -1, +1 647 | 'NE2': {'bond_lens': 1.3349998, 'bond_angs': 
2.0350537, 'bond_dihedral': 2.14} # bimodal at -2, -4 648 | }, 649 | 'E': {'CB': {'bond_lens': 1.5260003, 'bond_angs': 1.9146267, 'bond_dihedral': 2.7813723}, 650 | 'CG': {'bond_lens': 1.5260003, 'bond_angs': 1.9111352, 'bond_dihedral': -1.07}, # bimodal at -1.07, 3.14 651 | 'CD': {'bond_lens': 1.5219998, 'bond_angs': 1.9390606, 'bond_dihedral': -3.0907722155200403}, 652 | 'OE1': {'bond_lens': 1.25, 'bond_angs': 2.0420356, 'bond_dihedral': 0.003740118}, # spread out btween -1,1 653 | 'OE2': {'bond_lens': 1.25, 'bond_angs': 2.0420356, 'bond_dihedral': -3.1378527} # spread out btween -4.3, -2.14 654 | }, 655 | 'G': {}, 656 | 'H': {'CB': {'bond_lens': 1.5259998, 'bond_angs': 1.9146264, 'bond_dihedral': 2.614421}, 657 | 'CG': {'bond_lens': 1.5039998, 'bond_angs': 1.9739674, 'bond_dihedral': -1.05}, 658 | 'ND1': {'bond_lens': 1.3850001, 'bond_angs': 2.094395, 'bond_dihedral': -1.41}, # bimodal at -1.4, 1.4 659 | 'CE1': {'bond_lens': 1.3430002, 'bond_angs': 1.8849558, 'bond_dihedral': 3.14}, 660 | 'NE2': {'bond_lens': 1.335, 'bond_angs': 1.8849558, 'bond_dihedral': 0.0}, 661 | 'CD2': {'bond_lens': 1.3940002, 'bond_angs': 1.8849558, 'bond_dihedral': 0.0} 662 | }, 663 | 'I': {'CB': {'bond_lens': 1.526, 'bond_angs': 1.9146265, 'bond_dihedral': 2.5604365}, 664 | 'CG1': {'bond_lens': 1.526, 'bond_angs': 1.9111353, 'bond_dihedral': -1.025}, 665 | 'CD1': {'bond_lens': 1.526, 'bond_angs': 1.9111353, 'bond_dihedral': -3.0667439142810267}, 666 | 'CG2': {'bond_lens': 1.526, 'bond_angs': 1.9111353, 'bond_dihedral': -3.1225884596454065} 667 | }, 668 | 'L': {'CB': {'bond_lens': 1.5260003, 'bond_angs': 1.9146265, 'bond_dihedral': 2.711971}, 669 | 'CG': {'bond_lens': 1.5260003, 'bond_angs': 1.9111352, 'bond_dihedral': -1.15}, 670 | 'CD1': {'bond_lens': 1.5260003, 'bond_angs': 1.9111352, 'bond_dihedral': 3.14}, 671 | 'CD2': {'bond_lens': 1.5260003, 'bond_angs': 1.9111352, 'bond_dihedral': -1.05} 672 | }, 673 | 'K': {'CB': {'bond_lens': 1.526, 'bond_angs': 1.9146266, 'bond_dihedral': 
2.7441595}, 674 | 'CG': {'bond_lens': 1.526, 'bond_angs': 1.9111353, 'bond_dihedral': -1.15}, 675 | 'CD': {'bond_lens': 1.526, 'bond_angs': 1.9111353, 'bond_dihedral': -3.09}, 676 | 'CE': {'bond_lens': 1.526, 'bond_angs': 1.9111353, 'bond_dihedral': 3.092959}, 677 | 'NZ': {'bond_lens': 1.4710001, 'bond_angs': 1.940806, 'bond_dihedral': 3.0515378} 678 | }, 679 | 'M': {'CB': {'bond_lens': 1.526, 'bond_angs': 1.9146264, 'bond_dihedral': 2.7051392}, 680 | 'CG': {'bond_lens': 1.526, 'bond_angs': 1.9111354, 'bond_dihedral': -1.1}, 681 | 'SD': {'bond_lens': 1.8099998, 'bond_angs': 2.001892, 'bond_dihedral': 3.1411812}, # bimodal at 0, 3.14 682 | 'CE': {'bond_lens': 1.8099998, 'bond_angs': 1.7261307, 'bond_dihedral': -0.048235133} # trimodal at -1.41, 0, 1.41 683 | }, 684 | 'F': {'CB': {'bond_lens': 1.5260003, 'bond_angs': 1.9146266, 'bond_dihedral': 2.545154}, 685 | 'CG': {'bond_lens': 1.5100001, 'bond_angs': 1.9896755, 'bond_dihedral': -1.2}, # bimodal at -1, 3.14 686 | 'CD1': {'bond_lens': 1.3999997, 'bond_angs': 2.094395, 'bond_dihedral': 1.41}, # bimodal -1.41, 1.41 687 | 'CE1': {'bond_lens': 1.3999997, 'bond_angs': 2.094395, 'bond_dihedral': 3.141592}, 688 | 'CZ': {'bond_lens': 1.3999997, 'bond_angs': 2.094395, 'bond_dihedral': 0.0}, 689 | 'CE2': {'bond_lens': 1.3999997, 'bond_angs': 2.094395, 'bond_dihedral': 0.0}, 690 | 'CD2': {'bond_lens': 1.3999997, 'bond_angs': 2.094395, 'bond_dihedral': 0.0} 691 | }, 692 | 'P': {'CB': {'bond_lens': 1.5260001, 'bond_angs': 1.9146266, 'bond_dihedral': 3.141592}, 693 | 'CG': {'bond_lens': 1.5260001, 'bond_angs': 1.9111352, 'bond_dihedral': -0.707}, # bimodal at -0.7, 0.7 694 | 'CD': {'bond_lens': 1.5260001, 'bond_angs': 1.9111352, 'bond_dihedral': 0.85} # bimodal at -0.85, 0.85 695 | }, 696 | 'S': {'CB': {'bond_lens': 1.5260001, 'bond_angs': 1.9146266, 'bond_dihedral': 2.6017702}, 697 | 'OG': {'bond_lens': 1.41, 'bond_angs': 1.9111352, 'bond_dihedral': 1.1} 698 | }, 699 | 'T': {'CB': {'bond_lens': 1.5260001, 'bond_angs': 
1.9146265, 'bond_dihedral': 2.55}, 700 | 'OG1': {'bond_lens': 1.4099998, 'bond_angs': 1.9111353, 'bond_dihedral': -1.07}, # bimodal at -1 and +1 701 | 'CG2': {'bond_lens': 1.5260001, 'bond_angs': 1.9111353, 'bond_dihedral': -3.05} # bimodal at -1 and -3 702 | }, 703 | 'W': {'CB': {'bond_lens': 1.526, 'bond_angs': 1.9146266, 'bond_dihedral': 3.141592}, 704 | 'CG': {'bond_lens': 1.4950002, 'bond_angs': 2.0176008, 'bond_dihedral': -1.2}, 705 | 'CD1': {'bond_lens': 1.3520001, 'bond_angs': 2.1816616, 'bond_dihedral': 1.53}, 706 | 'NE1': {'bond_lens': 1.3810003, 'bond_angs': 1.8971729, 'bond_dihedral': 3.141592}, 707 | 'CE2': {'bond_lens': 1.3799998, 'bond_angs': 1.9477878, 'bond_dihedral': 0.0}, 708 | 'CZ2': {'bond_lens': 1.3999999, 'bond_angs': 2.317797, 'bond_dihedral': 3.141592}, 709 | 'CH2': {'bond_lens': 1.3999999, 'bond_angs': 2.094395, 'bond_dihedral': 3.141592}, 710 | 'CZ3': {'bond_lens': 1.3999999, 'bond_angs': 2.094395, 'bond_dihedral': 0.0}, 711 | 'CE3': {'bond_lens': 1.3999999, 'bond_angs': 2.094395, 'bond_dihedral': 0.0}, 712 | 'CD2': {'bond_lens': 1.404, 'bond_angs': 2.094395, 'bond_dihedral': 0.0} 713 | }, 714 | 'Y': {'CB': {'bond_lens': 1.5260003, 'bond_angs': 1.9146266, 'bond_dihedral': 3.1}, 715 | 'CG': {'bond_lens': 1.5100001, 'bond_angs': 1.9896754, 'bond_dihedral': -1.1}, 716 | 'CD1': {'bond_lens': 1.3999997, 'bond_angs': 2.094395, 'bond_dihedral': 1.36}, 717 | 'CE1': {'bond_lens': 1.3999997, 'bond_angs': 2.094395, 'bond_dihedral': 3.141592}, 718 | 'CZ': {'bond_lens': 1.4090003, 'bond_angs': 2.094395, 'bond_dihedral': 0.0}, 719 | 'OH': {'bond_lens': 1.3640002, 'bond_angs': 2.094395, 'bond_dihedral': 3.141592}, 720 | 'CE2': {'bond_lens': 1.4090003, 'bond_angs': 2.094395, 'bond_dihedral': 0.0}, 721 | 'CD2': {'bond_lens': 1.3999997, 'bond_angs': 2.094395, 'bond_dihedral': 0.0} 722 | }, 723 | 'V': {'CB': {'bond_lens': 1.5260003, 'bond_angs': 1.9146266, 'bond_dihedral': 2.55}, 724 | 'CG1': {'bond_lens': 1.5260003, 'bond_angs': 1.9111352, 'bond_dihedral': 
def make_bond_mask(aa):
    """ Bond length associated with each of the 14 atom slots of a residue.

        Slots 0-3 take the backbone lengths ('c-n', 'n-ca', 'ca-c', 'c-o')
        from BB_BUILD_INFO; slots 4 onwards take the residue's 'bonds-vals'
        from SC_BUILD_INFO. Unused slots stay at 0.
        Inputs:
        * aa: str. one-letter amino acid code ("_" is the padding token)
        Outputs: (14,) float array. all zeros for the padding token.
    """
    lengths = np.zeros(14)
    # padding token: nothing to fill in
    if aa == "_":
        return lengths
    # backbone
    bb_lens = BB_BUILD_INFO["BONDLENS"]
    lengths[:4] = [bb_lens['c-n'], bb_lens['n-ca'], bb_lens['ca-c'], bb_lens['c-o']]
    # sidechain - only for residues described in the build table
    if aa in SC_BUILD_INFO:
        sc_lens = SC_BUILD_INFO[aa]['bonds-vals']
        lengths[4:4 + len(sc_lens)] = sc_lens
    return lengths
def make_torsion_mask(aa, fill=False):
    """ Dihedral angle associated with each of the 14 atom slots of a residue.

        Inputs:
        * aa: str. one-letter amino acid code ("_" is the padding token)
        * fill: bool. if True, every sidechain slot takes the 'bond_dihedral'
                stored in MP3SC_INFO for that atom. if False, the entries of
                SC_BUILD_INFO 'torsion-vals' are used: 'p' becomes NaN,
                'i' becomes the marker value 999, numbers are copied as-is.
        Outputs: (14,) float array. all zeros for the padding token.
    """
    dihedrals = np.zeros(14)
    if aa == "_":
        return dihedrals

    # backbone defaults
    bb_torsions = BB_BUILD_INFO["BONDTORSIONS"]
    dihedrals[0] = bb_torsions['n-ca-c-n']   # psi
    dihedrals[1] = bb_torsions['ca-n-c-ca']  # omega
    dihedrals[2] = bb_torsions['c-n-ca-c']   # phi
    dihedrals[3] = bb_torsions['n-ca-c-o']   # oxygen

    # sidechain
    sc_info = SC_BUILD_INFO[aa]
    for slot, value in enumerate(sc_info['torsion-vals'], start=4):
        if fill:
            atom_name = sc_info["atom-names"][slot - 4]
            dihedrals[slot] = MP3SC_INFO[aa][atom_name]["bond_dihedral"]
        elif value == 'p':
            # same convention as sidechainnet's StructureBuilder:
            # https://github.com/jonathanking/sidechainnet/blob/master/sidechainnet/structure/StructureBuilder.py#L372
            dihedrals[slot] = np.nan
        elif value == "i":
            # 999 is only an annotation -- meant to be replaced later
            dihedrals[slot] = 999
        else:
            dihedrals[slot] = value
    return dihedrals
def make_atom_token_mask(aa):
    """ Token id of every atom of the residue, in slot order.

        The atom list is the backbone ("N", "CA", "C", "O") followed by the
        residue's 'atom-names' from SC_BUILD_INFO; each name is looked up in
        ATOM_TOKEN_IDS. Slots without an atom stay at 0.
        Inputs:
        * aa: str. one-letter amino acid code ("_" is the padding token)
        Outputs: (14,) float array. all zeros for the padding token.
    """
    tokens = np.zeros(14)
    if aa == "_":
        return tokens
    atom_names = ["N", "CA", "C", "O"] + SC_BUILD_INFO[aa]["atom-names"]
    tokens[:len(atom_names)] = [ATOM_TOKEN_IDS[name] for name in atom_names]
    return tokens
def mp_nerf_torch(a, b, c, l, theta, chi):
    """ Custom Natural extension of Reference Frame.
        Places the next point d from three previous points and the
        internal coordinates (bond length, bond angle, dihedral).
        Inputs:
        * a: (..., 3) or (3,). point(s) of the plane, not connected to d
        * b: (..., 3) or (3,). point(s) of the plane, not connected to d
        * c: (..., 3) or (3,). point(s) of the plane, connected to d
        * l: (...,) or float. length(s) of the c-d bond
        * theta: (...,) or float. angle(s) between b-c-d
        * chi: (...,) or float. dihedral angle(s) between the a-b-c and b-c-d planes
        Outputs: d (..., 3) or (3,). the next point in the sequence, linked to c
        Raises: ValueError if any theta lies outside [-pi, pi].
    """
    # accept plain python numbers for the internal coordinates
    if not torch.is_tensor(l):
        l = torch.tensor(l, dtype=c.dtype, device=c.device)
    if not torch.is_tensor(theta):
        theta = torch.tensor(theta, dtype=c.dtype, device=c.device)
    if not torch.is_tensor(chi):
        chi = torch.tensor(chi, dtype=c.dtype, device=c.device)
    # safety check
    if not ( (-np.pi <= theta) * (theta <= np.pi) ).all().item():
        raise ValueError(f"theta(s) must be in radians and in [-pi, pi]. theta(s) = {theta}")
    # calc vecs
    ba = b-a
    cb = c-b
    # calc rotation matrix. based on plane normals and normalized
    n_plane = torch.cross(ba, cb, dim=-1)
    n_plane_ = torch.cross(n_plane, cb, dim=-1)
    rotate = torch.stack([cb, n_plane_, n_plane], dim=-1)
    # out-of-place: do not modify the tensor the norm was computed from
    rotate = rotate / torch.norm(rotate, dim=-2, keepdim=True)
    # calc proto point, rotate. add (-1 for sidechainnet convention)
    # https://github.com/jonathanking/sidechainnet/issues/14
    d = torch.stack([-torch.cos(theta),
                     torch.sin(theta) * torch.cos(chi),
                     torch.sin(theta) * torch.sin(chi)], dim=-1).unsqueeze(-1)
    # extend base point, set length.
    # only the trailing matmul dim is squeezed: a bare squeeze() would also
    # drop size-1 batch dims and make the sum broadcast to a wrong shape
    return c + l.unsqueeze(-1) * torch.matmul(rotate, d).squeeze(-1)
-> l c ...', c=c ) 31 | if mask is not None: 32 | return wrap[mask] 33 | return wrap 34 | 35 | 36 | ###################### 37 | # from: https://static-content.springer.com/esm/art%3A10.1038%2Fs41586-021-03819-2/MediaObjects/41586_2021_3819_MOESM1_ESM.pdf 38 | 39 | def rename_symmetric_atoms(pred_coors, true_coors, seq_list, cloud_mask, pred_feats=None): 40 | """ Corrects ambiguous atoms (due to 180 torsions - ambiguous sidechains). 41 | Inputs: 42 | * pred_coors: (batch, L, 14, 3) float. sidechainnet format (see mp_nerf.kb_proteins) 43 | * true_coors: (batch, L, 14, 3) float. sidechainnet format (see mp_nerf.kb_proteins) 44 | * seq_list: list of FASTA sequences 45 | * cloud_mask: (batch, L, 14) bool. mask for present atoms 46 | * pred_feats: (batch, L, 14, D) optional. atom-wise predicted features 47 | 48 | Warning! A coordinate might be missing. TODO: 49 | Outputs: pred_coors, pred_feats 50 | """ 51 | aux_cloud_mask = cloud_mask.clone() # will be manipulated 52 | 53 | for i,seq in enumerate(seq_list): 54 | for aa, pairs in AMBIGUOUS.items(): 55 | # indexes of aas in chain - check coords are given for aa 56 | amb_idxs = np.array(pairs["indexs"]).flatten().tolist() 57 | idxs = torch.tensor([ 58 | k for k,s in enumerate(seq) if s==aa and \ 59 | k in set( torch.nonzero(aux_cloud_mask[i, :, amb_idxs].sum(dim=-1)).tolist()[0] ) 60 | ]).long() 61 | # check if any AAs matching 62 | if idxs.shape[0] == 0: 63 | continue 64 | # get indexes of non-ambiguous 65 | aux_cloud_mask[i, idxs, amb_idxs] = False 66 | non_amb_idx = torch.nonzero(aux_cloud_mask[i, idxs[0]]).tolist() 67 | for a, pair in enumerate(pairs["indexs"]): 68 | # calc distances 69 | d_ij_pred = torch.cdist(pred_coors[ i, idxs, pair ], pred_coors[i, idxs, non_amb_idx], p=2) # 2, N 70 | d_ij_true = torch.cdist(true_coors[ i, idxs, pair+pair[::-1] ], true_coors[i, idxs, non_amb_idx], p=2) # 2, 2N 71 | # see if alternative is better (less distance) 72 | idxs_to_change = ( (d_ij_pred - d_ij_true[2:]).sum(dim=-1) < 
def fape_torch(pred_coords, true_coords, max_val=10., l_func=None,
               c_alpha=False, seq_list=None, rot_mats_g=None):
    """ Computes the Frame-Aligned Point Error. Scaled 0 <= FAPE <= 1
        Inputs:
        * pred_coords: (B, L, C, 3) predicted coordinates.
        * true_coords: (B, L, C, 3) ground truth coordinates. Atoms whose
                       true coordinates are all zero are treated as absent.
        * max_val: maximum value (it's also the radius due to L1 usage)
        * l_func: function. allow for options other than l1 (consider dRMSD).
                  called as l_func(pred_points, true_points).
        * c_alpha: bool. whether to only calculate frames and loss from c_alphas
        * seq_list: list of strs (FASTA sequences). to calculate rigid bodies' indexs.
                    Required when `rot_mats_g` is not passed or `c_alpha` is True.
        * rot_mats_g: optional. List of n_seqs x (N_frames, 3, 3) rotation matrices.

        Outputs: (B, N_atoms). per-atom error averaged over the frames and
                 divided by `max_val`. N_atoms is the number of scored atoms,
                 so it must be the same for every chain in the batch.
        Raises: ValueError if `seq_list` is needed but was not passed.
    """
    if l_func is None:
        def l_func(x, y, eps=1e-7):
            # euclidean distance, eps keeps the sqrt differentiable at 0
            return (((x-y)**2).sum(dim=-1) + eps).sqrt()

    need_rigid_idxs = rot_mats_g is None or c_alpha
    if need_rigid_idxs and seq_list is None:
        raise ValueError("seq_list must be passed when rot_mats_g is None or c_alpha is True")

    fape_store = []
    # for chain
    for s in range(pred_coords.shape[0]):
        cloud_mask = (torch.abs(true_coords[s]).sum(dim=-1) != 0)
        # center both structures
        pred_center = pred_coords[s] - pred_coords[s, cloud_mask].mean(dim=0, keepdim=True)
        true_center = true_coords[s] - true_coords[s, cloud_mask].mean(dim=0, keepdim=True)
        # flatten residues and atoms: (L, C, 3) -> (L*C, 3)
        pred_center = pred_center.reshape(-1, pred_center.shape[-1])
        true_center = true_center.reshape(-1, true_center.shape[-1])
        mask_center = cloud_mask.reshape(-1)

        # rigid body indexes are needed both to build frames and to pick c_alphas
        rigid_idxs = None
        if need_rigid_idxs:
            rigid_idxs = scn_rigid_index_mask(seq_list[s], c_alpha=c_alpha)

        # get frames and conversions - same scheme as in mp_nerf proteins' concat of monomers
        if rot_mats_g is None:
            true_frames = get_axis_matrix(*true_center[rigid_idxs].detach(), norm=True)
            pred_frames = get_axis_matrix(*pred_center[rigid_idxs].detach(), norm=True)
            rot_mats = torch.matmul(torch.transpose(pred_frames, -1, -2), true_frames)
        else:
            rot_mats = rot_mats_g[s]

        # calculate loss only on c_alphas
        if c_alpha:
            mask_center = torch.zeros_like(mask_center)
            mask_center[rigid_idxs[1]] = True

        # select the scored atoms of this chain. the tensors are already
        # per-chain, so they must not be indexed with the batch index again
        pred_points = pred_center[mask_center]
        true_points = true_center[mask_center]

        # measure errors - accumulate over frames, then average
        chain_error = 0
        for rot_mat in rot_mats:
            chain_error = chain_error + l_func(pred_points @ rot_mat,
                                               true_points).clamp(0, max_val)
        fape_store.append(chain_error / rot_mats.shape[0])

    # stack and scale
    return (1/max_val) * torch.stack(fape_store, dim=0)
def noise_internals(seq, angles=None, coords=None, noise_scale=0.5, theta_scale=0.5, verbose=0):
    """ Adds gaussian noise to the internal coordinates (bond angles and
        dihedrals) of a protein and rebuilds it.
        Inputs:
        * seq: string. Sequence in FASTA format
        * angles: (l, 12) sidechainnet angles tensor. Random normal values
                  are drawn when not passed. (the (l, 12) shape is taken from
                  that fallback -- TODO confirm against callers)
        * coords: optional coordinates, used to modify the scaffolds.
                  presumably (l, 14, 3) -- TODO confirm
        * noise_scale: float. std of noise gaussian. No noise if <= 0.
        * theta_scale: float. multiplier of `noise_scale` for the bond angles
        * verbose: prints the noise scale when truthy
        Outputs: whatever `protein_fold` returns for the noised scaffolds
                 (documented upstream as chain (l, c, d) and cloud_mask (l, c))
    """
    assert angles is not None or coords is not None, \
        "You must pass either angles or coordinates"

    two_pi = 2*np.pi

    def wrap_in_place(values):
        """ Folds the noised angles back into range, modifying `values`. """
        # beyond a full turn: reduce modulo 2pi first
        beyond_turn = (values > two_pi) + (values < -two_pi)
        if beyond_turn.any():
            values[beyond_turn] = values[beyond_turn] % two_pi
        # then shift whatever is still outside [-pi, pi] by one turn
        too_high, too_low = values > np.pi, values < -np.pi
        if too_high.any():
            values[too_high] = - ( two_pi - values[too_high] ).clone()
        if too_low.any():
            values[too_low] = two_pi + values[too_low].clone()
        return values

    # get scaffolds
    if angles is None:
        angles = torch.randn(coords.shape[0], 12).to(coords.device)
    scaffolds = build_scaffolds_from_scn_angles(seq, angles.clone())
    if coords is not None:
        scaffolds = modify_scaffolds_with_coords(scaffolds, coords)

    # noise bond angles and dihedrals (dihedrals of everyone, angles only of BB)
    if noise_scale > 0.:
        if verbose:
            print("noising", noise_scale)
        angles_mask = scaffolds["angles_mask"]

        # thetas (scaled noise. only for the first 3 slots, i.e. the BB)
        bb_thetas = angles_mask[0, :, :3].clone()
        bb_thetas += theta_scale*noise_scale * torch.randn_like(bb_thetas)
        angles_mask[0, :, :3] = wrap_in_place(bb_thetas)

        # dihedrals
        dihedrals = angles_mask[1].clone()
        dihedrals += noise_scale * torch.randn_like(dihedrals)
        angles_mask[1] = wrap_in_place(dihedrals)

    # reconstruct
    return protein_fold(**scaffolds)
284 | Outputs: (B, N, D) coords and (B, N) boolean mask 285 | """ 286 | # get seqs right 287 | assert int_seq is not None or seq is not None, "Either int_seq or seq must be passed" 288 | if int_seq is not None and seq is None: 289 | seq = "".join([INDEX2AAS[x] for x in int_seq.cpu().detach().tolist()]) 290 | elif int_seq is None and seq is not None: 291 | int_seq = torch.tensor([AAS2INDEX[x] for x in seq.upper()], device=true_coords.device) 292 | 293 | cloud_mask_flat = (true_coords == 0.).sum(dim=-1) != true_coords.shape[-1] 294 | naive_cloud_mask = scn_cloud_mask(seq).bool() 295 | 296 | if NOISE_INTERNALS: 297 | assert cloud_mask_flat.sum().item() == naive_cloud_mask.sum().item(), \ 298 | "atoms missing: {0}".format( naive_cloud_mask.sum().item() - \ 299 | cloud_mask_flat.sum().item() ) 300 | # expand to batch dim if needed 301 | if len(true_coords.shape) < 3: 302 | true_coords = true_coords.unsqueeze(0) 303 | noised_coords = true_coords.clone() 304 | coords_scn = rearrange(true_coords, 'b (l c) d -> b l c d', c=14) 305 | 306 | ###### SETP 1: internals ######### 307 | if NOISE_INTERNALS: 308 | # create noised and masked noised coords 309 | noised_coords, cloud_mask = noise_internals(seq, angles = angles, 310 | coords = coords_scn.squeeze(), 311 | noise_scale = NOISE_INTERNALS, 312 | theta_scale = INTERNALS_SCN_SCALE, 313 | verbose = False) 314 | masked_noised = noised_coords[naive_cloud_mask] 315 | noised_coords = rearrange(noised_coords, 'l c d -> () (l c) d') 316 | 317 | ###### SETP 2: build from backbone ######### 318 | if SIDECHAIN_RECONSTRUCT: 319 | bb, mask = atom_selector(int_seq.unsqueeze(0), noised_coords, option="backbone", discard_absent=False) 320 | scaffolds = build_scaffolds_from_scn_angles(seq, angles=None, device="cpu") 321 | noised_coords[~mask] = 0. 
322 | noised_coords = rearrange(noised_coords, '() (l c) d -> l c d', c=14) 323 | noised_coords, _ = sidechain_fold(wrapper = noised_coords.cpu(), **scaffolds, c_beta = False) 324 | noised_coords = rearrange(noised_coords, 'l c d -> () (l c) d').to(true_coords.device) 325 | 326 | 327 | return noised_coords, cloud_mask_flat 328 | 329 | 330 | 331 | if __name__ == "__main__": 332 | import joblib 333 | # imports of data (from mp_nerf.utils.get_prot) 334 | prots = joblib.load("some_route_to_local_serialized_file_with_prots") 335 | 336 | # set params 337 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 338 | 339 | # unpack and test 340 | seq, int_seq, true_coords, angles, padding_seq, mask, pid = prots[-1] 341 | 342 | true_coords = true_coords.unsqueeze(0) 343 | 344 | # check noised internals 345 | coords_scn = rearrange(true_coords, 'b (l c) d -> b l c d', c=14) 346 | cloud, cloud_mask = noise_internals(seq, angles=angles, coords=coords_scn[0], noise_scale=1.) 347 | print("cloud.shape", cloud.shape) 348 | 349 | # check integral 350 | integral, mask = combine_noise(true_coords, seq=seq, int_seq = None, angles=None, 351 | NOISE_INTERNALS=1e-2, SIDECHAIN_RECONSTRUCT=True) 352 | print("integral.shape", integral.shape) 353 | 354 | integral, mask = combine_noise(true_coords, seq=None, int_seq = int_seq, angles=None, 355 | NOISE_INTERNALS=1e-2, SIDECHAIN_RECONSTRUCT=True) 356 | print("integral.shape2", integral.shape) 357 | 358 | 359 | 360 | -------------------------------------------------------------------------------- /mp_nerf/proteins.py: -------------------------------------------------------------------------------- 1 | # science 2 | import numpy as np 3 | # diff / ml 4 | import torch 5 | from einops import repeat 6 | # module 7 | from mp_nerf.massive_pnerf import * 8 | from mp_nerf.utils import * 9 | from mp_nerf.kb_proteins import * 10 | 11 | 12 | def scn_cloud_mask(seq, coords=None, strict=False): 13 | """ Gets the boolean mask atom positions 
(not all aas have same atoms). 14 | Inputs: 15 | * seqs: (length) iterable of 1-letter aa codes of a protein 16 | * coords: optional .(batch, lc, 3). sidechainnet coords. 17 | returns the true mask (solves potential atoms that might not be provided) 18 | * strict: bool. whther to discard the next points after a missing one 19 | Outputs: (length, 14) boolean mask 20 | """ 21 | if coords is not None: 22 | start = (( rearrange(coords, 'b (l c) d -> b l c d', c=14) != 0 ).sum(dim=-1) != 0).float() 23 | # if a point is 0, the following are 0s as well 24 | if strict: 25 | for b in range(start.shape[0]): 26 | for pos in range(start.shape[1]): 27 | for chain in range(start.shape[2]): 28 | if start[b, pos, chain].item() == 0: 29 | start[b, pos, chain:] *= 0 30 | return start 31 | return torch.tensor([SUPREME_INFO[aa]['cloud_mask'] for aa in seq]) 32 | 33 | 34 | def scn_bond_mask(seq): 35 | """ Inputs: 36 | * seqs: (length). iterable of 1-letter aa codes of a protein 37 | Outputs: (L, 14) maps point to bond length 38 | """ 39 | return torch.tensor([SUPREME_INFO[aa]['bond_mask'] for aa in seq]) 40 | 41 | 42 | def scn_angle_mask(seq, angles=None, device=None): 43 | """ Inputs: 44 | * seq: (length). iterable of 1-letter aa codes of a protein 45 | * angles: (length, 12). [phi, psi, omega, b_angle(n_ca_c), b_angle(ca_c_n), b_angle(c_n_ca), 6_scn_torsions] 46 | Outputs: (L, 14) maps point to theta and dihedral. 
47 | first angle is theta, second is dihedral 48 | """ 49 | device = angles.device if angles is not None else torch.device("cpu") 50 | precise = angles.dtype if angles is not None else torch.get_default_dtype() 51 | torsion_mask_use = "torsion_mask" if angles is not None else "torsion_mask_filled" 52 | # get masks 53 | theta_mask = torch.tensor([SUPREME_INFO[aa]['theta_mask'] for aa in seq], dtype=precise).to(device) 54 | torsion_mask = torch.tensor([SUPREME_INFO[aa][torsion_mask_use] for aa in seq], dtype=precise).to(device) 55 | 56 | # adapt general to specific angles if passed 57 | if angles is not None: 58 | # fill masks with angle values 59 | theta_mask[:, 0] = angles[:, 4] # ca_c_n 60 | theta_mask[1:, 1] = angles[:-1, 5] # c_n_ca 61 | theta_mask[:, 2] = angles[:, 3] # n_ca_c 62 | # backbone_torsions 63 | torsion_mask[:, 0] = angles[:, 1] # n determined by psi of previous 64 | torsion_mask[1:, 1] = angles[:-1, 2] # ca determined by omega of previous 65 | torsion_mask[:, 2] = angles[:, 0] # c determined by phi 66 | # https://github.com/jonathanking/sidechainnet/blob/master/sidechainnet/structure/StructureBuilder.py#L313 67 | torsion_mask[:, 3] = angles[:, 1] - np.pi 68 | 69 | # add torsions to sidechains - no need to modify indexes due to torsion modification 70 | # since extra rigid modies are in terminal positions in sidechain 71 | to_fill = torsion_mask != torsion_mask # "p" fill with passed values 72 | to_pick = torsion_mask == 999 # "i" infer from previous one 73 | for i,aa in enumerate(seq): 74 | # check if any is nan -> fill the holes 75 | number = to_fill[i].long().sum() 76 | torsion_mask[i, to_fill[i]] = angles[i, 6:6+number] 77 | 78 | # pick previous value for inferred torsions 79 | for j, val in enumerate(to_pick[i]): 80 | if val: 81 | torsion_mask[i, j] = torsion_mask[i, j-1] - np.pi # pick values from last one. 
82 | 83 | # special rigid bodies anomalies: 84 | if aa == "I": # scn_torsion(CG1) - scn_torsion(CG2) = 2.13 (see KB) 85 | torsion_mask[i, 7] += torsion_mask[i, 5] 86 | elif aa == "L": 87 | torsion_mask[i, 7] += torsion_mask[i, 6] 88 | 89 | 90 | torsion_mask[-1, 3] += np.pi 91 | return torch.stack([theta_mask, torsion_mask], dim=0) 92 | 93 | 94 | def scn_index_mask(seq): 95 | """ Inputs: 96 | * seq: (length). iterable of 1-letter aa codes of a protein 97 | Outputs: (L, 11, 3) maps point to theta and dihedral. 98 | first angle is theta, second is dihedral 99 | """ 100 | idxs = torch.tensor([SUPREME_INFO[aa]['idx_mask'] for aa in seq]) 101 | return rearrange(idxs, 'l s d -> d l s') 102 | 103 | 104 | def scn_rigid_index_mask(seq, c_alpha=None): 105 | """ Inputs: 106 | * seq: (length). iterable of 1-letter aa codes of a protein 107 | * c_alpha: bool. whether to return only the c_alpha rigid group 108 | Outputs: (3, Length * Groups). indexes for 1st, 2nd and 3rd point 109 | to construct frames for each group. 110 | """ 111 | if c_alpha: 112 | return torch.cat([torch.tensor(SUPREME_INFO[aa]['rigid_idx_mask'])[:1] + 14*i \ 113 | for i,aa in enumerate(seq)], dim=0).t() 114 | return torch.cat([torch.tensor(SUPREME_INFO[aa]['rigid_idx_mask']) + 14*i \ 115 | for i,aa in enumerate(seq)], dim=0).t() 116 | 117 | 118 | def build_scaffolds_from_scn_angles(seq, angles=None, coords=None, device="auto"): 119 | """ Builds scaffolds for fast access to data 120 | Inputs: 121 | * seq: string of aas (1 letter code) 122 | * angles: (L, 12) tensor containing the internal angles. 123 | Distributed as follows (following sidechainnet convention): 124 | * (L, 3) for torsion angles 125 | * (L, 3) bond angles 126 | * (L, 6) sidechain angles 127 | * coords: (L, 3) sidechainnet coords. builds the mask with those instead 128 | (better accuracy if modified residues present). 
129 | Outputs: 130 | * cloud_mask: (L, 14 ) mask of points that should be converted to coords 131 | * point_ref_mask: (3, L, 11) maps point (except n-ca-c) to idxs of 132 | previous 3 points in the coords array 133 | * angles_mask: (2, L, 14) maps point to theta and dihedral 134 | * bond_mask: (L, 14) gives the length of the bond originating that atom 135 | """ 136 | # auto infer device and precision 137 | precise = angles.dtype if angles is not None else torch.get_default_dtype() 138 | if device == "auto": 139 | device = angles.device if angles is not None else device 140 | 141 | if coords is not None: 142 | cloud_mask = scn_cloud_mask(seq, coords=coords) 143 | else: 144 | cloud_mask = scn_cloud_mask(seq) 145 | 146 | cloud_mask = cloud_mask.bool().to(device) 147 | 148 | point_ref_mask = scn_index_mask(seq).long().to(device) 149 | 150 | angles_mask = scn_angle_mask(seq, angles).to(device, precise) 151 | 152 | bond_mask = scn_bond_mask(seq).to(device, precise) 153 | # return all in a dict 154 | return {"cloud_mask": cloud_mask, 155 | "point_ref_mask": point_ref_mask, 156 | "angles_mask": angles_mask, 157 | "bond_mask": bond_mask } 158 | 159 | 160 | ############################# 161 | ####### ENCODERS ############ 162 | ############################# 163 | 164 | 165 | def modify_angles_mask_with_torsions(seq, angles_mask, torsions): 166 | """ Modifies a torsion mask to include variable torsions. 167 | Inputs: 168 | * seq: (L,) str. 
FASTA sequence 169 | * angles_mask: (2, L, 14) float tensor of (angles, torsions) 170 | * torsions: (L, 4) float tensor (or (L, 5) if it includes torsion for cb) 171 | Outputs: (2, L, 14) a new angles mask 172 | """ 173 | c_beta = torsions.shape[-1] == 5 # whether c_beta torsion is passed as well 174 | start = 4 if c_beta else 5 175 | # get mask of to-fill values 176 | torsion_mask = torch.tensor([SUPREME_INFO[aa]["torsion_mask"] for aa in seq]).to(torsions.device) # (L, 14) 177 | torsion_mask = torsion_mask != torsion_mask # values that are nan need replace 178 | # undesired outside of margins 179 | torsion_mask[:, :start] = torsion_mask[:, start+torsions.shape[-1]:] = False 180 | 181 | angles_mask[1, torsion_mask] = torsions[ torsion_mask[:, start:start+torsions.shape[-1]] ] 182 | return angles_mask 183 | 184 | 185 | def modify_scaffolds_with_coords(scaffolds, coords): 186 | """ Gets scaffolds and fills in the right data. 187 | Inputs: 188 | * scaffolds: dict. as returned by `build_scaffolds_from_scn_angles` 189 | * coords: (L, 14, 3). sidechainnet tensor. 
same device as scaffolds 190 | Outputs: corrected scaffolds 191 | """ 192 | 193 | 194 | # calculate distances and update: 195 | # N, CA, C 196 | scaffolds["bond_mask"][1:, 0] = torch.norm(coords[1:, 0] - coords[:-1, 2], dim=-1) # N 197 | scaffolds["bond_mask"][ :, 1] = torch.norm(coords[ :, 1] - coords[: , 0], dim=-1) # CA 198 | scaffolds["bond_mask"][ :, 2] = torch.norm(coords[ :, 2] - coords[: , 1], dim=-1) # C 199 | # O, CB, side chain 200 | selector = np.arange(len(coords)) 201 | for i in range(3, 14): 202 | # get indexes 203 | idx_a, idx_b, idx_c = scaffolds["point_ref_mask"][:, :, i-3] # (3, L, 11) -> 3 * (L, 11) 204 | # correct distances 205 | scaffolds["bond_mask"][:, i] = torch.norm(coords[:, i] - coords[selector, idx_c], dim=-1) 206 | # get angles 207 | scaffolds["angles_mask"][0, :, i] = get_angle(coords[selector, idx_b], 208 | coords[selector, idx_c], 209 | coords[:, i]) 210 | # handle C-beta, where the C requested is from the previous aa 211 | if i == 4: 212 | # for 1st residue, use position of the second residue's N 213 | first_next_n = coords[1, :1] # 1, 3 214 | # the c requested is from the previous residue 215 | main_c_prev_idxs = coords[selector[:-1], idx_a[1:]]# (L-1), 3 216 | # concat 217 | coords_a = torch.cat([first_next_n, main_c_prev_idxs]) 218 | else: 219 | coords_a = coords[selector, idx_a] 220 | # get dihedrals 221 | scaffolds["angles_mask"][1, :, i] = get_dihedral(coords_a, 222 | coords[selector, idx_b], 223 | coords[selector, idx_c], 224 | coords[:, i]) 225 | # correct angles and dihedrals for backbone 226 | scaffolds["angles_mask"][0, :-1, 0] = get_angle(coords[:-1, 1], coords[:-1, 2], coords[1: , 0]) # ca_c_n 227 | scaffolds["angles_mask"][0, 1:, 1] = get_angle(coords[:-1, 2], coords[1:, 0], coords[1: , 1]) # c_n_ca 228 | scaffolds["angles_mask"][0, :, 2] = get_angle(coords[:, 0], coords[ :, 1], coords[ : , 2]) # n_ca_c 229 | 230 | # N determined by previous psi = f(n, ca, c, n+1) 231 | scaffolds["angles_mask"][1, :-1, 0] = 
get_dihedral(coords[:-1, 0], coords[:-1, 1], coords[:-1, 2], coords[1:, 0]) 232 | # CA determined by omega = f(ca, c, n+1, ca+1) 233 | scaffolds["angles_mask"][1, 1:, 1] = get_dihedral(coords[:-1, 1], coords[:-1, 2], coords[1:, 0], coords[1:, 1]) 234 | # C determined by phi = f(c-1, n, ca, c) 235 | scaffolds["angles_mask"][1, 1:, 2] = get_dihedral(coords[:-1, 2], coords[1:, 0], coords[1:, 1], coords[1:, 2]) 236 | 237 | return scaffolds 238 | 239 | 240 | ################################## 241 | ####### MAIN FUNCTION ############ 242 | ################################## 243 | 244 | 245 | def protein_fold(cloud_mask, point_ref_mask, angles_mask, bond_mask, 246 | device=torch.device("cpu"), hybrid=False): 247 | """ Calcs coords of a protein given it's 248 | sequence and internal angles. 249 | Inputs: 250 | * cloud_mask: (L, 14) mask of points that should be converted to coords 251 | * point_ref_mask: (3, L, 11) maps point (except n-ca-c) to idxs of 252 | previous 3 points in the coords array 253 | * angles_mask: (2, 14, L) maps point to theta and dihedral 254 | * bond_mask: (L, 14) gives the length of the bond originating that atom 255 | 256 | Output: (L, 14, 3) and (L, 14) coordinates and cloud_mask 257 | """ 258 | # automatic type (float, mixed, double) and size detection 259 | precise = bond_mask.dtype 260 | length = cloud_mask.shape[0] 261 | # create coord wrapper 262 | coords = torch.zeros(length, 14, 3, device=device, dtype=precise) 263 | 264 | # do first AA 265 | coords[0, 1] = coords[0, 0] + torch.tensor([1, 0, 0], device=device, dtype=precise) * BB_BUILD_INFO["BONDLENS"]["n-ca"] 266 | coords[0, 2] = coords[0, 1] + torch.tensor([torch.cos(np.pi - angles_mask[0, 0, 2]), 267 | torch.sin(np.pi - angles_mask[0, 0, 2]), 268 | 0.], device=device, dtype=precise) * BB_BUILD_INFO["BONDLENS"]["ca-c"] 269 | 270 | # starting positions (in the x,y plane) and normal vector [0,0,1] 271 | init_a = repeat(torch.tensor([1., 0., 0.], device=device, dtype=precise), 'd -> l d', 
l=length) 272 | init_b = repeat(torch.tensor([1., 1., 0.], device=device, dtype=precise), 'd -> l d', l=length) 273 | # do N -> CA. don't do 1st since its done already 274 | thetas, dihedrals = angles_mask[:, :, 1] 275 | coords[1:, 1] = mp_nerf_torch(init_a, 276 | init_b, 277 | coords[:, 0], 278 | bond_mask[:, 1], 279 | thetas, dihedrals)[1:] 280 | # do CA -> C. don't do 1st since its done already 281 | thetas, dihedrals = angles_mask[:, :, 2] 282 | coords[1:, 2] = mp_nerf_torch(init_b, 283 | coords[:, 0], 284 | coords[:, 1], 285 | bond_mask[:, 2], 286 | thetas, dihedrals)[1:] 287 | # do C -> N 288 | thetas, dihedrals = angles_mask[:, :, 0] 289 | coords[:, 3] = mp_nerf_torch(coords[:, 0], 290 | coords[:, 1], 291 | coords[:, 2], 292 | bond_mask[:, 0], 293 | thetas, dihedrals) 294 | 295 | ######### 296 | # sequential pass to join fragments 297 | ######### 298 | # part of rotation mat corresponding to origin - 3 orthogonals 299 | mat_origin = get_axis_matrix(init_a[0], init_b[0], coords[0, 0], norm=False) 300 | # part of rotation mat corresponding to destins || a, b, c = CA, C, N+1 301 | # (L-1) since the first is in the origin already 302 | mat_destins = get_axis_matrix(coords[:-1, 1], coords[:-1, 2], coords[:-1, 3]) 303 | 304 | # get rotation matrices from origins 305 | # https://math.stackexchange.com/questions/1876615/rotation-matrix-from-plane-a-to-b 306 | rotations = torch.matmul(mat_origin.t(), mat_destins) 307 | rotations /= torch.norm(rotations, dim=-1, keepdim=True) 308 | 309 | # do rotation concatenation - do for loop in cpu always - faster 310 | rotations = rotations.cpu() if coords.is_cuda and hybrid else rotations 311 | for i in range(1, length-1): 312 | rotations[i] = torch.matmul(rotations[i], rotations[i-1]) 313 | rotations = rotations.to(device) if coords.is_cuda and hybrid else rotations 314 | # rotate all 315 | coords[1:, :4] = torch.matmul(coords[1:, :4], rotations) 316 | # offset each position by cumulative sum at that position 317 | coords[1:, 
:4] += torch.cumsum(coords[:-1, 3], dim=0).unsqueeze(-2) 318 | 319 | 320 | ######### 321 | # parallel sidechain - do the oxygen, c-beta and side chain 322 | ######### 323 | for i in range(3,14): 324 | level_mask = cloud_mask[:, i] 325 | thetas, dihedrals = angles_mask[:, level_mask, i] 326 | idx_a, idx_b, idx_c = point_ref_mask[:, level_mask, i-3] 327 | 328 | # to place C-beta, we need the carbons from prev res - not available for the 1st res 329 | if i == 4: 330 | # the c requested is from the previous residue - offset boolean mask by one 331 | # can't be done with slicing bc glycines are inside chain (dont have cb) 332 | coords_a = coords[(level_mask.nonzero().view(-1) - 1), idx_a] # (L-1), 3 333 | # if first residue is not glycine, 334 | # for 1st residue, use position of the second residue's N (1,3) 335 | if level_mask[0].item(): 336 | coords_a[0] = coords[1, 1] 337 | else: 338 | coords_a = coords[level_mask, idx_a] 339 | 340 | coords[level_mask, i] = mp_nerf_torch(coords_a, 341 | coords[level_mask, idx_b], 342 | coords[level_mask, idx_c], 343 | bond_mask[level_mask, i], 344 | thetas, dihedrals) 345 | 346 | return coords, cloud_mask 347 | 348 | 349 | def sidechain_fold(wrapper, cloud_mask, point_ref_mask, angles_mask, bond_mask, 350 | device=torch.device("cpu"), c_beta=False): 351 | """ Calcs coords of a protein given it's sequence and internal angles. 352 | Inputs: 353 | * wrapper: (L, 14, 3). 
coords container with backbone ([:, :3]) and optionally 354 | c_beta ([:, 4]) 355 | * cloud_mask: (L, 14) mask of points that should be converted to coords 356 | * point_ref_mask: (3, L, 11) maps point (except n-ca-c) to idxs of 357 | previous 3 points in the coords array 358 | * angles_mask: (2, 14, L) maps point to theta and dihedral 359 | * bond_mask: (L, 14) gives the length of the bond originating that atom 360 | * c_beta: whether to place cbeta 361 | 362 | Output: (L, 14, 3) and (L, 14) coordinates and cloud_mask 363 | """ 364 | precise = wrapper.dtype 365 | 366 | # parallel sidechain - do the oxygen, c-beta and side chain 367 | for i in range(3,14): 368 | # skip cbeta if arg is set 369 | if i == 4 and not c_beta: 370 | continue 371 | # prepare inputs 372 | level_mask = cloud_mask[:, i] 373 | thetas, dihedrals = angles_mask[:, level_mask, i] 374 | idx_a, idx_b, idx_c = point_ref_mask[:, level_mask, i-3] 375 | 376 | # to place C-beta, we need the carbons from prev res - not available for the 1st res 377 | if i == 4: 378 | # the c requested is from the previous residue - offset boolean mask by one 379 | # can't be done with slicing bc glycines are inside chain (dont have cb) 380 | coords_a = wrapper[(level_mask.nonzero().view(-1) - 1), idx_a] # (L-1), 3 381 | # if first residue is not glycine, 382 | # for 1st residue, use position of the second residue's N (1,3) 383 | if level_mask[0].item(): 384 | coords_a[0] = wrapper[1, 1] 385 | else: 386 | coords_a = wrapper[level_mask, idx_a] 387 | 388 | wrapper[level_mask, i] = mp_nerf_torch(coords_a, 389 | wrapper[level_mask, idx_b], 390 | wrapper[level_mask, idx_c], 391 | bond_mask[level_mask, i], 392 | thetas, dihedrals) 393 | 394 | return wrapper, cloud_mask 395 | -------------------------------------------------------------------------------- /mp_nerf/utils.py: -------------------------------------------------------------------------------- 1 | # Author: Eric Alcaide 2 | 3 | import torch 4 | import numpy as np 5 | 
from einops import repeat, rearrange 6 | 7 | 8 | # random hacks 9 | 10 | # to_pi_minus_pi(4) = -2.28 # to_pi_minus_pi(-4) = 2.28 # rads to pi-(-pi) 11 | to_pi_minus_pi = lambda x: torch.where( (x//np.pi)%2 == 0, x%np.pi , -(2*np.pi-x%(2*np.pi)) ) 12 | to_zero_two_pi = lambda x: torch.where( x>np.pi, x%np.pi, 2*np.pi + x%np.pi ) 13 | 14 | # data utils 15 | def get_prot(dataloader_=None, vocab_=None, min_len=80, max_len=150, verbose=True): 16 | """ Gets a protein from sidechainnet and returns 17 | the right attrs for training. 18 | Inputs: 19 | * dataloader_: sidechainnet iterator over dataset 20 | * vocab_: sidechainnet VOCAB class 21 | * min_len: int. minimum sequence length 22 | * max_len: int. maximum sequence length 23 | * verbose: bool. verbosity level 24 | Outputs: (cleaned, without padding) 25 | (seq_str, int_seq, coords, angles, padding_seq, mask, pid) 26 | """ 27 | while True: 28 | for b,batch in enumerate(dataloader_['train']): 29 | for i in range(batch.int_seqs.shape[0]): 30 | # strip padding - matching angles to string means 31 | # only accepting prots with no missing residues (angles would be 0) 32 | padding_seq = (batch.int_seqs[i] == 20).sum().item() 33 | padding_angles = (torch.abs(batch.angs[i]).sum(dim=-1) == 0).long().sum().item() 34 | 35 | if padding_seq == padding_angles: 36 | # check for appropiate length 37 | real_len = batch.int_seqs[i].shape[0] - padding_seq 38 | if max_len >= real_len >= min_len: 39 | # strip padding tokens 40 | seq = ''.join([vocab_.int2char(aa) for aa in batch.int_seqs[i].numpy()]) 41 | seq = seq[:-padding_seq or None] 42 | int_seq = batch.int_seqs[i][:-padding_seq or None] 43 | angles = batch.angs[i][:-padding_seq or None] 44 | mask = batch.msks[i][:-padding_seq or None] 45 | coords = batch.crds[i][:-padding_seq*14 or None] 46 | 47 | if verbose: 48 | print("stopping at sequence of length", real_len) 49 | return seq, int_seq, coords, angles, padding_seq, mask, batch.pids[i] 50 | else: 51 | if verbose: 52 | print("found a 
seq of length:", batch.int_seqs[i].shape, 53 | "but oustide the threshold:", min_len, max_len) 54 | else: 55 | if verbose: 56 | print("paddings not matching", padding_seq, padding_angles) 57 | pass 58 | return None 59 | 60 | 61 | ###################### 62 | ## structural utils ## 63 | ###################### 64 | 65 | def get_dihedral(c1, c2, c3, c4): 66 | """ Returns the dihedral angle in radians. 67 | Will use atan2 formula from: 68 | https://en.wikipedia.org/wiki/Dihedral_angle#In_polymer_physics 69 | Inputs: 70 | * c1: (batch, 3) or (3,) 71 | * c2: (batch, 3) or (3,) 72 | * c3: (batch, 3) or (3,) 73 | * c4: (batch, 3) or (3,) 74 | """ 75 | u1 = c2 - c1 76 | u2 = c3 - c2 77 | u3 = c4 - c3 78 | 79 | return torch.atan2( ( (torch.norm(u2, dim=-1, keepdim=True) * u1) * torch.cross(u2,u3, dim=-1) ).sum(dim=-1) , 80 | ( torch.cross(u1,u2, dim=-1) * torch.cross(u2, u3, dim=-1) ).sum(dim=-1) ) 81 | 82 | 83 | def get_angle(c1, c2, c3): 84 | """ Returns the angle in radians. 85 | Inputs: 86 | * c1: (batch, 3) or (3,) 87 | * c2: (batch, 3) or (3,) 88 | * c3: (batch, 3) or (3,) 89 | """ 90 | u1 = c2 - c1 91 | u2 = c3 - c2 92 | 93 | # dont use acos since norms involved. 94 | # better use atan2 formula: atan2(cross, dot) from here: 95 | # https://johnblackburne.blogspot.com/2012/05/angle-between-two-3d-vectors.html 96 | 97 | # add a minus since we want the angle in reversed order - sidechainnet issues 98 | return torch.atan2( torch.norm(torch.cross(u1,u2, dim=-1), dim=-1), 99 | -(u1*u2).sum(dim=-1) ) 100 | 101 | 102 | def kabsch_torch(X, Y): 103 | """ Kabsch alignment of X into Y. 104 | Assumes X,Y are both (D, N) - usually (3, N) 105 | """ 106 | # center X and Y to the origin 107 | X_ = X - X.mean(dim=-1, keepdim=True) 108 | Y_ = Y - Y.mean(dim=-1, keepdim=True) 109 | # calculate convariance matrix (for each prot in the batch) 110 | C = torch.matmul(X_, Y_.t()) 111 | # Optimal rotation matrix via SVD - warning! 
W must be transposed 112 | if int(torch.__version__.split(".")[1]) < 8: 113 | V, S, W = torch.svd(C.detach()) 114 | W = W.t() 115 | else: 116 | V, S, W = torch.linalg.svd(C.detach()) 117 | # determinant sign for direction correction 118 | d = (torch.det(V) * torch.det(W)) < 0.0 119 | if d: 120 | S[-1] = S[-1] * (-1) 121 | V[:, -1] = V[:, -1] * (-1) 122 | # Create Rotation matrix U 123 | U = torch.matmul(V, W) 124 | # calculate rotations 125 | X_ = torch.matmul(X_.t(), U).t() 126 | # return centered and aligned 127 | return X_, Y_ 128 | 129 | 130 | def rmsd_torch(X, Y): 131 | """ Assumes x,y are both (batch, d, n) - usually (batch, 3, N). """ 132 | return torch.sqrt( torch.mean((X - Y)**2, axis=(-1, -2)) ) 133 | 134 | 135 | 136 | -------------------------------------------------------------------------------- /notebooks/experiments/[131, 150]_info.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments/[131, 150]_info.joblib -------------------------------------------------------------------------------- /notebooks/experiments/[200, 250]_info.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments/[200, 250]_info.joblib -------------------------------------------------------------------------------- /notebooks/experiments/[331, 351]_info.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments/[331, 351]_info.joblib -------------------------------------------------------------------------------- /notebooks/experiments/[400, 450]_info.joblib: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments/[400, 450]_info.joblib -------------------------------------------------------------------------------- /notebooks/experiments/[500, 550]_info.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments/[500, 550]_info.joblib -------------------------------------------------------------------------------- /notebooks/experiments/[600, 650]_info.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments/[600, 650]_info.joblib -------------------------------------------------------------------------------- /notebooks/experiments/[700, 780]_info.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments/[700, 780]_info.joblib -------------------------------------------------------------------------------- /notebooks/experiments/[800, 900]_info.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments/[800, 900]_info.joblib -------------------------------------------------------------------------------- /notebooks/experiments/[905, 1070]_info.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments/[905, 1070]_info.joblib -------------------------------------------------------------------------------- /notebooks/experiments/[905, 
970]_info.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments/[905, 970]_info.joblib -------------------------------------------------------------------------------- /notebooks/experiments/logs_experiment.txt: -------------------------------------------------------------------------------- 1 | 2021-06-07 23:13:24,959 INFO MainThread root Loading data 2 | 3 | 2021-06-07 23:13:38,739 INFO MainThread root Loading data 4 | 5 | 2021-06-07 23:13:48,499 INFO MainThread root Loading data 6 | 7 | 2021-06-07 23:13:48,499 INFO MainThread root Preparing speed tests: for device device(type='cpu') and hybrid_opt: False 8 | 9 | 2021-06-07 23:14:01,528 INFO MainThread root Loading data 10 | 11 | 2021-06-07 23:14:01,528 INFO MainThread root Preparing speed tests: for device device(type='cpu') and hybrid_opt: False 12 | 13 | 2021-06-07 23:14:53,890 INFO MainThread root Loading data 14 | 15 | 2021-06-07 23:14:59,937 INFO MainThread root Loading data 16 | 17 | 2021-06-07 23:14:59,937 INFO MainThread root Preparing speed tests: for device device(type='cpu') and hybrid_opt: False 18 | 19 | 2021-06-07 23:14:59,954 DEBUG MainThread .prody 1864 atoms and 1 coordinate set(s) were parsed in 0.02s. 20 | 2021-06-07 23:15:10,979 INFO MainThread root Loading data 21 | 22 | 2021-06-07 23:15:10,980 INFO MainThread root Preparing speed tests: for device device(type='cpu') and hybrid_opt: False 23 | 24 | 2021-06-07 23:15:10,995 DEBUG MainThread .prody 1864 atoms and 1 coordinate set(s) were parsed in 0.01s. 25 | 2021-06-07 23:15:32,111 INFO MainThread root Loading data 26 | 27 | 2021-06-07 23:15:32,112 INFO MainThread root Preparing speed tests: for device device(type='cpu') and hybrid_opt: False 28 | 29 | 2021-06-07 23:15:32,124 DEBUG MainThread .prody 1864 atoms and 1 coordinate set(s) were parsed in 0.01s. 
30 | 2021-06-07 23:15:32,659 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/5rsa_ribonuclease.pdb 31 | 32 | 2021-06-07 23:15:38,006 INFO MainThread root 5.346866726 for 1000 calls 33 | 2021-06-07 23:15:38,006 INFO MainThread root Done 34 | 2021-06-07 23:15:38,006 INFO MainThread root 35 | 36 | ======= 37 | 38 | 39 | 2021-06-07 23:15:38,084 DEBUG MainThread .prody 10003 atoms and 1 coordinate set(s) were parsed in 0.08s. 40 | 2021-06-07 23:15:41,299 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/1000_ala.pdb 41 | 42 | 2021-06-07 23:15:56,652 INFO MainThread root 15.352682389000002 for 1000 calls 43 | 2021-06-07 23:15:56,652 INFO MainThread root Done 44 | 2021-06-07 23:15:56,652 INFO MainThread root 45 | 46 | ======= 47 | 48 | 49 | 2021-06-07 23:15:56,690 DEBUG MainThread .prody 5003 atoms and 1 coordinate set(s) were parsed in 0.04s. 50 | 2021-06-07 23:15:58,079 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/500_ala.pdb 51 | 52 | 2021-06-07 23:16:07,192 INFO MainThread root 9.112788776999999 for 1000 calls 53 | 2021-06-07 23:16:07,192 INFO MainThread root Done 54 | 2021-06-07 23:16:07,192 INFO MainThread root 55 | 56 | ======= 57 | 58 | 59 | 2021-06-07 23:16:07,214 DEBUG MainThread .prody 2568 atoms and 1 coordinate set(s) were parsed in 0.02s. 
60 | 2021-06-07 23:16:08,470 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/il10_lactate_dh.pdb 61 | 62 | 2021-06-07 23:16:16,980 INFO MainThread root 8.509638406999997 for 1000 calls 63 | 2021-06-07 23:16:16,980 INFO MainThread root Done 64 | 2021-06-07 23:16:16,980 INFO MainThread root 65 | 66 | ======= 67 | 68 | 69 | 2021-06-07 23:16:16,980 INFO MainThread root Execution has finished 70 | 71 | 2021-06-07 22:36:56,573 INFO MainThread root Loading data 72 | 73 | 2021-06-07 22:36:56,648 INFO MainThread root Preparing speed tests: for device device(type='cpu') and hybrid_opt: False 74 | 75 | 2021-06-07 22:36:56,715 DEBUG MainThread .prody 10003 atoms and 1 coordinate set(s) were parsed in 0.07s. 76 | 2021-06-07 22:37:00,040 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/1000_ala.pdb 77 | 78 | 2021-06-07 22:37:31,005 INFO MainThread root 30.9647682 for 1000 calls 79 | 2021-06-07 22:37:31,005 INFO MainThread root Done 80 | 2021-06-07 22:37:31,005 INFO MainThread root 81 | 82 | ======= 83 | 84 | 85 | 2021-06-07 22:37:31,045 DEBUG MainThread .prody 5003 atoms and 1 coordinate set(s) were parsed in 0.04s. 86 | 2021-06-07 22:37:32,399 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/500_ala.pdb 87 | 88 | 2021-06-07 22:37:47,501 INFO MainThread root 15.102381899999997 for 1000 calls 89 | 2021-06-07 22:37:47,501 INFO MainThread root Done 90 | 2021-06-07 22:37:47,502 INFO MainThread root 91 | 92 | ======= 93 | 94 | 95 | 2021-06-07 22:37:47,514 DEBUG MainThread .prody 1864 atoms and 1 coordinate set(s) were parsed in 0.01s. 
96 | 2021-06-07 22:37:47,984 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/5rsa_ribonuclease.pdb 97 | 98 | 2021-06-07 22:37:55,590 INFO MainThread root 7.6064229999999995 for 1000 calls 99 | 2021-06-07 22:37:55,590 INFO MainThread root Done 100 | 2021-06-07 22:37:55,590 INFO MainThread root 101 | 102 | ======= 103 | 104 | 105 | 2021-06-07 22:37:55,608 DEBUG MainThread .prody 2568 atoms and 1 coordinate set(s) were parsed in 0.02s. 106 | 2021-06-07 22:37:56,821 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/il10_lactate_dh.pdb 107 | 108 | 2021-06-07 22:38:10,189 INFO MainThread root 13.368083000000006 for 1000 calls 109 | 2021-06-07 22:38:10,189 INFO MainThread root Done 110 | 2021-06-07 22:38:10,189 INFO MainThread root 111 | 112 | ======= 113 | 114 | 115 | 2021-06-07 22:38:10,189 INFO MainThread root Preparing speed tests: for device device(type='cuda') and hybrid_opt: True 116 | 117 | 2021-06-07 22:38:10,255 DEBUG MainThread .prody 10003 atoms and 1 coordinate set(s) were parsed in 0.07s. 118 | 2021-06-07 22:38:16,823 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/1000_ala.pdb 119 | 120 | 2021-06-07 22:38:46,881 INFO MainThread root 30.058233599999994 for 1000 calls 121 | 2021-06-07 22:38:46,881 INFO MainThread root Done 122 | 2021-06-07 22:38:46,881 INFO MainThread root 123 | 124 | ======= 125 | 126 | 127 | 2021-06-07 22:38:46,971 DEBUG MainThread .prody 5003 atoms and 1 coordinate set(s) were parsed in 0.09s. 
128 | 2021-06-07 22:38:49,619 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/500_ala.pdb 129 | 130 | 2021-06-07 22:39:15,901 INFO MainThread root 26.281134400000013 for 1000 calls 131 | 2021-06-07 22:39:15,901 INFO MainThread root Done 132 | 2021-06-07 22:39:15,901 INFO MainThread root 133 | 134 | ======= 135 | 136 | 137 | 2021-06-07 22:39:15,914 DEBUG MainThread .prody 1864 atoms and 1 coordinate set(s) were parsed in 0.01s. 138 | 2021-06-07 22:39:16,514 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/5rsa_ribonuclease.pdb 139 | 140 | 2021-06-07 22:39:35,427 INFO MainThread root 18.913132099999984 for 1000 calls 141 | 2021-06-07 22:39:35,428 INFO MainThread root Done 142 | 2021-06-07 22:39:35,428 INFO MainThread root 143 | 144 | ======= 145 | 146 | 147 | 2021-06-07 22:39:35,444 DEBUG MainThread .prody 2568 atoms and 1 coordinate set(s) were parsed in 0.02s. 148 | 2021-06-07 22:39:36,919 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/il10_lactate_dh.pdb 149 | 150 | 2021-06-07 22:39:59,241 INFO MainThread root 22.321837799999997 for 1000 calls 151 | 2021-06-07 22:39:59,241 INFO MainThread root Done 152 | 2021-06-07 22:39:59,241 INFO MainThread root 153 | 154 | ======= 155 | 156 | 157 | 2021-06-07 22:39:59,241 INFO MainThread root Preparing speed tests: for device device(type='cuda') and hybrid_opt: False 158 | 159 | 2021-06-07 22:39:59,311 DEBUG MainThread .prody 10003 atoms and 1 coordinate set(s) were parsed in 0.07s. 
160 | 2021-06-07 22:40:03,273 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/1000_ala.pdb 161 | 162 | 2021-06-07 22:40:46,576 INFO MainThread root 43.3028281 for 1000 calls 163 | 2021-06-07 22:40:46,576 INFO MainThread root Done 164 | 2021-06-07 22:40:46,576 INFO MainThread root 165 | 166 | ======= 167 | 168 | 169 | 2021-06-07 22:40:46,609 DEBUG MainThread .prody 5003 atoms and 1 coordinate set(s) were parsed in 0.03s. 170 | 2021-06-07 22:40:48,396 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/500_ala.pdb 171 | 172 | 2021-06-07 22:41:17,417 INFO MainThread root 29.020322799999974 for 1000 calls 173 | 2021-06-07 22:41:17,417 INFO MainThread root Done 174 | 2021-06-07 22:41:17,417 INFO MainThread root 175 | 176 | ======= 177 | 178 | 179 | 2021-06-07 22:41:17,430 DEBUG MainThread .prody 1864 atoms and 1 coordinate set(s) were parsed in 0.01s. 180 | 2021-06-07 22:41:18,004 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/5rsa_ribonuclease.pdb 181 | 182 | 2021-06-07 22:41:39,125 INFO MainThread root 21.120834400000035 for 1000 calls 183 | 2021-06-07 22:41:39,125 INFO MainThread root Done 184 | 2021-06-07 22:41:39,125 INFO MainThread root 185 | 186 | ======= 187 | 188 | 189 | 2021-06-07 22:41:39,150 DEBUG MainThread .prody 2568 atoms and 1 coordinate set(s) were parsed in 0.02s. 
190 | 2021-06-07 22:41:40,646 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/il10_lactate_dh.pdb 191 | 192 | 2021-06-07 22:42:06,874 INFO MainThread root 26.22732030000003 for 1000 calls 193 | 2021-06-07 22:42:06,874 INFO MainThread root Done 194 | 2021-06-07 22:42:06,874 INFO MainThread root 195 | 196 | ======= 197 | 198 | 199 | 2021-06-07 22:42:06,874 INFO MainThread root Execution has finished 200 | 201 | -------------------------------------------------------------------------------- /notebooks/experiments/logs_experiment_scn_various_lengths.txt: -------------------------------------------------------------------------------- 1 | ======= 2 | 2021-05-22 02:14:49,435 INFO MainThread root Assessing lengths of: [134, 214, 336, 401, 501, 621, 753, 862, 994] 3 | 4 | 2021-05-22 02:14:49,435 INFO MainThread root Preparing speed tests: for device 'cpu' 5 | 6 | 2021-05-22 02:14:49,448 INFO MainThread root Assessing the speed of folding algorithm at length 134 7 | 8 | 2021-05-22 02:14:55,960 INFO MainThread root 6.509940282999999 for 1000 calls 9 | 2021-05-22 02:14:55,960 INFO MainThread root Saving the related information at experiments/[131, 150]_info.joblib 10 | 11 | 2021-05-22 02:14:55,961 INFO MainThread root 12 | 13 | ======= 14 | 15 | 16 | 2021-05-22 02:14:55,977 INFO MainThread root Assessing the speed of folding algorithm at length 214 17 | 18 | 2021-05-22 02:15:02,783 INFO MainThread root 6.805784261999996 for 1000 calls 19 | 2021-05-22 02:15:02,783 INFO MainThread root Saving the related information at experiments/[200, 250]_info.joblib 20 | 21 | 2021-05-22 02:15:02,784 INFO MainThread root 22 | 23 | ======= 24 | 25 | 26 | 2021-05-22 02:15:02,808 INFO MainThread root Assessing the speed of folding algorithm at length 336 27 | 28 | 2021-05-22 02:15:11,765 INFO MainThread root 8.956757892999995 for 1000 calls 29 | 2021-05-22 02:15:11,765 INFO MainThread root Saving the related information at experiments/[331, 
351]_info.joblib 30 | 31 | 2021-05-22 02:15:11,766 INFO MainThread root 32 | 33 | ======= 34 | 35 | 36 | 2021-05-22 02:15:11,794 INFO MainThread root Assessing the speed of folding algorithm at length 401 37 | 38 | 2021-05-22 02:15:21,825 INFO MainThread root 10.031293943000009 for 1000 calls 39 | 2021-05-22 02:15:21,825 INFO MainThread root Saving the related information at experiments/[400, 450]_info.joblib 40 | 41 | 2021-05-22 02:15:21,826 INFO MainThread root 42 | 43 | ======= 44 | 45 | 46 | 2021-05-22 02:15:21,862 INFO MainThread root Assessing the speed of folding algorithm at length 501 47 | 48 | 2021-05-22 02:15:33,083 INFO MainThread root 11.221263701000012 for 1000 calls 49 | 2021-05-22 02:15:33,083 INFO MainThread root Saving the related information at experiments/[500, 550]_info.joblib 50 | 51 | 2021-05-22 02:15:33,084 INFO MainThread root 52 | 53 | ======= 54 | 55 | 56 | 2021-05-22 02:15:33,126 INFO MainThread root Assessing the speed of folding algorithm at length 621 57 | 58 | 2021-05-22 02:15:45,854 INFO MainThread root 12.727750233999998 for 1000 calls 59 | 2021-05-22 02:15:45,854 INFO MainThread root Saving the related information at experiments/[600, 650]_info.joblib 60 | 61 | 2021-05-22 02:15:45,855 INFO MainThread root 62 | 63 | ======= 64 | 65 | 66 | 2021-05-22 02:15:45,906 INFO MainThread root Assessing the speed of folding algorithm at length 753 67 | 68 | 2021-05-22 02:16:00,667 INFO MainThread root 14.760831587000013 for 1000 calls 69 | 2021-05-22 02:16:00,672 INFO MainThread root Saving the related information at experiments/[700, 780]_info.joblib 70 | 71 | 2021-05-22 02:16:00,674 INFO MainThread root 72 | 73 | ======= 74 | 75 | 76 | 2021-05-22 02:16:00,734 INFO MainThread root Assessing the speed of folding algorithm at length 862 77 | 78 | 2021-05-22 02:16:17,315 INFO MainThread root 16.580566616 for 1000 calls 79 | 2021-05-22 02:16:17,315 INFO MainThread root Saving the related information at experiments/[800, 900]_info.joblib 80 | 81 
| 2021-05-22 02:16:17,316 INFO MainThread root 82 | 83 | ======= 84 | 85 | 86 | 2021-05-22 02:16:17,383 INFO MainThread root Assessing the speed of folding algorithm at length 994 87 | 88 | 2021-05-22 02:16:35,654 INFO MainThread root 18.271017204000003 for 1000 calls 89 | 2021-05-22 02:16:35,654 INFO MainThread root Saving the related information at experiments/[905, 1070]_info.joblib 90 | 91 | 2021-05-22 02:16:35,655 INFO MainThread root 92 | 93 | ======= 94 | 95 | 96 | 2021-05-22 02:16:35,655 INFO MainThread root Preparing speed tests: for device device(type='cpu') 97 | 98 | 2021-05-22 02:16:35,664 INFO MainThread root Assessing the speed of folding algorithm at length 134 99 | 100 | 2021-05-22 02:16:40,994 INFO MainThread root 5.329709648000005 for 1000 calls 101 | 2021-05-22 02:16:40,994 INFO MainThread root Saving the related information at experiments/[131, 150]_info.joblib 102 | 103 | 2021-05-22 02:16:40,995 INFO MainThread root 104 | 105 | ======= 106 | 107 | 108 | 2021-05-22 02:16:41,010 INFO MainThread root Assessing the speed of folding algorithm at length 214 109 | 110 | 2021-05-22 02:16:47,511 INFO MainThread root 6.501463223999991 for 1000 calls 111 | 2021-05-22 02:16:47,512 INFO MainThread root Saving the related information at experiments/[200, 250]_info.joblib 112 | 113 | 2021-05-22 02:16:47,513 INFO MainThread root 114 | 115 | ======= 116 | 117 | 118 | 2021-05-22 02:16:47,536 INFO MainThread root Assessing the speed of folding algorithm at length 336 119 | 120 | 2021-05-22 02:16:56,197 INFO MainThread root 8.660352851999988 for 1000 calls 121 | 2021-05-22 02:16:56,197 INFO MainThread root Saving the related information at experiments/[331, 351]_info.joblib 122 | 123 | 2021-05-22 02:16:56,198 INFO MainThread root 124 | 125 | ======= 126 | 127 | 128 | 2021-05-22 02:16:56,226 INFO MainThread root Assessing the speed of folding algorithm at length 401 129 | 130 | 2021-05-22 02:17:05,869 INFO MainThread root 9.643088333000009 for 1000 calls 131 | 
2021-05-22 02:17:05,869 INFO MainThread root Saving the related information at experiments/[400, 450]_info.joblib 132 | 133 | 2021-05-22 02:17:05,871 INFO MainThread root 134 | 135 | ======= 136 | 137 | 138 | 2021-05-22 02:17:05,904 INFO MainThread root Assessing the speed of folding algorithm at length 501 139 | 140 | 2021-05-22 02:17:17,308 INFO MainThread root 11.40289807900001 for 1000 calls 141 | 2021-05-22 02:17:17,308 INFO MainThread root Saving the related information at experiments/[500, 550]_info.joblib 142 | 143 | 2021-05-22 02:17:17,309 INFO MainThread root 144 | 145 | ======= 146 | 147 | 148 | 2021-05-22 02:17:17,351 INFO MainThread root Assessing the speed of folding algorithm at length 621 149 | 150 | 2021-05-22 02:17:30,116 INFO MainThread root 12.764849003000023 for 1000 calls 151 | 2021-05-22 02:17:30,117 INFO MainThread root Saving the related information at experiments/[600, 650]_info.joblib 152 | 153 | 2021-05-22 02:17:30,119 INFO MainThread root 154 | 155 | ======= 156 | 157 | 158 | 2021-05-22 02:17:30,171 INFO MainThread root Assessing the speed of folding algorithm at length 753 159 | 160 | 2021-05-22 02:17:44,858 INFO MainThread root 14.687164622000012 for 1000 calls 161 | 2021-05-22 02:17:44,858 INFO MainThread root Saving the related information at experiments/[700, 780]_info.joblib 162 | 163 | 2021-05-22 02:17:44,859 INFO MainThread root 164 | 165 | ======= 166 | 167 | 168 | 2021-05-22 02:17:44,918 INFO MainThread root Assessing the speed of folding algorithm at length 862 169 | 170 | 2021-05-22 02:18:01,473 INFO MainThread root 16.554769015000005 for 1000 calls 171 | 2021-05-22 02:18:01,473 INFO MainThread root Saving the related information at experiments/[800, 900]_info.joblib 172 | 173 | 2021-05-22 02:18:01,474 INFO MainThread root 174 | 175 | ======= 176 | 177 | 178 | 2021-05-22 02:18:01,538 INFO MainThread root Assessing the speed of folding algorithm at length 994 179 | 180 | 2021-05-22 02:18:19,650 INFO MainThread root 
18.111747613000006 for 1000 calls 181 | 2021-05-22 02:18:19,650 INFO MainThread root Saving the related information at experiments/[905, 1070]_info.joblib 182 | 183 | 2021-05-22 02:18:19,651 INFO MainThread root 184 | 185 | ======= 186 | 187 | 188 | 2021-05-22 02:18:19,651 INFO MainThread root Execution has finished 189 | 190 | >>>>>>> 4cabbe55371d6a9a7edeab1db719fa0cf8312eae 191 | 2021-05-22 18:39:33,611 INFO MainThread root Loading data 192 | 193 | 2021-05-22 18:39:33,622 INFO MainThread root Data has been loaded 194 | 195 | 196 | ======= 197 | 198 | 199 | 2021-05-22 18:39:33,622 INFO MainThread root Assessing lengths of: [134, 214, 336, 401, 501, 621, 753, 862, 994] 200 | 201 | 2021-05-22 18:39:33,646 INFO MainThread root Preparing speed tests: for device 'cpu' 202 | 203 | 2021-05-22 18:39:33,670 INFO MainThread root Assessing the speed of folding algorithm at length 134 204 | 205 | 2021-05-22 18:39:42,657 INFO MainThread root 8.9869345 for 1000 calls 206 | 2021-05-22 18:39:42,657 INFO MainThread root Saving the related information at experiments/100_info.joblib 207 | 208 | 2021-05-22 18:39:42,659 INFO MainThread root 209 | 210 | ======= 211 | 212 | 213 | 2021-05-22 18:39:42,687 INFO MainThread root Assessing the speed of folding algorithm at length 214 214 | 215 | 2021-05-22 18:39:53,087 INFO MainThread root 10.4000911 for 1000 calls 216 | 2021-05-22 18:39:53,087 INFO MainThread root Saving the related information at experiments/200_info.joblib 217 | 218 | 2021-05-22 18:39:53,088 INFO MainThread root 219 | 220 | ======= 221 | 222 | 223 | 2021-05-22 18:39:53,122 INFO MainThread root Assessing the speed of folding algorithm at length 336 224 | 225 | 2021-05-22 18:40:06,577 INFO MainThread root 13.455043199999999 for 1000 calls 226 | 2021-05-22 18:40:06,577 INFO MainThread root Saving the related information at experiments/300_info.joblib 227 | 228 | 2021-05-22 18:40:06,578 INFO MainThread root 229 | 230 | ======= 231 | 232 | 233 | 2021-05-22 18:40:06,617 INFO 
MainThread root Assessing the speed of folding algorithm at length 401 234 | 235 | 2021-05-22 18:40:21,715 INFO MainThread root 15.097297400000002 for 1000 calls 236 | 2021-05-22 18:40:21,715 INFO MainThread root Saving the related information at experiments/400_info.joblib 237 | 238 | 2021-05-22 18:40:21,716 INFO MainThread root 239 | 240 | ======= 241 | 242 | 243 | 2021-05-22 18:40:21,779 INFO MainThread root Assessing the speed of folding algorithm at length 501 244 | 245 | 2021-05-22 18:40:40,543 INFO MainThread root 18.764004199999995 for 1000 calls 246 | 2021-05-22 18:40:40,543 INFO MainThread root Saving the related information at experiments/500_info.joblib 247 | 248 | 2021-05-22 18:40:40,544 INFO MainThread root 249 | 250 | ======= 251 | 252 | 253 | 2021-05-22 18:40:40,617 INFO MainThread root Assessing the speed of folding algorithm at length 621 254 | 255 | 2021-05-22 18:41:02,270 INFO MainThread root 21.652811900000003 for 1000 calls 256 | 2021-05-22 18:41:02,270 INFO MainThread root Saving the related information at experiments/600_info.joblib 257 | 258 | 2021-05-22 18:41:02,271 INFO MainThread root 259 | 260 | ======= 261 | 262 | 263 | 2021-05-22 18:41:02,367 INFO MainThread root Assessing the speed of folding algorithm at length 753 264 | 265 | 2021-05-22 18:41:27,302 INFO MainThread root 24.934528900000004 for 1000 calls 266 | 2021-05-22 18:41:27,302 INFO MainThread root Saving the related information at experiments/700_info.joblib 267 | 268 | 2021-05-22 18:41:27,304 INFO MainThread root 269 | 270 | ======= 271 | 272 | 273 | 2021-05-22 18:41:27,431 INFO MainThread root Assessing the speed of folding algorithm at length 862 274 | 275 | 2021-05-22 18:41:56,196 INFO MainThread root 28.7642814 for 1000 calls 276 | 2021-05-22 18:41:56,196 INFO MainThread root Saving the related information at experiments/800_info.joblib 277 | 278 | 2021-05-22 18:41:56,197 INFO MainThread root 279 | 280 | ======= 281 | 282 | 283 | 2021-05-22 18:41:56,312 INFO MainThread 
root Assessing the speed of folding algorithm at length 994 284 | 285 | 2021-05-22 18:42:29,089 INFO MainThread root 32.77735960000001 for 1000 calls 286 | 2021-05-22 18:42:29,090 INFO MainThread root Saving the related information at experiments/900_info.joblib 287 | 288 | 2021-05-22 18:42:29,090 INFO MainThread root 289 | 290 | ======= 291 | 292 | 293 | 2021-05-22 18:42:29,091 INFO MainThread root Preparing speed tests: for device device(type='cuda') -- hybrid=True 294 | 295 | 2021-05-22 18:42:30,437 INFO MainThread root Assessing the speed of folding algorithm at length 134 296 | 297 | 2021-05-22 18:42:48,848 INFO MainThread root 18.41120219999999 for 1000 calls 298 | 2021-05-22 18:42:48,848 INFO MainThread root Saving the related information at experiments/100_info.joblib 299 | 300 | 2021-05-22 18:42:48,849 INFO MainThread root 301 | 302 | ======= 303 | 304 | 305 | 2021-05-22 18:42:49,042 INFO MainThread root Assessing the speed of folding algorithm at length 214 306 | 307 | 2021-05-22 18:43:09,409 INFO MainThread root 20.366851999999994 for 1000 calls 308 | 2021-05-22 18:43:09,409 INFO MainThread root Saving the related information at experiments/200_info.joblib 309 | 310 | 2021-05-22 18:43:09,410 INFO MainThread root 311 | 312 | ======= 313 | 314 | 315 | 2021-05-22 18:43:09,752 INFO MainThread root Assessing the speed of folding algorithm at length 336 316 | 317 | 2021-05-22 18:43:32,291 INFO MainThread root 22.538369399999993 for 1000 calls 318 | 2021-05-22 18:43:32,291 INFO MainThread root Saving the related information at experiments/300_info.joblib 319 | 320 | 2021-05-22 18:43:32,292 INFO MainThread root 321 | 322 | ======= 323 | 324 | 325 | 2021-05-22 18:43:32,658 INFO MainThread root Assessing the speed of folding algorithm at length 401 326 | 327 | 2021-05-22 18:43:55,501 INFO MainThread root 22.84260729999997 for 1000 calls 328 | 2021-05-22 18:43:55,501 INFO MainThread root Saving the related information at experiments/400_info.joblib 329 | 330 | 
2021-05-22 18:43:55,502 INFO MainThread root 331 | 332 | ======= 333 | 334 | 335 | 2021-05-22 18:43:56,096 INFO MainThread root Assessing the speed of folding algorithm at length 501 336 | 337 | 2021-05-22 18:44:20,154 INFO MainThread root 24.057599100000004 for 1000 calls 338 | 2021-05-22 18:44:20,154 INFO MainThread root Saving the related information at experiments/500_info.joblib 339 | 340 | 2021-05-22 18:44:20,155 INFO MainThread root 341 | 342 | ======= 343 | 344 | 345 | 2021-05-22 18:44:20,720 INFO MainThread root Assessing the speed of folding algorithm at length 621 346 | 347 | 2021-05-22 18:44:46,706 INFO MainThread root 25.98607320000002 for 1000 calls 348 | 2021-05-22 18:44:46,706 INFO MainThread root Saving the related information at experiments/600_info.joblib 349 | 350 | 2021-05-22 18:44:46,707 INFO MainThread root 351 | 352 | ======= 353 | 354 | 355 | 2021-05-22 18:44:47,392 INFO MainThread root Assessing the speed of folding algorithm at length 753 356 | 357 | 2021-05-22 18:45:15,028 INFO MainThread root 27.6351363 for 1000 calls 358 | 2021-05-22 18:45:15,028 INFO MainThread root Saving the related information at experiments/700_info.joblib 359 | 360 | 2021-05-22 18:45:15,029 INFO MainThread root 361 | 362 | ======= 363 | 364 | 365 | 2021-05-22 18:45:15,818 INFO MainThread root Assessing the speed of folding algorithm at length 862 366 | 367 | 2021-05-22 18:45:45,070 INFO MainThread root 29.25168880000001 for 1000 calls 368 | 2021-05-22 18:45:45,070 INFO MainThread root Saving the related information at experiments/800_info.joblib 369 | 370 | 2021-05-22 18:45:45,071 INFO MainThread root 371 | 372 | ======= 373 | 374 | 375 | 2021-05-22 18:45:45,994 INFO MainThread root Assessing the speed of folding algorithm at length 994 376 | 377 | 2021-05-22 18:46:17,009 INFO MainThread root 31.0138465 for 1000 calls 378 | 2021-05-22 18:46:17,009 INFO MainThread root Saving the related information at experiments/900_info.joblib 379 | 380 | 2021-05-22 
18:46:17,010 INFO MainThread root 381 | 382 | ======= 383 | 384 | 385 | 2021-05-22 18:46:17,010 INFO MainThread root Execution has finished 386 | 387 | 2021-05-22 18:50:28,714 INFO MainThread root Loading data 388 | 389 | 2021-05-22 18:50:28,718 INFO MainThread root Data has been loaded 390 | 391 | 392 | ======= 393 | 394 | 395 | 2021-05-22 18:50:28,718 INFO MainThread root Assessing lengths of: [134, 214, 336, 401, 501, 621, 753, 862, 994] 396 | 397 | 2021-05-22 18:50:40,190 INFO MainThread root Loading data 398 | 399 | 2021-05-22 18:50:40,194 INFO MainThread root Data has been loaded 400 | 401 | 402 | ======= 403 | 404 | 405 | 2021-05-22 18:50:40,194 INFO MainThread root Assessing lengths of: [134, 214, 336, 401, 501, 621, 753, 862, 994] 406 | 407 | 2021-05-22 18:50:40,217 INFO MainThread root Preparing speed tests: for device device(type='cpu') 408 | 409 | 2021-05-22 18:50:40,232 INFO MainThread root Assessing the speed of folding algorithm at length 134 410 | 411 | 2021-05-22 18:50:48,901 INFO MainThread root 8.6684752 for 1000 calls 412 | 2021-05-22 18:50:48,901 INFO MainThread root Saving the related information at experiments/100_info.joblib 413 | 414 | 2021-05-22 18:50:48,902 INFO MainThread root 415 | 416 | ======= 417 | 418 | 419 | 2021-05-22 18:50:48,923 INFO MainThread root Assessing the speed of folding algorithm at length 214 420 | 421 | 2021-05-22 18:50:59,368 INFO MainThread root 10.4448301 for 1000 calls 422 | 2021-05-22 18:50:59,368 INFO MainThread root Saving the related information at experiments/200_info.joblib 423 | 424 | 2021-05-22 18:50:59,369 INFO MainThread root 425 | 426 | ======= 427 | 428 | 429 | 2021-05-22 18:50:59,415 INFO MainThread root Assessing the speed of folding algorithm at length 336 430 | 431 | 2021-05-22 18:51:13,819 INFO MainThread root 14.403065699999996 for 1000 calls 432 | 2021-05-22 18:51:13,819 INFO MainThread root Saving the related information at experiments/300_info.joblib 433 | 434 | 2021-05-22 18:51:13,820 INFO 
MainThread root 435 | 436 | ======= 437 | 438 | 439 | 2021-05-22 18:51:13,880 INFO MainThread root Assessing the speed of folding algorithm at length 401 440 | 441 | 2021-05-22 18:51:29,132 INFO MainThread root 15.251432700000002 for 1000 calls 442 | 2021-05-22 18:51:29,132 INFO MainThread root Saving the related information at experiments/400_info.joblib 443 | 444 | 2021-05-22 18:51:29,133 INFO MainThread root 445 | 446 | ======= 447 | 448 | 449 | 2021-05-22 18:51:29,181 INFO MainThread root Assessing the speed of folding algorithm at length 501 450 | 451 | 2021-05-22 18:51:47,005 INFO MainThread root 17.824042999999996 for 1000 calls 452 | 2021-05-22 18:51:47,005 INFO MainThread root Saving the related information at experiments/500_info.joblib 453 | 454 | 2021-05-22 18:51:47,006 INFO MainThread root 455 | 456 | ======= 457 | 458 | 459 | 2021-05-22 18:51:47,083 INFO MainThread root Assessing the speed of folding algorithm at length 621 460 | 461 | 2021-05-22 18:52:07,623 INFO MainThread root 20.5405765 for 1000 calls 462 | 2021-05-22 18:52:07,624 INFO MainThread root Saving the related information at experiments/600_info.joblib 463 | 464 | 2021-05-22 18:52:07,625 INFO MainThread root 465 | 466 | ======= 467 | 468 | 469 | 2021-05-22 18:52:07,708 INFO MainThread root Assessing the speed of folding algorithm at length 753 470 | 471 | 2021-05-22 18:52:31,562 INFO MainThread root 23.853287499999993 for 1000 calls 472 | 2021-05-22 18:52:31,562 INFO MainThread root Saving the related information at experiments/700_info.joblib 473 | 474 | 2021-05-22 18:52:31,563 INFO MainThread root 475 | 476 | ======= 477 | 478 | 479 | 2021-05-22 18:52:31,652 INFO MainThread root Assessing the speed of folding algorithm at length 862 480 | 481 | 2021-05-22 18:52:59,035 INFO MainThread root 27.38281640000001 for 1000 calls 482 | 2021-05-22 18:52:59,035 INFO MainThread root Saving the related information at experiments/800_info.joblib 483 | 484 | 2021-05-22 18:52:59,036 INFO MainThread 
root 485 | 486 | ======= 487 | 488 | 489 | 2021-05-22 18:52:59,150 INFO MainThread root Assessing the speed of folding algorithm at length 994 490 | 491 | 2021-05-22 18:53:31,180 INFO MainThread root 32.029055200000016 for 1000 calls 492 | 2021-05-22 18:53:31,180 INFO MainThread root Saving the related information at experiments/900_info.joblib 493 | 494 | 2021-05-22 18:53:31,181 INFO MainThread root 495 | 496 | ======= 497 | 498 | 499 | 2021-05-22 18:53:31,181 INFO MainThread root Preparing speed tests: for device device(type='cuda') - hybrid=True 500 | 501 | 2021-05-22 18:53:32,532 INFO MainThread root Assessing the speed of folding algorithm at length 134 502 | 503 | 2021-05-22 18:53:51,151 INFO MainThread root 18.61965140000001 for 1000 calls 504 | 2021-05-22 18:53:51,152 INFO MainThread root Saving the related information at experiments/100_info.joblib 505 | 506 | 2021-05-22 18:53:51,153 INFO MainThread root 507 | 508 | ======= 509 | 510 | 511 | 2021-05-22 18:53:51,347 INFO MainThread root Assessing the speed of folding algorithm at length 214 512 | 513 | 2021-05-22 18:54:11,743 INFO MainThread root 20.395728099999985 for 1000 calls 514 | 2021-05-22 18:54:11,743 INFO MainThread root Saving the related information at experiments/200_info.joblib 515 | 516 | 2021-05-22 18:54:11,744 INFO MainThread root 517 | 518 | ======= 519 | 520 | 521 | 2021-05-22 18:54:12,052 INFO MainThread root Assessing the speed of folding algorithm at length 336 522 | 523 | 2021-05-22 18:54:34,875 INFO MainThread root 22.822907499999985 for 1000 calls 524 | 2021-05-22 18:54:34,875 INFO MainThread root Saving the related information at experiments/300_info.joblib 525 | 526 | 2021-05-22 18:54:34,876 INFO MainThread root 527 | 528 | ======= 529 | 530 | 531 | 2021-05-22 18:54:35,239 INFO MainThread root Assessing the speed of folding algorithm at length 401 532 | 533 | 2021-05-22 18:54:59,075 INFO MainThread root 23.83573979999997 for 1000 calls 534 | 2021-05-22 18:54:59,075 INFO MainThread 
root Saving the related information at experiments/400_info.joblib 535 | 536 | 2021-05-22 18:54:59,076 INFO MainThread root 537 | 538 | ======= 539 | 540 | 541 | 2021-05-22 18:54:59,530 INFO MainThread root Assessing the speed of folding algorithm at length 501 542 | 543 | 2021-05-22 18:55:24,297 INFO MainThread root 24.76649520000001 for 1000 calls 544 | 2021-05-22 18:55:24,297 INFO MainThread root Saving the related information at experiments/500_info.joblib 545 | 546 | 2021-05-22 18:55:24,298 INFO MainThread root 547 | 548 | ======= 549 | 550 | 551 | 2021-05-22 18:55:24,858 INFO MainThread root Assessing the speed of folding algorithm at length 621 552 | 553 | 2021-05-22 18:55:50,855 INFO MainThread root 25.996778500000005 for 1000 calls 554 | 2021-05-22 18:55:50,856 INFO MainThread root Saving the related information at experiments/600_info.joblib 555 | 556 | 2021-05-22 18:55:50,856 INFO MainThread root 557 | 558 | ======= 559 | 560 | 561 | 2021-05-22 18:55:51,538 INFO MainThread root Assessing the speed of folding algorithm at length 753 562 | 563 | 2021-05-22 18:56:19,326 INFO MainThread root 27.787718600000005 for 1000 calls 564 | 2021-05-22 18:56:19,326 INFO MainThread root Saving the related information at experiments/700_info.joblib 565 | 566 | 2021-05-22 18:56:19,327 INFO MainThread root 567 | 568 | ======= 569 | 570 | 571 | 2021-05-22 18:56:20,108 INFO MainThread root Assessing the speed of folding algorithm at length 862 572 | 573 | 2021-05-22 18:56:49,570 INFO MainThread root 29.461670400000003 for 1000 calls 574 | 2021-05-22 18:56:49,570 INFO MainThread root Saving the related information at experiments/800_info.joblib 575 | 576 | 2021-05-22 18:56:49,571 INFO MainThread root 577 | 578 | ======= 579 | 580 | 581 | 2021-05-22 18:56:50,504 INFO MainThread root Assessing the speed of folding algorithm at length 994 582 | 583 | 2021-05-22 18:57:21,194 INFO MainThread root 30.689694900000006 for 1000 calls 584 | 2021-05-22 18:57:21,194 INFO MainThread root 
Saving the related information at experiments/900_info.joblib 585 | 586 | 2021-05-22 18:57:21,195 INFO MainThread root 587 | 588 | ======= 589 | 590 | 591 | 2021-05-22 18:57:21,196 INFO MainThread root Preparing speed tests: for device device(type='cuda') -- hybrid=False 592 | 593 | 2021-05-22 18:57:21,317 INFO MainThread root Assessing the speed of folding algorithm at length 134 594 | 595 | 2021-05-22 18:57:41,246 INFO MainThread root 19.9283054 for 1000 calls 596 | 2021-05-22 18:57:41,246 INFO MainThread root Saving the related information at experiments/100_info.joblib 597 | 598 | 2021-05-22 18:57:41,247 INFO MainThread root 599 | 600 | ======= 601 | 602 | 603 | 2021-05-22 18:57:41,440 INFO MainThread root Assessing the speed of folding algorithm at length 214 604 | 605 | 2021-05-22 18:58:03,719 INFO MainThread root 22.279464399999995 for 1000 calls 606 | 2021-05-22 18:58:03,719 INFO MainThread root Saving the related information at experiments/200_info.joblib 607 | 608 | 2021-05-22 18:58:03,720 INFO MainThread root 609 | 610 | ======= 611 | 612 | 613 | 2021-05-22 18:58:04,024 INFO MainThread root Assessing the speed of folding algorithm at length 336 614 | 615 | 2021-05-22 18:58:29,494 INFO MainThread root 25.469947400000024 for 1000 calls 616 | 2021-05-22 18:58:29,494 INFO MainThread root Saving the related information at experiments/300_info.joblib 617 | 618 | 2021-05-22 18:58:29,495 INFO MainThread root 619 | 620 | ======= 621 | 622 | 623 | 2021-05-22 18:58:29,859 INFO MainThread root Assessing the speed of folding algorithm at length 401 624 | 625 | 2021-05-22 18:58:58,097 INFO MainThread root 28.238597999999968 for 1000 calls 626 | 2021-05-22 18:58:58,098 INFO MainThread root Saving the related information at experiments/400_info.joblib 627 | 628 | 2021-05-22 18:58:58,098 INFO MainThread root 629 | 630 | ======= 631 | 632 | 633 | 2021-05-22 18:58:58,548 INFO MainThread root Assessing the speed of folding algorithm at length 501 634 | 635 | 2021-05-22 
18:59:29,343 INFO MainThread root 30.79467580000005 for 1000 calls 636 | 2021-05-22 18:59:29,343 INFO MainThread root Saving the related information at experiments/500_info.joblib 637 | 638 | 2021-05-22 18:59:29,344 INFO MainThread root 639 | 640 | ======= 641 | 642 | 643 | 2021-05-22 18:59:29,904 INFO MainThread root Assessing the speed of folding algorithm at length 621 644 | 645 | 2021-05-22 19:00:03,234 INFO MainThread root 33.3301616 for 1000 calls 646 | 2021-05-22 19:00:03,234 INFO MainThread root Saving the related information at experiments/600_info.joblib 647 | 648 | 2021-05-22 19:00:03,235 INFO MainThread root 649 | 650 | ======= 651 | 652 | 653 | 2021-05-22 19:00:03,915 INFO MainThread root Assessing the speed of folding algorithm at length 753 654 | 655 | 2021-05-22 19:00:40,486 INFO MainThread root 36.570508099999984 for 1000 calls 656 | 2021-05-22 19:00:40,486 INFO MainThread root Saving the related information at experiments/700_info.joblib 657 | 658 | 2021-05-22 19:00:40,487 INFO MainThread root 659 | 660 | ======= 661 | 662 | 663 | 2021-05-22 19:00:41,265 INFO MainThread root Assessing the speed of folding algorithm at length 862 664 | 665 | 2021-05-22 19:01:21,325 INFO MainThread root 40.06054449999999 for 1000 calls 666 | 2021-05-22 19:01:21,326 INFO MainThread root Saving the related information at experiments/800_info.joblib 667 | 668 | 2021-05-22 19:01:21,327 INFO MainThread root 669 | 670 | ======= 671 | 672 | 673 | 2021-05-22 19:01:22,419 INFO MainThread root Assessing the speed of folding algorithm at length 994 674 | 675 | 2021-05-22 19:02:06,714 INFO MainThread root 44.29495259999999 for 1000 calls 676 | 2021-05-22 19:02:06,714 INFO MainThread root Saving the related information at experiments/900_info.joblib 677 | 678 | 2021-05-22 19:02:06,715 INFO MainThread root 679 | 680 | ======= 681 | 682 | 683 | 2021-05-22 19:02:06,715 INFO MainThread root Execution has finished 684 | 685 | 
-------------------------------------------------------------------------------- /notebooks/experiments/profile_csv: -------------------------------------------------------------------------------- 1 | ncalls,tottime,percall,cumtime,percall,filename:lineno(function) 2 | 1,0.00598,0.00598,0.0171,0.0171,massive_pnerf.py:70(proto_fold) 3 | 16,0.004091,0.0002557,0.004091,0.0002557,~:0() 4 | 773,0.002993,3.872e-06,0.002993,3.872e-06,~:0() 5 | 14,0.001775,0.0001268,0.00792,0.0005657,massive_pnerf.py:40(mp_nerf_torch) 6 | 32,0.000533,1.666e-05,0.000533,1.666e-05,~:0() 7 | 30,0.000495,1.65e-05,0.000495,1.65e-05,~:0() 8 | 14,0.000146,1.043e-05,0.000146,1.043e-05,~:0() 9 | 29,0.000119,4.103e-06,0.000119,4.103e-06,~:0() 10 | 16,0.000117,7.312e-06,0.004243,0.0002652,functional.py:1274(norm) 11 | 25,0.000103,4.12e-06,0.000103,4.12e-06,~:0() 12 | 43,0.0001,2.326e-06,0.0001,2.326e-06,~:0() 13 | 14,7.2e-05,5.143e-06,7.2e-05,5.143e-06,~:0() 14 | 25,7.1e-05,2.84e-06,0.000225,9e-06,tensor.py:575(__iter__) 15 | 2,5.9e-05,2.95e-05,0.000483,0.0002415,massive_pnerf.py:10(get_axis_matrix) 16 | 29,4.9e-05,1.69e-06,4.9e-05,1.69e-06,~:0() 17 | 2,4.3e-05,2.15e-05,4.3e-05,2.15e-05,~:0() 18 | 1,4e-05,4e-05,0.01716,0.01716,~:0() 19 | 25,3e-05,1.2e-06,3e-05,1.2e-06,~:0() 20 | 4,2.8e-05,7e-06,2.8e-05,7e-06,~:0() 21 | 6,2.8e-05,4.667e-06,2.8e-05,4.667e-06,~:0() 22 | 1,2.3e-05,2.3e-05,2.3e-05,2.3e-05,~:0() 23 | 1,2.3e-05,2.3e-05,0.01712,0.01712,:1() 24 | 1,1.9e-05,1.9e-05,1.9e-05,1.9e-05,~:0() 25 | 14,1.8e-05,1.286e-06,1.8e-05,1.286e-06,~:0() 26 | 11,1.6e-05,1.455e-06,1.9e-05,1.727e-06,tensor.py:568(__len__) 27 | 2,1.5e-05,7.5e-06,1.5e-05,7.5e-06,~:0() 28 | 4,1.3e-05,3.25e-06,1.3e-05,3.25e-06,~:0() 29 | 2,1.2e-05,6e-06,0.000107,5.35e-05,einops.py:202(apply) 30 | 25,1.1e-05,4.4e-07,1.1e-05,4.4e-07,~:0() 31 | 16,1.1e-05,6.875e-07,2e-05,1.25e-06,_VF.py:25(__getattr__) 32 | 52,1.1e-05,2.115e-07,1.1e-05,2.115e-07,~:0() 33 | 2,1e-05,5e-06,2.5e-05,1.25e-05,tensor.py:525(__rsub__) 34 | 
2,1e-05,5e-06,0.00012,6e-05,einops.py:327(reduce) 35 | 16,9e-06,5.625e-07,9e-06,5.625e-07,~:0() 36 | 34,9e-06,2.647e-07,9e-06,2.647e-07,~:0() 37 | 1,8e-06,8e-06,8e-06,8e-06,~:0() 38 | 52,8e-06,1.538e-07,8e-06,1.538e-07,~:0() 39 | 2,6e-06,3e-06,6e-06,3e-06,~:0() 40 | 1,6e-06,6e-06,6e-06,6e-06,~:0() 41 | 2,6e-06,3e-06,6e-06,3e-06,~:0() 42 | 2,5e-06,2.5e-06,7e-06,3.5e-06,_backends.py:22(get_backend) 43 | 2,4e-06,2e-06,4e-06,2e-06,einops.py:26(_reduce_axes) 44 | 2,4e-06,2e-06,5.9e-05,2.95e-05,_backends.py:98(add_axes) 45 | 2,4e-06,2e-06,4.7e-05,2.35e-05,_backends.py:336(tile) 46 | 2,4e-06,2e-06,4e-06,2e-06,~:0() 47 | 1,4e-06,4e-06,4e-06,4e-06,~:0() 48 | 2,3e-06,1.5e-06,0.000123,6.15e-05,einops.py:427(repeat) 49 | 2,3e-06,1.5e-06,9e-06,4.5e-06,_backends.py:330(transpose) 50 | 1,3e-06,3e-06,3e-06,3e-06,~:0() 51 | 6,2e-06,3.333e-07,2e-06,3.333e-07,~:0() 52 | 4,2e-06,5e-07,1.5e-05,3.75e-06,_backends.py:83(reshape) 53 | 2,2e-06,1e-06,2e-06,1e-06,_backends.py:302(is_appropriate_type) 54 | 2,2e-06,1e-06,8e-06,4e-06,_backends.py:339(add_axis) 55 | 8,1e-06,1.25e-07,1e-06,1.25e-07,~:0() 56 | 2,1e-06,5e-07,1e-06,5e-07,~:0() 57 | 1,1e-06,1e-06,1e-06,1e-06,~:0() 58 | 2,0,0,0,0,~:0() 59 | 2,0,0,0,0,_backends.py:79(shape) 60 | 2,0,0,0,0,~:0() 61 | Showing 1 to 59 of 59 entries 62 | -------------------------------------------------------------------------------- /notebooks/experiments_manual/analyzed_prots.joblib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments_manual/analyzed_prots.joblib -------------------------------------------------------------------------------- /notebooks/experiments_manual/error_evolution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments_manual/error_evolution.png 
-------------------------------------------------------------------------------- /notebooks/experiments_manual/histogram_errors.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments_manual/histogram_errors.png -------------------------------------------------------------------------------- /notebooks/experiments_manual/profiler_capture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments_manual/profiler_capture.png -------------------------------------------------------------------------------- /notebooks/experiments_manual/rclab_data/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 RCL-lab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /notebooks/experiments_manual/rclab_data/other_prots.csv: -------------------------------------------------------------------------------- 1 | prot,url,chain,num, 2 | LDH,_,_,_, 3 | Ribonuclease,_,_,_, -------------------------------------------------------------------------------- /notebooks/integrated_alanines.py: -------------------------------------------------------------------------------- 1 | ########################## 2 | # Clone repos with utils # 3 | ########################## 4 | 5 | # !git clone https://github.com/hypnopump/geometric-vector-perceptron 6 | 7 | import os 8 | import sys 9 | import time 10 | import timeit 11 | import logging 12 | 13 | # science 14 | import numpy as np 15 | import torch 16 | import prody as pr 17 | import sidechainnet 18 | from sidechainnet.utils.sequence import ProteinVocabulary as VOCAB 19 | VOCAB = VOCAB() 20 | 21 | # process 22 | import joblib 23 | 24 | # custom 25 | import mp_nerf 26 | 27 | BASE_FOLDER = "experiments/" 28 | 29 | logging.basicConfig(level=logging.DEBUG, 30 | format="%(asctime)s %(levelname)s %(threadName)s %(name)s %(message)s", 31 | # datefmt='%m-%d %H:%M', 32 | filename=BASE_FOLDER+"logs_experiment.txt", 33 | filemode="a") 34 | logger = logging.getLogger() 35 | sep = "\n\n=======\n\n" 36 | 37 | 38 | # begin tests 39 | if __name__ == "__main__": 40 | 41 | logger.info("Loading data"+"\n") 42 | 43 | dataloc = "experiments_manual/rclab_data/" 44 | filenames = [dataloc+x for x in os.listdir(dataloc) if x.endswith(".pdb")] 45 | 46 | run_opts = [(torch.device("cpu"), False)] # tuples of (device, hybrid) 47 | # add possibility for different configs 48 | if 
torch.cuda.is_available(): 49 | run_opts.append( (torch.device("cuda"), True)) 50 | run_opts.append( (torch.device("cuda"), False)) 51 | 52 | 53 | for device,hybrid in run_opts: 54 | 55 | logger.info("Preparing speed tests: for device "+repr(device)+" and hybrid_opt: "+str(hybrid)+"\n") 56 | 57 | for i,filename in enumerate(filenames): 58 | 59 | # get data 60 | keys = ["angles_np", "coords_np", "observed_sequence"] 61 | chain = pr.parsePDB(datafile, chain=chain, model=1) 62 | parsed = sidechainnet.utils.measure.get_seq_coords_and_angles(chain) 63 | data = {k:v for k,v in zip(keys, parsed)} 64 | # get scaffs 65 | scaffolds = mp_nerf.proteins.build_scaffolds_from_scn_angles(data["observed_sequence"], 66 | torch.from_numpy(data["angles_np"]).to(device)) 67 | 68 | logger.info("Assessing the speed of folding algorithm at file "+filenames[i]+"\n") 69 | 70 | logger.info( str( timeit.timeit('mp_nerf.proteins.protein_fold(**scaffolds, device=device, hybrid=hybrid)', 71 | globals=globals(), number=1000) )+" for 1000 calls" ) 72 | 73 | logger.info("Done") 74 | logger.info(sep) 75 | 76 | logger.info("Execution has finished\n") -------------------------------------------------------------------------------- /notebooks/integrated_test.py: -------------------------------------------------------------------------------- 1 | ########################## 2 | # Clone repos with utils # 3 | ########################## 4 | 5 | # !git clone https://github.com/hypnopump/geometric-vector-perceptron 6 | 7 | import os 8 | import sys 9 | import time 10 | import timeit 11 | import logging 12 | 13 | # science 14 | import numpy as np 15 | import torch 16 | import sidechainnet 17 | from sidechainnet.utils.sequence import ProteinVocabulary as VOCAB 18 | VOCAB = VOCAB() 19 | 20 | # process 21 | import joblib 22 | 23 | # custom 24 | import mp_nerf 25 | 26 | BASE_FOLDER = "experiments/" 27 | 28 | logging.basicConfig(level=logging.DEBUG, 29 | format="%(asctime)s %(levelname)s %(threadName)s %(name)s 
%(message)s", 30 | # datefmt='%m-%d %H:%M', 31 | filename=BASE_FOLDER+"logs_experiment.txt", 32 | filemode="a") 33 | logger = logging.getLogger() 34 | sep = "\n\n=======\n\n" 35 | 36 | 37 | # begin tests 38 | if __name__ == "__main__": 39 | 40 | logger.info("Loading data"+"\n") 41 | lengths = [100, 200, 300, 400, 500, 600, 700, 800, 900]# [::-1] 42 | try: 43 | "a"+9 44 | # skip: the expression above always raises TypeError, so the download below never runs and the cached proteins are loaded in the except branch 45 | dataloaders_ = sidechainnet.load(casp_version=7, with_pytorch="dataloaders", batch_size=2) 46 | logger.info("Data has been loaded"+"\n"+sep) 47 | stored = [ mp_nerf.utils.get_prot(dataloader_=dataloaders_, 48 | vocab_=VOCAB, 49 | min_len=desired_len+5, 50 | max_len=desired_len+60) for desired_len in lengths ] 51 | joblib.dump(stored, BASE_FOLDER[:-1]+"_manual/analyzed_prots.joblib") 52 | except: # NOTE(review): bare except -- it would also hide a genuine failure if the download path were re-enabled 53 | stored = joblib.load(BASE_FOLDER[:-1]+"_manual/analyzed_prots.joblib") 54 | logger.info("Data has been loaded"+"\n"+sep) 55 | 56 | logger.info("Assessing lengths of: "+str([len(x[0]) for x in stored])+"\n") 57 | 58 | run_opts = [(torch.device("cpu"), False)] # tuples of (device, hybrid) 59 | # on CUDA machines also benchmark the GPU, with and without the hybrid option 60 | if torch.cuda.is_available(): 61 | run_opts.append( (torch.device("cuda"), True)) 62 | run_opts.append( (torch.device("cuda"), False)) 63 | 64 | 65 | for device,hybrid in run_opts: 66 | 67 | logger.info("Preparing speed tests: for device "+repr(device)+" and hybrid_opt: "+str(hybrid)+"\n") 68 | 69 | for i,desired_len in enumerate(lengths): 70 | 71 | seq, int_seq, true_coords, angles, padding_seq, mask, pid = stored[i] 72 | scaffolds = mp_nerf.proteins.build_scaffolds_from_scn_angles(seq, angles.to(device)) 73 | 74 | logger.info("Assessing the speed of folding algorithm at length "+str(len(seq))+"\n") 75 | 76 | logger.info( str( timeit.timeit('mp_nerf.proteins.protein_fold(**scaffolds, device=device, hybrid=hybrid)', 77 | globals=globals(), number=1000) )+" for 1000 calls" ) # globals() exposes scaffolds, device and hybrid to the timed statement 78 | 79 | logger.info("Saving the related information at {0}{1}_info.joblib\n".format( 
80 | BASE_FOLDER, desired_len)) 81 | joblib.dump({"seq": seq, 82 | "true_coords": true_coords, 83 | "angles": angles, 84 | "padding_seq": padding_seq, 85 | "mask": mask, 86 | "pid": pid, 87 | "padding_stripped": True}, BASE_FOLDER+str(desired_len)+"_info.joblib") 88 | logger.info(sep) 89 | 90 | logger.info("Execution has finished\n") -------------------------------------------------------------------------------- /notebooks/xtension/plots/A_plot_hists.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/A_plot_hists.png -------------------------------------------------------------------------------- /notebooks/xtension/plots/C_plot_hists.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/C_plot_hists.png -------------------------------------------------------------------------------- /notebooks/xtension/plots/D_plot_hists.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/D_plot_hists.png -------------------------------------------------------------------------------- /notebooks/xtension/plots/E_plot_hists.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/E_plot_hists.png -------------------------------------------------------------------------------- /notebooks/xtension/plots/F_plot_hists.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/F_plot_hists.png -------------------------------------------------------------------------------- /notebooks/xtension/plots/G_plot_hists.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/G_plot_hists.png -------------------------------------------------------------------------------- /notebooks/xtension/plots/H_plot_hists.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/H_plot_hists.png -------------------------------------------------------------------------------- /notebooks/xtension/plots/I_plot_hists.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/I_plot_hists.png -------------------------------------------------------------------------------- /notebooks/xtension/plots/K_plot_hists.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/K_plot_hists.png -------------------------------------------------------------------------------- /notebooks/xtension/plots/L_plot_hists.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/L_plot_hists.png -------------------------------------------------------------------------------- /notebooks/xtension/plots/M_plot_hists.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/M_plot_hists.png -------------------------------------------------------------------------------- /notebooks/xtension/plots/N_plot_hists.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/N_plot_hists.png -------------------------------------------------------------------------------- /notebooks/xtension/plots/P_plot_hists.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/P_plot_hists.png -------------------------------------------------------------------------------- /notebooks/xtension/plots/Q_plot_hists.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/Q_plot_hists.png -------------------------------------------------------------------------------- /notebooks/xtension/plots/R_plot_hists.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/R_plot_hists.png -------------------------------------------------------------------------------- /notebooks/xtension/plots/S_plot_hists.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/S_plot_hists.png 
-------------------------------------------------------------------------------- /notebooks/xtension/plots/T_plot_hists.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/T_plot_hists.png -------------------------------------------------------------------------------- /notebooks/xtension/plots/V_plot_hists.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/V_plot_hists.png -------------------------------------------------------------------------------- /notebooks/xtension/plots/W_plot_hists.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/W_plot_hists.png -------------------------------------------------------------------------------- /notebooks/xtension/plots/Y_plot_hists.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/Y_plot_hists.png -------------------------------------------------------------------------------- /notebooks/xtension/plots/__plot_hists.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/__plot_hists.png -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [aliases] 2 | test=pytest 3 | 4 | [tool:pytest] 5 | addopts = --verbose 6 | python_files = tests/*.py 
-------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name = 'mp-nerf', 5 | packages = find_packages(), 6 | version = '1.0.3', 7 | license='MIT', 8 | description = 'MP-NeRF: Massively Parallel Natural Extension of Reference Frame', 9 | author = 'Eric Alcaide', 10 | author_email = 'ericalcaide1@gmail.com', 11 | url = 'https://github.com/eleutherAI/mp_nerf', 12 | keywords = [ 13 | 'computational biolgy', 14 | 'bioinformatics', 15 | 'machine learning' 16 | ], 17 | install_requires=[ 18 | 'einops>=0.3', 19 | 'numpy', 20 | 'torch>=1.6', # 'sidechainnet' # for tests 21 | ], 22 | setup_requires=[ 23 | 'pytest-runner', 24 | ], 25 | tests_require=[ 26 | 'pytest' 27 | ], 28 | classifiers=[ 29 | 'Development Status :: 4 - Beta', 30 | 'Intended Audience :: Developers', 31 | 'Topic :: Scientific/Engineering :: Artificial Intelligence', 32 | 'License :: OSI Approved :: MIT License', 33 | 'Programming Language :: Python :: 3.6', 34 | ], 35 | ) 36 | -------------------------------------------------------------------------------- /tests/test_main.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from mp_nerf import * 5 | from mp_nerf.utils import * 6 | from mp_nerf.kb_proteins import * 7 | from mp_nerf.proteins import * 8 | 9 | def test_nerf_and_dihedral(): 10 | # create points 11 | a = torch.tensor([1,2,3]).float() 12 | b = torch.tensor([1,4,5]).float() 13 | c = torch.tensor([1,4,7]).float() 14 | d = torch.tensor([1,8,8]).float() 15 | # calculate internal references 16 | v1 = (b-a).numpy() 17 | v2 = (c-b).numpy() 18 | v3 = (d-c).numpy() 19 | # get angles 20 | theta = np.arccos( np.dot(v2, v3) / \ 21 | (np.linalg.norm(v2) * np.linalg.norm(v3) )) 22 | 23 | normal_p = np.cross(v1, v2) 24 | normal_p_ = np.cross(v2, v3) 
25 | chi = np.arccos( np.dot(normal_p, normal_p_) / \ 26 | (np.linalg.norm(normal_p) * np.linalg.norm(normal_p_) )) 27 | # get length: 28 | l = torch.tensor(np.linalg.norm(v3)) 29 | theta = torch.tensor(theta) 30 | chi = torch.tensor(chi) 31 | # reconstruct 32 | # passing chi directly doesn't work because the scn angle was not measured correctly, 33 | # so chi - pi is passed and the method is expected to compensate for that discrepancy 34 | assert (mp_nerf_torch(a, b, c, l, theta, chi - np.pi) - torch.tensor([1,0,6])).sum().abs() < 0.1 # NOTE(review): sum() before abs() lets per-coordinate errors of opposite sign cancel 35 | assert get_dihedral(a, b, c, d).item() == chi # NOTE(review): exact floating-point equality -- confirm this is intended 36 | 37 | 38 | def test_modify_angles_mask_with_torsions(): 39 | # create inputs 40 | seq = "AGHHKLHRTVNMSTIL" 41 | angles_mask = torch.randn(2, 16, 14) 42 | torsions = torch.ones(16, 4) 43 | # ensure the returned mask keeps the shape of the input mask 44 | assert modify_angles_mask_with_torsions(seq, angles_mask, torsions).shape == angles_mask.shape, \ 45 | "Shapes don't match" -------------------------------------------------------------------------------- /tests/test_ml_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from mp_nerf import * 5 | from mp_nerf.utils import * 6 | from mp_nerf.ml_utils import * 7 | from mp_nerf.kb_proteins import * 8 | from mp_nerf.proteins import * 9 | 10 | 11 | # test ML utils 12 | def test_scn_atom_embedd(): 13 | seq_list = ["AGHHKLHRTVNMSTIL", 14 | "WERTQLITANMWTCSD"] 15 | embedds = scn_atom_embedd(seq_list) 16 | assert embedds.shape == torch.Size([2, 16, 14]), "Shapes don't match" 17 | 18 | 19 | def test_chain_to_atoms(): 20 | chain = torch.randn(100, 3) 21 | atoms = chain2atoms(chain, c=14) 22 | assert atoms.shape == torch.Size([100, 14, 3]), "Shapes don't match" 23 | 24 | 25 | def test_rename_symmetric_atoms(): 26 | seq_list = ["AGHHKLHRTVNMSTIL"] 27 | pred_coors = torch.randn(1, 16, 14, 3) 28 | true_coors = torch.randn(1, 16, 14, 3) 29 | cloud_mask = scn_cloud_mask(seq_list[0]).unsqueeze(0) 30 | pred_feats = torch.randn(1, 16, 14, 16) 31 | 32 | renamed = 
rename_symmetric_atoms(pred_coors, true_coors, seq_list, cloud_mask, pred_feats=pred_feats) 33 | assert renamed[0].shape == pred_coors.shape and renamed[1].shape == pred_feats.shape, "Shapes don't match" 34 | 35 | 36 | def test_torsion_angle_loss(): 37 | pred_torsions = torch.randn(1, 100, 7) 38 | true_torsions = torch.randn(1, 100, 7) 39 | angle_mask = pred_torsions <= 2. # NOTE(review): built but never used -- None is passed as angle_mask below 40 | 41 | loss = torsion_angle_loss(pred_torsions, true_torsions, 42 | coeff=2., angle_mask=None) 43 | assert loss.shape == pred_torsions.shape, "Shapes don't match" 44 | 45 | 46 | def test_fape_loss_torch(): 47 | seq_list = ["AGHHKLHRTVNMSTIL"] 48 | pred_coords = torch.randn(1, 16, 14, 3) 49 | true_coords = torch.randn(1, 16, 14, 3) 50 | 51 | loss_c_alpha = fape_torch(pred_coords, true_coords, c_alpha=True, seq_list=seq_list) 52 | loss_full = fape_torch(pred_coords, true_coords, c_alpha=False, seq_list=seq_list) 53 | 54 | assert True # smoke test only: passes as long as both fape_torch calls above do not raise 55 | 56 | 57 | 58 | 59 | --------------------------------------------------------------------------------