├── .github
    └── workflows
    │   ├── python-package.yml
    │   └── python-publish.yml
├── .gitignore
├── LICENSE
├── README.md
├── mp_nerf
    ├── __init__.py
    ├── kb_proteins.py
    ├── massive_pnerf.py
    ├── ml_utils.py
    ├── proteins.py
    └── utils.py
├── notebooks
    ├── experiments
    │   ├── [131, 150]_info.joblib
    │   ├── [200, 250]_info.joblib
    │   ├── [331, 351]_info.joblib
    │   ├── [400, 450]_info.joblib
    │   ├── [500, 550]_info.joblib
    │   ├── [600, 650]_info.joblib
    │   ├── [700, 780]_info.joblib
    │   ├── [800, 900]_info.joblib
    │   ├── [905, 1070]_info.joblib
    │   ├── [905, 970]_info.joblib
    │   ├── logs_experiment.txt
    │   ├── logs_experiment_scn_various_lengths.txt
    │   └── profile_csv
    ├── experiments_manual
    │   ├── analyzed_prots.joblib
    │   ├── error_evolution.png
    │   ├── histogram_errors.png
    │   ├── profiler_capture.png
    │   └── rclab_data
    │   │   ├── 1000_ala.pdb
    │   │   ├── 500_ala.pdb
    │   │   ├── 5rsa_ribonuclease.pdb
    │   │   ├── LICENSE
    │   │   ├── il10_lactate_dh.pdb
    │   │   └── other_prots.csv
    ├── extend_measures.ipynb
    ├── integrated_alanines.py
    ├── integrated_test.py
    ├── preds
    │   ├── labels.pdb
    │   └── predicted.pdb
    ├── test_implementation_loop.ipynb
    ├── test_implementation_speed.ipynb
    └── xtension
    │   └── plots
    │       ├── A_plot_hists.png
    │       ├── C_plot_hists.png
    │       ├── D_plot_hists.png
    │       ├── E_plot_hists.png
    │       ├── F_plot_hists.png
    │       ├── G_plot_hists.png
    │       ├── H_plot_hists.png
    │       ├── I_plot_hists.png
    │       ├── K_plot_hists.png
    │       ├── L_plot_hists.png
    │       ├── M_plot_hists.png
    │       ├── N_plot_hists.png
    │       ├── P_plot_hists.png
    │       ├── Q_plot_hists.png
    │       ├── R_plot_hists.png
    │       ├── S_plot_hists.png
    │       ├── T_plot_hists.png
    │       ├── V_plot_hists.png
    │       ├── W_plot_hists.png
    │       ├── Y_plot_hists.png
    │       └── __plot_hists.png
├── setup.cfg
├── setup.py
└── tests
    ├── test_main.py
    └── test_ml_utils.py


/.github/workflows/python-package.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will install Python dependencies and run the tests with a variety of Python versions
 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
 3 | 
 4 | name: Python package
 5 | 
 6 | on:  # runs for pushes to main and for pull requests targeting main
 7 |   push:
 8 |     branches: [ main ]
 9 |   pull_request:
10 |     branches: [ main ]
11 | 
12 | jobs:
13 |   build:
14 | 
15 |     runs-on: ubuntu-latest
16 |     strategy:
17 |       matrix:
18 |         python-version: ["3.7", "3.8"]  # quoted so versions stay strings (an unquoted 3.10 would parse as 3.1)
19 | 
20 |     steps:
21 |     - uses: actions/checkout@v2
22 |     - name: Set up Python ${{ matrix.python-version }}
23 |       uses: actions/setup-python@v2
24 |       with:
25 |         python-version: ${{ matrix.python-version }}
26 |     - name: Install dependencies  # pip, pytest, ProDy and requests; requirements.txt is used only if present
27 |       run: |
28 |         python -m pip install --upgrade pip
29 |         python -m pip install pytest
30 |         python -m pip install -U proDy requests
31 |         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
32 |     - name: Test with pytest  # tests are launched through the setup.py "test" command
33 |       run: |
34 |         python setup.py test
35 | 


--------------------------------------------------------------------------------
/.github/workflows/python-publish.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will upload a Python Package using Twine when a release is created
 2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
 3 | 
 4 | name: Upload Python Package
 5 | 
 6 | on:
 7 |   release:
 8 |     types: [created]  # NOTE(review): a release first saved as a draft may not emit "created" - confirm whether "published" is wanted
 9 | 
10 | jobs:
11 |   deploy:
12 | 
13 |     runs-on: ubuntu-latest
14 | 
15 |     steps:
16 |     - uses: actions/checkout@v2
17 |     - name: Set up Python
18 |       uses: actions/setup-python@v2
19 |       with:
20 |         python-version: '3.x'
21 |     - name: Install dependencies  # only the build and upload tooling: setuptools, wheel, twine
22 |       run: |
23 |         python -m pip install --upgrade pip
24 |         pip install setuptools wheel twine
25 |     - name: Build and publish  # builds an sdist and a wheel, then uploads everything in dist/
26 |       env:  # credentials are injected from repository secrets, never stored in the workflow
27 |         TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
28 |         TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
29 |       run: |
30 |         python setup.py sdist bdist_wheel
31 |         twine upload dist/*
32 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files (__pycache__ directories at any depth)
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | 
  7 | # to ignore
  8 | *.DS_Store
  9 | *sidechainnet_data/*
 10 | *.pkl
 11 | 
 12 | # C extensions
 13 | *.so
 14 | 
 15 | # Distribution / packaging
 16 | .Python
 17 | build/
 18 | develop-eggs/
 19 | dist/
 20 | downloads/
 21 | eggs/
 22 | .eggs/
 23 | lib/
 24 | lib64/
 25 | parts/
 26 | sdist/
 27 | var/
 28 | wheels/
 29 | pip-wheel-metadata/
 30 | share/python-wheels/
 31 | *.egg-info/
 32 | .installed.cfg
 33 | *.egg
 34 | MANIFEST
 35 | 
 36 | # PyInstaller
 37 | #  Usually these files are written by a python script from a template
 38 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 39 | *.manifest
 40 | *.spec
 41 | 
 42 | # Installer logs
 43 | pip-log.txt
 44 | pip-delete-this-directory.txt
 45 | 
 46 | # Unit test / coverage reports
 47 | htmlcov/
 48 | .tox/
 49 | .nox/
 50 | .coverage
 51 | .coverage.*
 52 | .cache
 53 | nosetests.xml
 54 | coverage.xml
 55 | *.cover
 56 | *.py,cover
 57 | .hypothesis/
 58 | .pytest_cache/
 59 | 
 60 | # Translations
 61 | *.mo
 62 | *.pot
 63 | 
 64 | # Django stuff:
 65 | *.log
 66 | local_settings.py
 67 | db.sqlite3
 68 | db.sqlite3-journal
 69 | 
 70 | # Flask stuff:
 71 | instance/
 72 | .webassets-cache
 73 | 
 74 | # Scrapy stuff:
 75 | .scrapy
 76 | 
 77 | # Sphinx documentation
 78 | docs/_build/
 79 | 
 80 | # PyBuilder
 81 | target/
 82 | 
 83 | # Jupyter Notebook
 84 | .ipynb_checkpoints
 85 | 
 86 | # IPython
 87 | profile_default/
 88 | ipython_config.py
 89 | 
 90 | # pyenv
 91 | .python-version
 92 | 
 93 | # pipenv
 94 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 95 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 96 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 97 | #   install all needed dependencies.
 98 | #Pipfile.lock
 99 | 
100 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
101 | __pypackages__/
102 | 
103 | # Celery stuff
104 | celerybeat-schedule
105 | celerybeat.pid
106 | 
107 | # SageMath parsed files
108 | *.sage.py
109 | 
110 | # Environments
111 | .env
112 | .venv
113 | env/
114 | venv/
115 | ENV/
116 | env.bak/
117 | venv.bak/
118 | 
119 | # Spyder project settings
120 | .spyderproject
121 | .spyproject
122 | 
123 | # Rope project settings
124 | .ropeproject
125 | 
126 | # mkdocs documentation
127 | /site
128 | 
129 | # mypy
130 | .mypy_cache/
131 | .dmypy.json
132 | dmypy.json
133 | 
134 | # Pyre type checker
135 | .pyre/
136 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | 
 2 | Copyright (c) 2021, Eric Alcaide
 3 | All rights reserved.
 4 | 
 5 | Redistribution and use in source and binary forms, with or without
 6 | modification, are permitted provided that the following conditions are
 7 | met:
 8 | 
 9 |     1. Redistributions of source code must retain the above copyright
10 |        notice, this list of conditions and the following disclaimer.
11 |     2. Redistributions in binary form must reproduce the above
12 |        copyright notice, this list of conditions and the following
 13 |        disclaimer in the documentation and/or other materials provided
14 |        with the distribution.
15 |     3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote
16 |        products derived from this software without specific prior written permission.
17 | 
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # MP-NeRF: Massively Parallel Natural Extension of Reference Frame
 2 | 
 3 | This is the code for the paper "[MP-NeRF: A Massively Parallel Method for Accelerating Protein Structure Reconstruction from Internal Coordinates](https://www.biorxiv.org/content/10.1101/2021.06.08.446214v1)"
 4 | 
 5 | The code can be installed via `pip` with 
 6 | 
 7 | ```bash
 8 | $ pip install mp-nerf 
 9 | ``` 
10 | 
11 | ## Abstract
12 | 
 13 | The conversion of polymers between internal and cartesian coordinates is a limiting step in many pipelines, such as molecular dynamics simulations and training of machine learning models. This conversion is typically carried out by sequential or parallel applications of the Natural extension of Reference Frame (NeRF) algorithm. 
14 | 
 15 | This work proposes a massively parallel NeRF implementation, which, depending on the polymer length, achieves speedups between 400-1200x over the most recent parallel NeRF implementation by dividing the conversion into three main phases: a parallel composition of the minimal repeated structure, the assembly of backbone subunits and the parallel elongation of sidechains. 
16 | 
17 | Special emphasis is placed on reusability and ease of use within diverse pipelines. We open source the code (available at https://github.com/EleutherAI/mp_nerf) and provide a corresponding python package.
18 | 
19 | 
20 | ## Results: 
21 | 
 22 | * **Tests**: run on an Intel i5 @ 2.4 GHz (CPU) and on an Intel i7-6700K @ 4 GHz + Nvidia GTX 1060 6 GB (GPU)
23 | 
24 | length   |  sota  | **us (cpu)** |  Nx   | us (gpu) | us (hybrid) |
25 | ---------|--------|--------------|-------|----------|-------------|
26 | ~114     | 2.4s   | **5.3ms**    | ~446  | 21.1ms   | 18.9ms      |
27 | ~300     | 3.5s   | **8.5ms**    | ~400  | 26.2ms   | 22.3ms      |
28 | ~500     | 7.5s   | **9.1ms**    | ~651  | 29.2ms   | 26.3ms      |
29 | ~1000    | 18.66s | **15.3ms**   | ~1200 | 43.3ms   | 30.1ms      |
30 | 
31 | * **Profiler Trace (CPU)**:
32 | <center><img src="notebooks/experiments_manual/profiler_capture.png"></center>
33 | <center><img src="notebooks/experiments_manual/histogram_errors.png"></center>
34 | <center><img src="notebooks/experiments_manual/error_evolution.png"></center>
35 | 
36 | #### Considerations
37 | 
 38 | * In the GPU version, much of the time is spent in data transfers, and the sequential loop is very inefficient on the GPU. 
39 | * about 1/2 of time is spent in memory-access patterns and the sequential `for loop`, so ideally 2x from here would be possible by optimizing it or running the sequential loop in cython / numba / whatever
40 | * total profiler time should be multiplied by 0.63-0.5 to see real time (see execution above without profiler). Profiling slows down the code.
41 | 
42 | 
43 | ## Installation:
44 | 
45 | Just clone the repo
46 | 
47 | You'll need:
48 | * torch > 1.6
49 | * numpy
50 | * einops
51 | 
52 | Plus, if you want to run the experiments / work with data: 
53 | * joblib
54 | * sidechainnet: https://github.com/jonathanking/sidechainnet#installation
 55 | * manually install `ProDy`, `py3Dmol`, `snakeviz`:
56 | 	* `pip install proDy`
57 | 	* `pip install py3Dmol`
58 | 	* `pip install snakeviz`
59 | 	* any other package: `pip install package_name`
60 | 
61 | 
62 | * matplotlib (to do diagnostic plots)
63 | 
64 | ## Citations:
65 | 
66 | ```bibtex
67 | @article{Parsons2005PracticalCF,
68 |     title={Practical conversion from torsion space to Cartesian space for in silico protein synthesis},
69 |     author={Jerod Parsons and J. B. Holmes and J. M. Rojas and J. Tsai and C. Strauss},
70 |     journal={Journal of Computational Chemistry},
71 |     year={2005},
72 |     volume={26}
73 | }
74 | ```
75 | 
76 | ```bibtex
77 | @article{AlQuraishi2018pNeRFPC,
78 |     title={pNeRF: Parallelized Conversion from Internal to Cartesian Coordinates},
79 |     author={Mohammed AlQuraishi},
80 |     journal={bioRxiv},
81 |     year={2018}
82 | }
83 | ```
84 | 
85 | ```bibtex
86 | @article{Bayati2020HighperformanceTO,
87 |     title={High‐performance transformation of protein structure representation from internal to Cartesian coordinates},
88 |     author={M. Bayati and M. Leeser and J. Bardhan},
89 |     journal={Journal of Computational Chemistry},
90 |     year={2020},
91 |     volume={41},
92 |     pages={2104 - 2114}
93 | }
94 | ```
95 | 
96 | 


--------------------------------------------------------------------------------
/mp_nerf/__init__.py:
--------------------------------------------------------------------------------
1 | from mp_nerf.massive_pnerf import *
2 | from mp_nerf.proteins import *


--------------------------------------------------------------------------------
/mp_nerf/kb_proteins.py:
--------------------------------------------------------------------------------
  1 | # Author: Eric Alcaide
  2 | 
  3 | # A substantial part has been borrowed from
  4 | # https://github.com/jonathanking/sidechainnet
  5 | #
  6 | # Here's the License for it:
  7 | #
  8 | # Copyright 2020 Jonathan King
  9 | # Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
 10 | # following conditions are met:
 11 | #
 12 | # 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
 13 | #
 14 | # 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
 15 | # disclaimer in the documentation and/or other materials provided with the distribution.
 16 | #
 17 | # 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote
 18 | # products derived from this software without specific prior written permission.
 19 | #
 20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 21 | # INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 23 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 25 | # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 26 | # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 27 | 
 28 | import numpy as np
 29 | 
 30 | #########################
 31 | ### FROM SIDECHAINNET ###
 32 | #########################
 33 | 
 34 | # modified by considering rigid bodies in sidechains (remove extra torsions)
 35 | 
 36 | SC_BUILD_INFO = {
 37 |     'A': {
 38 |         'angles-names': ['N-CA-CB'],
 39 |         'angles-types': ['N -CX-CT'],
 40 |         'angles-vals': [1.9146261894377796],
 41 |         'atom-names': ['CB'],
 42 |         'bonds-names': ['CA-CB'],
 43 |         'bonds-types': ['CX-CT'],
 44 |         'bonds-vals': [1.526],
 45 |         'torsion-names': ['C-N-CA-CB'],
 46 |         'torsion-types': ['C -N -CX-CT'],
 47 |         'torsion-vals': ['p'],
 48 |         'rigid-frames-idxs': [[0,1,2], [0,1,4]],
 49 |     },
 50 | 
 51 |     'R': {
 52 |         'angles-names': [
 53 |             'N-CA-CB', 'CA-CB-CG', 'CB-CG-CD', 'CG-CD-NE', 'CD-NE-CZ', 'NE-CZ-NH1',
 54 |             'NE-CZ-NH2'
 55 |         ],
 56 |         'angles-types': [
 57 |             'N -CX-C8', 'CX-C8-C8', 'C8-C8-C8', 'C8-C8-N2', 'C8-N2-CA', 'N2-CA-N2',
 58 |             'N2-CA-N2'
 59 |         ],
 60 |         'angles-vals': [
 61 |             1.9146261894377796, 1.911135530933791, 1.911135530933791, 1.9408061282176945,
 62 |             2.150245638457014, 2.0943951023931953, 2.0943951023931953
 63 |         ],
 64 |         'atom-names': ['CB', 'CG', 'CD', 'NE', 'CZ', 'NH1', 'NH2'],
 65 |         'bonds-names': ['CA-CB', 'CB-CG', 'CG-CD', 'CD-NE', 'NE-CZ', 'CZ-NH1', 'CZ-NH2'],
 66 |         'bonds-types': ['CX-C8', 'C8-C8', 'C8-C8', 'C8-N2', 'N2-CA', 'CA-N2', 'CA-N2'],
 67 |         'bonds-vals': [1.526, 1.526, 1.526, 1.463, 1.34, 1.34, 1.34],
 68 |         'torsion-names': [
 69 |             'C-N-CA-CB', 'N-CA-CB-CG', 'CA-CB-CG-CD', 'CB-CG-CD-NE', 'CG-CD-NE-CZ',
 70 |             'CD-NE-CZ-NH1', 'CD-NE-CZ-NH2'
 71 |         ],
 72 |         'torsion-types': [
 73 |             'C -N -CX-C8', 'N -CX-C8-C8', 'CX-C8-C8-C8', 'C8-C8-C8-N2', 'C8-C8-N2-CA',
 74 |             'C8-N2-CA-N2', 'C8-N2-CA-N2'
 75 |         ],
 76 |         'torsion-vals': ['p', 'p', 'p', 'p', 'p', 0., 3.141592],
 77 |         'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5], [4,5,6], [5,6,7], [6,7,8]],
 78 |     },
 79 | 
 80 |     'N': {
 81 |         'angles-names': ['N-CA-CB', 'CA-CB-CG', 'CB-CG-OD1', 'CB-CG-ND2'],
 82 |         'angles-types': ['N -CX-2C', 'CX-2C-C ', '2C-C -O ', '2C-C -N '],
 83 |         'angles-vals': [
 84 |             1.9146261894377796, 1.9390607989657, 2.101376419401173, 2.035053907825388
 85 |         ],
 86 |         'atom-names': ['CB', 'CG', 'OD1', 'ND2'],
 87 |         'bonds-names': ['CA-CB', 'CB-CG', 'CG-OD1', 'CG-ND2'],
 88 |         'bonds-types': ['CX-2C', '2C-C ', 'C -O ', 'C -N '],
 89 |         'bonds-vals': [1.526, 1.522, 1.229, 1.335],
 90 |         'torsion-names': ['C-N-CA-CB', 'N-CA-CB-CG', 'CA-CB-CG-OD1', 'CA-CB-CG-ND2'],
 91 |         'torsion-types': ['C -N -CX-2C', 'N -CX-2C-C ', 'CX-2C-C -O ', 'CX-2C-C -N '],
 92 |         'torsion-vals': ['p', 'p', 'p', 'i'], 
 93 |         'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5], [4,5,6]],
 94 |     },
 95 | 
 96 |     'D': {
 97 |         'angles-names': ['N-CA-CB', 'CA-CB-CG', 'CB-CG-OD1', 'CB-CG-OD2'],
 98 |         'angles-types': ['N -CX-2C', 'CX-2C-CO', '2C-CO-O2', '2C-CO-O2'],
 99 |         'angles-vals': [
100 |             1.9146261894377796, 1.9390607989657, 2.0420352248333655, 2.0420352248333655
101 |         ],
102 |         'atom-names': ['CB', 'CG', 'OD1', 'OD2'],
103 |         'bonds-names': ['CA-CB', 'CB-CG', 'CG-OD1', 'CG-OD2'],
104 |         'bonds-types': ['CX-2C', '2C-CO', 'CO-O2', 'CO-O2'],
105 |         'bonds-vals': [1.526, 1.522, 1.25, 1.25],
106 |         'torsion-names': ['C-N-CA-CB', 'N-CA-CB-CG', 'CA-CB-CG-OD1', 'CA-CB-CG-OD2'],
107 |         'torsion-types': ['C -N -CX-2C', 'N -CX-2C-CO', 'CX-2C-CO-O2', 'CX-2C-CO-O2'],
108 |         'torsion-vals': ['p', 'p', 'p', 'i'],
109 |         'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5], [4,5,6]],
110 |     },
111 | 
112 |     'C': {
113 |         'angles-names': ['N-CA-CB', 'CA-CB-SG'],
114 |         'angles-types': ['N -CX-2C', 'CX-2C-SH'],
115 |         'angles-vals': [1.9146261894377796, 1.8954275676658419],
116 |         'atom-names': ['CB', 'SG'],
117 |         'bonds-names': ['CA-CB', 'CB-SG'],
118 |         'bonds-types': ['CX-2C', '2C-SH'],
119 |         'bonds-vals': [1.526, 1.81],
120 |         'torsion-names': ['C-N-CA-CB', 'N-CA-CB-SG'],
121 |         'torsion-types': ['C -N -CX-2C', 'N -CX-2C-SH'],
122 |         'torsion-vals': ['p', 'p'],
123 |         'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5]],
124 |     },
125 | 
126 |     'Q': {
127 |         'angles-names': ['N-CA-CB', 'CA-CB-CG', 'CB-CG-CD', 'CG-CD-OE1', 'CG-CD-NE2'],
128 |         'angles-types': ['N -CX-2C', 'CX-2C-2C', '2C-2C-C ', '2C-C -O ', '2C-C -N '],
129 |         'angles-vals': [
130 |             1.9146261894377796, 1.911135530933791, 1.9390607989657, 2.101376419401173,
131 |             2.035053907825388
132 |         ],
133 |         'atom-names': ['CB', 'CG', 'CD', 'OE1', 'NE2'],
134 |         'bonds-names': ['CA-CB', 'CB-CG', 'CG-CD', 'CD-OE1', 'CD-NE2'],
135 |         'bonds-types': ['CX-2C', '2C-2C', '2C-C ', 'C -O ', 'C -N '],
136 |         'bonds-vals': [1.526, 1.526, 1.522, 1.229, 1.335],
137 |         'torsion-names': [
138 |             'C-N-CA-CB', 'N-CA-CB-CG', 'CA-CB-CG-CD', 'CB-CG-CD-OE1', 'CB-CG-CD-NE2'
139 |         ],
140 |         'torsion-types': [
141 |             'C -N -CX-2C', 'N -CX-2C-2C', 'CX-2C-2C-C ', '2C-2C-C -O ', '2C-2C-C -N '
142 |         ],
143 |         'torsion-vals': ['p', 'p', 'p', 'p', 'i'],
144 |         'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5], [4,5,6], [5,6,7]],
145 |     },
146 | 
147 |     'E': {
148 |         'angles-names': ['N-CA-CB', 'CA-CB-CG', 'CB-CG-CD', 'CG-CD-OE1', 'CG-CD-OE2'],
149 |         'angles-types': ['N -CX-2C', 'CX-2C-2C', '2C-2C-CO', '2C-CO-O2', '2C-CO-O2'],
150 |         'angles-vals': [
151 |             1.9146261894377796, 1.911135530933791, 1.9390607989657, 2.0420352248333655,
152 |             2.0420352248333655
153 |         ],
154 |         'atom-names': ['CB', 'CG', 'CD', 'OE1', 'OE2'],
155 |         'bonds-names': ['CA-CB', 'CB-CG', 'CG-CD', 'CD-OE1', 'CD-OE2'],
156 |         'bonds-types': ['CX-2C', '2C-2C', '2C-CO', 'CO-O2', 'CO-O2'],
157 |         'bonds-vals': [1.526, 1.526, 1.522, 1.25, 1.25],
158 |         'torsion-names': [
159 |             'C-N-CA-CB', 'N-CA-CB-CG', 'CA-CB-CG-CD', 'CB-CG-CD-OE1', 'CB-CG-CD-OE2'
160 |         ],
161 |         'torsion-types': [
162 |             'C -N -CX-2C', 'N -CX-2C-2C', 'CX-2C-2C-CO', '2C-2C-CO-O2', '2C-2C-CO-O2'
163 |         ],
164 |         'torsion-vals': ['p', 'p', 'p', 'p', 'i'],
165 |         'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5], [4,5,6], [5,6,7]],
166 |     },
167 | 
168 |     'G': {
169 |         'angles-names': [],
170 |         'angles-types': [],
171 |         'angles-vals': [],
172 |         'atom-names': [],
173 |         'bonds-names': [],
174 |         'bonds-types': [],
175 |         'bonds-vals': [],
176 |         'torsion-names': [],
177 |         'torsion-types': [],
178 |         'torsion-vals': [],
179 |         'rigid-frames-idxs': [[0,1,2]],
180 |     },
181 | 
182 |     'H': {
183 |         'angles-names': [
184 |             'N-CA-CB', 'CA-CB-CG', 'CB-CG-ND1', 'CG-ND1-CE1', 'ND1-CE1-NE2', 'CE1-NE2-CD2'
185 |         ],
186 |         'angles-types': [
187 |             'N -CX-CT', 'CX-CT-CC', 'CT-CC-NA', 'CC-NA-CR', 'NA-CR-NB', 'CR-NB-CV'
188 |         ],
189 |         'angles-vals': [
190 |             1.9146261894377796, 1.9739673840055867, 2.0943951023931953,
191 |             1.8849555921538759, 1.8849555921538759, 1.8849555921538759
192 |         ],
193 |         'atom-names': ['CB', 'CG', 'ND1', 'CE1', 'NE2', 'CD2'],
194 |         'bonds-names': ['CA-CB', 'CB-CG', 'CG-ND1', 'ND1-CE1', 'CE1-NE2', 'NE2-CD2'],
195 |         'bonds-types': ['CX-CT', 'CT-CC', 'CC-NA', 'NA-CR', 'CR-NB', 'NB-CV'],
196 |         'bonds-vals': [1.526, 1.504, 1.385, 1.343, 1.335, 1.394],
197 |         'torsion-names': [
198 |             'C-N-CA-CB', 'N-CA-CB-CG', 'CA-CB-CG-ND1', 'CB-CG-ND1-CE1', 'CG-ND1-CE1-NE2',
199 |             'ND1-CE1-NE2-CD2'
200 |         ],
201 |         'torsion-types': [
202 |             'C -N -CX-CT', 'N -CX-CT-CC', 'CX-CT-CC-NA', 'CT-CC-NA-CR', 'CC-NA-CR-NB',
203 |             'NA-CR-NB-CV'
204 |         ],
205 |         'torsion-vals': ['p', 'p', 'p', 3.141592653589793, 0.0, 0.0],
206 |         'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5], [4,5,6]],
207 |     },
208 | 
209 |     'I': {
210 |         'angles-names': ['N-CA-CB', 'CA-CB-CG1', 'CB-CG1-CD1', 'CA-CB-CG2'],
211 |         'angles-types': ['N -CX-3C', 'CX-3C-2C', '3C-2C-CT', 'CX-3C-CT'],
212 |         'angles-vals': [
213 |             1.9146261894377796, 1.911135530933791, 1.911135530933791, 1.911135530933791
214 |         ],
215 |         'atom-names': ['CB', 'CG1', 'CD1', 'CG2'],
216 |         'bonds-names': ['CA-CB', 'CB-CG1', 'CG1-CD1', 'CB-CG2'],
217 |         'bonds-types': ['CX-3C', '3C-2C', '2C-CT', '3C-CT'],
218 |         'bonds-vals': [1.526, 1.526, 1.526, 1.526],
219 |         'torsion-names': ['C-N-CA-CB', 'N-CA-CB-CG1', 'CA-CB-CG1-CD1', 'N-CA-CB-CG2'],
220 |         'torsion-types': ['C -N -CX-3C', 'N -CX-3C-2C', 'CX-3C-2C-CT', 'N -CX-3C-CT'],
221 |         'torsion-vals': ['p', 'p', 'p', -2.1315], # last one was 'p' in the original - but cg1-cg2 = "2.133"
222 |         'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5], [4,5,7]],
223 |     },
224 | 
225 |     'L': {
226 |         'angles-names': ['N-CA-CB', 'CA-CB-CG', 'CB-CG-CD1', 'CB-CG-CD2'],
227 |         'angles-types': ['N -CX-2C', 'CX-2C-3C', '2C-3C-CT', '2C-3C-CT'],
228 |         'angles-vals': [
229 |             1.9146261894377796, 1.911135530933791, 1.911135530933791, 1.911135530933791
230 |         ],
231 |         'atom-names': ['CB', 'CG', 'CD1', 'CD2'],
232 |         'bonds-names': ['CA-CB', 'CB-CG', 'CG-CD1', 'CG-CD2'],
233 |         'bonds-types': ['CX-2C', '2C-3C', '3C-CT', '3C-CT'],
234 |         'bonds-vals': [1.526, 1.526, 1.526, 1.526],
235 |         'torsion-names': ['C-N-CA-CB', 'N-CA-CB-CG', 'CA-CB-CG-CD1', 'CA-CB-CG-CD2'],
236 |         'torsion-types': ['C -N -CX-2C', 'N -CX-2C-3C', 'CX-2C-3C-CT', 'CX-2C-3C-CT'],
237 |         # extra torsion is in negative bc in mask construction, previous angle is summed. 
238 |         'torsion-vals': ['p', 'p', 'p', 2.1315], # last one was 'p' in the original - but cd1-cd2 = "-2.130"
239 |         'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5], [4,5,6]],
240 |     },
241 | 
242 |     'K': {
243 |         'angles-names': ['N-CA-CB', 'CA-CB-CG', 'CB-CG-CD', 'CG-CD-CE', 'CD-CE-NZ'],
244 |         'angles-types': ['N -CX-C8', 'CX-C8-C8', 'C8-C8-C8', 'C8-C8-C8', 'C8-C8-N3'],
245 |         'angles-vals': [
246 |             1.9146261894377796, 1.911135530933791, 1.911135530933791, 1.911135530933791,
247 |             1.9408061282176945
248 |         ],
249 |         'atom-names': ['CB', 'CG', 'CD', 'CE', 'NZ'],
250 |         'bonds-names': ['CA-CB', 'CB-CG', 'CG-CD', 'CD-CE', 'CE-NZ'],
251 |         'bonds-types': ['CX-C8', 'C8-C8', 'C8-C8', 'C8-C8', 'C8-N3'],
252 |         'bonds-vals': [1.526, 1.526, 1.526, 1.526, 1.471],
253 |         'torsion-names': [
254 |             'C-N-CA-CB', 'N-CA-CB-CG', 'CA-CB-CG-CD', 'CB-CG-CD-CE', 'CG-CD-CE-NZ'
255 |         ],
256 |         'torsion-types': [
257 |             'C -N -CX-C8', 'N -CX-C8-C8', 'CX-C8-C8-C8', 'C8-C8-C8-C8', 'C8-C8-C8-N3'
258 |         ],
259 |         'torsion-vals': ['p', 'p', 'p', 'p', 'p'],
260 |         'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5], [4,5,6], [5,6,7], [6,7,8]],
261 |     },
262 | 
263 |     'M': {
264 |         'angles-names': ['N-CA-CB', 'CA-CB-CG', 'CB-CG-SD', 'CG-SD-CE'],
265 |         'angles-types': ['N -CX-2C', 'CX-2C-2C', '2C-2C-S ', '2C-S -CT'],
266 |         'angles-vals': [
267 |             1.9146261894377796, 1.911135530933791, 2.0018926520374962, 1.726130630222392
268 |         ],
269 |         'atom-names': ['CB', 'CG', 'SD', 'CE'],
270 |         'bonds-names': ['CA-CB', 'CB-CG', 'CG-SD', 'SD-CE'],
271 |         'bonds-types': ['CX-2C', '2C-2C', '2C-S ', 'S -CT'],
272 |         'bonds-vals': [1.526, 1.526, 1.81, 1.81],
273 |         'torsion-names': ['C-N-CA-CB', 'N-CA-CB-CG', 'CA-CB-CG-SD', 'CB-CG-SD-CE'],
274 |         'torsion-types': ['C -N -CX-2C', 'N -CX-2C-2C', 'CX-2C-2C-S ', '2C-2C-S -CT'],
275 |         'torsion-vals': ['p', 'p', 'p', 'p'],
276 |         'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5], [4,5,6], [5,6,7]],
277 |     },
278 | 
279 |     'F': {
280 |         'angles-names': [
281 |             'N-CA-CB', 'CA-CB-CG', 'CB-CG-CD1', 'CG-CD1-CE1', 'CD1-CE1-CZ', 'CE1-CZ-CE2',
282 |             'CZ-CE2-CD2'
283 |         ],
284 |         'angles-types': [
285 |             'N -CX-CT', 'CX-CT-CA', 'CT-CA-CA', 'CA-CA-CA', 'CA-CA-CA', 'CA-CA-CA',
286 |             'CA-CA-CA'
287 |         ],
288 |         'angles-vals': [
289 |             1.9146261894377796, 1.9896753472735358, 2.0943951023931953,
290 |             2.0943951023931953, 2.0943951023931953, 2.0943951023931953, 2.0943951023931953
291 |         ],
292 |         'atom-names': ['CB', 'CG', 'CD1', 'CE1', 'CZ', 'CE2', 'CD2'],
293 |         'bonds-names': [
294 |             'CA-CB', 'CB-CG', 'CG-CD1', 'CD1-CE1', 'CE1-CZ', 'CZ-CE2', 'CE2-CD2'
295 |         ],
296 |         'bonds-types': ['CX-CT', 'CT-CA', 'CA-CA', 'CA-CA', 'CA-CA', 'CA-CA', 'CA-CA'],
297 |         'bonds-vals': [1.526, 1.51, 1.4, 1.4, 1.4, 1.4, 1.4],
298 |         'torsion-names': [
299 |             'C-N-CA-CB', 'N-CA-CB-CG', 'CA-CB-CG-CD1', 'CB-CG-CD1-CE1', 'CG-CD1-CE1-CZ',
300 |             'CD1-CE1-CZ-CE2', 'CE1-CZ-CE2-CD2'
301 |         ],
302 |         'torsion-types': [
303 |             'C -N -CX-CT', 'N -CX-CT-CA', 'CX-CT-CA-CA', 'CT-CA-CA-CA', 'CA-CA-CA-CA',
304 |             'CA-CA-CA-CA', 'CA-CA-CA-CA'
305 |         ],
306 |         'torsion-vals': ['p', 'p', 'p', 3.141592653589793, 0.0, 0.0, 0.0],
307 |         'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5], [4,5,6]],
308 |     },
309 | 
310 |     'P': {
311 |         'angles-names': ['N-CA-CB', 'CA-CB-CG', 'CB-CG-CD'],
312 |         'angles-types': ['N -CX-CT', 'CX-CT-CT', 'CT-CT-CT'],
313 |         'angles-vals': [1.9146261894377796, 1.911135530933791, 1.911135530933791],
314 |         'atom-names': ['CB', 'CG', 'CD'],
315 |         'bonds-names': ['CA-CB', 'CB-CG', 'CG-CD'],
316 |         'bonds-types': ['CX-CT', 'CT-CT', 'CT-CT'],
317 |         'bonds-vals': [1.526, 1.526, 1.526],
318 |         'torsion-names': ['C-N-CA-CB', 'N-CA-CB-CG', 'CA-CB-CG-CD'],
319 |         'torsion-types': ['C -N -CX-CT', 'N -CX-CT-CT', 'CX-CT-CT-CT'],
320 |         'torsion-vals': ['p', 'p', 'p'],
321 |         'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5], [4,5,6]],
322 |     },
323 | 
324 |     'S': {
325 |         'angles-names': ['N-CA-CB', 'CA-CB-OG'],
326 |         'angles-types': ['N -CX-2C', 'CX-2C-OH'],
327 |         'angles-vals': [1.9146261894377796, 1.911135530933791],
328 |         'atom-names': ['CB', 'OG'],
329 |         'bonds-names': ['CA-CB', 'CB-OG'],
330 |         'bonds-types': ['CX-2C', '2C-OH'],
331 |         'bonds-vals': [1.526, 1.41],
332 |         'torsion-names': ['C-N-CA-CB', 'N-CA-CB-OG'],
333 |         'torsion-types': ['C -N -CX-2C', 'N -CX-2C-OH'],
334 |         'torsion-vals': ['p', 'p'],
335 |         'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5]],
336 |     },
337 | 
338 |     'T': {
339 |         'angles-names': ['N-CA-CB', 'CA-CB-OG1', 'CA-CB-CG2'],
340 |         'angles-types': ['N -CX-3C', 'CX-3C-OH', 'CX-3C-CT'],
341 |         'angles-vals': [1.9146261894377796, 1.911135530933791, 1.911135530933791],
342 |         'atom-names': ['CB', 'OG1', 'CG2'],
343 |         'bonds-names': ['CA-CB', 'CB-OG1', 'CB-CG2'],
344 |         'bonds-types': ['CX-3C', '3C-OH', '3C-CT'],
345 |         'bonds-vals': [1.526, 1.41, 1.526],
346 |         'torsion-names': ['C-N-CA-CB', 'N-CA-CB-OG1', 'N-CA-CB-CG2'],
347 |         'torsion-types': ['C -N -CX-3C', 'N -CX-3C-OH', 'N -CX-3C-CT'],
348 |         'torsion-vals': ['p', 'p', 'p'],
349 |         'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5]],
350 |     },
351 | 
352 |     'W': {
353 |         'angles-names': [
354 |             'N-CA-CB', 'CA-CB-CG', 'CB-CG-CD1', 'CG-CD1-NE1', 'CD1-NE1-CE2',
355 |             'NE1-CE2-CZ2', 'CE2-CZ2-CH2', 'CZ2-CH2-CZ3', 'CH2-CZ3-CE3', 'CZ3-CE3-CD2'
356 |         ],
357 |         'angles-types': [
358 |             'N -CX-CT', 'CX-CT-C*', 'CT-C*-CW', 'C*-CW-NA', 'CW-NA-CN', 'NA-CN-CA',
359 |             'CN-CA-CA', 'CA-CA-CA', 'CA-CA-CA', 'CA-CA-CB'
360 |         ],
361 |         'angles-vals': [
362 |             1.9146261894377796, 2.0176006153054447, 2.181661564992912, 1.8971728969178363,
363 |             1.9477874452256716, 2.3177972466484698, 2.0943951023931953,
364 |             2.0943951023931953, 2.0943951023931953, 2.0943951023931953
365 |         ],
366 |         'atom-names': [
367 |             'CB', 'CG', 'CD1', 'NE1', 'CE2', 'CZ2', 'CH2', 'CZ3', 'CE3', 'CD2'
368 |         ],
369 |         'bonds-names': [
370 |             'CA-CB', 'CB-CG', 'CG-CD1', 'CD1-NE1', 'NE1-CE2', 'CE2-CZ2', 'CZ2-CH2',
371 |             'CH2-CZ3', 'CZ3-CE3', 'CE3-CD2'
372 |         ],
373 |         'bonds-types': [
374 |             'CX-CT', 'CT-C*', 'C*-CW', 'CW-NA', 'NA-CN', 'CN-CA', 'CA-CA', 'CA-CA',
375 |             'CA-CA', 'CA-CB'
376 |         ],
377 |         'bonds-vals': [1.526, 1.495, 1.352, 1.381, 1.38, 1.4, 1.4, 1.4, 1.4, 1.404],
378 |         'torsion-names': [
379 |             'C-N-CA-CB', 'N-CA-CB-CG', 'CA-CB-CG-CD1', 'CB-CG-CD1-NE1', 'CG-CD1-NE1-CE2',
380 |             'CD1-NE1-CE2-CZ2', 'NE1-CE2-CZ2-CH2', 'CE2-CZ2-CH2-CZ3', 'CZ2-CH2-CZ3-CE3',
381 |             'CH2-CZ3-CE3-CD2'
382 |         ],
383 |         'torsion-types': [
384 |             'C -N -CX-CT', 'N -CX-CT-C*', 'CX-CT-C*-CW', 'CT-C*-CW-NA', 'C*-CW-NA-CN',
385 |             'CW-NA-CN-CA', 'NA-CN-CA-CA', 'CN-CA-CA-CA', 'CA-CA-CA-CA', 'CA-CA-CA-CB'
386 |         ],
387 |         'torsion-vals': [
388 |             'p', 'p', 'p', 3.141592653589793, 0.0, 3.141592653589793, 3.141592653589793,
389 |             0.0, 0.0, 0.0
390 |         ],
391 |         'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5], [4,5,6]]
392 |     },
393 | 
394 |     'Y': {
395 |         'angles-names': [
396 |             'N-CA-CB', 'CA-CB-CG', 'CB-CG-CD1', 'CG-CD1-CE1', 'CD1-CE1-CZ', 'CE1-CZ-OH',
397 |             'CE1-CZ-CE2', 'CZ-CE2-CD2'
398 |         ],
399 |         'angles-types': [
400 |             'N -CX-CT', 'CX-CT-CA', 'CT-CA-CA', 'CA-CA-CA', 'CA-CA-C ', 'CA-C -OH',
401 |             'CA-C -CA', 'C -CA-CA'
402 |         ],
403 |         'angles-vals': [
404 |             1.9146261894377796, 1.9896753472735358, 2.0943951023931953,
405 |             2.0943951023931953, 2.0943951023931953, 2.0943951023931953,
406 |             2.0943951023931953, 2.0943951023931953
407 |         ],
408 |         'atom-names': ['CB', 'CG', 'CD1', 'CE1', 'CZ', 'OH', 'CE2', 'CD2'],
409 |         'bonds-names': [
410 |             'CA-CB', 'CB-CG', 'CG-CD1', 'CD1-CE1', 'CE1-CZ', 'CZ-OH', 'CZ-CE2', 'CE2-CD2'
411 |         ],
412 |         'bonds-types': [
413 |             'CX-CT', 'CT-CA', 'CA-CA', 'CA-CA', 'CA-C ', 'C -OH', 'C -CA', 'CA-CA'
414 |         ],
415 |         'bonds-vals': [1.526, 1.51, 1.4, 1.4, 1.409, 1.364, 1.409, 1.4],
416 |         'torsion-names': [
417 |             'C-N-CA-CB', 'N-CA-CB-CG', 'CA-CB-CG-CD1', 'CB-CG-CD1-CE1', 'CG-CD1-CE1-CZ',
418 |             'CD1-CE1-CZ-OH', 'CD1-CE1-CZ-CE2', 'CE1-CZ-CE2-CD2'
419 |         ],
420 |         'torsion-types': [
421 |             'C -N -CX-CT', 'N -CX-CT-CA', 'CX-CT-CA-CA', 'CT-CA-CA-CA', 'CA-CA-CA-C ',
422 |             'CA-CA-C -OH', 'CA-CA-C -CA', 'CA-C -CA-CA'
423 |         ],
424 |         'torsion-vals': [
425 |             'p', 'p', 'p', 3.141592653589793, 0.0, 3.141592653589793, 0.0, 0.0
426 |         ],
427 |         'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5], [4,5,6]],
428 |     },
429 | 
430 |     'V': {
431 |         'angles-names': ['N-CA-CB', 'CA-CB-CG1', 'CA-CB-CG2'],
432 |         'angles-types': ['N -CX-3C', 'CX-3C-CT', 'CX-3C-CT'],
433 |         'angles-vals': [1.9146261894377796, 1.911135530933791, 1.911135530933791],
434 |         'atom-names': ['CB', 'CG1', 'CG2'],
435 |         'bonds-names': ['CA-CB', 'CB-CG1', 'CB-CG2'],
436 |         'bonds-types': ['CX-3C', '3C-CT', '3C-CT'],
437 |         'bonds-vals': [1.526, 1.526, 1.526],
438 |         'torsion-names': ['C-N-CA-CB', 'N-CA-CB-CG1', 'N-CA-CB-CG2'],
439 |         'torsion-types': ['C -N -CX-3C', 'N -CX-3C-CT', 'N -CX-3C-CT'],
440 |         'torsion-vals': ['p', 'p', 'p'],
441 |         'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5]]
442 |     },
443 | 
444 |     '_': {
445 |         'angles-names': [],
446 |         'angles-types': [],
447 |         'angles-vals': [],
448 |         'atom-names': [],
449 |         'bonds-names': [],
450 |         'bonds-types': [],
451 |         'bonds-vals': [],
452 |         'torsion-names': [],
453 |         'torsion-types': [],
454 |         'torsion-vals': [],
455 |         'rigid-frames-idxs': [[]],
456 |     }
457 | }
458 | 
# Backbone build constants, keyed by lower-case atom-name paths.
# Read by the mask builders below (make_bond_mask, make_theta_mask,
# make_torsion_mask). Angles and torsions are in radians; bond lengths are
# presumably in angstroms -- TODO confirm.
BB_BUILD_INFO = {
    "BONDLENS": {
        # the updated values are according to crystal data from 1DPE_1_A and validated with other structures
        # the commented-out values are the sidechainnet ones
        'n-ca': 1.4664931, # 1.442, 
        'ca-c': 1.524119,  # 1.498,
        'c-n': 1.3289373,  # 1.379,
        'c-o': 1.229,  # From parm10.dat || huge variability according to structures
        # we get 1.3389416 from 1DPE_1_A but also 1.2289 from 2F2H_d2f2hf1
        'c-oh': 1.364  # From parm10.dat, for OXT
    },
    # For placing oxygens
    "BONDANGS": {
        'ca-c-o': 2.0944,  # Approximated to be 2pi / 3; parm10.dat says 2.0350539
        'ca-c-oh': 2.0944,  # Equal to 'ca-c-o', for OXT
        'ca-c-n': 2.03,
        'n-ca-c': 1.94,
        'c-n-ca': 2.08,
    },
    # A simple approximation, not meant to be exact.
    "BONDTORSIONS": {
        'n-ca-c-n': -0.785398163, # psi (-44 deg, bimodal distro, pick one)
        'c-n-ca-c': -1.3962634015954636, # phi (-80 deg, bimodal distro, pick one)
        'ca-n-c-ca': 3.141592, # omega (180 deg - https://doi.org/10.1016/j.jmb.2005.01.065) 
        'n-ca-c-o': -2.406 # oxygen
    }
}
487 | 
488 | 
# Covalent bonds inside each residue, as pairs of atom indices.
# numbers follow the same order as sidechainnet atoms:
#   0 = N, 1 = CA, 2 = C, 3 = O, then the side-chain atoms from index 4 on,
#   in the order of SC_BUILD_INFO[aa]['atom-names'].
# Every non-glycine entry therefore starts with the backbone bonds
# N-CA, CA-C, C-O and CA-CB ([0,1], [1,2], [2,3], [1,4]).
SCN_CONNECT = { 
    'A': {
        'bonds': [[0,1], [1,2], [2,3], [1,4]] 
         },
    'R': {
        'bonds': [[0,1], [1,2], [2,3], [1,4], [4,5], [5,6],
                  [6,7], [7,8], [8,9], [8,10]] 
         },
    'N': {
        'bonds': [[0,1], [1,2], [2,3], [1,4], [4,5], [5,6],
                  [5,7]] 
         },
    'D': {
        'bonds': [[0,1], [1,2], [2,3], [1,4], [4,5], [5,6],
                  [5,7]] 
         },
    'C': {
        'bonds': [[0,1], [1,2], [2,3], [1,4], [4,5]] 
        },
    'Q': {
        'bonds': [[0,1], [1,2], [2,3], [1,4], [4,5], [5,6],
                  [6,7], [6,8]] 
        },
    'E': {
        'bonds': [[0,1], [1,2], [2,3], [1,4], [4,5], [5,6],
                  [6,7], [6,8]]
        },
    # glycine: backbone only, no CB
    'G': {
        'bonds': [[0,1], [1,2], [2,3]] 
        },
    # [5,9] closes the imidazole ring (CG-CD2)
    'H': {
        'bonds': [[0,1], [1,2], [2,3], [1,4], [4,5], [5,6],
                  [6,7], [7,8], [8,9], [5,9]] 
        },
    'I': {
        'bonds': [[0,1], [1,2], [2,3], [1,4], [4,5], [5,6],
                  [4,7]] 
         },
    'L': {
        'bonds': [[0,1], [1,2], [2,3], [1,4], [4,5], [5,6],
                  [5,7]] 
         },
    'K': {
        'bonds': [[0,1], [1,2], [2,3], [1,4], [4,5], [5,6],
                  [6,7], [7,8]] 
         },
    'M': {
        'bonds': [[0,1], [1,2], [2,3], [1,4], [4,5], [5,6],
                  [6,7]] 
         },
    # [5,10] closes the phenyl ring (CG-CD2)
    'F': {
        'bonds': [[0,1], [1,2], [2,3], [1,4], [4,5], [5,6],
                  [6,7], [7,8], [8,9], [9,10], [5,10]] 
         },
    # [0,6] closes the pyrrolidine ring back onto the backbone N (N-CD)
    'P': {
        'bonds': [[0,1], [1,2], [2,3], [1,4], [4,5], [5,6],
                  [0,6]] 
         },
    'S': {
        'bonds': [[0,1], [1,2], [2,3], [1,4], [4,5]] 
         },
    'T': {
        'bonds': [[0,1], [1,2], [2,3], [1,4], [4,5], [4,6]] 
         },
    # [5,13] (CG-CD2) and [8,13] (CE2-CD2) close the two fused indole rings
    'W': {
        'bonds': [[0,1], [1,2], [2,3], [1,4], [4,5], [5,6],
                  [6,7], [7,8], [8,9], [9,10], [10,11], [11,12],
                  [12, 13], [5,13], [8,13]] 
         },
    # [8,9] is CZ-OH; [5,11] closes the phenol ring (CG-CD2)
    'Y': {
        'bonds': [[0,1], [1,2], [2,3], [1,4], [4,5], [5,6],
                  [6,7], [7,8], [8,9], [8,10], [10,11], [5,11]] 
         },
    'V': {
        'bonds': [[0,1], [1,2], [2,3], [1,4], [4,5], [4,6]] 
         },
    # padding token: no atoms, no bonds
    '_': {
        'bonds': []
        }
    }
570 | 
# from: https://static-content.springer.com/esm/art%3A10.1038%2Fs41586-021-03819-2/MediaObjects/41586_2021_3819_MOESM1_ESM.pdf
# added R's terminal Ns due to a small percentage of misalignments (10%)
#
# Pairs of side-chain atoms that can be swapped without changing the residue.
# "names" holds the atom-name pairs; "indexs" holds the matching positions in
# the 14-atom residue layout (0 = N, 1 = CA, 2 = C, 3 = O, side chain from 4,
# in the order of SC_BUILD_INFO[aa]['atom-names']).
AMBIGUOUS = {
    "D": {"names": [["OD1", "OD2"]],
          "indexs": [[6, 7]],
          },
    "E": {"names": [["OE1", "OE2"]],
          "indexs": [[7, 8]],
          },
    "F": {"names": [["CD1", "CD2"], ["CE1", "CE2"]],
          "indexs": [[6, 10], [7, 9]],
          },
    # Tyrosine's side chain is CB, CG, CD1, CE1, CZ, OH, CE2, CD2 (indices
    # 4..11): OH sits at index 9, so CE2 is 10 and CD2 is 11. Reusing the
    # phenylalanine indices here would pair CD1 with CE2 and CE1 with OH.
    "Y": {"names": [["CD1", "CD2"], ["CE1", "CE2"]],
          "indexs": [[6, 11], [7, 10]],
          },
    "R": {"names": [["NH1", "NH2"]],
          "indexs": [[9, 10]]
          },
}
590 | 
591 | 
# AA subst mat (the values are those of BLOSUM62).
# Each key maps a one-letter residue code to its 21-entry substitution row.
# Columns follow the conventional BLOSUM ordering
#     A R N D C Q E G H I L K M F P S T W Y V
# plus a final column that is 1.0 only for the padding token "_".
# Keys are listed alphabetically (same order as INDEX2AAS). The rows used to
# be attached to the wrong keys: they were written in column order but
# labelled alphabetically, so e.g. "C" carried arginine's row. Every row is
# now attached to its own residue, which puts each residue's self-score
# (the row maximum) in that residue's own column.
BLOSUM = {
    "A" : [4.0, -1.0, -2.0, -2.0, 0.0, -1.0, -1.0, 0.0, -2.0, -1.0, -1.0, -1.0, -1.0, -2.0, -1.0, 1.0, 0.0, -3.0, -2.0, 0.0, 0.0],
    "C" : [0.0, -3.0, -3.0, -3.0, 9.0, -3.0, -4.0, -3.0, -3.0, -1.0, -1.0, -3.0, -1.0, -2.0, -3.0, -1.0, -1.0, -2.0, -2.0, -1.0, 0.0],
    "D" : [-2.0, -2.0, 1.0, 6.0, -3.0, 0.0, 2.0, -1.0, -1.0, -3.0, -4.0, -1.0, -3.0, -3.0, -1.0, 0.0, -1.0, -4.0, -3.0, -3.0, 0.0],
    "E" : [-1.0, 0.0, 0.0, 2.0, -4.0, 2.0, 5.0, -2.0, 0.0, -3.0, -3.0, 1.0, -2.0, -3.0, -1.0, 0.0, -1.0, -3.0, -2.0, -2.0, 0.0],
    "F" : [-2.0, -3.0, -3.0, -3.0, -2.0, -3.0, -3.0, -3.0, -1.0, 0.0, 0.0, -3.0, 0.0, 6.0, -4.0, -2.0, -2.0, 1.0, 3.0, -1.0, 0.0],
    "G" : [0.0, -2.0, 0.0, -1.0, -3.0, -2.0, -2.0, 6.0, -2.0, -4.0, -4.0, -2.0, -3.0, -3.0, -2.0, 0.0, -2.0, -2.0, -3.0, -3.0, 0.0],
    "H" : [-2.0, 0.0, 1.0, -1.0, -3.0, 0.0, 0.0, -2.0, 8.0, -3.0, -3.0, -1.0, -2.0, -1.0, -2.0, -1.0, -2.0, -2.0, 2.0, -3.0, 0.0],
    "I" : [-1.0, -3.0, -3.0, -3.0, -1.0, -3.0, -3.0, -4.0, -3.0, 4.0, 2.0, -3.0, 1.0, 0.0, -3.0, -2.0, -1.0, -3.0, -1.0, 3.0, 0.0],
    "K" : [-1.0, 2.0, 0.0, -1.0, -3.0, 1.0, 1.0, -2.0, -1.0, -3.0, -2.0, 5.0, -1.0, -3.0, -1.0, 0.0, -1.0, -3.0, -2.0, -2.0, 0.0],
    "L" : [-1.0, -2.0, -3.0, -4.0, -1.0, -2.0, -3.0, -4.0, -3.0, 2.0, 4.0, -2.0, 2.0, 0.0, -3.0, -2.0, -1.0, -2.0, -1.0, 1.0, 0.0],
    "M" : [-1.0, -1.0, -2.0, -3.0, -1.0, 0.0, -2.0, -3.0, -2.0, 1.0, 2.0, -1.0, 5.0, 0.0, -2.0, -1.0, -1.0, -1.0, -1.0, 1.0, 0.0],
    "N" : [-2.0, 0.0, 6.0, 1.0, -3.0, 0.0, 0.0, 0.0, 1.0, -3.0, -3.0, 0.0, -2.0, -3.0, -2.0, 1.0, 0.0, -4.0, -2.0, -3.0, 0.0],
    "P" : [-1.0, -2.0, -2.0, -1.0, -3.0, -1.0, -1.0, -2.0, -2.0, -3.0, -3.0, -1.0, -2.0, -4.0, 7.0, -1.0, -1.0, -4.0, -3.0, -2.0, 0.0],
    "Q" : [-1.0, 1.0, 0.0, 0.0, -3.0, 5.0, 2.0, -2.0, 0.0, -3.0, -2.0, 1.0, 0.0, -3.0, -1.0, 0.0, -1.0, -2.0, -1.0, -2.0, 0.0],
    "R" : [-1.0, 5.0, 0.0, -2.0, -3.0, 1.0, 0.0, -2.0, 0.0, -3.0, -2.0, 2.0, -1.0, -3.0, -2.0, -1.0, -1.0, -3.0, -2.0, -3.0, 0.0],
    "S" : [1.0, -1.0, 1.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, -2.0, -2.0, 0.0, -1.0, -2.0, -1.0, 4.0, 1.0, -3.0, -2.0, -2.0, 0.0],
    "T" : [0.0, -1.0, 0.0, -1.0, -1.0, -1.0, -1.0, -2.0, -2.0, -1.0, -1.0, -1.0, -1.0, -2.0, -1.0, 1.0, 5.0, -2.0, -2.0, 0.0, 0.0],
    "V" : [0.0, -3.0, -3.0, -3.0, -1.0, -2.0, -2.0, -3.0, -3.0, 3.0, 1.0, -2.0, 1.0, -1.0, -2.0, -2.0, 0.0, -3.0, -1.0, 4.0, 0.0],
    "W" : [-3.0, -3.0, -4.0, -4.0, -2.0, -2.0, -3.0, -2.0, -2.0, -3.0, -2.0, -3.0, -1.0, 1.0, -4.0, -3.0, -2.0, 11.0, 2.0, -3.0, 0.0],
    "Y" : [-2.0, -2.0, -2.0, -3.0, -2.0, -1.0, -2.0, -3.0, 2.0, -1.0, -1.0, -2.0, -1.0, 3.0, -3.0, -2.0, -2.0, 2.0, 7.0, -1.0, 0.0],
    "_" : [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0],
}
616 | 
617 | 
# Reference side-chain geometry: for every residue, one entry per side-chain
# atom with the bond length, bond angle and dihedral used to place that atom.
# 'bond_dihedral' is the value make_torsion_mask(aa, fill=True) reads.
# Angles and dihedrals are presumably in radians and lengths in angstroms
# -- TODO confirm.
# modified manually to match the mode (see the inline notes on multimodal
# distributions)
MP3SC_INFO = {
    'A': {'CB': {'bond_lens': 1.5260003, 'bond_angs': 1.9146265, 'bond_dihedral': 2.848366}
    },
    'R': {'CB': {'bond_lens': 1.5260003, 'bond_angs': 1.9146265, 'bond_dihedral': 2.6976738},
     'CG': {'bond_lens': 1.5260003, 'bond_angs': 1.9111352, 'bond_dihedral': -1.2},
     'CD': {'bond_lens': 1.5260003, 'bond_angs': 1.9111352, 'bond_dihedral': -3.141592},
     'NE': {'bond_lens': 1.463, 'bond_angs': 1.9408059, 'bond_dihedral': -3.141592},
     'CZ': {'bond_lens': 1.34, 'bond_angs': 2.1502457, 'bond_dihedral': -3.141592},
     'NH1': {'bond_lens': 1.34, 'bond_angs': 2.094395, 'bond_dihedral': 0.},
     'NH2': {'bond_lens': 1.34, 'bond_angs': 2.094395, 'bond_dihedral': -3.141592}
    },
    'N': {'CB': {'bond_lens': 1.5260003, 'bond_angs': 1.9146265, 'bond_dihedral': 2.8416245},
     'CG': {'bond_lens': 1.5219998, 'bond_angs': 1.9390607, 'bond_dihedral': -1.15},
     'OD1': {'bond_lens': 1.229, 'bond_angs': 2.101376, 'bond_dihedral': -1.}, # spread out w/ mean at -1
     'ND2': {'bond_lens': 1.3349999, 'bond_angs': 2.0350537, 'bond_dihedral': 2.14} # spread out with mean at -4
    },
    'D': {'CB': {'bond_lens': 1.526, 'bond_angs': 1.9146265, 'bond_dihedral': 2.7741134},
     'CG': {'bond_lens': 1.522, 'bond_angs': 1.9390608, 'bond_dihedral': -1.07},
     'OD1': {'bond_lens': 1.25, 'bond_angs': 2.0420356, 'bond_dihedral': -0.2678593},
     'OD2': {'bond_lens': 1.25, 'bond_angs': 2.0420356, 'bond_dihedral': 2.95}
    },
    'C': {'CB': {'bond_lens': 1.5259998, 'bond_angs': 1.9146262, 'bond_dihedral': 2.553627},
     'SG': {'bond_lens': 1.8099997, 'bond_angs': 1.8954275, 'bond_dihedral': -1.07}
    },
    'Q': {'CB': {'bond_lens': 1.5260003, 'bond_angs': 1.9146266, 'bond_dihedral': 2.7262106},
     'CG': {'bond_lens': 1.5260003, 'bond_angs': 1.9111353, 'bond_dihedral': -1.075},
     'CD': {'bond_lens': 1.5219998, 'bond_angs': 1.9390606, 'bond_dihedral': -3.141592},
     'OE1': {'bond_lens': 1.229, 'bond_angs': 2.101376, 'bond_dihedral': -1}, # bimodal at -1, +1 
     'NE2': {'bond_lens': 1.3349998, 'bond_angs': 2.0350537, 'bond_dihedral': 2.14} # bimodal at -2, -4
    },
    'E': {'CB': {'bond_lens': 1.5260003, 'bond_angs': 1.9146267, 'bond_dihedral': 2.7813723},
     'CG': {'bond_lens': 1.5260003, 'bond_angs': 1.9111352, 'bond_dihedral': -1.07}, # bimodal at -1.07, 3.14
     'CD': {'bond_lens': 1.5219998, 'bond_angs': 1.9390606, 'bond_dihedral': -3.0907722155200403},
     'OE1': {'bond_lens': 1.25, 'bond_angs': 2.0420356, 'bond_dihedral': 0.003740118}, # spread out between -1, 1
     'OE2': {'bond_lens': 1.25, 'bond_angs': 2.0420356, 'bond_dihedral': -3.1378527} # spread out between -4.3, -2.14
    },
    # glycine has no side-chain atoms
    'G': {},
    'H': {'CB': {'bond_lens': 1.5259998, 'bond_angs': 1.9146264, 'bond_dihedral': 2.614421},
     'CG': {'bond_lens': 1.5039998, 'bond_angs': 1.9739674, 'bond_dihedral': -1.05},
     'ND1': {'bond_lens': 1.3850001, 'bond_angs': 2.094395, 'bond_dihedral': -1.41}, # bimodal at -1.4, 1.4
     'CE1': {'bond_lens': 1.3430002, 'bond_angs': 1.8849558, 'bond_dihedral': 3.14},
     'NE2': {'bond_lens': 1.335, 'bond_angs': 1.8849558, 'bond_dihedral': 0.0},
     'CD2': {'bond_lens': 1.3940002, 'bond_angs': 1.8849558, 'bond_dihedral': 0.0}
    },
    'I': {'CB': {'bond_lens': 1.526, 'bond_angs': 1.9146265, 'bond_dihedral': 2.5604365},
     'CG1': {'bond_lens': 1.526, 'bond_angs': 1.9111353, 'bond_dihedral': -1.025},
     'CD1': {'bond_lens': 1.526, 'bond_angs': 1.9111353, 'bond_dihedral': -3.0667439142810267},
     'CG2': {'bond_lens': 1.526, 'bond_angs': 1.9111353, 'bond_dihedral': -3.1225884596454065}
    },
    'L': {'CB': {'bond_lens': 1.5260003, 'bond_angs': 1.9146265, 'bond_dihedral': 2.711971},
     'CG': {'bond_lens': 1.5260003, 'bond_angs': 1.9111352, 'bond_dihedral': -1.15},
     'CD1': {'bond_lens': 1.5260003, 'bond_angs': 1.9111352, 'bond_dihedral': 3.14},
     'CD2': {'bond_lens': 1.5260003, 'bond_angs': 1.9111352, 'bond_dihedral': -1.05}
    },
    'K': {'CB': {'bond_lens': 1.526, 'bond_angs': 1.9146266, 'bond_dihedral': 2.7441595},
     'CG': {'bond_lens': 1.526, 'bond_angs': 1.9111353, 'bond_dihedral': -1.15},
     'CD': {'bond_lens': 1.526, 'bond_angs': 1.9111353, 'bond_dihedral': -3.09},
     'CE': {'bond_lens': 1.526, 'bond_angs': 1.9111353, 'bond_dihedral': 3.092959},
     'NZ': {'bond_lens': 1.4710001, 'bond_angs': 1.940806, 'bond_dihedral': 3.0515378}
    },
    'M': {'CB': {'bond_lens': 1.526, 'bond_angs': 1.9146264, 'bond_dihedral': 2.7051392},
     'CG': {'bond_lens': 1.526, 'bond_angs': 1.9111354, 'bond_dihedral': -1.1},
     'SD': {'bond_lens': 1.8099998, 'bond_angs': 2.001892, 'bond_dihedral': 3.1411812}, # bimodal at 0, 3.14
     'CE': {'bond_lens': 1.8099998, 'bond_angs': 1.7261307, 'bond_dihedral': -0.048235133} # trimodal at -1.41, 0, 1.41
    },
    'F': {'CB': {'bond_lens': 1.5260003, 'bond_angs': 1.9146266, 'bond_dihedral': 2.545154},
     'CG': {'bond_lens': 1.5100001, 'bond_angs': 1.9896755, 'bond_dihedral': -1.2}, # bimodal at -1, 3.14 
     'CD1': {'bond_lens': 1.3999997, 'bond_angs': 2.094395, 'bond_dihedral': 1.41}, # bimodal -1.41, 1.41
     'CE1': {'bond_lens': 1.3999997, 'bond_angs': 2.094395, 'bond_dihedral': 3.141592},
     'CZ': {'bond_lens': 1.3999997, 'bond_angs': 2.094395, 'bond_dihedral': 0.0},
     'CE2': {'bond_lens': 1.3999997, 'bond_angs': 2.094395, 'bond_dihedral': 0.0},
     'CD2': {'bond_lens': 1.3999997, 'bond_angs': 2.094395, 'bond_dihedral': 0.0}
    },
    'P': {'CB': {'bond_lens': 1.5260001, 'bond_angs': 1.9146266, 'bond_dihedral': 3.141592},
     'CG': {'bond_lens': 1.5260001, 'bond_angs': 1.9111352, 'bond_dihedral': -0.707}, # bimodal at -0.7, 0.7
     'CD': {'bond_lens': 1.5260001, 'bond_angs': 1.9111352, 'bond_dihedral': 0.85} # bimodal at -0.85, 0.85
    },
    'S': {'CB': {'bond_lens': 1.5260001, 'bond_angs': 1.9146266, 'bond_dihedral': 2.6017702},
     'OG': {'bond_lens': 1.41, 'bond_angs': 1.9111352, 'bond_dihedral': 1.1}
    },
    'T': {'CB': {'bond_lens': 1.5260001, 'bond_angs': 1.9146265, 'bond_dihedral': 2.55},
     'OG1': {'bond_lens': 1.4099998, 'bond_angs': 1.9111353, 'bond_dihedral': -1.07}, # bimodal at -1 and +1
     'CG2': {'bond_lens': 1.5260001, 'bond_angs': 1.9111353, 'bond_dihedral': -3.05} # bimodal at -1 and -3
    },
    'W': {'CB': {'bond_lens': 1.526, 'bond_angs': 1.9146266, 'bond_dihedral': 3.141592},
     'CG': {'bond_lens': 1.4950002, 'bond_angs': 2.0176008, 'bond_dihedral': -1.2},
     'CD1': {'bond_lens': 1.3520001, 'bond_angs': 2.1816616, 'bond_dihedral': 1.53},
     'NE1': {'bond_lens': 1.3810003, 'bond_angs': 1.8971729, 'bond_dihedral': 3.141592},
     'CE2': {'bond_lens': 1.3799998, 'bond_angs': 1.9477878, 'bond_dihedral': 0.0},
     'CZ2': {'bond_lens': 1.3999999, 'bond_angs': 2.317797, 'bond_dihedral': 3.141592},
     'CH2': {'bond_lens': 1.3999999, 'bond_angs': 2.094395, 'bond_dihedral': 3.141592},
     'CZ3': {'bond_lens': 1.3999999, 'bond_angs': 2.094395, 'bond_dihedral': 0.0},
     'CE3': {'bond_lens': 1.3999999, 'bond_angs': 2.094395, 'bond_dihedral': 0.0},
     'CD2': {'bond_lens': 1.404, 'bond_angs': 2.094395, 'bond_dihedral': 0.0}
    },
    'Y': {'CB': {'bond_lens': 1.5260003, 'bond_angs': 1.9146266, 'bond_dihedral': 3.1},
     'CG': {'bond_lens': 1.5100001, 'bond_angs': 1.9896754, 'bond_dihedral': -1.1},
     'CD1': {'bond_lens': 1.3999997, 'bond_angs': 2.094395, 'bond_dihedral': 1.36},
     'CE1': {'bond_lens': 1.3999997, 'bond_angs': 2.094395, 'bond_dihedral': 3.141592},
     'CZ': {'bond_lens': 1.4090003, 'bond_angs': 2.094395, 'bond_dihedral': 0.0},
     'OH': {'bond_lens': 1.3640002, 'bond_angs': 2.094395, 'bond_dihedral': 3.141592},
     'CE2': {'bond_lens': 1.4090003, 'bond_angs': 2.094395, 'bond_dihedral': 0.0},
     'CD2': {'bond_lens': 1.3999997, 'bond_angs': 2.094395, 'bond_dihedral': 0.0}
    },
    'V': {'CB': {'bond_lens': 1.5260003, 'bond_angs': 1.9146266, 'bond_dihedral': 2.55},
     'CG1': {'bond_lens': 1.5260003, 'bond_angs': 1.9111352, 'bond_dihedral': 3.141592},
     'CG2': {'bond_lens': 1.5260003, 'bond_angs': 1.9111352, 'bond_dihedral': -1.1}
    },

    # padding token: no atoms
    '_': {}
}
730 | 
# Experimentally checked distance bounds. The integer keys are presumably the
# number of covalent bonds separating two atoms -- TODO confirm against callers.
FF = {
    "MIN_DISTS": {
        1: 1.180,  # shortest =N or =O bond
        2: 2.138,  # N-N in histidine group
        3: 2.380,  # N-N in backbone (N-CA-C-N)
    },
    # 1.84 is the longest -S bond found; scaled linearly for keys 1..5
    "MAX_DISTS": {n_bonds: 1.840 * n_bonds for n_bonds in range(1, 6)},
}
737 | 
# Integer token for every atom name: the empty string, the four backbone atoms
# and every side-chain atom name listed in SC_BUILD_INFO. Names are sorted
# before numbering, so the empty string gets token 0.
ATOM_TOKEN_IDS = {
    atom_name: token
    for token, atom_name in enumerate(sorted(
        {"", "N", "CA", "C", "O"}
        | {atom for info in SC_BUILD_INFO.values() for atom in info["atom-names"]}
    ))
}
744 | 
745 | #################
746 | ##### DOERS #####
747 | #################
748 | 
def make_cloud_mask(aa):
    """ Marks which of the 14 atom slots of a residue are occupied.
        Inputs:
        * aa: str. one-letter residue code ("_" is the padding token)
        Outputs: (14,) array. 1 for the 4 backbone atoms plus every
                 side-chain atom of `aa`, 0 for padding slots.
    """
    mask = np.zeros(14)
    # padding token: nothing is present
    if aa == "_":
        return mask
    n_present = 4 + len(SC_BUILD_INFO[aa]["atom-names"])
    mask[:n_present] = True
    return mask
756 | 
def make_bond_mask(aa):
    """ Gives the length of the bond originating each atom.
        Inputs:
        * aa: str. one-letter residue code ("_" is the padding token)
        Outputs: (14,) array. slots 0-3 hold the backbone bond lengths
                 ('c-n', 'n-ca', 'ca-c', 'c-o'), slots 4+ the side-chain
                 'bonds-vals' of `aa`; unused slots stay 0.
    """
    mask = np.zeros(14)
    # padding token: all zeros
    if aa == "_":
        return mask
    # backbone
    bb_lens = BB_BUILD_INFO["BONDLENS"]
    for slot, bond_key in enumerate(('c-n', 'n-ca', 'ca-c', 'c-o')):
        mask[slot] = bb_lens[bond_key]
    # sidechain - only for residues known to SC_BUILD_INFO
    if aa in SC_BUILD_INFO.keys():
        for slot, length in enumerate(SC_BUILD_INFO[aa]['bonds-vals'], start=4):
            mask[slot] = length
    return mask
771 | 
def make_theta_mask(aa):
    """ Gives the theta (bond angle) of the bond originating each atom.
        Inputs:
        * aa: str. one-letter residue code ("_" is the padding token)
        Outputs: (14,) array. slots 0-3 hold the backbone angles, slots 4+
                 the side-chain 'angles-vals' of `aa`; unused slots stay 0.
    """
    mask = np.zeros(14)
    # padding token: all zeros
    if aa == "_":
        return mask
    # backbone, in slot order: nitrogen, c_alpha, carbon, oxygen
    bb_angles = BB_BUILD_INFO["BONDANGS"]
    for slot, angle_key in enumerate(('ca-c-n', 'c-n-ca', 'n-ca-c', 'ca-c-o')):
        mask[slot] = bb_angles[angle_key]
    # sidechain
    for slot, theta in enumerate(SC_BUILD_INFO[aa]['angles-vals'], start=4):
        mask[slot] = theta
    return mask
785 | 
def make_torsion_mask(aa, fill=False):
    """ Gives the dihedral of the bond originating each atom.
        Inputs:
        * aa: str. one-letter residue code ("_" is the padding token)
        * fill: bool. if True, side-chain slots take the reference
                'bond_dihedral' from MP3SC_INFO instead of the
                SC_BUILD_INFO 'torsion-vals' annotations.
        Outputs: (14,) array. slots 0-3 hold the backbone torsions, slots 4+
                 the side-chain ones. Without `fill`, a 'p' annotation
                 becomes NaN, an 'i' annotation becomes 999 and numeric
                 values are copied as they are.
    """
    mask = np.zeros(14)
    # padding token: all zeros
    if aa == "_":
        return mask
    # backbone
    bb_torsions = BB_BUILD_INFO["BONDTORSIONS"]
    mask[0] = bb_torsions['n-ca-c-n']   # psi
    mask[1] = bb_torsions['ca-n-c-ca']  # omega
    mask[2] = bb_torsions['c-n-ca-c']   # phi
    mask[3] = bb_torsions['n-ca-c-o']   # oxygen
    # sidechain
    sc_info = SC_BUILD_INFO[aa]
    for slot, annotation in enumerate(sc_info['torsion-vals'], start=4):
        if fill:
            atom_name = sc_info["atom-names"][slot - 4]
            mask[slot] = MP3SC_INFO[aa][atom_name]["bond_dihedral"]
        # https://github.com/jonathanking/sidechainnet/blob/master/sidechainnet/structure/StructureBuilder.py#L372
        # 999 is an annotation -- change later || same for 555
        elif annotation == 'p':
            mask[slot] = np.nan
        elif annotation == "i":
            mask[slot] = 999
        else:
            mask[slot] = annotation
    return mask
804 | 
def make_idx_mask(aa):
    """ Gives the idxs of the 3 previous points.
        Inputs:
        * aa: str. one-letter residue code ("_" is the padding token)
        Outputs: (11, 3) array. row 0 is [0, 1, 2]; row i+1 holds the
                 residue-level indices of the first three atoms named in
                 the i-th entry of 'torsion-names' (the fourth atom of the
                 torsion is left out). unused rows stay 0.
    """
    mask = np.zeros((11, 3))
    # padding token: all zeros
    if aa == "_":
        return mask
    # backbone
    mask[0, :] = np.arange(3)
    # sidechain: atoms with a fixed slot; any other atom is looked up in the
    # residue's 'atom-names' list and offset by the 4 backbone slots
    fixed_slots = {"N": 0, "CA": 1, "C":2,  "CB": 4}
    sc_info = SC_BUILD_INFO[aa]
    for row, torsion_name in enumerate(sc_info['torsion-names'], start=1):
        # get all the atoms forming the dihedral
        atoms = [part.rstrip(" ") for part in torsion_name.split("-")]
        # keep the three reference atoms only
        for col, atom in enumerate(atoms[:-1]):
            if atom in fixed_slots.keys():
                mask[row][col] = fixed_slots[atom]
            else:
                mask[row][col] = 4 + sc_info['atom-names'].index(atom)
    return mask
823 | 
def make_atom_token_mask(aa):
    """ Return the tokens for each atom in the aa.
        Inputs:
        * aa: str. one-letter residue code ("_" is the padding token)
        Outputs: (14,) array. the ATOM_TOKEN_IDS value of each atom
                 (N, CA, C, O, then the side chain); unused slots stay 0.
    """
    mask = np.zeros(14)
    # padding token: all zeros
    if aa == "_":
        return mask
    # get atom id
    atom_names = ["N", "CA", "C", "O"] + SC_BUILD_INFO[aa]["atom-names"]
    for slot, atom_name in enumerate(atom_names):
        mask[slot] = ATOM_TOKEN_IDS[atom_name]
    return mask
833 | 
834 | 
835 | ###################
836 | ##### GETTERS #####
837 | ###################
# Residue alphabet: position in the string <-> one-letter code ("_" = padding).
INDEX2AAS = "ACDEFGHIKLMNPQRSTVWY_"
AAS2INDEX = dict(zip(INDEX2AAS, range(len(INDEX2AAS))))
# Per-residue lookup of every precomputed mask, built once at import time.
SUPREME_INFO = {
    aa: dict(
        cloud_mask=make_cloud_mask(aa),
        bond_mask=make_bond_mask(aa),
        theta_mask=make_theta_mask(aa),
        torsion_mask=make_torsion_mask(aa),
        torsion_mask_filled=make_torsion_mask(aa, fill=True),
        idx_mask=make_idx_mask(aa),
        atom_token_mask=make_atom_token_mask(aa),
        rigid_idx_mask=SC_BUILD_INFO[aa]['rigid-frames-idxs'],
    )
    for aa in INDEX2AAS
}
850 | 
851 | 


--------------------------------------------------------------------------------
/mp_nerf/massive_pnerf.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | import numpy as np
 3 | # diff ml
 4 | import torch
 5 | from einops import repeat
 6 | 
 7 | 
 8 | def get_axis_matrix(a, b, c, norm=True):
 9 |     """ Gets an orthonomal basis as a matrix of [e1, e2, e3]. 
10 |         Useful for constructing rotation matrices between planes
11 |         according to the first answer here:
12 |         https://math.stackexchange.com/questions/1876615/rotation-matrix-from-plane-a-to-b
13 |         Inputs:
14 |         * a: (batch, 3) or (3, ). point(s) of the plane
15 |         * b: (batch, 3) or (3, ). point(s) of the plane
16 |         * c: (batch, 3) or (3, ). point(s) of the plane
17 |         Outputs: orthonomal basis as a matrix of [e1, e2, e3]. calculated as: 
18 |             * e1_ = (c-b)
19 |             * e2_proto = (b-a)
20 |             * e3_ = e1_ ^ e2_proto
21 |             * e2_ = e3_ ^ e1_
22 |             * basis = normalize_by_vectors( [e1_, e2_, e3_] )
23 |         Note: Could be done more by Grahm-Schmidt and extend to N-dimensions
24 |               but this is faster and more intuitive for 3D.
25 |     """
26 |     v1_ = c - b 
27 |     v2_ = b - a
28 |     v3_ = torch.cross(v1_, v2_, dim=-1)
29 |     v2_ready = torch.cross(v3_, v1_, dim=-1)
30 |     basis    = torch.stack([v1_, v2_ready, v3_], dim=-2)
31 |     # normalize if needed
32 |     if norm:
33 |         return basis / torch.norm(basis, dim=-1, keepdim=True) 
34 |     return basis
35 | 
36 | 
37 | 
def mp_nerf_torch(a, b, c, l, theta, chi):
    """ Custom Natural extension of Reference Frame.
        Inputs:
        * a: (batch, 3) or (3,). point(s) of the plane, not connected to d
        * b: (batch, 3) or (3,). point(s) of the plane, not connected to d
        * c: (batch, 3) or (3,). point(s) of the plane, connected to d
        * l: (batch,) or 0-dim tensor. length(s) of the c-d bond
        * theta: (batch,) or 0-dim tensor. angle(s) between b-c-d
        * chi: (batch,) or 0-dim tensor. dihedral angle(s) between the
               a-b-c and b-c-d planes
        Outputs: d (batch, 3) or (3,). the next point in the sequence, linked to c
        Raises: ValueError if any theta lies outside [-pi, pi].
    """
    # safety check
    if not ( (-np.pi <= theta) * (theta <= np.pi) ).all().item():
        raise ValueError(f"theta(s) must be in radians and in [-pi, pi]. theta(s) = {theta}")
    # calc vecs
    ba = b-a
    cb = c-b
    # calc rotation matrix. based on plane normals and normalized
    n_plane  = torch.cross(ba, cb, dim=-1)
    n_plane_ = torch.cross(n_plane, cb, dim=-1)
    rotate   = torch.stack([cb, n_plane_, n_plane], dim=-1)
    # out-of-place on purpose: torch.norm keeps `rotate` for its backward
    # pass, so dividing in place would invalidate the autograd graph
    rotate   = rotate / torch.norm(rotate, dim=-2, keepdim=True)
    # calc proto point, rotate. add (-1 for sidechainnet convention)
    # https://github.com/jonathanking/sidechainnet/issues/14
    d = torch.stack([-torch.cos(theta),
                     torch.sin(theta) * torch.cos(chi),
                     torch.sin(theta) * torch.sin(chi)], dim=-1).unsqueeze(-1)
    # extend base point, set length. only the trailing matmul dim is dropped,
    # so size-1 batch dims are preserved
    return c + l.unsqueeze(-1) * torch.matmul(rotate, d).squeeze(-1)
66 | 
67 | 
68 | 


--------------------------------------------------------------------------------
/mp_nerf/ml_utils.py:
--------------------------------------------------------------------------------
  1 | # Author: Eric Alcaide
  2 | 
  3 | import torch
  4 | import numpy as np 
  5 | from einops import repeat, rearrange
  6 | 
  7 | # module
  8 | from mp_nerf.massive_pnerf import *
  9 | from mp_nerf.utils import *
 10 | from mp_nerf.kb_proteins import *
 11 | from mp_nerf.proteins import *
 12 | 
 13 | 
 14 | def scn_atom_embedd(seq_list):
 15 |     """ Returns the token for each atom in the aa seq. 
 16 |         Inputs: 
 17 |         * seq_list: list of FASTA sequences. same length
 18 |     """
 19 |     batch_tokens = []
 20 |     # do loop in cpu
 21 |     for i,seq in enumerate(seq_list):
 22 |         batch_tokens.append( torch.tensor([SUPREME_INFO[aa]["atom_token_mask"] \
 23 |                                            for aa in seq]) )
 24 |     batch_tokens = torch.stack(batch_tokens, dim=0).long()
 25 |     return batch_tokens
 26 | 
 27 | 
 28 | def chain2atoms(x, mask=None, c=3):
 29 |     """ Expand from (L, other) to (L, C, other). """
 30 |     wrap = repeat( x, 'l ... -> l c ...', c=c )
 31 |     if mask is not None:
 32 |         return wrap[mask]
 33 |     return wrap
 34 | 
 35 | 
 36 | ######################
 37 | # from: https://static-content.springer.com/esm/art%3A10.1038%2Fs41586-021-03819-2/MediaObjects/41586_2021_3819_MOESM1_ESM.pdf
 38 | 
def rename_symmetric_atoms(pred_coors, true_coors, seq_list, cloud_mask, pred_feats=None): 
    """ Corrects ambiguous atoms (due to 180 torsions - ambiguous sidechains).
        For every amino acid type listed in AMBIGUOUS, each ambiguous atom
        pair is compared against the non-ambiguous atoms of the residue, both
        as given and swapped, and the predicted pair is swapped where the
        comparison favours the alternative naming.
        Inputs: 
        * pred_coors: (batch, L, 14, 3) float. sidechainnet format (see mp_nerf.kb_proteins)
        * true_coors: (batch, L, 14, 3) float. sidechainnet format (see mp_nerf.kb_proteins)
        * seq_list: list of FASTA sequences
        * cloud_mask: (batch, L, 14) bool. mask for present atoms
        * pred_feats: (batch, L, 14, D) optional. atom-wise predicted features

        Warning! A coordinate might be missing. TODO:
        Outputs: pred_coors, pred_feats
                 (the same objects that were passed in: both are modified
                  in place. pred_feats is returned untouched, possibly None,
                  when it was not passed)
    """
    # work on a copy so the caller's mask is left untouched
    aux_cloud_mask = cloud_mask.clone() # will be manipulated

    for i,seq in enumerate(seq_list):
        for aa, pairs in AMBIGUOUS.items():
            # indexes of aas in chain - check coords are given for aa
            # amb_idxs: flat list of every atom slot taking part in an ambiguous pair
            amb_idxs  = np.array(pairs["indexs"]).flatten().tolist()
            # NOTE(review): `.tolist()[0]` keeps only the first row returned by
            # torch.nonzero (and raises IndexError when nothing is nonzero), and
            # the set is rebuilt for every residue `k` - confirm this is intended.
            idxs = torch.tensor([
                k for k,s in enumerate(seq) if s==aa and \
                k in set( torch.nonzero(aux_cloud_mask[i, :, amb_idxs].sum(dim=-1)).tolist()[0] )
            ]).long()
            # check if any AAs matching
            if idxs.shape[0] == 0: 
                continue 
            # get indexes of non-ambiguous
            # the ambiguous slots are switched off in the auxiliary mask; what is
            # left for the first matching residue is used as the reference atoms
            aux_cloud_mask[i, idxs, amb_idxs] = False
            non_amb_idx = torch.nonzero(aux_cloud_mask[i, idxs[0]]).tolist()
            for a, pair in enumerate(pairs["indexs"]):
                # calc distances
                # true coords are gathered for the pair followed by the swapped pair
                # NOTE(review): the shape remarks at the end of the next two lines
                # come from the original author and were not verified here.
                d_ij_pred = torch.cdist(pred_coors[ i, idxs, pair ], pred_coors[i, idxs, non_amb_idx], p=2) # 2, N
                d_ij_true = torch.cdist(true_coors[ i, idxs, pair+pair[::-1] ], true_coors[i, idxs, non_amb_idx], p=2) # 2, 2N
                # see if alternative is better (less distance)
                # NOTE(review): signed differences are summed (no abs / square) - confirm.
                idxs_to_change = ( (d_ij_pred - d_ij_true[2:]).sum(dim=-1) < (d_ij_pred - d_ij_true[:2]).sum(dim=-1) ).nonzero()
                # change those 
                # swap the two atoms of the pair (and their features) in place
                pred_coors[i, idxs[idxs_to_change], pair] = pred_coors[i, idxs[idxs_to_change], pair[::-1]]
                if pred_feats is not None: 
                    pred_feats[i, idxs[idxs_to_change], pair] = pred_feats[i, idxs[idxs_to_change], pair[::-1]]

    return pred_coors, pred_feats 
 79 | 
 80 | 
 81 | def torsion_angle_loss(pred_torsions, true_torsions, coeff=2., angle_mask=None): 
 82 |     """ Computes a loss on the angles as the cosine of the difference.
 83 |         Due to angle periodicity, calculate the disparity on both sides
 84 |         Inputs: 
 85 |         * pred_torsions: ( (B), L, X ) float. Predicted torsion angles.(-pi, pi)
 86 |                                        Same format as sidechainnet. 
 87 |         * true_torsions: ( (B), L, X ) true torsion angles. (-pi, pi)
 88 |         * coeff: float. weight coefficient
 89 |         * angle_mask: ((B), L, (X)) bool. Masks the non-existing angles. 
 90 | 
 91 |         Outputs: ( (B), L, 6 ) cosine difference
 92 |     """
 93 |     l_normal = torch.cos( pred_torsions - true_torsions )
 94 |     l_cycle = torch.cos( to_zero_two_pi(pred_torsions) - \
 95 |                          to_zero_two_pi(true_torsions) )
 96 |     maxi = torch.max( l_normal, l_cycle )
 97 |     if angle_mask is not None: 
 98 |         maxi[angle_mask] = 1.
 99 |     return coeff * (1 - maxi)
100 | 
101 | 
def fape_torch(pred_coords, true_coords, max_val=10., l_func=None,
               c_alpha=False, seq_list=None, rot_mats_g=None): 
    """ Computes the Frame-Aligned Point Error. Scaled 0 <= FAPE <= 1
        Inputs: 
        * pred_coords: (B, L, C, 3) predicted coordinates. 
        * true_coords: (B, L, C, 3) ground truth coordinates. 
                       atoms whose coordinates are all 0 are treated as absent.
        * max_val: maximum value (it's also the radius due to L1 usage)
        * l_func: function. allow for options other than l1 (consider dRMSD)
        * c_alpha: bool. whether to only calculate frames and loss from c_alphas
        * seq_list: list of strs (FASTA sequences). to calculate rigid bodies' indexs.
                    Required whenever `rot_mats_g` is not passed or `c_alpha` is True.
        * rot_mats_g: optional. List of n_seqs x (N_frames, 3, 3) rotation matrices.

        Outputs: (B, N_atoms) error per selected atom, averaged over frames.
                 Chains are stacked, so every chain must select the same
                 number of atoms.
    """
    fape_store = []
    if l_func is None: 
        # euclidean distance with an eps inside the sqrt for gradient stability
        l_func = lambda x,y,eps=1e-7,sup=max_val: (((x-y)**2).sum(dim=-1) + eps).sqrt() 
    # for chain
    for s in range(pred_coords.shape[0]):  
        # atoms present in the labels (any non-zero coordinate)
        cloud_mask = (torch.abs(true_coords[s]).sum(dim=-1) != 0)
        # center both structures on the mean of the present atoms
        pred_center = pred_coords[s] - pred_coords[s, cloud_mask].mean(dim=0, keepdim=True)
        true_center = true_coords[s] - true_coords[s, cloud_mask].mean(dim=0, keepdim=True)
        # flatten this chain to (L*C, 3) / (L*C,)
        pred_center = rearrange(pred_center, 'l c d -> (l c) d')
        true_center = rearrange(true_center, 'l c d -> (l c) d')
        mask_center = rearrange(cloud_mask, 'l c -> (l c)')

        # rigid-group indexes are needed to build frames and/or to pick c_alphas.
        # (previously they were only computed when rot_mats_g was None, so
        #  c_alpha=True together with rot_mats_g raised a NameError)
        rigid_idxs = None
        if rot_mats_g is None or c_alpha:
            rigid_idxs = scn_rigid_index_mask(seq_list[s], c_alpha=c_alpha)

        # get frames and conversions - same scheme as in mp_nerf proteins' concat of monomers
        if rot_mats_g is None:
            true_frames = get_axis_matrix(*true_center[rigid_idxs].detach(), norm=True)
            pred_frames = get_axis_matrix(*pred_center[rigid_idxs].detach(), norm=True)
            rot_mats  = torch.matmul(torch.transpose(pred_frames, -1, -2), true_frames)
        else: 
            rot_mats = rot_mats_g[s]

        # calculate loss only on c_alphas
        if c_alpha:
            mask_center[:] = False
            mask_center[rigid_idxs[1]] = True

        # the tensors above already belong to chain `s`: select atoms with the
        # flat mask only. (indexing them again with `s` picked a single atom)
        pred_sel = pred_center[mask_center]
        true_sel = true_center[mask_center]

        # measure errors - accumulate over frames, then average
        chain_fape = 0
        for rot_mat in rot_mats: 
            chain_fape = chain_fape + l_func(pred_sel @ rot_mat, true_sel).clamp(0, max_val)
        fape_store.append(chain_fape / rot_mats.shape[0])

    # stack and scale to [0, 1]
    return (1/max_val) * torch.stack(fape_store, dim=0)
154 | 
155 | 
156 | # custom
157 | 
def atom_selector(scn_seq, x, option=None, discard_absent=True): 
    """ Returns a selection of the atoms in a protein. 
        Inputs: 
        * scn_seq: (batch, len) sidechainnet format or list of strings
        * x: (batch, (len * n_aa), dims) sidechainnet format
        * option: one of [torch.tensor, 'backbone', 'backbone-only',
                  'backbone-with-cbeta', 'all', 'backbone-with-oxygen',
                  'backbone-with-cbeta-and-oxygen']. A tensor is used
                  directly as a (14,) atom mask.
        * discard_absent: bool. Whether to discard the points for which
                          there are no labels (bad recordings)
        Outputs: 
        * x[mask]: the selected points
        * mask: (batch, len * 14) boolean mask of the selected points
        Raises: ValueError if `option` is neither a string nor a tensor.
    """
    # get mask of present atoms, one (len, 14) mask per sequence
    present = []
    for i,seq in enumerate(scn_seq): 
        if discard_absent:
            # derive presence from the coordinates themselves.
            # scn_cloud_mask expects a leading batch axis: add it for the
            # documented (len * 14, dims) layout and strip it afterwards.
            coords_i = x[i]
            if coords_i.dim() == 2:
                seq_mask = scn_cloud_mask(seq, coords=coords_i.unsqueeze(0))[0]
            else:
                seq_mask = scn_cloud_mask(seq, coords=coords_i)
        else:
            # derive presence from the sequence only (tokens -> letters first)
            if isinstance(seq, torch.Tensor):
                seq = "".join([INDEX2AAS[tok] for tok in seq.cpu().detach().tolist()])
            seq_mask = scn_cloud_mask(seq, coords=None)
        present.append(seq_mask)

    present = torch.stack(present, dim=0).bool()

    # atom mask. slots follow the sidechainnet order: N, CA, C, O, CB, ...
    if isinstance(option, str):
        # c-alpha is always selected
        atom_mask = torch.tensor([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
        if "backbone" in option: 
            atom_mask[[0, 2]] = 1

        if option in ("backbone", "backbone-only"): 
            pass
        elif option == 'backbone-with-oxygen':
            atom_mask[3] = 1
        elif option == 'backbone-with-cbeta':
            # CB lives in slot 4 (slot 5 is the first atom after CB)
            atom_mask[4] = 1
        elif option == 'backbone-with-cbeta-and-oxygen':
            atom_mask[3] = 1
            atom_mask[4] = 1
        elif option == 'all':
            atom_mask[:] = 1
        else: 
            print("Your string doesn't match any option.")
            
    elif isinstance(option, torch.Tensor):
        atom_mask = option
    else:
        raise ValueError('option needs to be a valid string or a mask tensor of shape (14,) ')
    
    # combine presence and atom choice on the same device, flatten per chain
    atom_mask = atom_mask.to(present.device).bool()
    mask = rearrange(present * atom_mask.unsqueeze(0).unsqueeze(0), 'b l c -> b (l c)')
    return x[mask], mask
209 | 
210 | 
def _wrap_to_pi(values):
    """ Wraps angles (radians) into [-pi, pi]. Modifies `values` in place. """
    two_pi = 2*np.pi
    # anything beyond a full turn is first reduced with the modulo
    beyond_turn = (values > two_pi) + (values < -two_pi)
    values[beyond_turn] = values[beyond_turn] % two_pi
    # then fold the remaining excess back by one turn
    too_high, too_low = values > np.pi, values < -np.pi
    values[too_high] = - ( two_pi - values[too_high] )
    values[too_low]  = two_pi + values[too_low]
    return values


def noise_internals(seq, angles=None, coords=None, noise_scale=0.5, theta_scale=0.5, verbose=0):
    """ Adds gaussian noise to the internal coordinates (dihedrals of every
        atom, bond angles of the backbone only) and rebuilds the structure.
        Inputs: 
        * seq: string. Sequence in FASTA format
        * angles: (l, 12) sidechainnet angles tensor. random angles are
                  drawn when not passed (coords then provide the real values)
        * coords: (l, 14, 3) sidechainnet coords. optional
        * noise_scale: float. std of noise gaussian.
        * theta_scale: float. multiplier (on top of noise_scale) for bond angles
        * verbose: prints the noise scale when truthy
        Outputs (as returned by `protein_fold`): 
        * chain (l, c, d)
        * cloud_mask (l, c)
    """
    assert angles is not None or coords is not None, \
           "You must pass either angles or coordinates"
    # placeholder angles when only coordinates are available
    if angles is None:
        angles = torch.randn(coords.shape[0], 12).to(coords.device)

    # scaffolds from angles, corrected with the real coordinates if given
    scaffolds = build_scaffolds_from_scn_angles(seq, angles.clone())
    if coords is not None:
        scaffolds = modify_scaffolds_with_coords(scaffolds, coords)

    if noise_scale > 0.:
        if verbose: 
            print("noising", noise_scale)
        angles_mask = scaffolds["angles_mask"]

        # bond angles: backbone atoms only, noise scaled by theta_scale
        bb_thetas = angles_mask[0, :, :3].clone()
        bb_thetas += theta_scale*noise_scale * torch.randn_like(bb_thetas) 
        angles_mask[0, :, :3] = _wrap_to_pi(bb_thetas)

        # dihedrals: every atom
        all_dihedrals = angles_mask[1].clone()
        all_dihedrals += noise_scale * torch.randn_like(all_dihedrals)
        angles_mask[1] = _wrap_to_pi(all_dihedrals)

    # reconstruct
    return protein_fold(**scaffolds)
270 | 
271 | 
def combine_noise(true_coords, seq=None, int_seq=None, angles=None,
                  NOISE_INTERNALS=1e-2, INTERNALS_SCN_SCALE=5., 
                  SIDECHAIN_RECONSTRUCT=True):
    """ Combines noises. For internal noise, no points can be missing. 
        Inputs: 
        * true_coords: ((B), N, D)
        * int_seq: (N,) torch long tensor of sidechainnet AA tokens 
        * seq: str of length N. FASTA AAs.
               (at least one of int_seq / seq is required)
        * angles: (N_aa, D_). optional. used for internal noising
        * NOISE_INTERNALS: float. amount of noise for internal coordinates. 
        * INTERNALS_SCN_SCALE: float. passed to `noise_internals` as theta_scale.
        * SIDECHAIN_RECONSTRUCT: bool. whether to discard the sidechain and
                                 rebuild by sampling from plausible distro.
        Outputs: (B, N, D) coords and boolean mask of the points of
                 `true_coords` that are not all-zero (computed before the
                 batch axis is added, so it is ((B), N))
    """
    # get seqs right: derive whichever representation is missing
    assert int_seq is not None or seq is not None, "Either int_seq or seq must be passed"
    if int_seq is not None and seq is None: 
        seq = "".join([INDEX2AAS[x] for x in int_seq.cpu().detach().tolist()])
    elif int_seq is None and seq is not None: 
        int_seq = torch.tensor([AAS2INDEX[x] for x in seq.upper()], device=true_coords.device)

    # present points according to the coords vs. according to the sequence
    cloud_mask_flat = (true_coords == 0.).sum(dim=-1) != true_coords.shape[-1]
    naive_cloud_mask = scn_cloud_mask(seq).bool()
    
    if NOISE_INTERNALS: 
        # internal noising rebuilds every atom: none may be missing
        assert cloud_mask_flat.sum().item() == naive_cloud_mask.sum().item(), \
               "atoms missing: {0}".format( naive_cloud_mask.sum().item() - \
                                            cloud_mask_flat.sum().item() )
    # expand to batch dim if needed
    if len(true_coords.shape) < 3: 
        true_coords = true_coords.unsqueeze(0)
    noised_coords = true_coords.clone()
    coords_scn = rearrange(true_coords, 'b (l c) d -> b l c d', c=14)

    ###### STEP 1: internals #########
    if NOISE_INTERNALS:
        # drop only the batch axis: a bare squeeze() would also collapse a
        # length-1 chain axis
        noised_coords, _ = noise_internals(seq, angles = angles, 
                                           coords = coords_scn.squeeze(0),  
                                           noise_scale = NOISE_INTERNALS, 
                                           theta_scale = INTERNALS_SCN_SCALE,
                                           verbose = False)
        noised_coords = rearrange(noised_coords, 'l c d -> () (l c) d')

    ###### STEP 2: build from backbone #########
    if SIDECHAIN_RECONSTRUCT: 
        # keep the backbone only, zero everything else and refold sidechains on cpu
        bb, mask = atom_selector(int_seq.unsqueeze(0), noised_coords, option="backbone", discard_absent=False)
        scaffolds = build_scaffolds_from_scn_angles(seq, angles=None, device="cpu")
        noised_coords[~mask] = 0.
        noised_coords = rearrange(noised_coords, '() (l c) d -> l c d', c=14)
        noised_coords, _ = sidechain_fold(wrapper = noised_coords.cpu(), **scaffolds, c_beta = False)
        noised_coords = rearrange(noised_coords, 'l c d -> () (l c) d').to(true_coords.device)


    return noised_coords, cloud_mask_flat
328 | 
329 | 
330 | 
if __name__ == "__main__":
    # manual smoke test: needs a local serialized list of proteins
    # (as produced by mp_nerf.utils.get_prot)
    import joblib
    prots = joblib.load("some_route_to_local_serialized_file_with_prots")

    # set params
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # unpack the last protein and add a batch axis to its coords
    seq, int_seq, true_coords, angles, padding_seq, mask, pid = prots[-1]
    true_coords = true_coords.unsqueeze(0)

    # check noised internals
    coords_scn = rearrange(true_coords, 'b (l c) d -> b l c d', c=14)
    cloud, cloud_mask = noise_internals(seq, angles=angles, coords=coords_scn[0], noise_scale=1.)
    print("cloud.shape", cloud.shape)

    # check integral: once from the FASTA string, once from the int tokens
    for label, seq_kwargs in (("integral.shape",  dict(seq=seq,  int_seq=None)),
                              ("integral.shape2", dict(seq=None, int_seq=int_seq))):
        integral, mask = combine_noise(true_coords, angles=None,
                                       NOISE_INTERNALS=1e-2, SIDECHAIN_RECONSTRUCT=True,
                                       **seq_kwargs)
        print(label, integral.shape)
357 | 
358 | 
359 | 
360 | 


--------------------------------------------------------------------------------
/mp_nerf/proteins.py:
--------------------------------------------------------------------------------
  1 | # science
  2 | import numpy as np 
  3 | # diff / ml
  4 | import torch
  5 | from einops import repeat
  6 | # module
  7 | from mp_nerf.massive_pnerf import *
  8 | from mp_nerf.utils import *
  9 | from mp_nerf.kb_proteins import *
 10 | 
 11 | 
 12 | def scn_cloud_mask(seq, coords=None, strict=False):
 13 |     """ Gets the boolean mask atom positions (not all aas have same atoms). 
 14 |         Inputs: 
 15 |         * seqs: (length) iterable of 1-letter aa codes of a protein
 16 |         * coords: optional .(batch, lc, 3). sidechainnet coords.
 17 |                   returns the true mask (solves potential atoms that might not be provided)
 18 |         * strict: bool. whther to discard the next points after a missing one 
 19 |         Outputs: (length, 14) boolean mask 
 20 |     """ 
 21 |     if coords is not None:
 22 |         start = (( rearrange(coords, 'b (l c) d -> b l c d', c=14) != 0 ).sum(dim=-1) != 0).float()
 23 |         # if a point is 0, the following are 0s as well
 24 |         if strict:
 25 |             for b in range(start.shape[0]):
 26 |                 for pos in range(start.shape[1]):
 27 |                     for chain in range(start.shape[2]):
 28 |                         if start[b, pos, chain].item() == 0:
 29 |                             start[b, pos, chain:] *= 0
 30 |         return start
 31 |     return torch.tensor([SUPREME_INFO[aa]['cloud_mask'] for aa in seq])
 32 | 
 33 | 
 34 | def scn_bond_mask(seq):
 35 |     """ Inputs: 
 36 |         * seqs: (length). iterable of 1-letter aa codes of a protein
 37 |         Outputs: (L, 14) maps point to bond length
 38 |     """ 
 39 |     return torch.tensor([SUPREME_INFO[aa]['bond_mask'] for aa in seq])
 40 | 
 41 | 
def scn_angle_mask(seq, angles=None, device=None):
    """ Builds the bond-angle (theta) and dihedral masks of a protein,
        optionally filled with the specific angles passed.
        Inputs: 
        * seq: (length). iterable of 1-letter aa codes of a protein
        * angles: (length, 12). [phi, psi, omega, b_angle(n_ca_c), b_angle(ca_c_n), b_angle(c_n_ca), 6_scn_torsions]
                  when not passed, the "torsion_mask_filled" entry of
                  SUPREME_INFO is used instead of "torsion_mask".
        * device: NOTE(review): currently ignored - it is overwritten below
                  with the device of `angles` (or cpu when angles is None).
        Outputs: (2, L, 14) maps point to theta and dihedral.
                 first angle is theta, second is dihedral
    """ 
    # device and precision follow `angles` when available
    device = angles.device if angles is not None else torch.device("cpu")
    precise = angles.dtype if angles is not None else torch.get_default_dtype()
    torsion_mask_use = "torsion_mask" if angles is not None else "torsion_mask_filled"
    # get masks
    theta_mask   = torch.tensor([SUPREME_INFO[aa]['theta_mask'] for aa in seq], dtype=precise).to(device)
    torsion_mask = torch.tensor([SUPREME_INFO[aa][torsion_mask_use] for aa in seq], dtype=precise).to(device)
    
    # adapt general to specific angles if passed
    if angles is not None: 
        # fill masks with angle values
        theta_mask[:, 0] = angles[:, 4] # ca_c_n
        theta_mask[1:, 1] = angles[:-1, 5] # c_n_ca
        theta_mask[:, 2] = angles[:, 3] # n_ca_c
        # backbone_torsions
        torsion_mask[:, 0] = angles[:, 1] # n determined by psi of previous
        torsion_mask[1:, 1] = angles[:-1, 2] # ca determined by omega of previous
        torsion_mask[:, 2] = angles[:, 0] # c determined by phi
        # https://github.com/jonathanking/sidechainnet/blob/master/sidechainnet/structure/StructureBuilder.py#L313
        torsion_mask[:, 3] = angles[:, 1] - np.pi

        # add torsions to sidechains - no need to modify indexes due to torsion modification
        # since extra rigid bodies are in terminal positions in sidechain
        # (x != x is only true for nan entries)
        to_fill = torsion_mask != torsion_mask # "p" fill with passed values
        to_pick = torsion_mask == 999          # "i" infer from previous one
        for i,aa in enumerate(seq):
            # check if any is nan -> fill the holes
            # the first `number` sidechain torsions (columns 6 onwards) are consumed in order
            number = to_fill[i].long().sum()
            torsion_mask[i, to_fill[i]] = angles[i, 6:6+number]

            # pick previous value for inferred torsions
            for j, val in enumerate(to_pick[i]):
                if val:
                    torsion_mask[i, j] = torsion_mask[i, j-1] - np.pi # pick values from last one.

            # special rigid bodies anomalies: 
            if aa == "I": # scn_torsion(CG1) - scn_torsion(CG2) = 2.13 (see KB)
                torsion_mask[i, 7] += torsion_mask[i, 5]
            elif aa == "L": 
                torsion_mask[i, 7] += torsion_mask[i, 6]


    # applied whether or not `angles` was passed: slot 3 of the last residue is shifted by pi
    torsion_mask[-1, 3] += np.pi 
    return torch.stack([theta_mask, torsion_mask], dim=0)
 92 | 
 93 | 
 94 | def scn_index_mask(seq):
 95 |     """ Inputs: 
 96 |         * seq: (length). iterable of 1-letter aa codes of a protein
 97 |         Outputs: (L, 11, 3) maps point to theta and dihedral.
 98 |                  first angle is theta, second is dihedral
 99 |     """ 
100 |     idxs = torch.tensor([SUPREME_INFO[aa]['idx_mask'] for aa in seq])
101 |     return rearrange(idxs, 'l s d -> d l s')
102 | 
103 | 
def scn_rigid_index_mask(seq, c_alpha=None): 
    """ Collects the atom indexes defining the rigid groups of a protein,
        expressed in the flattened (length * 14) atom numbering.
        Inputs: 
        * seq: (length). iterable of 1-letter aa codes of a protein 
        * c_alpha: bool. whether to return only the c_alpha rigid group
                   (the first group of every residue)
        Outputs: (3, Length * Groups). indexes for 1st, 2nd and 3rd point 
                  to construct frames for each group. 
    """
    groups_per_residue = []
    for position, residue in enumerate(seq):
        groups = torch.tensor(SUPREME_INFO[residue]['rigid_idx_mask'])
        if c_alpha:
            groups = groups[:1]
        # every residue owns 14 consecutive slots in the flat numbering
        groups_per_residue.append(groups + 14*position)
    return torch.cat(groups_per_residue, dim=0).t()
116 | 
117 | 
def build_scaffolds_from_scn_angles(seq, angles=None, coords=None, device="auto"):
    """ Builds scaffolds for fast access to data
        Inputs: 
        * seq: string of aas (1 letter code)
        * angles: (L, 12) tensor containing the internal angles.
                  Distributed as follows (following sidechainnet convention):
                  * (L, 3) for torsion angles
                  * (L, 3) bond angles
                  * (L, 6) sidechain angles
        * coords: sidechainnet coords. builds the mask with those instead
                  (better accuracy if modified residues present).
                  forwarded to `scn_cloud_mask`, which documents a
                  (batch, l*14, 3) layout - TODO confirm the expected shape.
        * device: "auto" or a torch device. "auto" picks the device of
                  `angles`, else the device of `coords`, else cpu.
        Outputs: dict with
        * cloud_mask: (L, 14 ) mask of points that should be converted to coords 
        * point_ref_mask: (3, L, 11) maps point (except n-ca-c) to idxs of
                                     previous 3 points in the coords array
        * angles_mask: (2, L, 14) maps point to theta and dihedral
        * bond_mask: (L, 14) gives the length of the bond originating that atom
    """
    # auto infer device and precision
    precise = angles.dtype if angles is not None else torch.get_default_dtype()
    if device == "auto":
        # never leave the literal "auto" in place: `.to("auto")` is not a
        # valid device and used to fail when no angles were passed
        if angles is not None:
            device = angles.device
        elif coords is not None:
            device = coords.device
        else:
            device = torch.device("cpu")

    if coords is not None: 
        cloud_mask = scn_cloud_mask(seq, coords=coords)
    else: 
        cloud_mask = scn_cloud_mask(seq)

    cloud_mask = cloud_mask.bool().to(device)
    
    point_ref_mask = scn_index_mask(seq).long().to(device)
     
    angles_mask = scn_angle_mask(seq, angles).to(device, precise)
     
    bond_mask = scn_bond_mask(seq).to(device, precise)
    # return all in a dict
    return {"cloud_mask":     cloud_mask, 
            "point_ref_mask": point_ref_mask,
            "angles_mask":    angles_mask,
            "bond_mask":      bond_mask }
158 | 
159 | 
160 | #############################
161 | ####### ENCODERS ############
162 | #############################
163 | 
164 | 
def modify_angles_mask_with_torsions(seq, angles_mask, torsions): 
    """ Writes variable sidechain torsions into an angles mask (in place).
        Inputs: 
        * seq: (L,) str. FASTA sequence
        * angles_mask: (2, L, 14) float tensor of (angles, torsions)
        * torsions: (L, 4) float tensor (or (L, 5) if it includes torsion for cb)
        Outputs: (2, L, 14) the same `angles_mask` object, updated
    """
    n_torsions = torsions.shape[-1]
    # a 5th column means the c_beta torsion is passed as well, so the
    # window of atom slots starts one position earlier
    first = 4 if n_torsions == 5 else 5
    last  = first + n_torsions

    # slots whose reference torsion is nan are the ones to be replaced
    reference = torch.tensor([SUPREME_INFO[aa]["torsion_mask"] for aa in seq]).to(torsions.device) # (L, 14)
    fillable = reference != reference
    # nothing outside of the [first, last) window may be touched
    fillable[:, :first] = False
    fillable[:, last:]  = False

    angles_mask[1, fillable] = torsions[ fillable[:, first:last] ]
    return angles_mask
183 | 
184 | 
def modify_scaffolds_with_coords(scaffolds, coords):
    """ Gets scaffolds and fills in the right data: bond lengths, bond
        angles and dihedrals are recomputed from the coordinates and written
        into the scaffolds in place.
        Inputs: 
        * scaffolds: dict. as returned by `build_scaffolds_from_scn_angles`
        * coords: (L, 14, 3). sidechainnet tensor. same device as scaffolds
        Outputs: corrected scaffolds (the same dict that was passed)
    """


    # calculate distances and update: 
    # N, CA, C
    # N is measured against the C of the previous residue, so the 1st residue is left as is
    scaffolds["bond_mask"][1:, 0] = torch.norm(coords[1:, 0] - coords[:-1, 2], dim=-1) # N
    scaffolds["bond_mask"][ :, 1] = torch.norm(coords[ :, 1] - coords[:  , 0], dim=-1) # CA
    scaffolds["bond_mask"][ :, 2] = torch.norm(coords[ :, 2] - coords[:  , 1], dim=-1) # C
    # O, CB, side chain
    # selector pairs every residue with its own reference atom indexes
    selector = np.arange(len(coords))
    for i in range(3, 14):
        # get indexes
        # point_ref_mask is documented as (3, L, 11): slicing one atom slot yields 3 * (L,)
        idx_a, idx_b, idx_c = scaffolds["point_ref_mask"][:, :, i-3] # (3, L, 11) -> 3 * (L,)
        # correct distances
        scaffolds["bond_mask"][:, i] = torch.norm(coords[:, i] - coords[selector, idx_c], dim=-1)
        # get angles
        scaffolds["angles_mask"][0, :, i] = get_angle(coords[selector, idx_b], 
                                                      coords[selector, idx_c], 
                                                      coords[:, i])
        # handle C-beta, where the C requested is from the previous aa
        if i == 4:
            # for 1st residue, use position of the second residue's N
            first_next_n     = coords[1, :1] # 1, 3
            # the c requested is from the previous residue
            main_c_prev_idxs = coords[selector[:-1], idx_a[1:]]# (L-1), 3
            # concat
            coords_a = torch.cat([first_next_n, main_c_prev_idxs])
        else:
            coords_a = coords[selector, idx_a]
        # get dihedrals
        scaffolds["angles_mask"][1, :, i] = get_dihedral(coords_a,
                                                         coords[selector, idx_b], 
                                                         coords[selector, idx_c], 
                                                         coords[:, i])
    # correct angles and dihedrals for backbone 
    # entries that need a neighbouring residue skip the first / last position
    scaffolds["angles_mask"][0, :-1, 0] = get_angle(coords[:-1, 1], coords[:-1, 2], coords[1: , 0]) # ca_c_n
    scaffolds["angles_mask"][0, 1:,  1] = get_angle(coords[:-1, 2], coords[1:,  0], coords[1: , 1]) # c_n_ca
    scaffolds["angles_mask"][0,  :,  2] = get_angle(coords[:,   0], coords[ :,  1], coords[ : , 2]) # n_ca_c
    
    # N determined by previous psi = f(n, ca, c, n+1)
    scaffolds["angles_mask"][1, :-1, 0] = get_dihedral(coords[:-1, 0], coords[:-1, 1], coords[:-1, 2], coords[1:, 0])
    # CA determined by omega = f(ca, c, n+1, ca+1)
    scaffolds["angles_mask"][1,  1:, 1] = get_dihedral(coords[:-1, 1], coords[:-1, 2], coords[1:, 0], coords[1:, 1])
    # C determined by phi = f(c-1, n, ca, c)
    scaffolds["angles_mask"][1,  1:, 2] = get_dihedral(coords[:-1, 2], coords[1:, 0], coords[1:, 1], coords[1:, 2])

    return scaffolds
238 | 
239 | 
240 | ##################################
241 | ####### MAIN FUNCTION ############
242 | ##################################
243 | 
244 | 
def protein_fold(cloud_mask, point_ref_mask, angles_mask, bond_mask,
                 device=torch.device("cpu"), hybrid=False):
    """ Calcs coords of a protein given its
        sequence and internal angles.
        Inputs: 
        * cloud_mask: (L, 14) mask of points that should be converted to coords 
        * point_ref_mask: (3, L, 11) maps point (except n-ca-c) to idxs of
                                     previous 3 points in the coords array
        * angles_mask: (2, L, 14) maps point to theta and dihedral
                                  (indexed below as [theta|dihedral, residue, atom])
        * bond_mask: (L, 14) gives the length of the bond originating that atom.
                             Its dtype is used as the dtype of the output coords.
        * device: torch.device on which the coords container is allocated
        * hybrid: bool. if True and the coords live on a CUDA device, the
                        sequential rotation-composition loop is run on CPU and
                        the result is moved back to `device` afterwards.

        Output: (L, 14, 3) and (L, 14) coordinates and cloud_mask
                (cloud_mask is returned unchanged)
    """
    # automatic type (float, mixed, double) and size detection
    precise = bond_mask.dtype
    length  = cloud_mask.shape[0]
    # create coord wrapper
    coords = torch.zeros(length, 14, 3, device=device, dtype=precise)

    # do first AA: N stays at the origin, CA goes along +x, C is placed in the x,y plane
    coords[0, 1] = coords[0, 0] + torch.tensor([1, 0, 0], device=device, dtype=precise) * BB_BUILD_INFO["BONDLENS"]["n-ca"] 
    coords[0, 2] = coords[0, 1] + torch.tensor([torch.cos(np.pi - angles_mask[0, 0, 2]),
                                                torch.sin(np.pi - angles_mask[0, 0, 2]),
                                                0.], device=device, dtype=precise) * BB_BUILD_INFO["BONDLENS"]["ca-c"]
    
    # starting positions (in the x,y plane) and normal vector [0,0,1]
    init_a = repeat(torch.tensor([1., 0., 0.], device=device, dtype=precise), 'd -> l d', l=length)
    init_b = repeat(torch.tensor([1., 1., 0.], device=device, dtype=precise), 'd -> l d', l=length)
    # do N -> CA. don't do 1st since its done already
    # (at this point every residue is built as an independent fragment whose N is at the origin)
    thetas, dihedrals = angles_mask[:, :, 1]
    coords[1:, 1] = mp_nerf_torch(init_a,
                                   init_b, 
                                   coords[:, 0], 
                                   bond_mask[:, 1], 
                                   thetas, dihedrals)[1:]
    # do CA -> C. don't do 1st since its done already
    thetas, dihedrals = angles_mask[:, :, 2]
    coords[1:, 2] = mp_nerf_torch(init_b,
                                   coords[:, 0],
                                   coords[:, 1],
                                   bond_mask[:, 2],
                                   thetas, dihedrals)[1:]
    # do C -> N
    # slot 3 temporarily stores the N of the following residue; it is overwritten
    # by the i == 3 pass of the sidechain loop below wherever cloud_mask[:, 3] is set
    thetas, dihedrals = angles_mask[:, :, 0]
    coords[:, 3] = mp_nerf_torch(coords[:, 0],
                                   coords[:, 1],
                                   coords[:, 2],
                                   bond_mask[:, 0],
                                   thetas, dihedrals)

    #########
    # sequential pass to join fragments
    #########
    # part of rotation mat corresponding to origin - 3 orthogonals
    mat_origin  = get_axis_matrix(init_a[0], init_b[0], coords[0, 0], norm=False)
    # part of rotation mat corresponding to destins || a, b, c = CA, C, N+1
    # (L-1) since the first is in the origin already 
    mat_destins = get_axis_matrix(coords[:-1, 1], coords[:-1, 2], coords[:-1, 3])

    # get rotation matrices from origins
    # https://math.stackexchange.com/questions/1876615/rotation-matrix-from-plane-a-to-b
    rotations  = torch.matmul(mat_origin.t(), mat_destins)
    rotations /= torch.norm(rotations, dim=-1, keepdim=True)

    # do rotation concatenation: each rotation is composed with the accumulated
    # rotation of the previous fragment (inherently sequential).
    # the loop is moved to cpu only when `hybrid` is set and coords are on cuda
    rotations = rotations.cpu() if coords.is_cuda and hybrid else rotations
    for i in range(1, length-1):
        rotations[i] = torch.matmul(rotations[i], rotations[i-1])
    rotations = rotations.to(device) if coords.is_cuda and hybrid else rotations
    # rotate all (N, CA, C and the temporary N+1 of every residue but the first)
    coords[1:, :4] = torch.matmul(coords[1:, :4], rotations)
    # offset each position by cumulative sum at that position
    # (cumulative sum of the N+1 points of all the preceding residues)
    coords[1:, :4] += torch.cumsum(coords[:-1, 3], dim=0).unsqueeze(-2)


    #########
    # parallel sidechain - do the oxygen, c-beta and side chain
    #########
    # one pass per atom slot: all residues that have that atom are placed at once
    for i in range(3,14):
        level_mask = cloud_mask[:, i]
        thetas, dihedrals = angles_mask[:, level_mask, i]
        idx_a, idx_b, idx_c = point_ref_mask[:, level_mask, i-3]

        # to place C-beta, we need the carbons from prev res - not available for the 1st res
        if i == 4:
            # the c requested is from the previous residue - offset boolean mask by one
            # can't be done with slicing bc glycines are inside chain (dont have cb)
            # note: for residue 0 the offset index is -1 (wraps to the last residue);
            # that entry is replaced right below when the first residue has a c-beta
            coords_a = coords[(level_mask.nonzero().view(-1) - 1), idx_a] # (L-1), 3
            # if first residue is not glycine, 
            # for 1st residue, use position of the second residue's N (1,3)
            # NOTE(review): the code reads coords[1, 1]; slot 1 is built with the
            # "n-ca" bond above (i.e. CA), so comment and code disagree - confirm intent
            if level_mask[0].item():
                coords_a[0] = coords[1, 1]
        else:
            coords_a = coords[level_mask, idx_a]

        coords[level_mask, i] = mp_nerf_torch(coords_a, 
                                              coords[level_mask, idx_b],
                                              coords[level_mask, idx_c],
                                              bond_mask[level_mask, i], 
                                              thetas, dihedrals)
    
    return coords, cloud_mask
347 | 
348 | 
def sidechain_fold(wrapper, cloud_mask, point_ref_mask, angles_mask, bond_mask,
                   device=torch.device("cpu"), c_beta=False):
    """ Calcs coords of the sidechain atoms of a protein given its
        backbone coords and internal angles.
        Inputs: 
        * wrapper: (L, 14, 3). coords container with backbone ([:, :3]) and optionally
                               c_beta ([:, 4]). It is filled IN PLACE.
        * cloud_mask: (L, 14) mask of points that should be converted to coords 
        * point_ref_mask: (3, L, 11) maps point (except n-ca-c) to idxs of
                                     previous 3 points in the coords array
        * angles_mask: (2, L, 14) maps point to theta and dihedral
                                  (indexed below as [theta|dihedral, residue, atom])
        * bond_mask: (L, 14) gives the length of the bond originating that atom
        * device: not used by this function. kept so the signature stays
                  compatible with existing callers.
        * c_beta: whether to place cbeta (slot 4). if False, slot 4 is left untouched.

        Output: (L, 14, 3) and (L, 14) coordinates and cloud_mask
                (the same `wrapper` object that was passed in, and cloud_mask unchanged)
    """
    # parallel sidechain - do the oxygen, c-beta and side chain
    # one pass per atom slot: all residues that have that atom are placed at once
    for i in range(3,14):
        # skip cbeta unless the caller asked for it
        if i == 4 and not c_beta:
            continue
        # prepare inputs
        level_mask = cloud_mask[:, i]
        thetas, dihedrals = angles_mask[:, level_mask, i]
        idx_a, idx_b, idx_c = point_ref_mask[:, level_mask, i-3]

        # to place C-beta, we need the carbons from prev res - not available for the 1st res
        if i == 4:
            # the c requested is from the previous residue - offset boolean mask by one
            # can't be done with slicing bc glycines are inside chain (dont have cb)
            # note: for residue 0 the offset index is -1 (wraps to the last residue);
            # that entry is replaced right below when the first residue has a c-beta
            coords_a = wrapper[(level_mask.nonzero().view(-1) - 1), idx_a] # (L-1), 3
            # if first residue is not glycine, 
            # for 1st residue, use a point taken from the second residue: wrapper[1, 1]
            if level_mask[0].item():
                coords_a[0] = wrapper[1, 1]
        else:
            coords_a = wrapper[level_mask, idx_a]

        wrapper[level_mask, i] = mp_nerf_torch(coords_a, 
                                               wrapper[level_mask, idx_b],
                                               wrapper[level_mask, idx_c],
                                               bond_mask[level_mask, i], 
                                               thetas, dihedrals)
    
    return wrapper, cloud_mask
395 | 


--------------------------------------------------------------------------------
/mp_nerf/utils.py:
--------------------------------------------------------------------------------
  1 | # Author: Eric Alcaide
  2 | 
  3 | import torch
  4 | import numpy as np 
  5 | from einops import repeat, rearrange
  6 | 
  7 | 
  8 | # random hacks
  9 | 
 10 | # to_pi_minus_pi(4) = -2.28  # to_pi_minus_pi(-4) = 2.28  # rads to pi-(-pi)
 11 | to_pi_minus_pi = lambda x: torch.where( (x//np.pi)%2 == 0, x%np.pi , -(2*np.pi-x%(2*np.pi)) )
 12 | to_zero_two_pi = lambda x: torch.where( x>np.pi, x%np.pi, 2*np.pi + x%np.pi )
 13 | 
 14 | # data utils
 15 | def get_prot(dataloader_=None, vocab_=None, min_len=80, max_len=150, verbose=True):
 16 |     """ Gets a protein from sidechainnet and returns
 17 |         the right attrs for training. 
 18 |         Inputs: 
 19 |         * dataloader_: sidechainnet iterator over dataset
 20 |         * vocab_: sidechainnet VOCAB class
 21 |         * min_len: int. minimum sequence length
 22 |         * max_len: int. maximum sequence length
 23 |         * verbose: bool. verbosity level
 24 |         Outputs: (cleaned, without padding)
 25 |         (seq_str, int_seq, coords, angles, padding_seq, mask, pid)
 26 |     """
 27 |     while True:
 28 |         for b,batch in enumerate(dataloader_['train']):
 29 |             for i in range(batch.int_seqs.shape[0]):
 30 |                 # strip padding - matching angles to string means
 31 |                 # only accepting prots with no missing residues (angles would be 0)
 32 |                 padding_seq = (batch.int_seqs[i] == 20).sum().item()
 33 |                 padding_angles = (torch.abs(batch.angs[i]).sum(dim=-1) == 0).long().sum().item()
 34 | 
 35 |                 if padding_seq == padding_angles:
 36 |                     # check for appropiate length
 37 |                     real_len = batch.int_seqs[i].shape[0] - padding_seq
 38 |                     if max_len >= real_len >= min_len:
 39 |                         # strip padding tokens
 40 |                         seq = ''.join([vocab_.int2char(aa) for aa in batch.int_seqs[i].numpy()])
 41 |                         seq = seq[:-padding_seq or None]
 42 |                         int_seq = batch.int_seqs[i][:-padding_seq or None]
 43 |                         angles  = batch.angs[i][:-padding_seq or None]
 44 |                         mask    = batch.msks[i][:-padding_seq or None]
 45 |                         coords  = batch.crds[i][:-padding_seq*14 or None]
 46 | 
 47 |                         if verbose:
 48 |                             print("stopping at sequence of length", real_len)
 49 |                         return seq, int_seq, coords, angles, padding_seq, mask, batch.pids[i]
 50 |                     else:
 51 |                         if verbose:
 52 |                             print("found a seq of length:", batch.int_seqs[i].shape,
 53 |                                   "but oustide the threshold:", min_len, max_len)
 54 |                 else:
 55 |                     if verbose:
 56 |                         print("paddings not matching", padding_seq, padding_angles)
 57 |                     pass
 58 |     return None
 59 |     
 60 | 
 61 | ######################
 62 | ## structural utils ##
 63 | ######################
 64 | 
 65 | def get_dihedral(c1, c2, c3, c4):
 66 |     """ Returns the dihedral angle in radians.
 67 |         Will use atan2 formula from: 
 68 |         https://en.wikipedia.org/wiki/Dihedral_angle#In_polymer_physics
 69 |         Inputs: 
 70 |         * c1: (batch, 3) or (3,)
 71 |         * c2: (batch, 3) or (3,)
 72 |         * c3: (batch, 3) or (3,)
 73 |         * c4: (batch, 3) or (3,)
 74 |     """
 75 |     u1 = c2 - c1
 76 |     u2 = c3 - c2
 77 |     u3 = c4 - c3
 78 | 
 79 |     return torch.atan2( ( (torch.norm(u2, dim=-1, keepdim=True) * u1) * torch.cross(u2,u3, dim=-1) ).sum(dim=-1) ,  
 80 |                         (  torch.cross(u1,u2, dim=-1) * torch.cross(u2, u3, dim=-1) ).sum(dim=-1) )
 81 | 
 82 | 
 83 | def get_angle(c1, c2, c3):
 84 |     """ Returns the angle in radians.
 85 |         Inputs: 
 86 |         * c1: (batch, 3) or (3,)
 87 |         * c2: (batch, 3) or (3,)
 88 |         * c3: (batch, 3) or (3,)
 89 |     """
 90 |     u1 = c2 - c1
 91 |     u2 = c3 - c2
 92 | 
 93 |     # dont use acos since norms involved. 
 94 |     # better use atan2 formula: atan2(cross, dot) from here: 
 95 |     # https://johnblackburne.blogspot.com/2012/05/angle-between-two-3d-vectors.html
 96 | 
 97 |     # add a minus since we want the angle in reversed order - sidechainnet issues
 98 |     return torch.atan2( torch.norm(torch.cross(u1,u2, dim=-1), dim=-1), 
 99 |                         -(u1*u2).sum(dim=-1) ) 
100 | 
101 | 
def kabsch_torch(X, Y):
    """ Kabsch alignment of X into Y. 
        Assumes X,Y are both (D, N) - usually (3, N)
        Inputs:
        * X: (D, N) points to be aligned
        * Y: (D, N) reference points
        Outputs: (X_, Y_)
        * X_: (D, N) X centered at the origin and rotated onto Y
        * Y_: (D, N) Y centered at the origin
        The rotation is computed from detached tensors, so no gradient
        flows through the SVD.
    """
    #  center X and Y to the origin
    X_ = X - X.mean(dim=-1, keepdim=True)
    Y_ = Y - Y.mean(dim=-1, keepdim=True)
    # calculate covariance matrix
    C = torch.matmul(X_, Y_.t())
    # Optimal rotation matrix via SVD.
    # feature-detect the API instead of parsing the minor version number:
    # the minor number alone sends torch 2.x to the deprecated torch.svd.
    if hasattr(torch, "linalg") and hasattr(torch.linalg, "svd"):
        # torch.linalg.svd already returns the transposed right factor
        V, _, W = torch.linalg.svd(C.detach())
    else:
        # warning! torch.svd returns the right factor untransposed
        V, _, W = torch.svd(C.detach())
        W = W.t()
    # determinant sign for direction correction (avoid returning a reflection)
    if (torch.det(V) * torch.det(W)) < 0.0:
        V[:, -1] = V[:, -1] * (-1)
    # Create Rotation matrix U
    U = torch.matmul(V, W)
    # calculate rotations
    X_ = torch.matmul(X_.t(), U).t()
    # return centered and aligned
    return X_, Y_
128 | 
129 | 
def rmsd_torch(X, Y):
    """ Square root of the mean squared difference between X and Y,
        averaged over the last two dims.
        Assumes x,y are both (batch, d, n) - usually (batch, 3, N).
    """
    squared_dev = (X - Y).pow(2)
    return squared_dev.mean(dim=(-1, -2)).sqrt()
133 | 
134 | 
135 | 
136 | 


--------------------------------------------------------------------------------
/notebooks/experiments/[131, 150]_info.joblib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments/[131, 150]_info.joblib


--------------------------------------------------------------------------------
/notebooks/experiments/[200, 250]_info.joblib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments/[200, 250]_info.joblib


--------------------------------------------------------------------------------
/notebooks/experiments/[331, 351]_info.joblib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments/[331, 351]_info.joblib


--------------------------------------------------------------------------------
/notebooks/experiments/[400, 450]_info.joblib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments/[400, 450]_info.joblib


--------------------------------------------------------------------------------
/notebooks/experiments/[500, 550]_info.joblib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments/[500, 550]_info.joblib


--------------------------------------------------------------------------------
/notebooks/experiments/[600, 650]_info.joblib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments/[600, 650]_info.joblib


--------------------------------------------------------------------------------
/notebooks/experiments/[700, 780]_info.joblib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments/[700, 780]_info.joblib


--------------------------------------------------------------------------------
/notebooks/experiments/[800, 900]_info.joblib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments/[800, 900]_info.joblib


--------------------------------------------------------------------------------
/notebooks/experiments/[905, 1070]_info.joblib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments/[905, 1070]_info.joblib


--------------------------------------------------------------------------------
/notebooks/experiments/[905, 970]_info.joblib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments/[905, 970]_info.joblib


--------------------------------------------------------------------------------
/notebooks/experiments/logs_experiment.txt:
--------------------------------------------------------------------------------
  1 | 2021-06-07 23:13:24,959 INFO MainThread root Loading data
  2 | 
  3 | 2021-06-07 23:13:38,739 INFO MainThread root Loading data
  4 | 
  5 | 2021-06-07 23:13:48,499 INFO MainThread root Loading data
  6 | 
  7 | 2021-06-07 23:13:48,499 INFO MainThread root Preparing speed tests: for device device(type='cpu') and hybrid_opt: False
  8 | 
  9 | 2021-06-07 23:14:01,528 INFO MainThread root Loading data
 10 | 
 11 | 2021-06-07 23:14:01,528 INFO MainThread root Preparing speed tests: for device device(type='cpu') and hybrid_opt: False
 12 | 
 13 | 2021-06-07 23:14:53,890 INFO MainThread root Loading data
 14 | 
 15 | 2021-06-07 23:14:59,937 INFO MainThread root Loading data
 16 | 
 17 | 2021-06-07 23:14:59,937 INFO MainThread root Preparing speed tests: for device device(type='cpu') and hybrid_opt: False
 18 | 
 19 | 2021-06-07 23:14:59,954 DEBUG MainThread .prody 1864 atoms and 1 coordinate set(s) were parsed in 0.02s.
 20 | 2021-06-07 23:15:10,979 INFO MainThread root Loading data
 21 | 
 22 | 2021-06-07 23:15:10,980 INFO MainThread root Preparing speed tests: for device device(type='cpu') and hybrid_opt: False
 23 | 
 24 | 2021-06-07 23:15:10,995 DEBUG MainThread .prody 1864 atoms and 1 coordinate set(s) were parsed in 0.01s.
 25 | 2021-06-07 23:15:32,111 INFO MainThread root Loading data
 26 | 
 27 | 2021-06-07 23:15:32,112 INFO MainThread root Preparing speed tests: for device device(type='cpu') and hybrid_opt: False
 28 | 
 29 | 2021-06-07 23:15:32,124 DEBUG MainThread .prody 1864 atoms and 1 coordinate set(s) were parsed in 0.01s.
 30 | 2021-06-07 23:15:32,659 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/5rsa_ribonuclease.pdb
 31 | 
 32 | 2021-06-07 23:15:38,006 INFO MainThread root 5.346866726 for 1000 calls
 33 | 2021-06-07 23:15:38,006 INFO MainThread root Done
 34 | 2021-06-07 23:15:38,006 INFO MainThread root 
 35 | 
 36 | =======
 37 | 
 38 | 
 39 | 2021-06-07 23:15:38,084 DEBUG MainThread .prody 10003 atoms and 1 coordinate set(s) were parsed in 0.08s.
 40 | 2021-06-07 23:15:41,299 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/1000_ala.pdb
 41 | 
 42 | 2021-06-07 23:15:56,652 INFO MainThread root 15.352682389000002 for 1000 calls
 43 | 2021-06-07 23:15:56,652 INFO MainThread root Done
 44 | 2021-06-07 23:15:56,652 INFO MainThread root 
 45 | 
 46 | =======
 47 | 
 48 | 
 49 | 2021-06-07 23:15:56,690 DEBUG MainThread .prody 5003 atoms and 1 coordinate set(s) were parsed in 0.04s.
 50 | 2021-06-07 23:15:58,079 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/500_ala.pdb
 51 | 
 52 | 2021-06-07 23:16:07,192 INFO MainThread root 9.112788776999999 for 1000 calls
 53 | 2021-06-07 23:16:07,192 INFO MainThread root Done
 54 | 2021-06-07 23:16:07,192 INFO MainThread root 
 55 | 
 56 | =======
 57 | 
 58 | 
 59 | 2021-06-07 23:16:07,214 DEBUG MainThread .prody 2568 atoms and 1 coordinate set(s) were parsed in 0.02s.
 60 | 2021-06-07 23:16:08,470 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/il10_lactate_dh.pdb
 61 | 
 62 | 2021-06-07 23:16:16,980 INFO MainThread root 8.509638406999997 for 1000 calls
 63 | 2021-06-07 23:16:16,980 INFO MainThread root Done
 64 | 2021-06-07 23:16:16,980 INFO MainThread root 
 65 | 
 66 | =======
 67 | 
 68 | 
 69 | 2021-06-07 23:16:16,980 INFO MainThread root Execution has finished
 70 | 
 71 | 2021-06-07 22:36:56,573 INFO MainThread root Loading data
 72 | 
 73 | 2021-06-07 22:36:56,648 INFO MainThread root Preparing speed tests: for device device(type='cpu') and hybrid_opt: False
 74 | 
 75 | 2021-06-07 22:36:56,715 DEBUG MainThread .prody 10003 atoms and 1 coordinate set(s) were parsed in 0.07s.
 76 | 2021-06-07 22:37:00,040 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/1000_ala.pdb
 77 | 
 78 | 2021-06-07 22:37:31,005 INFO MainThread root 30.9647682 for 1000 calls
 79 | 2021-06-07 22:37:31,005 INFO MainThread root Done
 80 | 2021-06-07 22:37:31,005 INFO MainThread root 
 81 | 
 82 | =======
 83 | 
 84 | 
 85 | 2021-06-07 22:37:31,045 DEBUG MainThread .prody 5003 atoms and 1 coordinate set(s) were parsed in 0.04s.
 86 | 2021-06-07 22:37:32,399 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/500_ala.pdb
 87 | 
 88 | 2021-06-07 22:37:47,501 INFO MainThread root 15.102381899999997 for 1000 calls
 89 | 2021-06-07 22:37:47,501 INFO MainThread root Done
 90 | 2021-06-07 22:37:47,502 INFO MainThread root 
 91 | 
 92 | =======
 93 | 
 94 | 
 95 | 2021-06-07 22:37:47,514 DEBUG MainThread .prody 1864 atoms and 1 coordinate set(s) were parsed in 0.01s.
 96 | 2021-06-07 22:37:47,984 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/5rsa_ribonuclease.pdb
 97 | 
 98 | 2021-06-07 22:37:55,590 INFO MainThread root 7.6064229999999995 for 1000 calls
 99 | 2021-06-07 22:37:55,590 INFO MainThread root Done
100 | 2021-06-07 22:37:55,590 INFO MainThread root 
101 | 
102 | =======
103 | 
104 | 
105 | 2021-06-07 22:37:55,608 DEBUG MainThread .prody 2568 atoms and 1 coordinate set(s) were parsed in 0.02s.
106 | 2021-06-07 22:37:56,821 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/il10_lactate_dh.pdb
107 | 
108 | 2021-06-07 22:38:10,189 INFO MainThread root 13.368083000000006 for 1000 calls
109 | 2021-06-07 22:38:10,189 INFO MainThread root Done
110 | 2021-06-07 22:38:10,189 INFO MainThread root 
111 | 
112 | =======
113 | 
114 | 
115 | 2021-06-07 22:38:10,189 INFO MainThread root Preparing speed tests: for device device(type='cuda') and hybrid_opt: True
116 | 
117 | 2021-06-07 22:38:10,255 DEBUG MainThread .prody 10003 atoms and 1 coordinate set(s) were parsed in 0.07s.
118 | 2021-06-07 22:38:16,823 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/1000_ala.pdb
119 | 
120 | 2021-06-07 22:38:46,881 INFO MainThread root 30.058233599999994 for 1000 calls
121 | 2021-06-07 22:38:46,881 INFO MainThread root Done
122 | 2021-06-07 22:38:46,881 INFO MainThread root 
123 | 
124 | =======
125 | 
126 | 
127 | 2021-06-07 22:38:46,971 DEBUG MainThread .prody 5003 atoms and 1 coordinate set(s) were parsed in 0.09s.
128 | 2021-06-07 22:38:49,619 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/500_ala.pdb
129 | 
130 | 2021-06-07 22:39:15,901 INFO MainThread root 26.281134400000013 for 1000 calls
131 | 2021-06-07 22:39:15,901 INFO MainThread root Done
132 | 2021-06-07 22:39:15,901 INFO MainThread root 
133 | 
134 | =======
135 | 
136 | 
137 | 2021-06-07 22:39:15,914 DEBUG MainThread .prody 1864 atoms and 1 coordinate set(s) were parsed in 0.01s.
138 | 2021-06-07 22:39:16,514 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/5rsa_ribonuclease.pdb
139 | 
140 | 2021-06-07 22:39:35,427 INFO MainThread root 18.913132099999984 for 1000 calls
141 | 2021-06-07 22:39:35,428 INFO MainThread root Done
142 | 2021-06-07 22:39:35,428 INFO MainThread root 
143 | 
144 | =======
145 | 
146 | 
147 | 2021-06-07 22:39:35,444 DEBUG MainThread .prody 2568 atoms and 1 coordinate set(s) were parsed in 0.02s.
148 | 2021-06-07 22:39:36,919 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/il10_lactate_dh.pdb
149 | 
150 | 2021-06-07 22:39:59,241 INFO MainThread root 22.321837799999997 for 1000 calls
151 | 2021-06-07 22:39:59,241 INFO MainThread root Done
152 | 2021-06-07 22:39:59,241 INFO MainThread root 
153 | 
154 | =======
155 | 
156 | 
157 | 2021-06-07 22:39:59,241 INFO MainThread root Preparing speed tests: for device device(type='cuda') and hybrid_opt: False
158 | 
159 | 2021-06-07 22:39:59,311 DEBUG MainThread .prody 10003 atoms and 1 coordinate set(s) were parsed in 0.07s.
160 | 2021-06-07 22:40:03,273 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/1000_ala.pdb
161 | 
162 | 2021-06-07 22:40:46,576 INFO MainThread root 43.3028281 for 1000 calls
163 | 2021-06-07 22:40:46,576 INFO MainThread root Done
164 | 2021-06-07 22:40:46,576 INFO MainThread root 
165 | 
166 | =======
167 | 
168 | 
169 | 2021-06-07 22:40:46,609 DEBUG MainThread .prody 5003 atoms and 1 coordinate set(s) were parsed in 0.03s.
170 | 2021-06-07 22:40:48,396 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/500_ala.pdb
171 | 
172 | 2021-06-07 22:41:17,417 INFO MainThread root 29.020322799999974 for 1000 calls
173 | 2021-06-07 22:41:17,417 INFO MainThread root Done
174 | 2021-06-07 22:41:17,417 INFO MainThread root 
175 | 
176 | =======
177 | 
178 | 
179 | 2021-06-07 22:41:17,430 DEBUG MainThread .prody 1864 atoms and 1 coordinate set(s) were parsed in 0.01s.
180 | 2021-06-07 22:41:18,004 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/5rsa_ribonuclease.pdb
181 | 
182 | 2021-06-07 22:41:39,125 INFO MainThread root 21.120834400000035 for 1000 calls
183 | 2021-06-07 22:41:39,125 INFO MainThread root Done
184 | 2021-06-07 22:41:39,125 INFO MainThread root 
185 | 
186 | =======
187 | 
188 | 
189 | 2021-06-07 22:41:39,150 DEBUG MainThread .prody 2568 atoms and 1 coordinate set(s) were parsed in 0.02s.
190 | 2021-06-07 22:41:40,646 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/il10_lactate_dh.pdb
191 | 
192 | 2021-06-07 22:42:06,874 INFO MainThread root 26.22732030000003 for 1000 calls
193 | 2021-06-07 22:42:06,874 INFO MainThread root Done
194 | 2021-06-07 22:42:06,874 INFO MainThread root 
195 | 
196 | =======
197 | 
198 | 
199 | 2021-06-07 22:42:06,874 INFO MainThread root Execution has finished
200 | 
201 | 


--------------------------------------------------------------------------------
/notebooks/experiments/logs_experiment_scn_various_lengths.txt:
--------------------------------------------------------------------------------
  1 | =======
  2 | 2021-05-22 02:14:49,435 INFO MainThread root Assessing lengths of: [134, 214, 336, 401, 501, 621, 753, 862, 994]
  3 | 
  4 | 2021-05-22 02:14:49,435 INFO MainThread root Preparing speed tests: for device 'cpu'
  5 | 
  6 | 2021-05-22 02:14:49,448 INFO MainThread root Assessing the speed of folding algorithm at length 134
  7 | 
  8 | 2021-05-22 02:14:55,960 INFO MainThread root 6.509940282999999 for 1000 calls
  9 | 2021-05-22 02:14:55,960 INFO MainThread root Saving the related information at experiments/[131, 150]_info.joblib
 10 | 
 11 | 2021-05-22 02:14:55,961 INFO MainThread root 
 12 | 
 13 | =======
 14 | 
 15 | 
 16 | 2021-05-22 02:14:55,977 INFO MainThread root Assessing the speed of folding algorithm at length 214
 17 | 
 18 | 2021-05-22 02:15:02,783 INFO MainThread root 6.805784261999996 for 1000 calls
 19 | 2021-05-22 02:15:02,783 INFO MainThread root Saving the related information at experiments/[200, 250]_info.joblib
 20 | 
 21 | 2021-05-22 02:15:02,784 INFO MainThread root 
 22 | 
 23 | =======
 24 | 
 25 | 
 26 | 2021-05-22 02:15:02,808 INFO MainThread root Assessing the speed of folding algorithm at length 336
 27 | 
 28 | 2021-05-22 02:15:11,765 INFO MainThread root 8.956757892999995 for 1000 calls
 29 | 2021-05-22 02:15:11,765 INFO MainThread root Saving the related information at experiments/[331, 351]_info.joblib
 30 | 
 31 | 2021-05-22 02:15:11,766 INFO MainThread root 
 32 | 
 33 | =======
 34 | 
 35 | 
 36 | 2021-05-22 02:15:11,794 INFO MainThread root Assessing the speed of folding algorithm at length 401
 37 | 
 38 | 2021-05-22 02:15:21,825 INFO MainThread root 10.031293943000009 for 1000 calls
 39 | 2021-05-22 02:15:21,825 INFO MainThread root Saving the related information at experiments/[400, 450]_info.joblib
 40 | 
 41 | 2021-05-22 02:15:21,826 INFO MainThread root 
 42 | 
 43 | =======
 44 | 
 45 | 
 46 | 2021-05-22 02:15:21,862 INFO MainThread root Assessing the speed of folding algorithm at length 501
 47 | 
 48 | 2021-05-22 02:15:33,083 INFO MainThread root 11.221263701000012 for 1000 calls
 49 | 2021-05-22 02:15:33,083 INFO MainThread root Saving the related information at experiments/[500, 550]_info.joblib
 50 | 
 51 | 2021-05-22 02:15:33,084 INFO MainThread root 
 52 | 
 53 | =======
 54 | 
 55 | 
 56 | 2021-05-22 02:15:33,126 INFO MainThread root Assessing the speed of folding algorithm at length 621
 57 | 
 58 | 2021-05-22 02:15:45,854 INFO MainThread root 12.727750233999998 for 1000 calls
 59 | 2021-05-22 02:15:45,854 INFO MainThread root Saving the related information at experiments/[600, 650]_info.joblib
 60 | 
 61 | 2021-05-22 02:15:45,855 INFO MainThread root 
 62 | 
 63 | =======
 64 | 
 65 | 
 66 | 2021-05-22 02:15:45,906 INFO MainThread root Assessing the speed of folding algorithm at length 753
 67 | 
 68 | 2021-05-22 02:16:00,667 INFO MainThread root 14.760831587000013 for 1000 calls
 69 | 2021-05-22 02:16:00,672 INFO MainThread root Saving the related information at experiments/[700, 780]_info.joblib
 70 | 
 71 | 2021-05-22 02:16:00,674 INFO MainThread root 
 72 | 
 73 | =======
 74 | 
 75 | 
 76 | 2021-05-22 02:16:00,734 INFO MainThread root Assessing the speed of folding algorithm at length 862
 77 | 
 78 | 2021-05-22 02:16:17,315 INFO MainThread root 16.580566616 for 1000 calls
 79 | 2021-05-22 02:16:17,315 INFO MainThread root Saving the related information at experiments/[800, 900]_info.joblib
 80 | 
 81 | 2021-05-22 02:16:17,316 INFO MainThread root 
 82 | 
 83 | =======
 84 | 
 85 | 
 86 | 2021-05-22 02:16:17,383 INFO MainThread root Assessing the speed of folding algorithm at length 994
 87 | 
 88 | 2021-05-22 02:16:35,654 INFO MainThread root 18.271017204000003 for 1000 calls
 89 | 2021-05-22 02:16:35,654 INFO MainThread root Saving the related information at experiments/[905, 1070]_info.joblib
 90 | 
 91 | 2021-05-22 02:16:35,655 INFO MainThread root 
 92 | 
 93 | =======
 94 | 
 95 | 
 96 | 2021-05-22 02:16:35,655 INFO MainThread root Preparing speed tests: for device device(type='cpu')
 97 | 
 98 | 2021-05-22 02:16:35,664 INFO MainThread root Assessing the speed of folding algorithm at length 134
 99 | 
100 | 2021-05-22 02:16:40,994 INFO MainThread root 5.329709648000005 for 1000 calls
101 | 2021-05-22 02:16:40,994 INFO MainThread root Saving the related information at experiments/[131, 150]_info.joblib
102 | 
103 | 2021-05-22 02:16:40,995 INFO MainThread root 
104 | 
105 | =======
106 | 
107 | 
108 | 2021-05-22 02:16:41,010 INFO MainThread root Assessing the speed of folding algorithm at length 214
109 | 
110 | 2021-05-22 02:16:47,511 INFO MainThread root 6.501463223999991 for 1000 calls
111 | 2021-05-22 02:16:47,512 INFO MainThread root Saving the related information at experiments/[200, 250]_info.joblib
112 | 
113 | 2021-05-22 02:16:47,513 INFO MainThread root 
114 | 
115 | =======
116 | 
117 | 
118 | 2021-05-22 02:16:47,536 INFO MainThread root Assessing the speed of folding algorithm at length 336
119 | 
120 | 2021-05-22 02:16:56,197 INFO MainThread root 8.660352851999988 for 1000 calls
121 | 2021-05-22 02:16:56,197 INFO MainThread root Saving the related information at experiments/[331, 351]_info.joblib
122 | 
123 | 2021-05-22 02:16:56,198 INFO MainThread root 
124 | 
125 | =======
126 | 
127 | 
128 | 2021-05-22 02:16:56,226 INFO MainThread root Assessing the speed of folding algorithm at length 401
129 | 
130 | 2021-05-22 02:17:05,869 INFO MainThread root 9.643088333000009 for 1000 calls
131 | 2021-05-22 02:17:05,869 INFO MainThread root Saving the related information at experiments/[400, 450]_info.joblib
132 | 
133 | 2021-05-22 02:17:05,871 INFO MainThread root 
134 | 
135 | =======
136 | 
137 | 
138 | 2021-05-22 02:17:05,904 INFO MainThread root Assessing the speed of folding algorithm at length 501
139 | 
140 | 2021-05-22 02:17:17,308 INFO MainThread root 11.40289807900001 for 1000 calls
141 | 2021-05-22 02:17:17,308 INFO MainThread root Saving the related information at experiments/[500, 550]_info.joblib
142 | 
143 | 2021-05-22 02:17:17,309 INFO MainThread root 
144 | 
145 | =======
146 | 
147 | 
148 | 2021-05-22 02:17:17,351 INFO MainThread root Assessing the speed of folding algorithm at length 621
149 | 
150 | 2021-05-22 02:17:30,116 INFO MainThread root 12.764849003000023 for 1000 calls
151 | 2021-05-22 02:17:30,117 INFO MainThread root Saving the related information at experiments/[600, 650]_info.joblib
152 | 
153 | 2021-05-22 02:17:30,119 INFO MainThread root 
154 | 
155 | =======
156 | 
157 | 
158 | 2021-05-22 02:17:30,171 INFO MainThread root Assessing the speed of folding algorithm at length 753
159 | 
160 | 2021-05-22 02:17:44,858 INFO MainThread root 14.687164622000012 for 1000 calls
161 | 2021-05-22 02:17:44,858 INFO MainThread root Saving the related information at experiments/[700, 780]_info.joblib
162 | 
163 | 2021-05-22 02:17:44,859 INFO MainThread root 
164 | 
165 | =======
166 | 
167 | 
168 | 2021-05-22 02:17:44,918 INFO MainThread root Assessing the speed of folding algorithm at length 862
169 | 
170 | 2021-05-22 02:18:01,473 INFO MainThread root 16.554769015000005 for 1000 calls
171 | 2021-05-22 02:18:01,473 INFO MainThread root Saving the related information at experiments/[800, 900]_info.joblib
172 | 
173 | 2021-05-22 02:18:01,474 INFO MainThread root 
174 | 
175 | =======
176 | 
177 | 
178 | 2021-05-22 02:18:01,538 INFO MainThread root Assessing the speed of folding algorithm at length 994
179 | 
180 | 2021-05-22 02:18:19,650 INFO MainThread root 18.111747613000006 for 1000 calls
181 | 2021-05-22 02:18:19,650 INFO MainThread root Saving the related information at experiments/[905, 1070]_info.joblib
182 | 
183 | 2021-05-22 02:18:19,651 INFO MainThread root 
184 | 
185 | =======
186 | 
187 | 
188 | 2021-05-22 02:18:19,651 INFO MainThread root Execution has finished
189 | 
190 | >>>>>>> 4cabbe55371d6a9a7edeab1db719fa0cf8312eae
191 | 2021-05-22 18:39:33,611 INFO MainThread root Loading data
192 | 
193 | 2021-05-22 18:39:33,622 INFO MainThread root Data has been loaded
194 | 
195 | 
196 | =======
197 | 
198 | 
199 | 2021-05-22 18:39:33,622 INFO MainThread root Assessing lengths of: [134, 214, 336, 401, 501, 621, 753, 862, 994]
200 | 
201 | 2021-05-22 18:39:33,646 INFO MainThread root Preparing speed tests: for device 'cpu'
202 | 
203 | 2021-05-22 18:39:33,670 INFO MainThread root Assessing the speed of folding algorithm at length 134
204 | 
205 | 2021-05-22 18:39:42,657 INFO MainThread root 8.9869345 for 1000 calls
206 | 2021-05-22 18:39:42,657 INFO MainThread root Saving the related information at experiments/100_info.joblib
207 | 
208 | 2021-05-22 18:39:42,659 INFO MainThread root 
209 | 
210 | =======
211 | 
212 | 
213 | 2021-05-22 18:39:42,687 INFO MainThread root Assessing the speed of folding algorithm at length 214
214 | 
215 | 2021-05-22 18:39:53,087 INFO MainThread root 10.4000911 for 1000 calls
216 | 2021-05-22 18:39:53,087 INFO MainThread root Saving the related information at experiments/200_info.joblib
217 | 
218 | 2021-05-22 18:39:53,088 INFO MainThread root 
219 | 
220 | =======
221 | 
222 | 
223 | 2021-05-22 18:39:53,122 INFO MainThread root Assessing the speed of folding algorithm at length 336
224 | 
225 | 2021-05-22 18:40:06,577 INFO MainThread root 13.455043199999999 for 1000 calls
226 | 2021-05-22 18:40:06,577 INFO MainThread root Saving the related information at experiments/300_info.joblib
227 | 
228 | 2021-05-22 18:40:06,578 INFO MainThread root 
229 | 
230 | =======
231 | 
232 | 
233 | 2021-05-22 18:40:06,617 INFO MainThread root Assessing the speed of folding algorithm at length 401
234 | 
235 | 2021-05-22 18:40:21,715 INFO MainThread root 15.097297400000002 for 1000 calls
236 | 2021-05-22 18:40:21,715 INFO MainThread root Saving the related information at experiments/400_info.joblib
237 | 
238 | 2021-05-22 18:40:21,716 INFO MainThread root 
239 | 
240 | =======
241 | 
242 | 
243 | 2021-05-22 18:40:21,779 INFO MainThread root Assessing the speed of folding algorithm at length 501
244 | 
245 | 2021-05-22 18:40:40,543 INFO MainThread root 18.764004199999995 for 1000 calls
246 | 2021-05-22 18:40:40,543 INFO MainThread root Saving the related information at experiments/500_info.joblib
247 | 
248 | 2021-05-22 18:40:40,544 INFO MainThread root 
249 | 
250 | =======
251 | 
252 | 
253 | 2021-05-22 18:40:40,617 INFO MainThread root Assessing the speed of folding algorithm at length 621
254 | 
255 | 2021-05-22 18:41:02,270 INFO MainThread root 21.652811900000003 for 1000 calls
256 | 2021-05-22 18:41:02,270 INFO MainThread root Saving the related information at experiments/600_info.joblib
257 | 
258 | 2021-05-22 18:41:02,271 INFO MainThread root 
259 | 
260 | =======
261 | 
262 | 
263 | 2021-05-22 18:41:02,367 INFO MainThread root Assessing the speed of folding algorithm at length 753
264 | 
265 | 2021-05-22 18:41:27,302 INFO MainThread root 24.934528900000004 for 1000 calls
266 | 2021-05-22 18:41:27,302 INFO MainThread root Saving the related information at experiments/700_info.joblib
267 | 
268 | 2021-05-22 18:41:27,304 INFO MainThread root 
269 | 
270 | =======
271 | 
272 | 
273 | 2021-05-22 18:41:27,431 INFO MainThread root Assessing the speed of folding algorithm at length 862
274 | 
275 | 2021-05-22 18:41:56,196 INFO MainThread root 28.7642814 for 1000 calls
276 | 2021-05-22 18:41:56,196 INFO MainThread root Saving the related information at experiments/800_info.joblib
277 | 
278 | 2021-05-22 18:41:56,197 INFO MainThread root 
279 | 
280 | =======
281 | 
282 | 
283 | 2021-05-22 18:41:56,312 INFO MainThread root Assessing the speed of folding algorithm at length 994
284 | 
285 | 2021-05-22 18:42:29,089 INFO MainThread root 32.77735960000001 for 1000 calls
286 | 2021-05-22 18:42:29,090 INFO MainThread root Saving the related information at experiments/900_info.joblib
287 | 
288 | 2021-05-22 18:42:29,090 INFO MainThread root 
289 | 
290 | =======
291 | 
292 | 
293 | 2021-05-22 18:42:29,091 INFO MainThread root Preparing speed tests: for device device(type='cuda') -- hybrid=True
294 | 
295 | 2021-05-22 18:42:30,437 INFO MainThread root Assessing the speed of folding algorithm at length 134
296 | 
297 | 2021-05-22 18:42:48,848 INFO MainThread root 18.41120219999999 for 1000 calls
298 | 2021-05-22 18:42:48,848 INFO MainThread root Saving the related information at experiments/100_info.joblib
299 | 
300 | 2021-05-22 18:42:48,849 INFO MainThread root 
301 | 
302 | =======
303 | 
304 | 
305 | 2021-05-22 18:42:49,042 INFO MainThread root Assessing the speed of folding algorithm at length 214
306 | 
307 | 2021-05-22 18:43:09,409 INFO MainThread root 20.366851999999994 for 1000 calls
308 | 2021-05-22 18:43:09,409 INFO MainThread root Saving the related information at experiments/200_info.joblib
309 | 
310 | 2021-05-22 18:43:09,410 INFO MainThread root 
311 | 
312 | =======
313 | 
314 | 
315 | 2021-05-22 18:43:09,752 INFO MainThread root Assessing the speed of folding algorithm at length 336
316 | 
317 | 2021-05-22 18:43:32,291 INFO MainThread root 22.538369399999993 for 1000 calls
318 | 2021-05-22 18:43:32,291 INFO MainThread root Saving the related information at experiments/300_info.joblib
319 | 
320 | 2021-05-22 18:43:32,292 INFO MainThread root 
321 | 
322 | =======
323 | 
324 | 
325 | 2021-05-22 18:43:32,658 INFO MainThread root Assessing the speed of folding algorithm at length 401
326 | 
327 | 2021-05-22 18:43:55,501 INFO MainThread root 22.84260729999997 for 1000 calls
328 | 2021-05-22 18:43:55,501 INFO MainThread root Saving the related information at experiments/400_info.joblib
329 | 
330 | 2021-05-22 18:43:55,502 INFO MainThread root 
331 | 
332 | =======
333 | 
334 | 
335 | 2021-05-22 18:43:56,096 INFO MainThread root Assessing the speed of folding algorithm at length 501
336 | 
337 | 2021-05-22 18:44:20,154 INFO MainThread root 24.057599100000004 for 1000 calls
338 | 2021-05-22 18:44:20,154 INFO MainThread root Saving the related information at experiments/500_info.joblib
339 | 
340 | 2021-05-22 18:44:20,155 INFO MainThread root 
341 | 
342 | =======
343 | 
344 | 
345 | 2021-05-22 18:44:20,720 INFO MainThread root Assessing the speed of folding algorithm at length 621
346 | 
347 | 2021-05-22 18:44:46,706 INFO MainThread root 25.98607320000002 for 1000 calls
348 | 2021-05-22 18:44:46,706 INFO MainThread root Saving the related information at experiments/600_info.joblib
349 | 
350 | 2021-05-22 18:44:46,707 INFO MainThread root 
351 | 
352 | =======
353 | 
354 | 
355 | 2021-05-22 18:44:47,392 INFO MainThread root Assessing the speed of folding algorithm at length 753
356 | 
357 | 2021-05-22 18:45:15,028 INFO MainThread root 27.6351363 for 1000 calls
358 | 2021-05-22 18:45:15,028 INFO MainThread root Saving the related information at experiments/700_info.joblib
359 | 
360 | 2021-05-22 18:45:15,029 INFO MainThread root 
361 | 
362 | =======
363 | 
364 | 
365 | 2021-05-22 18:45:15,818 INFO MainThread root Assessing the speed of folding algorithm at length 862
366 | 
367 | 2021-05-22 18:45:45,070 INFO MainThread root 29.25168880000001 for 1000 calls
368 | 2021-05-22 18:45:45,070 INFO MainThread root Saving the related information at experiments/800_info.joblib
369 | 
370 | 2021-05-22 18:45:45,071 INFO MainThread root 
371 | 
372 | =======
373 | 
374 | 
375 | 2021-05-22 18:45:45,994 INFO MainThread root Assessing the speed of folding algorithm at length 994
376 | 
377 | 2021-05-22 18:46:17,009 INFO MainThread root 31.0138465 for 1000 calls
378 | 2021-05-22 18:46:17,009 INFO MainThread root Saving the related information at experiments/900_info.joblib
379 | 
380 | 2021-05-22 18:46:17,010 INFO MainThread root 
381 | 
382 | =======
383 | 
384 | 
385 | 2021-05-22 18:46:17,010 INFO MainThread root Execution has finished
386 | 
387 | 2021-05-22 18:50:28,714 INFO MainThread root Loading data
388 | 
389 | 2021-05-22 18:50:28,718 INFO MainThread root Data has been loaded
390 | 
391 | 
392 | =======
393 | 
394 | 
395 | 2021-05-22 18:50:28,718 INFO MainThread root Assessing lengths of: [134, 214, 336, 401, 501, 621, 753, 862, 994]
396 | 
397 | 2021-05-22 18:50:40,190 INFO MainThread root Loading data
398 | 
399 | 2021-05-22 18:50:40,194 INFO MainThread root Data has been loaded
400 | 
401 | 
402 | =======
403 | 
404 | 
405 | 2021-05-22 18:50:40,194 INFO MainThread root Assessing lengths of: [134, 214, 336, 401, 501, 621, 753, 862, 994]
406 | 
407 | 2021-05-22 18:50:40,217 INFO MainThread root Preparing speed tests: for device device(type='cpu')
408 | 
409 | 2021-05-22 18:50:40,232 INFO MainThread root Assessing the speed of folding algorithm at length 134
410 | 
411 | 2021-05-22 18:50:48,901 INFO MainThread root 8.6684752 for 1000 calls
412 | 2021-05-22 18:50:48,901 INFO MainThread root Saving the related information at experiments/100_info.joblib
413 | 
414 | 2021-05-22 18:50:48,902 INFO MainThread root 
415 | 
416 | =======
417 | 
418 | 
419 | 2021-05-22 18:50:48,923 INFO MainThread root Assessing the speed of folding algorithm at length 214
420 | 
421 | 2021-05-22 18:50:59,368 INFO MainThread root 10.4448301 for 1000 calls
422 | 2021-05-22 18:50:59,368 INFO MainThread root Saving the related information at experiments/200_info.joblib
423 | 
424 | 2021-05-22 18:50:59,369 INFO MainThread root 
425 | 
426 | =======
427 | 
428 | 
429 | 2021-05-22 18:50:59,415 INFO MainThread root Assessing the speed of folding algorithm at length 336
430 | 
431 | 2021-05-22 18:51:13,819 INFO MainThread root 14.403065699999996 for 1000 calls
432 | 2021-05-22 18:51:13,819 INFO MainThread root Saving the related information at experiments/300_info.joblib
433 | 
434 | 2021-05-22 18:51:13,820 INFO MainThread root 
435 | 
436 | =======
437 | 
438 | 
439 | 2021-05-22 18:51:13,880 INFO MainThread root Assessing the speed of folding algorithm at length 401
440 | 
441 | 2021-05-22 18:51:29,132 INFO MainThread root 15.251432700000002 for 1000 calls
442 | 2021-05-22 18:51:29,132 INFO MainThread root Saving the related information at experiments/400_info.joblib
443 | 
444 | 2021-05-22 18:51:29,133 INFO MainThread root 
445 | 
446 | =======
447 | 
448 | 
449 | 2021-05-22 18:51:29,181 INFO MainThread root Assessing the speed of folding algorithm at length 501
450 | 
451 | 2021-05-22 18:51:47,005 INFO MainThread root 17.824042999999996 for 1000 calls
452 | 2021-05-22 18:51:47,005 INFO MainThread root Saving the related information at experiments/500_info.joblib
453 | 
454 | 2021-05-22 18:51:47,006 INFO MainThread root 
455 | 
456 | =======
457 | 
458 | 
459 | 2021-05-22 18:51:47,083 INFO MainThread root Assessing the speed of folding algorithm at length 621
460 | 
461 | 2021-05-22 18:52:07,623 INFO MainThread root 20.5405765 for 1000 calls
462 | 2021-05-22 18:52:07,624 INFO MainThread root Saving the related information at experiments/600_info.joblib
463 | 
464 | 2021-05-22 18:52:07,625 INFO MainThread root 
465 | 
466 | =======
467 | 
468 | 
469 | 2021-05-22 18:52:07,708 INFO MainThread root Assessing the speed of folding algorithm at length 753
470 | 
471 | 2021-05-22 18:52:31,562 INFO MainThread root 23.853287499999993 for 1000 calls
472 | 2021-05-22 18:52:31,562 INFO MainThread root Saving the related information at experiments/700_info.joblib
473 | 
474 | 2021-05-22 18:52:31,563 INFO MainThread root 
475 | 
476 | =======
477 | 
478 | 
479 | 2021-05-22 18:52:31,652 INFO MainThread root Assessing the speed of folding algorithm at length 862
480 | 
481 | 2021-05-22 18:52:59,035 INFO MainThread root 27.38281640000001 for 1000 calls
482 | 2021-05-22 18:52:59,035 INFO MainThread root Saving the related information at experiments/800_info.joblib
483 | 
484 | 2021-05-22 18:52:59,036 INFO MainThread root 
485 | 
486 | =======
487 | 
488 | 
489 | 2021-05-22 18:52:59,150 INFO MainThread root Assessing the speed of folding algorithm at length 994
490 | 
491 | 2021-05-22 18:53:31,180 INFO MainThread root 32.029055200000016 for 1000 calls
492 | 2021-05-22 18:53:31,180 INFO MainThread root Saving the related information at experiments/900_info.joblib
493 | 
494 | 2021-05-22 18:53:31,181 INFO MainThread root 
495 | 
496 | =======
497 | 
498 | 
499 | 2021-05-22 18:53:31,181 INFO MainThread root Preparing speed tests: for device device(type='cuda') - hybrid=True
500 | 
501 | 2021-05-22 18:53:32,532 INFO MainThread root Assessing the speed of folding algorithm at length 134
502 | 
503 | 2021-05-22 18:53:51,151 INFO MainThread root 18.61965140000001 for 1000 calls
504 | 2021-05-22 18:53:51,152 INFO MainThread root Saving the related information at experiments/100_info.joblib
505 | 
506 | 2021-05-22 18:53:51,153 INFO MainThread root 
507 | 
508 | =======
509 | 
510 | 
511 | 2021-05-22 18:53:51,347 INFO MainThread root Assessing the speed of folding algorithm at length 214
512 | 
513 | 2021-05-22 18:54:11,743 INFO MainThread root 20.395728099999985 for 1000 calls
514 | 2021-05-22 18:54:11,743 INFO MainThread root Saving the related information at experiments/200_info.joblib
515 | 
516 | 2021-05-22 18:54:11,744 INFO MainThread root 
517 | 
518 | =======
519 | 
520 | 
521 | 2021-05-22 18:54:12,052 INFO MainThread root Assessing the speed of folding algorithm at length 336
522 | 
523 | 2021-05-22 18:54:34,875 INFO MainThread root 22.822907499999985 for 1000 calls
524 | 2021-05-22 18:54:34,875 INFO MainThread root Saving the related information at experiments/300_info.joblib
525 | 
526 | 2021-05-22 18:54:34,876 INFO MainThread root 
527 | 
528 | =======
529 | 
530 | 
531 | 2021-05-22 18:54:35,239 INFO MainThread root Assessing the speed of folding algorithm at length 401
532 | 
533 | 2021-05-22 18:54:59,075 INFO MainThread root 23.83573979999997 for 1000 calls
534 | 2021-05-22 18:54:59,075 INFO MainThread root Saving the related information at experiments/400_info.joblib
535 | 
536 | 2021-05-22 18:54:59,076 INFO MainThread root 
537 | 
538 | =======
539 | 
540 | 
541 | 2021-05-22 18:54:59,530 INFO MainThread root Assessing the speed of folding algorithm at length 501
542 | 
543 | 2021-05-22 18:55:24,297 INFO MainThread root 24.76649520000001 for 1000 calls
544 | 2021-05-22 18:55:24,297 INFO MainThread root Saving the related information at experiments/500_info.joblib
545 | 
546 | 2021-05-22 18:55:24,298 INFO MainThread root 
547 | 
548 | =======
549 | 
550 | 
551 | 2021-05-22 18:55:24,858 INFO MainThread root Assessing the speed of folding algorithm at length 621
552 | 
553 | 2021-05-22 18:55:50,855 INFO MainThread root 25.996778500000005 for 1000 calls
554 | 2021-05-22 18:55:50,856 INFO MainThread root Saving the related information at experiments/600_info.joblib
555 | 
556 | 2021-05-22 18:55:50,856 INFO MainThread root 
557 | 
558 | =======
559 | 
560 | 
561 | 2021-05-22 18:55:51,538 INFO MainThread root Assessing the speed of folding algorithm at length 753
562 | 
563 | 2021-05-22 18:56:19,326 INFO MainThread root 27.787718600000005 for 1000 calls
564 | 2021-05-22 18:56:19,326 INFO MainThread root Saving the related information at experiments/700_info.joblib
565 | 
566 | 2021-05-22 18:56:19,327 INFO MainThread root 
567 | 
568 | =======
569 | 
570 | 
571 | 2021-05-22 18:56:20,108 INFO MainThread root Assessing the speed of folding algorithm at length 862
572 | 
573 | 2021-05-22 18:56:49,570 INFO MainThread root 29.461670400000003 for 1000 calls
574 | 2021-05-22 18:56:49,570 INFO MainThread root Saving the related information at experiments/800_info.joblib
575 | 
576 | 2021-05-22 18:56:49,571 INFO MainThread root 
577 | 
578 | =======
579 | 
580 | 
581 | 2021-05-22 18:56:50,504 INFO MainThread root Assessing the speed of folding algorithm at length 994
582 | 
583 | 2021-05-22 18:57:21,194 INFO MainThread root 30.689694900000006 for 1000 calls
584 | 2021-05-22 18:57:21,194 INFO MainThread root Saving the related information at experiments/900_info.joblib
585 | 
586 | 2021-05-22 18:57:21,195 INFO MainThread root 
587 | 
588 | =======
589 | 
590 | 
591 | 2021-05-22 18:57:21,196 INFO MainThread root Preparing speed tests: for device device(type='cuda') -- hybrid=False
592 | 
593 | 2021-05-22 18:57:21,317 INFO MainThread root Assessing the speed of folding algorithm at length 134
594 | 
595 | 2021-05-22 18:57:41,246 INFO MainThread root 19.9283054 for 1000 calls
596 | 2021-05-22 18:57:41,246 INFO MainThread root Saving the related information at experiments/100_info.joblib
597 | 
598 | 2021-05-22 18:57:41,247 INFO MainThread root 
599 | 
600 | =======
601 | 
602 | 
603 | 2021-05-22 18:57:41,440 INFO MainThread root Assessing the speed of folding algorithm at length 214
604 | 
605 | 2021-05-22 18:58:03,719 INFO MainThread root 22.279464399999995 for 1000 calls
606 | 2021-05-22 18:58:03,719 INFO MainThread root Saving the related information at experiments/200_info.joblib
607 | 
608 | 2021-05-22 18:58:03,720 INFO MainThread root 
609 | 
610 | =======
611 | 
612 | 
613 | 2021-05-22 18:58:04,024 INFO MainThread root Assessing the speed of folding algorithm at length 336
614 | 
615 | 2021-05-22 18:58:29,494 INFO MainThread root 25.469947400000024 for 1000 calls
616 | 2021-05-22 18:58:29,494 INFO MainThread root Saving the related information at experiments/300_info.joblib
617 | 
618 | 2021-05-22 18:58:29,495 INFO MainThread root 
619 | 
620 | =======
621 | 
622 | 
623 | 2021-05-22 18:58:29,859 INFO MainThread root Assessing the speed of folding algorithm at length 401
624 | 
625 | 2021-05-22 18:58:58,097 INFO MainThread root 28.238597999999968 for 1000 calls
626 | 2021-05-22 18:58:58,098 INFO MainThread root Saving the related information at experiments/400_info.joblib
627 | 
628 | 2021-05-22 18:58:58,098 INFO MainThread root 
629 | 
630 | =======
631 | 
632 | 
633 | 2021-05-22 18:58:58,548 INFO MainThread root Assessing the speed of folding algorithm at length 501
634 | 
635 | 2021-05-22 18:59:29,343 INFO MainThread root 30.79467580000005 for 1000 calls
636 | 2021-05-22 18:59:29,343 INFO MainThread root Saving the related information at experiments/500_info.joblib
637 | 
638 | 2021-05-22 18:59:29,344 INFO MainThread root 
639 | 
640 | =======
641 | 
642 | 
643 | 2021-05-22 18:59:29,904 INFO MainThread root Assessing the speed of folding algorithm at length 621
644 | 
645 | 2021-05-22 19:00:03,234 INFO MainThread root 33.3301616 for 1000 calls
646 | 2021-05-22 19:00:03,234 INFO MainThread root Saving the related information at experiments/600_info.joblib
647 | 
648 | 2021-05-22 19:00:03,235 INFO MainThread root 
649 | 
650 | =======
651 | 
652 | 
653 | 2021-05-22 19:00:03,915 INFO MainThread root Assessing the speed of folding algorithm at length 753
654 | 
655 | 2021-05-22 19:00:40,486 INFO MainThread root 36.570508099999984 for 1000 calls
656 | 2021-05-22 19:00:40,486 INFO MainThread root Saving the related information at experiments/700_info.joblib
657 | 
658 | 2021-05-22 19:00:40,487 INFO MainThread root 
659 | 
660 | =======
661 | 
662 | 
663 | 2021-05-22 19:00:41,265 INFO MainThread root Assessing the speed of folding algorithm at length 862
664 | 
665 | 2021-05-22 19:01:21,325 INFO MainThread root 40.06054449999999 for 1000 calls
666 | 2021-05-22 19:01:21,326 INFO MainThread root Saving the related information at experiments/800_info.joblib
667 | 
668 | 2021-05-22 19:01:21,327 INFO MainThread root 
669 | 
670 | =======
671 | 
672 | 
673 | 2021-05-22 19:01:22,419 INFO MainThread root Assessing the speed of folding algorithm at length 994
674 | 
675 | 2021-05-22 19:02:06,714 INFO MainThread root 44.29495259999999 for 1000 calls
676 | 2021-05-22 19:02:06,714 INFO MainThread root Saving the related information at experiments/900_info.joblib
677 | 
678 | 2021-05-22 19:02:06,715 INFO MainThread root 
679 | 
680 | =======
681 | 
682 | 
683 | 2021-05-22 19:02:06,715 INFO MainThread root Execution has finished
684 | 
685 | 


--------------------------------------------------------------------------------
/notebooks/experiments/profile_csv:
--------------------------------------------------------------------------------
 1 | ncalls,tottime,percall,cumtime,percall,filename:lineno(function)
 2 | 1,0.00598,0.00598,0.0171,0.0171,massive_pnerf.py:70(proto_fold)
 3 | 16,0.004091,0.0002557,0.004091,0.0002557,~:0(<built-in method frobenius_norm>)
 4 | 773,0.002993,3.872e-06,0.002993,3.872e-06,~:0(<built-in method matmul>)
 5 | 14,0.001775,0.0001268,0.00792,0.0005657,massive_pnerf.py:40(mp_nerf_torch)
 6 | 32,0.000533,1.666e-05,0.000533,1.666e-05,~:0(<built-in method cross>)
 7 | 30,0.000495,1.65e-05,0.000495,1.65e-05,~:0(<built-in method stack>)
 8 | 14,0.000146,1.043e-05,0.000146,1.043e-05,~:0(<method 'squeeze' of 'torch._C._TensorBase' objects>)
 9 | 29,0.000119,4.103e-06,0.000119,4.103e-06,~:0(<built-in method cos>)
10 | 16,0.000117,7.312e-06,0.004243,0.0002652,functional.py:1274(norm)
11 | 25,0.000103,4.12e-06,0.000103,4.12e-06,~:0(<method 'unbind' of 'torch._C._TensorBase' objects>)
12 | 43,0.0001,2.326e-06,0.0001,2.326e-06,~:0(<built-in method sin>)
13 | 14,7.2e-05,5.143e-06,7.2e-05,5.143e-06,~:0(<method 'all' of 'torch._C._TensorBase' objects>)
14 | 25,7.1e-05,2.84e-06,0.000225,9e-06,tensor.py:575(__iter__)
15 | 2,5.9e-05,2.95e-05,0.000483,0.0002415,massive_pnerf.py:10(get_axis_matrix)
16 | 29,4.9e-05,1.69e-06,4.9e-05,1.69e-06,~:0(<method 'unsqueeze' of 'torch._C._TensorBase' objects>)
17 | 2,4.3e-05,2.15e-05,4.3e-05,2.15e-05,~:0(<method 'repeat' of 'torch._C._TensorBase' objects>)
18 | 1,4e-05,4e-05,0.01716,0.01716,~:0(<built-in method builtins.exec>)
19 | 25,3e-05,1.2e-06,3e-05,1.2e-06,~:0(<built-in method torch._C._get_tracing_state>)
20 | 4,2.8e-05,7e-06,2.8e-05,7e-06,~:0(<built-in method tensor>)
21 | 6,2.8e-05,4.667e-06,2.8e-05,4.667e-06,~:0(<method 'type' of 'torch._C._TensorBase' objects>)
22 | 1,2.3e-05,2.3e-05,2.3e-05,2.3e-05,~:0(<built-in method zeros>)
23 | 1,2.3e-05,2.3e-05,0.01712,0.01712,<string>:1(<module>)
24 | 1,1.9e-05,1.9e-05,1.9e-05,1.9e-05,~:0(<built-in method cumsum>)
25 | 14,1.8e-05,1.286e-06,1.8e-05,1.286e-06,~:0(<method 'item' of 'torch._C._TensorBase' objects>)
26 | 11,1.6e-05,1.455e-06,1.9e-05,1.727e-06,tensor.py:568(__len__)
27 | 2,1.5e-05,7.5e-06,1.5e-05,7.5e-06,~:0(<built-in method rsub>)
28 | 4,1.3e-05,3.25e-06,1.3e-05,3.25e-06,~:0(<method 'reshape' of 'torch._C._TensorBase' objects>)
29 | 2,1.2e-05,6e-06,0.000107,5.35e-05,einops.py:202(apply)
30 | 25,1.1e-05,4.4e-07,1.1e-05,4.4e-07,~:0(<built-in method builtins.iter>)
31 | 16,1.1e-05,6.875e-07,2e-05,1.25e-06,_VF.py:25(__getattr__)
32 | 52,1.1e-05,2.115e-07,1.1e-05,2.115e-07,~:0(<method 'dim' of 'torch._C._TensorBase' objects>)
33 | 2,1e-05,5e-06,2.5e-05,1.25e-05,tensor.py:525(__rsub__)
34 | 2,1e-05,5e-06,0.00012,6e-05,einops.py:327(reduce)
35 | 16,9e-06,5.625e-07,9e-06,5.625e-07,~:0(<built-in method builtins.getattr>)
36 | 34,9e-06,2.647e-07,9e-06,2.647e-07,~:0(<built-in method builtins.isinstance>)
37 | 1,8e-06,8e-06,8e-06,8e-06,~:0(<built-in method cat>)
38 | 52,8e-06,1.538e-07,8e-06,1.538e-07,~:0(<built-in method torch._C._has_torch_function_unary>)
39 | 2,6e-06,3e-06,6e-06,3e-06,~:0(<built-in method unsqueeze>)
40 | 1,6e-06,6e-06,6e-06,6e-06,~:0(<method 'nonzero' of 'torch._C._TensorBase' objects>)
41 | 2,6e-06,3e-06,6e-06,3e-06,~:0(<method 'permute' of 'torch._C._TensorBase' objects>)
42 | 2,5e-06,2.5e-06,7e-06,3.5e-06,_backends.py:22(get_backend)
43 | 2,4e-06,2e-06,4e-06,2e-06,einops.py:26(_reduce_axes)
44 | 2,4e-06,2e-06,5.9e-05,2.95e-05,_backends.py:98(add_axes)
45 | 2,4e-06,2e-06,4.7e-05,2.35e-05,_backends.py:336(tile)
46 | 2,4e-06,2e-06,4e-06,2e-06,~:0(<method 'to' of 'torch._C._TensorBase' objects>)
47 | 1,4e-06,4e-06,4e-06,4e-06,~:0(<method 't' of 'torch._C._TensorBase' objects>)
48 | 2,3e-06,1.5e-06,0.000123,6.15e-05,einops.py:427(repeat)
49 | 2,3e-06,1.5e-06,9e-06,4.5e-06,_backends.py:330(transpose)
50 | 1,3e-06,3e-06,3e-06,3e-06,~:0(<method 'view' of 'torch._C._TensorBase' objects>)
51 | 6,2e-06,3.333e-07,2e-06,3.333e-07,~:0(<method 'items' of 'dict' objects>)
52 | 4,2e-06,5e-07,1.5e-05,3.75e-06,_backends.py:83(reshape)
53 | 2,2e-06,1e-06,2e-06,1e-06,_backends.py:302(is_appropriate_type)
54 | 2,2e-06,1e-06,8e-06,4e-06,_backends.py:339(add_axis)
55 | 8,1e-06,1.25e-07,1e-06,1.25e-07,~:0(<built-in method builtins.len>)
56 | 2,1e-06,5e-07,1e-06,5e-07,~:0(<built-in method builtins.sorted>)
57 | 1,1e-06,1e-06,1e-06,1e-06,~:0(<method 'disable' of '_lsprof.Profiler' objects>)
58 | 2,0,0,0,0,~:0(<built-in method builtins.callable>)
59 | 2,0,0,0,0,_backends.py:79(shape)
60 | 2,0,0,0,0,~:0(<built-in method torch._C._has_torch_function_variadic>)
61 | Showing 1 to 59 of 59 entries
62 | 


--------------------------------------------------------------------------------
/notebooks/experiments_manual/analyzed_prots.joblib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments_manual/analyzed_prots.joblib


--------------------------------------------------------------------------------
/notebooks/experiments_manual/error_evolution.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments_manual/error_evolution.png


--------------------------------------------------------------------------------
/notebooks/experiments_manual/histogram_errors.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments_manual/histogram_errors.png


--------------------------------------------------------------------------------
/notebooks/experiments_manual/profiler_capture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments_manual/profiler_capture.png


--------------------------------------------------------------------------------
/notebooks/experiments_manual/rclab_data/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 RCL-lab
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/notebooks/experiments_manual/rclab_data/other_prots.csv:
--------------------------------------------------------------------------------
1 | prot,url,chain,num,
2 | LDH,_,_,_,
3 | Ribonuclease,_,_,_,


--------------------------------------------------------------------------------
/notebooks/integrated_alanines.py:
--------------------------------------------------------------------------------
 1 | ##########################
 2 | # Clone repos with utils #
 3 | ##########################
 4 | 
 5 | # !git clone https://github.com/hypnopump/geometric-vector-perceptron
 6 | 
 7 | import os
 8 | import sys
 9 | import time
10 | import timeit
11 | import logging
12 | 
13 | # science
14 | import numpy as np 
15 | import torch
16 | import prody as pr
17 | import sidechainnet
18 | from sidechainnet.utils.sequence import ProteinVocabulary as VOCAB
19 | VOCAB = VOCAB()
20 | 
21 | # process
22 | import joblib
23 | 
24 | # custom
25 | import mp_nerf
26 | 
27 | BASE_FOLDER = "experiments/"
28 | 
29 | logging.basicConfig(level=logging.DEBUG,
30 |                     format="%(asctime)s %(levelname)s %(threadName)s %(name)s %(message)s",
31 |                     # datefmt='%m-%d %H:%M',
32 |                     filename=BASE_FOLDER+"logs_experiment.txt",
33 |                     filemode="a")
34 | logger = logging.getLogger()
35 | sep = "\n\n=======\n\n"
36 | 
37 | 
38 | # begin tests
39 | if __name__ == "__main__":
40 | 
41 |     logger.info("Loading data"+"\n")
42 |     
43 |     dataloc = "experiments_manual/rclab_data/"
44 |     filenames = [dataloc+x for x in os.listdir(dataloc) if x.endswith(".pdb")]
45 | 
46 |     run_opts = [(torch.device("cpu"), False)] # tuples of (device, hybrid)
47 |     # add possibility for different configs
48 |     if torch.cuda.is_available():
49 |         run_opts.append( (torch.device("cuda"), True))
50 |         run_opts.append( (torch.device("cuda"), False))
51 | 
52 | 
53 |     for device,hybrid in run_opts:
54 | 
55 |         logger.info("Preparing speed tests: for device "+repr(device)+" and hybrid_opt: "+str(hybrid)+"\n")
56 | 
57 |         for i,filename in enumerate(filenames):
58 | 
59 |             # get data
60 |             keys = ["angles_np", "coords_np", "observed_sequence"]
61 |             chain = pr.parsePDB(datafile, chain=chain, model=1)
62 |             parsed = sidechainnet.utils.measure.get_seq_coords_and_angles(chain)
63 |             data = {k:v for k,v in zip(keys, parsed)}
64 |             # get scaffs
65 |             scaffolds = mp_nerf.proteins.build_scaffolds_from_scn_angles(data["observed_sequence"], 
66 |                                                                          torch.from_numpy(data["angles_np"]).to(device))
67 | 
68 |             logger.info("Assessing the speed of folding algorithm at file "+filenames[i]+"\n")
69 | 
70 |             logger.info( str( timeit.timeit('mp_nerf.proteins.protein_fold(**scaffolds, device=device, hybrid=hybrid)',
71 |             	                             globals=globals(), number=1000) )+" for 1000 calls" )
72 | 
73 |             logger.info("Done")
74 |             logger.info(sep)
75 | 
76 |     logger.info("Execution has finished\n")


--------------------------------------------------------------------------------
/notebooks/integrated_test.py:
--------------------------------------------------------------------------------
 1 | ##########################
 2 | # Clone repos with utils #
 3 | ##########################
 4 | 
 5 | # !git clone https://github.com/hypnopump/geometric-vector-perceptron
 6 | 
 7 | import os
 8 | import sys
 9 | import time
10 | import timeit
11 | import logging
12 | 
13 | # science
14 | import numpy as np 
15 | import torch
16 | import sidechainnet
17 | from sidechainnet.utils.sequence import ProteinVocabulary as VOCAB
18 | VOCAB = VOCAB()
19 | 
20 | # process
21 | import joblib
22 | 
23 | # custom
24 | import mp_nerf
25 | 
26 | BASE_FOLDER = "experiments/"
27 | 
28 | logging.basicConfig(level=logging.DEBUG,
29 |                     format="%(asctime)s %(levelname)s %(threadName)s %(name)s %(message)s",
30 |                     # datefmt='%m-%d %H:%M',
31 |                     filename=BASE_FOLDER+"logs_experiment.txt",
32 |                     filemode="a")
33 | logger = logging.getLogger()
34 | sep = "\n\n=======\n\n"
35 | 
36 | 
37 | # begin tests
38 | if __name__ == "__main__":
39 | 
40 |     logger.info("Loading data"+"\n")
41 |     lengths = [100, 200, 300, 400, 500, 600, 700, 800, 900]# [::-1]
42 |     try: 
43 |         "a"+9
44 |         # skip
45 |         dataloaders_ = sidechainnet.load(casp_version=7, with_pytorch="dataloaders", batch_size=2)
46 |         logger.info("Data has been loaded"+"\n"+sep)
47 |         stored  = [ mp_nerf.utils.get_prot(dataloader_=dataloaders_, 
48 |                                            vocab_=VOCAB, 
49 |                                            min_len=desired_len+5, 
50 |                                            max_len=desired_len+60) for desired_len in lengths ]
51 |         joblib.dump(stored, BASE_FOLDER[:-1]+"_manual/analyzed_prots.joblib")
52 |     except: 
53 |         stored = joblib.load(BASE_FOLDER[:-1]+"_manual/analyzed_prots.joblib")
54 |         logger.info("Data has been loaded"+"\n"+sep)
55 | 
56 |     logger.info("Assessing lengths of: "+str([len(x[0]) for x in stored])+"\n")
57 | 
58 |     run_opts = [(torch.device("cpu"), False)] # tuples of (device, hybrid)
59 |     # add possibility for different configs
60 |     if torch.cuda.is_available():
61 |         run_opts.append( (torch.device("cuda"), True))
62 |         run_opts.append( (torch.device("cuda"), False))
63 | 
64 | 
65 |     for device,hybrid in run_opts:
66 | 
67 |         logger.info("Preparing speed tests: for device "+repr(device)+" and hybrid_opt: "+str(hybrid)+"\n")
68 | 
69 |         for i,desired_len in enumerate(lengths):
70 | 
71 |             seq, int_seq, true_coords, angles, padding_seq, mask, pid = stored[i]
72 |             scaffolds = mp_nerf.proteins.build_scaffolds_from_scn_angles(seq, angles.to(device))
73 | 
74 |             logger.info("Assessing the speed of folding algorithm at length "+str(len(seq))+"\n")
75 | 
76 |             logger.info( str( timeit.timeit('mp_nerf.proteins.protein_fold(**scaffolds, device=device, hybrid=hybrid)',
77 |             	                             globals=globals(), number=1000) )+" for 1000 calls" )
78 | 
79 |             logger.info("Saving the related information at {0}{1}_info.joblib\n".format(
80 |                         BASE_FOLDER, desired_len))
81 |             joblib.dump({"seq": seq, 
82 |                          "true_coords": true_coords,
83 |                          "angles": angles,
84 |                          "padding_seq": padding_seq,
85 |                          "mask": mask,
86 |                          "pid": pid, 
87 |                          "padding_stripped": True}, BASE_FOLDER+str(desired_len)+"_info.joblib")
88 |             logger.info(sep)
89 | 
90 |     logger.info("Execution has finished\n")


--------------------------------------------------------------------------------
/notebooks/xtension/plots/A_plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/A_plot_hists.png


--------------------------------------------------------------------------------
/notebooks/xtension/plots/C_plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/C_plot_hists.png


--------------------------------------------------------------------------------
/notebooks/xtension/plots/D_plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/D_plot_hists.png


--------------------------------------------------------------------------------
/notebooks/xtension/plots/E_plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/E_plot_hists.png


--------------------------------------------------------------------------------
/notebooks/xtension/plots/F_plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/F_plot_hists.png


--------------------------------------------------------------------------------
/notebooks/xtension/plots/G_plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/G_plot_hists.png


--------------------------------------------------------------------------------
/notebooks/xtension/plots/H_plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/H_plot_hists.png


--------------------------------------------------------------------------------
/notebooks/xtension/plots/I_plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/I_plot_hists.png


--------------------------------------------------------------------------------
/notebooks/xtension/plots/K_plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/K_plot_hists.png


--------------------------------------------------------------------------------
/notebooks/xtension/plots/L_plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/L_plot_hists.png


--------------------------------------------------------------------------------
/notebooks/xtension/plots/M_plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/M_plot_hists.png


--------------------------------------------------------------------------------
/notebooks/xtension/plots/N_plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/N_plot_hists.png


--------------------------------------------------------------------------------
/notebooks/xtension/plots/P_plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/P_plot_hists.png


--------------------------------------------------------------------------------
/notebooks/xtension/plots/Q_plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/Q_plot_hists.png


--------------------------------------------------------------------------------
/notebooks/xtension/plots/R_plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/R_plot_hists.png


--------------------------------------------------------------------------------
/notebooks/xtension/plots/S_plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/S_plot_hists.png


--------------------------------------------------------------------------------
/notebooks/xtension/plots/T_plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/T_plot_hists.png


--------------------------------------------------------------------------------
/notebooks/xtension/plots/V_plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/V_plot_hists.png


--------------------------------------------------------------------------------
/notebooks/xtension/plots/W_plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/W_plot_hists.png


--------------------------------------------------------------------------------
/notebooks/xtension/plots/Y_plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/Y_plot_hists.png


--------------------------------------------------------------------------------
/notebooks/xtension/plots/__plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/__plot_hists.png


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [aliases]
2 | test=pytest
3 | 
4 | [tool:pytest]
5 | addopts = --verbose
6 | python_files = tests/*.py


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, find_packages
 2 | 
 3 | setup(
 4 |   name = 'mp-nerf',
 5 |   packages = find_packages(),
 6 |   version = '1.0.3',
 7 |   license='MIT',
 8 |   description = 'MP-NeRF: Massively Parallel Natural Extension of Reference Frame',
 9 |   author = 'Eric Alcaide',
10 |   author_email = 'ericalcaide1@gmail.com',
11 |   url = 'https://github.com/eleutherAI/mp_nerf',
12 |   keywords = [
13 |     'computational biolgy',
14 |     'bioinformatics',
15 |     'machine learning' 
16 |   ],
17 |   install_requires=[
18 |     'einops>=0.3',
19 |     'numpy',
20 |     'torch>=1.6', # 'sidechainnet' # for tests
21 |   ],
22 |   setup_requires=[
23 |     'pytest-runner',
24 |   ],
25 |   tests_require=[
26 |     'pytest'
27 |   ],
28 |   classifiers=[
29 |     'Development Status :: 4 - Beta',
30 |     'Intended Audience :: Developers',
31 |     'Topic :: Scientific/Engineering :: Artificial Intelligence',
32 |     'License :: OSI Approved :: MIT License',
33 |     'Programming Language :: Python :: 3.6',
34 |   ],
35 | )
36 | 


--------------------------------------------------------------------------------
/tests/test_main.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import torch
 3 | 
 4 | from mp_nerf import *
 5 | from mp_nerf.utils import *
 6 | from mp_nerf.kb_proteins import *
 7 | from mp_nerf.proteins import *
 8 | 
 9 | def test_nerf_and_dihedral():
10 |     # create four points a, b, c, d
11 |     a = torch.tensor([1,2,3]).float()
12 |     b = torch.tensor([1,4,5]).float()
13 |     c = torch.tensor([1,4,7]).float()
14 |     d = torch.tensor([1,8,8]).float()
15 |     # vectors between consecutive points
16 |     v1 = (b-a).numpy()
17 |     v2 = (c-b).numpy()
18 |     v3 = (d-c).numpy()
19 |     # theta: angle between v2 and v3
20 |     theta = np.arccos( np.dot(v2, v3) / \
21 |                       (np.linalg.norm(v2) * np.linalg.norm(v3) )) 
22 |     # chi: angle between the normals cross(v1, v2) and cross(v2, v3)
23 |     normal_p  = np.cross(v1, v2) 
24 |     normal_p_ = np.cross(v2, v3)
25 |     chi = np.arccos( np.dot(normal_p, normal_p_) / \
26 |                     (np.linalg.norm(normal_p) * np.linalg.norm(normal_p_) ))
27 |     # length of v3 (c -> d), then wrap the scalars as tensors
28 |     l = torch.tensor(np.linalg.norm(v3))
29 |     theta = torch.tensor(theta)
30 |     chi = torch.tensor(chi)
31 |     # reconstruct
32 |     # passing chi directly doesn't work because the scn angle was not measured correctly,
33 |     # so the method corrects for that error (hence chi - np.pi below — TODO confirm)
34 |     assert (mp_nerf_torch(a, b, c, l, theta, chi - np.pi) - torch.tensor([1,0,6])).sum().abs() < 0.1
35 |     assert get_dihedral(a, b, c, d).item() == chi
36 | 
37 | 
38 | def test_modify_angles_mask_with_torsions():
39 |     # create inputs: a 16-letter sequence, a random (2, 16, 14) mask and (16, 4) torsions of ones
40 |     seq = "AGHHKLHRTVNMSTIL"
41 |     angles_mask = torch.randn(2, 16, 14)
42 |     torsions = torch.ones(16, 4)
43 |     # the result must keep the shape of the input mask
44 |     assert modify_angles_mask_with_torsions(seq, angles_mask, torsions).shape == angles_mask.shape, \
45 |            "Shapes don't match"


--------------------------------------------------------------------------------
/tests/test_ml_utils.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import torch
 3 | 
 4 | from mp_nerf import *
 5 | from mp_nerf.utils import *
 6 | from mp_nerf.ml_utils import *
 7 | from mp_nerf.kb_proteins import *
 8 | from mp_nerf.proteins import *
 9 | 
10 | 
11 | # test ML utils
12 | def test_scn_atom_embedd(): 
13 |     seq_list = ["AGHHKLHRTVNMSTIL",  # two sequences of 16 letters each
14 |                 "WERTQLITANMWTCSD"]
15 |     embedds = scn_atom_embedd(seq_list)
16 |     assert embedds.shape == torch.Size([2, 16, 14]), "Shapes don't match"  # (sequences, letters, 14)
17 | 
18 | 
19 | def test_chain_to_atoms(): 
20 |     chain = torch.randn(100, 3)  # 100 random 3-component rows
21 |     atoms = chain2atoms(chain, c=14)
22 |     assert atoms.shape == torch.Size([100, 14, 3]), "Shapes don't match"  # c=14 is expected as the middle axis
23 | 
24 | 
25 | def test_rename_symmetric_atoms(): 
26 |     seq_list = ["AGHHKLHRTVNMSTIL"]
27 |     pred_coors = torch.randn(1, 16, 14, 3)
28 |     true_coors = torch.randn(1, 16, 14, 3)
29 |     cloud_mask = scn_cloud_mask(seq_list[0]).unsqueeze(0)  # add a leading axis of size 1
30 |     pred_feats = torch.randn(1, 16, 14, 16)
31 |     # only shapes are checked: renamed[0] against pred_coors, renamed[1] against pred_feats
32 |     renamed = rename_symmetric_atoms(pred_coors, true_coors, seq_list, cloud_mask, pred_feats=pred_feats)
33 |     assert renamed[0].shape == pred_coors.shape and renamed[1].shape == pred_feats.shape, "Shapes don't match"
34 | 
35 | 
36 | def test_torsion_angle_loss():
37 |     pred_torsions = torch.randn(1, 100, 7)
38 |     true_torsions = torch.randn(1, 100, 7)
39 |     angle_mask = pred_torsions <= 2.  # NOTE(review): built but unused; angle_mask=None is passed below — confirm intent
40 |     # the returned loss must have the same shape as the predictions (checked below)
41 |     loss = torsion_angle_loss(pred_torsions, true_torsions, 
42 |                               coeff=2., angle_mask=None)
43 |     assert loss.shape == pred_torsions.shape, "Shapes don't match"
44 | 
45 | 
46 | def test_fape_loss_torch():
47 |     seq_list = ["AGHHKLHRTVNMSTIL"]
48 |     pred_coords = torch.randn(1, 16, 14, 3)
49 |     true_coords = torch.randn(1, 16, 14, 3)
50 |     # call fape_torch once with c_alpha=True and once with c_alpha=False
51 |     loss_c_alpha = fape_torch(pred_coords, true_coords, c_alpha=True, seq_list=seq_list)
52 |     loss_full = fape_torch(pred_coords, true_coords, c_alpha=False, seq_list=seq_list)
53 |     # smoke test: the returned losses are not inspected, so this passes whenever neither call raises
54 |     assert True
55 | 
56 | 
57 | 
58 | 
59 | 


--------------------------------------------------------------------------------