├── .github
└── workflows
│ ├── python-package.yml
│ └── python-publish.yml
├── .gitignore
├── LICENSE
├── README.md
├── mp_nerf
├── __init__.py
├── kb_proteins.py
├── massive_pnerf.py
├── ml_utils.py
├── proteins.py
└── utils.py
├── notebooks
├── experiments
│ ├── [131, 150]_info.joblib
│ ├── [200, 250]_info.joblib
│ ├── [331, 351]_info.joblib
│ ├── [400, 450]_info.joblib
│ ├── [500, 550]_info.joblib
│ ├── [600, 650]_info.joblib
│ ├── [700, 780]_info.joblib
│ ├── [800, 900]_info.joblib
│ ├── [905, 1070]_info.joblib
│ ├── [905, 970]_info.joblib
│ ├── logs_experiment.txt
│ ├── logs_experiment_scn_various_lengths.txt
│ └── profile_csv
├── experiments_manual
│ ├── analyzed_prots.joblib
│ ├── error_evolution.png
│ ├── histogram_errors.png
│ ├── profiler_capture.png
│ └── rclab_data
│ │ ├── 1000_ala.pdb
│ │ ├── 500_ala.pdb
│ │ ├── 5rsa_ribonuclease.pdb
│ │ ├── LICENSE
│ │ ├── il10_lactate_dh.pdb
│ │ └── other_prots.csv
├── extend_measures.ipynb
├── integrated_alanines.py
├── integrated_test.py
├── preds
│ ├── labels.pdb
│ └── predicted.pdb
├── test_implementation_loop.ipynb
├── test_implementation_speed.ipynb
└── xtension
│ └── plots
│ ├── A_plot_hists.png
│ ├── C_plot_hists.png
│ ├── D_plot_hists.png
│ ├── E_plot_hists.png
│ ├── F_plot_hists.png
│ ├── G_plot_hists.png
│ ├── H_plot_hists.png
│ ├── I_plot_hists.png
│ ├── K_plot_hists.png
│ ├── L_plot_hists.png
│ ├── M_plot_hists.png
│ ├── N_plot_hists.png
│ ├── P_plot_hists.png
│ ├── Q_plot_hists.png
│ ├── R_plot_hists.png
│ ├── S_plot_hists.png
│ ├── T_plot_hists.png
│ ├── V_plot_hists.png
│ ├── W_plot_hists.png
│ ├── Y_plot_hists.png
│ └── __plot_hists.png
├── setup.cfg
├── setup.py
└── tests
├── test_main.py
└── test_ml_utils.py
/.github/workflows/python-package.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3 |
4 | name: Python package
5 |
6 | on:
7 | push:
8 | branches: [ main ]
9 | pull_request:
10 | branches: [ main ]
11 |
12 | jobs:
13 | build:
14 |
15 | runs-on: ubuntu-latest
16 | strategy:
17 | matrix:
18 | python-version: [3.7, 3.8]
19 |
20 | steps:
21 | - uses: actions/checkout@v2
22 | - name: Set up Python ${{ matrix.python-version }}
23 | uses: actions/setup-python@v2
24 | with:
25 | python-version: ${{ matrix.python-version }}
26 | - name: Install dependencies
27 | run: |
28 | python -m pip install --upgrade pip
29 | python -m pip install pytest
30 | python -m pip install -U proDy requests
31 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
32 | - name: Test with pytest
33 | run: |
34 | python setup.py test
35 |
--------------------------------------------------------------------------------
/.github/workflows/python-publish.yml:
--------------------------------------------------------------------------------
1 | # This workflow will upload a Python Package using Twine when a release is created
2 | # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
3 |
4 | name: Upload Python Package
5 |
6 | on:
7 | release:
8 | types: [created]
9 |
10 | jobs:
11 | deploy:
12 |
13 | runs-on: ubuntu-latest
14 |
15 | steps:
16 | - uses: actions/checkout@v2
17 | - name: Set up Python
18 | uses: actions/setup-python@v2
19 | with:
20 | python-version: '3.x'
21 | - name: Install dependencies
22 | run: |
23 | python -m pip install --upgrade pip
24 | pip install setuptools wheel twine
25 | - name: Build and publish
26 | env:
27 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
28 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
29 | run: |
30 | python setup.py sdist bdist_wheel
31 | twine upload dist/*
32 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | */__pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 |
7 | # to ignore
8 | *.DS_Store
9 | *sidechainnet_data/*
10 | *.pkl
11 |
12 | # C extensions
13 | *.so
14 |
15 | # Distribution / packaging
16 | .Python
17 | build/
18 | develop-eggs/
19 | dist/
20 | downloads/
21 | eggs/
22 | .eggs/
23 | lib/
24 | lib64/
25 | parts/
26 | sdist/
27 | var/
28 | wheels/
29 | pip-wheel-metadata/
30 | share/python-wheels/
31 | *.egg-info/
32 | .installed.cfg
33 | *.egg
34 | MANIFEST
35 |
36 | # PyInstaller
37 | # Usually these files are written by a python script from a template
38 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
39 | *.manifest
40 | *.spec
41 |
42 | # Installer logs
43 | pip-log.txt
44 | pip-delete-this-directory.txt
45 |
46 | # Unit test / coverage reports
47 | htmlcov/
48 | .tox/
49 | .nox/
50 | .coverage
51 | .coverage.*
52 | .cache
53 | nosetests.xml
54 | coverage.xml
55 | *.cover
56 | *.py,cover
57 | .hypothesis/
58 | .pytest_cache/
59 |
60 | # Translations
61 | *.mo
62 | *.pot
63 |
64 | # Django stuff:
65 | *.log
66 | local_settings.py
67 | db.sqlite3
68 | db.sqlite3-journal
69 |
70 | # Flask stuff:
71 | instance/
72 | .webassets-cache
73 |
74 | # Scrapy stuff:
75 | .scrapy
76 |
77 | # Sphinx documentation
78 | docs/_build/
79 |
80 | # PyBuilder
81 | target/
82 |
83 | # Jupyter Notebook
84 | .ipynb_checkpoints
85 |
86 | # IPython
87 | profile_default/
88 | ipython_config.py
89 |
90 | # pyenv
91 | .python-version
92 |
93 | # pipenv
94 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
95 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
96 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
97 | # install all needed dependencies.
98 | #Pipfile.lock
99 |
100 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
101 | __pypackages__/
102 |
103 | # Celery stuff
104 | celerybeat-schedule
105 | celerybeat.pid
106 |
107 | # SageMath parsed files
108 | *.sage.py
109 |
110 | # Environments
111 | .env
112 | .venv
113 | env/
114 | venv/
115 | ENV/
116 | env.bak/
117 | venv.bak/
118 |
119 | # Spyder project settings
120 | .spyderproject
121 | .spyproject
122 |
123 | # Rope project settings
124 | .ropeproject
125 |
126 | # mkdocs documentation
127 | /site
128 |
129 | # mypy
130 | .mypy_cache/
131 | .dmypy.json
132 | dmypy.json
133 |
134 | # Pyre type checker
135 | .pyre/
136 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 |
2 | Copyright (c) 2021, Eric Alcaide
3 | All rights reserved.
4 |
5 | Redistribution and use in source and binary forms, with or without
6 | modification, are permitted provided that the following conditions are
7 | met:
8 |
9 | 1. Redistributions of source code must retain the above copyright
10 | notice, this list of conditions and the following disclaimer.
11 | 2. Redistributions in binary form must reproduce the above
12 | copyright notice, this list of conditions and the following
13 | disclaimer in the documentation and/or other materials provided
14 | with the distribution.
15 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote
16 | products derived from this software without specific prior written permission.
17 |
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # MP-NeRF: Massively Parallel Natural Extension of Reference Frame
2 |
3 | This is the code for the paper "[MP-NeRF: A Massively Parallel Method for Accelerating Protein Structure Reconstruction from Internal Coordinates](https://www.biorxiv.org/content/10.1101/2021.06.08.446214v1)"
4 |
5 | The code can be installed via `pip` with
6 |
7 | ```bash
8 | $ pip install mp-nerf
9 | ```
10 |
11 | ## Abstract
12 |
13 | The conversion of polymers between internal and cartesian coordinates is a limiting step in many pipelines, such as molecular dynamics simulations and training of machine learning models. This conversion is typically carried out by sequential or parallel applications of the Natural extension of Reference Frame (NeRF) algorithm.
14 |
15 | This work proposes a massively parallel NeRF implementation, which, depending on the polymer length, achieves speedups between 400-1200x over the most recent parallel NeRF implementation by dividing the conversion into three main phases: a parallel composition of the minimal repeated structure, the assembly of backbone subunits and the parallel elongation of sidechains.
16 |
17 | Special emphasis is placed on reusability and ease of use within diverse pipelines. We open source the code (available at https://github.com/EleutherAI/mp_nerf) and provide a corresponding python package.
18 |
19 |
20 | ## Results:
21 |
22 | * **Tests**: run on an Intel i5 @ 2.4 GHz (CPU) and on an Intel i7-6700K @ 4 GHz + Nvidia GTX 1060 6 GB (GPU)
23 |
24 | length | sota | **us (cpu)** | Nx | us (gpu) | us (hybrid) |
25 | ---------|--------|--------------|-------|----------|-------------|
26 | ~114 | 2.4s | **5.3ms** | ~446 | 21.1ms | 18.9ms |
27 | ~300 | 3.5s | **8.5ms** | ~400 | 26.2ms | 22.3ms |
28 | ~500 | 7.5s | **9.1ms** | ~651 | 29.2ms | 26.3ms |
29 | ~1000 | 18.66s | **15.3ms** | ~1200 | 43.3ms | 30.1ms |
30 |
31 | * **Profiler Trace (CPU)**:
32 |
33 |
34 |
35 |
36 | #### Considerations
37 |
38 | * In the GPU algorithm, much of the time is spent on data transfers, and the sequential loop is very inefficient on the GPU.
39 | * About half of the time is spent in memory-access patterns and the sequential `for` loop, so ideally a further 2x speedup would be possible by optimizing it or by running the sequential loop in Cython / Numba / similar.
40 | * Total profiler time should be multiplied by 0.5-0.63 to estimate the real run time (see the timings above, measured without the profiler), because profiling slows down the code.
41 |
42 |
43 | ## Installation:
44 |
45 | Just clone the repo
46 |
47 | You'll need:
48 | * torch > 1.6
49 | * numpy
50 | * einops
51 |
52 | Plus, if you want to run the experiments / work with data:
53 | * joblib
54 | * sidechainnet: https://github.com/jonathanking/sidechainnet#installation
55 | * manually install `ProDy`, `py3Dmol`, `snakeviz`:
56 | * `pip install proDy`
57 | * `pip install py3Dmol`
58 | * `pip install snakeviz`
59 | * any other package: `pip install package_name`
60 |
61 |
62 | * matplotlib (to do diagnostic plots)
63 |
64 | ## Citations:
65 |
66 | ```bibtex
67 | @article{Parsons2005PracticalCF,
68 | title={Practical conversion from torsion space to Cartesian space for in silico protein synthesis},
69 | author={Jerod Parsons and J. B. Holmes and J. M. Rojas and J. Tsai and C. Strauss},
70 | journal={Journal of Computational Chemistry},
71 | year={2005},
72 | volume={26}
73 | }
74 | ```
75 |
76 | ```bibtex
77 | @article{AlQuraishi2018pNeRFPC,
78 | title={pNeRF: Parallelized Conversion from Internal to Cartesian Coordinates},
79 | author={Mohammed AlQuraishi},
80 | journal={bioRxiv},
81 | year={2018}
82 | }
83 | ```
84 |
85 | ```bibtex
86 | @article{Bayati2020HighperformanceTO,
87 | title={High‐performance transformation of protein structure representation from internal to Cartesian coordinates},
88 | author={M. Bayati and M. Leeser and J. Bardhan},
89 | journal={Journal of Computational Chemistry},
90 | year={2020},
91 | volume={41},
92 | pages={2104 - 2114}
93 | }
94 | ```
95 |
96 |
--------------------------------------------------------------------------------
/mp_nerf/__init__.py:
--------------------------------------------------------------------------------
1 | from mp_nerf.massive_pnerf import *
2 | from mp_nerf.proteins import *
--------------------------------------------------------------------------------
/mp_nerf/kb_proteins.py:
--------------------------------------------------------------------------------
1 | # Author: Eric Alcaide
2 |
3 | # A substantial part has been borrowed from
4 | # https://github.com/jonathanking/sidechainnet
5 | #
6 | # Here's the License for it:
7 | #
8 | # Copyright 2020 Jonathan King
9 | # Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
10 | # following conditions are met:
11 | #
12 | # 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
13 | #
14 | # 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
15 | # disclaimer in the documentation and/or other materials provided with the distribution.
16 | #
17 | # 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote
18 | # products derived from this software without specific prior written permission.
19 | #
20 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
21 | # INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 | # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 | # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 |
28 | import numpy as np
29 |
30 | #########################
31 | ### FROM SIDECHAINNET ###
32 | #########################
33 |
34 | # modified by considering rigid bodies in sidechains (remove extra torsions)
35 |
36 | SC_BUILD_INFO = {
37 | 'A': {
38 | 'angles-names': ['N-CA-CB'],
39 | 'angles-types': ['N -CX-CT'],
40 | 'angles-vals': [1.9146261894377796],
41 | 'atom-names': ['CB'],
42 | 'bonds-names': ['CA-CB'],
43 | 'bonds-types': ['CX-CT'],
44 | 'bonds-vals': [1.526],
45 | 'torsion-names': ['C-N-CA-CB'],
46 | 'torsion-types': ['C -N -CX-CT'],
47 | 'torsion-vals': ['p'],
48 | 'rigid-frames-idxs': [[0,1,2], [0,1,4]],
49 | },
50 |
51 | 'R': {
52 | 'angles-names': [
53 | 'N-CA-CB', 'CA-CB-CG', 'CB-CG-CD', 'CG-CD-NE', 'CD-NE-CZ', 'NE-CZ-NH1',
54 | 'NE-CZ-NH2'
55 | ],
56 | 'angles-types': [
57 | 'N -CX-C8', 'CX-C8-C8', 'C8-C8-C8', 'C8-C8-N2', 'C8-N2-CA', 'N2-CA-N2',
58 | 'N2-CA-N2'
59 | ],
60 | 'angles-vals': [
61 | 1.9146261894377796, 1.911135530933791, 1.911135530933791, 1.9408061282176945,
62 | 2.150245638457014, 2.0943951023931953, 2.0943951023931953
63 | ],
64 | 'atom-names': ['CB', 'CG', 'CD', 'NE', 'CZ', 'NH1', 'NH2'],
65 | 'bonds-names': ['CA-CB', 'CB-CG', 'CG-CD', 'CD-NE', 'NE-CZ', 'CZ-NH1', 'CZ-NH2'],
66 | 'bonds-types': ['CX-C8', 'C8-C8', 'C8-C8', 'C8-N2', 'N2-CA', 'CA-N2', 'CA-N2'],
67 | 'bonds-vals': [1.526, 1.526, 1.526, 1.463, 1.34, 1.34, 1.34],
68 | 'torsion-names': [
69 | 'C-N-CA-CB', 'N-CA-CB-CG', 'CA-CB-CG-CD', 'CB-CG-CD-NE', 'CG-CD-NE-CZ',
70 | 'CD-NE-CZ-NH1', 'CD-NE-CZ-NH2'
71 | ],
72 | 'torsion-types': [
73 | 'C -N -CX-C8', 'N -CX-C8-C8', 'CX-C8-C8-C8', 'C8-C8-C8-N2', 'C8-C8-N2-CA',
74 | 'C8-N2-CA-N2', 'C8-N2-CA-N2'
75 | ],
76 | 'torsion-vals': ['p', 'p', 'p', 'p', 'p', 0., 3.141592],
77 | 'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5], [4,5,6], [5,6,7], [6,7,8]],
78 | },
79 |
80 | 'N': {
81 | 'angles-names': ['N-CA-CB', 'CA-CB-CG', 'CB-CG-OD1', 'CB-CG-ND2'],
82 | 'angles-types': ['N -CX-2C', 'CX-2C-C ', '2C-C -O ', '2C-C -N '],
83 | 'angles-vals': [
84 | 1.9146261894377796, 1.9390607989657, 2.101376419401173, 2.035053907825388
85 | ],
86 | 'atom-names': ['CB', 'CG', 'OD1', 'ND2'],
87 | 'bonds-names': ['CA-CB', 'CB-CG', 'CG-OD1', 'CG-ND2'],
88 | 'bonds-types': ['CX-2C', '2C-C ', 'C -O ', 'C -N '],
89 | 'bonds-vals': [1.526, 1.522, 1.229, 1.335],
90 | 'torsion-names': ['C-N-CA-CB', 'N-CA-CB-CG', 'CA-CB-CG-OD1', 'CA-CB-CG-ND2'],
91 | 'torsion-types': ['C -N -CX-2C', 'N -CX-2C-C ', 'CX-2C-C -O ', 'CX-2C-C -N '],
92 | 'torsion-vals': ['p', 'p', 'p', 'i'],
93 | 'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5], [4,5,6]],
94 | },
95 |
96 | 'D': {
97 | 'angles-names': ['N-CA-CB', 'CA-CB-CG', 'CB-CG-OD1', 'CB-CG-OD2'],
98 | 'angles-types': ['N -CX-2C', 'CX-2C-CO', '2C-CO-O2', '2C-CO-O2'],
99 | 'angles-vals': [
100 | 1.9146261894377796, 1.9390607989657, 2.0420352248333655, 2.0420352248333655
101 | ],
102 | 'atom-names': ['CB', 'CG', 'OD1', 'OD2'],
103 | 'bonds-names': ['CA-CB', 'CB-CG', 'CG-OD1', 'CG-OD2'],
104 | 'bonds-types': ['CX-2C', '2C-CO', 'CO-O2', 'CO-O2'],
105 | 'bonds-vals': [1.526, 1.522, 1.25, 1.25],
106 | 'torsion-names': ['C-N-CA-CB', 'N-CA-CB-CG', 'CA-CB-CG-OD1', 'CA-CB-CG-OD2'],
107 | 'torsion-types': ['C -N -CX-2C', 'N -CX-2C-CO', 'CX-2C-CO-O2', 'CX-2C-CO-O2'],
108 | 'torsion-vals': ['p', 'p', 'p', 'i'],
109 | 'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5], [4,5,6]],
110 | },
111 |
112 | 'C': {
113 | 'angles-names': ['N-CA-CB', 'CA-CB-SG'],
114 | 'angles-types': ['N -CX-2C', 'CX-2C-SH'],
115 | 'angles-vals': [1.9146261894377796, 1.8954275676658419],
116 | 'atom-names': ['CB', 'SG'],
117 | 'bonds-names': ['CA-CB', 'CB-SG'],
118 | 'bonds-types': ['CX-2C', '2C-SH'],
119 | 'bonds-vals': [1.526, 1.81],
120 | 'torsion-names': ['C-N-CA-CB', 'N-CA-CB-SG'],
121 | 'torsion-types': ['C -N -CX-2C', 'N -CX-2C-SH'],
122 | 'torsion-vals': ['p', 'p'],
123 | 'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5]],
124 | },
125 |
126 | 'Q': {
127 | 'angles-names': ['N-CA-CB', 'CA-CB-CG', 'CB-CG-CD', 'CG-CD-OE1', 'CG-CD-NE2'],
128 | 'angles-types': ['N -CX-2C', 'CX-2C-2C', '2C-2C-C ', '2C-C -O ', '2C-C -N '],
129 | 'angles-vals': [
130 | 1.9146261894377796, 1.911135530933791, 1.9390607989657, 2.101376419401173,
131 | 2.035053907825388
132 | ],
133 | 'atom-names': ['CB', 'CG', 'CD', 'OE1', 'NE2'],
134 | 'bonds-names': ['CA-CB', 'CB-CG', 'CG-CD', 'CD-OE1', 'CD-NE2'],
135 | 'bonds-types': ['CX-2C', '2C-2C', '2C-C ', 'C -O ', 'C -N '],
136 | 'bonds-vals': [1.526, 1.526, 1.522, 1.229, 1.335],
137 | 'torsion-names': [
138 | 'C-N-CA-CB', 'N-CA-CB-CG', 'CA-CB-CG-CD', 'CB-CG-CD-OE1', 'CB-CG-CD-NE2'
139 | ],
140 | 'torsion-types': [
141 | 'C -N -CX-2C', 'N -CX-2C-2C', 'CX-2C-2C-C ', '2C-2C-C -O ', '2C-2C-C -N '
142 | ],
143 | 'torsion-vals': ['p', 'p', 'p', 'p', 'i'],
144 | 'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5], [4,5,6], [5,6,7]],
145 | },
146 |
147 | 'E': {
148 | 'angles-names': ['N-CA-CB', 'CA-CB-CG', 'CB-CG-CD', 'CG-CD-OE1', 'CG-CD-OE2'],
149 | 'angles-types': ['N -CX-2C', 'CX-2C-2C', '2C-2C-CO', '2C-CO-O2', '2C-CO-O2'],
150 | 'angles-vals': [
151 | 1.9146261894377796, 1.911135530933791, 1.9390607989657, 2.0420352248333655,
152 | 2.0420352248333655
153 | ],
154 | 'atom-names': ['CB', 'CG', 'CD', 'OE1', 'OE2'],
155 | 'bonds-names': ['CA-CB', 'CB-CG', 'CG-CD', 'CD-OE1', 'CD-OE2'],
156 | 'bonds-types': ['CX-2C', '2C-2C', '2C-CO', 'CO-O2', 'CO-O2'],
157 | 'bonds-vals': [1.526, 1.526, 1.522, 1.25, 1.25],
158 | 'torsion-names': [
159 | 'C-N-CA-CB', 'N-CA-CB-CG', 'CA-CB-CG-CD', 'CB-CG-CD-OE1', 'CB-CG-CD-OE2'
160 | ],
161 | 'torsion-types': [
162 | 'C -N -CX-2C', 'N -CX-2C-2C', 'CX-2C-2C-CO', '2C-2C-CO-O2', '2C-2C-CO-O2'
163 | ],
164 | 'torsion-vals': ['p', 'p', 'p', 'p', 'i'],
165 | 'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5], [4,5,6], [5,6,7]],
166 | },
167 |
168 | 'G': {
169 | 'angles-names': [],
170 | 'angles-types': [],
171 | 'angles-vals': [],
172 | 'atom-names': [],
173 | 'bonds-names': [],
174 | 'bonds-types': [],
175 | 'bonds-vals': [],
176 | 'torsion-names': [],
177 | 'torsion-types': [],
178 | 'torsion-vals': [],
179 | 'rigid-frames-idxs': [[0,1,2]],
180 | },
181 |
182 | 'H': {
183 | 'angles-names': [
184 | 'N-CA-CB', 'CA-CB-CG', 'CB-CG-ND1', 'CG-ND1-CE1', 'ND1-CE1-NE2', 'CE1-NE2-CD2'
185 | ],
186 | 'angles-types': [
187 | 'N -CX-CT', 'CX-CT-CC', 'CT-CC-NA', 'CC-NA-CR', 'NA-CR-NB', 'CR-NB-CV'
188 | ],
189 | 'angles-vals': [
190 | 1.9146261894377796, 1.9739673840055867, 2.0943951023931953,
191 | 1.8849555921538759, 1.8849555921538759, 1.8849555921538759
192 | ],
193 | 'atom-names': ['CB', 'CG', 'ND1', 'CE1', 'NE2', 'CD2'],
194 | 'bonds-names': ['CA-CB', 'CB-CG', 'CG-ND1', 'ND1-CE1', 'CE1-NE2', 'NE2-CD2'],
195 | 'bonds-types': ['CX-CT', 'CT-CC', 'CC-NA', 'NA-CR', 'CR-NB', 'NB-CV'],
196 | 'bonds-vals': [1.526, 1.504, 1.385, 1.343, 1.335, 1.394],
197 | 'torsion-names': [
198 | 'C-N-CA-CB', 'N-CA-CB-CG', 'CA-CB-CG-ND1', 'CB-CG-ND1-CE1', 'CG-ND1-CE1-NE2',
199 | 'ND1-CE1-NE2-CD2'
200 | ],
201 | 'torsion-types': [
202 | 'C -N -CX-CT', 'N -CX-CT-CC', 'CX-CT-CC-NA', 'CT-CC-NA-CR', 'CC-NA-CR-NB',
203 | 'NA-CR-NB-CV'
204 | ],
205 | 'torsion-vals': ['p', 'p', 'p', 3.141592653589793, 0.0, 0.0],
206 | 'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5], [4,5,6]],
207 | },
208 |
209 | 'I': {
210 | 'angles-names': ['N-CA-CB', 'CA-CB-CG1', 'CB-CG1-CD1', 'CA-CB-CG2'],
211 | 'angles-types': ['N -CX-3C', 'CX-3C-2C', '3C-2C-CT', 'CX-3C-CT'],
212 | 'angles-vals': [
213 | 1.9146261894377796, 1.911135530933791, 1.911135530933791, 1.911135530933791
214 | ],
215 | 'atom-names': ['CB', 'CG1', 'CD1', 'CG2'],
216 | 'bonds-names': ['CA-CB', 'CB-CG1', 'CG1-CD1', 'CB-CG2'],
217 | 'bonds-types': ['CX-3C', '3C-2C', '2C-CT', '3C-CT'],
218 | 'bonds-vals': [1.526, 1.526, 1.526, 1.526],
219 | 'torsion-names': ['C-N-CA-CB', 'N-CA-CB-CG1', 'CA-CB-CG1-CD1', 'N-CA-CB-CG2'],
220 | 'torsion-types': ['C -N -CX-3C', 'N -CX-3C-2C', 'CX-3C-2C-CT', 'N -CX-3C-CT'],
221 | 'torsion-vals': ['p', 'p', 'p', -2.1315], # last one was 'p' in the original - but cg1-cg2 = "2.133"
222 | 'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5], [4,5,7]],
223 | },
224 |
225 | 'L': {
226 | 'angles-names': ['N-CA-CB', 'CA-CB-CG', 'CB-CG-CD1', 'CB-CG-CD2'],
227 | 'angles-types': ['N -CX-2C', 'CX-2C-3C', '2C-3C-CT', '2C-3C-CT'],
228 | 'angles-vals': [
229 | 1.9146261894377796, 1.911135530933791, 1.911135530933791, 1.911135530933791
230 | ],
231 | 'atom-names': ['CB', 'CG', 'CD1', 'CD2'],
232 | 'bonds-names': ['CA-CB', 'CB-CG', 'CG-CD1', 'CG-CD2'],
233 | 'bonds-types': ['CX-2C', '2C-3C', '3C-CT', '3C-CT'],
234 | 'bonds-vals': [1.526, 1.526, 1.526, 1.526],
235 | 'torsion-names': ['C-N-CA-CB', 'N-CA-CB-CG', 'CA-CB-CG-CD1', 'CA-CB-CG-CD2'],
236 | 'torsion-types': ['C -N -CX-2C', 'N -CX-2C-3C', 'CX-2C-3C-CT', 'CX-2C-3C-CT'],
237 | # extra torsion is in negative bc in mask construction, previous angle is summed.
238 | 'torsion-vals': ['p', 'p', 'p', 2.1315], # last one was 'p' in the original - but cd1-cd2 = "-2.130"
239 | 'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5], [4,5,6]],
240 | },
241 |
242 | 'K': {
243 | 'angles-names': ['N-CA-CB', 'CA-CB-CG', 'CB-CG-CD', 'CG-CD-CE', 'CD-CE-NZ'],
244 | 'angles-types': ['N -CX-C8', 'CX-C8-C8', 'C8-C8-C8', 'C8-C8-C8', 'C8-C8-N3'],
245 | 'angles-vals': [
246 | 1.9146261894377796, 1.911135530933791, 1.911135530933791, 1.911135530933791,
247 | 1.9408061282176945
248 | ],
249 | 'atom-names': ['CB', 'CG', 'CD', 'CE', 'NZ'],
250 | 'bonds-names': ['CA-CB', 'CB-CG', 'CG-CD', 'CD-CE', 'CE-NZ'],
251 | 'bonds-types': ['CX-C8', 'C8-C8', 'C8-C8', 'C8-C8', 'C8-N3'],
252 | 'bonds-vals': [1.526, 1.526, 1.526, 1.526, 1.471],
253 | 'torsion-names': [
254 | 'C-N-CA-CB', 'N-CA-CB-CG', 'CA-CB-CG-CD', 'CB-CG-CD-CE', 'CG-CD-CE-NZ'
255 | ],
256 | 'torsion-types': [
257 | 'C -N -CX-C8', 'N -CX-C8-C8', 'CX-C8-C8-C8', 'C8-C8-C8-C8', 'C8-C8-C8-N3'
258 | ],
259 | 'torsion-vals': ['p', 'p', 'p', 'p', 'p'],
260 | 'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5], [4,5,6], [5,6,7], [6,7,8]],
261 | },
262 |
263 | 'M': {
264 | 'angles-names': ['N-CA-CB', 'CA-CB-CG', 'CB-CG-SD', 'CG-SD-CE'],
265 | 'angles-types': ['N -CX-2C', 'CX-2C-2C', '2C-2C-S ', '2C-S -CT'],
266 | 'angles-vals': [
267 | 1.9146261894377796, 1.911135530933791, 2.0018926520374962, 1.726130630222392
268 | ],
269 | 'atom-names': ['CB', 'CG', 'SD', 'CE'],
270 | 'bonds-names': ['CA-CB', 'CB-CG', 'CG-SD', 'SD-CE'],
271 | 'bonds-types': ['CX-2C', '2C-2C', '2C-S ', 'S -CT'],
272 | 'bonds-vals': [1.526, 1.526, 1.81, 1.81],
273 | 'torsion-names': ['C-N-CA-CB', 'N-CA-CB-CG', 'CA-CB-CG-SD', 'CB-CG-SD-CE'],
274 | 'torsion-types': ['C -N -CX-2C', 'N -CX-2C-2C', 'CX-2C-2C-S ', '2C-2C-S -CT'],
275 | 'torsion-vals': ['p', 'p', 'p', 'p'],
276 | 'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5], [4,5,6], [5,6,7]],
277 | },
278 |
279 | 'F': {
280 | 'angles-names': [
281 | 'N-CA-CB', 'CA-CB-CG', 'CB-CG-CD1', 'CG-CD1-CE1', 'CD1-CE1-CZ', 'CE1-CZ-CE2',
282 | 'CZ-CE2-CD2'
283 | ],
284 | 'angles-types': [
285 | 'N -CX-CT', 'CX-CT-CA', 'CT-CA-CA', 'CA-CA-CA', 'CA-CA-CA', 'CA-CA-CA',
286 | 'CA-CA-CA'
287 | ],
288 | 'angles-vals': [
289 | 1.9146261894377796, 1.9896753472735358, 2.0943951023931953,
290 | 2.0943951023931953, 2.0943951023931953, 2.0943951023931953, 2.0943951023931953
291 | ],
292 | 'atom-names': ['CB', 'CG', 'CD1', 'CE1', 'CZ', 'CE2', 'CD2'],
293 | 'bonds-names': [
294 | 'CA-CB', 'CB-CG', 'CG-CD1', 'CD1-CE1', 'CE1-CZ', 'CZ-CE2', 'CE2-CD2'
295 | ],
296 | 'bonds-types': ['CX-CT', 'CT-CA', 'CA-CA', 'CA-CA', 'CA-CA', 'CA-CA', 'CA-CA'],
297 | 'bonds-vals': [1.526, 1.51, 1.4, 1.4, 1.4, 1.4, 1.4],
298 | 'torsion-names': [
299 | 'C-N-CA-CB', 'N-CA-CB-CG', 'CA-CB-CG-CD1', 'CB-CG-CD1-CE1', 'CG-CD1-CE1-CZ',
300 | 'CD1-CE1-CZ-CE2', 'CE1-CZ-CE2-CD2'
301 | ],
302 | 'torsion-types': [
303 | 'C -N -CX-CT', 'N -CX-CT-CA', 'CX-CT-CA-CA', 'CT-CA-CA-CA', 'CA-CA-CA-CA',
304 | 'CA-CA-CA-CA', 'CA-CA-CA-CA'
305 | ],
306 | 'torsion-vals': ['p', 'p', 'p', 3.141592653589793, 0.0, 0.0, 0.0],
307 | 'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5], [4,5,6]],
308 | },
309 |
310 | 'P': {
311 | 'angles-names': ['N-CA-CB', 'CA-CB-CG', 'CB-CG-CD'],
312 | 'angles-types': ['N -CX-CT', 'CX-CT-CT', 'CT-CT-CT'],
313 | 'angles-vals': [1.9146261894377796, 1.911135530933791, 1.911135530933791],
314 | 'atom-names': ['CB', 'CG', 'CD'],
315 | 'bonds-names': ['CA-CB', 'CB-CG', 'CG-CD'],
316 | 'bonds-types': ['CX-CT', 'CT-CT', 'CT-CT'],
317 | 'bonds-vals': [1.526, 1.526, 1.526],
318 | 'torsion-names': ['C-N-CA-CB', 'N-CA-CB-CG', 'CA-CB-CG-CD'],
319 | 'torsion-types': ['C -N -CX-CT', 'N -CX-CT-CT', 'CX-CT-CT-CT'],
320 | 'torsion-vals': ['p', 'p', 'p'],
321 | 'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5], [4,5,6]],
322 | },
323 |
324 | 'S': {
325 | 'angles-names': ['N-CA-CB', 'CA-CB-OG'],
326 | 'angles-types': ['N -CX-2C', 'CX-2C-OH'],
327 | 'angles-vals': [1.9146261894377796, 1.911135530933791],
328 | 'atom-names': ['CB', 'OG'],
329 | 'bonds-names': ['CA-CB', 'CB-OG'],
330 | 'bonds-types': ['CX-2C', '2C-OH'],
331 | 'bonds-vals': [1.526, 1.41],
332 | 'torsion-names': ['C-N-CA-CB', 'N-CA-CB-OG'],
333 | 'torsion-types': ['C -N -CX-2C', 'N -CX-2C-OH'],
334 | 'torsion-vals': ['p', 'p'],
335 | 'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5]],
336 | },
337 |
338 | 'T': {
339 | 'angles-names': ['N-CA-CB', 'CA-CB-OG1', 'CA-CB-CG2'],
340 | 'angles-types': ['N -CX-3C', 'CX-3C-OH', 'CX-3C-CT'],
341 | 'angles-vals': [1.9146261894377796, 1.911135530933791, 1.911135530933791],
342 | 'atom-names': ['CB', 'OG1', 'CG2'],
343 | 'bonds-names': ['CA-CB', 'CB-OG1', 'CB-CG2'],
344 | 'bonds-types': ['CX-3C', '3C-OH', '3C-CT'],
345 | 'bonds-vals': [1.526, 1.41, 1.526],
346 | 'torsion-names': ['C-N-CA-CB', 'N-CA-CB-OG1', 'N-CA-CB-CG2'],
347 | 'torsion-types': ['C -N -CX-3C', 'N -CX-3C-OH', 'N -CX-3C-CT'],
348 | 'torsion-vals': ['p', 'p', 'p'],
349 | 'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5]],
350 | },
351 |
352 | 'W': {
353 | 'angles-names': [
354 | 'N-CA-CB', 'CA-CB-CG', 'CB-CG-CD1', 'CG-CD1-NE1', 'CD1-NE1-CE2',
355 | 'NE1-CE2-CZ2', 'CE2-CZ2-CH2', 'CZ2-CH2-CZ3', 'CH2-CZ3-CE3', 'CZ3-CE3-CD2'
356 | ],
357 | 'angles-types': [
358 | 'N -CX-CT', 'CX-CT-C*', 'CT-C*-CW', 'C*-CW-NA', 'CW-NA-CN', 'NA-CN-CA',
359 | 'CN-CA-CA', 'CA-CA-CA', 'CA-CA-CA', 'CA-CA-CB'
360 | ],
361 | 'angles-vals': [
362 | 1.9146261894377796, 2.0176006153054447, 2.181661564992912, 1.8971728969178363,
363 | 1.9477874452256716, 2.3177972466484698, 2.0943951023931953,
364 | 2.0943951023931953, 2.0943951023931953, 2.0943951023931953
365 | ],
366 | 'atom-names': [
367 | 'CB', 'CG', 'CD1', 'NE1', 'CE2', 'CZ2', 'CH2', 'CZ3', 'CE3', 'CD2'
368 | ],
369 | 'bonds-names': [
370 | 'CA-CB', 'CB-CG', 'CG-CD1', 'CD1-NE1', 'NE1-CE2', 'CE2-CZ2', 'CZ2-CH2',
371 | 'CH2-CZ3', 'CZ3-CE3', 'CE3-CD2'
372 | ],
373 | 'bonds-types': [
374 | 'CX-CT', 'CT-C*', 'C*-CW', 'CW-NA', 'NA-CN', 'CN-CA', 'CA-CA', 'CA-CA',
375 | 'CA-CA', 'CA-CB'
376 | ],
377 | 'bonds-vals': [1.526, 1.495, 1.352, 1.381, 1.38, 1.4, 1.4, 1.4, 1.4, 1.404],
378 | 'torsion-names': [
379 | 'C-N-CA-CB', 'N-CA-CB-CG', 'CA-CB-CG-CD1', 'CB-CG-CD1-NE1', 'CG-CD1-NE1-CE2',
380 | 'CD1-NE1-CE2-CZ2', 'NE1-CE2-CZ2-CH2', 'CE2-CZ2-CH2-CZ3', 'CZ2-CH2-CZ3-CE3',
381 | 'CH2-CZ3-CE3-CD2'
382 | ],
383 | 'torsion-types': [
384 | 'C -N -CX-CT', 'N -CX-CT-C*', 'CX-CT-C*-CW', 'CT-C*-CW-NA', 'C*-CW-NA-CN',
385 | 'CW-NA-CN-CA', 'NA-CN-CA-CA', 'CN-CA-CA-CA', 'CA-CA-CA-CA', 'CA-CA-CA-CB'
386 | ],
387 | 'torsion-vals': [
388 | 'p', 'p', 'p', 3.141592653589793, 0.0, 3.141592653589793, 3.141592653589793,
389 | 0.0, 0.0, 0.0
390 | ],
391 | 'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5], [4,5,6]]
392 | },
393 |
394 | 'Y': {
395 | 'angles-names': [
396 | 'N-CA-CB', 'CA-CB-CG', 'CB-CG-CD1', 'CG-CD1-CE1', 'CD1-CE1-CZ', 'CE1-CZ-OH',
397 | 'CE1-CZ-CE2', 'CZ-CE2-CD2'
398 | ],
399 | 'angles-types': [
400 | 'N -CX-CT', 'CX-CT-CA', 'CT-CA-CA', 'CA-CA-CA', 'CA-CA-C ', 'CA-C -OH',
401 | 'CA-C -CA', 'C -CA-CA'
402 | ],
403 | 'angles-vals': [
404 | 1.9146261894377796, 1.9896753472735358, 2.0943951023931953,
405 | 2.0943951023931953, 2.0943951023931953, 2.0943951023931953,
406 | 2.0943951023931953, 2.0943951023931953
407 | ],
408 | 'atom-names': ['CB', 'CG', 'CD1', 'CE1', 'CZ', 'OH', 'CE2', 'CD2'],
409 | 'bonds-names': [
410 | 'CA-CB', 'CB-CG', 'CG-CD1', 'CD1-CE1', 'CE1-CZ', 'CZ-OH', 'CZ-CE2', 'CE2-CD2'
411 | ],
412 | 'bonds-types': [
413 | 'CX-CT', 'CT-CA', 'CA-CA', 'CA-CA', 'CA-C ', 'C -OH', 'C -CA', 'CA-CA'
414 | ],
415 | 'bonds-vals': [1.526, 1.51, 1.4, 1.4, 1.409, 1.364, 1.409, 1.4],
416 | 'torsion-names': [
417 | 'C-N-CA-CB', 'N-CA-CB-CG', 'CA-CB-CG-CD1', 'CB-CG-CD1-CE1', 'CG-CD1-CE1-CZ',
418 | 'CD1-CE1-CZ-OH', 'CD1-CE1-CZ-CE2', 'CE1-CZ-CE2-CD2'
419 | ],
420 | 'torsion-types': [
421 | 'C -N -CX-CT', 'N -CX-CT-CA', 'CX-CT-CA-CA', 'CT-CA-CA-CA', 'CA-CA-CA-C ',
422 | 'CA-CA-C -OH', 'CA-CA-C -CA', 'CA-C -CA-CA'
423 | ],
424 | 'torsion-vals': [
425 | 'p', 'p', 'p', 3.141592653589793, 0.0, 3.141592653589793, 0.0, 0.0
426 | ],
427 | 'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5], [4,5,6]],
428 | },
429 |
430 | 'V': {
431 | 'angles-names': ['N-CA-CB', 'CA-CB-CG1', 'CA-CB-CG2'],
432 | 'angles-types': ['N -CX-3C', 'CX-3C-CT', 'CX-3C-CT'],
433 | 'angles-vals': [1.9146261894377796, 1.911135530933791, 1.911135530933791],
434 | 'atom-names': ['CB', 'CG1', 'CG2'],
435 | 'bonds-names': ['CA-CB', 'CB-CG1', 'CB-CG2'],
436 | 'bonds-types': ['CX-3C', '3C-CT', '3C-CT'],
437 | 'bonds-vals': [1.526, 1.526, 1.526],
438 | 'torsion-names': ['C-N-CA-CB', 'N-CA-CB-CG1', 'N-CA-CB-CG2'],
439 | 'torsion-types': ['C -N -CX-3C', 'N -CX-3C-CT', 'N -CX-3C-CT'],
440 | 'torsion-vals': ['p', 'p', 'p'],
441 | 'rigid-frames-idxs': [[0,1,2], [0,1,4], [1,4,5]]
442 | },
443 |
444 | '_': {
445 | 'angles-names': [],
446 | 'angles-types': [],
447 | 'angles-vals': [],
448 | 'atom-names': [],
449 | 'bonds-names': [],
450 | 'bonds-types': [],
451 | 'bonds-vals': [],
452 | 'torsion-names': [],
453 | 'torsion-types': [],
454 | 'torsion-vals': [],
455 | 'rigid-frames-idxs': [[]],
456 | }
457 | }
458 |
# Backbone build constants: bond lengths, bond angles and default torsions,
# keyed by lower-case atom names joined with '-'.
BB_BUILD_INFO = {
    "BONDLENS": {
        # updated values are according to crystal data from 1DPE_1_A and
        # validated with other structures; the trailing comment on each line
        # is the sidechainnet value they replace
        "n-ca": 1.4664931,  # 1.442
        "ca-c": 1.524119,  # 1.498
        "c-n": 1.3289373,  # 1.379
        # From parm10.dat || huge variability according to structures:
        # we get 1.3389416 from 1DPE_1_A but also 1.2289 from 2F2H_d2f2hf1
        "c-o": 1.229,
        "c-oh": 1.364,  # From parm10.dat, for OXT
    },
    # For placing oxygens
    "BONDANGS": {
        "ca-c-o": 2.0944,  # Approximated to be 2pi / 3; parm10.dat says 2.0350539
        "ca-c-oh": 2.0944,  # Equal to 'ca-c-o', for OXT
        "ca-c-n": 2.03,
        "n-ca-c": 1.94,
        "c-n-ca": 2.08,
    },
    "BONDTORSIONS": {
        "n-ca-c-n": -0.785398163,  # psi (-44 deg, bimodal distro, pick one)
        "c-n-ca-c": -1.3962634015954636,  # phi (-80 deg, bimodal distro, pick one)
        "ca-n-c-ca": 3.141592,  # omega (180 deg - https://doi.org/10.1016/j.jmb.2005.01.065)
        "n-ca-c-o": -2.406,  # oxygen
    },  # A simple approximation, not meant to be exact.
}
487 |
488 |
# Covalent bonds per residue as [atom_i, atom_j] index pairs.
# numbers follow the same order as sidechainnet atoms
# (0-3 are the backbone N, CA, C, O; side-chain atoms start at 4).
SCN_CONNECT = {
    "A": {"bonds": [[0, 1], [1, 2], [2, 3], [1, 4]]},
    "R": {"bonds": [[0, 1], [1, 2], [2, 3], [1, 4], [4, 5], [5, 6],
                    [6, 7], [7, 8], [8, 9], [8, 10]]},
    "N": {"bonds": [[0, 1], [1, 2], [2, 3], [1, 4], [4, 5], [5, 6], [5, 7]]},
    "D": {"bonds": [[0, 1], [1, 2], [2, 3], [1, 4], [4, 5], [5, 6], [5, 7]]},
    "C": {"bonds": [[0, 1], [1, 2], [2, 3], [1, 4], [4, 5]]},
    "Q": {"bonds": [[0, 1], [1, 2], [2, 3], [1, 4], [4, 5], [5, 6],
                    [6, 7], [6, 8]]},
    "E": {"bonds": [[0, 1], [1, 2], [2, 3], [1, 4], [4, 5], [5, 6],
                    [6, 7], [6, 8]]},
    "G": {"bonds": [[0, 1], [1, 2], [2, 3]]},
    "H": {"bonds": [[0, 1], [1, 2], [2, 3], [1, 4], [4, 5], [5, 6],
                    [6, 7], [7, 8], [8, 9], [5, 9]]},
    "I": {"bonds": [[0, 1], [1, 2], [2, 3], [1, 4], [4, 5], [5, 6], [4, 7]]},
    "L": {"bonds": [[0, 1], [1, 2], [2, 3], [1, 4], [4, 5], [5, 6], [5, 7]]},
    "K": {"bonds": [[0, 1], [1, 2], [2, 3], [1, 4], [4, 5], [5, 6],
                    [6, 7], [7, 8]]},
    "M": {"bonds": [[0, 1], [1, 2], [2, 3], [1, 4], [4, 5], [5, 6], [6, 7]]},
    "F": {"bonds": [[0, 1], [1, 2], [2, 3], [1, 4], [4, 5], [5, 6],
                    [6, 7], [7, 8], [8, 9], [9, 10], [5, 10]]},
    "P": {"bonds": [[0, 1], [1, 2], [2, 3], [1, 4], [4, 5], [5, 6], [0, 6]]},
    "S": {"bonds": [[0, 1], [1, 2], [2, 3], [1, 4], [4, 5]]},
    "T": {"bonds": [[0, 1], [1, 2], [2, 3], [1, 4], [4, 5], [4, 6]]},
    "W": {"bonds": [[0, 1], [1, 2], [2, 3], [1, 4], [4, 5], [5, 6],
                    [6, 7], [7, 8], [8, 9], [9, 10], [10, 11], [11, 12],
                    [12, 13], [5, 13], [8, 13]]},
    "Y": {"bonds": [[0, 1], [1, 2], [2, 3], [1, 4], [4, 5], [5, 6],
                    [6, 7], [7, 8], [8, 9], [8, 10], [10, 11], [5, 11]]},
    "V": {"bonds": [[0, 1], [1, 2], [2, 3], [1, 4], [4, 5], [4, 6]]},
    "_": {"bonds": []},
}
570 |
# from: https://static-content.springer.com/esm/art%3A10.1038%2Fs41586-021-03819-2/MediaObjects/41586_2021_3819_MOESM1_ESM.pdf
# added R's terminal Ns due to a small percentage of misalignments' (10%)
# Pairs of atoms whose names can be swapped by a 180-degree side-chain flip.
# "indexs" are slots in the 14-atom residue layout: 0-3 are N, CA, C, O and
# side-chain atoms follow from slot 4 in SC_BUILD_INFO[aa]["atom-names"] order.
AMBIGUOUS = {
    "D": {"names": [["OD1", "OD2"]],
          "indexs": [[6, 7]],
          },
    "E": {"names": [["OE1", "OE2"]],
          "indexs": [[7, 8]],
          },
    # F atoms: CB, CG, CD1, CE1, CZ, CE2, CD2 -> slots 4..10
    "F": {"names": [["CD1", "CD2"], ["CE1", "CE2"]],
          "indexs": [[6, 10], [7, 9]],
          },
    # Y atoms: CB, CG, CD1, CE1, CZ, OH, CE2, CD2 -> slots 4..11.
    # OH sits at slot 9, so CE2 is slot 10 and CD2 is slot 11 (Y cannot
    # reuse the F indices).
    "Y": {"names": [["CD1", "CD2"], ["CE1", "CE2"]],
          "indexs": [[6, 11], [7, 10]],
          },
    "R": {"names": [["NH1", "NH2"]],
          "indexs": [[9, 10]]
          },
}
590 |
591 |
# Amino-acid substitution matrix (values match BLOSUM62) plus a padding token.
# Each row has 21 entries: the 20 columns follow the standard BLOSUM residue
# order A R N D C Q E G H I L K M F P S T W Y V, and the last column is the
# padding token "_".
# The rows below are keyed by the residue they actually describe. They used to
# be keyed alphabetically while still listed in the standard BLOSUM order, so
# e.g. BLOSUM["C"] returned the arginine row.
BLOSUM = {
    "A" : [4.0, -1.0, -2.0, -2.0, 0.0, -1.0, -1.0, 0.0, -2.0, -1.0, -1.0, -1.0, -1.0, -2.0, -1.0, 1.0, 0.0, -3.0, -2.0, 0.0, 0.0],
    "R" : [-1.0, 5.0, 0.0, -2.0, -3.0, 1.0, 0.0, -2.0, 0.0, -3.0, -2.0, 2.0, -1.0, -3.0, -2.0, -1.0, -1.0, -3.0, -2.0, -3.0, 0.0],
    "N" : [-2.0, 0.0, 6.0, 1.0, -3.0, 0.0, 0.0, 0.0, 1.0, -3.0, -3.0, 0.0, -2.0, -3.0, -2.0, 1.0, 0.0, -4.0, -2.0, -3.0, 0.0],
    "D" : [-2.0, -2.0, 1.0, 6.0, -3.0, 0.0, 2.0, -1.0, -1.0, -3.0, -4.0, -1.0, -3.0, -3.0, -1.0, 0.0, -1.0, -4.0, -3.0, -3.0, 0.0],
    "C" : [0.0, -3.0, -3.0, -3.0, 9.0, -3.0, -4.0, -3.0, -3.0, -1.0, -1.0, -3.0, -1.0, -2.0, -3.0, -1.0, -1.0, -2.0, -2.0, -1.0, 0.0],
    "Q" : [-1.0, 1.0, 0.0, 0.0, -3.0, 5.0, 2.0, -2.0, 0.0, -3.0, -2.0, 1.0, 0.0, -3.0, -1.0, 0.0, -1.0, -2.0, -1.0, -2.0, 0.0],
    "E" : [-1.0, 0.0, 0.0, 2.0, -4.0, 2.0, 5.0, -2.0, 0.0, -3.0, -3.0, 1.0, -2.0, -3.0, -1.0, 0.0, -1.0, -3.0, -2.0, -2.0, 0.0],
    "G" : [0.0, -2.0, 0.0, -1.0, -3.0, -2.0, -2.0, 6.0, -2.0, -4.0, -4.0, -2.0, -3.0, -3.0, -2.0, 0.0, -2.0, -2.0, -3.0, -3.0, 0.0],
    "H" : [-2.0, 0.0, 1.0, -1.0, -3.0, 0.0, 0.0, -2.0, 8.0, -3.0, -3.0, -1.0, -2.0, -1.0, -2.0, -1.0, -2.0, -2.0, 2.0, -3.0, 0.0],
    "I" : [-1.0, -3.0, -3.0, -3.0, -1.0, -3.0, -3.0, -4.0, -3.0, 4.0, 2.0, -3.0, 1.0, 0.0, -3.0, -2.0, -1.0, -3.0, -1.0, 3.0, 0.0],
    "L" : [-1.0, -2.0, -3.0, -4.0, -1.0, -2.0, -3.0, -4.0, -3.0, 2.0, 4.0, -2.0, 2.0, 0.0, -3.0, -2.0, -1.0, -2.0, -1.0, 1.0, 0.0],
    "K" : [-1.0, 2.0, 0.0, -1.0, -3.0, 1.0, 1.0, -2.0, -1.0, -3.0, -2.0, 5.0, -1.0, -3.0, -1.0, 0.0, -1.0, -3.0, -2.0, -2.0, 0.0],
    "M" : [-1.0, -1.0, -2.0, -3.0, -1.0, 0.0, -2.0, -3.0, -2.0, 1.0, 2.0, -1.0, 5.0, 0.0, -2.0, -1.0, -1.0, -1.0, -1.0, 1.0, 0.0],
    "F" : [-2.0, -3.0, -3.0, -3.0, -2.0, -3.0, -3.0, -3.0, -1.0, 0.0, 0.0, -3.0, 0.0, 6.0, -4.0, -2.0, -2.0, 1.0, 3.0, -1.0, 0.0],
    "P" : [-1.0, -2.0, -2.0, -1.0, -3.0, -1.0, -1.0, -2.0, -2.0, -3.0, -3.0, -1.0, -2.0, -4.0, 7.0, -1.0, -1.0, -4.0, -3.0, -2.0, 0.0],
    "S" : [1.0, -1.0, 1.0, 0.0, -1.0, 0.0, 0.0, 0.0, -1.0, -2.0, -2.0, 0.0, -1.0, -2.0, -1.0, 4.0, 1.0, -3.0, -2.0, -2.0, 0.0],
    "T" : [0.0, -1.0, 0.0, -1.0, -1.0, -1.0, -1.0, -2.0, -2.0, -1.0, -1.0, -1.0, -1.0, -2.0, -1.0, 1.0, 5.0, -2.0, -2.0, 0.0, 0.0],
    "W" : [-3.0, -3.0, -4.0, -4.0, -2.0, -2.0, -3.0, -2.0, -2.0, -3.0, -2.0, -3.0, -1.0, 1.0, -4.0, -3.0, -2.0, 11.0, 2.0, -3.0, 0.0],
    "Y" : [-2.0, -2.0, -2.0, -3.0, -2.0, -1.0, -2.0, -3.0, 2.0, -1.0, -1.0, -2.0, -1.0, 3.0, -3.0, -2.0, -2.0, 2.0, 7.0, -1.0, 0.0],
    "V" : [0.0, -3.0, -3.0, -3.0, -1.0, -2.0, -2.0, -3.0, -3.0, 3.0, 1.0, -2.0, 1.0, -1.0, -2.0, -2.0, 0.0, -3.0, -1.0, 4.0, 0.0],
    "_" : [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0],
}
# Keep the historical (alphabetical, padding last) iteration order of the keys.
BLOSUM = {aa: BLOSUM[aa] for aa in sorted(BLOSUM)}
616 |
617 |
# Per-residue, per-side-chain-atom default internal coordinates.
# Layout: one-letter residue code -> atom name -> dict with the keys
# 'bond_lens', 'bond_angs' and 'bond_dihedral'. 'G' and the padding token '_'
# have no side-chain atoms and map to empty dicts.
# The 'bond_dihedral' entry is what make_torsion_mask(aa, fill=True) reads.
# NOTE(review): angle values look like radians (e.g. 3.141592) and lengths
# like Angstroms -- confirm against the data source.
# modified manually to match the mode
MP3SC_INFO = {
    'A': {'CB': {'bond_lens': 1.5260003, 'bond_angs': 1.9146265, 'bond_dihedral': 2.848366}
         },
    'R': {'CB': {'bond_lens': 1.5260003, 'bond_angs': 1.9146265, 'bond_dihedral': 2.6976738},
          'CG': {'bond_lens': 1.5260003, 'bond_angs': 1.9111352, 'bond_dihedral': -1.2},
          'CD': {'bond_lens': 1.5260003, 'bond_angs': 1.9111352, 'bond_dihedral': -3.141592},
          'NE': {'bond_lens': 1.463, 'bond_angs': 1.9408059, 'bond_dihedral': -3.141592},
          'CZ': {'bond_lens': 1.34, 'bond_angs': 2.1502457, 'bond_dihedral': -3.141592},
          'NH1': {'bond_lens': 1.34, 'bond_angs': 2.094395, 'bond_dihedral': 0.},
          'NH2': {'bond_lens': 1.34, 'bond_angs': 2.094395, 'bond_dihedral': -3.141592}
         },
    'N': {'CB': {'bond_lens': 1.5260003, 'bond_angs': 1.9146265, 'bond_dihedral': 2.8416245},
          'CG': {'bond_lens': 1.5219998, 'bond_angs': 1.9390607, 'bond_dihedral': -1.15},
          'OD1': {'bond_lens': 1.229, 'bond_angs': 2.101376, 'bond_dihedral': -1.}, # spread out w/ mean at -1
          'ND2': {'bond_lens': 1.3349999, 'bond_angs': 2.0350537, 'bond_dihedral': 2.14} # spread out with mean at -4
         },
    'D': {'CB': {'bond_lens': 1.526, 'bond_angs': 1.9146265, 'bond_dihedral': 2.7741134},
          'CG': {'bond_lens': 1.522, 'bond_angs': 1.9390608, 'bond_dihedral': -1.07},
          'OD1': {'bond_lens': 1.25, 'bond_angs': 2.0420356, 'bond_dihedral': -0.2678593},
          'OD2': {'bond_lens': 1.25, 'bond_angs': 2.0420356, 'bond_dihedral': 2.95}
         },
    'C': {'CB': {'bond_lens': 1.5259998, 'bond_angs': 1.9146262, 'bond_dihedral': 2.553627},
          'SG': {'bond_lens': 1.8099997, 'bond_angs': 1.8954275, 'bond_dihedral': -1.07}
         },
    'Q': {'CB': {'bond_lens': 1.5260003, 'bond_angs': 1.9146266, 'bond_dihedral': 2.7262106},
          'CG': {'bond_lens': 1.5260003, 'bond_angs': 1.9111353, 'bond_dihedral': -1.075},
          'CD': {'bond_lens': 1.5219998, 'bond_angs': 1.9390606, 'bond_dihedral': -3.141592},
          'OE1': {'bond_lens': 1.229, 'bond_angs': 2.101376, 'bond_dihedral': -1}, # bimodal at -1, +1
          'NE2': {'bond_lens': 1.3349998, 'bond_angs': 2.0350537, 'bond_dihedral': 2.14} # bimodal at -2, -4
         },
    'E': {'CB': {'bond_lens': 1.5260003, 'bond_angs': 1.9146267, 'bond_dihedral': 2.7813723},
          'CG': {'bond_lens': 1.5260003, 'bond_angs': 1.9111352, 'bond_dihedral': -1.07}, # bimodal at -1.07, 3.14
          'CD': {'bond_lens': 1.5219998, 'bond_angs': 1.9390606, 'bond_dihedral': -3.0907722155200403},
          'OE1': {'bond_lens': 1.25, 'bond_angs': 2.0420356, 'bond_dihedral': 0.003740118}, # spread out between -1, 1
          'OE2': {'bond_lens': 1.25, 'bond_angs': 2.0420356, 'bond_dihedral': -3.1378527} # spread out between -4.3, -2.14
         },
    'G': {},
    'H': {'CB': {'bond_lens': 1.5259998, 'bond_angs': 1.9146264, 'bond_dihedral': 2.614421},
          'CG': {'bond_lens': 1.5039998, 'bond_angs': 1.9739674, 'bond_dihedral': -1.05},
          'ND1': {'bond_lens': 1.3850001, 'bond_angs': 2.094395, 'bond_dihedral': -1.41}, # bimodal at -1.4, 1.4
          'CE1': {'bond_lens': 1.3430002, 'bond_angs': 1.8849558, 'bond_dihedral': 3.14},
          'NE2': {'bond_lens': 1.335, 'bond_angs': 1.8849558, 'bond_dihedral': 0.0},
          'CD2': {'bond_lens': 1.3940002, 'bond_angs': 1.8849558, 'bond_dihedral': 0.0}
         },
    'I': {'CB': {'bond_lens': 1.526, 'bond_angs': 1.9146265, 'bond_dihedral': 2.5604365},
          'CG1': {'bond_lens': 1.526, 'bond_angs': 1.9111353, 'bond_dihedral': -1.025},
          'CD1': {'bond_lens': 1.526, 'bond_angs': 1.9111353, 'bond_dihedral': -3.0667439142810267},
          'CG2': {'bond_lens': 1.526, 'bond_angs': 1.9111353, 'bond_dihedral': -3.1225884596454065}
         },
    'L': {'CB': {'bond_lens': 1.5260003, 'bond_angs': 1.9146265, 'bond_dihedral': 2.711971},
          'CG': {'bond_lens': 1.5260003, 'bond_angs': 1.9111352, 'bond_dihedral': -1.15},
          'CD1': {'bond_lens': 1.5260003, 'bond_angs': 1.9111352, 'bond_dihedral': 3.14},
          'CD2': {'bond_lens': 1.5260003, 'bond_angs': 1.9111352, 'bond_dihedral': -1.05}
         },
    'K': {'CB': {'bond_lens': 1.526, 'bond_angs': 1.9146266, 'bond_dihedral': 2.7441595},
          'CG': {'bond_lens': 1.526, 'bond_angs': 1.9111353, 'bond_dihedral': -1.15},
          'CD': {'bond_lens': 1.526, 'bond_angs': 1.9111353, 'bond_dihedral': -3.09},
          'CE': {'bond_lens': 1.526, 'bond_angs': 1.9111353, 'bond_dihedral': 3.092959},
          'NZ': {'bond_lens': 1.4710001, 'bond_angs': 1.940806, 'bond_dihedral': 3.0515378}
         },
    'M': {'CB': {'bond_lens': 1.526, 'bond_angs': 1.9146264, 'bond_dihedral': 2.7051392},
          'CG': {'bond_lens': 1.526, 'bond_angs': 1.9111354, 'bond_dihedral': -1.1},
          'SD': {'bond_lens': 1.8099998, 'bond_angs': 2.001892, 'bond_dihedral': 3.1411812}, # bimodal at 0, 3.14
          'CE': {'bond_lens': 1.8099998, 'bond_angs': 1.7261307, 'bond_dihedral': -0.048235133} # trimodal at -1.41, 0, 1.41
         },
    'F': {'CB': {'bond_lens': 1.5260003, 'bond_angs': 1.9146266, 'bond_dihedral': 2.545154},
          'CG': {'bond_lens': 1.5100001, 'bond_angs': 1.9896755, 'bond_dihedral': -1.2}, # bimodal at -1, 3.14
          'CD1': {'bond_lens': 1.3999997, 'bond_angs': 2.094395, 'bond_dihedral': 1.41}, # bimodal -1.41, 1.41
          'CE1': {'bond_lens': 1.3999997, 'bond_angs': 2.094395, 'bond_dihedral': 3.141592},
          'CZ': {'bond_lens': 1.3999997, 'bond_angs': 2.094395, 'bond_dihedral': 0.0},
          'CE2': {'bond_lens': 1.3999997, 'bond_angs': 2.094395, 'bond_dihedral': 0.0},
          'CD2': {'bond_lens': 1.3999997, 'bond_angs': 2.094395, 'bond_dihedral': 0.0}
         },
    'P': {'CB': {'bond_lens': 1.5260001, 'bond_angs': 1.9146266, 'bond_dihedral': 3.141592},
          'CG': {'bond_lens': 1.5260001, 'bond_angs': 1.9111352, 'bond_dihedral': -0.707}, # bimodal at -0.7, 0.7
          'CD': {'bond_lens': 1.5260001, 'bond_angs': 1.9111352, 'bond_dihedral': 0.85} # bimodal at -0.85, 0.85
         },
    'S': {'CB': {'bond_lens': 1.5260001, 'bond_angs': 1.9146266, 'bond_dihedral': 2.6017702},
          'OG': {'bond_lens': 1.41, 'bond_angs': 1.9111352, 'bond_dihedral': 1.1}
         },
    'T': {'CB': {'bond_lens': 1.5260001, 'bond_angs': 1.9146265, 'bond_dihedral': 2.55},
          'OG1': {'bond_lens': 1.4099998, 'bond_angs': 1.9111353, 'bond_dihedral': -1.07}, # bimodal at -1 and +1
          'CG2': {'bond_lens': 1.5260001, 'bond_angs': 1.9111353, 'bond_dihedral': -3.05} # bimodal at -1 and -3
         },
    'W': {'CB': {'bond_lens': 1.526, 'bond_angs': 1.9146266, 'bond_dihedral': 3.141592},
          'CG': {'bond_lens': 1.4950002, 'bond_angs': 2.0176008, 'bond_dihedral': -1.2},
          'CD1': {'bond_lens': 1.3520001, 'bond_angs': 2.1816616, 'bond_dihedral': 1.53},
          'NE1': {'bond_lens': 1.3810003, 'bond_angs': 1.8971729, 'bond_dihedral': 3.141592},
          'CE2': {'bond_lens': 1.3799998, 'bond_angs': 1.9477878, 'bond_dihedral': 0.0},
          'CZ2': {'bond_lens': 1.3999999, 'bond_angs': 2.317797, 'bond_dihedral': 3.141592},
          'CH2': {'bond_lens': 1.3999999, 'bond_angs': 2.094395, 'bond_dihedral': 3.141592},
          'CZ3': {'bond_lens': 1.3999999, 'bond_angs': 2.094395, 'bond_dihedral': 0.0},
          'CE3': {'bond_lens': 1.3999999, 'bond_angs': 2.094395, 'bond_dihedral': 0.0},
          'CD2': {'bond_lens': 1.404, 'bond_angs': 2.094395, 'bond_dihedral': 0.0}
         },
    'Y': {'CB': {'bond_lens': 1.5260003, 'bond_angs': 1.9146266, 'bond_dihedral': 3.1},
          'CG': {'bond_lens': 1.5100001, 'bond_angs': 1.9896754, 'bond_dihedral': -1.1},
          'CD1': {'bond_lens': 1.3999997, 'bond_angs': 2.094395, 'bond_dihedral': 1.36},
          'CE1': {'bond_lens': 1.3999997, 'bond_angs': 2.094395, 'bond_dihedral': 3.141592},
          'CZ': {'bond_lens': 1.4090003, 'bond_angs': 2.094395, 'bond_dihedral': 0.0},
          'OH': {'bond_lens': 1.3640002, 'bond_angs': 2.094395, 'bond_dihedral': 3.141592},
          'CE2': {'bond_lens': 1.4090003, 'bond_angs': 2.094395, 'bond_dihedral': 0.0},
          'CD2': {'bond_lens': 1.3999997, 'bond_angs': 2.094395, 'bond_dihedral': 0.0}
         },
    'V': {'CB': {'bond_lens': 1.5260003, 'bond_angs': 1.9146266, 'bond_dihedral': 2.55},
          'CG1': {'bond_lens': 1.5260003, 'bond_angs': 1.9111352, 'bond_dihedral': 3.141592},
          'CG2': {'bond_lens': 1.5260003, 'bond_angs': 1.9111352, 'bond_dihedral': -1.1}
         },

    '_': {}
}
730 |
# experimentally checked distances
# NOTE(review): the integer keys look like a bond-count separation between
# two atoms -- confirm against the code that consumes FF.
FF = {
    "MIN_DISTS": {
        1: 1.180,  # shortest =N or =O bond
        2: 2.138,  # N-N in histidine group
        3: 2.380,  # N-N in backbone (N-CA-C-N)
    },
    # 1.84 is longest -S bond found, scaled by the key (1 to 5)
    "MAX_DISTS": dict((n_bonds, 1.840 * n_bonds) for n_bonds in range(1, 5 + 1)),
}
737 |
# Integer token for every atom name: the empty string (padding), the four
# backbone atoms and every side-chain atom name found in SC_BUILD_INFO.
# Tokens are assigned in sorted-name order, so "" gets token 0.
ATOM_TOKEN_IDS = {
    name: token
    for token, name in enumerate(sorted(
        {"", "N", "CA", "C", "O"}
        | {atom for info in SC_BUILD_INFO.values() for atom in info["atom-names"]}
    ))
}
744 |
745 | #################
746 | ##### DOERS #####
747 | #################
748 |
def make_cloud_mask(aa):
    """ Flags which of the 14 atom slots exist for residue `aa`.
        Returns a float array of shape (14,): 1 for real atoms
        (4 backbone atoms + side chain), 0 for padding slots.
        The padding token "_" yields all zeros.
    """
    mask = np.zeros(14)
    if aa == "_":
        return mask
    # backbone (N, CA, C, O) comes first, then the side-chain atoms
    n_present = len(SC_BUILD_INFO[aa]["atom-names"]) + 4
    mask[:n_present] = True
    return mask
756 |
def make_bond_mask(aa):
    """ Gives the length of the bond originating each atom.
        Returns a float array of shape (14,): slots 0-3 hold the backbone
        bond lengths (c-n, n-ca, ca-c, c-o), slots 4+ the side-chain
        'bonds-vals' of `aa`. Unused slots stay at 0.
    """
    mask = np.zeros(14)
    # backbone - skipped for the padding token
    if aa != "_":
        lengths = BB_BUILD_INFO["BONDLENS"]
        for slot, bond in enumerate(('c-n', 'n-ca', 'ca-c', 'c-o')):
            mask[slot] = lengths[bond]
    # sidechain - only for residues known to SC_BUILD_INFO
    if aa in SC_BUILD_INFO:
        for slot, length in enumerate(SC_BUILD_INFO[aa]['bonds-vals'], start=4):
            mask[slot] = length
    return mask
771 |
def make_theta_mask(aa):
    """ Gives the theta of the bond originating each atom.
        Returns a float array of shape (14,): slots 0-3 hold the backbone
        bond angles, slots 4+ the side-chain 'angles-vals' of `aa`.
        The padding token "_" yields all zeros.
    """
    mask = np.zeros(14)
    if aa == "_":
        # padding: no backbone, and its side-chain angle list is empty
        return mask
    angles = BB_BUILD_INFO["BONDANGS"]
    mask[0] = angles['ca-c-n']  # nitrogen
    mask[1] = angles['c-n-ca']  # c_alpha
    mask[2] = angles['n-ca-c']  # carbon
    mask[3] = angles['ca-c-o']  # oxygen
    # sidechain
    for slot, theta in enumerate(SC_BUILD_INFO[aa]['angles-vals'], start=4):
        mask[slot] = theta
    return mask
785 |
def make_torsion_mask(aa, fill=False):
    """ Gives the dihedral of the bond originating each atom.
        Inputs:
        * aa: one-letter residue code ("_" for padding -> all zeros)
        * fill: bool. if True, side-chain slots take the default
                'bond_dihedral' from MP3SC_INFO; otherwise they take the
                SC_BUILD_INFO 'torsion-vals' entry, with the markers
                'p' -> nan and 'i' -> 999.
        Outputs: float array of shape (14,)
    """
    mask = np.zeros(14)
    if aa == "_":
        return mask
    # backbone
    bb_torsions = BB_BUILD_INFO["BONDTORSIONS"]
    mask[0] = bb_torsions['n-ca-c-n']   # psi
    mask[1] = bb_torsions['ca-n-c-ca']  # omega
    mask[2] = bb_torsions['c-n-ca-c']   # phi
    mask[3] = bb_torsions['n-ca-c-o']   # oxygen
    # sidechain
    for slot, torsion in enumerate(SC_BUILD_INFO[aa]['torsion-vals'], start=4):
        if fill:
            atom_name = SC_BUILD_INFO[aa]["atom-names"][slot - 4]
            mask[slot] = MP3SC_INFO[aa][atom_name]["bond_dihedral"]
        # https://github.com/jonathanking/sidechainnet/blob/master/sidechainnet/structure/StructureBuilder.py#L372
        # nan / 999 are annotations for the 'p' / 'i' markers -- change later
        elif torsion == 'p':
            mask[slot] = np.nan
        elif torsion == "i":
            mask[slot] = 999
        else:
            mask[slot] = torsion
    return mask
804 |
def make_idx_mask(aa):
    """ Gives the idxs of the 3 previous points.
        Returns a float array of shape (11, 3). Row 0 is [0, 1, 2]; row i+1
        holds the slots of the first three atoms named in the i-th
        side-chain torsion of `aa`. The padding token "_" yields all zeros.
    """
    mask = np.zeros((11, 3))
    if aa == "_":
        return mask
    # backbone
    mask[0, :] = np.arange(3)
    # sidechain: atoms with a fixed slot; any other atom is looked up in the
    # residue's side-chain atom list (which starts at slot 4)
    fixed_slots = {"N": 0, "CA": 1, "C": 2, "CB": 4}
    for row, torsion_name in enumerate(SC_BUILD_INFO[aa]['torsion-names'], start=1):
        # atoms forming the dihedral; the last one is the atom being placed
        atoms = [part.rstrip(" ") for part in torsion_name.split("-")]
        for col, atom in enumerate(atoms[:-1]):
            if atom in fixed_slots:
                slot = fixed_slots[atom]
            else:
                slot = 4 + SC_BUILD_INFO[aa]['atom-names'].index(atom)
            mask[row][col] = slot
    return mask
823 |
def make_atom_token_mask(aa):
    """ Return the tokens for each atom in the aa.
        Float array of shape (14,): ATOM_TOKEN_IDS of the backbone atoms
        followed by the side-chain atoms; unused slots (and the whole
        padding token "_") stay at 0.
    """
    mask = np.zeros(14)
    if aa == "_":
        return mask
    atom_names = ["N", "CA", "C", "O"] + SC_BUILD_INFO[aa]["atom-names"]
    for slot, name in enumerate(atom_names):
        mask[slot] = ATOM_TOKEN_IDS[name]
    return mask
833 |
834 |
835 | ###################
836 | ##### GETTERS #####
837 | ###################
# residue alphabet: position in the string is the residue's integer index,
# with the padding token "_" last
INDEX2AAS = "ACDEFGHIKLMNPQRSTVWY_"
# inverse lookup: one-letter code -> integer index
AAS2INDEX = dict(zip(INDEX2AAS, range(len(INDEX2AAS))))
# All per-residue masks precomputed once at import time,
# keyed by one-letter residue code (padding token "_" included).
SUPREME_INFO = {
    aa: dict(
        cloud_mask=make_cloud_mask(aa),
        bond_mask=make_bond_mask(aa),
        theta_mask=make_theta_mask(aa),
        torsion_mask=make_torsion_mask(aa),
        torsion_mask_filled=make_torsion_mask(aa, fill=True),
        idx_mask=make_idx_mask(aa),
        atom_token_mask=make_atom_token_mask(aa),
        rigid_idx_mask=SC_BUILD_INFO[aa]['rigid-frames-idxs'],
    )
    for aa in INDEX2AAS
}
850 |
851 |
--------------------------------------------------------------------------------
/mp_nerf/massive_pnerf.py:
--------------------------------------------------------------------------------
1 | import time
2 | import numpy as np
3 | # diff ml
4 | import torch
5 | from einops import repeat
6 |
7 |
def get_axis_matrix(a, b, c, norm=True):
    """ Builds an orthogonal basis [e1, e2, e3] from three points of a plane.
        Useful for constructing rotation matrices between planes
        according to the first answer here:
        https://math.stackexchange.com/questions/1876615/rotation-matrix-from-plane-a-to-b
        Inputs:
        * a: (batch, 3) or (3, ). point(s) of the plane
        * b: (batch, 3) or (3, ). point(s) of the plane
        * c: (batch, 3) or (3, ). point(s) of the plane
        * norm: bool. whether to scale every basis vector to unit length
        Outputs: (batch, 3, 3) or (3, 3). basis vectors stacked as rows:
        * e1 = (c-b)
        * e3 = e1 x (b-a)
        * e2 = e3 x e1
        Note: Could be done by Gram-Schmidt and extended to N dimensions,
              but this is faster and more intuitive for 3D.
    """
    e1 = c - b
    e3 = torch.cross(e1, b - a, dim=-1)
    e2 = torch.cross(e3, e1, dim=-1)
    frame = torch.stack((e1, e2, e3), dim=-2)
    if not norm:
        return frame
    # each row is one basis vector, so normalize along the last dimension
    return frame / torch.norm(frame, dim=-1, keepdim=True)
35 |
36 |
37 |
def mp_nerf_torch(a, b, c, l, theta, chi):
    """ Custom Natural extension of Reference Frame.
        Inputs:
        * a: (batch, 3) or (3,). point(s) of the plane, not connected to d
        * b: (batch, 3) or (3,). point(s) of the plane, not connected to d
        * c: (batch, 3) or (3,). point(s) of the plane, connected to d
        * l: (batch,) or 0-dim tensor. length(s) of the c-d bond
        * theta: (batch,) or 0-dim tensor. angle(s) between b-c-d
        * chi: (batch,) or 0-dim tensor. dihedral angle(s) between the
               a-b-c and b-c-d planes
        Outputs: d (batch, 3) or (3,). the next point in the sequence, linked to c.
                 Leading batch dimensions of size 1 are preserved.
        Raises: ValueError if any theta lies outside [-pi, pi].
    """
    # safety check
    if not ( (-np.pi <= theta) * (theta <= np.pi) ).all().item():
        raise ValueError(f"theta(s) must be in radians and in [-pi, pi]. theta(s) = {theta}")
    # calc vecs
    ba = b-a
    cb = c-b
    # calc rotation matrix. based on plane normals and normalized
    n_plane = torch.cross(ba, cb, dim=-1)
    n_plane_ = torch.cross(n_plane, cb, dim=-1)
    rotate = torch.stack([cb, n_plane_, n_plane], dim=-1)
    # out-of-place division: `rotate` is saved by torch.norm for its backward
    # pass, so it must not be overwritten in place
    rotate = rotate / torch.norm(rotate, dim=-2, keepdim=True)
    # calc proto point, rotate. add (-1 for sidechainnet convention)
    # https://github.com/jonathanking/sidechainnet/issues/14
    d = torch.stack([-torch.cos(theta),
                     torch.sin(theta) * torch.cos(chi),
                     torch.sin(theta) * torch.sin(chi)], dim=-1).unsqueeze(-1)
    # extend base point, set length. squeeze only the trailing matmul dim so
    # that singleton batch dims (e.g. inputs of shape (B, 1, 3)) survive
    return c + l.unsqueeze(-1) * torch.matmul(rotate, d).squeeze(-1)
66 |
67 |
68 |
--------------------------------------------------------------------------------
/mp_nerf/ml_utils.py:
--------------------------------------------------------------------------------
1 | # Author: Eric Alcaide
2 |
3 | import torch
4 | import numpy as np
5 | from einops import repeat, rearrange
6 |
7 | # module
8 | from mp_nerf.massive_pnerf import *
9 | from mp_nerf.utils import *
10 | from mp_nerf.kb_proteins import *
11 | from mp_nerf.proteins import *
12 |
13 |
def scn_atom_embedd(seq_list):
    """ Returns the token for each atom in the aa seq.
        Inputs:
        * seq_list: list of FASTA sequences. same length
        Outputs: long tensor with one row of atom tokens per residue,
                 stacked over sequences.
    """
    # the per-residue lookup is a plain python loop (done on cpu)
    per_sequence = [
        torch.tensor([SUPREME_INFO[aa]["atom_token_mask"] for aa in seq])
        for seq in seq_list
    ]
    return torch.stack(per_sequence, dim=0).long()
26 |
27 |
def chain2atoms(x, mask=None, c=3):
    """ Expand from (L, other) to (L, C, other).
        Every residue-level entry is repeated `c` times along a new second
        axis. If `mask` is given, the expanded result is indexed with it.
    """
    expanded = repeat(x, 'l ... -> l c ...', c=c)
    return expanded if mask is None else expanded[mask]
34 |
35 |
36 | ######################
37 | # from: https://static-content.springer.com/esm/art%3A10.1038%2Fs41586-021-03819-2/MediaObjects/41586_2021_3819_MOESM1_ESM.pdf
38 |
def rename_symmetric_atoms(pred_coors, true_coors, seq_list, cloud_mask, pred_feats=None):
    """ Corrects ambiguous atoms (due to 180 torsions - ambiguous sidechains).
        For each residue type in AMBIGUOUS, distances from the ambiguous atom
        pair to the other present atoms are compared between prediction and
        ground truth (for both namings of the pair), and the predicted pair
        is swapped where the comparison selects the alternative naming.
        Inputs:
        * pred_coors: (batch, L, 14, 3) float. sidechainnet format (see mp_nerf.kb_proteins)
        * true_coors: (batch, L, 14, 3) float. sidechainnet format (see mp_nerf.kb_proteins)
        * seq_list: list of FASTA sequences
        * cloud_mask: (batch, L, 14) bool. mask for present atoms
        * pred_feats: (batch, L, 14, D) optional. atom-wise predicted features

        Warning! A coordinate might be missing. TODO:
        Outputs: pred_coors, pred_feats. Both are written to in place and the
                 same objects are returned (pred_feats stays None if not given).
    """
    aux_cloud_mask = cloud_mask.clone() # will be manipulated

    for i,seq in enumerate(seq_list):
        for aa, pairs in AMBIGUOUS.items():
            # indexes of aas in chain - check coords are given for aa
            amb_idxs = np.array(pairs["indexs"]).flatten().tolist()
            # NOTE(review): `.tolist()[0]` keeps only the first row returned by
            # torch.nonzero, so the membership test sees a single residue
            # index -- confirm this is intended.
            idxs = torch.tensor([
                k for k,s in enumerate(seq) if s==aa and \
                k in set( torch.nonzero(aux_cloud_mask[i, :, amb_idxs].sum(dim=-1)).tolist()[0] )
            ]).long()
            # check if any AAs matching
            if idxs.shape[0] == 0:
                continue
            # get indexes of non-ambiguous
            # NOTE(review): `idxs` and `amb_idxs` are both index lists on
            # adjacent axes; they are broadcast against each other, which only
            # works for compatible lengths -- verify with multi-residue input.
            aux_cloud_mask[i, idxs, amb_idxs] = False
            # present, non-ambiguous atoms are taken from the first matching residue only
            non_amb_idx = torch.nonzero(aux_cloud_mask[i, idxs[0]]).tolist()
            for a, pair in enumerate(pairs["indexs"]):
                # calc distances
                d_ij_pred = torch.cdist(pred_coors[ i, idxs, pair ], pred_coors[i, idxs, non_amb_idx], p=2) # 2, N
                # true distances for the pair in original order followed by swapped order
                d_ij_true = torch.cdist(true_coors[ i, idxs, pair+pair[::-1] ], true_coors[i, idxs, non_amb_idx], p=2) # 2, 2N
                # see if alternative is better (less distance)
                # NOTE(review): signed differences are summed (no abs / square),
                # so positive and negative deviations can cancel -- confirm.
                idxs_to_change = ( (d_ij_pred - d_ij_true[2:]).sum(dim=-1) < (d_ij_pred - d_ij_true[:2]).sum(dim=-1) ).nonzero()
                # change those: write the pair back in reversed order
                pred_coors[i, idxs[idxs_to_change], pair] = pred_coors[i, idxs[idxs_to_change], pair[::-1]]
                if pred_feats is not None:
                    pred_feats[i, idxs[idxs_to_change], pair] = pred_feats[i, idxs[idxs_to_change], pair[::-1]]

    return pred_coors, pred_feats
79 |
80 |
def torsion_angle_loss(pred_torsions, true_torsions, coeff=2., angle_mask=None):
    """ Loss on torsion angles based on the cosine of their difference.
        The cosine is evaluated both on the raw angles and on the angles
        mapped through `to_zero_two_pi`; the larger (better) of the two
        is used for each element.
        Inputs:
        * pred_torsions: ( (B), L, X ) float. Predicted torsion angles.(-pi, pi)
                         Same format as sidechainnet.
        * true_torsions: ( (B), L, X ) true torsion angles. (-pi, pi)
        * coeff: float. weight coefficient
        * angle_mask: ((B), L, (X)) bool. True marks the non-existing angles;
                      those positions get a loss of 0.

        Outputs: coeff * (1 - cosine), element-wise over the inputs
    """
    direct = torch.cos(pred_torsions - true_torsions)
    wrapped = torch.cos(to_zero_two_pi(pred_torsions) - to_zero_two_pi(true_torsions))
    best_cos = torch.max(direct, wrapped)
    if angle_mask is not None:
        # a cosine of 1 means a perfect match, i.e. zero loss
        best_cos[angle_mask] = 1.
    return coeff * (1 - best_cos)
100 |
101 |
def fape_torch(pred_coords, true_coords, max_val=10., l_func=None,
               c_alpha=False, seq_list=None, rot_mats_g=None):
    """ Computes the Frame-Aligned Point Error. Scaled 0 <= FAPE <= 1
        Each chain is centered on its present atoms, rotated by every frame
        rotation, and the clamped point-wise error is averaged over frames.
        Inputs:
        * pred_coords: (B, L, C, 3) predicted coordinates.
        * true_coords: (B, L, C, 3) ground truth coordinates.
        * max_val: maximum value (it's also the radius due to L1 usage).
                   errors are clamped to [0, max_val] and divided by it.
        * l_func: function. allow for options other than l1 (consider dRMSD).
                  defaults to the euclidean distance between matching points.
        * c_alpha: bool. whether to only calculate frames and loss from c_alphas
        * seq_list: list of strs (FASTA sequences). to calculate rigid bodies' indexs.
                    Defaults to C-alpha if not passed.
                    NOTE(review): the body indexes seq_list[s] whenever
                    rot_mats_g is None, so None is not actually handled here.
        * rot_mats_g: optional. List of n_seqs x (N_frames, 3, 3) rotation matrices.
                      When given, frames are not recomputed from coordinates.

        Outputs: (B, N_atoms)
    """
    fape_store = []
    if l_func is None:
        # euclidean distance; eps sits inside the sqrt. `sup` is not used.
        l_func = lambda x,y,eps=1e-7,sup=max_val: (((x-y)**2).sum(dim=-1) + eps).sqrt()
    # for chain
    for s in range(pred_coords.shape[0]):
        fape_store.append(0)
        # an atom counts as present when its true coordinates are not all zero
        cloud_mask = (torch.abs(true_coords[s]).sum(dim=-1) != 0)
        # center both structures
        pred_center = pred_coords[s] - pred_coords[s, cloud_mask].mean(dim=0, keepdim=True)
        true_center = true_coords[s] - true_coords[s, cloud_mask].mean(dim=0, keepdim=True)
        # flatten this chain to (L*C, 3) / (L*C,)
        pred_center = rearrange(pred_center, 'l c d -> (l c) d')
        true_center = rearrange(true_center, 'l c d -> (l c) d')
        mask_center = rearrange(cloud_mask, 'l c -> (l c)')
        # get frames and conversions - same scheme as in mp_nerf proteins' concat of monomers
        if rot_mats_g is None:
            rigid_idxs = scn_rigid_index_mask(seq_list[s], c_alpha=c_alpha)
            # frames are detached: no gradient flows through the frame construction
            true_frames = get_axis_matrix(*true_center[rigid_idxs].detach(), norm=True)
            pred_frames = get_axis_matrix(*pred_center[rigid_idxs].detach(), norm=True)
            rot_mats = torch.matmul(torch.transpose(pred_frames, -1, -2), true_frames)
        else:
            rot_mats = rot_mats_g[s]

        # calculate loss only on c_alphas
        # NOTE(review): `rigid_idxs` is only assigned when rot_mats_g is None;
        # c_alpha=True together with rot_mats_g would fail on this name.
        if c_alpha:
            mask_center[:] = False
            mask_center[rigid_idxs[1]] = True

        # measure errors - for residue
        # NOTE(review): pred_center / true_center / mask_center are already
        # per-chain tensors of shape (L*C, ...) here, so the extra `[s]`
        # selects the single atom at flat index s rather than chain s.
        # This looks unintended (`pred_center[mask_center]` would cover all
        # present atoms) -- confirm before relying on the output.
        for i,rot_mat in enumerate(rot_mats):
            fape_store[s] += l_func( pred_center[s][mask_center[s]] @ rot_mat,
                                     true_center[s][mask_center[s]]
                                   ).clamp(0, max_val)
        # average over frames
        fape_store[s] /= rot_mats.shape[0]

    # stack and average
    return (1/max_val) * torch.stack(fape_store, dim=0)
154 |
155 |
156 | # custom
157 |
def atom_selector(scn_seq, x, option=None, discard_absent=True):
    """ Returns a selection of the atoms in a protein.
        Inputs:
        * scn_seq: (batch, len) sidechainnet format or list of strings
        * x: (batch, (len * n_aa), dims) sidechainnet format
        * option: a mask tensor of shape (14,) or one of the strings
                  ['backbone', 'backbone-only', 'backbone-with-oxygen',
                   'backbone-with-cbeta', 'backbone-with-cbeta-and-oxygen', 'all'].
                  Atom slots follow sidechainnet order: N=0, CA=1, C=2, O=3, CB=4.
                  An unrecognized string prints a warning and falls back to
                  the C-alpha (plus N and C if it contains "backbone").
        * discard_absent: bool. Whether to discard the points for which
                          there are no labels (bad recordings)
        Outputs:
        * x[mask]: the selected points
        * mask: (batch, len * 14) bool selection mask
        Raises: ValueError if option is neither a string nor a tensor.
    """
    # get mask of present atoms, one entry per sequence
    present = []
    for i, seq in enumerate(scn_seq):
        pass_x = x[i] if discard_absent else None
        # without coordinates the cloud mask is derived from the sequence
        # string, so integer-encoded sequences are decoded first
        if pass_x is None and isinstance(seq, torch.Tensor):
            seq = "".join([INDEX2AAS[token] for token in seq.cpu().detach().tolist()])

        present.append( scn_cloud_mask(seq, coords=pass_x) )

    present = torch.stack(present, dim=0).bool()

    # atom mask
    if isinstance(option, str):
        # C-alpha is always selected
        atom_mask = torch.tensor([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
        if "backbone" in option:
            atom_mask[[0, 2]] = 1  # N and C

        if option in ("backbone", "backbone-only"):
            pass
        elif option == 'backbone-with-oxygen':
            atom_mask[3] = 1
        elif option == 'backbone-with-cbeta':
            atom_mask[4] = 1  # CB is the first side-chain slot
        elif option == 'backbone-with-cbeta-and-oxygen':
            atom_mask[3] = 1
            atom_mask[4] = 1
        elif option == 'all':
            atom_mask[:] = 1
        else:
            print("Your string doesn't match any option.")

    elif isinstance(option, torch.Tensor):
        atom_mask = option
    else:
        raise ValueError('option needs to be a valid string or a mask tensor of shape (14,) ')

    # combine on the device where the presence mask lives
    atom_mask = atom_mask.to(present.device)
    mask = rearrange(present * atom_mask.unsqueeze(0).unsqueeze(0).bool(), 'b l c -> b (l c)')
    return x[mask], mask
209 |
210 |
def _wrap_to_pi(values):
    """ Folds angles (radians) back towards the [-pi, pi] range, in place.
        Inputs:
        * values: float tensor of angles. modified in place.
        Outputs: the same tensor object
    """
    two_pi = 2 * np.pi
    # anything more than a full turn away is first reduced modulo 2*pi
    off_bounds = (values > two_pi) + (values < -two_pi)
    if off_bounds.sum().item() > 0:
        values[off_bounds] = values[off_bounds] % two_pi
    # both masks are taken before writing, so each entry is shifted at most once
    upper, lower = values > np.pi, values < -np.pi
    if upper.sum().item() > 0:
        values[upper] = values[upper] - two_pi
    if lower.sum().item() > 0:
        values[lower] = two_pi + values[lower]
    return values


def noise_internals(seq, angles=None, coords=None, noise_scale=0.5, theta_scale=0.5, verbose=0):
    """ Noises the internal coordinates -> dihedral and bond angles.
        Inputs:
        * seq: string. Sequence in FASTA format
        * angles: (l, 12) sidechainnet angles tensor. if None, random
                  angles of that shape are drawn (coords are then required)
        * coords: (l, 14, 3) sidechainnet coords. if passed, the scaffolds
                  are corrected with the values measured on them
        * noise_scale: float. std of noise gaussian.
        * theta_scale: float. multiplier for bond angles
        * verbose: truthy to print the noise scale being applied
        Outputs: whatever `protein_fold` returns for the noised scaffolds:
        * chain (l, c, d)
        * cloud_mask (l, c)
    """
    assert angles is not None or coords is not None, \
        "You must pass either angles or coordinates"
    # get scaffolds
    if angles is None:
        angles = torch.randn(coords.shape[0], 12).to(coords.device)

    scaffolds = build_scaffolds_from_scn_angles(seq, angles.clone())

    if coords is not None:
        scaffolds = modify_scaffolds_with_coords(scaffolds, coords)

    # noise bond angles and dihedrals (dihedrals of everyone, angles only of BB)
    if noise_scale > 0.:
        if verbose:
            print("noising", noise_scale)
        # thetas: scaled by theta_scale, only for the first 3 atoms (BB)
        noised_bb = scaffolds["angles_mask"][0, :, :3].clone()
        noised_bb += theta_scale*noise_scale * torch.randn_like(noised_bb)
        scaffolds["angles_mask"][0, :, :3] = _wrap_to_pi(noised_bb)

        # dihedrals: every atom
        noised_dihedrals = scaffolds["angles_mask"][1].clone()
        noised_dihedrals += noise_scale * torch.randn_like(noised_dihedrals)
        scaffolds["angles_mask"][1] = _wrap_to_pi(noised_dihedrals)

    # reconstruct on the device the scaffolds were built on: protein_fold
    # defaults to cpu, which would clash with scaffolds living elsewhere
    return protein_fold(**scaffolds, device=angles.device)
270 |
271 |
def combine_noise(true_coords, seq=None, int_seq=None, angles=None,
                  NOISE_INTERNALS=1e-2, INTERNALS_SCN_SCALE=5.,
                  SIDECHAIN_RECONSTRUCT=True):
    """ Combines noises. For internal noise, no points can be missing.
        Inputs:
        * true_coords: ((B), N, D)
        * int_seq: (N,) torch long tensor of sidechainnet AA tokens
        * seq: str of length N. FASTA AAs.
        * angles: (N_aa, D_). optional. used for internal noising
        * NOISE_INTERNALS: float. amount of noise for internal coordinates.
                           a falsy value skips the internal noising step
        * INTERNALS_SCN_SCALE: float. forwarded to `noise_internals`
                               as its `theta_scale`
        * SIDECHAIN_RECONSTRUCT: bool. whether to discard the sidechain and
                                 rebuild by sampling from plausible distro.
        Outputs:
        * noised coords: (1, N, D) when any of the two steps runs
        * boolean mask of atoms with at least one non-zero coordinate.
          it is computed before the batch dim is added, so it has
          the shape of the `true_coords` passed in, minus the last dim
    """
    # get seqs right
    assert int_seq is not None or seq is not None, "Either int_seq or seq must be passed"
    if int_seq is not None and seq is None:
        seq = "".join([INDEX2AAS[x] for x in int_seq.cpu().detach().tolist()])
    elif int_seq is None and seq is not None:
        int_seq = torch.tensor([AAS2INDEX[x] for x in seq.upper()], device=true_coords.device)

    # an atom counts as absent when all of its coordinates are exactly 0
    cloud_mask_flat = (true_coords == 0.).sum(dim=-1) != true_coords.shape[-1]
    naive_cloud_mask = scn_cloud_mask(seq).bool()

    if NOISE_INTERNALS:
        assert cloud_mask_flat.sum().item() == naive_cloud_mask.sum().item(), \
            "atoms missing: {0}".format( naive_cloud_mask.sum().item() - \
                                         cloud_mask_flat.sum().item() )
    # expand to batch dim if needed
    if len(true_coords.shape) < 3:
        true_coords = true_coords.unsqueeze(0)
    noised_coords = true_coords.clone()
    coords_scn = rearrange(true_coords, 'b (l c) d -> b l c d', c=14)

    ###### STEP 1: internals #########
    if NOISE_INTERNALS:
        # only drop the batch axis: a bare squeeze() would also collapse
        # the length axis of a single-residue input
        noised_coords, _ = noise_internals(seq, angles = angles,
                                           coords = coords_scn.squeeze(0),
                                           noise_scale = NOISE_INTERNALS,
                                           theta_scale = INTERNALS_SCN_SCALE,
                                           verbose = False)
        noised_coords = rearrange(noised_coords, 'l c d -> () (l c) d')

    ###### STEP 2: build from backbone #########
    if SIDECHAIN_RECONSTRUCT:
        # only the mask is needed: everything outside the backbone is zeroed
        _, mask = atom_selector(int_seq.unsqueeze(0), noised_coords, option="backbone", discard_absent=False)
        scaffolds = build_scaffolds_from_scn_angles(seq, angles=None, device="cpu")
        noised_coords[~mask] = 0.
        noised_coords = rearrange(noised_coords, '() (l c) d -> l c d', c=14)
        # the rebuild happens on cpu, then goes back to the input's device
        noised_coords, _ = sidechain_fold(wrapper = noised_coords.cpu(), **scaffolds, c_beta = False)
        noised_coords = rearrange(noised_coords, 'l c d -> () (l c) d').to(true_coords.device)

    return noised_coords, cloud_mask_flat
328 |
329 |
330 |
if __name__ == "__main__":
    # manual smoke test: needs a local joblib file holding a list of
    # proteins in the tuple layout unpacked below
    import joblib
    prots = joblib.load("some_route_to_local_serialized_file_with_prots")

    # set params
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # the last stored protein is the one tested
    seq, int_seq, true_coords, angles, padding_seq, mask, pid = prots[-1]
    true_coords = true_coords.unsqueeze(0)

    # check noised internals
    coords_scn = rearrange(true_coords, 'b (l c) d -> b l c d', c=14)
    cloud, cloud_mask = noise_internals(seq, angles=angles, coords=coords_scn[0], noise_scale=1.)
    print("cloud.shape", cloud.shape)

    # check integral: once from the string sequence, once from the int tokens
    shared_kwargs = dict(angles=None, NOISE_INTERNALS=1e-2, SIDECHAIN_RECONSTRUCT=True)

    integral, mask = combine_noise(true_coords, seq=seq, int_seq=None, **shared_kwargs)
    print("integral.shape", integral.shape)

    integral, mask = combine_noise(true_coords, seq=None, int_seq=int_seq, **shared_kwargs)
    print("integral.shape2", integral.shape)
357 |
358 |
359 |
360 |
--------------------------------------------------------------------------------
/mp_nerf/proteins.py:
--------------------------------------------------------------------------------
1 | # science
2 | import numpy as np
3 | # diff / ml
4 | import torch
5 | from einops import repeat
6 | # module
7 | from mp_nerf.massive_pnerf import *
8 | from mp_nerf.utils import *
9 | from mp_nerf.kb_proteins import *
10 |
11 |
def scn_cloud_mask(seq, coords=None, strict=False):
    """ Gets the boolean mask atom positions (not all aas have same atoms).
        Inputs:
        * seq: (length) iterable of 1-letter aa codes of a protein.
               only read when `coords` is not passed
        * coords: optional. (batch, l*14, d) or (l*14, d) sidechainnet coords.
                  if passed, the mask is measured on them: an atom is
                  present when any of its coordinates is non-zero
        * strict: bool. whether to discard the next points after a missing one
                  (only applies to the mask measured on `coords`)
        Outputs:
        * without coords: (length, 14) mask from the knowledge base
        * with coords: float mask of 0./1. with shape (batch, l, 14),
                       or (l, 14) for unbatched coords
    """
    if coords is None:
        return torch.tensor([SUPREME_INFO[aa]['cloud_mask'] for aa in seq])

    # split the flat atom axis into (residue, atom); leading dims are kept
    per_atom = coords.reshape(*coords.shape[:-2], -1, 14, coords.shape[-1])
    start = ((per_atom != 0).sum(dim=-1) != 0).float()
    # if a point is 0, the following are 0s as well: on a 0/1 mask, the
    # running product along the atom axis stays 0 after the first 0
    if strict:
        start = torch.cumprod(start, dim=-1)
    return start
32 |
33 |
def scn_bond_mask(seq):
    """ Stacks the knowledge-base `bond_mask` entry of every residue.
        Inputs:
        * seq: (length). iterable of 1-letter aa codes of a protein
        Outputs: tensor with one row per residue. (L, 14) maps point
                 to bond length, per the callers' documentation
    """
    per_residue = [SUPREME_INFO[aa]['bond_mask'] for aa in seq]
    return torch.tensor(per_residue)
40 |
41 |
def scn_angle_mask(seq, angles=None, device=None):
    """ Builds the per-atom bond angle (theta) and dihedral masks.
        Inputs:
        * seq: (length). iterable of 1-letter aa codes of a protein
        * angles: (length, 12). [phi, psi, omega, b_angle(n_ca_c), b_angle(ca_c_n), b_angle(c_n_ca), 6_scn_torsions]
                  if None, the pre-filled torsions of the knowledge base are used
        * device: optional. device to move the result to. by default the
                  result stays on the device of `angles` (cpu if not passed)
        Outputs: (2, L, 14) maps point to theta and dihedral.
                 first angle is theta, second is dihedral
    """
    # masks are filled on the device of `angles` so that the indexed
    # assignments below never mix devices; `device` is applied at the end
    build_device = angles.device if angles is not None else torch.device("cpu")
    precise = angles.dtype if angles is not None else torch.get_default_dtype()
    torsion_mask_use = "torsion_mask" if angles is not None else "torsion_mask_filled"
    # get masks
    theta_mask = torch.tensor([SUPREME_INFO[aa]['theta_mask'] for aa in seq], dtype=precise).to(build_device)
    torsion_mask = torch.tensor([SUPREME_INFO[aa][torsion_mask_use] for aa in seq], dtype=precise).to(build_device)

    # adapt general to specific angles if passed
    if angles is not None:
        # fill masks with angle values
        theta_mask[:, 0] = angles[:, 4]     # ca_c_n
        theta_mask[1:, 1] = angles[:-1, 5]  # c_n_ca
        theta_mask[:, 2] = angles[:, 3]     # n_ca_c
        # backbone_torsions
        torsion_mask[:, 0] = angles[:, 1]     # n determined by psi of previous
        torsion_mask[1:, 1] = angles[:-1, 2]  # ca determined by omega of previous
        torsion_mask[:, 2] = angles[:, 0]     # c determined by phi
        # https://github.com/jonathanking/sidechainnet/blob/master/sidechainnet/structure/StructureBuilder.py#L313
        torsion_mask[:, 3] = angles[:, 1] - np.pi

        # add torsions to sidechains - no need to modify indexes due to torsion modification
        # since extra rigid bodies are in terminal positions in sidechain
        to_fill = torsion_mask != torsion_mask  # nan entries: fill with passed values
        to_pick = torsion_mask == 999           # 999 entries: infer from previous one
        for i, aa in enumerate(seq):
            # nan holes take the first sidechain torsions of the residue, in order
            number = to_fill[i].long().sum()
            torsion_mask[i, to_fill[i]] = angles[i, 6:6+number]

            # inferred torsions: previous atom's value minus pi. visited in
            # ascending order, since an entry may depend on the one before it
            for j, val in enumerate(to_pick[i]):
                if val:
                    torsion_mask[i, j] = torsion_mask[i, j-1] - np.pi

            # special rigid bodies anomalies:
            if aa == "I":  # scn_torsion(CG1) - scn_torsion(CG2) = 2.13 (see KB)
                torsion_mask[i, 7] += torsion_mask[i, 5]
            elif aa == "L":
                torsion_mask[i, 7] += torsion_mask[i, 6]

    # applied whether or not angles were passed
    torsion_mask[-1, 3] += np.pi
    angles_mask = torch.stack([theta_mask, torsion_mask], dim=0)
    # honour an explicitly requested device (it used to be silently ignored)
    return angles_mask if device is None else angles_mask.to(device)
92 |
93 |
def scn_index_mask(seq):
    """ Stacks the knowledge-base `idx_mask` entry of every residue.
        Inputs:
        * seq: (length). iterable of 1-letter aa codes of a protein
        Outputs: the stacked (L, s, d) entries with the last axis moved
                 to the front -> (d, L, s). callers document it as
                 (3, L, 11): idxs of the 3 reference points of each atom
    """
    per_residue = [SUPREME_INFO[aa]['idx_mask'] for aa in seq]
    stacked = torch.tensor(per_residue)
    return rearrange(stacked, 'l s d -> d l s')
102 |
103 |
def scn_rigid_index_mask(seq, c_alpha=None):
    """ Collects the knowledge-base `rigid_idx_mask` of every residue,
        offset into the flattened (14 atoms per residue) index space.
        Inputs:
        * seq: (length). iterable of 1-letter aa codes of a protein
        * c_alpha: bool. whether to return only the c_alpha rigid group
                   (the first group of each residue)
        Outputs: (3, Length * Groups). indexes for 1st, 2nd and 3rd point
                 to construct frames for each group.
    """
    groups = []
    for i, aa in enumerate(seq):
        rigid = torch.tensor(SUPREME_INFO[aa]['rigid_idx_mask'])
        if c_alpha:
            rigid = rigid[:1]
        # residue i owns the flat atom indexes [14*i, 14*i + 14)
        groups.append(rigid + 14*i)
    return torch.cat(groups, dim=0).t()
116 |
117 |
def build_scaffolds_from_scn_angles(seq, angles=None, coords=None, device="auto"):
    """ Builds scaffolds for fast access to data
        Inputs:
        * seq: string of aas (1 letter code)
        * angles: (L, 12) tensor containing the internal angles.
                  Distributed as follows (following sidechainnet convention):
                  * (L, 3) for torsion angles
                  * (L, 3) bond angles
                  * (L, 6) sidechain angles
        * coords: optional sidechainnet coords, forwarded to `scn_cloud_mask`.
                  builds the mask with those instead
                  (better accuracy if modified residues present).
        * device: "auto" or a torch device. "auto" takes the device of
                  `angles`, or cpu when no angles are passed
        Outputs: dict with the keys:
        * cloud_mask: (L, 14 ) mask of points that should be converted to coords
        * point_ref_mask: (3, L, 11) maps point (except n-ca-c) to idxs of
                          previous 3 points in the coords array
        * angles_mask: (2, L, 14) maps point to theta and dihedral
        * bond_mask: (L, 14) gives the length of the bond originating that atom
    """
    # auto infer device and precision
    precise = angles.dtype if angles is not None else torch.get_default_dtype()
    if device == "auto":
        # without angles there is nothing to infer from: the literal "auto"
        # must never reach `.to()`, so fall back to cpu
        device = angles.device if angles is not None else torch.device("cpu")

    # coords=None makes scn_cloud_mask use the knowledge-base mask
    cloud_mask = scn_cloud_mask(seq, coords=coords).bool().to(device)

    point_ref_mask = scn_index_mask(seq).long().to(device)

    angles_mask = scn_angle_mask(seq, angles).to(device, precise)

    bond_mask = scn_bond_mask(seq).to(device, precise)
    # return all in a dict
    return {"cloud_mask": cloud_mask,
            "point_ref_mask": point_ref_mask,
            "angles_mask": angles_mask,
            "bond_mask": bond_mask}
158 |
159 |
160 | #############################
161 | ####### ENCODERS ############
162 | #############################
163 |
164 |
def modify_angles_mask_with_torsions(seq, angles_mask, torsions):
    """ Writes variable torsions into the dihedral half of an angles mask.
        Inputs:
        * seq: (L,) str. FASTA sequence
        * angles_mask: (2, L, 14) float tensor of (angles, torsions).
                       modified in place
        * torsions: (L, 4) float tensor (or (L, 5) if it includes torsion for cb)
        Outputs: (2, L, 14) the same `angles_mask` object, updated
    """
    n_torsions = torsions.shape[-1]
    # a 5th column means the c_beta torsion is passed as well -> start one atom earlier
    start = 4 if n_torsions == 5 else 5
    stop = start + n_torsions
    # nan entries of the knowledge-base mask are the values to replace  (L, 14)
    kb_mask = torch.tensor([SUPREME_INFO[aa]["torsion_mask"] for aa in seq]).to(torsions.device)
    fillable = kb_mask != kb_mask
    # nothing outside of the [start, stop) atom window is touched
    fillable[:, :start] = False
    fillable[:, stop:] = False

    angles_mask[1, fillable] = torsions[fillable[:, start:stop]]
    return angles_mask
183 |
184 |
def modify_scaffolds_with_coords(scaffolds, coords):
    """ Overwrites bond lengths, bond angles and dihedrals of the
        scaffolds with the values measured on actual coordinates.
        Inputs:
        * scaffolds: dict. as returned by `build_scaffolds_from_scn_angles`.
                     its "bond_mask" and "angles_mask" tensors are
                     modified in place
        * coords: (L, 14, 3). sidechainnet tensor. same device as scaffolds
        Outputs: the same scaffolds dict, corrected
    """
    # aliases to the stored tensors: writes below land in the scaffolds
    bond_mask = scaffolds["bond_mask"]
    angles_mask = scaffolds["angles_mask"]
    point_ref_mask = scaffolds["point_ref_mask"]

    # backbone distances: N (from previous C), CA (from N), C (from CA)
    bond_mask[1:, 0] = torch.norm(coords[1:, 0] - coords[:-1, 2], dim=-1)
    bond_mask[:, 1] = torch.norm(coords[:, 1] - coords[:, 0], dim=-1)
    bond_mask[:, 2] = torch.norm(coords[:, 2] - coords[:, 1], dim=-1)

    # O, CB, side chain
    rows = np.arange(len(coords))
    for atom in range(3, 14):
        # idxs of the 3 reference atoms, one per residue: (3, L, 11) -> 3 * (L,)
        ref_a, ref_b, ref_c = point_ref_mask[:, :, atom-3]
        placed = coords[:, atom]
        at_b, at_c = coords[rows, ref_b], coords[rows, ref_c]

        # correct distances
        bond_mask[:, atom] = torch.norm(placed - coords[rows, ref_c], dim=-1)
        # get angles
        angles_mask[0, :, atom] = get_angle(at_b, at_c, placed)

        if atom == 4:
            # C-beta: the C requested is from the previous residue
            prev_res_c = coords[rows[:-1], ref_a[1:]]  # (L-1), 3
            # for 1st residue, use position of the second residue's N
            second_res_n = coords[1, :1]  # 1, 3
            at_a = torch.cat([second_res_n, prev_res_c])
        else:
            at_a = coords[rows, ref_a]
        # get dihedrals
        angles_mask[1, :, atom] = get_dihedral(at_a, at_b, at_c, placed)

    # correct angles for backbone
    angles_mask[0, :-1, 0] = get_angle(coords[:-1, 1], coords[:-1, 2], coords[1:, 0])  # ca_c_n
    angles_mask[0, 1:, 1] = get_angle(coords[:-1, 2], coords[1:, 0], coords[1:, 1])    # c_n_ca
    angles_mask[0, :, 2] = get_angle(coords[:, 0], coords[:, 1], coords[:, 2])         # n_ca_c

    # correct dihedrals for backbone
    # N determined by previous psi = f(n, ca, c, n+1)
    angles_mask[1, :-1, 0] = get_dihedral(coords[:-1, 0], coords[:-1, 1], coords[:-1, 2], coords[1:, 0])
    # CA determined by omega = f(ca, c, n+1, ca+1)
    angles_mask[1, 1:, 1] = get_dihedral(coords[:-1, 1], coords[:-1, 2], coords[1:, 0], coords[1:, 1])
    # C determined by phi = f(c-1, n, ca, c)
    angles_mask[1, 1:, 2] = get_dihedral(coords[:-1, 2], coords[1:, 0], coords[1:, 1], coords[1:, 2])

    return scaffolds
238 |
239 |
240 | ##################################
241 | ####### MAIN FUNCTION ############
242 | ##################################
243 |
244 |
def protein_fold(cloud_mask, point_ref_mask, angles_mask, bond_mask,
                 device=torch.device("cpu"), hybrid=False):
    """ Calcs coords of a protein given it's
        sequence and internal angles.
        Inputs:
        * cloud_mask: (L, 14) mask of points that should be converted to coords
        * point_ref_mask: (3, L, 11) maps point (except n-ca-c) to idxs of
                          previous 3 points in the coords array
        * angles_mask: (2, L, 14) maps point to theta and dihedral
        * bond_mask: (L, 14) gives the length of the bond originating that atom
        * device: device on which the coords are allocated
        * hybrid: bool. when the coords are on cuda, run the sequential
                  rotation loop on cpu and move the result back
        Output: (L, 14, 3) and (L, 14) coordinates and cloud_mask
    """
    # automatic type (float, mixed, double) and size detection
    precise = bond_mask.dtype
    length = cloud_mask.shape[0]
    # create coord wrapper
    coords = torch.zeros(length, 14, 3, device=device, dtype=precise)

    # first AA: N at the origin, CA along x, C in the x,y plane
    unit_x = torch.tensor([1, 0, 0], device=device, dtype=precise)
    coords[0, 1] = coords[0, 0] + unit_x * BB_BUILD_INFO["BONDLENS"]["n-ca"]
    first_angle = np.pi - angles_mask[0, 0, 2]
    ca_to_c = torch.tensor([torch.cos(first_angle),
                            torch.sin(first_angle),
                            0.], device=device, dtype=precise)
    coords[0, 2] = coords[0, 1] + ca_to_c * BB_BUILD_INFO["BONDLENS"]["ca-c"]

    # starting positions (in the x,y plane) and normal vector [0,0,1]
    init_a = repeat(torch.tensor([1., 0., 0.], device=device, dtype=precise), 'd -> l d', l=length)
    init_b = repeat(torch.tensor([1., 1., 0.], device=device, dtype=precise), 'd -> l d', l=length)

    # N -> CA for every residue at once. 1st one is skipped: done already
    thetas, dihedrals = angles_mask[:, :, 1]
    placed_ca = mp_nerf_torch(init_a, init_b, coords[:, 0],
                              bond_mask[:, 1], thetas, dihedrals)
    coords[1:, 1] = placed_ca[1:]

    # CA -> C. 1st one is skipped: done already
    thetas, dihedrals = angles_mask[:, :, 2]
    placed_c = mp_nerf_torch(init_b, coords[:, 0], coords[:, 1],
                             bond_mask[:, 2], thetas, dihedrals)
    coords[1:, 2] = placed_c[1:]

    # C -> N of the next residue. slot 3 holds it until the oxygen
    # overwrites that slot in the sidechain pass below
    thetas, dihedrals = angles_mask[:, :, 0]
    coords[:, 3] = mp_nerf_torch(coords[:, 0], coords[:, 1], coords[:, 2],
                                 bond_mask[:, 0], thetas, dihedrals)

    #########
    # sequential pass to join fragments
    #########
    # part of rotation mat corresponding to origin - 3 orthogonals
    mat_origin = get_axis_matrix(init_a[0], init_b[0], coords[0, 0], norm=False)
    # part of rotation mat corresponding to destins || a, b, c = CA, C, N+1
    # (L-1) since the first is in the origin already
    mat_destins = get_axis_matrix(coords[:-1, 1], coords[:-1, 2], coords[:-1, 3])

    # get rotation matrices from origins
    # https://math.stackexchange.com/questions/1876615/rotation-matrix-from-plane-a-to-b
    rotations = torch.matmul(mat_origin.t(), mat_destins)
    rotations /= torch.norm(rotations, dim=-1, keepdim=True)

    # rotation concatenation: inherently sequential, each matrix needs the
    # accumulated previous one. optionally done on cpu
    offload = coords.is_cuda and hybrid
    if offload:
        rotations = rotations.cpu()
    for i in range(1, length-1):
        rotations[i] = torch.matmul(rotations[i], rotations[i-1])
    if offload:
        rotations = rotations.to(device)

    # rotate all
    coords[1:, :4] = torch.matmul(coords[1:, :4], rotations)
    # offset each position by cumulative sum at that position
    coords[1:, :4] += torch.cumsum(coords[:-1, 3], dim=0).unsqueeze(-2)

    #########
    # parallel sidechain - do the oxygen, c-beta and side chain
    #########
    for atom in range(3, 14):
        present = cloud_mask[:, atom]
        thetas, dihedrals = angles_mask[:, present, atom]
        ref_a, ref_b, ref_c = point_ref_mask[:, present, atom-3]

        if atom == 4:
            # C-beta: the c requested is from the previous residue - offset the
            # residue idxs by one. can't be done with slicing bc glycines
            # are inside chain (dont have cb)
            prev_residue = present.nonzero().view(-1) - 1
            coords_a = coords[prev_residue, ref_a]  # (L-1), 3
            # the 1st residue has no previous one (idx -1 wrapped around above)
            # NOTE(review): the original comment says "second residue's N", but
            # [1, 1] is atom slot 1 of residue 1, while modify_scaffolds_with_coords
            # reads coords[1, :1] (slot 0) for the same purpose - confirm intent
            if present[0].item():
                coords_a[0] = coords[1, 1]
        else:
            coords_a = coords[present, ref_a]

        coords[present, atom] = mp_nerf_torch(coords_a,
                                              coords[present, ref_b],
                                              coords[present, ref_c],
                                              bond_mask[present, atom],
                                              thetas, dihedrals)

    return coords, cloud_mask
347 |
348 |
def sidechain_fold(wrapper, cloud_mask, point_ref_mask, angles_mask, bond_mask,
                   device=torch.device("cpu"), c_beta=False):
    """ Places the oxygen and sidechain atoms on top of a given backbone.
        Inputs:
        * wrapper: (L, 14, 3). coords container with backbone ([:, :3]) and optionally
                   c_beta ([:, 4]). it is filled in place
        * cloud_mask: (L, 14) mask of points that should be converted to coords
        * point_ref_mask: (3, L, 11) maps point (except n-ca-c) to idxs of
                          previous 3 points in the coords array
        * angles_mask: (2, L, 14) maps point to theta and dihedral
        * bond_mask: (L, 14) gives the length of the bond originating that atom
        * device: not read by this function
        * c_beta: whether to place cbeta

        Output: (L, 14, 3) and (L, 14) coordinates and cloud_mask
    """
    # parallel sidechain - do the oxygen, c-beta and side chain
    for atom in range(3, 14):
        is_cbeta = atom == 4
        # skip cbeta unless asked to place it
        if is_cbeta and not c_beta:
            continue
        # prepare inputs
        present = cloud_mask[:, atom]
        thetas, dihedrals = angles_mask[:, present, atom]
        ref_a, ref_b, ref_c = point_ref_mask[:, present, atom-3]

        if is_cbeta:
            # the c requested is from the previous residue - offset the
            # residue idxs by one. can't be done with slicing bc glycines
            # are inside chain (dont have cb)
            prev_residue = present.nonzero().view(-1) - 1
            coords_a = wrapper[prev_residue, ref_a]  # (L-1), 3
            # the 1st residue has no previous one (idx -1 wrapped around above)
            # NOTE(review): the original comment says "second residue's N", but
            # [1, 1] is atom slot 1 of residue 1 - confirm intent
            if present[0].item():
                coords_a[0] = wrapper[1, 1]
        else:
            coords_a = wrapper[present, ref_a]

        wrapper[present, atom] = mp_nerf_torch(coords_a,
                                               wrapper[present, ref_b],
                                               wrapper[present, ref_c],
                                               bond_mask[present, atom],
                                               thetas, dihedrals)

    return wrapper, cloud_mask
395 |
--------------------------------------------------------------------------------
/mp_nerf/utils.py:
--------------------------------------------------------------------------------
1 | # Author: Eric Alcaide
2 |
3 | import torch
4 | import numpy as np
5 | from einops import repeat, rearrange
6 |
7 |
8 | # random hacks
9 |
def to_pi_minus_pi(x):
    """ Maps angles in radians to the [-pi, pi) range.
        e.g. to_pi_minus_pi(4) = -2.28 ; to_pi_minus_pi(-4) = 2.28
        Inputs:
        * x: float tensor of angles in radians
        Outputs: tensor of the same shape
    """
    # an even number of half turns lands in [0, pi): keep the remainder.
    # an odd number lands in [pi, 2pi): shift down by a full turn
    return torch.where((x//np.pi) % 2 == 0, x % np.pi, -(2*np.pi - x % (2*np.pi)))


def to_zero_two_pi(x):
    """ Maps angles in radians to the [0, 2*pi) range.
        e.g. to_zero_two_pi(-1) = 5.28 ; to_zero_two_pi(7) = 0.72
        Inputs:
        * x: float tensor of angles in radians
        Outputs: tensor of the same shape
    """
    # the remainder takes the sign of the divisor, so negatives wrap upwards
    return x % (2*np.pi)
13 |
14 | # data utils
def get_prot(dataloader_=None, vocab_=None, min_len=80, max_len=150, verbose=True):
    """ Gets a protein from sidechainnet and returns
        the right attrs for training.
        Keeps cycling over the 'train' split until a protein qualifies.
        Inputs:
        * dataloader_: sidechainnet iterator over dataset
        * vocab_: sidechainnet VOCAB class
        * min_len: int. minimum sequence length
        * max_len: int. maximum sequence length
        * verbose: bool. verbosity level
        Outputs: (cleaned, without padding)
            (seq_str, int_seq, coords, angles, padding_seq, mask, pid)
    """
    while True:
        for batch in dataloader_['train']:
            for i in range(batch.int_seqs.shape[0]):
                tokens = batch.int_seqs[i]
                # token 20 counts as padding; all-zero angle rows count as
                # padding or missing residues. matching both numbers means
                # only accepting prots with no missing residues
                padding_seq = (tokens == 20).sum().item()
                padding_angles = (torch.abs(batch.angs[i]).sum(dim=-1) == 0).long().sum().item()

                if padding_seq != padding_angles:
                    if verbose:
                        print("paddings not matching", padding_seq, padding_angles)
                    continue

                # check for appropiate length
                real_len = tokens.shape[0] - padding_seq
                if not (max_len >= real_len >= min_len):
                    if verbose:
                        print("found a seq of length:", batch.int_seqs[i].shape,
                              "but oustide the threshold:", min_len, max_len)
                    continue

                # strip padding. `or None` keeps everything when there is none
                keep = slice(None, -padding_seq or None)
                seq = ''.join([vocab_.int2char(aa) for aa in tokens.numpy()])[keep]
                int_seq = tokens[keep]
                angles = batch.angs[i][keep]
                mask = batch.msks[i][keep]
                # coords are flat over atoms: 14 rows per residue
                coords = batch.crds[i][:-padding_seq*14 or None]

                if verbose:
                    print("stopping at sequence of length", real_len)
                return seq, int_seq, coords, angles, padding_seq, mask, batch.pids[i]
59 |
60 |
61 | ######################
62 | ## structural utils ##
63 | ######################
64 |
def get_dihedral(c1, c2, c3, c4):
    """ Returns the dihedral angle in radians.
        Will use atan2 formula from:
        https://en.wikipedia.org/wiki/Dihedral_angle#In_polymer_physics
        Inputs:
        * c1: (batch, 3) or (3,)
        * c2: (batch, 3) or (3,)
        * c3: (batch, 3) or (3,)
        * c4: (batch, 3) or (3,)
        Outputs: (batch,) or scalar tensor, as returned by atan2
    """
    # the three consecutive bond vectors
    u1 = c2 - c1
    u2 = c3 - c2
    u3 = c4 - c3

    # normal of the plane spanned by the last two bonds (used by both terms)
    normal_23 = torch.cross(u2, u3, dim=-1)
    normal_12 = torch.cross(u1, u2, dim=-1)

    sin_term = ((torch.norm(u2, dim=-1, keepdim=True) * u1) * normal_23).sum(dim=-1)
    cos_term = (normal_12 * normal_23).sum(dim=-1)
    return torch.atan2(sin_term, cos_term)
81 |
82 |
def get_angle(c1, c2, c3):
    """ Returns the angle in radians.
        Inputs:
        * c1: (batch, 3) or (3,)
        * c2: (batch, 3) or (3,)
        * c3: (batch, 3) or (3,)
        Outputs: (batch,) or scalar tensor, as returned by atan2
    """
    u1 = c2 - c1
    u2 = c3 - c2

    # dont use acos since norms involved.
    # better use atan2 formula: atan2(cross, dot) from here:
    # https://johnblackburne.blogspot.com/2012/05/angle-between-two-3d-vectors.html
    cross_norm = torch.cross(u1, u2, dim=-1).norm(dim=-1)
    # the dot product is negated since we want the angle in
    # reversed order - sidechainnet issues
    neg_dot = -(u1*u2).sum(dim=-1)
    return torch.atan2(cross_norm, neg_dot)
100 |
101 |
def kabsch_torch(X, Y):
    """ Kabsch alignment of X into Y.
        Assumes X,Y are both (D, N) - usually (3, N)
        Inputs:
        * X: (D, N) points to be rotated
        * Y: (D, N) reference points
        Outputs: (X_, Y_). X centered and rotated onto Y, and Y centered.
                 both (D, N)
    """
    # center X and Y to the origin
    X_ = X - X.mean(dim=-1, keepdim=True)
    Y_ = Y - Y.mean(dim=-1, keepdim=True)
    # calculate covariance matrix
    C = torch.matmul(X_, Y_.t())
    # Optimal rotation matrix via SVD. detect the api instead of parsing the
    # minor version number, which sent torch 2.x down the deprecated path
    if hasattr(torch, "linalg") and hasattr(torch.linalg, "svd"):
        # already returns the last factor transposed
        V, S, W = torch.linalg.svd(C.detach())
    else:
        # warning! W must be transposed
        V, S, W = torch.svd(C.detach())
        W = W.t()
    # determinant sign for direction correction (avoids reflections)
    d = (torch.det(V) * torch.det(W)) < 0.0
    if d:
        S[-1] = S[-1] * (-1)
        V[:, -1] = V[:, -1] * (-1)
    # Create Rotation matrix U
    U = torch.matmul(V, W)
    # calculate rotations
    X_ = torch.matmul(X_.t(), U).t()
    # return centered and aligned
    return X_, Y_
128 |
129 |
def rmsd_torch(X, Y):
    """ Root of the mean squared difference over the last two axes.
        Assumes x,y are both (batch, d, n) - usually (batch, 3, N).
        Outputs: (batch,)
    """
    squared_diff = (X - Y) ** 2
    return squared_diff.mean(dim=(-1, -2)).sqrt()
133 |
134 |
135 |
136 |
--------------------------------------------------------------------------------
/notebooks/experiments/[131, 150]_info.joblib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments/[131, 150]_info.joblib
--------------------------------------------------------------------------------
/notebooks/experiments/[200, 250]_info.joblib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments/[200, 250]_info.joblib
--------------------------------------------------------------------------------
/notebooks/experiments/[331, 351]_info.joblib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments/[331, 351]_info.joblib
--------------------------------------------------------------------------------
/notebooks/experiments/[400, 450]_info.joblib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments/[400, 450]_info.joblib
--------------------------------------------------------------------------------
/notebooks/experiments/[500, 550]_info.joblib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments/[500, 550]_info.joblib
--------------------------------------------------------------------------------
/notebooks/experiments/[600, 650]_info.joblib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments/[600, 650]_info.joblib
--------------------------------------------------------------------------------
/notebooks/experiments/[700, 780]_info.joblib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments/[700, 780]_info.joblib
--------------------------------------------------------------------------------
/notebooks/experiments/[800, 900]_info.joblib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments/[800, 900]_info.joblib
--------------------------------------------------------------------------------
/notebooks/experiments/[905, 1070]_info.joblib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments/[905, 1070]_info.joblib
--------------------------------------------------------------------------------
/notebooks/experiments/[905, 970]_info.joblib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments/[905, 970]_info.joblib
--------------------------------------------------------------------------------
/notebooks/experiments/logs_experiment.txt:
--------------------------------------------------------------------------------
1 | 2021-06-07 23:13:24,959 INFO MainThread root Loading data
2 |
3 | 2021-06-07 23:13:38,739 INFO MainThread root Loading data
4 |
5 | 2021-06-07 23:13:48,499 INFO MainThread root Loading data
6 |
7 | 2021-06-07 23:13:48,499 INFO MainThread root Preparing speed tests: for device device(type='cpu') and hybrid_opt: False
8 |
9 | 2021-06-07 23:14:01,528 INFO MainThread root Loading data
10 |
11 | 2021-06-07 23:14:01,528 INFO MainThread root Preparing speed tests: for device device(type='cpu') and hybrid_opt: False
12 |
13 | 2021-06-07 23:14:53,890 INFO MainThread root Loading data
14 |
15 | 2021-06-07 23:14:59,937 INFO MainThread root Loading data
16 |
17 | 2021-06-07 23:14:59,937 INFO MainThread root Preparing speed tests: for device device(type='cpu') and hybrid_opt: False
18 |
19 | 2021-06-07 23:14:59,954 DEBUG MainThread .prody 1864 atoms and 1 coordinate set(s) were parsed in 0.02s.
20 | 2021-06-07 23:15:10,979 INFO MainThread root Loading data
21 |
22 | 2021-06-07 23:15:10,980 INFO MainThread root Preparing speed tests: for device device(type='cpu') and hybrid_opt: False
23 |
24 | 2021-06-07 23:15:10,995 DEBUG MainThread .prody 1864 atoms and 1 coordinate set(s) were parsed in 0.01s.
25 | 2021-06-07 23:15:32,111 INFO MainThread root Loading data
26 |
27 | 2021-06-07 23:15:32,112 INFO MainThread root Preparing speed tests: for device device(type='cpu') and hybrid_opt: False
28 |
29 | 2021-06-07 23:15:32,124 DEBUG MainThread .prody 1864 atoms and 1 coordinate set(s) were parsed in 0.01s.
30 | 2021-06-07 23:15:32,659 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/5rsa_ribonuclease.pdb
31 |
32 | 2021-06-07 23:15:38,006 INFO MainThread root 5.346866726 for 1000 calls
33 | 2021-06-07 23:15:38,006 INFO MainThread root Done
34 | 2021-06-07 23:15:38,006 INFO MainThread root
35 |
36 | =======
37 |
38 |
39 | 2021-06-07 23:15:38,084 DEBUG MainThread .prody 10003 atoms and 1 coordinate set(s) were parsed in 0.08s.
40 | 2021-06-07 23:15:41,299 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/1000_ala.pdb
41 |
42 | 2021-06-07 23:15:56,652 INFO MainThread root 15.352682389000002 for 1000 calls
43 | 2021-06-07 23:15:56,652 INFO MainThread root Done
44 | 2021-06-07 23:15:56,652 INFO MainThread root
45 |
46 | =======
47 |
48 |
49 | 2021-06-07 23:15:56,690 DEBUG MainThread .prody 5003 atoms and 1 coordinate set(s) were parsed in 0.04s.
50 | 2021-06-07 23:15:58,079 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/500_ala.pdb
51 |
52 | 2021-06-07 23:16:07,192 INFO MainThread root 9.112788776999999 for 1000 calls
53 | 2021-06-07 23:16:07,192 INFO MainThread root Done
54 | 2021-06-07 23:16:07,192 INFO MainThread root
55 |
56 | =======
57 |
58 |
59 | 2021-06-07 23:16:07,214 DEBUG MainThread .prody 2568 atoms and 1 coordinate set(s) were parsed in 0.02s.
60 | 2021-06-07 23:16:08,470 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/il10_lactate_dh.pdb
61 |
62 | 2021-06-07 23:16:16,980 INFO MainThread root 8.509638406999997 for 1000 calls
63 | 2021-06-07 23:16:16,980 INFO MainThread root Done
64 | 2021-06-07 23:16:16,980 INFO MainThread root
65 |
66 | =======
67 |
68 |
69 | 2021-06-07 23:16:16,980 INFO MainThread root Execution has finished
70 |
71 | 2021-06-07 22:36:56,573 INFO MainThread root Loading data
72 |
73 | 2021-06-07 22:36:56,648 INFO MainThread root Preparing speed tests: for device device(type='cpu') and hybrid_opt: False
74 |
75 | 2021-06-07 22:36:56,715 DEBUG MainThread .prody 10003 atoms and 1 coordinate set(s) were parsed in 0.07s.
76 | 2021-06-07 22:37:00,040 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/1000_ala.pdb
77 |
78 | 2021-06-07 22:37:31,005 INFO MainThread root 30.9647682 for 1000 calls
79 | 2021-06-07 22:37:31,005 INFO MainThread root Done
80 | 2021-06-07 22:37:31,005 INFO MainThread root
81 |
82 | =======
83 |
84 |
85 | 2021-06-07 22:37:31,045 DEBUG MainThread .prody 5003 atoms and 1 coordinate set(s) were parsed in 0.04s.
86 | 2021-06-07 22:37:32,399 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/500_ala.pdb
87 |
88 | 2021-06-07 22:37:47,501 INFO MainThread root 15.102381899999997 for 1000 calls
89 | 2021-06-07 22:37:47,501 INFO MainThread root Done
90 | 2021-06-07 22:37:47,502 INFO MainThread root
91 |
92 | =======
93 |
94 |
95 | 2021-06-07 22:37:47,514 DEBUG MainThread .prody 1864 atoms and 1 coordinate set(s) were parsed in 0.01s.
96 | 2021-06-07 22:37:47,984 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/5rsa_ribonuclease.pdb
97 |
98 | 2021-06-07 22:37:55,590 INFO MainThread root 7.6064229999999995 for 1000 calls
99 | 2021-06-07 22:37:55,590 INFO MainThread root Done
100 | 2021-06-07 22:37:55,590 INFO MainThread root
101 |
102 | =======
103 |
104 |
105 | 2021-06-07 22:37:55,608 DEBUG MainThread .prody 2568 atoms and 1 coordinate set(s) were parsed in 0.02s.
106 | 2021-06-07 22:37:56,821 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/il10_lactate_dh.pdb
107 |
108 | 2021-06-07 22:38:10,189 INFO MainThread root 13.368083000000006 for 1000 calls
109 | 2021-06-07 22:38:10,189 INFO MainThread root Done
110 | 2021-06-07 22:38:10,189 INFO MainThread root
111 |
112 | =======
113 |
114 |
115 | 2021-06-07 22:38:10,189 INFO MainThread root Preparing speed tests: for device device(type='cuda') and hybrid_opt: True
116 |
117 | 2021-06-07 22:38:10,255 DEBUG MainThread .prody 10003 atoms and 1 coordinate set(s) were parsed in 0.07s.
118 | 2021-06-07 22:38:16,823 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/1000_ala.pdb
119 |
120 | 2021-06-07 22:38:46,881 INFO MainThread root 30.058233599999994 for 1000 calls
121 | 2021-06-07 22:38:46,881 INFO MainThread root Done
122 | 2021-06-07 22:38:46,881 INFO MainThread root
123 |
124 | =======
125 |
126 |
127 | 2021-06-07 22:38:46,971 DEBUG MainThread .prody 5003 atoms and 1 coordinate set(s) were parsed in 0.09s.
128 | 2021-06-07 22:38:49,619 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/500_ala.pdb
129 |
130 | 2021-06-07 22:39:15,901 INFO MainThread root 26.281134400000013 for 1000 calls
131 | 2021-06-07 22:39:15,901 INFO MainThread root Done
132 | 2021-06-07 22:39:15,901 INFO MainThread root
133 |
134 | =======
135 |
136 |
137 | 2021-06-07 22:39:15,914 DEBUG MainThread .prody 1864 atoms and 1 coordinate set(s) were parsed in 0.01s.
138 | 2021-06-07 22:39:16,514 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/5rsa_ribonuclease.pdb
139 |
140 | 2021-06-07 22:39:35,427 INFO MainThread root 18.913132099999984 for 1000 calls
141 | 2021-06-07 22:39:35,428 INFO MainThread root Done
142 | 2021-06-07 22:39:35,428 INFO MainThread root
143 |
144 | =======
145 |
146 |
147 | 2021-06-07 22:39:35,444 DEBUG MainThread .prody 2568 atoms and 1 coordinate set(s) were parsed in 0.02s.
148 | 2021-06-07 22:39:36,919 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/il10_lactate_dh.pdb
149 |
150 | 2021-06-07 22:39:59,241 INFO MainThread root 22.321837799999997 for 1000 calls
151 | 2021-06-07 22:39:59,241 INFO MainThread root Done
152 | 2021-06-07 22:39:59,241 INFO MainThread root
153 |
154 | =======
155 |
156 |
157 | 2021-06-07 22:39:59,241 INFO MainThread root Preparing speed tests: for device device(type='cuda') and hybrid_opt: False
158 |
159 | 2021-06-07 22:39:59,311 DEBUG MainThread .prody 10003 atoms and 1 coordinate set(s) were parsed in 0.07s.
160 | 2021-06-07 22:40:03,273 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/1000_ala.pdb
161 |
162 | 2021-06-07 22:40:46,576 INFO MainThread root 43.3028281 for 1000 calls
163 | 2021-06-07 22:40:46,576 INFO MainThread root Done
164 | 2021-06-07 22:40:46,576 INFO MainThread root
165 |
166 | =======
167 |
168 |
169 | 2021-06-07 22:40:46,609 DEBUG MainThread .prody 5003 atoms and 1 coordinate set(s) were parsed in 0.03s.
170 | 2021-06-07 22:40:48,396 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/500_ala.pdb
171 |
172 | 2021-06-07 22:41:17,417 INFO MainThread root 29.020322799999974 for 1000 calls
173 | 2021-06-07 22:41:17,417 INFO MainThread root Done
174 | 2021-06-07 22:41:17,417 INFO MainThread root
175 |
176 | =======
177 |
178 |
179 | 2021-06-07 22:41:17,430 DEBUG MainThread .prody 1864 atoms and 1 coordinate set(s) were parsed in 0.01s.
180 | 2021-06-07 22:41:18,004 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/5rsa_ribonuclease.pdb
181 |
182 | 2021-06-07 22:41:39,125 INFO MainThread root 21.120834400000035 for 1000 calls
183 | 2021-06-07 22:41:39,125 INFO MainThread root Done
184 | 2021-06-07 22:41:39,125 INFO MainThread root
185 |
186 | =======
187 |
188 |
189 | 2021-06-07 22:41:39,150 DEBUG MainThread .prody 2568 atoms and 1 coordinate set(s) were parsed in 0.02s.
190 | 2021-06-07 22:41:40,646 INFO MainThread root Assessing the speed of folding algorithm at file experiments_manual/rclab_data/il10_lactate_dh.pdb
191 |
192 | 2021-06-07 22:42:06,874 INFO MainThread root 26.22732030000003 for 1000 calls
193 | 2021-06-07 22:42:06,874 INFO MainThread root Done
194 | 2021-06-07 22:42:06,874 INFO MainThread root
195 |
196 | =======
197 |
198 |
199 | 2021-06-07 22:42:06,874 INFO MainThread root Execution has finished
200 |
201 |
--------------------------------------------------------------------------------
/notebooks/experiments/logs_experiment_scn_various_lengths.txt:
--------------------------------------------------------------------------------
1 | =======
2 | 2021-05-22 02:14:49,435 INFO MainThread root Assessing lengths of: [134, 214, 336, 401, 501, 621, 753, 862, 994]
3 |
4 | 2021-05-22 02:14:49,435 INFO MainThread root Preparing speed tests: for device 'cpu'
5 |
6 | 2021-05-22 02:14:49,448 INFO MainThread root Assessing the speed of folding algorithm at length 134
7 |
8 | 2021-05-22 02:14:55,960 INFO MainThread root 6.509940282999999 for 1000 calls
9 | 2021-05-22 02:14:55,960 INFO MainThread root Saving the related information at experiments/[131, 150]_info.joblib
10 |
11 | 2021-05-22 02:14:55,961 INFO MainThread root
12 |
13 | =======
14 |
15 |
16 | 2021-05-22 02:14:55,977 INFO MainThread root Assessing the speed of folding algorithm at length 214
17 |
18 | 2021-05-22 02:15:02,783 INFO MainThread root 6.805784261999996 for 1000 calls
19 | 2021-05-22 02:15:02,783 INFO MainThread root Saving the related information at experiments/[200, 250]_info.joblib
20 |
21 | 2021-05-22 02:15:02,784 INFO MainThread root
22 |
23 | =======
24 |
25 |
26 | 2021-05-22 02:15:02,808 INFO MainThread root Assessing the speed of folding algorithm at length 336
27 |
28 | 2021-05-22 02:15:11,765 INFO MainThread root 8.956757892999995 for 1000 calls
29 | 2021-05-22 02:15:11,765 INFO MainThread root Saving the related information at experiments/[331, 351]_info.joblib
30 |
31 | 2021-05-22 02:15:11,766 INFO MainThread root
32 |
33 | =======
34 |
35 |
36 | 2021-05-22 02:15:11,794 INFO MainThread root Assessing the speed of folding algorithm at length 401
37 |
38 | 2021-05-22 02:15:21,825 INFO MainThread root 10.031293943000009 for 1000 calls
39 | 2021-05-22 02:15:21,825 INFO MainThread root Saving the related information at experiments/[400, 450]_info.joblib
40 |
41 | 2021-05-22 02:15:21,826 INFO MainThread root
42 |
43 | =======
44 |
45 |
46 | 2021-05-22 02:15:21,862 INFO MainThread root Assessing the speed of folding algorithm at length 501
47 |
48 | 2021-05-22 02:15:33,083 INFO MainThread root 11.221263701000012 for 1000 calls
49 | 2021-05-22 02:15:33,083 INFO MainThread root Saving the related information at experiments/[500, 550]_info.joblib
50 |
51 | 2021-05-22 02:15:33,084 INFO MainThread root
52 |
53 | =======
54 |
55 |
56 | 2021-05-22 02:15:33,126 INFO MainThread root Assessing the speed of folding algorithm at length 621
57 |
58 | 2021-05-22 02:15:45,854 INFO MainThread root 12.727750233999998 for 1000 calls
59 | 2021-05-22 02:15:45,854 INFO MainThread root Saving the related information at experiments/[600, 650]_info.joblib
60 |
61 | 2021-05-22 02:15:45,855 INFO MainThread root
62 |
63 | =======
64 |
65 |
66 | 2021-05-22 02:15:45,906 INFO MainThread root Assessing the speed of folding algorithm at length 753
67 |
68 | 2021-05-22 02:16:00,667 INFO MainThread root 14.760831587000013 for 1000 calls
69 | 2021-05-22 02:16:00,672 INFO MainThread root Saving the related information at experiments/[700, 780]_info.joblib
70 |
71 | 2021-05-22 02:16:00,674 INFO MainThread root
72 |
73 | =======
74 |
75 |
76 | 2021-05-22 02:16:00,734 INFO MainThread root Assessing the speed of folding algorithm at length 862
77 |
78 | 2021-05-22 02:16:17,315 INFO MainThread root 16.580566616 for 1000 calls
79 | 2021-05-22 02:16:17,315 INFO MainThread root Saving the related information at experiments/[800, 900]_info.joblib
80 |
81 | 2021-05-22 02:16:17,316 INFO MainThread root
82 |
83 | =======
84 |
85 |
86 | 2021-05-22 02:16:17,383 INFO MainThread root Assessing the speed of folding algorithm at length 994
87 |
88 | 2021-05-22 02:16:35,654 INFO MainThread root 18.271017204000003 for 1000 calls
89 | 2021-05-22 02:16:35,654 INFO MainThread root Saving the related information at experiments/[905, 1070]_info.joblib
90 |
91 | 2021-05-22 02:16:35,655 INFO MainThread root
92 |
93 | =======
94 |
95 |
96 | 2021-05-22 02:16:35,655 INFO MainThread root Preparing speed tests: for device device(type='cpu')
97 |
98 | 2021-05-22 02:16:35,664 INFO MainThread root Assessing the speed of folding algorithm at length 134
99 |
100 | 2021-05-22 02:16:40,994 INFO MainThread root 5.329709648000005 for 1000 calls
101 | 2021-05-22 02:16:40,994 INFO MainThread root Saving the related information at experiments/[131, 150]_info.joblib
102 |
103 | 2021-05-22 02:16:40,995 INFO MainThread root
104 |
105 | =======
106 |
107 |
108 | 2021-05-22 02:16:41,010 INFO MainThread root Assessing the speed of folding algorithm at length 214
109 |
110 | 2021-05-22 02:16:47,511 INFO MainThread root 6.501463223999991 for 1000 calls
111 | 2021-05-22 02:16:47,512 INFO MainThread root Saving the related information at experiments/[200, 250]_info.joblib
112 |
113 | 2021-05-22 02:16:47,513 INFO MainThread root
114 |
115 | =======
116 |
117 |
118 | 2021-05-22 02:16:47,536 INFO MainThread root Assessing the speed of folding algorithm at length 336
119 |
120 | 2021-05-22 02:16:56,197 INFO MainThread root 8.660352851999988 for 1000 calls
121 | 2021-05-22 02:16:56,197 INFO MainThread root Saving the related information at experiments/[331, 351]_info.joblib
122 |
123 | 2021-05-22 02:16:56,198 INFO MainThread root
124 |
125 | =======
126 |
127 |
128 | 2021-05-22 02:16:56,226 INFO MainThread root Assessing the speed of folding algorithm at length 401
129 |
130 | 2021-05-22 02:17:05,869 INFO MainThread root 9.643088333000009 for 1000 calls
131 | 2021-05-22 02:17:05,869 INFO MainThread root Saving the related information at experiments/[400, 450]_info.joblib
132 |
133 | 2021-05-22 02:17:05,871 INFO MainThread root
134 |
135 | =======
136 |
137 |
138 | 2021-05-22 02:17:05,904 INFO MainThread root Assessing the speed of folding algorithm at length 501
139 |
140 | 2021-05-22 02:17:17,308 INFO MainThread root 11.40289807900001 for 1000 calls
141 | 2021-05-22 02:17:17,308 INFO MainThread root Saving the related information at experiments/[500, 550]_info.joblib
142 |
143 | 2021-05-22 02:17:17,309 INFO MainThread root
144 |
145 | =======
146 |
147 |
148 | 2021-05-22 02:17:17,351 INFO MainThread root Assessing the speed of folding algorithm at length 621
149 |
150 | 2021-05-22 02:17:30,116 INFO MainThread root 12.764849003000023 for 1000 calls
151 | 2021-05-22 02:17:30,117 INFO MainThread root Saving the related information at experiments/[600, 650]_info.joblib
152 |
153 | 2021-05-22 02:17:30,119 INFO MainThread root
154 |
155 | =======
156 |
157 |
158 | 2021-05-22 02:17:30,171 INFO MainThread root Assessing the speed of folding algorithm at length 753
159 |
160 | 2021-05-22 02:17:44,858 INFO MainThread root 14.687164622000012 for 1000 calls
161 | 2021-05-22 02:17:44,858 INFO MainThread root Saving the related information at experiments/[700, 780]_info.joblib
162 |
163 | 2021-05-22 02:17:44,859 INFO MainThread root
164 |
165 | =======
166 |
167 |
168 | 2021-05-22 02:17:44,918 INFO MainThread root Assessing the speed of folding algorithm at length 862
169 |
170 | 2021-05-22 02:18:01,473 INFO MainThread root 16.554769015000005 for 1000 calls
171 | 2021-05-22 02:18:01,473 INFO MainThread root Saving the related information at experiments/[800, 900]_info.joblib
172 |
173 | 2021-05-22 02:18:01,474 INFO MainThread root
174 |
175 | =======
176 |
177 |
178 | 2021-05-22 02:18:01,538 INFO MainThread root Assessing the speed of folding algorithm at length 994
179 |
180 | 2021-05-22 02:18:19,650 INFO MainThread root 18.111747613000006 for 1000 calls
181 | 2021-05-22 02:18:19,650 INFO MainThread root Saving the related information at experiments/[905, 1070]_info.joblib
182 |
183 | 2021-05-22 02:18:19,651 INFO MainThread root
184 |
185 | =======
186 |
187 |
188 | 2021-05-22 02:18:19,651 INFO MainThread root Execution has finished
189 |
190 | >>>>>>> 4cabbe55371d6a9a7edeab1db719fa0cf8312eae
191 | 2021-05-22 18:39:33,611 INFO MainThread root Loading data
192 |
193 | 2021-05-22 18:39:33,622 INFO MainThread root Data has been loaded
194 |
195 |
196 | =======
197 |
198 |
199 | 2021-05-22 18:39:33,622 INFO MainThread root Assessing lengths of: [134, 214, 336, 401, 501, 621, 753, 862, 994]
200 |
201 | 2021-05-22 18:39:33,646 INFO MainThread root Preparing speed tests: for device 'cpu'
202 |
203 | 2021-05-22 18:39:33,670 INFO MainThread root Assessing the speed of folding algorithm at length 134
204 |
205 | 2021-05-22 18:39:42,657 INFO MainThread root 8.9869345 for 1000 calls
206 | 2021-05-22 18:39:42,657 INFO MainThread root Saving the related information at experiments/100_info.joblib
207 |
208 | 2021-05-22 18:39:42,659 INFO MainThread root
209 |
210 | =======
211 |
212 |
213 | 2021-05-22 18:39:42,687 INFO MainThread root Assessing the speed of folding algorithm at length 214
214 |
215 | 2021-05-22 18:39:53,087 INFO MainThread root 10.4000911 for 1000 calls
216 | 2021-05-22 18:39:53,087 INFO MainThread root Saving the related information at experiments/200_info.joblib
217 |
218 | 2021-05-22 18:39:53,088 INFO MainThread root
219 |
220 | =======
221 |
222 |
223 | 2021-05-22 18:39:53,122 INFO MainThread root Assessing the speed of folding algorithm at length 336
224 |
225 | 2021-05-22 18:40:06,577 INFO MainThread root 13.455043199999999 for 1000 calls
226 | 2021-05-22 18:40:06,577 INFO MainThread root Saving the related information at experiments/300_info.joblib
227 |
228 | 2021-05-22 18:40:06,578 INFO MainThread root
229 |
230 | =======
231 |
232 |
233 | 2021-05-22 18:40:06,617 INFO MainThread root Assessing the speed of folding algorithm at length 401
234 |
235 | 2021-05-22 18:40:21,715 INFO MainThread root 15.097297400000002 for 1000 calls
236 | 2021-05-22 18:40:21,715 INFO MainThread root Saving the related information at experiments/400_info.joblib
237 |
238 | 2021-05-22 18:40:21,716 INFO MainThread root
239 |
240 | =======
241 |
242 |
243 | 2021-05-22 18:40:21,779 INFO MainThread root Assessing the speed of folding algorithm at length 501
244 |
245 | 2021-05-22 18:40:40,543 INFO MainThread root 18.764004199999995 for 1000 calls
246 | 2021-05-22 18:40:40,543 INFO MainThread root Saving the related information at experiments/500_info.joblib
247 |
248 | 2021-05-22 18:40:40,544 INFO MainThread root
249 |
250 | =======
251 |
252 |
253 | 2021-05-22 18:40:40,617 INFO MainThread root Assessing the speed of folding algorithm at length 621
254 |
255 | 2021-05-22 18:41:02,270 INFO MainThread root 21.652811900000003 for 1000 calls
256 | 2021-05-22 18:41:02,270 INFO MainThread root Saving the related information at experiments/600_info.joblib
257 |
258 | 2021-05-22 18:41:02,271 INFO MainThread root
259 |
260 | =======
261 |
262 |
263 | 2021-05-22 18:41:02,367 INFO MainThread root Assessing the speed of folding algorithm at length 753
264 |
265 | 2021-05-22 18:41:27,302 INFO MainThread root 24.934528900000004 for 1000 calls
266 | 2021-05-22 18:41:27,302 INFO MainThread root Saving the related information at experiments/700_info.joblib
267 |
268 | 2021-05-22 18:41:27,304 INFO MainThread root
269 |
270 | =======
271 |
272 |
273 | 2021-05-22 18:41:27,431 INFO MainThread root Assessing the speed of folding algorithm at length 862
274 |
275 | 2021-05-22 18:41:56,196 INFO MainThread root 28.7642814 for 1000 calls
276 | 2021-05-22 18:41:56,196 INFO MainThread root Saving the related information at experiments/800_info.joblib
277 |
278 | 2021-05-22 18:41:56,197 INFO MainThread root
279 |
280 | =======
281 |
282 |
283 | 2021-05-22 18:41:56,312 INFO MainThread root Assessing the speed of folding algorithm at length 994
284 |
285 | 2021-05-22 18:42:29,089 INFO MainThread root 32.77735960000001 for 1000 calls
286 | 2021-05-22 18:42:29,090 INFO MainThread root Saving the related information at experiments/900_info.joblib
287 |
288 | 2021-05-22 18:42:29,090 INFO MainThread root
289 |
290 | =======
291 |
292 |
293 | 2021-05-22 18:42:29,091 INFO MainThread root Preparing speed tests: for device device(type='cuda') -- hybrid=True
294 |
295 | 2021-05-22 18:42:30,437 INFO MainThread root Assessing the speed of folding algorithm at length 134
296 |
297 | 2021-05-22 18:42:48,848 INFO MainThread root 18.41120219999999 for 1000 calls
298 | 2021-05-22 18:42:48,848 INFO MainThread root Saving the related information at experiments/100_info.joblib
299 |
300 | 2021-05-22 18:42:48,849 INFO MainThread root
301 |
302 | =======
303 |
304 |
305 | 2021-05-22 18:42:49,042 INFO MainThread root Assessing the speed of folding algorithm at length 214
306 |
307 | 2021-05-22 18:43:09,409 INFO MainThread root 20.366851999999994 for 1000 calls
308 | 2021-05-22 18:43:09,409 INFO MainThread root Saving the related information at experiments/200_info.joblib
309 |
310 | 2021-05-22 18:43:09,410 INFO MainThread root
311 |
312 | =======
313 |
314 |
315 | 2021-05-22 18:43:09,752 INFO MainThread root Assessing the speed of folding algorithm at length 336
316 |
317 | 2021-05-22 18:43:32,291 INFO MainThread root 22.538369399999993 for 1000 calls
318 | 2021-05-22 18:43:32,291 INFO MainThread root Saving the related information at experiments/300_info.joblib
319 |
320 | 2021-05-22 18:43:32,292 INFO MainThread root
321 |
322 | =======
323 |
324 |
325 | 2021-05-22 18:43:32,658 INFO MainThread root Assessing the speed of folding algorithm at length 401
326 |
327 | 2021-05-22 18:43:55,501 INFO MainThread root 22.84260729999997 for 1000 calls
328 | 2021-05-22 18:43:55,501 INFO MainThread root Saving the related information at experiments/400_info.joblib
329 |
330 | 2021-05-22 18:43:55,502 INFO MainThread root
331 |
332 | =======
333 |
334 |
335 | 2021-05-22 18:43:56,096 INFO MainThread root Assessing the speed of folding algorithm at length 501
336 |
337 | 2021-05-22 18:44:20,154 INFO MainThread root 24.057599100000004 for 1000 calls
338 | 2021-05-22 18:44:20,154 INFO MainThread root Saving the related information at experiments/500_info.joblib
339 |
340 | 2021-05-22 18:44:20,155 INFO MainThread root
341 |
342 | =======
343 |
344 |
345 | 2021-05-22 18:44:20,720 INFO MainThread root Assessing the speed of folding algorithm at length 621
346 |
347 | 2021-05-22 18:44:46,706 INFO MainThread root 25.98607320000002 for 1000 calls
348 | 2021-05-22 18:44:46,706 INFO MainThread root Saving the related information at experiments/600_info.joblib
349 |
350 | 2021-05-22 18:44:46,707 INFO MainThread root
351 |
352 | =======
353 |
354 |
355 | 2021-05-22 18:44:47,392 INFO MainThread root Assessing the speed of folding algorithm at length 753
356 |
357 | 2021-05-22 18:45:15,028 INFO MainThread root 27.6351363 for 1000 calls
358 | 2021-05-22 18:45:15,028 INFO MainThread root Saving the related information at experiments/700_info.joblib
359 |
360 | 2021-05-22 18:45:15,029 INFO MainThread root
361 |
362 | =======
363 |
364 |
365 | 2021-05-22 18:45:15,818 INFO MainThread root Assessing the speed of folding algorithm at length 862
366 |
367 | 2021-05-22 18:45:45,070 INFO MainThread root 29.25168880000001 for 1000 calls
368 | 2021-05-22 18:45:45,070 INFO MainThread root Saving the related information at experiments/800_info.joblib
369 |
370 | 2021-05-22 18:45:45,071 INFO MainThread root
371 |
372 | =======
373 |
374 |
375 | 2021-05-22 18:45:45,994 INFO MainThread root Assessing the speed of folding algorithm at length 994
376 |
377 | 2021-05-22 18:46:17,009 INFO MainThread root 31.0138465 for 1000 calls
378 | 2021-05-22 18:46:17,009 INFO MainThread root Saving the related information at experiments/900_info.joblib
379 |
380 | 2021-05-22 18:46:17,010 INFO MainThread root
381 |
382 | =======
383 |
384 |
385 | 2021-05-22 18:46:17,010 INFO MainThread root Execution has finished
386 |
387 | 2021-05-22 18:50:28,714 INFO MainThread root Loading data
388 |
389 | 2021-05-22 18:50:28,718 INFO MainThread root Data has been loaded
390 |
391 |
392 | =======
393 |
394 |
395 | 2021-05-22 18:50:28,718 INFO MainThread root Assessing lengths of: [134, 214, 336, 401, 501, 621, 753, 862, 994]
396 |
397 | 2021-05-22 18:50:40,190 INFO MainThread root Loading data
398 |
399 | 2021-05-22 18:50:40,194 INFO MainThread root Data has been loaded
400 |
401 |
402 | =======
403 |
404 |
405 | 2021-05-22 18:50:40,194 INFO MainThread root Assessing lengths of: [134, 214, 336, 401, 501, 621, 753, 862, 994]
406 |
407 | 2021-05-22 18:50:40,217 INFO MainThread root Preparing speed tests: for device device(type='cpu')
408 |
409 | 2021-05-22 18:50:40,232 INFO MainThread root Assessing the speed of folding algorithm at length 134
410 |
411 | 2021-05-22 18:50:48,901 INFO MainThread root 8.6684752 for 1000 calls
412 | 2021-05-22 18:50:48,901 INFO MainThread root Saving the related information at experiments/100_info.joblib
413 |
414 | 2021-05-22 18:50:48,902 INFO MainThread root
415 |
416 | =======
417 |
418 |
419 | 2021-05-22 18:50:48,923 INFO MainThread root Assessing the speed of folding algorithm at length 214
420 |
421 | 2021-05-22 18:50:59,368 INFO MainThread root 10.4448301 for 1000 calls
422 | 2021-05-22 18:50:59,368 INFO MainThread root Saving the related information at experiments/200_info.joblib
423 |
424 | 2021-05-22 18:50:59,369 INFO MainThread root
425 |
426 | =======
427 |
428 |
429 | 2021-05-22 18:50:59,415 INFO MainThread root Assessing the speed of folding algorithm at length 336
430 |
431 | 2021-05-22 18:51:13,819 INFO MainThread root 14.403065699999996 for 1000 calls
432 | 2021-05-22 18:51:13,819 INFO MainThread root Saving the related information at experiments/300_info.joblib
433 |
434 | 2021-05-22 18:51:13,820 INFO MainThread root
435 |
436 | =======
437 |
438 |
439 | 2021-05-22 18:51:13,880 INFO MainThread root Assessing the speed of folding algorithm at length 401
440 |
441 | 2021-05-22 18:51:29,132 INFO MainThread root 15.251432700000002 for 1000 calls
442 | 2021-05-22 18:51:29,132 INFO MainThread root Saving the related information at experiments/400_info.joblib
443 |
444 | 2021-05-22 18:51:29,133 INFO MainThread root
445 |
446 | =======
447 |
448 |
449 | 2021-05-22 18:51:29,181 INFO MainThread root Assessing the speed of folding algorithm at length 501
450 |
451 | 2021-05-22 18:51:47,005 INFO MainThread root 17.824042999999996 for 1000 calls
452 | 2021-05-22 18:51:47,005 INFO MainThread root Saving the related information at experiments/500_info.joblib
453 |
454 | 2021-05-22 18:51:47,006 INFO MainThread root
455 |
456 | =======
457 |
458 |
459 | 2021-05-22 18:51:47,083 INFO MainThread root Assessing the speed of folding algorithm at length 621
460 |
461 | 2021-05-22 18:52:07,623 INFO MainThread root 20.5405765 for 1000 calls
462 | 2021-05-22 18:52:07,624 INFO MainThread root Saving the related information at experiments/600_info.joblib
463 |
464 | 2021-05-22 18:52:07,625 INFO MainThread root
465 |
466 | =======
467 |
468 |
469 | 2021-05-22 18:52:07,708 INFO MainThread root Assessing the speed of folding algorithm at length 753
470 |
471 | 2021-05-22 18:52:31,562 INFO MainThread root 23.853287499999993 for 1000 calls
472 | 2021-05-22 18:52:31,562 INFO MainThread root Saving the related information at experiments/700_info.joblib
473 |
474 | 2021-05-22 18:52:31,563 INFO MainThread root
475 |
476 | =======
477 |
478 |
479 | 2021-05-22 18:52:31,652 INFO MainThread root Assessing the speed of folding algorithm at length 862
480 |
481 | 2021-05-22 18:52:59,035 INFO MainThread root 27.38281640000001 for 1000 calls
482 | 2021-05-22 18:52:59,035 INFO MainThread root Saving the related information at experiments/800_info.joblib
483 |
484 | 2021-05-22 18:52:59,036 INFO MainThread root
485 |
486 | =======
487 |
488 |
489 | 2021-05-22 18:52:59,150 INFO MainThread root Assessing the speed of folding algorithm at length 994
490 |
491 | 2021-05-22 18:53:31,180 INFO MainThread root 32.029055200000016 for 1000 calls
492 | 2021-05-22 18:53:31,180 INFO MainThread root Saving the related information at experiments/900_info.joblib
493 |
494 | 2021-05-22 18:53:31,181 INFO MainThread root
495 |
496 | =======
497 |
498 |
499 | 2021-05-22 18:53:31,181 INFO MainThread root Preparing speed tests: for device device(type='cuda') - hybrid=True
500 |
501 | 2021-05-22 18:53:32,532 INFO MainThread root Assessing the speed of folding algorithm at length 134
502 |
503 | 2021-05-22 18:53:51,151 INFO MainThread root 18.61965140000001 for 1000 calls
504 | 2021-05-22 18:53:51,152 INFO MainThread root Saving the related information at experiments/100_info.joblib
505 |
506 | 2021-05-22 18:53:51,153 INFO MainThread root
507 |
508 | =======
509 |
510 |
511 | 2021-05-22 18:53:51,347 INFO MainThread root Assessing the speed of folding algorithm at length 214
512 |
513 | 2021-05-22 18:54:11,743 INFO MainThread root 20.395728099999985 for 1000 calls
514 | 2021-05-22 18:54:11,743 INFO MainThread root Saving the related information at experiments/200_info.joblib
515 |
516 | 2021-05-22 18:54:11,744 INFO MainThread root
517 |
518 | =======
519 |
520 |
521 | 2021-05-22 18:54:12,052 INFO MainThread root Assessing the speed of folding algorithm at length 336
522 |
523 | 2021-05-22 18:54:34,875 INFO MainThread root 22.822907499999985 for 1000 calls
524 | 2021-05-22 18:54:34,875 INFO MainThread root Saving the related information at experiments/300_info.joblib
525 |
526 | 2021-05-22 18:54:34,876 INFO MainThread root
527 |
528 | =======
529 |
530 |
531 | 2021-05-22 18:54:35,239 INFO MainThread root Assessing the speed of folding algorithm at length 401
532 |
533 | 2021-05-22 18:54:59,075 INFO MainThread root 23.83573979999997 for 1000 calls
534 | 2021-05-22 18:54:59,075 INFO MainThread root Saving the related information at experiments/400_info.joblib
535 |
536 | 2021-05-22 18:54:59,076 INFO MainThread root
537 |
538 | =======
539 |
540 |
541 | 2021-05-22 18:54:59,530 INFO MainThread root Assessing the speed of folding algorithm at length 501
542 |
543 | 2021-05-22 18:55:24,297 INFO MainThread root 24.76649520000001 for 1000 calls
544 | 2021-05-22 18:55:24,297 INFO MainThread root Saving the related information at experiments/500_info.joblib
545 |
546 | 2021-05-22 18:55:24,298 INFO MainThread root
547 |
548 | =======
549 |
550 |
551 | 2021-05-22 18:55:24,858 INFO MainThread root Assessing the speed of folding algorithm at length 621
552 |
553 | 2021-05-22 18:55:50,855 INFO MainThread root 25.996778500000005 for 1000 calls
554 | 2021-05-22 18:55:50,856 INFO MainThread root Saving the related information at experiments/600_info.joblib
555 |
556 | 2021-05-22 18:55:50,856 INFO MainThread root
557 |
558 | =======
559 |
560 |
561 | 2021-05-22 18:55:51,538 INFO MainThread root Assessing the speed of folding algorithm at length 753
562 |
563 | 2021-05-22 18:56:19,326 INFO MainThread root 27.787718600000005 for 1000 calls
564 | 2021-05-22 18:56:19,326 INFO MainThread root Saving the related information at experiments/700_info.joblib
565 |
566 | 2021-05-22 18:56:19,327 INFO MainThread root
567 |
568 | =======
569 |
570 |
571 | 2021-05-22 18:56:20,108 INFO MainThread root Assessing the speed of folding algorithm at length 862
572 |
573 | 2021-05-22 18:56:49,570 INFO MainThread root 29.461670400000003 for 1000 calls
574 | 2021-05-22 18:56:49,570 INFO MainThread root Saving the related information at experiments/800_info.joblib
575 |
576 | 2021-05-22 18:56:49,571 INFO MainThread root
577 |
578 | =======
579 |
580 |
581 | 2021-05-22 18:56:50,504 INFO MainThread root Assessing the speed of folding algorithm at length 994
582 |
583 | 2021-05-22 18:57:21,194 INFO MainThread root 30.689694900000006 for 1000 calls
584 | 2021-05-22 18:57:21,194 INFO MainThread root Saving the related information at experiments/900_info.joblib
585 |
586 | 2021-05-22 18:57:21,195 INFO MainThread root
587 |
588 | =======
589 |
590 |
591 | 2021-05-22 18:57:21,196 INFO MainThread root Preparing speed tests: for device device(type='cuda') -- hybrid=False
592 |
593 | 2021-05-22 18:57:21,317 INFO MainThread root Assessing the speed of folding algorithm at length 134
594 |
595 | 2021-05-22 18:57:41,246 INFO MainThread root 19.9283054 for 1000 calls
596 | 2021-05-22 18:57:41,246 INFO MainThread root Saving the related information at experiments/100_info.joblib
597 |
598 | 2021-05-22 18:57:41,247 INFO MainThread root
599 |
600 | =======
601 |
602 |
603 | 2021-05-22 18:57:41,440 INFO MainThread root Assessing the speed of folding algorithm at length 214
604 |
605 | 2021-05-22 18:58:03,719 INFO MainThread root 22.279464399999995 for 1000 calls
606 | 2021-05-22 18:58:03,719 INFO MainThread root Saving the related information at experiments/200_info.joblib
607 |
608 | 2021-05-22 18:58:03,720 INFO MainThread root
609 |
610 | =======
611 |
612 |
613 | 2021-05-22 18:58:04,024 INFO MainThread root Assessing the speed of folding algorithm at length 336
614 |
615 | 2021-05-22 18:58:29,494 INFO MainThread root 25.469947400000024 for 1000 calls
616 | 2021-05-22 18:58:29,494 INFO MainThread root Saving the related information at experiments/300_info.joblib
617 |
618 | 2021-05-22 18:58:29,495 INFO MainThread root
619 |
620 | =======
621 |
622 |
623 | 2021-05-22 18:58:29,859 INFO MainThread root Assessing the speed of folding algorithm at length 401
624 |
625 | 2021-05-22 18:58:58,097 INFO MainThread root 28.238597999999968 for 1000 calls
626 | 2021-05-22 18:58:58,098 INFO MainThread root Saving the related information at experiments/400_info.joblib
627 |
628 | 2021-05-22 18:58:58,098 INFO MainThread root
629 |
630 | =======
631 |
632 |
633 | 2021-05-22 18:58:58,548 INFO MainThread root Assessing the speed of folding algorithm at length 501
634 |
635 | 2021-05-22 18:59:29,343 INFO MainThread root 30.79467580000005 for 1000 calls
636 | 2021-05-22 18:59:29,343 INFO MainThread root Saving the related information at experiments/500_info.joblib
637 |
638 | 2021-05-22 18:59:29,344 INFO MainThread root
639 |
640 | =======
641 |
642 |
643 | 2021-05-22 18:59:29,904 INFO MainThread root Assessing the speed of folding algorithm at length 621
644 |
645 | 2021-05-22 19:00:03,234 INFO MainThread root 33.3301616 for 1000 calls
646 | 2021-05-22 19:00:03,234 INFO MainThread root Saving the related information at experiments/600_info.joblib
647 |
648 | 2021-05-22 19:00:03,235 INFO MainThread root
649 |
650 | =======
651 |
652 |
653 | 2021-05-22 19:00:03,915 INFO MainThread root Assessing the speed of folding algorithm at length 753
654 |
655 | 2021-05-22 19:00:40,486 INFO MainThread root 36.570508099999984 for 1000 calls
656 | 2021-05-22 19:00:40,486 INFO MainThread root Saving the related information at experiments/700_info.joblib
657 |
658 | 2021-05-22 19:00:40,487 INFO MainThread root
659 |
660 | =======
661 |
662 |
663 | 2021-05-22 19:00:41,265 INFO MainThread root Assessing the speed of folding algorithm at length 862
664 |
665 | 2021-05-22 19:01:21,325 INFO MainThread root 40.06054449999999 for 1000 calls
666 | 2021-05-22 19:01:21,326 INFO MainThread root Saving the related information at experiments/800_info.joblib
667 |
668 | 2021-05-22 19:01:21,327 INFO MainThread root
669 |
670 | =======
671 |
672 |
673 | 2021-05-22 19:01:22,419 INFO MainThread root Assessing the speed of folding algorithm at length 994
674 |
675 | 2021-05-22 19:02:06,714 INFO MainThread root 44.29495259999999 for 1000 calls
676 | 2021-05-22 19:02:06,714 INFO MainThread root Saving the related information at experiments/900_info.joblib
677 |
678 | 2021-05-22 19:02:06,715 INFO MainThread root
679 |
680 | =======
681 |
682 |
683 | 2021-05-22 19:02:06,715 INFO MainThread root Execution has finished
684 |
685 |
--------------------------------------------------------------------------------
/notebooks/experiments/profile_csv:
--------------------------------------------------------------------------------
1 | ncalls,tottime,percall,cumtime,percall,filename:lineno(function)
2 | 1,0.00598,0.00598,0.0171,0.0171,massive_pnerf.py:70(proto_fold)
3 | 16,0.004091,0.0002557,0.004091,0.0002557,~:0()
4 | 773,0.002993,3.872e-06,0.002993,3.872e-06,~:0()
5 | 14,0.001775,0.0001268,0.00792,0.0005657,massive_pnerf.py:40(mp_nerf_torch)
6 | 32,0.000533,1.666e-05,0.000533,1.666e-05,~:0()
7 | 30,0.000495,1.65e-05,0.000495,1.65e-05,~:0()
8 | 14,0.000146,1.043e-05,0.000146,1.043e-05,~:0()
9 | 29,0.000119,4.103e-06,0.000119,4.103e-06,~:0()
10 | 16,0.000117,7.312e-06,0.004243,0.0002652,functional.py:1274(norm)
11 | 25,0.000103,4.12e-06,0.000103,4.12e-06,~:0()
12 | 43,0.0001,2.326e-06,0.0001,2.326e-06,~:0()
13 | 14,7.2e-05,5.143e-06,7.2e-05,5.143e-06,~:0()
14 | 25,7.1e-05,2.84e-06,0.000225,9e-06,tensor.py:575(__iter__)
15 | 2,5.9e-05,2.95e-05,0.000483,0.0002415,massive_pnerf.py:10(get_axis_matrix)
16 | 29,4.9e-05,1.69e-06,4.9e-05,1.69e-06,~:0()
17 | 2,4.3e-05,2.15e-05,4.3e-05,2.15e-05,~:0()
18 | 1,4e-05,4e-05,0.01716,0.01716,~:0()
19 | 25,3e-05,1.2e-06,3e-05,1.2e-06,~:0()
20 | 4,2.8e-05,7e-06,2.8e-05,7e-06,~:0()
21 | 6,2.8e-05,4.667e-06,2.8e-05,4.667e-06,~:0()
22 | 1,2.3e-05,2.3e-05,2.3e-05,2.3e-05,~:0()
23 | 1,2.3e-05,2.3e-05,0.01712,0.01712,:1()
24 | 1,1.9e-05,1.9e-05,1.9e-05,1.9e-05,~:0()
25 | 14,1.8e-05,1.286e-06,1.8e-05,1.286e-06,~:0()
26 | 11,1.6e-05,1.455e-06,1.9e-05,1.727e-06,tensor.py:568(__len__)
27 | 2,1.5e-05,7.5e-06,1.5e-05,7.5e-06,~:0()
28 | 4,1.3e-05,3.25e-06,1.3e-05,3.25e-06,~:0()
29 | 2,1.2e-05,6e-06,0.000107,5.35e-05,einops.py:202(apply)
30 | 25,1.1e-05,4.4e-07,1.1e-05,4.4e-07,~:0()
31 | 16,1.1e-05,6.875e-07,2e-05,1.25e-06,_VF.py:25(__getattr__)
32 | 52,1.1e-05,2.115e-07,1.1e-05,2.115e-07,~:0()
33 | 2,1e-05,5e-06,2.5e-05,1.25e-05,tensor.py:525(__rsub__)
34 | 2,1e-05,5e-06,0.00012,6e-05,einops.py:327(reduce)
35 | 16,9e-06,5.625e-07,9e-06,5.625e-07,~:0()
36 | 34,9e-06,2.647e-07,9e-06,2.647e-07,~:0()
37 | 1,8e-06,8e-06,8e-06,8e-06,~:0()
38 | 52,8e-06,1.538e-07,8e-06,1.538e-07,~:0()
39 | 2,6e-06,3e-06,6e-06,3e-06,~:0()
40 | 1,6e-06,6e-06,6e-06,6e-06,~:0()
41 | 2,6e-06,3e-06,6e-06,3e-06,~:0()
42 | 2,5e-06,2.5e-06,7e-06,3.5e-06,_backends.py:22(get_backend)
43 | 2,4e-06,2e-06,4e-06,2e-06,einops.py:26(_reduce_axes)
44 | 2,4e-06,2e-06,5.9e-05,2.95e-05,_backends.py:98(add_axes)
45 | 2,4e-06,2e-06,4.7e-05,2.35e-05,_backends.py:336(tile)
46 | 2,4e-06,2e-06,4e-06,2e-06,~:0()
47 | 1,4e-06,4e-06,4e-06,4e-06,~:0()
48 | 2,3e-06,1.5e-06,0.000123,6.15e-05,einops.py:427(repeat)
49 | 2,3e-06,1.5e-06,9e-06,4.5e-06,_backends.py:330(transpose)
50 | 1,3e-06,3e-06,3e-06,3e-06,~:0()
51 | 6,2e-06,3.333e-07,2e-06,3.333e-07,~:0()
52 | 4,2e-06,5e-07,1.5e-05,3.75e-06,_backends.py:83(reshape)
53 | 2,2e-06,1e-06,2e-06,1e-06,_backends.py:302(is_appropriate_type)
54 | 2,2e-06,1e-06,8e-06,4e-06,_backends.py:339(add_axis)
55 | 8,1e-06,1.25e-07,1e-06,1.25e-07,~:0()
56 | 2,1e-06,5e-07,1e-06,5e-07,~:0()
57 | 1,1e-06,1e-06,1e-06,1e-06,~:0()
58 | 2,0,0,0,0,~:0()
59 | 2,0,0,0,0,_backends.py:79(shape)
60 | 2,0,0,0,0,~:0()
61 | Showing 1 to 59 of 59 entries
62 |
--------------------------------------------------------------------------------
/notebooks/experiments_manual/analyzed_prots.joblib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments_manual/analyzed_prots.joblib
--------------------------------------------------------------------------------
/notebooks/experiments_manual/error_evolution.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments_manual/error_evolution.png
--------------------------------------------------------------------------------
/notebooks/experiments_manual/histogram_errors.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments_manual/histogram_errors.png
--------------------------------------------------------------------------------
/notebooks/experiments_manual/profiler_capture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/experiments_manual/profiler_capture.png
--------------------------------------------------------------------------------
/notebooks/experiments_manual/rclab_data/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 RCL-lab
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/notebooks/experiments_manual/rclab_data/other_prots.csv:
--------------------------------------------------------------------------------
1 | prot,url,chain,num,
2 | LDH,_,_,_,
3 | Ribonuclease,_,_,_,
--------------------------------------------------------------------------------
/notebooks/integrated_alanines.py:
--------------------------------------------------------------------------------
1 | ##########################
2 | # Clone repos with utils #
3 | ##########################
4 |
5 | # !git clone https://github.com/hypnopump/geometric-vector-perceptron
6 |
7 | import os
8 | import sys
9 | import time
10 | import timeit
11 | import logging
12 |
13 | # science
14 | import numpy as np
15 | import torch
16 | import prody as pr
17 | import sidechainnet
18 | from sidechainnet.utils.sequence import ProteinVocabulary as VOCAB
19 | VOCAB = VOCAB()
20 |
21 | # process
22 | import joblib
23 |
24 | # custom
25 | import mp_nerf
26 |
BASE_FOLDER = "experiments/"

# Every run appends to the shared experiment log inside BASE_FOLDER.
logging.basicConfig(level=logging.DEBUG,
                    format="%(asctime)s %(levelname)s %(threadName)s %(name)s %(message)s",
                    # datefmt='%m-%d %H:%M',
                    filename=BASE_FOLDER+"logs_experiment.txt",
                    filemode="a")
logger = logging.getLogger()
sep = "\n\n=======\n\n"


# begin tests
# NOTE: the benchmark stays at module level on purpose: the timeit statement
# below resolves `scaffolds`, `device` and `hybrid` through globals().
if __name__ == "__main__":

    logger.info("Loading data"+"\n")

    dataloc = "experiments_manual/rclab_data/"
    # sorted() makes the benchmark order (and thus the log) reproducible,
    # since os.listdir returns entries in arbitrary order
    filenames = sorted(dataloc+x for x in os.listdir(dataloc) if x.endswith(".pdb"))

    run_opts = [(torch.device("cpu"), False)]  # tuples of (device, hybrid)
    # add possibility for different configs
    if torch.cuda.is_available():
        run_opts.append((torch.device("cuda"), True))
        run_opts.append((torch.device("cuda"), False))

    for device, hybrid in run_opts:

        logger.info("Preparing speed tests: for device "+repr(device)+" and hybrid_opt: "+str(hybrid)+"\n")

        for filename in filenames:

            # get data
            keys = ["angles_np", "coords_np", "observed_sequence"]
            # Fix: the original passed the undefined names `datafile` and
            # `chain` here, which raised NameError on the first iteration.
            # Parse the file of the current iteration instead.
            # NOTE(review): no chain selector is applied, so every chain of
            # model 1 is parsed -- confirm the input files are single-chain.
            chain = pr.parsePDB(filename, model=1)
            parsed = sidechainnet.utils.measure.get_seq_coords_and_angles(chain)
            # zip() keeps only the first three returned values
            data = {k: v for k, v in zip(keys, parsed)}
            # get scaffs
            scaffolds = mp_nerf.proteins.build_scaffolds_from_scn_angles(
                data["observed_sequence"],
                torch.from_numpy(data["angles_np"]).to(device))

            logger.info("Assessing the speed of folding algorithm at file "+filename+"\n")

            logger.info(str(timeit.timeit('mp_nerf.proteins.protein_fold(**scaffolds, device=device, hybrid=hybrid)',
                                          globals=globals(), number=1000))+" for 1000 calls")

            logger.info("Done")
            logger.info(sep)

    logger.info("Execution has finished\n")
--------------------------------------------------------------------------------
/notebooks/integrated_test.py:
--------------------------------------------------------------------------------
1 | ##########################
2 | # Clone repos with utils #
3 | ##########################
4 |
5 | # !git clone https://github.com/hypnopump/geometric-vector-perceptron
6 |
7 | import os
8 | import sys
9 | import time
10 | import timeit
11 | import logging
12 |
13 | # science
14 | import numpy as np
15 | import torch
16 | import sidechainnet
17 | from sidechainnet.utils.sequence import ProteinVocabulary as VOCAB
18 | VOCAB = VOCAB()
19 |
20 | # process
21 | import joblib
22 |
23 | # custom
24 | import mp_nerf
25 |
BASE_FOLDER = "experiments/"

# Every run appends to the shared experiment log inside BASE_FOLDER.
logging.basicConfig(level=logging.DEBUG,
                    format="%(asctime)s %(levelname)s %(threadName)s %(name)s %(message)s",
                    # datefmt='%m-%d %H:%M',
                    filename=BASE_FOLDER+"logs_experiment.txt",
                    filemode="a")
logger = logging.getLogger()
sep = "\n\n=======\n\n"


# begin tests
# NOTE: the benchmark stays at module level on purpose: the timeit statement
# below resolves `scaffolds`, `device` and `hybrid` through globals().
if __name__ == "__main__":

    logger.info("Loading data"+"\n")
    lengths = [100, 200, 300, 400, 500, 600, 700, 800, 900]  # [::-1]
    cache_path = BASE_FOLDER[:-1]+"_manual/analyzed_prots.joblib"

    # The original forced the cached branch by raising a deliberate TypeError
    # ("a"+9) inside a try with a bare `except:`. That also swallowed
    # KeyboardInterrupt/SystemExit and hid genuine errors. The switch below
    # keeps the same default (use the cache) but makes the choice explicit.
    rebuild_cache = False
    stored = None
    if rebuild_cache:
        try:
            dataloaders_ = sidechainnet.load(casp_version=7, with_pytorch="dataloaders", batch_size=2)
            logger.info("Data has been loaded"+"\n"+sep)
            stored = [mp_nerf.utils.get_prot(dataloader_=dataloaders_,
                                             vocab_=VOCAB,
                                             min_len=desired_len+5,
                                             max_len=desired_len+60) for desired_len in lengths]
            joblib.dump(stored, cache_path)
        except Exception:
            # same fallback as before, but the failure is now recorded
            logger.exception("Rebuilding the protein cache failed; falling back to "+cache_path)
            stored = None
    if stored is None:
        stored = joblib.load(cache_path)
        logger.info("Data has been loaded"+"\n"+sep)

    logger.info("Assessing lengths of: "+str([len(x[0]) for x in stored])+"\n")

    run_opts = [(torch.device("cpu"), False)]  # tuples of (device, hybrid)
    # add possibility for different configs
    if torch.cuda.is_available():
        run_opts.append((torch.device("cuda"), True))
        run_opts.append((torch.device("cuda"), False))

    for device, hybrid in run_opts:

        logger.info("Preparing speed tests: for device "+repr(device)+" and hybrid_opt: "+str(hybrid)+"\n")

        for i, desired_len in enumerate(lengths):

            # one stored protein per requested length, in the same order
            seq, int_seq, true_coords, angles, padding_seq, mask, pid = stored[i]
            scaffolds = mp_nerf.proteins.build_scaffolds_from_scn_angles(seq, angles.to(device))

            logger.info("Assessing the speed of folding algorithm at length "+str(len(seq))+"\n")

            logger.info(str(timeit.timeit('mp_nerf.proteins.protein_fold(**scaffolds, device=device, hybrid=hybrid)',
                                          globals=globals(), number=1000))+" for 1000 calls")

            logger.info("Saving the related information at {0}{1}_info.joblib\n".format(
                BASE_FOLDER, desired_len))
            joblib.dump({"seq": seq,
                         "true_coords": true_coords,
                         "angles": angles,
                         "padding_seq": padding_seq,
                         "mask": mask,
                         "pid": pid,
                         "padding_stripped": True}, BASE_FOLDER+str(desired_len)+"_info.joblib")
            logger.info(sep)

    logger.info("Execution has finished\n")
--------------------------------------------------------------------------------
/notebooks/xtension/plots/A_plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/A_plot_hists.png
--------------------------------------------------------------------------------
/notebooks/xtension/plots/C_plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/C_plot_hists.png
--------------------------------------------------------------------------------
/notebooks/xtension/plots/D_plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/D_plot_hists.png
--------------------------------------------------------------------------------
/notebooks/xtension/plots/E_plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/E_plot_hists.png
--------------------------------------------------------------------------------
/notebooks/xtension/plots/F_plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/F_plot_hists.png
--------------------------------------------------------------------------------
/notebooks/xtension/plots/G_plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/G_plot_hists.png
--------------------------------------------------------------------------------
/notebooks/xtension/plots/H_plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/H_plot_hists.png
--------------------------------------------------------------------------------
/notebooks/xtension/plots/I_plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/I_plot_hists.png
--------------------------------------------------------------------------------
/notebooks/xtension/plots/K_plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/K_plot_hists.png
--------------------------------------------------------------------------------
/notebooks/xtension/plots/L_plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/L_plot_hists.png
--------------------------------------------------------------------------------
/notebooks/xtension/plots/M_plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/M_plot_hists.png
--------------------------------------------------------------------------------
/notebooks/xtension/plots/N_plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/N_plot_hists.png
--------------------------------------------------------------------------------
/notebooks/xtension/plots/P_plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/P_plot_hists.png
--------------------------------------------------------------------------------
/notebooks/xtension/plots/Q_plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/Q_plot_hists.png
--------------------------------------------------------------------------------
/notebooks/xtension/plots/R_plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/R_plot_hists.png
--------------------------------------------------------------------------------
/notebooks/xtension/plots/S_plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/S_plot_hists.png
--------------------------------------------------------------------------------
/notebooks/xtension/plots/T_plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/T_plot_hists.png
--------------------------------------------------------------------------------
/notebooks/xtension/plots/V_plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/V_plot_hists.png
--------------------------------------------------------------------------------
/notebooks/xtension/plots/W_plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/W_plot_hists.png
--------------------------------------------------------------------------------
/notebooks/xtension/plots/Y_plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/Y_plot_hists.png
--------------------------------------------------------------------------------
/notebooks/xtension/plots/__plot_hists.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/EleutherAI/mp_nerf/2c2c595b70637071ba6ca272270b8af0d9cd58be/notebooks/xtension/plots/__plot_hists.png
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [aliases]
2 | test=pytest
3 |
4 | [tool:pytest]
5 | addopts = --verbose
6 | python_files = tests/*.py
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 |
# Package metadata for the mp-nerf distribution.
setup(
    name='mp-nerf',
    packages=find_packages(),
    version='1.0.3',
    license='MIT',
    description='MP-NeRF: Massively Parallel Natural Extension of Reference Frame',
    author='Eric Alcaide',
    author_email='ericalcaide1@gmail.com',
    url='https://github.com/eleutherAI/mp_nerf',
    keywords=[
        'computational biology',  # fixed typo: was 'computational biolgy'
        'bioinformatics',
        'machine learning'
    ],
    install_requires=[
        'einops>=0.3',
        'numpy',
        'torch>=1.6',  # 'sidechainnet' # for tests
    ],
    # pytest-runner + the [aliases] entry in setup.cfg back `python setup.py test`
    setup_requires=[
        'pytest-runner',
    ],
    tests_require=[
        'pytest'
    ],
    classifiers=[
        'Development Status :: 4 - Beta',
        'Intended Audience :: Developers',
        'Topic :: Scientific/Engineering :: Artificial Intelligence',
        'License :: OSI Approved :: MIT License',
        'Programming Language :: Python :: 3.6',
    ],
)
36 |
--------------------------------------------------------------------------------
/tests/test_main.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 | from mp_nerf import *
5 | from mp_nerf.utils import *
6 | from mp_nerf.kb_proteins import *
7 | from mp_nerf.proteins import *
8 |
def test_nerf_and_dihedral():
    """Check mp_nerf_torch and get_dihedral on four hand-picked points."""
    # four consecutive reference points
    a, b, c, d = (torch.tensor(xyz).float()
                  for xyz in ([1, 2, 3], [1, 4, 5], [1, 4, 7], [1, 8, 8]))
    # bond vectors between consecutive points
    bond_ab = (b - a).numpy()
    bond_bc = (c - b).numpy()
    bond_cd = (d - c).numpy()
    # angle between the last two bond vectors
    cos_theta = np.dot(bond_bc, bond_cd) / \
        (np.linalg.norm(bond_bc) * np.linalg.norm(bond_cd))
    theta = torch.tensor(np.arccos(cos_theta))
    # dihedral measured as the angle between the normals of the two planes
    normal_abc = np.cross(bond_ab, bond_bc)
    normal_bcd = np.cross(bond_bc, bond_cd)
    cos_chi = np.dot(normal_abc, normal_bcd) / \
        (np.linalg.norm(normal_abc) * np.linalg.norm(normal_bcd))
    chi = torch.tensor(np.arccos(cos_chi))
    # length of the last bond
    l = torch.tensor(np.linalg.norm(bond_cd))
    # reconstruct
    # The dihedral is passed shifted by pi: per the original author's note the
    # raw value does not work because the scn angle was not measured
    # correctly, and the method compensates for that.
    placed = mp_nerf_torch(a, b, c, l, theta, chi - np.pi)
    expected = torch.tensor([1, 0, 6])
    assert (placed - expected).sum().abs() < 0.1
    assert get_dihedral(a, b, c, d).item() == chi
36 |
37 |
def test_modify_angles_mask_with_torsions():
    """modify_angles_mask_with_torsions must return a tensor shaped like the input mask."""
    # inputs: a 16-residue sequence, a random mask and all-ones torsions
    seq = "AGHHKLHRTVNMSTIL"
    angles_mask = torch.randn(2, 16, 14)
    torsions = torch.ones(16, 4)
    # only the output shape is checked
    modified = modify_angles_mask_with_torsions(seq, angles_mask, torsions)
    assert modified.shape == angles_mask.shape, "Shapes don't match"
--------------------------------------------------------------------------------
/tests/test_ml_utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 | from mp_nerf import *
5 | from mp_nerf.utils import *
6 | from mp_nerf.ml_utils import *
7 | from mp_nerf.kb_proteins import *
8 | from mp_nerf.proteins import *
9 |
10 |
11 | # test ML utils
def test_scn_atom_embedd():
    """scn_atom_embedd on two 16-residue sequences must yield shape (2, 16, 14)."""
    first_seq = "AGHHKLHRTVNMSTIL"
    second_seq = "WERTQLITANMWTCSD"
    embedds = scn_atom_embedd([first_seq, second_seq])
    expected_shape = torch.Size([2, 16, 14])
    assert embedds.shape == expected_shape, "Shapes don't match"
17 |
18 |
def test_chain_to_atoms():
    """chain2atoms with c=14 must turn a (100, 3) input into shape (100, 14, 3)."""
    backbone = torch.randn(100, 3)
    expanded = chain2atoms(backbone, c=14)
    expected_shape = torch.Size([100, 14, 3])
    assert expanded.shape == expected_shape, "Shapes don't match"
23 |
24 |
def test_rename_symmetric_atoms():
    """rename_symmetric_atoms must keep the shapes of the coordinates and features."""
    seq_list = ["AGHHKLHRTVNMSTIL"]
    coords_shape = (1, 16, 14, 3)
    pred_coors = torch.randn(*coords_shape)
    true_coors = torch.randn(*coords_shape)
    # mask for the single sequence, with a leading batch axis added
    cloud_mask = scn_cloud_mask(seq_list[0]).unsqueeze(0)
    pred_feats = torch.randn(1, 16, 14, 16)

    renamed = rename_symmetric_atoms(pred_coors, true_coors, seq_list, cloud_mask, pred_feats=pred_feats)
    coors_ok = renamed[0].shape == pred_coors.shape
    assert coors_ok and renamed[1].shape == pred_feats.shape, "Shapes don't match"
34 |
35 |
def test_torsion_angle_loss():
    """torsion_angle_loss called without a mask must return a loss shaped like its input."""
    shape = (1, 100, 7)
    pred_torsions = torch.randn(*shape)
    true_torsions = torch.randn(*shape)
    # NOTE(review): this mask is built but never used -- the call below passes
    # angle_mask=None. Confirm whether the masked path was meant to be tested.
    angle_mask = pred_torsions <= 2.

    loss = torsion_angle_loss(pred_torsions, true_torsions, coeff=2., angle_mask=None)
    assert loss.shape == pred_torsions.shape, "Shapes don't match"
44 |
45 |
def test_fape_loss_torch():
    """Smoke test: fape_torch must run in both the C-alpha and the full-atom mode."""
    seq_list = ["AGHHKLHRTVNMSTIL"]
    coords_shape = (1, 16, 14, 3)
    pred_coords = torch.randn(*coords_shape)
    true_coords = torch.randn(*coords_shape)

    # the returned values are not inspected; the test passes if neither call raises
    loss_c_alpha = fape_torch(pred_coords, true_coords, c_alpha=True, seq_list=seq_list)
    loss_full = fape_torch(pred_coords, true_coords, c_alpha=False, seq_list=seq_list)

    assert True
55 |
56 |
57 |
58 |
59 |
--------------------------------------------------------------------------------