├── .github └── workflows │ ├── compile_huggingface.yml │ └── test.yml ├── .gitignore ├── LICENSE ├── README.md ├── huggingface ├── README.md ├── combine_files.py ├── huggingface_code.py ├── huggingface_wrapper.py ├── print_colab_dropdown.py ├── push_to_hub.py └── requirements.txt ├── metl ├── __init__.py ├── encode.py ├── main.py ├── models.py ├── relative_attention.py ├── structure.py ├── test.py ├── test2.py ├── test3.py └── test4.py ├── notebooks └── inference.ipynb ├── pdbs ├── 1gfl_cm.pdb ├── 2qmt_p.pdb ├── 6qji_p_trunc_2022.pdb ├── AF-P60484-F1-model_v4_p.pdb ├── AF-P62993-F1-model_v4_trunc_p.pdb ├── AF-Q6SJ61-F1-model_v4_p.pdb ├── pab1_cm.pdb └── ube4b_cm.pdb ├── requirements.txt ├── setup.cfg └── setup.py /.github/workflows/compile_huggingface.yml: -------------------------------------------------------------------------------- 1 | name: Compiling Huggingface Wrapper 2 | on: [push, workflow_dispatch] 3 | jobs: 4 | Combine-File: 5 | runs-on: ubuntu-latest 6 | env: 7 | HF_TOKEN: ${{ secrets.HF_TOKEN }} 8 | steps: 9 | - uses: actions/checkout@v4 10 | with: 11 | ref: 'main' 12 | - name: Set up Python 13 | uses: actions/setup-python@v5 14 | with: 15 | python-version: '3.9' 16 | - name: Upgrade pip 17 | run: pip install --upgrade pip 18 | - name: Install dependencies 19 | run: pip install -r huggingface/requirements.txt 20 | - name: Install torch cpu only 21 | run: pip install torch==2.1.0+cpu --index-url https://download.pytorch.org/whl/cpu 22 | - name: Combining Files 23 | run: python huggingface/combine_files.py -o huggingface/huggingface_wrapper.py 24 | - name: Formatting generated code 25 | run: | 26 | python -m black huggingface/huggingface_wrapper.py 27 | python -m isort huggingface/huggingface_wrapper.py 28 | - name: Push to hub 29 | run: python huggingface/push_to_hub.py 30 | 31 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | on: 3 | - push 4 | - pull_request 5 | jobs: 6 | 7 | test: 8 | name: Test pre-trained models 9 | runs-on: ${{ matrix.os }} 10 | strategy: 11 | max-parallel: 4 12 | fail-fast: false 13 | matrix: 14 | os: 15 | - ubuntu-latest 16 | - windows-latest 17 | - macos-latest 18 | python-version: 19 | - '3.9' 20 | - '3.12' 21 | 22 | steps: 23 | - name: Checkout repository 24 | uses: actions/checkout@v4 25 | - name: Install Python 26 | uses: actions/setup-python@v5 27 | with: 28 | python-version: ${{ matrix.python-version }} 29 | cache: 'pip' # caching pip dependencies 30 | - name: Install dependencies 31 | run: | 32 | python -m pip install --upgrade pip 33 | pip install -r requirements.txt 34 | pip install . 
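        # list the installed packages to help debug dependency issues in CI runs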
35 | pip list 36 | - name: Test METL-G 37 | run: python metl/test.py 38 | - name: Test 1D low-N METL-L avGFP 39 | run: python metl/test2.py 40 | - name: Test 3D low-N METL-L avGFP 41 | run: python metl/test3.py 42 | - name: Test METL-L GB1 43 | run: python metl/test4.py 44 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # PyCharm project settings 132 | .idea 133 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Sam Gelman 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pretrained METL models 2 | [![GitHub Actions](https://github.com/gitter-lab/metl-pretrained/actions/workflows/test.yml/badge.svg)](https://github.com/gitter-lab/metl-pretrained/actions/workflows/test.yml) 3 | [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.10819499.svg)](https://zenodo.org/doi/10.5281/zenodo.10819499) 4 | 5 | This repository contains pretrained METL [models](https://zenodo.org/doi/10.5281/zenodo.11051644) with minimal dependencies. 6 | For more information, please see the [metl](https://github.com/gitter-lab/metl) repository and our manuscript: 7 | 8 | [Biophysics-based protein language models for protein engineering](https://doi.org/10.1101/2024.03.15.585128). 9 | Sam Gelman, Bryce Johnson, Chase Freschlin, Arnav Sharma, Sameer D'Costa, John Peters, Anthony Gitter+, Philip A Romero+. 10 | *bioRxiv*, 2024. doi:10.1101/2024.03.15.585128 11 | + denotes equal contribution. 12 | 13 | # Getting started 14 | 1. Create a conda environment (or use existing one): `conda create --name myenv python=3.9` 15 | 2. Activate conda environment `conda activate myenv` 16 | 3. Clone this repository 17 | 4. Navigate to the cloned repository `cd metl-pretrained` 18 | 5. Install the package with `pip install .` 19 | 6. Import the package in your script with `import metl` 20 | 7. 
Load a pretrained model using `model, data_encoder = metl.get_from_uuid(uuid)` or one of the other loading functions (see examples below)
21 |    - `model` is a PyTorch model loaded with the pretrained weights
22 |    - `data_encoder` is a helper object that can be used to encode sequences and variants to be fed into the model
23 | 
24 | # Available models
25 | Model checkpoints are available to download from [Zenodo](https://zenodo.org/doi/10.5281/zenodo.11051644).
26 | Once you have a checkpoint downloaded, you can load it into a PyTorch model using `metl.get_from_checkpoint()`.
27 | Alternatively, you can use `metl.get_from_uuid()` or `metl.get_from_ident()` to automatically download, cache, and load the model based on its UUID or identifier.
28 | See the examples below.
29 | 
30 | ## Source models
31 | Source models predict Rosetta energy terms.
32 | 
33 | ### Global source models
34 | 
35 | | Identifier      | UUID       | Params | RPE | Output           | Description | Download |
36 | |-----------------|------------|--------|-----|------------------|-------------|----------|
37 | | `METL-G-20M-1D` | `D72M9aEp` | 20M    | 1D  | Rosetta energies | METL-G      | [Download](https://zenodo.org/records/14908509/files/METL-G-20M-1D-D72M9aEp.pt?download=1)  |
38 | | `METL-G-20M-3D` | `Nr9zCKpR` | 20M    | 3D  | Rosetta energies | METL-G      | [Download](https://zenodo.org/records/14908509/files/METL-G-20M-3D-Nr9zCKpR.pt?download=1)  |
39 | | `METL-G-50M-1D` | `auKdzzwX` | 50M    | 1D  | Rosetta energies | METL-G      | [Download](https://zenodo.org/records/14908509/files/METL-G-50M-1D-auKdzzwX.pt?download=1)  |
40 | | `METL-G-50M-3D` | `6PSAzdfv` | 50M    | 3D  | Rosetta energies | METL-G      | [Download](https://zenodo.org/records/14908509/files/METL-G-50M-3D-6PSAzdfv.pt?download=1)  |
41 | 
42 | ### Local source models
43 | 
44 | | Identifier               | UUID       | Protein | Params | RPE | Output           | Description | Download |
45 | |--------------------------|------------|---------|--------|-----|------------------|-------------|----------|
46 | | `METL-L-2M-1D-GFP`       | `8gMPQJy4` | GFP  | 2M | 1D | Rosetta energies | METL-L | [Download](https://zenodo.org/records/14908509/files/METL-L-2M-1D-GFP-8gMPQJy4.pt?download=1)       |
47 | | `METL-L-2M-3D-GFP`       | `Hr4GNHws` | GFP  | 2M | 3D | Rosetta energies | METL-L | [Download](https://zenodo.org/records/14908509/files/METL-L-2M-3D-GFP-Hr4GNHws.pt?download=1)       |
48 | | `METL-L-2M-1D-DLG4_2022` | `8iFoiYw2` | DLG4 | 2M | 1D | Rosetta energies | METL-L | [Download](https://zenodo.org/records/14908509/files/METL-L-2M-1D-DLG4_2022-8iFoiYw2.pt?download=1) |
49 | | `METL-L-2M-3D-DLG4_2022` | `kt5DdWTa` | DLG4 | 2M | 3D | Rosetta energies | METL-L | [Download](https://zenodo.org/records/14908509/files/METL-L-2M-3D-DLG4_2022-kt5DdWTa.pt?download=1) |
50 | | `METL-L-2M-1D-GB1`       | `DMfkjVzT` | GB1  | 2M | 1D | Rosetta energies | METL-L | [Download](https://zenodo.org/records/14908509/files/METL-L-2M-1D-GB1-DMfkjVzT.pt?download=1)       |
51 | | `METL-L-2M-3D-GB1`       | `epegcFiH` | GB1  | 2M | 3D | Rosetta energies | METL-L | [Download](https://zenodo.org/records/14908509/files/METL-L-2M-3D-GB1-epegcFiH.pt?download=1)       |
52 | | `METL-L-2M-1D-GRB2`      | `kS3rUS7h` | GRB2 | 2M | 1D | Rosetta energies | METL-L | [Download](https://zenodo.org/records/14908509/files/METL-L-2M-1D-GRB2-kS3rUS7h.pt?download=1)      |
53 | | `METL-L-2M-3D-GRB2`      | `X7w83g6S` | GRB2 | 2M | 3D | Rosetta energies | METL-L | 
[Download](https://zenodo.org/records/14908509/files/METL-L-2M-3D-GRB2-X7w83g6S.pt?download=1) | 54 | | `METL-L-2M-1D-Pab1` | `UKebCQGz` | Pab1 | 2M | 1D | Rosetta energies | METL-L | [Download](https://zenodo.org/records/14908509/files/METL-L-2M-1D-Pab1-UKebCQGz.pt?download=1) | 55 | | `METL-L-2M-3D-Pab1` | `2rr8V4th` | Pab1 | 2M | 3D | Rosetta energies | METL-L | [Download](https://zenodo.org/records/14908509/files/METL-L-2M-3D-Pab1-2rr8V4th.pt?download=1) | 56 | | `METL-L-2M-1D-PTEN` | `CEMSx7ZC` | PTEN | 2M | 1D | Rosetta energies | METL-L | [Download](https://zenodo.org/records/14908509/files/METL-L-2M-1D-PTEN-CEMSx7ZC.pt?download=1) | 57 | | `METL-L-2M-3D-PTEN` | `PjxR5LW7` | PTEN | 2M | 3D | Rosetta energies | METL-L | [Download](https://zenodo.org/records/14908509/files/METL-L-2M-3D-PTEN-PjxR5LW7.pt?download=1) | 58 | | `METL-L-2M-1D-TEM-1` | `PREhfC22` | TEM-1 | 2M | 1D | Rosetta energies | METL-L | [Download](https://zenodo.org/records/14908509/files/METL-L-2M-1D-TEM-1-PREhfC22.pt?download=1) | 59 | | `METL-L-2M-3D-TEM-1` | `9ASvszux` | TEM-1 | 2M | 3D | Rosetta energies | METL-L | [Download](https://zenodo.org/records/14908509/files/METL-L-2M-3D-TEM-1-9ASvszux.pt?download=1) | 60 | | `METL-L-2M-1D-Ube4b` | `HscFFkAb` | Ube4b | 2M | 1D | Rosetta energies | METL-L | [Download](https://zenodo.org/records/14908509/files/METL-L-2M-1D-Ube4b-HscFFkAb.pt?download=1) | 61 | | `METL-L-2M-3D-Ube4b` | `H48oiNZN` | Ube4b | 2M | 3D | Rosetta energies | METL-L | [Download](https://zenodo.org/records/14908509/files/METL-L-2M-3D-Ube4b-H48oiNZN.pt?download=1) | 62 | 63 | 64 | 65 | These models will output a length 55 vector corresponding to the following energy terms (in order): 66 |
<details>
67 | <summary>
68 | Expand to see energy terms
69 | </summary>
70 | 
71 | ```
72 | total_score
73 | fa_atr
74 | fa_dun
75 | fa_elec
76 | fa_intra_rep
77 | fa_intra_sol_xover4
78 | fa_rep
79 | fa_sol
80 | hbond_bb_sc
81 | hbond_lr_bb
82 | hbond_sc
83 | hbond_sr_bb
84 | lk_ball_wtd
85 | omega
86 | p_aa_pp
87 | pro_close
88 | rama_prepro
89 | ref
90 | yhh_planarity
91 | buried_all
92 | buried_np
93 | contact_all
94 | contact_buried_core
95 | contact_buried_core_boundary
96 | degree
97 | degree_core
98 | degree_core_boundary
99 | exposed_hydrophobics
100 | exposed_np_AFIMLWVY
101 | exposed_polars
102 | exposed_total
103 | one_core_each
104 | pack
105 | res_count_buried_core
106 | res_count_buried_core_boundary
107 | res_count_buried_np_core
108 | res_count_buried_np_core_boundary
109 | ss_contributes_core
110 | ss_mis
111 | total_hydrophobic
112 | total_hydrophobic_AFILMVWY
113 | total_sasa
114 | two_core_each
115 | unsat_hbond
116 | centroid_total_score
117 | cbeta
118 | cenpack
119 | env
120 | hs_pair
121 | pair
122 | rg
123 | rsigma
124 | sheet
125 | ss_pair
126 | vdw
127 | ```
128 | </details>
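For convenience, the output vector can be zipped with these term names. Below is a minimal sketch of this, using the loading functions demonstrated in the Examples section further down; the truncated `names` list is a stand-in for the full ordered list of 55 names above:

```python
import metl
import torch

# load a source model (see the Examples section below for details)
model, data_encoder = metl.get_from_ident("metl-g-20m-1d")
model.eval()

encoded = data_encoder.encode_sequences(["SMART"])
with torch.no_grad():
    predictions = model(torch.tensor(encoded))

# pair each output with its energy term name (first few shown here;
# substitute the full ordered list of 55 names from above)
names = ["total_score", "fa_atr", "fa_dun", "fa_elec", "fa_intra_rep"]
for name, value in zip(names, predictions[0, :len(names)].tolist()):
    print(f"{name}: {value:.3f}")
```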
129 | 130 | 131 | ### Function-specific source models for GB1 132 | 133 | The GB1 experimental data measured the binding interaction between GB1 variants and Immunoglobulin G (IgG). 134 | To match this experimentally characterized function, we implemented a Rosetta pipeline to model the GB1-IgG complex and compute 17 attributes related to energy changes upon binding. 135 | We pretrained a standard METL-Local model and a modified METL-Bind model, which additionally incorporates the IgG binding attributes into its pretraining tasks. 136 | 137 | | Identifier | UUID | Protein | Params | RPE | Output | Description | Download | 138 | |--------------------------------|------------|---------|--------|-----|-------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------| 139 | | `METL-BIND-2M-3D-GB1-STANDARD` | `K6mw24Rg` | GB1 | 2M | 3D | Standard Rosetta energies | Trained for the function-specific synthetic data experiment, but only trained on the standard energy terms, to use as a baseline. Should perform similarly to `METL-L-2M-3D-GB1`. | [Download](https://zenodo.org/records/14908509/files/METL-BIND-2M-3D-GB1-STANDARD-K6mw24Rg.pt?download=1) | 140 | | `METL-BIND-2M-3D-GB1-BINDING` | `Bo5wn2SG` | GB1 | 2M | 3D | Standard + binding Rosetta energies | Trained on both the standard energy terms and the binding-specific energy terms. | [Download](https://zenodo.org/records/14908509/files/METL-BIND-2M-3D-GB1-BINDING-Bo5wn2SG.pt?download=1) | 141 | 142 | 143 | `METL-BIND-2M-3D-GB1-BINDING` predicts the standard energy terms listed above as well as the following binding energy terms (in order): 144 | 145 |
<details>
146 | <summary>
147 | Expand to see binding energy terms
148 | </summary>
149 | 
150 | ```
151 | complex_normalized
152 | dG_cross
153 | dG_cross/dSASAx100
154 | dG_separated
155 | dG_separated/dSASAx100
156 | dSASA_hphobic
157 | dSASA_int
158 | dSASA_polar
159 | delta_unsatHbonds
160 | hbond_E_fraction
161 | hbonds_int
162 | nres_int
163 | per_residue_energy_int
164 | side1_normalized
165 | side1_score
166 | side2_normalized
167 | side2_score
168 | ```
169 | </details>
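Below is a hedged sketch of using the binding model, assuming `gb1_wt` holds the GB1 wild-type sequence (left as a placeholder here) and `A24Y` is a hypothetical variant; since this is a 3D RPE model, the GB1 structure from the [pdbs](pdbs) directory is passed via `pdb_fn`:

```python
import metl
import torch

model, data_encoder = metl.get_from_ident("metl-bind-2m-3d-gb1-binding")
model.eval()

gb1_wt = "..."  # placeholder: substitute the actual GB1 wild-type sequence
encoded = data_encoder.encode_variants(gb1_wt, ["A24Y"])  # hypothetical variant

with torch.no_grad():
    # 3D RPE models need the base protein structure (see the PDB table further below)
    predictions = model(torch.tensor(encoded), pdb_fn="pdbs/2qmt_p.pdb")

# expecting the 55 standard terms followed by the 17 binding terms listed above
print(predictions.shape)
```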
170 | 171 | ## Target models 172 | Target models are fine-tuned source models that predict functional scores from experimental sequence-function data. 173 | 174 | ### Global target models 175 | 176 | These models were trained using 80% of the experimental sequence-function data as training data. 177 | 178 | | DMS Dataset | Identifier | UUID | RPE | Output | Description | Download | 179 | |----------------|------------|-------------|-----|------------------|-----------------------------------------------------|-------------------------------------------------------------------------------------------------------------------| 180 | | GFP | `None` | `PeT2D92j` | 1D | Functional score | METL-Global finetuned on the GFP dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-G-20M-1D-GFP-PeT2D92j.pt?download=1) | 181 | | GFP | `None` | `6JBzHpkQ` | 3D | Functional score | METL-Global finetuned on the GFP dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-G-20M-3D-GFP-6JBzHpkQ.pt?download=1) | 182 | | DLG4-Abundance | `None` | `4Rh3WCbG` | 1D | Functional score | METL-Global finetuned on the DLG4-Abundance dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-G-20M-1D-DLG4_2022-ABUNDANCE-4Rh3WCbG.pt?download=1) | 183 | | DLG4-Abundance | `None` | `RBtqxzvu` | 3D | Functional score | METL-Global finetuned on the DLG4-Abundance dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-G-20M-3D-DLG4_2022-ABUNDANCE-RBtqxzvu.pt?download=1) | 184 | | DLG4-Binding | `None` | `4xbuC5y7` | 1D | Functional score | METL-Global finetuned on the DLG4-Binding dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-G-20M-1D-DLG4_2022-BINDING-4xbuC5y7.pt?download=1) | 185 | | DLG4-Binding | `None` | `BuvxgE2x` | 3D | Functional score | METL-Global finetuned on the DLG4-Binding dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-G-20M-3D-DLG4_2022-BINDING-BuvxgE2x.pt?download=1) | 186 | | GB1 | `None` | `dAndZfJ4` | 1D | Functional score | METL-Global finetuned on the GB1 dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-G-20M-1D-GB1-dAndZfJ4.pt?download=1) | 187 | | GB1 | `None` | `9vSB3DRM` | 3D | Functional score | METL-Global finetuned on the GB1 dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-G-20M-3D-GB1-9vSB3DRM.pt?download=1) | 188 | | GRB2-Abundance | `None` | `HenDpDWe` | 1D | Functional score | METL-Global finetuned on the GRB2-Abundance dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-G-20M-1D-GRB2-ABUNDANCE-HenDpDWe.pt?download=1) | 189 | | GRB2-Abundance | `None` | `dDoCCvfr` | 3D | Functional score | METL-Global finetuned on the GRB2-Abundance dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-G-20M-3D-GRB2-ABUNDANCE-dDoCCvfr.pt?download=1) | 190 | | GRB2-Binding | `None` | `cvnycE5Q` | 1D | Functional score | METL-Global finetuned on the GRB2-Binding dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-G-20M-1D-GRB2-BINDING-cvnycE5Q.pt?download=1) | 191 | | GRB2-Binding | `None` | `jYesS9Ki` | 3D | Functional score | METL-Global finetuned on the GRB2-Binding dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-G-20M-3D-GRB2-BINDING-jYesS9Ki.pt?download=1) | 192 | | Pab1 | `None` | `ho54gxzv` | 1D | Functional score | METL-Global finetuned on the Pab1 dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-G-20M-1D-Pab1-ho54gxzv.pt?download=1) | 193 | | Pab1 | 
`None` | `jhbL2FeB` | 3D | Functional score | METL-Global finetuned on the Pab1 dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-G-20M-3D-Pab1-jhbL2FeB.pt?download=1) | 194 | | PTEN-Abundance | `None` | `UEuMtmfx` | 1D | Functional score | METL-Global finetuned on the PTEN-Abundance dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-G-20M-1D-PTEN-ABUNDANCE-UEuMtmfx.pt?download=1) | 195 | | PTEN-Abundance | `None` | `eJPPQYEW` | 3D | Functional score | METL-Global finetuned on the PTEN-Abundance dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-G-20M-3D-PTEN-ABUNDANCE-eJPPQYEW.pt?download=1) | 196 | | PTEN-Activity | `None` | `U3X8mSeT` | 1D | Functional score | METL-Global finetuned on the PTEN-Activity dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-G-20M-1D-PTEN-ACTIVITY-U3X8mSeT.pt?download=1) | 197 | | PTEN-Activity | `None` | `4gqYnW6V` | 3D | Functional score | METL-Global finetuned on the PTEN-Activity dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-G-20M-3D-PTEN-ACTIVITY-4gqYnW6V.pt?download=1) | 198 | | TEM-1 | `None` | `ELL4GGQq` | 1D | Functional score | METL-Global finetuned on the TEM-1 dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-G-20M-1D-TEM-1-ELL4GGQq.pt?download=1) | 199 | | TEM-1 | `None` | `K6BjsWXm` | 3D | Functional score | METL-Global finetuned on the TEM-1 dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-G-20M-3D-TEM-1-K6BjsWXm.pt?download=1) | 200 | | Ube4b | `None` | `BAWw23vW` | 1D | Functional score | METL-Global finetuned on the Ube4b dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-G-20M-1D-Ube4b-BAWw23vW.pt?download=1) | 201 | | Ube4b | `None` | `G9piq6WH` | 3D | Functional score | METL-Global finetuned on the Ube4b dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-G-20M-3D-Ube4b-G9piq6WH.pt?download=1) | 202 | 203 | ### Local target models 204 | 205 | These models were trained using 80% of the experimental sequence-function data as training data. 
206 | 207 | | DMS Dataset | Identifier | UUID | RPE | Output | Description | Download | 208 | |----------------|------------|----------|-----|------------------|----------------------------------------------------|---------------------------------------------------------------------------------------------------------------| 209 | | GFP | `None` | `HaUuRwfE` | 1D | Functional score | METL-Local finetuned on the GFP dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-L-1D-GFP-HaUuRwfE.pt?download=1) | 210 | | GFP | `None` | `LWEY95Yb` | 3D | Functional score | METL-Local finetuned on the GFP dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-L-3D-GFP-LWEY95Yb.pt?download=1) | 211 | | DLG4-Abundance | `None` | `RMFA6dnX` | 1D | Functional score | METL-Local finetuned on the DLG4-Abundance dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-L-1D-DLG4_2022-ABUNDANCE-RMFA6dnX.pt?download=1) | 212 | | DLG4-Abundance | `None` | `V3uTtXVe` | 3D | Functional score | METL-Local finetuned on the DLG4-Abundance dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-L-3D-DLG4_2022-ABUNDANCE-V3uTtXVe.pt?download=1) | 213 | | DLG4-Binding | `None` | `YdzBYWHs` | 1D | Functional score | METL-Local finetuned on the DLG4-Binding dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-L-1D-DLG4_2022-BINDING-YdzBYWHs.pt?download=1) | 214 | | DLG4-Binding | `None` | `iu6ZahPw` | 3D | Functional score | METL-Local finetuned on the DLG4-Binding dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-L-3D-DLG4_2022-BINDING-iu6ZahPw.pt?download=1) | 215 | | GB1 | `None` | `Pgcseywk` | 1D | Functional score | METL-Local finetuned on the GB1 dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-L-1D-GB1-Pgcseywk.pt?download=1) | 216 | | GB1 | `None` | `UvMMdsq4` | 3D | Functional score | METL-Local finetuned on the GB1 dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-L-3D-GB1-UvMMdsq4.pt?download=1) | 217 | | GRB2-Abundance | `None` | `VNpi9Zjt` | 1D | Functional score | METL-Local finetuned on the GRB2-Abundance dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-L-1D-GRB2-ABUNDANCE-VNpi9Zjt.pt?download=1) | 218 | | GRB2-Abundance | `None` | `PqBMjXkA` | 3D | Functional score | METL-Local finetuned on the GRB2-Abundance dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-L-3D-GRB2-ABUNDANCE-PqBMjXkA.pt?download=1) | 219 | | GRB2-Binding | `None` | `Z59BhUaE` | 1D | Functional score | METL-Local finetuned on the GRB2-Binding dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-L-1D-GRB2-BINDING-Z59BhUaE.pt?download=1) | 220 | | GRB2-Binding | `None` | `VwcRN6UB` | 3D | Functional score | METL-Local finetuned on the GRB2-Binding dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-L-3D-GRB2-BINDING-VwcRN6UB.pt?download=1) | 221 | | Pab1 | `None` | `TdjCzoQQ` | 1D | Functional score | METL-Local finetuned on the Pab1 dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-L-1D-Pab1-TdjCzoQQ.pt?download=1) | 222 | | Pab1 | `None` | `5SjoLx3y` | 3D | Functional score | METL-Local finetuned on the Pab1 dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-L-3D-Pab1-5SjoLx3y.pt?download=1) | 223 | | PTEN-Abundance | `None` | `oUScGeHo` | 1D | Functional score | METL-Local finetuned on the PTEN-Abundance dataset | 
[Download](https://zenodo.org/records/14908509/files/FT-METL-L-2M-1D-PTEN-ABUNDANCE-oUScGeHo.pt?download=1) | 224 | | PTEN-Abundance | `None` | `DhuasDEr` | 3D | Functional score | METL-Local finetuned on the PTEN-Abundance dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-L-2M-3D-PTEN-ABUNDANCE-DhuasDEr.pt?download=1) | 225 | | PTEN-Activity | `None` | `m9UsG7dq` | 1D | Functional score | METL-Local finetuned on the PTEN-Activity dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-L-2M-1D-PTEN-ACTIVITY-m9UsG7dq.pt?download=1) | 226 | | PTEN-Activity | `None` | `8Vi7ENcC` | 3D | Functional score | METL-Local finetuned on the PTEN-Activity dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-L-2M-3D-PTEN-ACTIVITY-8Vi7ENcC.pt?download=1) | 227 | | TEM-1 | `None` | `64ncFxBR` | 1D | Functional score | METL-Local finetuned on the TEM-1 dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-L-1D-TEM-1-64ncFxBR.pt?download=1) | 228 | | TEM-1 | `None` | `PncvgiJU` | 3D | Functional score | METL-Local finetuned on the TEM-1 dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-L-3D-TEM-1-PncvgiJU.pt?download=1) | 229 | | Ube4b | `None` | `e9uhhnAv` | 1D | Functional score | METL-Local finetuned on the Ube4b dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-L-1D-Ube4b-e9uhhnAv.pt?download=1) | 230 | | Ube4b | `None` | `NfbZL7jK` | 3D | Functional score | METL-Local finetuned on the Ube4b dataset | [Download](https://zenodo.org/records/14908509/files/FT-METL-L-3D-Ube4b-NfbZL7jK.pt?download=1) | 231 | 232 | 233 | ### GFP design experiment target models 234 | 235 | | DMS Dataset | Identifier | UUID | RPE | Output | Description | Download | 236 | |:------------|------------|------------|-----|------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------| 237 | | GFP | `None` | `YoQkzoLD` | 1D | Functional score | The `METL-L-2M-1D-GFP` model, fine-tuned on 64 examples from the GFP DMS dataset. This model was used for the GFP design experiment described in the manuscript. | [Download](https://zenodo.org/records/14908509/files/FT-METL-L-2M-1D-GFP-YoQkzoLD.pt?download=1) | 238 | | GFP | `None` | `PEkeRuxb` | 3D | Functional score | The `METL-L-2M-3D-GFP` model, fine-tuned on 64 examples from the GFP DMS dataset. This model was used for the GFP design experiment described in the manuscript. | [Download](https://zenodo.org/records/14908509/files/FT-METL-L-2M-3D-GFP-PEkeRuxb.pt?download=1) | 239 | 240 | 241 | # 3D Relative Position Embeddings 242 | 243 | METL uses relative position embeddings (RPEs) based on 3D protein structure. 244 | The implementation of relative position embeddings is similar to the original paper by [Shaw et al](https://aclanthology.org/N18-2074/). 245 | However, instead of using the default 1D sequence-based distances, we calculate relative distances based on a graph of the 3D protein structure. 246 | These 3D RPEs enable the transformer to use 3D distances between amino acid residues as the positional signal when calculating attention. 247 | When using 3D RPEs, the model requires a protein structure in the form of a PDB file, corresponding to the wild-type protein or base protein of the input variant sequence. 
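To make the graph-based distances concrete, here is an illustrative sketch of the general idea, not the implementation this repository uses (see [metl/structure.py](metl/structure.py)); the C-alpha contact graph and the 8 Å cutoff are assumptions for illustration only:

```python
import networkx as nx
import numpy as np
from biopandas.pdb import PandasPdb

def residue_hop_distances(pdb_fn: str, threshold: float = 8.0):
    """Hop counts between residues in a structure-based contact graph."""
    atoms = PandasPdb().read_pdb(pdb_fn).df["ATOM"]
    ca = atoms[atoms["atom_name"] == "CA"]
    coords = ca[["x_coord", "y_coord", "z_coord"]].to_numpy()

    # connect residues whose C-alpha atoms are within the distance threshold
    graph = nx.Graph()
    graph.add_nodes_from(range(len(coords)))
    for i in range(len(coords)):
        for j in range(i + 1, len(coords)):
            if np.linalg.norm(coords[i] - coords[j]) <= threshold:
                graph.add_edge(i, j)

    # graph hop counts replace |i - j| sequence offsets as relative positions
    return dict(nx.all_pairs_shortest_path_length(graph))
```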
248 | 
249 | Our testing showed that 3D RPEs improve performance for METL-Global models but do not make a difference for METL-Local models.
250 | We provide both 1D and 3D models in this repository. The 1D models do not require the PDB structure as an additional input.
251 | 
252 | The [pdbs](pdbs) directory contains PDB files corresponding to the experimental datasets we evaluated. These can be used with the 3D RPE models listed above.
253 | 
254 | | DMS Dataset    | PDB File |
255 | |----------------|-------------------------------------------------------------------------------|
256 | | GFP            | [`1gfl_cm.pdb`](pdbs/1gfl_cm.pdb) |
257 | | DLG4-Abundance | [`6qji_p_trunc_2022.pdb`](pdbs/6qji_p_trunc_2022.pdb) |
258 | | DLG4-Binding   | [`6qji_p_trunc_2022.pdb`](pdbs/6qji_p_trunc_2022.pdb) |
259 | | GB1            | [`2qmt_p.pdb`](pdbs/2qmt_p.pdb) |
260 | | GRB2-Abundance | [`AF-P62993-F1-model_v4_trunc_p.pdb`](pdbs/AF-P62993-F1-model_v4_trunc_p.pdb) |
261 | | GRB2-Binding   | [`AF-P62993-F1-model_v4_trunc_p.pdb`](pdbs/AF-P62993-F1-model_v4_trunc_p.pdb) |
262 | | Pab1           | [`pab1_cm.pdb`](pdbs/pab1_cm.pdb) |
263 | | PTEN-Abundance | [`AF-P60484-F1-model_v4_p.pdb`](pdbs/AF-P60484-F1-model_v4_p.pdb) |
264 | | PTEN-Activity  | [`AF-P60484-F1-model_v4_p.pdb`](pdbs/AF-P60484-F1-model_v4_p.pdb) |
265 | | TEM-1          | [`AF-Q6SJ61-F1-model_v4_p.pdb`](pdbs/AF-Q6SJ61-F1-model_v4_p.pdb) |
266 | | Ube4b          | [`ube4b_cm.pdb`](pdbs/ube4b_cm.pdb) |
267 | 
268 | # Examples
269 | 
270 | ## METL source model
271 | 
272 | METL source models are assigned identifiers that can be used to load the model with `metl.get_from_ident()`.
273 | 
274 | This example:
275 | - Automatically downloads and caches `METL-G-20M-1D` using `metl.get_from_ident("metl-g-20m-1d")`.
276 | - Encodes a pair of dummy amino acid sequences using `data_encoder.encode_sequences()`.
277 | - Runs the sequences through the model and prints the predicted Rosetta energies.
278 | 
279 | _Todo: show how to extract the METL representation at different layers of the network_
280 | 
281 | ```python
282 | import metl
283 | import torch
284 | 
285 | model, data_encoder = metl.get_from_ident("metl-g-20m-1d")
286 | 
287 | # these are amino acid sequences
288 | # make sure all the sequences are the same length
289 | dummy_sequences = ["SMART", "MAGIC"]
290 | encoded_seqs = data_encoder.encode_sequences(dummy_sequences)
291 | 
292 | # set model to eval mode
293 | model.eval()
294 | # no need to compute gradients for inference
295 | with torch.no_grad():
296 |     predictions = model(torch.tensor(encoded_seqs))
297 | 
298 | print(predictions)
299 | ```
300 | 
301 | If you are using a model with 3D relative position embeddings, you will need to provide the PDB structure of the wild-type or base protein.
302 | 
303 | ```python
304 | predictions = model(torch.tensor(encoded_seqs), pdb_fn="../path/to/file.pdb")
305 | ```
306 | 
307 | 
308 | ## METL target model
309 | 
310 | METL target models can be loaded using the model's UUID and `metl.get_from_uuid()`.
311 | 
312 | This example:
313 | - Automatically downloads and caches `YoQkzoLD` using `metl.get_from_uuid(uuid="YoQkzoLD")`.
314 | - Encodes several variants specified in variant notation (e.g. `E3K`, where positions are 0-indexed). A wild-type sequence is needed to encode variants.
315 | - Runs the sequences through the model and prints the predicted DMS scores. 
316 | 
317 | ```python
318 | import metl
319 | import torch
320 | 
321 | model, data_encoder = metl.get_from_uuid(uuid="YoQkzoLD")
322 | 
323 | # the GFP wild-type sequence
324 | wt = "SKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTLSYGVQCFSRYPDHMKQ" \
325 |      "HDFFKSAMPEGYVQERTIFFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYIMADKQKN" \
326 |      "GIKVNFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK"
327 | 
328 | # some example GFP variants to compute the scores for
329 | variants = ["E3K,G102S",
330 |             "T36P,S203T,K207R",
331 |             "V10A,D19G,F25S,E113V"]
332 | 
333 | encoded_variants = data_encoder.encode_variants(wt, variants)
334 | 
335 | # set model to eval mode
336 | model.eval()
337 | # no need to compute gradients for inference
338 | with torch.no_grad():
339 |     predictions = model(torch.tensor(encoded_variants))
340 | 
341 | print(predictions)
342 | 
343 | ```
--------------------------------------------------------------------------------
/huggingface/README.md:
--------------------------------------------------------------------------------
1 | This directory maintains the 🤗 support for METL.
2 | 
3 | It contains a few files that facilitate uploading the wrapper to 🤗. First, combine_files.py takes all of the files in the metl directory, skipping any whose names contain test or _.py (e.g., __init__.py), and combines them into a single file. combine_files.py also appends the Hugging Face wrapper code itself (stored in huggingface_code.py) to the bottom of the generated script.
4 | 
5 | The generated script is then formatted and pushed to 🤗 by the push_to_hub.py script, which runs via GitHub Actions. Short comments at the top of each file repeat these responsibilities.
--------------------------------------------------------------------------------
/huggingface/combine_files.py:
--------------------------------------------------------------------------------
1 | """
2 | This script combines all of the files in the metl directory into one file so that it can be uploaded automatically to Hugging Face.
3 | 
4 | Files whose names contain _.py or test will not be included. This script also automatically generates the required imports from the files.
5 | 
6 | Regardless of future changes to metl, this should keep working as long as any newly added files that need to be included do not contain test or _.py in their names.
7 | """
8 | 
9 | import argparse
10 | import os
11 | 
12 | def main(output_path: str):
13 |     imports = set()
14 |     code = []
15 |     metl_imports = set()
16 |     for file in os.listdir('./metl'):
17 |         if '.py' in file and '_.py' not in file and 'test' not in file:
18 |             with open(f'./metl/{file}', 'r') as f:
19 |                 file_text = f.readlines()
20 |                 for line in file_text:
21 |                     line_for_compare = line.strip()
22 |                     if 'import ' in line_for_compare and 'metl.' not in line_for_compare:
23 |                         imports.add(line_for_compare)
24 |                     elif 'import ' in line_for_compare and 'metl.' 
in line_for_compare: 25 | if 'as' in line_for_compare: 26 | metl_imports.add(line_for_compare) 27 | else: 28 | code.append(line[:-1]) 29 | 30 | code = '\n'.join(code) 31 | imports = '\n'.join(imports) 32 | 33 | for line in metl_imports: 34 | import_name = line.split('as')[-1].strip() 35 | code = code.replace(f'{import_name}.', '') 36 | 37 | huggingface_import = 'from transformers import PretrainedConfig, PreTrainedModel' 38 | delimiter = '$>' 39 | 40 | with open('./huggingface/huggingface_code.py', 'r') as f: 41 | contents = f.read() 42 | delim_location = contents.find(delimiter) 43 | cut_contents = contents[delim_location+len(delimiter):] 44 | 45 | with open(output_path, 'w') as f: 46 | f.write(f'{huggingface_import}\n{imports}\n{code}\n{cut_contents}') 47 | 48 | def parse_args(): 49 | parser = argparse.ArgumentParser(description="Compile huggingface wrapper") 50 | parser.add_argument("-o", type=str, help="Output filepath", default='./huggingface_wrapper.py') 51 | 52 | args = parser.parse_args() 53 | 54 | args.o = os.path.abspath(args.o) 55 | return args 56 | 57 | if __name__ == "__main__": 58 | args = parse_args() 59 | main(args.o) 60 | -------------------------------------------------------------------------------- /huggingface/huggingface_code.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains the actual wrapper for METL. 3 | Above the delimiter for this file: #\$\> we have included imports and shell functions 4 | which prevent python (and other linters) from complaining this file has errors. 5 | """ 6 | 7 | 8 | from transformers import PretrainedConfig, PreTrainedModel 9 | 10 | def get_from_uuid(): 11 | pass 12 | 13 | def get_from_ident(): 14 | pass 15 | 16 | def get_from_checkpoint(): 17 | pass 18 | 19 | IDENT_UUID_MAP = "" 20 | UUID_URL_MAP = "" 21 | 22 | # Chop The above off. 
23 | 
24 | #$>
25 | # Huggingface code
26 | 
27 | class METLConfig(PretrainedConfig):
28 |     IDENT_UUID_MAP = IDENT_UUID_MAP
29 |     UUID_URL_MAP = UUID_URL_MAP
30 |     model_type = "METL"
31 | 
32 |     def __init__(
33 |         self,
34 |         id:str = None,
35 |         **kwargs,
36 |     ):
37 |         self.id = id
38 |         super().__init__(**kwargs)
39 | 
40 | class METLModel(PreTrainedModel):
41 |     config_class = METLConfig
42 |     def __init__(self, config:METLConfig):
43 |         super().__init__(config)
44 |         self.model = None
45 |         self.encoder = None
46 |         self.config = config
47 | 
48 |     def forward(self, X, pdb_fn=None):
49 |         if pdb_fn:
50 |             return self.model(X, pdb_fn=pdb_fn)
51 |         return self.model(X)
52 | 
53 |     def load_from_uuid(self, id):
54 |         if id:
55 |             assert id in self.config.UUID_URL_MAP, "ID given does not reference a valid METL model in the UUID_URL_MAP"
56 |             self.config.id = id
57 | 
58 |         self.model, self.encoder = get_from_uuid(self.config.id)
59 | 
60 |     def load_from_ident(self, id):
61 |         if id:
62 |             id = id.lower()
63 |             assert id in self.config.IDENT_UUID_MAP, "ID given does not reference a valid METL model in the IDENT_UUID_MAP"
64 |             self.config.id = id
65 | 
66 |         self.model, self.encoder = get_from_ident(self.config.id)
67 | 
68 |     def get_from_checkpoint(self, checkpoint_path):
69 |         self.model, self.encoder = get_from_checkpoint(checkpoint_path)
--------------------------------------------------------------------------------
/huggingface/print_colab_dropdown.py:
--------------------------------------------------------------------------------
1 | """
2 | Utility script for generating a list that can be pasted into the Google Colab notebook when more models are uploaded to Zenodo and added to the METL IDENT_UUID_MAP.
3 | 
4 | This pulls from Hugging Face, so wait for that action to finish first before running this script and uploading the Colab notebook.
5 | """
6 | 
7 | from transformers import AutoModel
8 | 
9 | def main():
10 |     metl = AutoModel.from_pretrained('gitter-lab/METL', trust_remote_code=True)
11 |     start = "# @param ["
12 |     metl_keys = [f'"{key}"' for key in metl.config.IDENT_UUID_MAP.keys()]
13 |     keys = ','.join(metl_keys)
14 |     end = f'{keys}]'
15 |     print(start + end)
16 | 
17 | if __name__ == "__main__":
18 |     main()
--------------------------------------------------------------------------------
/huggingface/push_to_hub.py:
--------------------------------------------------------------------------------
1 | """
2 | A minimal script for uploading the generated file from combine_files.py to Hugging Face.
3 | Requires the action to have access to the HF_TOKEN secret in the repository. 
4 | """ 5 | 6 | from huggingface_wrapper import METLConfig, METLModel 7 | from huggingface_hub import login 8 | import os 9 | from transformers import AutoModel, AutoConfig 10 | import torch 11 | 12 | def main(): 13 | API_KEY = os.getenv('HF_TOKEN') 14 | login(API_KEY) 15 | 16 | config = METLConfig() 17 | model = METLModel(config) 18 | model.model = torch.nn.Linear(1, 1) 19 | 20 | AutoConfig.register("METL", METLConfig) 21 | AutoModel.register(METLConfig, METLModel) 22 | 23 | model.register_for_auto_class() 24 | config.register_for_auto_class() 25 | 26 | model.push_to_hub('gitter-lab/METL') 27 | config.push_to_hub('gitter-lab/METL') 28 | 29 | if __name__ == "__main__": 30 | main() -------------------------------------------------------------------------------- /huggingface/requirements.txt: -------------------------------------------------------------------------------- 1 | huggingface-hub==0.30.2 2 | transformers==4.51.3 3 | numpy>=1.23.2 4 | networkx>=2.6.3 5 | scipy>=1.9.1 6 | biopandas>=0.2.7 7 | isort 8 | black 9 | -------------------------------------------------------------------------------- /metl/__init__.py: -------------------------------------------------------------------------------- 1 | from .main import * 2 | __version__ = "0.1" 3 | -------------------------------------------------------------------------------- /metl/encode.py: -------------------------------------------------------------------------------- 1 | """ Encodes data in different formats """ 2 | from enum import Enum, auto 3 | 4 | import numpy as np 5 | 6 | 7 | class Encoding(Enum): 8 | INT_SEQS = auto() 9 | ONE_HOT = auto() 10 | 11 | 12 | class DataEncoder: 13 | chars = ["*", "A", "C", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y"] 14 | num_chars = len(chars) 15 | mapping = {c: i for i, c in enumerate(chars)} 16 | 17 | def __init__(self, encoding: Encoding = Encoding.INT_SEQS): 18 | self.encoding = encoding 19 | 20 | def _encode_from_int_seqs(self, seq_ints): 21 | if self.encoding == Encoding.INT_SEQS: 22 | return seq_ints 23 | elif self.encoding == Encoding.ONE_HOT: 24 | one_hot = np.eye(self.num_chars)[seq_ints] 25 | return one_hot.astype(np.float32) 26 | 27 | def encode_sequences(self, char_seqs): 28 | seq_ints = [] 29 | for char_seq in char_seqs: 30 | int_seq = [self.mapping[c] for c in char_seq] 31 | seq_ints.append(int_seq) 32 | seq_ints = np.array(seq_ints).astype(int) 33 | return self._encode_from_int_seqs(seq_ints) 34 | 35 | def encode_variants(self, wt, variants): 36 | # convert wild type seq to integer encoding 37 | wt_int = np.zeros(len(wt), dtype=np.uint8) 38 | for i, c in enumerate(wt): 39 | wt_int[i] = self.mapping[c] 40 | 41 | # tile the wild-type seq 42 | seq_ints = np.tile(wt_int, (len(variants), 1)) 43 | 44 | for i, variant in enumerate(variants): 45 | # special handling if we want to encode the wild-type seq (it's already correct!) 46 | if variant == "_wt": 47 | continue 48 | 49 | # variants are a list of mutations [mutation1, mutation2, ....] 
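            # e.g. the variant "E3K,G102S" splits into the mutations ["E3K", "G102S"]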
50 |             variant = variant.split(",")
51 |             for mutation in variant:
52 |                 # mutations are in the form <wild-type char><0-based position><replacement char>, e.g. E3K
53 |                 position = int(mutation[1:-1])
54 |                 replacement = self.mapping[mutation[-1]]
55 |                 seq_ints[i, position] = replacement
56 | 
57 |         seq_ints = seq_ints.astype(int)
58 |         return self._encode_from_int_seqs(seq_ints)
59 | 
--------------------------------------------------------------------------------
/metl/main.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.hub
3 | 
4 | import metl.models as models
5 | from metl.encode import DataEncoder, Encoding
6 | 
7 | UUID_URL_MAP = {
8 |     # global source models
9 |     "D72M9aEp": "https://zenodo.org/records/14908509/files/METL-G-20M-1D-D72M9aEp.pt?download=1",
10 |     "Nr9zCKpR": "https://zenodo.org/records/14908509/files/METL-G-20M-3D-Nr9zCKpR.pt?download=1",
11 |     "auKdzzwX": "https://zenodo.org/records/14908509/files/METL-G-50M-1D-auKdzzwX.pt?download=1",
12 |     "6PSAzdfv": "https://zenodo.org/records/14908509/files/METL-G-50M-3D-6PSAzdfv.pt?download=1",
13 | 
14 |     # local source models
15 |     "8gMPQJy4": "https://zenodo.org/records/14908509/files/METL-L-2M-1D-GFP-8gMPQJy4.pt?download=1",
16 |     "Hr4GNHws": "https://zenodo.org/records/14908509/files/METL-L-2M-3D-GFP-Hr4GNHws.pt?download=1",
17 |     "8iFoiYw2": "https://zenodo.org/records/14908509/files/METL-L-2M-1D-DLG4_2022-8iFoiYw2.pt?download=1",
18 |     "kt5DdWTa": "https://zenodo.org/records/14908509/files/METL-L-2M-3D-DLG4_2022-kt5DdWTa.pt?download=1",
19 |     "DMfkjVzT": "https://zenodo.org/records/14908509/files/METL-L-2M-1D-GB1-DMfkjVzT.pt?download=1",
20 |     "epegcFiH": "https://zenodo.org/records/14908509/files/METL-L-2M-3D-GB1-epegcFiH.pt?download=1",
21 |     "kS3rUS7h": "https://zenodo.org/records/14908509/files/METL-L-2M-1D-GRB2-kS3rUS7h.pt?download=1",
22 |     "X7w83g6S": "https://zenodo.org/records/14908509/files/METL-L-2M-3D-GRB2-X7w83g6S.pt?download=1",
23 |     "UKebCQGz": "https://zenodo.org/records/14908509/files/METL-L-2M-1D-Pab1-UKebCQGz.pt?download=1",
24 |     "2rr8V4th": "https://zenodo.org/records/14908509/files/METL-L-2M-3D-Pab1-2rr8V4th.pt?download=1",
25 |     "PREhfC22": "https://zenodo.org/records/14908509/files/METL-L-2M-1D-TEM-1-PREhfC22.pt?download=1",
26 |     "9ASvszux": "https://zenodo.org/records/14908509/files/METL-L-2M-3D-TEM-1-9ASvszux.pt?download=1",
27 |     "HscFFkAb": "https://zenodo.org/records/14908509/files/METL-L-2M-1D-Ube4b-HscFFkAb.pt?download=1",
28 |     "H48oiNZN": "https://zenodo.org/records/14908509/files/METL-L-2M-3D-Ube4b-H48oiNZN.pt?download=1",
29 |     "CEMSx7ZC": "https://zenodo.org/records/14908509/files/METL-L-2M-1D-PTEN-CEMSx7ZC.pt?download=1",
30 |     "PjxR5LW7": "https://zenodo.org/records/14908509/files/METL-L-2M-3D-PTEN-PjxR5LW7.pt?download=1",
31 | 
32 |     # metl bind source models
33 |     "K6mw24Rg": "https://zenodo.org/records/14908509/files/METL-BIND-2M-3D-GB1-STANDARD-K6mw24Rg.pt?download=1",
34 |     "Bo5wn2SG": "https://zenodo.org/records/14908509/files/METL-BIND-2M-3D-GB1-BINDING-Bo5wn2SG.pt?download=1",
35 | 
36 |     # finetuned models from GFP design experiment
37 |     "YoQkzoLD": "https://zenodo.org/records/14908509/files/FT-METL-L-2M-1D-GFP-YoQkzoLD.pt?download=1",
38 |     "PEkeRuxb": "https://zenodo.org/records/14908509/files/FT-METL-L-2M-3D-GFP-PEkeRuxb.pt?download=1",
39 | 
40 |     # new finetuned GLOBAL models
41 |     "4Rh3WCbG": "https://zenodo.org/records/14908509/files/FT-METL-G-20M-1D-DLG4_2022-ABUNDANCE-4Rh3WCbG.pt?download=1",
42 |     "4xbuC5y7": 
"https://zenodo.org/records/14908509/files/FT-METL-G-20M-1D-DLG4_2022-BINDING-4xbuC5y7.pt?download=1", 43 | "dAndZfJ4": "https://zenodo.org/records/14908509/files/FT-METL-G-20M-1D-GB1-dAndZfJ4.pt?download=1", 44 | "PeT2D92j": "https://zenodo.org/records/14908509/files/FT-METL-G-20M-1D-GFP-PeT2D92j.pt?download=1", 45 | "HenDpDWe": "https://zenodo.org/records/14908509/files/FT-METL-G-20M-1D-GRB2-ABUNDANCE-HenDpDWe.pt?download=1", 46 | "cvnycE5Q": "https://zenodo.org/records/14908509/files/FT-METL-G-20M-1D-GRB2-BINDING-cvnycE5Q.pt?download=1", 47 | "ho54gxzv": "https://zenodo.org/records/14908509/files/FT-METL-G-20M-1D-Pab1-ho54gxzv.pt?download=1", 48 | "UEuMtmfx": "https://zenodo.org/records/14908509/files/FT-METL-G-20M-1D-PTEN-ABUNDANCE-UEuMtmfx.pt?download=1", 49 | "U3X8mSeT": "https://zenodo.org/records/14908509/files/FT-METL-G-20M-1D-PTEN-ACTIVITY-U3X8mSeT.pt?download=1", 50 | "ELL4GGQq": "https://zenodo.org/records/14908509/files/FT-METL-G-20M-1D-TEM-1-ELL4GGQq.pt?download=1", 51 | "BAWw23vW": "https://zenodo.org/records/14908509/files/FT-METL-G-20M-1D-Ube4b-BAWw23vW.pt?download=1", 52 | "RBtqxzvu": "https://zenodo.org/records/14908509/files/FT-METL-G-20M-3D-DLG4_2022-ABUNDANCE-RBtqxzvu.pt?download=1", 53 | "BuvxgE2x": "https://zenodo.org/records/14908509/files/FT-METL-G-20M-3D-DLG4_2022-BINDING-BuvxgE2x.pt?download=1", 54 | "9vSB3DRM": "https://zenodo.org/records/14908509/files/FT-METL-G-20M-3D-GB1-9vSB3DRM.pt?download=1", 55 | "6JBzHpkQ": "https://zenodo.org/records/14908509/files/FT-METL-G-20M-3D-GFP-6JBzHpkQ.pt?download=1", 56 | "dDoCCvfr": "https://zenodo.org/records/14908509/files/FT-METL-G-20M-3D-GRB2-ABUNDANCE-dDoCCvfr.pt?download=1", 57 | "jYesS9Ki": "https://zenodo.org/records/14908509/files/FT-METL-G-20M-3D-GRB2-BINDING-jYesS9Ki.pt?download=1", 58 | "jhbL2FeB": "https://zenodo.org/records/14908509/files/FT-METL-G-20M-3D-Pab1-jhbL2FeB.pt?download=1", 59 | "eJPPQYEW": "https://zenodo.org/records/14908509/files/FT-METL-G-20M-3D-PTEN-ABUNDANCE-eJPPQYEW.pt?download=1", 60 | "4gqYnW6V": "https://zenodo.org/records/14908509/files/FT-METL-G-20M-3D-PTEN-ACTIVITY-4gqYnW6V.pt?download=1", 61 | "K6BjsWXm": "https://zenodo.org/records/14908509/files/FT-METL-G-20M-3D-TEM-1-K6BjsWXm.pt?download=1", 62 | "G9piq6WH": "https://zenodo.org/records/14908509/files/FT-METL-G-20M-3D-Ube4b-G9piq6WH.pt?download=1", 63 | 64 | # finetuned LOCAL models 65 | "RMFA6dnX": "https://zenodo.org/records/14908509/files/FT-METL-L-1D-DLG4_2022-ABUNDANCE-RMFA6dnX.pt?download=1", 66 | "YdzBYWHs": "https://zenodo.org/records/14908509/files/FT-METL-L-1D-DLG4_2022-BINDING-YdzBYWHs.pt?download=1", 67 | "Pgcseywk": "https://zenodo.org/records/14908509/files/FT-METL-L-1D-GB1-Pgcseywk.pt?download=1", 68 | "HaUuRwfE": "https://zenodo.org/records/14908509/files/FT-METL-L-1D-GFP-HaUuRwfE.pt?download=1", 69 | "VNpi9Zjt": "https://zenodo.org/records/14908509/files/FT-METL-L-1D-GRB2-ABUNDANCE-VNpi9Zjt.pt?download=1", 70 | "Z59BhUaE": "https://zenodo.org/records/14908509/files/FT-METL-L-1D-GRB2-BINDING-Z59BhUaE.pt?download=1", 71 | "TdjCzoQQ": "https://zenodo.org/records/14908509/files/FT-METL-L-1D-Pab1-TdjCzoQQ.pt?download=1", 72 | "64ncFxBR": "https://zenodo.org/records/14908509/files/FT-METL-L-1D-TEM-1-64ncFxBR.pt?download=1", 73 | "e9uhhnAv": "https://zenodo.org/records/14908509/files/FT-METL-L-1D-Ube4b-e9uhhnAv.pt?download=1", 74 | "oUScGeHo": "https://zenodo.org/records/14908509/files/FT-METL-L-2M-1D-PTEN-ABUNDANCE-oUScGeHo.pt?download=1", 75 | "m9UsG7dq": 
"https://zenodo.org/records/14908509/files/FT-METL-L-2M-1D-PTEN-ACTIVITY-m9UsG7dq.pt?download=1", 76 | "DhuasDEr": "https://zenodo.org/records/14908509/files/FT-METL-L-2M-3D-PTEN-ABUNDANCE-DhuasDEr.pt?download=1", 77 | "8Vi7ENcC": "https://zenodo.org/records/14908509/files/FT-METL-L-2M-3D-PTEN-ACTIVITY-8Vi7ENcC.pt?download=1", 78 | "V3uTtXVe": "https://zenodo.org/records/14908509/files/FT-METL-L-3D-DLG4_2022-ABUNDANCE-V3uTtXVe.pt?download=1", 79 | "iu6ZahPw": "https://zenodo.org/records/14908509/files/FT-METL-L-3D-DLG4_2022-BINDING-iu6ZahPw.pt?download=1", 80 | "UvMMdsq4": "https://zenodo.org/records/14908509/files/FT-METL-L-3D-GB1-UvMMdsq4.pt?download=1", 81 | "LWEY95Yb": "https://zenodo.org/records/14908509/files/FT-METL-L-3D-GFP-LWEY95Yb.pt?download=1", 82 | "PqBMjXkA": "https://zenodo.org/records/14908509/files/FT-METL-L-3D-GRB2-ABUNDANCE-PqBMjXkA.pt?download=1", 83 | "VwcRN6UB": "https://zenodo.org/records/14908509/files/FT-METL-L-3D-GRB2-BINDING-VwcRN6UB.pt?download=1", 84 | "5SjoLx3y": "https://zenodo.org/records/14908509/files/FT-METL-L-3D-Pab1-5SjoLx3y.pt?download=1", 85 | "PncvgiJU": "https://zenodo.org/records/14908509/files/FT-METL-L-3D-TEM-1-PncvgiJU.pt?download=1", 86 | "NfbZL7jK": "https://zenodo.org/records/14908509/files/FT-METL-L-3D-Ube4b-NfbZL7jK.pt?download=1" 87 | 88 | } 89 | 90 | IDENT_UUID_MAP = { 91 | # the keys should be all lowercase 92 | "metl-g-20m-1d": "D72M9aEp", 93 | "metl-g-20m-3d": "Nr9zCKpR", 94 | "metl-g-50m-1d": "auKdzzwX", 95 | "metl-g-50m-3d": "6PSAzdfv", 96 | 97 | # GFP local source models 98 | "metl-l-2m-1d-gfp": "8gMPQJy4", 99 | "metl-l-2m-3d-gfp": "Hr4GNHws", 100 | 101 | # DLG4 local source models 102 | "metl-l-2m-1d-dlg4_2022": "8iFoiYw2", 103 | "metl-l-2m-3d-dlg4_2022": "kt5DdWTa", 104 | 105 | # GB1 local source models 106 | "metl-l-2m-1d-gb1": "DMfkjVzT", 107 | "metl-l-2m-3d-gb1": "epegcFiH", 108 | 109 | # GRB2 local source models 110 | "metl-l-2m-1d-grb2": "kS3rUS7h", 111 | "metl-l-2m-3d-grb2": "X7w83g6S", 112 | 113 | # Pab1 local source models 114 | "metl-l-2m-1d-pab1": "UKebCQGz", 115 | "metl-l-2m-3d-pab1": "2rr8V4th", 116 | 117 | # PTEN local source models 118 | "metl-l-2m-1d-pten": "CEMSx7ZC", 119 | "metl-l-2m-3d-pten": "PjxR5LW7", 120 | 121 | # TEM-1 local source models 122 | "metl-l-2m-1d-tem-1": "PREhfC22", 123 | "metl-l-2m-3d-tem-1": "9ASvszux", 124 | 125 | # Ube4b local source models 126 | "metl-l-2m-1d-ube4b": "HscFFkAb", 127 | "metl-l-2m-3d-ube4b": "H48oiNZN", 128 | 129 | # METL-Bind for GB1 130 | "metl-bind-2m-3d-gb1-standard": "K6mw24Rg", 131 | "metl-bind-2m-3d-gb1-binding": "Bo5wn2SG", 132 | 133 | # GFP design models, giving them an ident 134 | "metl-l-2m-1d-gfp-ft-design": "YoQkzoLD", 135 | "metl-l-2m-3d-gfp-ft-design": "PEkeRuxb", 136 | 137 | } 138 | 139 | 140 | def download_checkpoint(uuid): 141 | ckpt = torch.hub.load_state_dict_from_url(UUID_URL_MAP[uuid], 142 | map_location="cpu", file_name=f"{uuid}.pt") 143 | state_dict = ckpt["state_dict"] 144 | hyper_parameters = ckpt["hyper_parameters"] 145 | 146 | return state_dict, hyper_parameters 147 | 148 | 149 | def _get_data_encoding(hparams): 150 | if "encoding" in hparams and hparams["encoding"] == "int_seqs": 151 | encoding = Encoding.INT_SEQS 152 | elif "encoding" in hparams and hparams["encoding"] == "one_hot": 153 | encoding = Encoding.ONE_HOT 154 | elif (("encoding" in hparams and hparams["encoding"] == "auto") or "encoding" not in hparams) and \ 155 | hparams["model_name"] in ["transformer_encoder"]: 156 | encoding = Encoding.INT_SEQS 157 | else: 158 | raise 
ValueError("Detected unsupported encoding in hyperparameters") 159 | 160 | return encoding 161 | 162 | 163 | def load_model_and_data_encoder(state_dict, hparams): 164 | model = models.Model[hparams["model_name"]].cls(**hparams) 165 | model.load_state_dict(state_dict) 166 | 167 | data_encoder = DataEncoder(_get_data_encoding(hparams)) 168 | 169 | return model, data_encoder 170 | 171 | 172 | def get_from_uuid(uuid): 173 | if uuid in UUID_URL_MAP: 174 | state_dict, hparams = download_checkpoint(uuid) 175 | return load_model_and_data_encoder(state_dict, hparams) 176 | else: 177 | raise ValueError(f"UUID {uuid} not found in UUID_URL_MAP") 178 | 179 | 180 | def get_from_ident(ident): 181 | ident = ident.lower() 182 | if ident in IDENT_UUID_MAP: 183 | state_dict, hparams = download_checkpoint(IDENT_UUID_MAP[ident]) 184 | return load_model_and_data_encoder(state_dict, hparams) 185 | else: 186 | raise ValueError(f"Identifier {ident} not found in IDENT_UUID_MAP") 187 | 188 | 189 | def get_from_checkpoint(ckpt_fn): 190 | ckpt = torch.load(ckpt_fn, map_location="cpu") 191 | state_dict = ckpt["state_dict"] 192 | hyper_parameters = ckpt["hyper_parameters"] 193 | return load_model_and_data_encoder(state_dict, hyper_parameters) 194 | -------------------------------------------------------------------------------- /metl/models.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import math 3 | from argparse import ArgumentParser 4 | import enum 5 | from os.path import isfile 6 | from typing import List, Tuple, Optional 7 | 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | from torch import Tensor 12 | 13 | import metl.relative_attention as ra 14 | 15 | 16 | def reset_parameters_helper(m: nn.Module): 17 | """ helper function for resetting model parameters, meant to be used with model.apply() """ 18 | 19 | # the PyTorch MultiHeadAttention has a private function _reset_parameters() 20 | # other layers have a public reset_parameters()... go figure 21 | reset_parameters = getattr(m, "reset_parameters", None) 22 | reset_parameters_private = getattr(m, "_reset_parameters", None) 23 | 24 | if callable(reset_parameters) and callable(reset_parameters_private): 25 | raise RuntimeError("Module has both public and private methods for resetting parameters. " 26 | "This is unexpected... 
probably should just call the public one.")
27 | 
28 |     if callable(reset_parameters):
29 |         m.reset_parameters()
30 | 
31 |     if callable(reset_parameters_private):
32 |         m._reset_parameters()
33 | 
34 | 
35 | class SequentialWithArgs(nn.Sequential):
36 |     def forward(self, x, **kwargs):
37 |         for module in self:
38 |             if isinstance(module, ra.RelativeTransformerEncoder) or isinstance(module, SequentialWithArgs):
39 |                 # for relative transformer encoders, pass in kwargs (pdb_fn)
40 |                 x = module(x, **kwargs)
41 |             else:
42 |                 # for all modules, don't pass in kwargs
43 |                 x = module(x)
44 |         return x
45 | 
46 | 
47 | class PositionalEncoding(nn.Module):
48 |     # originally from https://pytorch.org/tutorials/beginner/transformer_tutorial.html
49 |     # they have since updated their implementation, but it is functionally equivalent
50 |     def __init__(self, d_model, dropout=0.1, max_len=5000):
51 |         super(PositionalEncoding, self).__init__()
52 |         self.dropout = nn.Dropout(p=dropout)
53 | 
54 |         pe = torch.zeros(max_len, d_model)
55 |         position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
56 |         div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
57 |         pe[:, 0::2] = torch.sin(position * div_term)
58 |         pe[:, 1::2] = torch.cos(position * div_term)
59 |         # note the implementation on PyTorch's website expects [seq_len, batch_size, embedding_dim]
60 |         # however our data is in [batch_size, seq_len, embedding_dim] (i.e. batch_first)
61 |         # fixed by changing pe = pe.unsqueeze(0).transpose(0, 1) to pe = pe.unsqueeze(0)
62 |         # also down below, changing our indexing into the position encoding to reflect new dimensions
63 |         # pe = pe.unsqueeze(0).transpose(0, 1)
64 |         pe = pe.unsqueeze(0)
65 |         self.register_buffer('pe', pe)
66 | 
67 |     def forward(self, x, **kwargs):
68 |         # note the implementation on PyTorch's website expects [seq_len, batch_size, embedding_dim]
69 |         # however our data is in [batch_size, seq_len, embedding_dim] (i.e. batch_first)
70 |         # fixed by changing x = x + self.pe[:x.size(0)] to x = x + self.pe[:, :x.size(1), :]
71 |         # x = x + self.pe[:x.size(0), :]
72 |         x = x + self.pe[:, :x.size(1), :]
73 |         return self.dropout(x)
74 | 
75 | 
76 | class ScaledEmbedding(nn.Module):
77 |     # https://pytorch.org/tutorials/beginner/translation_transformer.html
78 |     # a helper module for embedding that scales by sqrt(d_model) in the forward()
79 |     # makes it so we don't have to do the scaling in the main AttnModel forward()
80 | 
81 |     # todo: be aware of embedding scaling factor
82 |     # regarding the scaling factor, it's unclear exactly what the purpose is and whether it is needed
83 |     # there are several theories on why it is used, and it shows up in all the transformer reference implementations
84 |     # https://datascience.stackexchange.com/questions/87906/transformer-model-why-are-word-embeddings-scaled-before-adding-positional-encod
85 |     # 1. Has something to do with weight sharing between the embedding and the decoder output
86 |     # 2. Scales up the embeddings so the signal doesn't get overwhelmed when adding the absolute positional encoding
87 |     # 3. It cancels out with the scaling factor in scaled dot product attention, and helps make the model robust
88 |     #    to the choice of embedding_len
89 |     # 4. It's not actually needed
90 | 
91 |     # Regarding #1, not really sure about this. 
In section 3.4 of attention is all you need,
92 |     # that's where they state they multiply the embedding weights by sqrt(d_model), and the context is that they
93 |     # are sharing the same weight matrix between the two embedding layers and the pre-softmax linear transformation.
94 |     # there may be a reason that we want those weights scaled differently for the embedding layers vs. the linear
95 |     # transformation. It might have something to do with the scale at which embedding weights are initialized
96 |     # is more appropriate for the decoder linear transform vs how they are used in the attention function. Might have
97 |     # something to do with computing the correct next-token probabilities. Overall, I'm really not sure about this,
98 |     # but we aren't using a decoder anyway. So if this is the reason, then we don't need to perform the multiply.
99 | 
100 |     # Regarding #2, it seems like in one implementation of transformers (fairseq), the sinusoidal positional encoding
101 |     # has a range of (-1.0, 1.0), but the word embeddings are initialized with mean 0 and s.d. embedding_dim ** -0.5,
102 |     # which for embedding_dim=512, is a range closer to (-0.10, 0.10). Thus, the positional embedding would overwhelm
103 |     # the word embeddings when they are added together. The scaling factor increases the signal of the word embeddings.
104 |     # for embedding_dim=512, it scales word embeddings by 22, increasing the range of the word embeddings to (-2.2, 2.2).
105 |     # link to fairseq implementation, search for nn.init to see them do the initialization
106 |     # https://fairseq.readthedocs.io/en/v0.7.1/_modules/fairseq/models/transformer.html
107 |     #
108 |     # PyTorch, by contrast, initializes nn.Embedding with a standard normal distribution, mean 0 and variance 1: N(0, 1).
109 |     # this puts the range for the word embeddings around (-3, 3). the pytorch implementation for positional encoding
110 |     # also has a range of (-1.0, 1.0). So already, these are much closer in scale, and it doesn't seem like we need
111 |     # to increase the scale of the word embeddings. However, the PyTorch example still multiplies by the scaling factor;
112 |     # it's unclear whether this is just a carryover that is not actually needed, or if there is a different reason.
113 |     #
114 |     # EDIT! I just realized that even though nn.Embedding defaults to a range of around (-3, 3), the PyTorch
115 |     # transformer example actually re-initializes them using a uniform distribution in the range of (-0.1, 0.1).
116 |     # that makes it very similar to the fairseq implementation, so the scaling factor that PyTorch uses actually would
117 |     # bring the word embeddings and positional encodings much closer in scale. So this could be the reason why PyTorch
118 |     # does it.
119 | 
120 |     # Regarding #3, I don't think so. Firstly, does it actually cancel there? Secondly, the purpose of the scaling
121 |     # factor in scaled dot product attention, according to attention is all you need, is to counteract dot products
122 |     # that are very high in magnitude due to the choice of a large embedding length (aka d_k). The problem with high
123 |     # magnitude dot products is that potentially, the softmax is pushed into regions where it has extremely small
124 |     # gradients, making learning difficult. If the scaling factor in the embedding was meant to counteract the scaling
125 |     # factor in scaled dot product attention, then what would be the point of doing all that?
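    # a quick arithmetic check of #2 (illustrative numbers only, not from any specific run):
    #   >>> import math
    #   >>> 0.1 * math.sqrt(512)   # init range bound 0.1 times the sqrt(d_model) scale factor
    #   2.2627...
    # so uniform-initialized word embeddings in (-0.1, 0.1) land in roughly (-2.26, 2.26) after scaling,
    # comparable in magnitude to the (-1.0, 1.0) range of the sinusoidal positional encoding added to them.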
126 | 127 | # Regarding #4, I don't think the scaling will have any effects in practice, it's probably not needed 128 | 129 | # Overall, I think #2 is the most likely reason why this scaling is performed. In theory, I think 130 | # even if the scaling wasn't performed, the network might learn to up-scale the word embedding weights to increase 131 | # word embedding signal vs. the position signal on its own. Another question I have is why not just initialize 132 | # the embedding weights to have higher initial values? Why put it in the range (-0.1, 0.1)? 133 | # 134 | # The fact that most implementations have this scaling concerns me, makes me think I might be missing something. 135 | # For our purposes, we can train a couple models to see if scaling has any positive or negative effect. 136 | # Still need to think about potential effects of this scaling on relative position embeddings. 137 | 138 | def __init__(self, num_embeddings: int, embedding_dim: int, scale: bool): 139 | super(ScaledEmbedding, self).__init__() 140 | self.embedding = nn.Embedding(num_embeddings, embedding_dim) 141 | self.emb_size = embedding_dim 142 | self.embed_scale = math.sqrt(self.emb_size) 143 | 144 | self.scale = scale 145 | 146 | self.init_weights() 147 | 148 | def init_weights(self): 149 | # todo: not sure why PyTorch example initializes weights like this 150 | # might have something to do with word embedding scaling factor (see above) 151 | # could also just try the default weight initialization for nn.Embedding() 152 | init_range = 0.1 153 | self.embedding.weight.data.uniform_(-init_range, init_range) 154 | 155 | def forward(self, tokens: Tensor, **kwargs): 156 | if self.scale: 157 | return self.embedding(tokens.long()) * self.embed_scale 158 | else: 159 | return self.embedding(tokens.long()) 160 | 161 | 162 | class FCBlock(nn.Module): 163 | """ a fully connected block with options for batchnorm and dropout 164 | can extend in the future with option for different activation, etc """ 165 | 166 | def __init__(self, 167 | in_features: int, 168 | num_hidden_nodes: int = 64, 169 | use_batchnorm: bool = False, 170 | use_layernorm: bool = False, 171 | norm_before_activation: bool = False, 172 | use_dropout: bool = False, 173 | dropout_rate: float = 0.2, 174 | activation: str = "relu"): 175 | 176 | super().__init__() 177 | 178 | if use_batchnorm and use_layernorm: 179 | raise ValueError("Only one of use_batchnorm or use_layernorm can be set to True") 180 | 181 | self.use_batchnorm = use_batchnorm 182 | self.use_dropout = use_dropout 183 | self.use_layernorm = use_layernorm 184 | self.norm_before_activation = norm_before_activation 185 | 186 | self.fc = nn.Linear(in_features=in_features, out_features=num_hidden_nodes) 187 | 188 | self.activation = get_activation_fn(activation, functional=False) 189 | 190 | if use_batchnorm: 191 | self.norm = nn.BatchNorm1d(num_hidden_nodes) 192 | 193 | if use_layernorm: 194 | self.norm = nn.LayerNorm(num_hidden_nodes) 195 | 196 | if use_dropout: 197 | self.dropout = nn.Dropout(p=dropout_rate) 198 | 199 | def forward(self, x, **kwargs): 200 | x = self.fc(x) 201 | 202 | # norm can be before or after activation, using flag 203 | if (self.use_batchnorm or self.use_layernorm) and self.norm_before_activation: 204 | x = self.norm(x) 205 | 206 | x = self.activation(x) 207 | 208 | # batchnorm being applied after activation, there is some discussion on this online 209 | if (self.use_batchnorm or self.use_layernorm) and not self.norm_before_activation: 210 | x = self.norm(x) 211 | 212 | # dropout 
being applied last 213 | if self.use_dropout: 214 | x = self.dropout(x) 215 | 216 | return x 217 | 218 | 219 | class TaskSpecificPredictionLayers(nn.Module): 220 | """ Constructs num_tasks [dense(num_hidden_nodes)+relu+dense(1)] layers, each independently transforming input 221 | into a single output node. All num_tasks outputs are then concatenated into a single tensor. """ 222 | 223 | # todo: the independent layers are run in sequence rather than in parallel, causing a slowdown that 224 | # scales with the number of tasks. might be able to run in parallel by hacking convolution operation 225 | # https://stackoverflow.com/questions/58374980/run-multiple-models-of-an-ensemble-in-parallel-with-pytorch 226 | # https://github.com/pytorch/pytorch/issues/54147 227 | # https://github.com/pytorch/pytorch/issues/36459 228 | 229 | def __init__(self, 230 | num_tasks: int, 231 | in_features: int, 232 | num_hidden_nodes: int = 64, 233 | use_batchnorm: bool = False, 234 | use_dropout: bool = False, 235 | dropout_rate: float = 0.2, 236 | activation: str = "relu"): 237 | 238 | super().__init__() 239 | 240 | # each task-specific layer outputs a single node, 241 | # which can be combined with torch.cat into prediction vector 242 | self.task_specific_pred_layers = nn.ModuleList() 243 | for i in range(num_tasks): 244 | layers = [FCBlock(in_features=in_features, 245 | num_hidden_nodes=num_hidden_nodes, 246 | use_batchnorm=use_batchnorm, 247 | use_dropout=use_dropout, 248 | dropout_rate=dropout_rate, 249 | activation=activation), 250 | nn.Linear(in_features=num_hidden_nodes, out_features=1)] 251 | self.task_specific_pred_layers.append(nn.Sequential(*layers)) 252 | 253 | def forward(self, x, **kwargs): 254 | # run each task-specific layer and concatenate outputs into a single output vector 255 | task_specific_outputs = [] 256 | for layer in self.task_specific_pred_layers: 257 | task_specific_outputs.append(layer(x)) 258 | 259 | output = torch.cat(task_specific_outputs, dim=1) 260 | return output 261 | 262 | 263 | class GlobalAveragePooling(nn.Module): 264 | """ helper class for global average pooling """ 265 | 266 | def __init__(self, dim=1): 267 | super().__init__() 268 | # our data is in [batch_size, sequence_length, embedding_length] 269 | # with global pooling, we want to pool over the sequence dimension (dim=1) 270 | self.dim = dim 271 | 272 | def forward(self, x, **kwargs): 273 | return torch.mean(x, dim=self.dim) 274 | 275 | 276 | class CLSPooling(nn.Module): 277 | """ helper class for CLS token extraction """ 278 | 279 | def __init__(self, cls_position=0): 280 | super().__init__() 281 | 282 | # the position of the CLS token in the sequence dimension 283 | # currently, the CLS token is in the first position, but may move it to the last position 284 | self.cls_position = cls_position 285 | 286 | def forward(self, x, **kwargs): 287 | # assumes input is in [batch_size, sequence_len, embedding_len] 288 | # thus sequence dimension is dimension 1 289 | return x[:, self.cls_position, :] 290 | 291 | 292 | class TransformerEncoderWrapper(nn.TransformerEncoder): 293 | """ wrapper around PyTorch's TransformerEncoder that re-initializes layer parameters, 294 | so each transformer encoder layer has a different initialization """ 295 | 296 | # todo: PyTorch is changing its transformer API... 
check up on and see if there is a better way
297 |     def __init__(self, encoder_layer, num_layers, norm=None, reset_params=True):
298 |         super().__init__(encoder_layer, num_layers, norm)
299 |         if reset_params:
300 |             self.apply(reset_parameters_helper)
301 | 
302 | 
303 | class AttnModel(nn.Module):
304 |     # https://pytorch.org/tutorials/beginner/transformer_tutorial.html
305 | 
306 |     @staticmethod
307 |     def add_model_specific_args(parent_parser):
308 |         parser = ArgumentParser(parents=[parent_parser], add_help=False)
309 | 
310 |         parser.add_argument('--pos_encoding', type=str, default="absolute",
311 |                             choices=["none", "absolute", "relative", "relative_3D"],
312 |                             help="what type of positional encoding to use")
313 |         parser.add_argument('--pos_encoding_dropout', type=float, default=0.1,
314 |                             help="how much dropout to use in positional encoding, for pos_encoding==absolute")
315 |         parser.add_argument('--clipping_threshold', type=int, default=3,
316 |                             help="clipping threshold for relative position embedding, for relative and relative_3D")
317 |         parser.add_argument('--contact_threshold', type=int, default=7,
318 |                             help="threshold, in angstroms, for contact map, for relative_3D")
319 |         parser.add_argument('--embedding_len', type=int, default=128)
320 |         parser.add_argument('--num_heads', type=int, default=2)
321 |         parser.add_argument('--num_hidden', type=int, default=64)
322 |         parser.add_argument('--num_enc_layers', type=int, default=2)
323 |         parser.add_argument('--enc_layer_dropout', type=float, default=0.1)
324 |         parser.add_argument('--use_final_encoder_norm', action="store_true", default=False)
325 | 
326 |         parser.add_argument('--global_average_pooling', action="store_true", default=False)
327 |         parser.add_argument('--cls_pooling', action="store_true", default=False)
328 | 
329 |         parser.add_argument('--use_task_specific_layers', action="store_true", default=False,
330 |                             help="exclusive with use_final_hidden_layer; takes priority over use_final_hidden_layer"
331 |                                  " if both flags are set")
332 |         parser.add_argument('--task_specific_hidden_nodes', type=int, default=64)
333 |         parser.add_argument('--use_final_hidden_layer', action="store_true", default=False)
334 |         parser.add_argument('--final_hidden_size', type=int, default=64)
335 |         parser.add_argument('--use_final_hidden_layer_norm', action="store_true", default=False)
336 |         parser.add_argument('--final_hidden_layer_norm_before_activation', action="store_true", default=False)
337 |         parser.add_argument('--use_final_hidden_layer_dropout', action="store_true", default=False)
338 |         parser.add_argument('--final_hidden_layer_dropout_rate', type=float, default=0.2)
339 | 
340 |         parser.add_argument('--activation', type=str, default="relu",
341 |                             help="activation function used for all activations in the network")
342 |         return parser
343 | 
344 |     def __init__(self,
345 |                  # data args
346 |                  num_tasks: int,
347 |                  aa_seq_len: int,
348 |                  num_tokens: int,
349 |                  # transformer encoder model args
350 |                  pos_encoding: str = "absolute",
351 |                  pos_encoding_dropout: float = 0.1,
352 |                  clipping_threshold: int = 3,
353 |                  contact_threshold: int = 7,
354 |                  pdb_fns: Optional[List[str]] = None,
355 |                  embedding_len: int = 64,
356 |                  num_heads: int = 2,
357 |                  num_hidden: int = 64,
358 |                  num_enc_layers: int = 2,
359 |                  enc_layer_dropout: float = 0.1,
360 |                  use_final_encoder_norm: bool = False,
361 |                  # pooling to fixed-length representation
362 |                  global_average_pooling: bool = True,
363 |                  cls_pooling: bool = False,
364 |                  # prediction layers
365 |                  use_task_specific_layers: bool = False,
366 | 
task_specific_hidden_nodes: int = 64, 367 | use_final_hidden_layer: bool = False, 368 | final_hidden_size: int = 64, 369 | use_final_hidden_layer_norm: bool = False, 370 | final_hidden_layer_norm_before_activation: bool = False, 371 | use_final_hidden_layer_dropout: bool = False, 372 | final_hidden_layer_dropout_rate: float = 0.2, 373 | # activation function 374 | activation: str = "relu", 375 | *args, **kwargs): 376 | 377 | super().__init__() 378 | 379 | # store embedding length for use in the forward function 380 | self.embedding_len = embedding_len 381 | self.aa_seq_len = aa_seq_len 382 | 383 | # build up layers 384 | layers = collections.OrderedDict() 385 | 386 | # amino acid embedding 387 | layers["embedder"] = ScaledEmbedding(num_embeddings=num_tokens, embedding_dim=embedding_len, scale=True) 388 | 389 | # absolute positional encoding 390 | if pos_encoding == "absolute": 391 | layers["pos_encoder"] = PositionalEncoding(embedding_len, dropout=pos_encoding_dropout, max_len=512) 392 | 393 | # transformer encoder layer for none or absolute positional encoding 394 | if pos_encoding in ["none", "absolute"]: 395 | encoder_layer = torch.nn.TransformerEncoderLayer(d_model=embedding_len, 396 | nhead=num_heads, 397 | dim_feedforward=num_hidden, 398 | dropout=enc_layer_dropout, 399 | activation=get_activation_fn(activation), 400 | norm_first=True, 401 | batch_first=True) 402 | 403 | # layer norm that is used after the transformer encoder layers 404 | # if the norm_first is False, this is *redundant* and not needed 405 | # but if norm_first is True, this can be used to normalize outputs from 406 | # the transformer encoder before inputting to the final fully connected layer 407 | encoder_norm = None 408 | if use_final_encoder_norm: 409 | encoder_norm = nn.LayerNorm(embedding_len) 410 | 411 | layers["tr_encoder"] = TransformerEncoderWrapper(encoder_layer=encoder_layer, 412 | num_layers=num_enc_layers, 413 | norm=encoder_norm) 414 | 415 | # transformer encoder layer for relative position encoding 416 | elif pos_encoding in ["relative", "relative_3D"]: 417 | relative_encoder_layer = ra.RelativeTransformerEncoderLayer(d_model=embedding_len, 418 | nhead=num_heads, 419 | pos_encoding=pos_encoding, 420 | clipping_threshold=clipping_threshold, 421 | contact_threshold=contact_threshold, 422 | pdb_fns=pdb_fns, 423 | dim_feedforward=num_hidden, 424 | dropout=enc_layer_dropout, 425 | activation=get_activation_fn(activation), 426 | norm_first=True) 427 | 428 | encoder_norm = None 429 | if use_final_encoder_norm: 430 | encoder_norm = nn.LayerNorm(embedding_len) 431 | 432 | layers["tr_encoder"] = ra.RelativeTransformerEncoder(encoder_layer=relative_encoder_layer, 433 | num_layers=num_enc_layers, 434 | norm=encoder_norm) 435 | 436 | # GLOBAL AVERAGE POOLING OR CLS TOKEN 437 | # set up the layers and output shapes (i.e. 
input shapes for the pred layer) 438 | if global_average_pooling: 439 | # pool over the sequence dimension 440 | layers["avg_pooling"] = GlobalAveragePooling(dim=1) 441 | pred_layer_input_features = embedding_len 442 | elif cls_pooling: 443 | layers["cls_pooling"] = CLSPooling(cls_position=0) 444 | pred_layer_input_features = embedding_len 445 | else: 446 | # no global average pooling or CLS token 447 | # sequence dimension is still there, just flattened 448 | layers["flatten"] = nn.Flatten() 449 | pred_layer_input_features = embedding_len * aa_seq_len 450 | 451 | # PREDICTION 452 | if use_task_specific_layers: 453 | # task specific prediction layers (nonlinear transform for each task) 454 | layers["prediction"] = TaskSpecificPredictionLayers(num_tasks=num_tasks, 455 | in_features=pred_layer_input_features, 456 | num_hidden_nodes=task_specific_hidden_nodes, 457 | activation=activation) 458 | elif use_final_hidden_layer: 459 | # combined prediction linear (linear transform for each task) 460 | layers["fc1"] = FCBlock(in_features=pred_layer_input_features, 461 | num_hidden_nodes=final_hidden_size, 462 | use_batchnorm=False, 463 | use_layernorm=use_final_hidden_layer_norm, 464 | norm_before_activation=final_hidden_layer_norm_before_activation, 465 | use_dropout=use_final_hidden_layer_dropout, 466 | dropout_rate=final_hidden_layer_dropout_rate, 467 | activation=activation) 468 | 469 | layers["prediction"] = nn.Linear(in_features=final_hidden_size, out_features=num_tasks) 470 | else: 471 | layers["prediction"] = nn.Linear(in_features=pred_layer_input_features, out_features=num_tasks) 472 | 473 | # FINAL MODEL 474 | self.model = SequentialWithArgs(layers) 475 | 476 | def forward(self, x, **kwargs): 477 | return self.model(x, **kwargs) 478 | 479 | 480 | class Transpose(nn.Module): 481 | """ helper layer to swap data from (batch, seq, channels) to (batch, channels, seq) 482 | used as a helper in the convolutional network which pytorch defaults to channels-first """ 483 | 484 | def __init__(self, dims: Tuple[int, ...] 
= (1, 2)):
485 |         super().__init__()
486 |         self.dims = dims
487 | 
488 |     def forward(self, x, **kwargs):
489 |         x = x.transpose(*self.dims).contiguous()
490 |         return x
491 | 
492 | 
493 | def conv1d_out_shape(seq_len, kernel_size, stride=1, pad=0, dilation=1):
494 |     return ((seq_len + (2 * pad) - (dilation * (kernel_size - 1)) - 1) // stride) + 1  # nn.Conv1d's L_out formula
495 | 
496 | 
497 | class ConvBlock(nn.Module):
498 |     def __init__(self,
499 |                  in_channels: int,
500 |                  out_channels: int,
501 |                  kernel_size: int,
502 |                  dilation: int = 1,
503 |                  padding: str = "same",
504 |                  use_batchnorm: bool = False,
505 |                  use_layernorm: bool = False,
506 |                  norm_before_activation: bool = False,
507 |                  use_dropout: bool = False,
508 |                  dropout_rate: float = 0.2,
509 |                  activation: str = "relu"):
510 | 
511 |         super().__init__()
512 | 
513 |         if use_batchnorm and use_layernorm:
514 |             raise ValueError("Only one of use_batchnorm or use_layernorm can be set to True")
515 | 
516 |         self.use_batchnorm = use_batchnorm
517 |         self.use_layernorm = use_layernorm
518 |         self.norm_before_activation = norm_before_activation
519 |         self.use_dropout = use_dropout
520 | 
521 |         self.conv = nn.Conv1d(in_channels=in_channels,
522 |                               out_channels=out_channels,
523 |                               kernel_size=kernel_size,
524 |                               padding=padding,
525 |                               dilation=dilation)
526 | 
527 |         self.activation = get_activation_fn(activation, functional=False)
528 | 
529 |         if use_batchnorm:
530 |             self.norm = nn.BatchNorm1d(out_channels)
531 | 
532 |         if use_layernorm:
533 |             self.norm = nn.LayerNorm(out_channels)
534 | 
535 |         if use_dropout:
536 |             self.dropout = nn.Dropout(p=dropout_rate)
537 | 
538 |     def forward(self, x, **kwargs):
539 |         x = self.conv(x)
540 | 
541 |         # norm can be before or after activation, using flag
542 |         if self.use_batchnorm and self.norm_before_activation:
543 |             x = self.norm(x)
544 |         elif self.use_layernorm and self.norm_before_activation:
545 |             x = self.norm(x.transpose(1, 2)).transpose(1, 2)
546 | 
547 |         x = self.activation(x)
548 | 
549 |         # batchnorm being applied after activation, there is some discussion on this online
550 |         if self.use_batchnorm and not self.norm_before_activation:
551 |             x = self.norm(x)
552 |         elif self.use_layernorm and not self.norm_before_activation:
553 |             x = self.norm(x.transpose(1, 2)).transpose(1, 2)
554 | 
555 |         # dropout being applied after batchnorm, there is some discussion on this online
556 |         if self.use_dropout:
557 |             x = self.dropout(x)
558 | 
559 |         return x
560 | 
561 | 
562 | class ConvModel2(nn.Module):
563 |     """ convolutional source model that supports padded inputs, pooling, etc """
564 | 
565 |     @staticmethod
566 |     def add_model_specific_args(parent_parser):
567 |         parser = ArgumentParser(parents=[parent_parser], add_help=False)
568 |         parser.add_argument('--use_embedding', action="store_true", default=False)
569 |         parser.add_argument('--embedding_len', type=int, default=128)
570 | 
571 |         parser.add_argument('--num_conv_layers', type=int, default=1)
572 |         parser.add_argument('--kernel_sizes', type=int, nargs="+", default=[7])
573 |         parser.add_argument('--out_channels', type=int, nargs="+", default=[128])
574 |         parser.add_argument('--dilations', type=int, nargs="+", default=[1])
575 |         parser.add_argument('--padding', type=str, default="valid", choices=["valid", "same"])
576 |         parser.add_argument('--use_conv_layer_norm', action="store_true", default=False)
577 |         parser.add_argument('--conv_layer_norm_before_activation', action="store_true", default=False)
578 |         parser.add_argument('--use_conv_layer_dropout', action="store_true", default=False)
579 | 
parser.add_argument('--conv_layer_dropout_rate', type=float, default=0.2) 580 | 581 | parser.add_argument('--global_average_pooling', action="store_true", default=False) 582 | 583 | parser.add_argument('--use_task_specific_layers', action="store_true", default=False) 584 | parser.add_argument('--task_specific_hidden_nodes', type=int, default=64) 585 | parser.add_argument('--use_final_hidden_layer', action="store_true", default=False) 586 | parser.add_argument('--final_hidden_size', type=int, default=64) 587 | parser.add_argument('--use_final_hidden_layer_norm', action="store_true", default=False) 588 | parser.add_argument('--final_hidden_layer_norm_before_activation', action="store_true", default=False) 589 | parser.add_argument('--use_final_hidden_layer_dropout', action="store_true", default=False) 590 | parser.add_argument('--final_hidden_layer_dropout_rate', type=float, default=0.2) 591 | 592 | parser.add_argument('--activation', type=str, default="relu", 593 | help="activation function used for all activations in the network") 594 | 595 | return parser 596 | 597 | def __init__(self, 598 | # data 599 | num_tasks: int, 600 | aa_seq_len: int, 601 | aa_encoding_len: int, 602 | num_tokens: int, 603 | # convolutional model args 604 | use_embedding: bool = False, 605 | embedding_len: int = 64, 606 | num_conv_layers: int = 1, 607 | kernel_sizes: List[int] = (7,), 608 | out_channels: List[int] = (128,), 609 | dilations: List[int] = (1,), 610 | padding: str = "valid", 611 | use_conv_layer_norm: bool = False, 612 | conv_layer_norm_before_activation: bool = False, 613 | use_conv_layer_dropout: bool = False, 614 | conv_layer_dropout_rate: float = 0.2, 615 | # pooling 616 | global_average_pooling: bool = True, 617 | # prediction layers 618 | use_task_specific_layers: bool = False, 619 | task_specific_hidden_nodes: int = 64, 620 | use_final_hidden_layer: bool = False, 621 | final_hidden_size: int = 64, 622 | use_final_hidden_layer_norm: bool = False, 623 | final_hidden_layer_norm_before_activation: bool = False, 624 | use_final_hidden_layer_dropout: bool = False, 625 | final_hidden_layer_dropout_rate: float = 0.2, 626 | # activation function 627 | activation: str = "relu", 628 | *args, **kwargs): 629 | 630 | super(ConvModel2, self).__init__() 631 | 632 | # build up the layers 633 | layers = collections.OrderedDict() 634 | 635 | # amino acid embedding 636 | if use_embedding: 637 | layers["embedder"] = ScaledEmbedding(num_embeddings=num_tokens, embedding_dim=embedding_len, scale=False) 638 | 639 | # transpose the input to match PyTorch's expected format 640 | layers["transpose"] = Transpose(dims=(1, 2)) 641 | 642 | # build up the convolutional layers 643 | for layer_num in range(num_conv_layers): 644 | # determine the number of input channels for the first convolutional layer 645 | if layer_num == 0 and use_embedding: 646 | # for the first convolutional layer, the in_channels is the embedding_len 647 | in_channels = embedding_len 648 | elif layer_num == 0 and not use_embedding: 649 | # for the first convolutional layer, the in_channels is the aa_encoding_len 650 | in_channels = aa_encoding_len 651 | else: 652 | in_channels = out_channels[layer_num - 1] 653 | 654 | layers[f"conv{layer_num}"] = ConvBlock(in_channels=in_channels, 655 | out_channels=out_channels[layer_num], 656 | kernel_size=kernel_sizes[layer_num], 657 | dilation=dilations[layer_num], 658 | padding=padding, 659 | use_batchnorm=False, 660 | use_layernorm=use_conv_layer_norm, 661 | norm_before_activation=conv_layer_norm_before_activation, 
662 | use_dropout=use_conv_layer_dropout, 663 | dropout_rate=conv_layer_dropout_rate, 664 | activation=activation) 665 | 666 | # handle transition from convolutional layers to fully connected layer 667 | # either use global average pooling or flatten 668 | # take into consideration whether we are using valid or same padding 669 | if global_average_pooling: 670 | # global average pooling (mean across the seq len dimension) 671 | # the seq len dimensions is the last dimension (batch_size, num_filters, seq_len) 672 | layers["avg_pooling"] = GlobalAveragePooling(dim=-1) 673 | # the prediction layers will take num_filters input features 674 | pred_layer_input_features = out_channels[-1] 675 | 676 | else: 677 | # no global average pooling. flatten instead. 678 | layers["flatten"] = nn.Flatten() 679 | # calculate the final output len of the convolutional layers 680 | # and the number of input features for the prediction layers 681 | if padding == "valid": 682 | # valid padding (aka no padding) results in shrinking length in progressive layers 683 | conv_out_len = conv1d_out_shape(aa_seq_len, kernel_size=kernel_sizes[0], dilation=dilations[0]) 684 | for layer_num in range(1, num_conv_layers): 685 | conv_out_len = conv1d_out_shape(conv_out_len, 686 | kernel_size=kernel_sizes[layer_num], 687 | dilation=dilations[layer_num]) 688 | pred_layer_input_features = conv_out_len * out_channels[-1] 689 | else: 690 | # padding == "same" 691 | pred_layer_input_features = aa_seq_len * out_channels[-1] 692 | 693 | # prediction layer 694 | if use_task_specific_layers: 695 | layers["prediction"] = TaskSpecificPredictionLayers(num_tasks=num_tasks, 696 | in_features=pred_layer_input_features, 697 | num_hidden_nodes=task_specific_hidden_nodes, 698 | activation=activation) 699 | 700 | # final hidden layer (with potential additional dropout) 701 | elif use_final_hidden_layer: 702 | layers["fc1"] = FCBlock(in_features=pred_layer_input_features, 703 | num_hidden_nodes=final_hidden_size, 704 | use_batchnorm=False, 705 | use_layernorm=use_final_hidden_layer_norm, 706 | norm_before_activation=final_hidden_layer_norm_before_activation, 707 | use_dropout=use_final_hidden_layer_dropout, 708 | dropout_rate=final_hidden_layer_dropout_rate, 709 | activation=activation) 710 | layers["prediction"] = nn.Linear(in_features=final_hidden_size, out_features=num_tasks) 711 | 712 | else: 713 | layers["prediction"] = nn.Linear(in_features=pred_layer_input_features, out_features=num_tasks) 714 | 715 | self.model = nn.Sequential(layers) 716 | 717 | def forward(self, x, **kwargs): 718 | output = self.model(x) 719 | return output 720 | 721 | 722 | class ConvModel(nn.Module): 723 | """ a convolutional network with convolutional layers followed by a fully connected layer """ 724 | 725 | @staticmethod 726 | def add_model_specific_args(parent_parser): 727 | parser = ArgumentParser(parents=[parent_parser], add_help=False) 728 | parser.add_argument('--num_conv_layers', type=int, default=1) 729 | parser.add_argument('--kernel_sizes', type=int, nargs="+", default=[7]) 730 | parser.add_argument('--out_channels', type=int, nargs="+", default=[128]) 731 | parser.add_argument('--padding', type=str, default="valid", choices=["valid", "same"]) 732 | parser.add_argument('--use_final_hidden_layer', action="store_true", 733 | help="whether to use a final hidden layer") 734 | parser.add_argument('--final_hidden_size', type=int, default=128, 735 | help="number of nodes in the final hidden layer") 736 | parser.add_argument('--use_dropout', action="store_true", 737 
| help="whether to use dropout in the final hidden layer") 738 | parser.add_argument('--dropout_rate', type=float, default=0.2, 739 | help="dropout rate in the final hidden layer") 740 | parser.add_argument('--use_task_specific_layers', action="store_true", default=False) 741 | parser.add_argument('--task_specific_hidden_nodes', type=int, default=64) 742 | return parser 743 | 744 | def __init__(self, 745 | num_tasks: int, 746 | aa_seq_len: int, 747 | aa_encoding_len: int, 748 | num_conv_layers: int = 1, 749 | kernel_sizes: List[int] = (7,), 750 | out_channels: List[int] = (128,), 751 | padding: str = "valid", 752 | use_final_hidden_layer: bool = True, 753 | final_hidden_size: int = 128, 754 | use_dropout: bool = False, 755 | dropout_rate: float = 0.2, 756 | use_task_specific_layers: bool = False, 757 | task_specific_hidden_nodes: int = 64, 758 | *args, **kwargs): 759 | 760 | super(ConvModel, self).__init__() 761 | 762 | # set up the model as a Sequential block (less to do in forward()) 763 | layers = collections.OrderedDict() 764 | 765 | layers["transpose"] = Transpose(dims=(1, 2)) 766 | 767 | for layer_num in range(num_conv_layers): 768 | # for the first convolutional layer, the in_channels is the feature_len 769 | in_channels = aa_encoding_len if layer_num == 0 else out_channels[layer_num - 1] 770 | 771 | layers["conv{}".format(layer_num)] = nn.Sequential( 772 | nn.Conv1d(in_channels=in_channels, 773 | out_channels=out_channels[layer_num], 774 | kernel_size=kernel_sizes[layer_num], 775 | padding=padding), 776 | nn.ReLU() 777 | ) 778 | 779 | layers["flatten"] = nn.Flatten() 780 | 781 | # calculate the final output len of the convolutional layers 782 | # and the number of input features for the prediction layers 783 | if padding == "valid": 784 | # valid padding (aka no padding) results in shrinking length in progressive layers 785 | conv_out_len = conv1d_out_shape(aa_seq_len, kernel_size=kernel_sizes[0]) 786 | for layer_num in range(1, num_conv_layers): 787 | conv_out_len = conv1d_out_shape(conv_out_len, kernel_size=kernel_sizes[layer_num]) 788 | next_dim = conv_out_len * out_channels[-1] 789 | elif padding == "same": 790 | next_dim = aa_seq_len * out_channels[-1] 791 | else: 792 | raise ValueError("unexpected value for padding: {}".format(padding)) 793 | 794 | # final hidden layer (with potential additional dropout) 795 | if use_final_hidden_layer: 796 | layers["fc1"] = FCBlock(in_features=next_dim, 797 | num_hidden_nodes=final_hidden_size, 798 | use_batchnorm=False, 799 | use_dropout=use_dropout, 800 | dropout_rate=dropout_rate) 801 | next_dim = final_hidden_size 802 | 803 | # final prediction layer 804 | # either task specific nonlinear layers or a single linear layer 805 | if use_task_specific_layers: 806 | layers["prediction"] = TaskSpecificPredictionLayers(num_tasks=num_tasks, 807 | in_features=next_dim, 808 | num_hidden_nodes=task_specific_hidden_nodes) 809 | else: 810 | layers["prediction"] = nn.Linear(in_features=next_dim, out_features=num_tasks) 811 | 812 | self.model = nn.Sequential(layers) 813 | 814 | def forward(self, x, **kwargs): 815 | output = self.model(x) 816 | return output 817 | 818 | 819 | class FCModel(nn.Module): 820 | 821 | @staticmethod 822 | def add_model_specific_args(parent_parser): 823 | parser = ArgumentParser(parents=[parent_parser], add_help=False) 824 | parser.add_argument('--num_layers', type=int, default=1) 825 | parser.add_argument('--num_hidden', nargs="+", type=int, default=[128]) 826 | parser.add_argument('--use_batchnorm', action="store_true", 
default=False) 827 | parser.add_argument('--use_layernorm', action="store_true", default=False) 828 | parser.add_argument('--norm_before_activation', action="store_true", default=False) 829 | parser.add_argument('--use_dropout', action="store_true", default=False) 830 | parser.add_argument('--dropout_rate', type=float, default=0.2) 831 | return parser 832 | 833 | def __init__(self, 834 | num_tasks: int, 835 | seq_encoding_len: int, 836 | num_layers: int = 1, 837 | num_hidden: List[int] = (128,), 838 | use_batchnorm: bool = False, 839 | use_layernorm: bool = False, 840 | norm_before_activation: bool = False, 841 | use_dropout: bool = False, 842 | dropout_rate: float = 0.2, 843 | activation: str = "relu", 844 | *args, **kwargs): 845 | super().__init__() 846 | 847 | # set up the model as a Sequential block (less to do in forward()) 848 | layers = collections.OrderedDict() 849 | 850 | # flatten inputs as this is all fully connected 851 | layers["flatten"] = nn.Flatten() 852 | 853 | # build up the variable number of hidden layers (fully connected + ReLU + dropout (if set)) 854 | for layer_num in range(num_layers): 855 | # for the first layer (layer_num == 0), in_features is determined by given input 856 | # for subsequent layers, the in_features is the previous layer's num_hidden 857 | in_features = seq_encoding_len if layer_num == 0 else num_hidden[layer_num - 1] 858 | 859 | layers["fc{}".format(layer_num)] = FCBlock(in_features=in_features, 860 | num_hidden_nodes=num_hidden[layer_num], 861 | use_batchnorm=use_batchnorm, 862 | use_layernorm=use_layernorm, 863 | norm_before_activation=norm_before_activation, 864 | use_dropout=use_dropout, 865 | dropout_rate=dropout_rate, 866 | activation=activation) 867 | 868 | # finally, the linear output layer 869 | in_features = num_hidden[-1] if num_layers > 0 else seq_encoding_len 870 | layers["output"] = nn.Linear(in_features=in_features, out_features=num_tasks) 871 | 872 | self.model = nn.Sequential(layers) 873 | 874 | def forward(self, x, **kwargs): 875 | output = self.model(x) 876 | return output 877 | 878 | 879 | class LRModel(nn.Module): 880 | """ a simple linear model """ 881 | 882 | def __init__(self, num_tasks, seq_encoding_len, *args, **kwargs): 883 | super().__init__() 884 | 885 | self.model = nn.Sequential( 886 | nn.Flatten(), 887 | nn.Linear(seq_encoding_len, out_features=num_tasks)) 888 | 889 | def forward(self, x, **kwargs): 890 | output = self.model(x) 891 | return output 892 | 893 | 894 | class TransferModel(nn.Module): 895 | """ transfer learning model """ 896 | 897 | @staticmethod 898 | def add_model_specific_args(parent_parser): 899 | 900 | def none_or_int(value: str): 901 | return None if value.lower() == "none" else int(value) 902 | 903 | p = ArgumentParser(parents=[parent_parser], add_help=False) 904 | 905 | # for model set up 906 | p.add_argument('--pretrained_ckpt_path', type=str, default=None) 907 | 908 | # where to cut off the backbone 909 | p.add_argument("--backbone_cutoff", type=none_or_int, default=-1, 910 | help="where to cut off the backbone. can be a negative int, indexing back from " 911 | "pretrained_model.model.model. a value of -1 would chop off the backbone prediction head. " 912 | "a value of -2 chops the prediction head and FC layer. a value of -3 chops" 913 | "the above, as well as the global average pooling layer. 
all depends on architecture.") 914 | 915 | p.add_argument("--pred_layer_input_features", type=int, default=None, 916 | help="if None, number of features will be determined based on backbone_cutoff and standard " 917 | "architecture. otherwise, specify the number of input features for the prediction layer") 918 | 919 | # top net args 920 | p.add_argument("--top_net_type", type=str, default="linear", choices=["linear", "nonlinear", "sklearn"]) 921 | p.add_argument("--top_net_hidden_nodes", type=int, default=256) 922 | p.add_argument("--top_net_use_batchnorm", action="store_true") 923 | p.add_argument("--top_net_use_dropout", action="store_true") 924 | p.add_argument("--top_net_dropout_rate", type=float, default=0.1) 925 | 926 | return p 927 | 928 | def __init__(self, 929 | # pretrained model 930 | pretrained_ckpt_path: Optional[str] = None, 931 | pretrained_hparams: Optional[dict] = None, 932 | backbone_cutoff: Optional[int] = -1, 933 | # top net 934 | pred_layer_input_features: Optional[int] = None, 935 | top_net_type: str = "linear", 936 | top_net_hidden_nodes: int = 256, 937 | top_net_use_batchnorm: bool = False, 938 | top_net_use_dropout: bool = False, 939 | top_net_dropout_rate: float = 0.1, 940 | *args, **kwargs): 941 | 942 | super().__init__() 943 | 944 | # error checking: if pretrained_ckpt_path is None, then pretrained_hparams must be specified 945 | if pretrained_ckpt_path is None and pretrained_hparams is None: 946 | raise ValueError("Either pretrained_ckpt_path or pretrained_hparams must be specified") 947 | 948 | # note: pdb_fns is loaded from transfer model arguments rather than original source model hparams 949 | # if pdb_fns is specified as a kwarg, pass it on for structure-based RPE 950 | # otherwise, can just set pdb_fns to None, and structure-based RPE will handle new PDBs on the fly 951 | pdb_fns = kwargs["pdb_fns"] if "pdb_fns" in kwargs else None 952 | 953 | # generate a fresh backbone using pretrained_hparams if specified 954 | # otherwise load the backbone from the pretrained checkpoint 955 | # we prioritize pretrained_hparams over pretrained_ckpt_path because 956 | # pretrained_hparams will only really be specified if we are loading from a DMSTask checkpoint 957 | # meaning the TransferModel has already been fine-tuned on DMS data, and we are likely loading 958 | # weights from that finetuning (including weights for the backbone) 959 | # whereas if pretrained_hparams is not specified but pretrained_ckpt_path is, then we are 960 | # likely finetuning the TransferModel for the first time, and we need the pretrained weights for the 961 | # backbone from the RosettaTask checkpoint 962 | if pretrained_hparams is not None: 963 | # pretrained_hparams will only be specified if we are loading from a DMSTask checkpoint 964 | pretrained_hparams["pdb_fns"] = pdb_fns 965 | pretrained_model = Model[pretrained_hparams["model_name"]].cls(**pretrained_hparams) 966 | self.pretrained_hparams = pretrained_hparams 967 | else: 968 | # not supported in metl-pretrained 969 | raise NotImplementedError("Loading pretrained weights from RosettaTask checkpoint not supported") 970 | 971 | layers = collections.OrderedDict() 972 | 973 | # set the backbone to all layers except the last layer (the pre-trained prediction layer) 974 | if backbone_cutoff is None: 975 | layers["backbone"] = SequentialWithArgs(*list(pretrained_model.model.children())) 976 | else: 977 | layers["backbone"] = SequentialWithArgs(*list(pretrained_model.model.children())[0:backbone_cutoff]) 978 | 979 | if top_net_type == 
"sklearn": 980 | # sklearn top not doesn't require any more layers, just return model for the repr layer 981 | self.model = SequentialWithArgs(layers) 982 | return 983 | 984 | # figure out dimensions of input into the prediction layer 985 | if pred_layer_input_features is None: 986 | # todo: can make this more robust by checking if the pretrained_mode.hparams for use_final_hidden_layer, 987 | # global_average_pooling, etc. then can determine what the layer will be based on backbone_cutoff. 988 | # currently, assumes that pretrained_model uses global average pooling and a final_hidden_layer 989 | if backbone_cutoff is None: 990 | # no backbone cutoff... use the full network (including tasks) as the backbone 991 | pred_layer_input_features = self.pretrained_hparams["num_tasks"] 992 | elif backbone_cutoff == -1: 993 | pred_layer_input_features = self.pretrained_hparams["final_hidden_size"] 994 | elif backbone_cutoff == -2: 995 | pred_layer_input_features = self.pretrained_hparams["embedding_len"] 996 | elif backbone_cutoff == -3: 997 | pred_layer_input_features = self.pretrained_hparams["embedding_len"] * kwargs["aa_seq_len"] 998 | else: 999 | raise ValueError("can't automatically determine pred_layer_input_features for given backbone_cutoff") 1000 | 1001 | layers["flatten"] = nn.Flatten(start_dim=1) 1002 | 1003 | # create a new prediction layer on top of the backbone 1004 | if top_net_type == "linear": 1005 | # linear layer for prediction 1006 | layers["prediction"] = nn.Linear(in_features=pred_layer_input_features, out_features=1) 1007 | elif top_net_type == "nonlinear": 1008 | # fully connected with hidden layer 1009 | fc_block = FCBlock(in_features=pred_layer_input_features, 1010 | num_hidden_nodes=top_net_hidden_nodes, 1011 | use_batchnorm=top_net_use_batchnorm, 1012 | use_dropout=top_net_use_dropout, 1013 | dropout_rate=top_net_dropout_rate) 1014 | 1015 | pred_layer = nn.Linear(in_features=top_net_hidden_nodes, out_features=1) 1016 | 1017 | layers["prediction"] = SequentialWithArgs(fc_block, pred_layer) 1018 | else: 1019 | raise ValueError("Unexpected type of top net layer: {}".format(top_net_type)) 1020 | 1021 | self.model = SequentialWithArgs(layers) 1022 | 1023 | def forward(self, x, **kwargs): 1024 | return self.model(x, **kwargs) 1025 | 1026 | 1027 | def get_activation_fn(activation, functional=True): 1028 | if activation == "relu": 1029 | return F.relu if functional else nn.ReLU() 1030 | elif activation == "gelu": 1031 | return F.gelu if functional else nn.GELU() 1032 | elif activation == "silo" or activation == "swish": 1033 | return F.silu if functional else nn.SiLU() 1034 | elif activation == "leaky_relu" or activation == "lrelu": 1035 | return F.leaky_relu if functional else nn.LeakyReLU() 1036 | else: 1037 | raise RuntimeError("unknown activation: {}".format(activation)) 1038 | 1039 | 1040 | class Model(enum.Enum): 1041 | def __new__(cls, *args, **kwds): 1042 | value = len(cls.__members__) + 1 1043 | obj = object.__new__(cls) 1044 | obj._value_ = value 1045 | return obj 1046 | 1047 | def __init__(self, cls, transfer_model): 1048 | self.cls = cls 1049 | self.transfer_model = transfer_model 1050 | 1051 | linear = LRModel, False 1052 | fully_connected = FCModel, False 1053 | cnn = ConvModel, False 1054 | cnn2 = ConvModel2, False 1055 | transformer_encoder = AttnModel, False 1056 | transfer_model = TransferModel, True 1057 | 1058 | 1059 | def main(): 1060 | pass 1061 | 1062 | 1063 | if __name__ == "__main__": 1064 | main() 1065 | 
-------------------------------------------------------------------------------- /metl/relative_attention.py: -------------------------------------------------------------------------------- 1 | """ implementation of transformer encoder with relative attention 2 | references: 3 | - https://medium.com/@_init_/how-self-attention-with-relative-position-representations-works-28173b8c245a 4 | - https://pytorch.org/docs/stable/_modules/torch/nn/modules/transformer.html#TransformerEncoderLayer 5 | - https://github.com/evelinehong/Transformer_Relative_Position_PyTorch/blob/master/relative_position.py 6 | - https://github.com/jiezouguihuafu/ClassicalModelreproduced/blob/main/Transformer/transfor_rpe.py 7 | """ 8 | 9 | import copy 10 | from os.path import basename, dirname, join, isfile 11 | from typing import Optional, Union 12 | 13 | import torch 14 | import torch.nn as nn 15 | import torch.nn.functional as F 16 | from torch import Tensor 17 | from torch.nn import Linear, Dropout, LayerNorm 18 | import time 19 | import networkx as nx 20 | 21 | import metl.structure as structure 22 | import metl.models as models 23 | 24 | 25 | class RelativePosition3D(nn.Module): 26 | """ Contact map-based relative position embeddings """ 27 | 28 | # need to compute a bucket_mtx for each structure 29 | # need to know which bucket_mtx to use when grabbing the embeddings in forward() 30 | # - on init, get a list of all PDB files we will be using 31 | # - use a dictionary to store PDB files --> bucket_mtxs 32 | # - forward() gets a new arg: the pdb file, which indexes into the dictionary to grab the right bucket_mtx 33 | def __init__(self, 34 | embedding_len: int, 35 | contact_threshold: int, 36 | clipping_threshold: int, 37 | pdb_fns: Optional[Union[str, list, tuple]] = None, 38 | default_pdb_dir: str = "data/pdb_files"): 39 | 40 | # preferably, pdb_fns contains full paths to the PDBs, but if just the PDB filename is given 41 | # then it defaults to the path data/pdb_files/ 42 | super().__init__() 43 | self.embedding_len = embedding_len 44 | self.clipping_threshold = clipping_threshold 45 | self.contact_threshold = contact_threshold 46 | self.default_pdb_dir = default_pdb_dir 47 | 48 | # dummy buffer for getting correct device for on-the-fly bucket matrix generation 49 | self.register_buffer("dummy_buffer", torch.empty(0), persistent=False) 50 | 51 | # for 3D-based positions, the number of embeddings is generally the number of buckets 52 | # for contact map-based distances, that is clipping_threshold + 1 53 | num_embeddings = clipping_threshold + 1 54 | 55 | # this is the embedding lookup table E_r 56 | self.embeddings_table = nn.Embedding(num_embeddings, embedding_len) 57 | 58 | # set up pdb_fns that were passed in on init (can also be set up during runtime in forward()) 59 | # todo: i'm using a hacky workaround to move the bucket_mtxs to the correct device 60 | # i tried to make it more efficient by registering bucket matrices as buffers, but i was 61 | # having problems with DDP syncing the buffers across processes 62 | self.bucket_mtxs = {} 63 | self.bucket_mtxs_device = self.dummy_buffer.device 64 | self._init_pdbs(pdb_fns) 65 | 66 | def forward(self, pdb_fn): 67 | # compute matrix R by grabbing the embeddings from the embeddings lookup table 68 | embeddings = self.embeddings_table(self._get_bucket_mtx(pdb_fn)) 69 | return embeddings 70 | 71 | # def _get_bucket_mtx(self, pdb_fn): 72 | # """ retrieve a bucket matrix given the pdb_fn. 
73 | # if the pdb_fn was provided at init or has already been computed, then the bucket matrix will be 74 | # retrieved from the object buffer. if the bucket matrix has not been computed yet, it will be here """ 75 | # pdb_attr = self._pdb_key(pdb_fn) 76 | # if hasattr(self, pdb_attr): 77 | # return getattr(self, pdb_attr) 78 | # else: 79 | # # encountering a new PDB at runtime... process it 80 | # # todo: if there's a new PDB at runtime, it will be initialized separately in each instance 81 | # # of RelativePosition3D, for each layer. It would be more efficient to have a global 82 | # # bucket_mtx registry... perhaps in the RelativeTransformerEncoder class, that can be passed through 83 | # self._init_pdb(pdb_fn) 84 | # return getattr(self, pdb_attr) 85 | 86 | def _move_bucket_mtxs(self, device): 87 | for k, v in self.bucket_mtxs.items(): 88 | self.bucket_mtxs[k] = v.to(device) 89 | self.bucket_mtxs_device = device 90 | 91 | def _get_bucket_mtx(self, pdb_fn): 92 | """ retrieve a bucket matrix given the pdb_fn. 93 | if the pdb_fn was provided at init or has already been computed, then the bucket matrix will be 94 | retrieved from the bucket_mtxs dictionary. else, it will be computed now on-the-fly """ 95 | 96 | # ensure that all the bucket matrices are on the same device as the nn.Embedding 97 | if self.bucket_mtxs_device != self.dummy_buffer.device: 98 | self._move_bucket_mtxs(self.dummy_buffer.device) 99 | 100 | pdb_attr = self._pdb_key(pdb_fn) 101 | if pdb_attr in self.bucket_mtxs: 102 | return self.bucket_mtxs[pdb_attr] 103 | else: 104 | # encountering a new PDB at runtime... process it 105 | # todo: if there's a new PDB at runtime, it will be initialized separately in each instance 106 | # of RelativePosition3D, for each layer. It would be more efficient to have a global 107 | # bucket_mtx registry... 
perhaps in the RelativeTransformerEncoder class, that can be passed through
108 |             self._init_pdb(pdb_fn)
109 |             return self.bucket_mtxs[pdb_attr]
110 | 
111 |     # def _set_bucket_mtx(self, pdb_fn, bucket_mtx):
112 |     #     """ store a bucket matrix as a buffer """
113 |     #     # if PyTorch ever implements a BufferDict, we could use it here efficiently
114 |     #     # there is also BufferDict from https://botorch.org/api/_modules/botorch/utils/torch.html
115 |     #     # would just need to modify it to have an option for persistent=False
116 |     #     bucket_mtx = bucket_mtx.to(self.dummy_buffer.device)
117 |     #
118 |     #     self.register_buffer(self._pdb_key(pdb_fn), bucket_mtx, persistent=False)
119 | 
120 |     def _set_bucket_mtx(self, pdb_fn, bucket_mtx):
121 |         """ store a bucket matrix in the bucket dict """
122 | 
123 |         # move the bucket_mtx to the same device that the other bucket matrices are on
124 |         bucket_mtx = bucket_mtx.to(self.bucket_mtxs_device)
125 | 
126 |         self.bucket_mtxs[self._pdb_key(pdb_fn)] = bucket_mtx
127 | 
128 |     @staticmethod
129 |     def _pdb_key(pdb_fn):
130 |         """ return a unique key for the given pdb_fn, used to map unique PDBs """
131 |         # note this key does NOT currently support PDBs with the same basename but different paths
132 |         # assumes every PDB filename is in the format <pdb_name>.pdb
133 |         # should be compatible with being a class attribute, as it is used as a pytorch buffer name
134 |         # in the commented-out buffer-based implementation above
135 |         return f"pdb_{basename(pdb_fn).split('.')[0]}"
136 | 
137 |     def _init_pdbs(self, pdb_fns):
138 |         start = time.time()
139 | 
140 |         if pdb_fns is None:
141 |             # nothing to initialize if pdb_fns is None
142 |             return
143 | 
144 |         # make sure pdb_fns is a list
145 |         if not isinstance(pdb_fns, list) and not isinstance(pdb_fns, tuple):
146 |             pdb_fns = [pdb_fns]
147 | 
148 |         # init each pdb fn in the list
149 |         for pdb_fn in pdb_fns:
150 |             self._init_pdb(pdb_fn)
151 | 
152 |         print("Initialized PDB bucket matrices in: {:.3f}".format(time.time() - start))
153 | 
154 |     def _init_pdb(self, pdb_fn):
155 |         """ process a pdb file for use with structure-based relative attention """
156 |         # if pdb_fn is not a full path, default to the path data/pdb_files/
157 |         if dirname(pdb_fn) == "":
158 |             # handle the case where the pdb file is in the current working directory
159 |             # if there is a PDB file in the cwd.... then just use it as is. otherwise, append the default.
160 |             if not isfile(pdb_fn):
161 |                 pdb_fn = join(self.default_pdb_dir, pdb_fn)
162 | 
163 |         # create a structure graph from the pdb_fn and contact threshold
164 |         cbeta_mtx = structure.cbeta_distance_matrix(pdb_fn)
165 |         structure_graph = structure.dist_thresh_graph(cbeta_mtx, self.contact_threshold)
166 | 
167 |         # bucket_mtx indexes into the embedding lookup table to create the final distance matrix
168 |         bucket_mtx = self._compute_bucket_mtx(structure_graph)
169 | 
170 |         self._set_bucket_mtx(pdb_fn, bucket_mtx)
171 | 
172 |     def _compute_bucketed_neighbors(self, structure_graph, source_node):
173 |         """ gets the bucketed neighbors from the given source node and structure graph """
174 |         if self.clipping_threshold < 0:
175 |             raise ValueError("Clipping threshold must be >= 0")
176 | 
177 |         sspl = _inv_dict(nx.single_source_shortest_path_length(structure_graph, source_node))
178 | 
179 |         if self.clipping_threshold is not None:
180 |             num_buckets = 1 + self.clipping_threshold
181 |             sspl = _combine_d(sspl, self.clipping_threshold, num_buckets - 1)
182 | 
183 |         return sspl
184 | 
185 |     def _compute_bucket_mtx(self, structure_graph):
186 |         """ get the bucket_mtx for the given structure_graph
187 |             calls _compute_bucketed_neighbors for every node in the structure_graph """
188 |         num_residues = len(list(structure_graph))
189 | 
190 |         # index into the embedding lookup table to create the final distance matrix
191 |         bucket_mtx = torch.zeros(num_residues, num_residues, dtype=torch.long)
192 | 
193 |         for node_num in sorted(list(structure_graph)):
194 |             bucketed_neighbors = self._compute_bucketed_neighbors(structure_graph, node_num)
195 | 
196 |             for bucket_num, neighbors in bucketed_neighbors.items():
197 |                 bucket_mtx[node_num, neighbors] = bucket_num
198 | 
199 |         return bucket_mtx
200 | 
201 | 
202 | class RelativePosition(nn.Module):
203 |     """ creates the embedding lookup table E_r and computes R
204 |         note this is a plain nn.Module; rather than relying on pl.LightningModule's `self.device`,
205 |         it grabs the correct device from a registered dummy buffer when building the range vectors
206 |         in forward() """
207 | 
208 |     def __init__(self, embedding_len: int, clipping_threshold: int):
209 |         """
210 |         embedding_len: the length of the embedding, may be d_model, or d_model // num_heads for multihead
211 |         clipping_threshold: the maximum relative position, referred to as k by Shaw et al.
211 | """ 212 | super().__init__() 213 | self.embedding_len = embedding_len 214 | self.clipping_threshold = clipping_threshold 215 | # for sequence-based distances, the number of embeddings is 2*k+1, where k is the clipping threshold 216 | num_embeddings = 2 * clipping_threshold + 1 217 | 218 | # this is the embedding lookup table E_r 219 | self.embeddings_table = nn.Embedding(num_embeddings, embedding_len) 220 | 221 | # for getting the correct device for range vectors in forward 222 | self.register_buffer("dummy_buffer", torch.empty(0), persistent=False) 223 | 224 | def forward(self, length_q, length_k): 225 | # supports different length sequences, but in self-attention length_q and length_k are the same 226 | range_vec_q = torch.arange(length_q, device=self.dummy_buffer.device) 227 | range_vec_k = torch.arange(length_k, device=self.dummy_buffer.device) 228 | 229 | # this sets up the standard sequence-based distance matrix for relative positions 230 | # the current position is 0, positions to the right are +1, +2, etc, and to the left -1, -2, etc 231 | distance_mat = range_vec_k[None, :] - range_vec_q[:, None] 232 | distance_mat_clipped = torch.clamp(distance_mat, -self.clipping_threshold, self.clipping_threshold) 233 | 234 | # convert to indices, indexing into the embedding table 235 | final_mat = (distance_mat_clipped + self.clipping_threshold).long() 236 | 237 | # compute matrix R by grabbing the embeddings from the embedding lookup table 238 | embeddings = self.embeddings_table(final_mat) 239 | 240 | return embeddings 241 | 242 | 243 | class RelativeMultiHeadAttention(nn.Module): 244 | def __init__(self, embed_dim, num_heads, dropout, pos_encoding, clipping_threshold, contact_threshold, pdb_fns): 245 | """ 246 | Multi-head attention with relative position embeddings. Input data should be in batch_first format. 247 | :param embed_dim: aka d_model, aka hid_dim 248 | :param num_heads: number of heads 249 | :param dropout: how much dropout for scaled dot product attention 250 | 251 | :param pos_encoding: what type of positional encoding to use, relative or relative3D 252 | :param clipping_threshold: clipping threshold for relative position embedding 253 | :param contact_threshold: for relative_3D, the threshold in angstroms for the contact map 254 | :param pdb_fns: pdb file(s) to set up the relative position object 255 | 256 | """ 257 | super().__init__() 258 | 259 | assert embed_dim % num_heads == 0, "embed_dim must be divisible by num_heads" 260 | 261 | # model dimensions 262 | self.embed_dim = embed_dim 263 | self.num_heads = num_heads 264 | self.head_dim = embed_dim // num_heads 265 | 266 | # pos encoding stuff 267 | self.pos_encoding = pos_encoding 268 | self.clipping_threshold = clipping_threshold 269 | self.contact_threshold = contact_threshold 270 | if pdb_fns is not None and not isinstance(pdb_fns, list): 271 | pdb_fns = [pdb_fns] 272 | self.pdb_fns = pdb_fns 273 | 274 | # relative position embeddings for use with keys and values 275 | # Shaw et al. uses relative position information for both keys and values 276 | # Huang et al. 
only uses it for the keys, which is probably enough 277 | if pos_encoding == "relative": 278 | self.relative_position_k = RelativePosition(self.head_dim, self.clipping_threshold) 279 | self.relative_position_v = RelativePosition(self.head_dim, self.clipping_threshold) 280 | elif pos_encoding == "relative_3D": 281 | self.relative_position_k = RelativePosition3D(self.head_dim, self.contact_threshold, 282 | self.clipping_threshold, self.pdb_fns) 283 | self.relative_position_v = RelativePosition3D(self.head_dim, self.contact_threshold, 284 | self.clipping_threshold, self.pdb_fns) 285 | else: 286 | raise ValueError("unrecognized pos_encoding: {}".format(pos_encoding)) 287 | 288 | # WQ, WK, and WV from attention is all you need 289 | # note these default to bias=True, same as PyTorch implementation 290 | self.q_proj = nn.Linear(embed_dim, embed_dim) 291 | self.k_proj = nn.Linear(embed_dim, embed_dim) 292 | self.v_proj = nn.Linear(embed_dim, embed_dim) 293 | 294 | # WO from attention is all you need 295 | # used for the final projection when computing multi-head attention 296 | # PyTorch uses NonDynamicallyQuantizableLinear instead of Linear to avoid triggering an obscure 297 | # error quantizing the model https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/linear.py#L122 298 | # todo: if quantizing the model, explore if the above is a concern for us 299 | self.out_proj = nn.Linear(embed_dim, embed_dim) 300 | 301 | # dropout for scaled dot product attention 302 | self.dropout = nn.Dropout(dropout) 303 | 304 | # scaling factor for scaled dot product attention 305 | scale = torch.sqrt(torch.FloatTensor([self.head_dim])) 306 | # persistent=False if you don't want to save it inside state_dict 307 | self.register_buffer('scale', scale) 308 | 309 | # toggles meant to be set directly by user 310 | self.need_weights = False 311 | self.average_attn_weights = True 312 | 313 | def _compute_attn_weights(self, query, key, len_q, len_k, batch_size, mask, pdb_fn): 314 | """ computes the attention weights (a "compatability function" of queries with corresponding keys) """ 315 | 316 | # calculate the first term in the numerator attn1, which is Q*K 317 | # todo: pytorch reshapes q,k and v to 3 dimensions (similar to how r_q2 is below) 318 | # is that functionally equivalent to what we're doing? is their way faster? 
319 |         # r_q1 = [batch_size, num_heads, len_q, head_dim]
320 |         r_q1 = query.view(batch_size, len_q, self.num_heads, self.head_dim).permute(0, 2, 1, 3)
321 |         # todo: we could directly permute r_k1 to [batch_size, num_heads, head_dim, len_k]
322 |         #   to make it compatible for matrix multiplication with r_q1, instead of 2-step approach
323 |         # r_k1 = [batch_size, num_heads, len_k, head_dim]
324 |         r_k1 = key.view(batch_size, len_k, self.num_heads, self.head_dim).permute(0, 2, 1, 3)
325 |         # attn1 = [batch_size, num_heads, len_q, len_k]
326 |         attn1 = torch.matmul(r_q1, r_k1.permute(0, 1, 3, 2))
327 | 
328 |         # calculate the second term in the numerator attn2, which is Q*R
329 |         # r_q2 = [query_len, batch_size * num_heads, head_dim]
330 |         r_q2 = query.permute(1, 0, 2).contiguous().view(len_q, batch_size * self.num_heads, self.head_dim)
331 | 
332 |         # todo: support multiple different PDB base structures per batch
333 |         #   one option:
334 |         #   - require batches to be all the same protein
335 |         #   - add argument to forward() to accept the PDB file for the protein in the batch
336 |         #   - then we just pass in the PDB file to relative position's forward()
337 |         #   to support multiple different structures per batch:
338 |         #   - add argument to forward() to accept PDB files, one for each item in batch
339 |         #   - make corresponding changes in relative_position object to return R for each structure
340 |         #   - note: if there are a lot of different structures, and the sequence lengths are long,
341 |         #     this could be memory prohibitive because R (rel_pos_k) can take up a lot of mem for long seqs
342 |         #   - adjust the attn2 calculation to factor in the multiple different R matrices.
343 |         #     the way to do this might have to be to do multiple matmuls, one for each structure.
344 |         #     basically, would split up r_q2 into several matrices grouped by structure, and then
345 |         #     multiply with corresponding R, then combine back into the exact same order of the original r_q2
346 |         #     note: this may be computationally intensive (splitting, more matrix multiplies, joining)
347 |         #     another option would be to create views(?), repeating the different Rs so we can do
348 |         #     a matrix multiply directly with r_q2
349 |         #   - would shapes be affected if there was padding in the queries, keys, values?
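# worked example of the sequence-based R lookup used below, assuming a hypothetical
# clipping_threshold=2 and len_q = len_k = 5 (illustrative values only):
#   distance_mat          distance_mat_clipped     final_mat (embedding indices)
#   [[ 0  1  2  3  4]     [[ 0  1  2  2  2]        [[2 3 4 4 4]
#    [-1  0  1  2  3]      [-1  0  1  2  2]         [1 2 3 4 4]
#    [-2 -1  0  1  2]      [-2 -1  0  1  2]         [0 1 2 3 4]
#    [-3 -2 -1  0  1]      [-2 -2 -1  0  1]         [0 0 1 2 3]
#    [-4 -3 -2 -1  0]]     [-2 -2 -2 -1  0]]        [0 0 0 1 2]]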
350 | 
351 |         if self.pos_encoding == "relative":
352 |             # rel_pos_k = [len_q, len_k, head_dim]
353 |             rel_pos_k = self.relative_position_k(len_q, len_k)
354 |         elif self.pos_encoding == "relative_3D":
355 |             # rel_pos_k = [sequence length (from PDB structure), head_dim]
356 |             rel_pos_k = self.relative_position_k(pdb_fn)
357 |         else:
358 |             raise ValueError("unrecognized pos_encoding: {}".format(self.pos_encoding))
359 | 
360 |         # the matmul basically computes the dot product between each input position's query vector and
361 |         # its corresponding relative position embeddings across all input sequences in the heads and batch
362 |         # attn2 = [batch_size * num_heads, len_q, len_k]
363 |         attn2 = torch.matmul(r_q2, rel_pos_k.transpose(1, 2)).transpose(0, 1)
364 |         # attn2 = [batch_size, num_heads, len_q, len_k]
365 |         attn2 = attn2.contiguous().view(batch_size, self.num_heads, len_q, len_k)
366 | 
367 |         # calculate attention weights
368 |         attn_weights = (attn1 + attn2) / self.scale
369 | 
370 |         # apply mask if given
371 |         if mask is not None:
372 |             # todo: pytorch uses float("-inf") instead of -1e10
373 |             attn_weights = attn_weights.masked_fill(mask == 0, -1e10)
374 | 
375 |         # softmax gives us the final attention weights
376 |         attn_weights = torch.softmax(attn_weights, dim=-1)
377 |         # attn_weights = [batch_size, num_heads, len_q, len_k]
378 |         attn_weights = self.dropout(attn_weights)
379 | 
380 |         return attn_weights
381 | 
382 |     def _compute_avg_val(self, value, len_q, len_k, len_v, attn_weights, batch_size, pdb_fn):
383 |         # todo: add option to not factor in relative position embeddings in value calculation
384 |         # calculate the first term, the attn*values
385 |         # r_v1 = [batch_size, num_heads, len_v, head_dim]
386 |         r_v1 = value.view(batch_size, len_v, self.num_heads, self.head_dim).permute(0, 2, 1, 3)
387 |         # avg1 = [batch_size, num_heads, len_q, head_dim]
388 |         avg1 = torch.matmul(attn_weights, r_v1)
389 | 
390 |         # calculate the second term, the attn*R
391 |         # similar to how relative embeddings are factored in the attention weights calculation
392 |         if self.pos_encoding == "relative":
393 |             # rel_pos_v = [query_len, value_len, head_dim]
394 |             rel_pos_v = self.relative_position_v(len_q, len_v)
395 |         elif self.pos_encoding == "relative_3D":
396 |             # rel_pos_v = [sequence length (from PDB structure), head_dim]
397 |             rel_pos_v = self.relative_position_v(pdb_fn)
398 |         else:
399 |             raise ValueError("unrecognized pos_encoding: {}".format(self.pos_encoding))
400 | 
401 |         # r_attn_weights = [len_q, batch_size * num_heads, len_k] (len_k == len_v for attention)
402 |         r_attn_weights = attn_weights.permute(2, 0, 1, 3).contiguous().view(len_q, batch_size * self.num_heads, len_k)
403 |         avg2 = torch.matmul(r_attn_weights, rel_pos_v)
404 |         # avg2 = [batch_size, num_heads, len_q, head_dim]
405 |         avg2 = avg2.transpose(0, 1).contiguous().view(batch_size, self.num_heads, len_q, self.head_dim)
406 | 
407 |         # calculate avg value
408 |         x = avg1 + avg2  # [batch_size, num_heads, len_q, head_dim]
409 |         x = x.permute(0, 2, 1, 3).contiguous()  # [batch_size, len_q, num_heads, head_dim]
410 |         # x = [batch_size, len_q, embed_dim]
411 |         x = x.view(batch_size, len_q, self.embed_dim)
412 | 
413 |         return x
414 | 
415 |     def forward(self, query, key, value, pdb_fn=None, mask=None):
416 |         # query = [batch_size, q_len, embed_dim]
417 |         # key = [batch_size, k_len, embed_dim]
418 |         # value = [batch_size, v_len, embed_dim]
419 |         batch_size = query.shape[0]
420 |         len_k, len_q, len_v = (key.shape[1], query.shape[1], value.shape[1])
421 | 
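# shape walkthrough for a hypothetical self-attention call (batch_size=2, len=5,
# embed_dim=64, num_heads=8, so head_dim=8 -- illustrative values only):
#   query/key/value: [2, 5, 64] -> attn_weights: [2, 8, 5, 5] -> attn_output: [2, 5, 64]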
422 |         # in projection (multiply inputs by WQ, WK, WV)
423 |         query = self.q_proj(query)
424 |         key = self.k_proj(key)
425 |         value = self.v_proj(value)
426 | 
427 |         # first compute the attention weights, then multiply with values
428 |         # attn_weights = [batch_size, num_heads, len_q, len_k]
429 |         attn_weights = self._compute_attn_weights(query, key, len_q, len_k, batch_size, mask, pdb_fn)
430 | 
431 |         # take weighted average of values (weighted by attention weights)
432 |         attn_output = self._compute_avg_val(value, len_q, len_k, len_v, attn_weights, batch_size, pdb_fn)
433 | 
434 |         # output projection
435 |         # attn_output = [batch_size, len_q, embed_dim]
436 |         attn_output = self.out_proj(attn_output)
437 | 
438 |         if self.need_weights:
439 |             # return attention weights in addition to attention
440 |             # average the weights over the heads (to get overall attention)
441 |             # attn_weights = [batch_size, len_q, len_k]
442 |             if self.average_attn_weights:
443 |                 attn_weights = attn_weights.sum(dim=1) / self.num_heads
444 |             return {"attn_output": attn_output, "attn_weights": attn_weights}
445 |         else:
446 |             return attn_output
447 | 
448 | 
449 | class RelativeTransformerEncoderLayer(nn.Module):
450 |     """
451 |     d_model: the number of expected features in the input (required).
452 |     nhead: the number of heads in the MultiHeadAttention models (required).
453 |     clipping_threshold: the clipping threshold for relative position embeddings
454 |     dim_feedforward: the dimension of the feedforward network model (default=2048).
455 |     dropout: the dropout value (default=0.1).
456 |     activation: the activation function of the intermediate layer, can be a string
457 |         ("relu" or "gelu") or a unary callable. Default: relu
458 |     layer_norm_eps: the eps value in layer normalization components (default=1e-5).
459 |     norm_first: if ``True``, layer norm is done prior to attention and feedforward
460 |         operations, respectively. Otherwise, it's done after. Default: ``False`` (after).
461 |     """
462 | 
463 |     # __constants__ is a TorchScript (torch.jit) compilation helper; it also ensures these values don't change
464 |     __constants__ = ['batch_first', 'norm_first']
465 | 
466 |     def __init__(self,
467 |                  d_model,
468 |                  nhead,
469 |                  pos_encoding="relative",
470 |                  clipping_threshold=3,
471 |                  contact_threshold=7,
472 |                  pdb_fns=None,
473 |                  dim_feedforward=2048,
474 |                  dropout=0.1,
475 |                  activation=F.relu,
476 |                  layer_norm_eps=1e-5,
477 |                  norm_first=False) -> None:
478 | 
479 |         self.batch_first = True
480 | 
481 |         super(RelativeTransformerEncoderLayer, self).__init__()
482 | 
483 |         self.self_attn = RelativeMultiHeadAttention(d_model, nhead, dropout,
484 |                                                     pos_encoding, clipping_threshold, contact_threshold, pdb_fns)
485 | 
486 |         # feed forward model
487 |         self.linear1 = Linear(d_model, dim_feedforward)
488 |         self.dropout = Dropout(dropout)
489 |         self.linear2 = Linear(dim_feedforward, d_model)
490 | 
491 |         self.norm_first = norm_first
492 |         self.norm1 = LayerNorm(d_model, eps=layer_norm_eps)
493 |         self.norm2 = LayerNorm(d_model, eps=layer_norm_eps)
494 |         self.dropout1 = Dropout(dropout)
495 |         self.dropout2 = Dropout(dropout)
496 | 
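# for reference, norm_first selects between the two standard residual arrangements
# implemented in forward() below (SA = self-attention block, FF = feed forward block):
#   norm_first=True  (pre-norm):  x = x + SA(norm1(x)); x = x + FF(norm2(x))
#   norm_first=False (post-norm): x = norm1(x + SA(x)); x = norm2(x + FF(x))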
497 |         # Legacy string support for activation function.
498 |         if isinstance(activation, str):
499 |             self.activation = models.get_activation_fn(activation)
500 |         else:
501 |             self.activation = activation
502 | 
503 |     def forward(self, src: Tensor, pdb_fn=None) -> Tensor:
504 |         x = src
505 |         if self.norm_first:
506 |             x = x + self._sa_block(self.norm1(x), pdb_fn=pdb_fn)
507 |             x = x + self._ff_block(self.norm2(x))
508 |         else:
509 |             x = self.norm1(x + self._sa_block(x, pdb_fn=pdb_fn))
510 |             x = self.norm2(x + self._ff_block(x))
511 | 
512 |         return x
513 | 
514 |     # self-attention block
515 |     def _sa_block(self, x: Tensor, pdb_fn=None) -> Tensor:
516 |         x = self.self_attn(x, x, x, pdb_fn=pdb_fn)
517 |         if isinstance(x, dict):
518 |             # handle the case where we are returning attention weights
519 |             x = x["attn_output"]
520 |         return self.dropout1(x)
521 | 
522 |     # feed forward block
523 |     def _ff_block(self, x: Tensor) -> Tensor:
524 |         x = self.linear2(self.dropout(self.activation(self.linear1(x))))
525 |         return self.dropout2(x)
526 | 
527 | 
528 | class RelativeTransformerEncoder(nn.Module):
529 |     def __init__(self, encoder_layer, num_layers, norm=None, reset_params=True):
530 |         super(RelativeTransformerEncoder, self).__init__()
531 |         # using get_clones means all layers have the same initialization
532 |         # this is also a problem in PyTorch's TransformerEncoder implementation, which this is based on
533 |         # todo: PyTorch is changing its transformer API... check up on it and see if there is a better way
534 |         self.layers = _get_clones(encoder_layer, num_layers)
535 |         self.num_layers = num_layers
536 |         self.norm = norm
537 | 
538 |         # important because get_clones means all layers have same initialization
539 |         # should recursively reset parameters for all submodules
540 |         if reset_params:
541 |             self.apply(models.reset_parameters_helper)
542 | 
543 |     def forward(self, src: Tensor, pdb_fn=None) -> Tensor:
544 |         output = src
545 | 
546 |         for mod in self.layers:
547 |             output = mod(output, pdb_fn=pdb_fn)
548 | 
549 |         if self.norm is not None:
550 |             output = self.norm(output)
551 | 
552 |         return output
553 | 
554 | 
555 | def _get_clones(module, num_clones):
556 |     return nn.ModuleList([copy.deepcopy(module) for _ in range(num_clones)])
557 | 
558 | 
559 | def _inv_dict(d):
560 |     """ helper function for contact map-based position embeddings """
561 |     inv = dict()
562 |     for k, v in d.items():
563 |         # collect dict keys into lists based on value
564 |         inv.setdefault(v, list()).append(k)
565 |     for k, v in inv.items():
566 |         # put in sorted order
567 |         inv[k] = sorted(v)
568 |     return inv
569 | 
570 | 
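# worked example for _inv_dict (hypothetical input, chosen just for illustration):
# an input of {0: 0, 1: 1, 2: 1, 3: 2} mapping node indices to contact map distances
# inverts to {0: [0], 1: [1, 2], 2: [3]}, with each list in sorted order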
571 | def _combine_d(d, threshold, combined_key):
572 |     """ helper function for contact map-based position embeddings
573 |         d is a dictionary with ints as keys and lists as values.
574 |         for all keys >= threshold, this function combines the values of those keys into a single list """
575 |     out_d = {}
576 |     for k, v in d.items():
577 |         if k < threshold:
578 |             out_d[k] = v
579 |         elif k >= threshold:
580 |             if combined_key not in out_d:
581 |                 out_d[combined_key] = v
582 |             else:
583 |                 out_d[combined_key] += v
584 |     if combined_key in out_d:
585 |         out_d[combined_key] = sorted(out_d[combined_key])
586 |     return out_d
587 | 
--------------------------------------------------------------------------------
/metl/structure.py:
--------------------------------------------------------------------------------
1 | import os
2 | from os.path import isfile
3 | from enum import Enum, auto
4 | 
5 | import numpy as np
6 | from scipy.spatial.distance import cdist
7 | import networkx as nx
8 | from biopandas.pdb import PandasPdb
9 | 
10 | 
11 | class GraphType(Enum):
12 |     LINEAR = auto()
13 |     COMPLETE = auto()
14 |     DISCONNECTED = auto()
15 |     DIST_THRESH = auto()
16 |     DIST_THRESH_SHUFFLED = auto()
17 | 
18 | 
19 | def save_graph(g, fn):
20 |     """ Saves graph to file """
21 |     nx.write_gexf(g, fn)
22 | 
23 | 
24 | def load_graph(fn):
25 |     """ Loads graph from file """
26 |     g = nx.read_gexf(fn, node_type=int)
27 |     return g
28 | 
29 | 
30 | def shuffle_nodes(g, seed=7):
31 |     """ Shuffles the nodes of the given graph and returns a copy of the shuffled graph """
32 |     # get the list of nodes in this graph
33 |     nodes = g.nodes()
34 | 
35 |     # create a permuted list of nodes
36 |     np.random.seed(seed)
37 |     nodes_shuffled = np.random.permutation(nodes)
38 | 
39 |     # create a dictionary mapping from old node label to new node label
40 |     mapping = {n: ns for n, ns in zip(nodes, nodes_shuffled)}
41 | 
42 |     g_shuffled = nx.relabel_nodes(g, mapping, copy=True)
43 | 
44 |     return g_shuffled
45 | 
46 | 
47 | def linear_graph(num_residues):
48 |     """ Creates a linear graph where each node is connected to its sequence neighbor in order """
49 |     g = nx.Graph()
50 |     g.add_nodes_from(np.arange(0, num_residues))
51 |     for i in range(num_residues-1):
52 |         g.add_edge(i, i+1)
53 |     return g
54 | 
55 | 
56 | def complete_graph(num_residues):
57 |     """ Creates a graph where each node is connected to all other nodes """
58 |     g = nx.complete_graph(num_residues)
59 |     return g
60 | 
61 | 
62 | def disconnected_graph(num_residues):
63 |     g = nx.Graph()
64 |     g.add_nodes_from(np.arange(0, num_residues))
65 |     return g
66 | 
67 | 
68 | def dist_thresh_graph(dist_mtx, threshold):
69 |     """ Creates undirected graph based on a distance threshold """
70 |     g = nx.Graph()
71 |     g.add_nodes_from(np.arange(0, dist_mtx.shape[0]))
72 | 
73 |     # loop through each residue
74 |     for rn1 in range(len(dist_mtx)):
75 |         # find all residues that are within threshold distance of current
76 |         rns_within_threshold = np.where(dist_mtx[rn1] < threshold)[0]
77 | 
78 |         # add edges from current residue to those that are within threshold
79 |         for rn2 in rns_within_threshold:
80 |             # don't add self edges
81 |             if rn1 != rn2:
82 |                 g.add_edge(rn1, rn2)
83 |     return g
84 | 
85 | 
86 | def ordered_adjacency_matrix(g):
87 |     """ returns the adjacency matrix ordered by node label in increasing order as a numpy array """
88 |     node_order = sorted(g.nodes())
89 |     adj_mtx = nx.to_numpy_array(g, nodelist=node_order)
90 |     return np.asarray(adj_mtx).astype(np.float32)
91 | 
92 | 
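# a minimal sketch of how these helpers fit together (GraphType is defined above and
# gen_graph below in this module; the PDB path refers to a file shipped in this repository):
#   dist_mtx = cbeta_distance_matrix("pdbs/2qmt_p.pdb")
#   g = gen_graph(GraphType.DIST_THRESH, dist_mtx, dist_thresh=7)
#   adj = ordered_adjacency_matrix(g)   # [num_residues, num_residues], float32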
93 | def cbeta_distance_matrix(pdb_fn, start=0, end=None):
94 |     # note that start and end do not index by residue number
95 |     # they index by position in the pdb file's residue listing
96 | 
97 |     # read the pdb file into a biopandas object
98 |     ppdb = PandasPdb().read_pdb(pdb_fn)
99 | 
100 |     # group by residue number
101 |     # important to specify sort=True so that group keys (residue number) are in order
102 |     # the reason is we loop through group keys below, and assume that residues are in order
103 |     # the pandas function has sort=True by default, but we specify it anyway because it is important
104 |     grouped = ppdb.df["ATOM"].groupby("residue_number", sort=True)
105 | 
106 |     # a list of coords for the cbeta or calpha of each residue
107 |     coords = []
108 | 
109 |     # loop through each residue and find the coordinates of cbeta
110 |     for i, (residue_number, values) in enumerate(grouped):
111 | 
112 |         # skip residues not in the range
113 |         end_index = (len(grouped) if end is None else end)
114 |         if i not in range(start, end_index):
115 |             continue
116 | 
117 |         residue_group = values  # "values" is already the group for this residue_number
118 | 
119 |         atom_names = residue_group["atom_name"]
120 |         if "CB" in atom_names.values:
121 |             # print("Using CB...")
122 |             atom_name = "CB"
123 |         elif "CA" in atom_names.values:
124 |             # print("Using CA...")
125 |             atom_name = "CA"
126 |         else:
127 |             raise ValueError("Couldn't find CB or CA for residue {}".format(residue_number))
128 | 
129 |         # get the coordinates of cbeta (or calpha)
130 |         coords.append(
131 |             residue_group[residue_group["atom_name"] == atom_name][["x_coord", "y_coord", "z_coord"]].values[0])
132 | 
133 |     # stack the coords into a numpy array where each row has the x,y,z coords for a different residue
134 |     coords = np.stack(coords)
135 | 
136 |     # compute pairwise euclidean distance between all cbetas
137 |     dist_mtx = cdist(coords, coords, metric="euclidean")
138 | 
139 |     return dist_mtx
140 | 
141 | 
142 | def get_neighbors(g, nodes):
143 |     """ returns a sorted list of the unique neighbors of all given nodes """
144 |     neighbors = set()
145 |     for n in nodes:
146 |         neighbors.update(g.neighbors(n))
147 |     return sorted(list(neighbors))
148 | 
149 | 
150 | def gen_graph(graph_type, res_dist_mtx, dist_thresh=7, shuffle_seed=7, graph_save_dir=None, save=False):
151 |     """ generate the specified structure graph using the specified residue distance matrix """
152 |     if graph_type is GraphType.LINEAR:
153 |         g = linear_graph(len(res_dist_mtx))
154 |         save_fn = None if not save else os.path.join(graph_save_dir, "linear.graph")
155 | 
156 |     elif graph_type is GraphType.COMPLETE:
157 |         g = complete_graph(len(res_dist_mtx))
158 |         save_fn = None if not save else os.path.join(graph_save_dir, "complete.graph")
159 | 
160 |     elif graph_type is GraphType.DISCONNECTED:
161 |         g = disconnected_graph(len(res_dist_mtx))
162 |         save_fn = None if not save else os.path.join(graph_save_dir, "disconnected.graph")
163 | 
164 |     elif graph_type is GraphType.DIST_THRESH:
165 |         g = dist_thresh_graph(res_dist_mtx, dist_thresh)
166 |         save_fn = None if not save else os.path.join(graph_save_dir, "dist_thresh_{}.graph".format(dist_thresh))
167 | 
168 |     elif graph_type is GraphType.DIST_THRESH_SHUFFLED:
169 |         g = dist_thresh_graph(res_dist_mtx, dist_thresh)
170 |         g = shuffle_nodes(g, seed=shuffle_seed)
171 |         save_fn = None if not save else \
172 |             os.path.join(graph_save_dir, "dist_thresh_{}_shuffled_r{}.graph".format(dist_thresh, shuffle_seed))
173 | 
174 |     else:
175 |         raise ValueError("Graph type {} is not implemented".format(graph_type))
176 | 
177 |     if save:
178 |         if isfile(save_fn):
179 |             print("err: graph already exists: {}. 
to overwrite, delete the existing file first".format(save_fn)) 180 | else: 181 | os.makedirs(graph_save_dir, exist_ok=True) 182 | save_graph(g, save_fn) 183 | 184 | return g 185 | -------------------------------------------------------------------------------- /metl/test.py: -------------------------------------------------------------------------------- 1 | import metl 2 | import torch 3 | 4 | 5 | def main(): 6 | model, data_encoder = metl.get_from_ident("metl-g-20m-1d") 7 | 8 | # make sure all the sequences are the same length 9 | amino_acid_sequences = ["SMART", "MAGIC"] 10 | encoded_seqs = data_encoder.encode_sequences(amino_acid_sequences) 11 | 12 | # set model to eval mode 13 | model.eval() 14 | # no need to compute gradients for inference 15 | with torch.no_grad(): 16 | predictions = model(torch.tensor(encoded_seqs)) 17 | 18 | print(predictions) 19 | 20 | 21 | if __name__ == "__main__": 22 | main() 23 | -------------------------------------------------------------------------------- /metl/test2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import metl 3 | 4 | 5 | def main(): 6 | # "YoQkzoLD" is a METL-L (2M, 1D) [GFP] model that was fine-tuned on 64 examples from the avGFP DMS dataset 7 | model, data_encoder = metl.get_from_uuid(uuid="YoQkzoLD") 8 | 9 | # the GFP wild-type sequence 10 | wt = "SKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTLSYGVQCFSRYPDHMKQ" \ 11 | "HDFFKSAMPEGYVQERTIFFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYIMADKQKN" \ 12 | "GIKVNFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK" 13 | 14 | # some example GFP variants to compute the scores for 15 | variants = ["E3K,G102S", 16 | "T36P,S203T,K207R", 17 | "V10A,D19G,F25S,E113V"] 18 | 19 | encoded_variants = data_encoder.encode_variants(wt, variants) 20 | 21 | # set model to eval mode 22 | model.eval() 23 | # no need to compute gradients for inference 24 | with torch.no_grad(): 25 | predictions = model(torch.tensor(encoded_variants)) 26 | 27 | print(predictions) 28 | 29 | 30 | if __name__ == "__main__": 31 | main() 32 | -------------------------------------------------------------------------------- /metl/test3.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import metl 3 | 4 | 5 | def main(): 6 | # this is a 3D RPE model, which requires a PDB file matching the WT sequence 7 | model, data_encoder = metl.get_from_uuid(uuid="PEkeRuxb") 8 | 9 | # the GFP wild-type sequence 10 | wt = "SKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTLSYGVQCFSRYPDHMKQ" \ 11 | "HDFFKSAMPEGYVQERTIFFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYIMADKQKN" \ 12 | "GIKVNFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK" 13 | 14 | # some example GFP variants to compute the scores for 15 | variants = ["E3K,G102S", 16 | "T36P,S203T,K207R", 17 | "V10A,D19G,F25S,E113V"] 18 | 19 | encoded_variants = data_encoder.encode_variants(wt, variants) 20 | 21 | # set model to eval mode 22 | model.eval() 23 | # no need to compute gradients for inference 24 | with torch.no_grad(): 25 | predictions = model(torch.tensor(encoded_variants), pdb_fn="pdbs/1gfl_cm.pdb") 26 | 27 | print(predictions) 28 | 29 | 30 | if __name__ == "__main__": 31 | main() 32 | -------------------------------------------------------------------------------- /metl/test4.py: -------------------------------------------------------------------------------- 1 | import metl 2 | import torch 3 
| 4 | 
5 | def main():
6 |     model, data_encoder = metl.get_from_ident("METL-L-2M-3D-GB1")
7 | 
8 |     # the GB1 WT sequence
9 |     wt = "MQYKLILNGKTLKGETTTEAVDAATAEKVFKQYANDNGVDGEWTYDDATKTFTVTE"
10 | 
11 |     # some example GB1 variants for which to compute the METL-Local Rosetta score estimates
12 |     variants = ["T17P,T54F",
13 |                 "V28L,F51A",
14 |                 "T17P,V28L,F51A,T54F"]
15 | 
16 |     encoded_variants = data_encoder.encode_variants(wt, variants)
17 | 
18 |     # set model to eval mode
19 |     model.eval()
20 |     # no need to compute gradients for inference
21 |     with torch.no_grad():
22 |         predictions = model(torch.tensor(encoded_variants), pdb_fn="pdbs/2qmt_p.pdb")
23 |     print(predictions)
24 | 
25 |     # can also input full sequences
26 |     sequences = ["MPYKLILNGKTLKGETTTEAVDAATAEKVFKQYANDNGVDGEWTYDDATKTFTVTE",
27 |                  "MPAKLILNGKTLKGETTTEAVDAATAEKVFKQYANDNGVDGEWTYDDATKTFTVTE",
28 |                  "MGEKLILNGKTLKGETTTEAVDAATAEKVFKQYANDNGVDGEWTYDDATKTFTVTE"]
29 |     encoded_sequences = data_encoder.encode_sequences(sequences)
30 |     model.eval()
31 |     with torch.no_grad():
32 |         predictions = model(torch.tensor(encoded_sequences), pdb_fn="pdbs/2qmt_p.pdb")
33 |     print(predictions)
34 | 
35 |     # can also use the 1D model which doesn't require a PDB file
36 |     model, data_encoder = metl.get_from_ident("METL-L-2M-1D-GB1")
37 |     variants = ["T17P,T54F",
38 |                 "V28L,F51A",
39 |                 "T17P,V28L,F51A,T54F"]
40 |     encoded_variants = data_encoder.encode_variants(wt, variants)
41 |     model.eval()
42 |     with torch.no_grad():
43 |         predictions = model(torch.tensor(encoded_variants))
44 |     print(predictions)
45 | 
46 | 
47 | if __name__ == "__main__":
48 |     main()
49 | 
--------------------------------------------------------------------------------
/notebooks/inference.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Inference with METL-Global"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 10,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import torch\n",
17 | "import torchextractor as tx\n",
18 | "import torchinfo\n",
19 | "\n",
20 | "import metl"
21 | ]
22 | },
23 | {
24 | "cell_type": "markdown",
25 | "metadata": {},
26 | "source": [
27 | "# Load a METL-G model"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 5,
33 | "metadata": {},
34 | "outputs": [],
35 | "source": [
36 | "model, data_encoder = metl.get_from_ident(\"METL-G-20M-1D\")"
37 | ]
38 | },
39 | {
40 | "cell_type": "code",
41 | "execution_count": 7,
42 | "metadata": {},
43 | "outputs": [
44 | {
45 | "name": "stdout",
46 | "output_type": "stream",
47 | "text": [
48 | "===============================================================================================\n",
49 | "Layer (type (var_name)) Param #\n",
50 | "===============================================================================================\n",
51 | "AttnModel (AttnModel) --\n",
52 | "├─SequentialWithArgs (model) --\n",
53 | "│ └─ScaledEmbedding (embedder) --\n",
54 | "│ │ └─Embedding (embedding) 10,752\n",
55 | "│ └─RelativeTransformerEncoder (tr_encoder) --\n",
56 | "│ │ └─ModuleList (layers) --\n",
57 | "│ │ │ └─RelativeTransformerEncoderLayer (0) 3,154,560\n",
58 | "│ │ │ └─RelativeTransformerEncoderLayer (1) 3,154,560\n",
59 | "│ │ │ └─RelativeTransformerEncoderLayer (2) 3,154,560\n",
60 | "│ │ │ └─RelativeTransformerEncoderLayer (3) 3,154,560\n",
61 | "│ │ │ └─RelativeTransformerEncoderLayer (4) 3,154,560\n",
62 | "│ │ │ 
└─RelativeTransformerEncoderLayer (5) 3,154,560\n", 63 | "│ │ └─LayerNorm (norm) 1,024\n", 64 | "│ └─GlobalAveragePooling (avg_pooling) --\n", 65 | "│ └─FCBlock (fc1) --\n", 66 | "│ │ └─Linear (fc) 262,656\n", 67 | "│ │ └─ReLU (activation) --\n", 68 | "│ │ └─LayerNorm (norm) 1,024\n", 69 | "│ │ └─Dropout (dropout) --\n", 70 | "│ └─Linear (prediction) 28,215\n", 71 | "===============================================================================================\n", 72 | "Total params: 19,231,031\n", 73 | "Trainable params: 19,231,031\n", 74 | "Non-trainable params: 0\n", 75 | "===============================================================================================\n" 76 | ] 77 | } 78 | ], 79 | "source": [ 80 | "summary = torchinfo.summary(model, depth=4, verbose=1, row_settings=[\"var_names\"])" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "# Set up representation extraction\n", 88 | "For METL-Global models, I recommend using the representation immediately after the GlobalAveragePooling (avg_pooling) layer. For METL-Local models, I recommend using the representation immediately after the final fully connected layer (fc1). " 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 12, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "return_layers = [\n", 98 | " \"model.avg_pooling\",\n", 99 | "]\n", 100 | "\n", 101 | "extractor = tx.Extractor(model.eval(), return_layers)" 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "# Test a couple sequences" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 25, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "# note: make sure all the sequences in a batch are the same length\n", 118 | "amino_acid_sequences = [\"SMART\", \"MAGIC\"]\n", 119 | "encoded_seqs = data_encoder.encode_sequences(amino_acid_sequences)\n", 120 | "\n", 121 | "with torch.no_grad():\n", 122 | " model_out, intermediate_out = extractor(torch.tensor(encoded_seqs))" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 29, 128 | "metadata": {}, 129 | "outputs": [ 130 | { 131 | "data": { 132 | "text/plain": [ 133 | "torch.Size([2, 55])" 134 | ] 135 | }, 136 | "execution_count": 29, 137 | "metadata": {}, 138 | "output_type": "execute_result" 139 | } 140 | ], 141 | "source": [ 142 | "# model_out contains the final output of the model (Rosetta energy term predictions)\n", 143 | "# there are 55 energy terms, the first one is total_score \n", 144 | "# they are listed in order on the main README\n", 145 | "model_out.shape" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 32, 151 | "metadata": {}, 152 | "outputs": [ 153 | { 154 | "data": { 155 | "text/plain": [ 156 | "torch.Size([2, 512])" 157 | ] 158 | }, 159 | "execution_count": 32, 160 | "metadata": {}, 161 | "output_type": "execute_result" 162 | } 163 | ], 164 | "source": [ 165 | "# intermediate_out is a dictionary containing intermediate outputs \n", 166 | "# for all the return_layers specified above\n", 167 | "# METL-G has an embedding dimension of 512, thus outputs will be 512\n", 168 | "intermediate_out[\"model.avg_pooling\"].shape" 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "metadata": {}, 174 | "source": [ 175 | "# Additional notes\n", 176 | "The above will retrieve a length 512 sequence-level representation immediately following the global average pooling layer, which takes the 
average of residue-level representations. \n", 177 | "\n", 178 | "If you want, you can also get the residue-representations. You can also play around with the sequence-level representation from after the FC layer, although I haven't had as much success with this representation for my tasks (too specific to the Rosetta energies?). You may have more luck with it, though. " 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": 34, 184 | "metadata": {}, 185 | "outputs": [], 186 | "source": [ 187 | "# the above will retrieve a length 512 sequence-level representation\n", 188 | "# you can also get a representation for each residue\n", 189 | "\n", 190 | "return_layers = [\n", 191 | " \"model.tr_encoder\", # residue-level representation\n", 192 | " \"model.avg_pooling\", # sequence-level representation following avg pooling\n", 193 | " \"model.fc1\", # sequence-level representation following the final fully connected layer\n", 194 | "]\n", 195 | "\n", 196 | "extractor = tx.Extractor(model.eval(), return_layers)\n", 197 | "\n", 198 | "amino_acid_sequences = [\"SMART\", \"MAGIC\"]\n", 199 | "encoded_seqs = data_encoder.encode_sequences(amino_acid_sequences)\n", 200 | "\n", 201 | "with torch.no_grad():\n", 202 | " model_out, intermediate_out = extractor(torch.tensor(encoded_seqs))" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": 42, 208 | "metadata": {}, 209 | "outputs": [ 210 | { 211 | "name": "stdout", 212 | "output_type": "stream", 213 | "text": [ 214 | "Layer: model.tr_encoder\n", 215 | "Output shape: torch.Size([2, 5, 512])\n", 216 | "\n", 217 | "Layer: model.avg_pooling\n", 218 | "Output shape: torch.Size([2, 512])\n", 219 | "\n", 220 | "Layer: model.fc1\n", 221 | "Output shape: torch.Size([2, 512])\n" 222 | ] 223 | } 224 | ], 225 | "source": [ 226 | "for k, v in intermediate_out.items():\n", 227 | " print(\"Layer: {}\\nOutput shape: {}\\n\".format(k, v.shape))" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": null, 233 | "metadata": {}, 234 | "outputs": [], 235 | "source": [] 236 | } 237 | ], 238 | "metadata": { 239 | "kernelspec": { 240 | "display_name": "Python 3 (ipykernel)", 241 | "language": "python", 242 | "name": "python3" 243 | }, 244 | "language_info": { 245 | "codemirror_mode": { 246 | "name": "ipython", 247 | "version": 3 248 | }, 249 | "file_extension": ".py", 250 | "mimetype": "text/x-python", 251 | "name": "python", 252 | "nbconvert_exporter": "python", 253 | "pygments_lexer": "ipython3", 254 | "version": "3.9.16" 255 | } 256 | }, 257 | "nbformat": 4, 258 | "nbformat_minor": 4 259 | } 260 | -------------------------------------------------------------------------------- /pdbs/pab1_cm.pdb: -------------------------------------------------------------------------------- 1 | ATOM 1 N GLY A 1 -14.422 25.734 -5.746 1.00 0.00 N 2 | ATOM 2 CA GLY A 1 -15.203 24.662 -5.115 1.00 0.00 C 3 | ATOM 3 C GLY A 1 -14.487 23.322 -5.215 1.00 0.00 C 4 | ATOM 4 O GLY A 1 -15.061 22.336 -5.691 1.00 0.00 O 5 | ATOM 10 N ASN A 2 -13.214 23.298 -4.818 1.00 0.00 N 6 | ATOM 11 CA ASN A 2 -12.380 22.099 -4.924 1.00 0.00 C 7 | ATOM 12 C ASN A 2 -12.321 21.350 -3.595 1.00 0.00 C 8 | ATOM 13 O ASN A 2 -11.749 21.848 -2.626 1.00 0.00 O 9 | ATOM 14 CB ASN A 2 -11.004 22.475 -5.456 1.00 0.00 C 10 | ATOM 15 CG ASN A 2 -10.105 21.294 -5.801 1.00 0.00 C 11 | ATOM 16 OD1 ASN A 2 -10.309 20.145 -5.399 1.00 0.00 O 12 | ATOM 17 ND2 ASN A 2 -9.088 21.602 -6.581 1.00 0.00 N 13 | ATOM 24 N ILE A 3 -12.987 20.198 -3.552 1.00 0.00 N 14 | ATOM 25 
CA ILE A 3 -13.111 19.388 -2.339 1.00 0.00 C 15 | ATOM 26 C ILE A 3 -11.955 18.383 -2.279 1.00 0.00 C 16 | ATOM 27 O ILE A 3 -11.637 17.704 -3.269 1.00 0.00 O 17 | ATOM 28 CB ILE A 3 -14.514 18.692 -2.294 1.00 0.00 C 18 | ATOM 29 CG1 ILE A 3 -14.771 17.939 -0.981 1.00 0.00 C 19 | ATOM 30 CG2 ILE A 3 -14.647 17.722 -3.425 1.00 0.00 C 20 | ATOM 31 CD1 ILE A 3 -16.274 17.589 -0.797 1.00 0.00 C 21 | ATOM 43 N PHE A 4 -11.283 18.341 -1.122 1.00 0.00 N 22 | ATOM 44 CA PHE A 4 -10.155 17.436 -0.920 1.00 0.00 C 23 | ATOM 45 C PHE A 4 -10.661 16.137 -0.366 1.00 0.00 C 24 | ATOM 46 O PHE A 4 -11.352 16.110 0.657 1.00 0.00 O 25 | ATOM 47 CB PHE A 4 -9.103 18.009 0.017 1.00 0.00 C 26 | ATOM 48 CG PHE A 4 -7.943 17.071 0.172 1.00 0.00 C 27 | ATOM 49 CD1 PHE A 4 -7.063 16.930 -0.865 1.00 0.00 C 28 | ATOM 50 CD2 PHE A 4 -7.728 16.332 1.329 1.00 0.00 C 29 | ATOM 51 CE1 PHE A 4 -5.991 16.087 -0.788 1.00 0.00 C 30 | ATOM 52 CE2 PHE A 4 -6.636 15.480 1.408 1.00 0.00 C 31 | ATOM 53 CZ PHE A 4 -5.773 15.360 0.346 1.00 0.00 C 32 | ATOM 63 N ILE A 5 -10.339 15.058 -1.047 1.00 0.00 N 33 | ATOM 64 CA ILE A 5 -10.849 13.748 -0.712 1.00 0.00 C 34 | ATOM 65 C ILE A 5 -9.733 12.867 -0.152 1.00 0.00 C 35 | ATOM 66 O ILE A 5 -8.807 12.510 -0.882 1.00 0.00 O 36 | ATOM 67 CB ILE A 5 -11.419 13.233 -2.043 1.00 0.00 C 37 | ATOM 68 CG1 ILE A 5 -12.474 14.279 -2.540 1.00 0.00 C 38 | ATOM 69 CG2 ILE A 5 -12.001 11.881 -1.904 1.00 0.00 C 39 | ATOM 70 CD1 ILE A 5 -12.858 14.133 -3.925 1.00 0.00 C 40 | ATOM 82 N LYS A 6 -9.826 12.496 1.130 1.00 0.00 N 41 | ATOM 83 CA LYS A 6 -8.783 11.737 1.831 1.00 0.00 C 42 | ATOM 84 C LYS A 6 -9.218 10.329 2.259 1.00 0.00 C 43 | ATOM 85 O LYS A 6 -10.316 10.149 2.795 1.00 0.00 O 44 | ATOM 86 CB LYS A 6 -8.356 12.530 3.067 1.00 0.00 C 45 | ATOM 87 CG LYS A 6 -7.219 11.941 3.897 1.00 0.00 C 46 | ATOM 88 CD LYS A 6 -6.844 12.874 5.046 1.00 0.00 C 47 | ATOM 89 CE LYS A 6 -5.710 12.297 5.888 1.00 0.00 C 48 | ATOM 90 NZ LYS A 6 -5.333 13.202 7.016 1.00 0.00 N 49 | ATOM 104 N ASN A 7 -8.274 9.366 2.191 1.00 0.00 N 50 | ATOM 105 CA ASN A 7 -8.491 7.952 2.533 1.00 0.00 C 51 | ATOM 106 C ASN A 7 -9.432 7.386 1.496 1.00 0.00 C 52 | ATOM 107 O ASN A 7 -10.439 6.750 1.793 1.00 0.00 O 53 | ATOM 108 CB ASN A 7 -9.065 7.762 3.932 1.00 0.00 C 54 | ATOM 109 CG ASN A 7 -8.816 6.355 4.498 1.00 0.00 C 55 | ATOM 110 OD1 ASN A 7 -7.731 5.784 4.300 1.00 0.00 O 56 | ATOM 111 ND2 ASN A 7 -9.792 5.803 5.180 1.00 0.00 N 57 | ATOM 118 N LEU A 8 -9.085 7.686 0.259 1.00 0.00 N 58 | ATOM 119 CA LEU A 8 -9.833 7.261 -0.890 1.00 0.00 C 59 | ATOM 120 C LEU A 8 -9.372 5.874 -1.287 1.00 0.00 C 60 | ATOM 121 O LEU A 8 -8.217 5.662 -1.658 1.00 0.00 O 61 | ATOM 122 CB LEU A 8 -9.618 8.298 -2.008 1.00 0.00 C 62 | ATOM 123 CG LEU A 8 -10.363 8.119 -3.325 1.00 0.00 C 63 | ATOM 124 CD1 LEU A 8 -11.871 8.223 -3.085 1.00 0.00 C 64 | ATOM 125 CD2 LEU A 8 -9.900 9.222 -4.304 1.00 0.00 C 65 | ATOM 137 N HIS A 9 -10.299 4.929 -1.229 1.00 0.00 N 66 | ATOM 138 CA HIS A 9 -10.009 3.530 -1.488 1.00 0.00 C 67 | ATOM 139 C HIS A 9 -9.318 3.361 -2.853 1.00 0.00 C 68 | ATOM 140 O HIS A 9 -9.773 3.957 -3.832 1.00 0.00 O 69 | ATOM 141 CB HIS A 9 -11.318 2.733 -1.423 1.00 0.00 C 70 | ATOM 142 CG HIS A 9 -11.162 1.273 -1.530 1.00 0.00 C 71 | ATOM 143 ND1 HIS A 9 -10.964 0.628 -2.740 1.00 0.00 N 72 | ATOM 144 CD2 HIS A 9 -11.178 0.308 -0.583 1.00 0.00 C 73 | ATOM 145 CE1 HIS A 9 -10.849 -0.680 -2.513 1.00 0.00 C 74 | ATOM 146 NE2 HIS A 9 -10.982 -0.890 -1.218 1.00 0.00 N 75 | ATOM 154 N PRO A 10 -8.247 2.530 -2.961 1.00 0.00 N 76 | ATOM 155 CA PRO A 10 
-7.463 2.245 -4.160 1.00 0.00 C 77 | ATOM 156 C PRO A 10 -8.216 1.897 -5.437 1.00 0.00 C 78 | ATOM 157 O PRO A 10 -7.713 2.181 -6.521 1.00 0.00 O 79 | ATOM 158 CB PRO A 10 -6.614 1.049 -3.718 1.00 0.00 C 80 | ATOM 159 CG PRO A 10 -6.414 1.247 -2.247 1.00 0.00 C 81 | ATOM 160 CD PRO A 10 -7.703 1.838 -1.744 1.00 0.00 C 82 | ATOM 168 N ASP A 11 -9.407 1.294 -5.349 1.00 0.00 N 83 | ATOM 169 CA ASP A 11 -10.095 0.943 -6.588 1.00 0.00 C 84 | ATOM 170 C ASP A 11 -10.914 2.105 -7.152 1.00 0.00 C 85 | ATOM 171 O ASP A 11 -11.527 1.994 -8.226 1.00 0.00 O 86 | ATOM 172 CB ASP A 11 -10.995 -0.273 -6.379 1.00 0.00 C 87 | ATOM 173 CG ASP A 11 -10.216 -1.581 -6.127 1.00 0.00 C 88 | ATOM 174 OD1 ASP A 11 -9.095 -1.715 -6.563 1.00 0.00 O 89 | ATOM 175 OD2 ASP A 11 -10.768 -2.435 -5.480 1.00 0.00 O 90 | ATOM 180 N ILE A 12 -10.948 3.237 -6.458 1.00 0.00 N 91 | ATOM 181 CA ILE A 12 -11.752 4.320 -6.974 1.00 0.00 C 92 | ATOM 182 C ILE A 12 -10.954 5.125 -7.965 1.00 0.00 C 93 | ATOM 183 O ILE A 12 -10.210 6.034 -7.606 1.00 0.00 O 94 | ATOM 184 CB ILE A 12 -12.266 5.232 -5.836 1.00 0.00 C 95 | ATOM 185 CG1 ILE A 12 -13.108 4.367 -4.867 1.00 0.00 C 96 | ATOM 186 CG2 ILE A 12 -13.089 6.433 -6.421 1.00 0.00 C 97 | ATOM 187 CD1 ILE A 12 -13.488 5.021 -3.574 1.00 0.00 C 98 | ATOM 199 N ASP A 13 -11.156 4.805 -9.232 1.00 0.00 N 99 | ATOM 200 CA ASP A 13 -10.457 5.490 -10.307 1.00 0.00 C 100 | ATOM 201 C ASP A 13 -11.192 6.799 -10.549 1.00 0.00 C 101 | ATOM 202 O ASP A 13 -12.200 7.063 -9.887 1.00 0.00 O 102 | ATOM 203 CB ASP A 13 -10.423 4.601 -11.563 1.00 0.00 C 103 | ATOM 204 CG ASP A 13 -9.331 4.969 -12.606 1.00 0.00 C 104 | ATOM 205 OD1 ASP A 13 -8.920 6.114 -12.647 1.00 0.00 O 105 | ATOM 206 OD2 ASP A 13 -8.950 4.110 -13.350 1.00 0.00 O 106 | ATOM 211 N ASN A 14 -10.728 7.614 -11.486 1.00 0.00 N 107 | ATOM 212 CA ASN A 14 -11.371 8.901 -11.714 1.00 0.00 C 108 | ATOM 213 C ASN A 14 -12.826 8.782 -12.134 1.00 0.00 C 109 | ATOM 214 O ASN A 14 -13.651 9.603 -11.737 1.00 0.00 O 110 | ATOM 215 CB ASN A 14 -10.595 9.740 -12.705 1.00 0.00 C 111 | ATOM 216 CG ASN A 14 -11.203 11.111 -12.864 1.00 0.00 C 112 | ATOM 217 OD1 ASN A 14 -11.506 11.787 -11.870 1.00 0.00 O 113 | ATOM 218 ND2 ASN A 14 -11.375 11.534 -14.091 1.00 0.00 N 114 | ATOM 225 N LYS A 15 -13.160 7.765 -12.927 1.00 0.00 N 115 | ATOM 226 CA LYS A 15 -14.541 7.618 -13.370 1.00 0.00 C 116 | ATOM 227 C LYS A 15 -15.434 7.408 -12.156 1.00 0.00 C 117 | ATOM 228 O LYS A 15 -16.485 8.036 -12.018 1.00 0.00 O 118 | ATOM 229 CB LYS A 15 -14.683 6.453 -14.354 1.00 0.00 C 119 | ATOM 230 CG LYS A 15 -15.999 6.439 -15.154 1.00 0.00 C 120 | ATOM 231 CD LYS A 15 -17.105 5.605 -14.486 1.00 0.00 C 121 | ATOM 232 CE LYS A 15 -18.334 5.496 -15.397 1.00 0.00 C 122 | ATOM 233 NZ LYS A 15 -19.468 4.780 -14.736 1.00 0.00 N 123 | ATOM 247 N ALA A 16 -15.019 6.484 -11.298 1.00 0.00 N 124 | ATOM 248 CA ALA A 16 -15.738 6.163 -10.083 1.00 0.00 C 125 | ATOM 249 C ALA A 16 -15.829 7.367 -9.147 1.00 0.00 C 126 | ATOM 250 O ALA A 16 -16.873 7.578 -8.507 1.00 0.00 O 127 | ATOM 251 CB ALA A 16 -15.056 5.009 -9.387 1.00 0.00 C 128 | ATOM 257 N LEU A 17 -14.757 8.174 -9.078 1.00 0.00 N 129 | ATOM 258 CA LEU A 17 -14.784 9.324 -8.193 1.00 0.00 C 130 | ATOM 259 C LEU A 17 -15.820 10.310 -8.717 1.00 0.00 C 131 | ATOM 260 O LEU A 17 -16.582 10.895 -7.940 1.00 0.00 O 132 | ATOM 261 CB LEU A 17 -13.412 10.018 -8.112 1.00 0.00 C 133 | ATOM 262 CG LEU A 17 -13.282 11.165 -7.060 1.00 0.00 C 134 | ATOM 263 CD1 LEU A 17 -13.501 10.569 -5.683 1.00 0.00 C 135 | ATOM 264 CD2 LEU A 17 -11.899 11.825 -7.130 
1.00 0.00 C 136 | ATOM 276 N TYR A 18 -15.842 10.498 -10.042 1.00 0.00 N 137 | ATOM 277 CA TYR A 18 -16.803 11.367 -10.690 1.00 0.00 C 138 | ATOM 278 C TYR A 18 -18.218 10.915 -10.372 1.00 0.00 C 139 | ATOM 279 O TYR A 18 -19.041 11.723 -9.953 1.00 0.00 O 140 | ATOM 280 CB TYR A 18 -16.594 11.399 -12.202 1.00 0.00 C 141 | ATOM 281 CG TYR A 18 -17.645 12.191 -12.929 1.00 0.00 C 142 | ATOM 282 CD1 TYR A 18 -17.522 13.559 -13.084 1.00 0.00 C 143 | ATOM 283 CD2 TYR A 18 -18.763 11.535 -13.429 1.00 0.00 C 144 | ATOM 284 CE1 TYR A 18 -18.511 14.255 -13.747 1.00 0.00 C 145 | ATOM 285 CE2 TYR A 18 -19.742 12.237 -14.077 1.00 0.00 C 146 | ATOM 286 CZ TYR A 18 -19.619 13.592 -14.237 1.00 0.00 C 147 | ATOM 287 OH TYR A 18 -20.597 14.303 -14.892 1.00 0.00 O 148 | ATOM 297 N ASP A 19 -18.526 9.619 -10.559 1.00 0.00 N 149 | ATOM 298 CA ASP A 19 -19.887 9.156 -10.271 1.00 0.00 C 150 | ATOM 299 C ASP A 19 -20.270 9.381 -8.811 1.00 0.00 C 151 | ATOM 300 O ASP A 19 -21.415 9.730 -8.503 1.00 0.00 O 152 | ATOM 301 CB ASP A 19 -20.059 7.658 -10.564 1.00 0.00 C 153 | ATOM 302 CG ASP A 19 -20.136 7.255 -12.050 1.00 0.00 C 154 | ATOM 303 OD1 ASP A 19 -20.287 8.093 -12.902 1.00 0.00 O 155 | ATOM 304 OD2 ASP A 19 -20.085 6.061 -12.307 1.00 0.00 O 156 | ATOM 309 N THR A 20 -19.306 9.191 -7.909 1.00 0.00 N 157 | ATOM 310 CA THR A 20 -19.524 9.361 -6.480 1.00 0.00 C 158 | ATOM 311 C THR A 20 -19.883 10.802 -6.126 1.00 0.00 C 159 | ATOM 312 O THR A 20 -20.820 11.049 -5.367 1.00 0.00 O 160 | ATOM 313 CB THR A 20 -18.253 8.949 -5.689 1.00 0.00 C 161 | ATOM 314 OG1 THR A 20 -17.973 7.556 -5.924 1.00 0.00 O 162 | ATOM 315 CG2 THR A 20 -18.432 9.208 -4.168 1.00 0.00 C 163 | ATOM 323 N PHE A 21 -19.127 11.754 -6.666 1.00 0.00 N 164 | ATOM 324 CA PHE A 21 -19.326 13.162 -6.370 1.00 0.00 C 165 | ATOM 325 C PHE A 21 -20.297 13.952 -7.265 1.00 0.00 C 166 | ATOM 326 O PHE A 21 -20.857 14.948 -6.805 1.00 0.00 O 167 | ATOM 327 CB PHE A 21 -17.958 13.833 -6.328 1.00 0.00 C 168 | ATOM 328 CG PHE A 21 -17.184 13.527 -5.060 1.00 0.00 C 169 | ATOM 329 CD1 PHE A 21 -16.546 12.317 -4.880 1.00 0.00 C 170 | ATOM 330 CD2 PHE A 21 -17.068 14.475 -4.057 1.00 0.00 C 171 | ATOM 331 CE1 PHE A 21 -15.861 12.040 -3.728 1.00 0.00 C 172 | ATOM 332 CE2 PHE A 21 -16.357 14.195 -2.921 1.00 0.00 C 173 | ATOM 333 CZ PHE A 21 -15.767 12.971 -2.759 1.00 0.00 C 174 | ATOM 343 N SER A 22 -20.563 13.513 -8.507 1.00 0.00 N 175 | ATOM 344 CA SER A 22 -21.421 14.276 -9.438 1.00 0.00 C 176 | ATOM 345 C SER A 22 -22.840 14.409 -8.914 1.00 0.00 C 177 | ATOM 346 O SER A 22 -23.579 15.328 -9.278 1.00 0.00 O 178 | ATOM 347 CB SER A 22 -21.476 13.630 -10.809 1.00 0.00 C 179 | ATOM 348 OG SER A 22 -22.234 12.456 -10.781 1.00 0.00 O 180 | ATOM 354 N VAL A 23 -23.205 13.525 -8.002 1.00 0.00 N 181 | ATOM 355 CA VAL A 23 -24.507 13.511 -7.364 1.00 0.00 C 182 | ATOM 356 C VAL A 23 -24.765 14.811 -6.588 1.00 0.00 C 183 | ATOM 357 O VAL A 23 -25.915 15.119 -6.276 1.00 0.00 O 184 | ATOM 358 CB VAL A 23 -24.627 12.276 -6.441 1.00 0.00 C 185 | ATOM 359 CG1 VAL A 23 -23.788 12.451 -5.222 1.00 0.00 C 186 | ATOM 360 CG2 VAL A 23 -26.085 12.039 -6.048 1.00 0.00 C 187 | ATOM 370 N PHE A 24 -23.703 15.557 -6.245 1.00 0.00 N 188 | ATOM 371 CA PHE A 24 -23.847 16.787 -5.486 1.00 0.00 C 189 | ATOM 372 C PHE A 24 -23.802 18.026 -6.370 1.00 0.00 C 190 | ATOM 373 O PHE A 24 -23.863 19.143 -5.862 1.00 0.00 O 191 | ATOM 374 CB PHE A 24 -22.712 16.890 -4.470 1.00 0.00 C 192 | ATOM 375 CG PHE A 24 -22.697 15.738 -3.568 1.00 0.00 C 193 | ATOM 376 CD1 PHE A 24 -21.676 14.819 -3.638 1.00 0.00 C 194 
| ATOM 377 CD2 PHE A 24 -23.717 15.523 -2.700 1.00 0.00 C 195 | ATOM 378 CE1 PHE A 24 -21.681 13.715 -2.848 1.00 0.00 C 196 | ATOM 379 CE2 PHE A 24 -23.735 14.430 -1.907 1.00 0.00 C 197 | ATOM 380 CZ PHE A 24 -22.710 13.512 -1.975 1.00 0.00 C 198 | ATOM 390 N GLY A 25 -23.727 17.846 -7.686 1.00 0.00 N 199 | ATOM 391 CA GLY A 25 -23.649 18.967 -8.611 1.00 0.00 C 200 | ATOM 392 C GLY A 25 -22.585 18.738 -9.674 1.00 0.00 C 201 | ATOM 393 O GLY A 25 -21.672 17.933 -9.504 1.00 0.00 O 202 | ATOM 397 N ASP A 26 -22.686 19.484 -10.769 1.00 0.00 N 203 | ATOM 398 CA ASP A 26 -21.756 19.335 -11.883 1.00 0.00 C 204 | ATOM 399 C ASP A 26 -20.306 19.573 -11.494 1.00 0.00 C 205 | ATOM 400 O ASP A 26 -19.990 20.458 -10.690 1.00 0.00 O 206 | ATOM 401 CB ASP A 26 -22.149 20.267 -13.029 1.00 0.00 C 207 | ATOM 402 CG ASP A 26 -23.427 19.804 -13.758 1.00 0.00 C 208 | ATOM 403 OD1 ASP A 26 -23.857 18.695 -13.542 1.00 0.00 O 209 | ATOM 404 OD2 ASP A 26 -23.955 20.561 -14.522 1.00 0.00 O 210 | ATOM 409 N ILE A 27 -19.440 18.762 -12.100 1.00 0.00 N 211 | ATOM 410 CA ILE A 27 -17.995 18.758 -11.893 1.00 0.00 C 212 | ATOM 411 C ILE A 27 -17.248 19.320 -13.092 1.00 0.00 C 213 | ATOM 412 O ILE A 27 -17.507 18.940 -14.230 1.00 0.00 O 214 | ATOM 413 CB ILE A 27 -17.516 17.325 -11.563 1.00 0.00 C 215 | ATOM 414 CG1 ILE A 27 -18.169 16.884 -10.236 1.00 0.00 C 216 | ATOM 415 CG2 ILE A 27 -15.974 17.227 -11.515 1.00 0.00 C 217 | ATOM 416 CD1 ILE A 27 -18.007 15.418 -9.892 1.00 0.00 C 218 | ATOM 428 N LEU A 28 -16.340 20.255 -12.818 1.00 0.00 N 219 | ATOM 429 CA LEU A 28 -15.535 20.902 -13.846 1.00 0.00 C 220 | ATOM 430 C LEU A 28 -14.346 20.016 -14.171 1.00 0.00 C 221 | ATOM 431 O LEU A 28 -13.986 19.816 -15.333 1.00 0.00 O 222 | ATOM 432 CB LEU A 28 -14.979 22.225 -13.299 1.00 0.00 C 223 | ATOM 433 CG LEU A 28 -15.983 23.294 -12.891 1.00 0.00 C 224 | ATOM 434 CD1 LEU A 28 -15.221 24.412 -12.185 1.00 0.00 C 225 | ATOM 435 CD2 LEU A 28 -16.746 23.825 -14.100 1.00 0.00 C 226 | ATOM 447 N SER A 29 -13.738 19.485 -13.112 1.00 0.00 N 227 | ATOM 448 CA SER A 29 -12.551 18.639 -13.225 1.00 0.00 C 228 | ATOM 449 C SER A 29 -12.321 17.756 -12.009 1.00 0.00 C 229 | ATOM 450 O SER A 29 -12.553 18.172 -10.874 1.00 0.00 O 230 | ATOM 451 CB SER A 29 -11.321 19.482 -13.463 1.00 0.00 C 231 | ATOM 452 OG SER A 29 -10.178 18.675 -13.512 1.00 0.00 O 232 | ATOM 458 N SER A 30 -11.853 16.538 -12.232 1.00 0.00 N 233 | ATOM 459 CA SER A 30 -11.551 15.635 -11.125 1.00 0.00 C 234 | ATOM 460 C SER A 30 -10.371 14.735 -11.441 1.00 0.00 C 235 | ATOM 461 O SER A 30 -10.023 14.531 -12.611 1.00 0.00 O 236 | ATOM 462 CB SER A 30 -12.765 14.793 -10.793 1.00 0.00 C 237 | ATOM 463 OG SER A 30 -13.117 13.966 -11.871 1.00 0.00 O 238 | ATOM 469 N LYS A 31 -9.748 14.213 -10.382 1.00 0.00 N 239 | ATOM 470 CA LYS A 31 -8.615 13.307 -10.539 1.00 0.00 C 240 | ATOM 471 C LYS A 31 -8.281 12.483 -9.299 1.00 0.00 C 241 | ATOM 472 O LYS A 31 -8.668 12.813 -8.171 1.00 0.00 O 242 | ATOM 473 CB LYS A 31 -7.371 14.102 -10.963 1.00 0.00 C 243 | ATOM 474 CG LYS A 31 -6.900 15.139 -9.938 1.00 0.00 C 244 | ATOM 475 CD LYS A 31 -5.709 15.940 -10.456 1.00 0.00 C 245 | ATOM 476 CE LYS A 31 -5.215 16.947 -9.418 1.00 0.00 C 246 | ATOM 477 NZ LYS A 31 -4.072 17.761 -9.931 1.00 0.00 N 247 | ATOM 491 N ILE A 32 -7.489 11.436 -9.519 1.00 0.00 N 248 | ATOM 492 CA ILE A 32 -6.959 10.585 -8.458 1.00 0.00 C 249 | ATOM 493 C ILE A 32 -5.485 10.867 -8.352 1.00 0.00 C 250 | ATOM 494 O ILE A 32 -4.791 10.889 -9.369 1.00 0.00 O 251 | ATOM 495 CB ILE A 32 -7.114 9.087 -8.789 1.00 0.00 C 252 | ATOM 
496 CG1 ILE A 32 -8.554 8.771 -9.152 1.00 0.00 C 253 | ATOM 497 CG2 ILE A 32 -6.615 8.198 -7.598 1.00 0.00 C 254 | ATOM 498 CD1 ILE A 32 -9.572 9.045 -8.116 1.00 0.00 C 255 | ATOM 510 N ALA A 33 -4.994 11.121 -7.154 1.00 0.00 N 256 | ATOM 511 CA ALA A 33 -3.574 11.357 -7.015 1.00 0.00 C 257 | ATOM 512 C ALA A 33 -2.874 10.014 -6.992 1.00 0.00 C 258 | ATOM 513 O ALA A 33 -3.216 9.168 -6.159 1.00 0.00 O 259 | ATOM 514 CB ALA A 33 -3.297 12.132 -5.757 1.00 0.00 C 260 | ATOM 520 N THR A 34 -1.910 9.812 -7.889 1.00 0.00 N 261 | ATOM 521 CA THR A 34 -1.221 8.532 -7.971 1.00 0.00 C 262 | ATOM 522 C THR A 34 0.287 8.676 -7.951 1.00 0.00 C 263 | ATOM 523 O THR A 34 0.837 9.773 -8.107 1.00 0.00 O 264 | ATOM 524 CB THR A 34 -1.572 7.780 -9.264 1.00 0.00 C 265 | ATOM 525 OG1 THR A 34 -1.027 8.485 -10.394 1.00 0.00 O 266 | ATOM 526 CG2 THR A 34 -3.081 7.659 -9.417 1.00 0.00 C 267 | ATOM 534 N ASP A 35 0.946 7.541 -7.783 1.00 0.00 N 268 | ATOM 535 CA ASP A 35 2.384 7.406 -7.836 1.00 0.00 C 269 | ATOM 536 C ASP A 35 2.793 7.280 -9.301 1.00 0.00 C 270 | ATOM 537 O ASP A 35 1.939 7.274 -10.196 1.00 0.00 O 271 | ATOM 538 CB ASP A 35 2.810 6.178 -7.010 1.00 0.00 C 272 | ATOM 539 CG ASP A 35 4.262 6.217 -6.498 1.00 0.00 C 273 | ATOM 540 OD1 ASP A 35 5.071 6.886 -7.108 1.00 0.00 O 274 | ATOM 541 OD2 ASP A 35 4.541 5.575 -5.522 1.00 0.00 O 275 | ATOM 546 N GLU A 36 4.086 7.145 -9.552 1.00 0.00 N 276 | ATOM 547 CA GLU A 36 4.606 7.073 -10.916 1.00 0.00 C 277 | ATOM 548 C GLU A 36 4.040 5.897 -11.703 1.00 0.00 C 278 | ATOM 549 O GLU A 36 3.826 5.995 -12.910 1.00 0.00 O 279 | ATOM 550 CB GLU A 36 6.132 6.973 -10.903 1.00 0.00 C 280 | ATOM 551 CG GLU A 36 6.845 8.240 -10.448 1.00 0.00 C 281 | ATOM 552 CD GLU A 36 8.344 8.095 -10.431 1.00 0.00 C 282 | ATOM 553 OE1 GLU A 36 8.820 7.012 -10.687 1.00 0.00 O 283 | ATOM 554 OE2 GLU A 36 9.013 9.065 -10.166 1.00 0.00 O 284 | ATOM 561 N ASN A 37 3.780 4.784 -11.026 1.00 0.00 N 285 | ATOM 562 CA ASN A 37 3.262 3.591 -11.678 1.00 0.00 C 286 | ATOM 563 C ASN A 37 1.732 3.509 -11.725 1.00 0.00 C 287 | ATOM 564 O ASN A 37 1.181 2.464 -12.075 1.00 0.00 O 288 | ATOM 565 CB ASN A 37 3.848 2.367 -11.011 1.00 0.00 C 289 | ATOM 566 CG ASN A 37 3.428 2.234 -9.575 1.00 0.00 C 290 | ATOM 567 OD1 ASN A 37 2.503 2.909 -9.082 1.00 0.00 O 291 | ATOM 568 ND2 ASN A 37 4.112 1.366 -8.869 1.00 0.00 N 292 | ATOM 575 N GLY A 38 1.036 4.588 -11.355 1.00 0.00 N 293 | ATOM 576 CA GLY A 38 -0.422 4.612 -11.383 1.00 0.00 C 294 | ATOM 577 C GLY A 38 -1.093 4.204 -10.069 1.00 0.00 C 295 | ATOM 578 O GLY A 38 -2.308 4.355 -9.920 1.00 0.00 O 296 | ATOM 582 N LYS A 39 -0.328 3.678 -9.116 1.00 0.00 N 297 | ATOM 583 CA LYS A 39 -0.898 3.279 -7.834 1.00 0.00 C 298 | ATOM 584 C LYS A 39 -1.445 4.483 -7.070 1.00 0.00 C 299 | ATOM 585 O LYS A 39 -0.750 5.486 -6.904 1.00 0.00 O 300 | ATOM 586 CB LYS A 39 0.151 2.550 -6.989 1.00 0.00 C 301 | ATOM 587 CG LYS A 39 -0.353 2.010 -5.653 1.00 0.00 C 302 | ATOM 588 CD LYS A 39 0.739 1.224 -4.926 1.00 0.00 C 303 | ATOM 589 CE LYS A 39 0.241 0.694 -3.586 1.00 0.00 C 304 | ATOM 590 NZ LYS A 39 1.293 -0.082 -2.865 1.00 0.00 N 305 | ATOM 604 N SER A 40 -2.677 4.388 -6.576 1.00 0.00 N 306 | ATOM 605 CA SER A 40 -3.268 5.490 -5.814 1.00 0.00 C 307 | ATOM 606 C SER A 40 -2.516 5.832 -4.540 1.00 0.00 C 308 | ATOM 607 O SER A 40 -2.092 4.945 -3.795 1.00 0.00 O 309 | ATOM 608 CB SER A 40 -4.705 5.197 -5.464 1.00 0.00 C 310 | ATOM 609 OG SER A 40 -5.210 6.183 -4.590 1.00 0.00 O 311 | ATOM 615 N LYS A 41 -2.420 7.134 -4.258 1.00 0.00 N 312 | ATOM 616 CA LYS A 41 -1.789 7.649 -3.042 
1.00  0.00           C
313 | ATOM    617  C   LYS A  41      -2.785   7.821  -1.893  1.00  0.00           C
314 | ATOM    618  O   LYS A  41      -2.442   8.363  -0.842  1.00  0.00           O
315 | ATOM    619  CB  LYS A  41      -1.045   8.957  -3.310  1.00  0.00           C
316 | ATOM    620  CG  LYS A  41       0.129   8.766  -4.242  1.00  0.00           C
317 | ATOM    621  CD  LYS A  41       1.075   9.969  -4.320  1.00  0.00           C
318 | ATOM    622  CE  LYS A  41       0.466  11.157  -5.033  1.00  0.00           C
319 | ATOM    623  NZ  LYS A  41       1.508  12.156  -5.422  1.00  0.00           N
320 | ATOM    637  N   GLY A  42      -4.027   7.380  -2.104  1.00  0.00           N
321 | ATOM    638  CA  GLY A  42      -5.062   7.464  -1.079  1.00  0.00           C
322 | ATOM    639  C   GLY A  42      -5.860   8.764  -1.046  1.00  0.00           C
323 | ATOM    640  O   GLY A  42      -6.501   9.069  -0.026  1.00  0.00           O
324 | ATOM    644  N   PHE A  43      -5.795   9.567  -2.103  1.00  0.00           N
325 | ATOM    645  CA  PHE A  43      -6.545  10.807  -2.121  1.00  0.00           C
326 | ATOM    646  C   PHE A  43      -6.832  11.269  -3.536  1.00  0.00           C
327 | ATOM    647  O   PHE A  43      -6.225  10.800  -4.506  1.00  0.00           O
328 | ATOM    648  CB  PHE A  43      -5.825  11.898  -1.317  1.00  0.00           C
329 | ATOM    649  CG  PHE A  43      -4.514  12.358  -1.809  1.00  0.00           C
330 | ATOM    650  CD1 PHE A  43      -4.430  13.484  -2.610  1.00  0.00           C
331 | ATOM    651  CD2 PHE A  43      -3.354  11.699  -1.457  1.00  0.00           C
332 | ATOM    652  CE1 PHE A  43      -3.210  13.949  -3.029  1.00  0.00           C
333 | ATOM    653  CE2 PHE A  43      -2.139  12.155  -1.883  1.00  0.00           C
334 | ATOM    654  CZ  PHE A  43      -2.064  13.287  -2.667  1.00  0.00           C
335 | ATOM    664  N   GLY A  44      -7.742  12.219  -3.642  1.00  0.00           N
336 | ATOM    665  CA  GLY A  44      -8.098  12.781  -4.933  1.00  0.00           C
337 | ATOM    666  C   GLY A  44      -8.795  14.115  -4.796  1.00  0.00           C
338 | ATOM    667  O   GLY A  44      -8.947  14.654  -3.693  1.00  0.00           O
339 | ATOM    671  N   PHE A  45      -9.214  14.649  -5.932  1.00  0.00           N
340 | ATOM    672  CA  PHE A  45      -9.828  15.966  -5.944  1.00  0.00           C
341 | ATOM    673  C   PHE A  45     -11.043  16.056  -6.845  1.00  0.00           C
342 | ATOM    674  O   PHE A  45     -11.049  15.492  -7.949  1.00  0.00           O
343 | ATOM    675  CB  PHE A  45      -8.805  16.968  -6.480  1.00  0.00           C
344 | ATOM    676  CG  PHE A  45      -7.515  17.035  -5.711  1.00  0.00           C
345 | ATOM    677  CD1 PHE A  45      -6.482  16.140  -6.009  1.00  0.00           C
346 | ATOM    678  CD2 PHE A  45      -7.306  17.981  -4.733  1.00  0.00           C
347 | ATOM    679  CE1 PHE A  45      -5.291  16.184  -5.343  1.00  0.00           C
348 | ATOM    680  CE2 PHE A  45      -6.096  18.033  -4.069  1.00  0.00           C
349 | ATOM    681  CZ  PHE A  45      -5.092  17.132  -4.372  1.00  0.00           C
350 | ATOM    691  N   VAL A  46     -12.038  16.824  -6.403  1.00  0.00           N
351 | ATOM    692  CA  VAL A  46     -13.174  17.164  -7.258  1.00  0.00           C
352 | ATOM    693  C   VAL A  46     -13.446  18.664  -7.270  1.00  0.00           C
353 | ATOM    694  O   VAL A  46     -13.779  19.262  -6.250  1.00  0.00           O
354 | ATOM    695  CB  VAL A  46     -14.454  16.385  -6.847  1.00  0.00           C
355 | ATOM    696  CG1 VAL A  46     -15.619  16.830  -7.674  1.00  0.00           C
356 | ATOM    697  CG2 VAL A  46     -14.253  14.891  -7.087  1.00  0.00           C
357 | ATOM    707  N   HIS A  47     -13.370  19.279  -8.441  1.00  0.00           N
358 | ATOM    708  CA  HIS A  47     -13.638  20.697  -8.503  1.00  0.00           C
359 | ATOM    709  C   HIS A  47     -15.002  20.873  -9.131  1.00  0.00           C
360 | ATOM    710  O   HIS A  47     -15.209  20.557 -10.313  1.00  0.00           O
361 | ATOM    711  CB  HIS A  47     -12.535  21.463  -9.250  1.00  0.00           C
362 | ATOM    712  CG  HIS A  47     -12.660  22.967  -9.106  1.00  0.00           C
363 | ATOM    713  ND1 HIS A  47     -11.736  23.855  -9.621  1.00  0.00           N
364 | ATOM    714  CD2 HIS A  47     -13.586  23.717  -8.468  1.00  0.00           C
365 | ATOM    715  CE1 HIS A  47     -12.091  25.087  -9.282  1.00  0.00           C
366 | ATOM    716  NE2 HIS A  47     -13.211  25.017  -8.553  1.00  0.00           N
367 | ATOM    724  N   PHE A  48     -15.935  21.330  -8.300  1.00  0.00           N
368 | ATOM    725  CA  PHE A  48     -17.319  21.527  -8.658  1.00  0.00           C
369 | ATOM    726  C   PHE A  48     -17.548  22.887  -9.278  1.00  0.00           C
370 | ATOM    727  O   PHE A  48     -16.890  23.862  -8.924  1.00  0.00           O
371 | ATOM    728  CB  PHE A  48     -18.214  21.414  -7.448  1.00  0.00           C
372 | ATOM    729  CG  PHE A  48     -18.419  20.032  -6.899  1.00  0.00           C
373 | ATOM    730  CD1 PHE A  48     -17.718  19.577  -5.798  1.00  0.00           C
374 | ATOM    731  CD2 PHE A  48     -19.356  19.197  -7.470  1.00  0.00           C
375 | ATOM    732  CE1 PHE A  48     -17.977  18.329  -5.276  1.00  0.00           C
376 | ATOM    733  CE2 PHE A  48     -19.605  17.966  -6.957  1.00  0.00           C
377 | ATOM    734  CZ  PHE A  48     -18.925  17.533  -5.855  1.00  0.00           C
378 | ATOM    744  N   GLU A  49     -18.524  22.953 -10.176  1.00  0.00           N
379 | ATOM    745  CA  GLU A  49     -18.910  24.221 -10.795  1.00  0.00           C
380 | ATOM    746  C   GLU A  49     -19.373  25.262  -9.780  1.00  0.00           C
381 | ATOM    747  O   GLU A  49     -19.100  26.453  -9.934  1.00  0.00           O
382 | ATOM    748  CB  GLU A  49     -19.978  24.001 -11.860  1.00  0.00           C
383 | ATOM    749  CG  GLU A  49     -20.380  25.282 -12.594  1.00  0.00           C
384 | ATOM    750  CD  GLU A  49     -21.290  25.037 -13.742  1.00  0.00           C
385 | ATOM    751  OE1 GLU A  49     -21.524  23.902 -14.047  1.00  0.00           O
386 | ATOM    752  OE2 GLU A  49     -21.752  25.986 -14.324  1.00  0.00           O
387 | ATOM    759  N   GLU A  50     -20.097  24.819  -8.752  1.00  0.00           N
388 | ATOM    760  CA  GLU A  50     -20.618  25.717  -7.731  1.00  0.00           C
389 | ATOM    761  C   GLU A  50     -20.099  25.346  -6.348  1.00  0.00           C
390 | ATOM    762  O   GLU A  50     -19.999  24.162  -6.000  1.00  0.00           O
391 | ATOM    763  CB  GLU A  50     -22.150  25.694  -7.721  1.00  0.00           C
392 | ATOM    764  CG  GLU A  50     -22.806  26.198  -9.009  1.00  0.00           C
393 | ATOM    765  CD  GLU A  50     -24.322  26.205  -8.935  1.00  0.00           C
394 | ATOM    766  OE1 GLU A  50     -24.845  25.747  -7.947  1.00  0.00           O
395 | ATOM    767  OE2 GLU A  50     -24.948  26.671  -9.861  1.00  0.00           O
396 | ATOM    774  N   GLU A  51     -19.900  26.371  -5.513  1.00  0.00           N
397 | ATOM    775  CA  GLU A  51     -19.410  26.173  -4.151  1.00  0.00           C
398 | ATOM    776  C   GLU A  51     -20.411  25.364  -3.351  1.00  0.00           C
399 | ATOM    777  O   GLU A  51     -20.023  24.564  -2.495  1.00  0.00           O
400 | ATOM    778  CB  GLU A  51     -19.190  27.522  -3.464  1.00  0.00           C
401 | ATOM    779  CG  GLU A  51     -18.103  28.374  -4.101  1.00  0.00           C
402 | ATOM    780  CD  GLU A  51     -16.781  27.703  -4.163  1.00  0.00           C
403 | ATOM    781  OE1 GLU A  51     -16.315  27.196  -3.170  1.00  0.00           O
404 | ATOM    782  OE2 GLU A  51     -16.238  27.662  -5.247  1.00  0.00           O
405 | ATOM    789  N   GLY A  52     -21.697  25.569  -3.643  1.00  0.00           N
406 | ATOM    790  CA  GLY A  52     -22.781  24.866  -2.988  1.00  0.00           C
407 | ATOM    791  C   GLY A  52     -22.695  23.356  -3.191  1.00  0.00           C
408 | ATOM    792  O   GLY A  52     -23.171  22.596  -2.342  1.00  0.00           O
409 | ATOM    796  N   ALA A  53     -22.167  22.908  -4.344  1.00  0.00           N
410 | ATOM    797  CA  ALA A  53     -22.053  21.486  -4.609  1.00  0.00           C
411 | ATOM    798  C   ALA A  53     -20.965  20.904  -3.742  1.00  0.00           C
412 | ATOM    799  O   ALA A  53     -21.128  19.831  -3.151  1.00  0.00           O
413 | ATOM    800  CB  ALA A  53     -21.743  21.260  -6.062  1.00  0.00           C
414 | ATOM    806  N   ALA A  54     -19.846  21.628  -3.651  1.00  0.00           N
415 | ATOM    807  CA  ALA A  54     -18.755  21.163  -2.813  1.00  0.00           C
416 | ATOM    808  C   ALA A  54     -19.228  21.099  -1.366  1.00  0.00           C
417 | ATOM    809  O   ALA A  54     -18.911  20.156  -0.638  1.00  0.00           O
418 | ATOM    810  CB  ALA A  54     -17.543  22.064  -2.962  1.00  0.00           C
419 | ATOM    816  N   LYS A  55     -20.026  22.091  -0.961  1.00  0.00           N
420 | ATOM    817  CA  LYS A  55     -20.544  22.138   0.391  1.00  0.00           C
421 | ATOM    818  C   LYS A  55     -21.406  20.914   0.670  1.00  0.00           C
422 | ATOM    819  O   LYS A  55     -21.192  20.235   1.681  1.00  0.00           O
423 | ATOM    820  CB  LYS A  55     -21.325  23.432   0.630  1.00  0.00           C
424 | ATOM    821  CG  LYS A  55     -21.573  23.788   2.117  1.00  0.00           C
425 | ATOM    822  CD  LYS A  55     -22.912  23.260   2.651  1.00  0.00           C
426 | ATOM    823  CE  LYS A  55     -23.184  23.781   4.071  1.00  0.00           C
427 | ATOM    824  NZ  LYS A  55     -24.448  23.229   4.647  1.00  0.00           N
428 | ATOM    838  N   GLU A  56     -22.385  20.621  -0.204  1.00  0.00           N
429 | ATOM    839  CA  GLU A  56     -23.239  19.464   0.042  1.00  0.00           C
430 | ATOM    840  C   GLU A  56     -22.412  18.194   0.115  1.00  0.00           C
431 | ATOM    841  O   GLU A  56     -22.657  17.344   0.977  1.00  0.00           O
432 | ATOM    842  CB  GLU A  56     -24.325  19.286  -1.010  1.00  0.00           C
433 | ATOM    843  CG  GLU A  56     -25.311  18.146  -0.650  1.00  0.00           C
434 | ATOM    844  CD  GLU A  56     -26.417  17.965  -1.634  1.00  0.00           C
435 | ATOM    845  OE1 GLU A  56     -26.442  18.667  -2.606  1.00  0.00           O
436 | ATOM    846  OE2 GLU A  56     -27.234  17.105  -1.419  1.00  0.00           O
437 | ATOM    853  N   ALA A  57     -21.407  18.063  -0.757  1.00  0.00           N
438 | ATOM    854  CA  ALA A  57     -20.570  16.879  -0.746  1.00  0.00           C
439 | ATOM    855  C   ALA A  57     -19.909  16.716   0.625  1.00  0.00           C
440 | ATOM    856  O   ALA A  57     -19.759  15.591   1.101  1.00  0.00           O
441 | ATOM    857  CB  ALA A  57     -19.538  16.952  -1.854  1.00  0.00           C
442 | ATOM    863  N   ILE A  58     -19.530  17.818   1.296  1.00  0.00           N
443 | ATOM    864  CA  ILE A  58     -18.942  17.688   2.629  1.00  0.00           C
444 | ATOM    865  C   ILE A  58     -19.992  17.163   3.608  1.00  0.00           C
445 | ATOM    866  O   ILE A  58     -19.756  16.195   4.342  1.00  0.00           O
446 | ATOM    867  CB  ILE A  58     -18.424  19.033   3.227  1.00  0.00           C
447 | ATOM    868  CG1 ILE A  58     -17.283  19.674   2.397  1.00  0.00           C
448 | ATOM    869  CG2 ILE A  58     -17.911  18.770   4.690  1.00  0.00           C
449 | ATOM    870  CD1 ILE A  58     -15.990  18.948   2.374  1.00  0.00           C
450 | ATOM    882  N   ASP A  59     -21.179  17.790   3.605  1.00  0.00           N
451 | ATOM    883  CA  ASP A  59     -22.237  17.394   4.538  1.00  0.00           C
452 | ATOM    884  C   ASP A  59     -22.648  15.937   4.375  1.00  0.00           C
453 | ATOM    885  O   ASP A  59     -22.963  15.258   5.354  1.00  0.00           O
454 | ATOM    886  CB  ASP A  59     -23.511  18.239   4.364  1.00  0.00           C
455 | ATOM    887  CG  ASP A  59     -23.469  19.694   4.904  1.00  0.00           C
456 | ATOM    888  OD1 ASP A  59     -22.587  20.051   5.643  1.00  0.00           O
457 | ATOM    889  OD2 ASP A  59     -24.384  20.433   4.575  1.00  0.00           O
458 | ATOM    894  N   ALA A  60     -22.659  15.468   3.132  1.00  0.00           N
459 | ATOM    895  CA  ALA A  60     -23.054  14.107   2.826  1.00  0.00           C
460 | ATOM    896  C   ALA A  60     -21.951  13.069   2.981  1.00  0.00           C
461 | ATOM    897  O   ALA A  60     -22.196  11.994   3.527  1.00  0.00           O
462 | ATOM    898  CB  ALA A  60     -23.531  14.054   1.400  1.00  0.00           C
463 | ATOM    904  N   LEU A  61     -20.744  13.353   2.492  1.00  0.00           N
464 | ATOM    905  CA  LEU A  61     -19.708  12.338   2.487  1.00  0.00           C
465 | ATOM    906  C   LEU A  61     -18.746  12.330   3.651  1.00  0.00           C
466 | ATOM    907  O   LEU A  61     -17.992  11.370   3.785  1.00  0.00           O
467 | ATOM    908  CB  LEU A  61     -18.864  12.454   1.225  1.00  0.00           C
468 | ATOM    909  CG  LEU A  61     -19.561  12.290  -0.119  1.00  0.00           C
469 | ATOM    910  CD1 LEU A  61     -18.555  12.528  -1.138  1.00  0.00           C
470 | ATOM    911  CD2 LEU A  61     -20.166  10.921  -0.270  1.00  0.00           C
471 | ATOM    923  N   ASN A  62     -18.732  13.342   4.508  1.00  0.00           N
472 | ATOM    924  CA  ASN A  62     -17.757  13.312   5.584  1.00  0.00           C
473 | ATOM    925  C   ASN A  62     -18.153  12.246   6.601  1.00  0.00           C
474 | ATOM    926  O   ASN A  62     -19.125  12.414   7.340  1.00  0.00           O
475 | ATOM    927  CB  ASN A  62     -17.652  14.688   6.242  1.00  0.00           C
476 | ATOM    928  CG  ASN A  62     -16.593  14.765   7.308  1.00  0.00           C
477 | ATOM    929  OD1 ASN A  62     -15.882  13.781   7.528  1.00  0.00           O
478 | ATOM    930  ND2 ASN A  62     -16.487  15.897   7.976  1.00  0.00           N
479 | ATOM    937  N   GLY A  63     -17.421  11.125   6.602  1.00  0.00           N
480 | ATOM    938  CA  GLY A  63     -17.717   9.981   7.458  1.00  0.00           C
481 | ATOM    939  C   GLY A  63     -18.501   8.862   6.755  1.00  0.00           C
482 | ATOM    940  O   GLY A  63     -18.898   7.879   7.386  1.00  0.00           O
483 | ATOM    944  N   MET A  64     -18.732   9.006   5.455  1.00  0.00           N
484 | ATOM    945  CA  MET A  64     -19.421   7.978   4.679  1.00  0.00           C
485 | ATOM    946  C   MET A  64     -18.374   6.966   4.262  1.00  0.00           C
486 | ATOM    947  O   MET A  64     -17.204   7.316   4.157  1.00  0.00           O
487 | ATOM    948  CB  MET A  64     -20.124   8.562   3.443  1.00  0.00           C
488 | ATOM    949  CG  MET A  64     -20.946   7.519   2.605  1.00  0.00           C
489 | ATOM    950  SD  MET A  64     -21.829   8.206   1.188  1.00  0.00           S
490 | ATOM    951  CE  MET A  64     -23.197   9.105   1.914  1.00  0.00           C
491 | ATOM    961  N   LEU A  65     -18.745   5.710   4.060  1.00  0.00           N
492 | ATOM    962  CA  LEU A  65     -17.758   4.761   3.559  1.00  0.00           C
493 | ATOM    963  C   LEU A  65     -17.901   4.546   2.070  1.00  0.00           C
494 | ATOM    964  O   LEU A  65     -19.010   4.372   1.552  1.00  0.00           O
495 | ATOM    965  CB  LEU A  65     -17.863   3.403   4.268  1.00  0.00           C
496 | ATOM    966  CG  LEU A  65     -17.088   3.231   5.607  1.00  0.00           C
497 | ATOM    967  CD1 LEU A  65     -17.606   4.186   6.691  1.00  0.00           C
498 | ATOM    968  CD2 LEU A  65     -17.196   1.790   6.043  1.00  0.00           C
499 | ATOM    980  N   LEU A  66     -16.768   4.558   1.383  1.00  0.00           N
500 | ATOM    981  CA  LEU A  66     -16.711   4.289  -0.042  1.00  0.00           C
501 | ATOM    982  C   LEU A  66     -16.004   2.952  -0.152  1.00  0.00           C
502 | ATOM    983  O   LEU A  66     -14.933   2.765   0.438  1.00  0.00           O
503 | ATOM    984  CB  LEU A  66     -15.969   5.396  -0.810  1.00  0.00           C
504 | ATOM    985  CG  LEU A  66     -16.501   6.869  -0.634  1.00  0.00           C
505 | ATOM    986  CD1 LEU A  66     -15.607   7.819  -1.460  1.00  0.00           C
506 | ATOM    987  CD2 LEU A  66     -17.964   6.988  -1.059  1.00  0.00           C
507 | ATOM    999  N   ASN A  67     -16.580   2.010  -0.878  1.00  0.00           N
508 | ATOM   1000  CA  ASN A  67     -16.026   0.664  -0.915  1.00  0.00           C
509 | ATOM   1001  C   ASN A  67     -15.858   0.188   0.540  1.00  0.00           C
510 | ATOM   1002  O   ASN A  67     -16.852   0.054   1.255  1.00  0.00           O
511 | ATOM   1003  CB  ASN A  67     -14.723   0.580  -1.710  1.00  0.00           C
512 | ATOM   1004  CG  ASN A  67     -14.894   0.927  -3.161  1.00  0.00           C
513 | ATOM   1005  OD1 ASN A  67     -16.025   0.971  -3.665  1.00  0.00           O
514 | ATOM   1006  ND2 ASN A  67     -13.802   1.120  -3.869  1.00  0.00           N
515 | ATOM   1013  N   GLY A  68     -14.629  -0.082   0.977  1.00  0.00           N
516 | ATOM   1014  CA  GLY A  68     -14.380  -0.569   2.332  1.00  0.00           C
517 | ATOM   1015  C   GLY A  68     -13.747   0.448   3.294  1.00  0.00           C
518 | ATOM   1016  O   GLY A  68     -13.262   0.056   4.359  1.00  0.00           O
519 | ATOM   1020  N   GLN A  69     -13.685   1.734   2.925  1.00  0.00           N
520 | ATOM   1021  CA  GLN A  69     -13.010   2.710   3.792  1.00  0.00           C
521 | ATOM   1022  C   GLN A  69     -13.760   4.024   4.022  1.00  0.00           C
522 | ATOM   1023  O   GLN A  69     -14.364   4.601   3.112  1.00  0.00           O
523 | ATOM   1024  CB  GLN A  69     -11.631   3.030   3.199  1.00  0.00           C
524 | ATOM   1025  CG  GLN A  69     -10.650   1.856   3.182  1.00  0.00           C
525 | ATOM   1026  CD  GLN A  69      -9.327   2.211   2.535  1.00  0.00           C
526 | ATOM   1027  OE1 GLN A  69      -9.219   3.233   1.858  1.00  0.00           O
527 | ATOM   1028  NE2 GLN A  69      -8.321   1.369   2.735  1.00  0.00           N
528 | ATOM   1037  N   GLU A  70     -13.690   4.517   5.264  1.00  0.00           N
529 | ATOM   1038  CA  GLU A  70     -14.299   5.800   5.618  1.00  0.00           C
530 | ATOM   1039  C   GLU A  70     -13.638   6.922   4.846  1.00  0.00           C
531 | ATOM   1040  O   GLU A  70     -12.414   7.048   4.873  1.00  0.00           O
532 | ATOM   1041  CB  GLU A  70     -14.178   6.039   7.120  1.00  0.00           C
533 | ATOM   1042  CG  GLU A  70     -14.868   7.286   7.620  1.00  0.00           C
534 | ATOM   1043  CD  GLU A  70     -14.755   7.443   9.107  1.00  0.00           C
535 | ATOM   1044  OE1 GLU A  70     -14.195   6.576   9.734  1.00  0.00           O
536 | ATOM   1045  OE2 GLU A  70     -15.218   8.431   9.619  1.00  0.00           O
537 | ATOM   1052  N   ILE A  71     -14.430   7.770   4.198  1.00  0.00           N
538 | ATOM   1053  CA  ILE A  71     -13.828   8.849   3.432  1.00  0.00           C
539 | ATOM   1054  C   ILE A  71     -13.945  10.142   4.211  1.00  0.00           C
540 | ATOM   1055  O   ILE A  71     -14.988  10.423   4.818  1.00  0.00           O
541 | ATOM   1056  CB  ILE A  71     -14.501   9.011   2.017  1.00  0.00           C
542 | ATOM   1057  CG1 ILE A  71     -13.656   9.902   1.076  1.00  0.00           C
543 | ATOM   1058  CG2 ILE A  71     -15.911   9.656   2.090  1.00  0.00           C
544 | ATOM   1059  CD1 ILE A  71     -12.437   9.208   0.584  1.00  0.00           C
545 | ATOM   1071  N   TYR A  72     -12.870  10.915   4.211  1.00  0.00           N
546 | ATOM   1072  CA  TYR A  72     -12.886  12.216   4.824  1.00  0.00           C
547 | ATOM   1073  C   TYR A  72     -12.804  13.276   3.757  1.00  0.00           C
548 | ATOM   1074  O   TYR A  72     -11.948  13.244   2.872  1.00  0.00           O
549 | ATOM   1075  CB  TYR A  72     -11.746  12.382   5.817  1.00  0.00           C
550 | ATOM   1076  CG  TYR A  72     -11.642  13.797   6.310  1.00  0.00           C
551 | ATOM   1077  CD1 TYR A  72     -12.565  14.295   7.200  1.00  0.00           C
552 | ATOM   1078  CD2 TYR A  72     -10.617  14.606   5.852  1.00  0.00           C
553 | ATOM   1079  CE1 TYR A  72     -12.471  15.598   7.625  1.00  0.00           C
554 | ATOM   1080  CE2 TYR A  72     -10.518  15.892   6.284  1.00  0.00           C
555 | ATOM   1081  CZ  TYR A  72     -11.443  16.398   7.165  1.00  0.00           C
556 | ATOM   1082  OH  TYR A  72     -11.344  17.701   7.596  1.00  0.00           O
557 | ATOM   1092  N   VAL A  73     -13.716  14.213   3.808  1.00  0.00           N
558 | ATOM   1093  CA  VAL A  73     -13.696  15.265   2.822  1.00  0.00           C
559 | ATOM   1094  C   VAL A  73     -13.552  16.623   3.483  1.00  0.00           C
560 | ATOM   1095  O   VAL A  73     -14.253  16.935   4.448  1.00  0.00           O
561 | ATOM   1096  CB  VAL A  73     -14.928  15.169   1.908  1.00  0.00           C
562 | ATOM   1097  CG1 VAL A  73     -14.859  13.907   1.018  1.00  0.00           C
563 | ATOM   1098  CG2 VAL A  73     -16.176  15.090   2.736  1.00  0.00           C
564 | ATOM   1108  N   ALA A  74     -12.641  17.427   2.942  1.00  0.00           N
565 | ATOM   1109  CA  ALA A  74     -12.378  18.749   3.493  1.00  0.00           C
566 | ATOM   1110  C   ALA A  74     -12.659  19.872   2.485  1.00  0.00           C
567 | ATOM   1111  O   ALA A  74     -12.449  19.693   1.279  1.00  0.00           O
568 | ATOM   1112  CB  ALA A  74     -10.933  18.846   3.937  1.00  0.00           C
569 | ATOM   1118  N   PRO A  75     -13.125  21.048   2.953  1.00  0.00           N
570 | ATOM   1119  CA  PRO A  75     -13.317  22.256   2.184  1.00  0.00           C
571 | ATOM   1120  C   PRO A  75     -12.095  23.155   2.331  1.00  0.00           C
572 | ATOM   1121  O   PRO A  75     -11.079  22.927   1.682  1.00  0.00           O
573 | ATOM   1122  OXT PRO A  75     -12.009  23.815   3.365  1.00  0.00           O
574 | ATOM   1123  CB  PRO A  75     -14.530  22.885   2.865  1.00  0.00           C
575 | ATOM   1124  CG  PRO A  75     -14.374  22.496   4.336  1.00  0.00           C
576 | ATOM   1125  CD  PRO A  75     -13.655  21.153   4.342  1.00  0.00           C
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | setuptools
2 | numpy>=1.23.2
3 | networkx>=2.6.3
4 | scipy>=1.9.1
5 | biopandas>=0.2.7
6 | torch>=1.11.0
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | name = metl-pretrained
3 | version = 0.1
4 | description = Mutational effect transfer learning pretrained models
5 | url = https://github.com/gitter-lab/metl-pretrained
6 | author = Sam Gelman
7 | author_email = sgelman2@wisc.edu
8 | license = MIT
9 |
10 | [options]
11 | packages=find:
12 | install_requires =
13 |     torch
14 |     numpy
15 |     scipy
16 |     biopandas
17 |     networkx
18 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 |
3 | if __name__ == '__main__':
4 |     setup()
5 |
--------------------------------------------------------------------------------
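
Both requirements.txt and the install_requires list in setup.cfg above pull in biopandas, which parses PDB coordinate files like the one shown into pandas data frames. The following is a minimal illustrative sketch, not code from this repository: the file path is a hypothetical example, while PandasPdb, its df attribute, and the column names are part of the standard biopandas API.

from biopandas.pdb import PandasPdb

# Parse a PDB file; read_pdb() returns the PandasPdb object itself.
# The path is a hypothetical example, not a file guaranteed by this package.
ppdb = PandasPdb().read_pdb("example_structure.pdb")

# Each ATOM record (like the lines above) becomes one row in a DataFrame,
# with fixed-width columns mapped to named fields.
atoms = ppdb.df["ATOM"]

# Select alpha-carbon coordinates, e.g. to compute pairwise residue distances.
ca = atoms.loc[atoms["atom_name"] == "CA",
               ["residue_number", "x_coord", "y_coord", "z_coord"]]
print(ca.head())

Installing the package itself with pip install . resolves the same set of dependencies through the install_requires section of setup.cfg shown above.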