├── VFN-IF ├── api │ ├── losses │ │ ├── __init__.py │ │ └── utils.py │ ├── utils │ │ ├── __init__.py │ │ └── layer_norm.py │ ├── __init__.py │ ├── modules │ │ ├── inverse_module_utils │ │ │ └── __init__.py │ │ ├── refine_module_utils │ │ │ └── __init__.py │ │ └── refine_module.py │ ├── data │ │ └── __init__.py │ └── model.py ├── unifold │ ├── losses │ │ ├── __init__.py │ │ └── utils.py │ ├── optim │ │ └── __init__.py │ ├── __init__.py │ ├── modules │ │ ├── refine_module_utils │ │ │ └── __init__.py │ │ ├── __init__.py │ │ ├── inverse_module_utils │ │ │ └── __init__.py │ │ ├── inverse_module.py │ │ └── refine_module.py │ ├── data │ │ └── __init__.py │ ├── model.py │ └── task.py ├── Uni-Core │ ├── unicore │ │ ├── logging │ │ │ └── __init__.py │ │ ├── version.txt │ │ ├── version.py │ │ ├── distributed │ │ │ ├── __init__.py │ │ │ └── module_proxy_wrapper.py │ │ ├── modules │ │ │ ├── __init__.py │ │ │ ├── transformer_encoder_layer.py │ │ │ └── layer_norm.py │ │ ├── data │ │ │ ├── num_samples_dataset.py │ │ │ ├── from_numpy_dataset.py │ │ │ ├── lru_cache_dataset.py │ │ │ ├── append_token_dataset.py │ │ │ ├── prepend_token_dataset.py │ │ │ ├── numel_dataset.py │ │ │ ├── tokenize_dataset.py │ │ │ ├── bert_tokenize_dataset.py │ │ │ ├── sort_dataset.py │ │ │ ├── pad_dataset.py │ │ │ ├── __init__.py │ │ │ ├── lmdb_dataset.py │ │ │ ├── raw_dataset.py │ │ │ ├── base_wrapper_dataset.py │ │ │ ├── unicore_dataset.py │ │ │ └── nested_dictionary_dataset.py │ │ ├── losses │ │ │ ├── __init__.py │ │ │ ├── cross_entropy.py │ │ │ ├── masked_lm.py │ │ │ └── unicore_loss.py │ │ ├── optim │ │ │ ├── lr_scheduler │ │ │ │ ├── __init__.py │ │ │ │ ├── pass_through.py │ │ │ │ ├── unicore_lr_scheduler.py │ │ │ │ ├── exponential_decay_schedule.py │ │ │ │ ├── fixed_schedule.py │ │ │ │ ├── triangular_lr_scheduler.py │ │ │ │ ├── inverse_square_root_schedule.py │ │ │ │ └── polynomial_decay_schedule.py │ │ │ ├── __init__.py │ │ │ ├── adagrad.py │ │ │ ├── sgd.py │ │ │ ├── adadelta.py │ │ │ └── dynamic_loss_scaler.py │ │ ├── __init__.py │ │ ├── models │ │ │ ├── unicore_model.py │ │ │ └── distributed_unicore_model.py │ │ ├── registry.py │ │ ├── tasks │ │ │ └── __init__.py │ │ └── nan_detector.py │ ├── unicore_cli │ │ └── __init__.py │ ├── examples │ │ └── bert │ │ │ ├── __init__.py │ │ │ ├── example_data │ │ │ ├── README.md │ │ │ └── preprocess.py │ │ │ └── train_bert_test.sh │ ├── requirements.txt │ ├── csrc │ │ ├── multi_tensor │ │ │ └── interface.cpp │ │ ├── rounding │ │ │ ├── interface.cpp │ │ │ └── fp32_to_bf16.cu │ │ ├── util.h │ │ ├── adam │ │ │ └── interface.cpp │ │ ├── softmax_dropout │ │ │ └── interface.cpp │ │ └── layernorm │ │ │ └── interface_gamma_beta.cpp │ ├── LICENSE │ ├── .gitignore │ ├── docker │ │ └── rdma │ │ │ └── Dockerfile │ └── README.md ├── .gitignore ├── zenodo.sh ├── readme.md ├── docker │ └── Dockerfile ├── extra.py └── train_script │ ├── vec │ ├── x2_16vec_v2.sh │ └── x2_64vec_v2.sh │ └── layers │ ├── x2_vecdirect_noedge_atom_gbf_nofeat_5l.sh │ ├── x2_vecdirect_noedge_atom_gbf_nofeat_10l.sh │ ├── x2_vecdirect_noedge_atom_gbf_nofeat_12l.sh │ ├── x2_vecdirect_noedge_atom_gbf_nofeat_8l.sh │ └── x2_vecdirect_noedge_atom_gbf_nofeat_15l.sh ├── VFN-Diff ├── openfold │ ├── data │ │ ├── __init__.py │ │ ├── tools │ │ │ ├── __init__.py │ │ │ ├── utils.py │ │ │ └── hhsearch.py │ │ ├── errors.py │ │ └── feature_pipeline.py │ ├── resources │ │ └── __init__.py │ ├── model │ │ ├── __init__.py │ │ ├── dropout.py │ │ └── pair_transition.py │ ├── np │ │ ├── __init__.py │ │ └── relax │ │ │ ├── __init__.py │ │ │ ├── 
utils.py │ │ │ └── relax.py │ └── utils │ │ ├── callbacks.py │ │ ├── seed.py │ │ ├── suppress_output.py │ │ ├── precision_utils.py │ │ ├── argparse.py │ │ ├── validation_metrics.py │ │ ├── exponential_moving_average.py │ │ ├── superimposition.py │ │ ├── checkpointing.py │ │ ├── logger.py │ │ └── lr_schedulers.py ├── weian_script │ ├── baseline_1gpu.sh │ ├── baseline_1gpu_re.sh │ ├── icml_published.sh │ ├── process_data.sh │ └── vfn_full_4_gpu.sh ├── README.md ├── config │ ├── pure_dsm.yaml │ ├── icml_published.yaml │ ├── baseline.yaml │ ├── baseline_1gpu.yaml │ ├── baseline_1gpu_re.yaml │ ├── debug.yaml │ ├── vfn_full_4_4090.yaml │ ├── inference.yaml │ └── base.yaml ├── setup.py ├── data │ ├── errors.py │ └── parsers.py ├── LICENSE ├── analysis │ └── utils.py └── .gitignore ├── graph └── logo.png ├── .gitignore └── README.md /VFN-IF/api/losses/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /VFN-IF/unifold/losses/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /VFN-Diff/openfold/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /VFN-Diff/openfold/data/tools/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /VFN-Diff/openfold/resources/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/logging/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore_cli/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/version.txt: -------------------------------------------------------------------------------- 1 | 0.0.1 2 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.0.1" 2 | -------------------------------------------------------------------------------- /VFN-IF/api/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .layer_norm import LayerNorm -------------------------------------------------------------------------------- /VFN-IF/.gitignore: -------------------------------------------------------------------------------- 1 | ./processed/* 2 | ./output_dir/* 3 | ./extra.py 4 | .vscode/ -------------------------------------------------------------------------------- /VFN-IF/unifold/optim/__init__.py: -------------------------------------------------------------------------------- 1 | from .onecyclelr import onecycleLRSchedule -------------------------------------------------------------------------------- /graph/logo.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aim-uofa/VFN/HEAD/graph/logo.png -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/examples/bert/__init__.py: -------------------------------------------------------------------------------- 1 | import bert.task 2 | import bert.model -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ./processed/* 2 | ./output_dir/* 3 | ./extra.py 4 | .vscode/ 5 | 6 | *.pyc -------------------------------------------------------------------------------- /VFN-IF/api/__init__.py: -------------------------------------------------------------------------------- 1 | """isort:skip_file""" 2 | 3 | import argparse 4 | 5 | from . import model, loss 6 | -------------------------------------------------------------------------------- /VFN-Diff/weian_script/baseline_1gpu.sh: -------------------------------------------------------------------------------- 1 | python experiments/train_se3_diffusion.py --config-name=baseline_1gpu 2 | -------------------------------------------------------------------------------- /VFN-Diff/weian_script/baseline_1gpu_re.sh: -------------------------------------------------------------------------------- 1 | python experiments/train_se3_diffusion.py --config-name=baseline_1gpu_re 2 | -------------------------------------------------------------------------------- /VFN-Diff/weian_script/icml_published.sh: -------------------------------------------------------------------------------- 1 | python experiments/train_se3_diffusion.py --config-name=icml_published 2 | -------------------------------------------------------------------------------- /VFN-IF/unifold/__init__.py: -------------------------------------------------------------------------------- 1 | """isort:skip_file""" 2 | 3 | import argparse 4 | 5 | from . import task, model, loss, optim 6 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/requirements.txt: -------------------------------------------------------------------------------- 1 | iopath 2 | lmdb 3 | ml_collections 4 | numpy 5 | scipy 6 | tensorboardX 7 | tqdm 8 | tokenizers 9 | -------------------------------------------------------------------------------- /VFN-Diff/README.md: -------------------------------------------------------------------------------- 1 | Please refer to the SE3 diffusion installation. 
The script to run is located at: VFN-Diff/weian_script/vfn_full_4_gpu.sh -------------------------------------------------------------------------------- /VFN-IF/api/modules/inverse_module_utils/__init__.py: -------------------------------------------------------------------------------- 1 | """Modules of Uni-Fold models.""" 2 | 3 | 4 | from .pifold_featurizer import Pifold_featurizer -------------------------------------------------------------------------------- /VFN-IF/api/modules/refine_module_utils/__init__.py: -------------------------------------------------------------------------------- 1 | """Modules of Uni-Fold models.""" 2 | 3 | from .af_refine import AFRefineStructureModule 4 | -------------------------------------------------------------------------------- /VFN-IF/unifold/modules/refine_module_utils/__init__.py: -------------------------------------------------------------------------------- 1 | """Modules of Uni-Fold models.""" 2 | 3 | from .af_refine import AFRefineStructureModule 4 | -------------------------------------------------------------------------------- /VFN-IF/unifold/modules/__init__.py: -------------------------------------------------------------------------------- 1 | """Modules of Uni-Fold models.""" 2 | 3 | from unicore.utils import ( 4 | set_jit_fusion_options, 5 | ) 6 | 7 | set_jit_fusion_options() -------------------------------------------------------------------------------- /VFN-Diff/config/pure_dsm.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - base 3 | 4 | experiment: 5 | name: pure_dsm 6 | separate_rot_loss: False 7 | 8 | hydra: 9 | sweeper: 10 | params: 11 | -------------------------------------------------------------------------------- /VFN-Diff/weian_script/process_data.sh: -------------------------------------------------------------------------------- 1 | python data/process_pdb_dataset.py \ 2 | --mmcif_dir /media/nvme/dataset/protein/alphafold/pdb_mmcif \ 3 | --write_dir /mnt/nas/share2/home/wayne/dataset/se3 -------------------------------------------------------------------------------- /VFN-IF/unifold/modules/inverse_module_utils/__init__.py: -------------------------------------------------------------------------------- 1 | """Modules of Uni-Fold models.""" 2 | 3 | from .af_inverse import AFInverseStructureModule 4 | from .gragh_init_inverse import AFInverseGraghInitModule 5 | from .pifold_featurizer import Pifold_featurizer -------------------------------------------------------------------------------- /VFN-IF/zenodo.sh: -------------------------------------------------------------------------------- 1 | zenodo-cli -t ACCESS_TOKEN deposit --metadata '{"metadata": {"title": "My File", "upload_type": "dataset", "description": "This is a test", "creators": [{"name": "John Smith", "affiliation": "University of XYZ", "orcid": "0000-0002-1825-0097"}]}}' --file my_file.txt 2 | -------------------------------------------------------------------------------- /VFN-Diff/weian_script/vfn_full_4_gpu.sh: -------------------------------------------------------------------------------- 1 | export CUDA_VISIBLE_DEVICES=4,5,6,7 2 | export MASTER_PORT=6000 3 | export NCCL_P2P_DISABLE=1 4 | python -m torch.distributed.run \ 5 | --nnodes 1 \ 6 | --nproc_per_node=4 \ 7 | experiments/train_se3_diffusion.py \ 8 | --config-name=vfn_full_4_4090 -------------------------------------------------------------------------------- /VFN-IF/readme.md: 
-------------------------------------------------------------------------------- 1 | # VFN-IF 2 | ## How to install Uni-Core 3 | ```shell 4 | cd Uni-Core 5 | python setup.py install --build 6 | ``` 7 | 8 | ## Processed Data 9 | We provide the processed data on [Zenodo](https://zenodo.org/records/11369361). You can download the data from that link and place it as `processed` in the root directory. -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/examples/bert/example_data/README.md: -------------------------------------------------------------------------------- 1 | ## A simple BERT example 2 | 3 | 1. download data `wget https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip && unzip wikitext-2-v1.zip` 4 | 2. run `python preprocess.py ./wikitext-2/wiki.train.tokens ./train.lmdb` 5 | 3. run `python preprocess.py ./wikitext-2/wiki.valid.tokens ./valid.lmdb` -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/csrc/multi_tensor/interface.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | 3 | 4 | at::Tensor multi_tensor_l2norm_cuda( 5 | int chunk_size, 6 | std::vector<std::vector<at::Tensor>> tensor_lists); 7 | 8 | 9 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 10 | m.def("l2norm", &multi_tensor_l2norm_cuda, 11 | "Computes L2 norm for a list of contiguous tensors"); 12 | } -------------------------------------------------------------------------------- /VFN-Diff/config/icml_published.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - base 3 | 4 | data: 5 | cluster_path: null 6 | 7 | diffuser: 8 | so3: 9 | use_cached_score: True 10 | 11 | experiment: 12 | name: icml_published 13 | num_epoch: 95 14 | batch_size: 128 15 | max_squared_res: 300000 16 | rot_loss_t_threshold: 0.0 17 | rot_loss_weight: 1.0 18 | separate_rot_loss: False 19 | sample_mode: time_batch 20 | 21 | hydra: 22 | sweeper: 23 | params: 24 | -------------------------------------------------------------------------------- /VFN-Diff/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name="se3_diffusion", 5 | packages=[ 6 | 'data', 7 | 'analysis', 8 | 'model', 9 | 'experiments', 10 | 'openfold' 11 | ], 12 | package_dir={ 13 | 'data': './data', 14 | 'analysis': './analysis', 15 | 'model': './model', 16 | 'experiments': './experiments', 17 | 'openfold': './openfold', 18 | }, 19 | ) 20 | -------------------------------------------------------------------------------- /VFN-Diff/config/baseline.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - base 3 | 4 | data: 5 | cluster_path: null 6 | 7 | diffuser: 8 | so3: 9 | use_cached_score: True 10 | 11 | experiment: 12 | name: se3_simple_baseline_15epoch_32batch 13 | num_epoch: 15 14 | batch_size: 32 15 | max_squared_res: 300000 16 | rot_loss_t_threshold: 0.0 17 | rot_loss_weight: 1.0 18 | separate_rot_loss: False 19 | sample_mode: time_batch 20 | 21 | hydra: 22 | sweeper: 23 | params: 24 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/distributed/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 2 | # Copyright (c) Facebook, Inc. and its affiliates. 
3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from .module_proxy_wrapper import ModuleProxyWrapper 8 | from .legacy_distributed_data_parallel import LegacyDistributedDataParallel 9 | 10 | __all__ = [ 11 | "ModuleProxyWrapper", 12 | ] 13 | -------------------------------------------------------------------------------- /VFN-Diff/config/baseline_1gpu.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - base 3 | 4 | data: 5 | cluster_path: null 6 | 7 | diffuser: 8 | so3: 9 | use_cached_score: True 10 | 11 | experiment: 12 | name: se3_baseline_10epoch_16batch_weian 13 | num_epoch: 10 14 | batch_size: 16 15 | max_squared_res: 300000 16 | rot_loss_t_threshold: 0.0 17 | rot_loss_weight: 1.0 18 | separate_rot_loss: False 19 | sample_mode: time_batch 20 | num_gpus: 1 21 | 22 | hydra: 23 | sweeper: 24 | params: 25 | -------------------------------------------------------------------------------- /VFN-Diff/config/baseline_1gpu_re.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - base 3 | 4 | data: 5 | cluster_path: null 6 | 7 | diffuser: 8 | so3: 9 | use_cached_score: True 10 | 11 | experiment: 12 | name: se3_baseline_10epoch_16batch_weian_re 13 | num_epoch: 10 14 | batch_size: 16 15 | max_squared_res: 300000 16 | rot_loss_t_threshold: 0.0 17 | rot_loss_weight: 1.0 18 | separate_rot_loss: False 19 | sample_mode: time_batch 20 | num_gpus: 1 21 | 22 | hydra: 23 | sweeper: 24 | params: 25 | -------------------------------------------------------------------------------- /VFN-Diff/config/debug.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - base 3 | 4 | data: 5 | cluster_path: null 6 | 7 | diffuser: 8 | so3: 9 | use_cached_score: True 10 | 11 | experiment: 12 | name: icml_published_debug 13 | num_epoch: 95 14 | batch_size: 2 15 | max_squared_res: 300000 16 | rot_loss_t_threshold: 0.0 17 | rot_loss_weight: 1.0 18 | separate_rot_loss: False 19 | sample_mode: time_batch 20 | num_gpus: 1 21 | early_ckpt: False 22 | use_wandb: False 23 | 24 | hydra: 25 | sweeper: 26 | params: 27 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/modules/__init__.py: -------------------------------------------------------------------------------- 1 | """isort:skip_file""" 2 | 3 | from .layer_norm import LayerNorm 4 | from .softmax_dropout import softmax_dropout 5 | from .multihead_attention import SelfMultiheadAttention, CrossMultiheadAttention 6 | from .transformer_encoder_layer import TransformerEncoderLayer 7 | from .transformer_encoder import TransformerEncoder, init_bert_params, relative_position_bucket 8 | from .transformer_decoder_layer import TransformerDecoderLayer 9 | from .transformer_decoder import TransformerDecoder 10 | -------------------------------------------------------------------------------- /VFN-Diff/config/vfn_full_4_4090.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - base 3 | 4 | data: 5 | cluster_path: null 6 | 7 | diffuser: 8 | so3: 9 | use_cached_score: True 10 | 11 | 12 | experiment: 13 | name: vfn_v11_full_4_4090 14 | num_epoch: 95 15 | batch_size: 128 16 | max_squared_res: 300000 17 | rot_loss_t_threshold: 0.0 18 | rot_loss_weight: 1.0 19 | separate_rot_loss: False 20 | sample_mode: time_batch 21 | num_gpus: 4 22 | 
use_ddp : True 23 | num_loader_workers: 64 24 | 25 | hydra: 26 | sweeper: 27 | params: 28 | -------------------------------------------------------------------------------- /VFN-Diff/openfold/model/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import importlib as importlib 4 | 5 | _files = glob.glob(os.path.join(os.path.dirname(__file__), "*.py")) 6 | __all__ = [ 7 | os.path.basename(f)[:-3] 8 | for f in _files 9 | if os.path.isfile(f) and not f.endswith("__init__.py") 10 | ] 11 | _modules = [(m, importlib.import_module("." + m, __name__)) for m in __all__] 12 | for _m in _modules: 13 | globals()[_m[0]] = _m[1] 14 | 15 | # Avoid needlessly cluttering the global namespace 16 | del _files, _m, _modules 17 | -------------------------------------------------------------------------------- /VFN-Diff/openfold/np/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import importlib as importlib 4 | 5 | _files = glob.glob(os.path.join(os.path.dirname(__file__), "*.py")) 6 | __all__ = [ 7 | os.path.basename(f)[:-3] 8 | for f in _files 9 | if os.path.isfile(f) and not f.endswith("__init__.py") 10 | ] 11 | _modules = [(m, importlib.import_module("." + m, __name__)) for m in __all__] 12 | for _m in _modules: 13 | globals()[_m[0]] = _m[1] 14 | 15 | # Avoid needlessly cluttering the global namespace 16 | del _files, _m, _modules 17 | -------------------------------------------------------------------------------- /VFN-Diff/openfold/np/relax/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import importlib as importlib 4 | 5 | _files = glob.glob(os.path.join(os.path.dirname(__file__), "*.py")) 6 | __all__ = [ 7 | os.path.basename(f)[:-3] 8 | for f in _files 9 | if os.path.isfile(f) and not f.endswith("__init__.py") 10 | ] 11 | _modules = [(m, importlib.import_module("." + m, __name__)) for m in __all__] 12 | for _m in _modules: 13 | globals()[_m[0]] = _m[1] 14 | 15 | # Avoid needlessly cluttering the global namespace 16 | del _files, _m, _modules 17 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/data/num_samples_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from . import UnicoreDataset 8 | 9 | 10 | class NumSamplesDataset(UnicoreDataset): 11 | def __getitem__(self, index): 12 | return 1 13 | 14 | def __len__(self): 15 | return 0 16 | 17 | def collater(self, samples): 18 | return sum(samples) 19 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/data/from_numpy_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | import torch 6 | from functools import lru_cache 7 | 8 | from . 
import BaseWrapperDataset 9 | 10 | 11 | class FromNumpyDataset(BaseWrapperDataset): 12 | def __init__(self, dataset): 13 | super().__init__(dataset) 14 | 15 | @lru_cache(maxsize=16) 16 | def __getitem__(self, idx): 17 | return torch.from_numpy(self.dataset[idx]) 18 | 19 | 20 | -------------------------------------------------------------------------------- /VFN-Diff/data/errors.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | """Error class for handled errors.""" 4 | 5 | 6 | class DataError(Exception): 7 | """Data exception.""" 8 | pass 9 | 10 | 11 | class FileExistsError(DataError): 12 | """Raised when file already exists.""" 13 | pass 14 | 15 | 16 | class MmcifParsingError(DataError): 17 | """Raised when mmcif parsing fails.""" 18 | pass 19 | 20 | 21 | class ResolutionError(DataError): 22 | """Raised when resolution isn't acceptable.""" 23 | pass 24 | 25 | 26 | class LengthError(DataError): 27 | """Raised when length isn't acceptable.""" 28 | pass -------------------------------------------------------------------------------- /VFN-Diff/openfold/utils/callbacks.py: -------------------------------------------------------------------------------- 1 | from pytorch_lightning.utilities import rank_zero_info 2 | from pytorch_lightning.callbacks.early_stopping import EarlyStopping 3 | 4 | class EarlyStoppingVerbose(EarlyStopping): 5 | """ 6 | The default EarlyStopping callback's verbose mode is too verbose. 7 | This class outputs a message only when it's getting ready to stop. 8 | """ 9 | def _evalute_stopping_criteria(self, *args): 10 | should_stop, reason = super()._evalute_stopping_criteria(*args) 11 | if(should_stop): 12 | rank_zero_info(f"{reason}\n") 13 | 14 | return should_stop, reason 15 | -------------------------------------------------------------------------------- /VFN-Diff/openfold/utils/seed.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | import random 4 | import numpy as np 5 | from pytorch_lightning.utilities.seed import seed_everything 6 | 7 | from openfold.utils.suppress_output import SuppressLogging 8 | 9 | 10 | def seed_globally(seed=None): 11 | if("PL_GLOBAL_SEED" not in os.environ): 12 | if(seed is None): 13 | seed = random.randint(0, np.iinfo(np.uint32).max) 14 | os.environ["PL_GLOBAL_SEED"] = str(seed) 15 | logging.info(f'os.environ["PL_GLOBAL_SEED"] set to {seed}') 16 | 17 | # seed_everything is a bit log-happy 18 | with SuppressLogging(logging.INFO): 19 | seed_everything(seed=None) 20 | -------------------------------------------------------------------------------- /VFN-Diff/openfold/utils/suppress_output.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | 4 | 5 | class SuppressStdout: 6 | def __enter__(self): 7 | self.stdout = sys.stdout 8 | dev_null = open("/dev/null", "w") 9 | sys.stdout = dev_null 10 | 11 | def __exit__(self, typ, value, traceback): 12 | fp = sys.stdout 13 | sys.stdout = self.stdout 14 | fp.close() 15 | 16 | 17 | class SuppressLogging: 18 | def __init__(self, level): 19 | self.level = level 20 | 21 | def __enter__(self): 22 | logging.disable(self.level) 23 | 24 | def __exit__(self, typ, value, traceback): 25 | logging.disable(logging.NOTSET) 26 | 27 | -------------------------------------------------------------------------------- /VFN-IF/api/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 
2021 DeepMind Technologies Limited 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Data pipeline for model features.""" -------------------------------------------------------------------------------- /VFN-IF/unifold/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 DeepMind Technologies Limited 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """Data pipeline for model features.""" -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/data/lru_cache_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from functools import lru_cache 8 | 9 | from . 
import BaseWrapperDataset 10 | 11 | 12 | class LRUCacheDataset(BaseWrapperDataset): 13 | def __init__(self, dataset, token=None): 14 | super().__init__(dataset) 15 | 16 | @lru_cache(maxsize=16) 17 | def __getitem__(self, index): 18 | return self.dataset[index] 19 | 20 | @lru_cache(maxsize=16) 21 | def collater(self, samples): 22 | return self.dataset.collater(samples) 23 | -------------------------------------------------------------------------------- /VFN-IF/api/losses/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from api.data import residue_constants as rc 3 | 4 | 5 | def softmax_cross_entropy(logits, labels): 6 | loss = -1 * torch.sum( 7 | labels * torch.nn.functional.log_softmax(logits.float(), dim=-1), 8 | dim=-1, 9 | ) 10 | return loss 11 | 12 | 13 | def sigmoid_cross_entropy(logits, labels): 14 | logits = logits.float() 15 | log_p = torch.nn.functional.logsigmoid(logits) 16 | log_not_p = torch.nn.functional.logsigmoid(-logits) 17 | loss = -labels * log_p - (1 - labels) * log_not_p 18 | return loss 19 | 20 | 21 | def masked_mean(mask, value, dim, eps=1e-10, keepdim=False): 22 | mask = mask.expand(*value.shape) 23 | return torch.sum(mask * value, dim=dim, keepdim=keepdim) / ( 24 | eps + torch.sum(mask, dim=dim, keepdim=keepdim) 25 | ) 26 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/data/append_token_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import numpy as np 8 | import torch 9 | from functools import lru_cache 10 | 11 | from . import BaseWrapperDataset 12 | 13 | 14 | class AppendTokenDataset(BaseWrapperDataset): 15 | def __init__(self, dataset, token=None): 16 | super().__init__(dataset) 17 | self.token = token 18 | 19 | @lru_cache(maxsize=16) 20 | def __getitem__(self, idx): 21 | item = self.dataset[idx] 22 | if self.token is not None: 23 | item = torch.cat([item, torch.full_like(item[0], self.token).unsqueeze(0)], dim=0) 24 | return item 25 | 26 | -------------------------------------------------------------------------------- /VFN-IF/unifold/losses/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from unifold.data import residue_constants as rc 3 | 4 | 5 | def softmax_cross_entropy(logits, labels): 6 | loss = -1 * torch.sum( 7 | labels * torch.nn.functional.log_softmax(logits.float(), dim=-1), 8 | dim=-1, 9 | ) 10 | return loss 11 | 12 | 13 | def sigmoid_cross_entropy(logits, labels): 14 | logits = logits.float() 15 | log_p = torch.nn.functional.logsigmoid(logits) 16 | log_not_p = torch.nn.functional.logsigmoid(-logits) 17 | loss = -labels * log_p - (1 - labels) * log_not_p 18 | return loss 19 | 20 | 21 | def masked_mean(mask, value, dim, eps=1e-10, keepdim=False): 22 | mask = mask.expand(*value.shape) 23 | return torch.sum(mask * value, dim=dim, keepdim=keepdim) / ( 24 | eps + torch.sum(mask, dim=dim, keepdim=keepdim) 25 | ) 26 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/data/prepend_token_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 
2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import numpy as np 8 | import torch 9 | from functools import lru_cache 10 | 11 | from . import BaseWrapperDataset 12 | 13 | 14 | class PrependTokenDataset(BaseWrapperDataset): 15 | 16 | def __init__(self, dataset, token=None): 17 | super().__init__(dataset) 18 | self.token = token 19 | 20 | @lru_cache(maxsize=16) 21 | def __getitem__(self, idx): 22 | item = self.dataset[idx] 23 | if self.token is not None: 24 | item = torch.cat([torch.full_like(item[0], self.token).unsqueeze(0), item], dim=0) 25 | return item 26 | -------------------------------------------------------------------------------- /VFN-IF/unifold/modules/inverse_module.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | from typing import Tuple 5 | 6 | from .inverse_module_utils import AFInverseStructureModule, AFInverseGraghInitModule 7 | 8 | from unifold.modules.featurization import atom14_to_atom37 9 | 10 | class InverseModule(nn.Module): 11 | def __init__(self, config, type): 12 | super(InverseModule, self).__init__() 13 | self.refine_module_type = type 14 | if self.refine_module_type== 'default': 15 | self.refine_module = AFInverseStructureModule(**config["baseline_module"]) 16 | elif self.refine_module_type== 'gragh_init': 17 | self.refine_module = AFInverseGraghInitModule(**config["baseline_module"]) 18 | else: 19 | raise 20 | 21 | def forward(self, s,z,gt_frame,mask): 22 | outputs = self.refine_module(s,z,gt_frame,mask) 23 | 24 | return outputs -------------------------------------------------------------------------------- /VFN-Diff/openfold/utils/precision_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 AlQuraishi Laboratory 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import importlib 15 | 16 | import torch 17 | 18 | def is_fp16_enabled(): 19 | # Autocast world 20 | fp16_enabled = torch.get_autocast_gpu_dtype() == torch.float16 21 | fp16_enabled = fp16_enabled and torch.is_autocast_enabled() 22 | 23 | return fp16_enabled 24 | -------------------------------------------------------------------------------- /VFN-Diff/openfold/data/errors.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 AlQuraishi Laboratory 2 | # Copyright 2021 DeepMind Technologies Limited 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """General-purpose errors used throughout the data pipeline""" 17 | class Error(Exception): 18 | """Base class for exceptions.""" 19 | 20 | 21 | class MultipleChainsError(Error): 22 | """An error indicating that multiple chains were found for a given ID.""" 23 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/data/numel_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import numpy as np 8 | import torch 9 | 10 | from . import BaseWrapperDataset 11 | 12 | 13 | class NumelDataset(BaseWrapperDataset): 14 | def __init__(self, dataset, reduce=False): 15 | super().__init__(dataset) 16 | self.reduce = reduce 17 | 18 | def __getitem__(self, index): 19 | item = self.dataset[index] 20 | if torch.is_tensor(item): 21 | return torch.numel(item) 22 | else: 23 | return np.size(item) 24 | 25 | def __len__(self): 26 | return len(self.dataset) 27 | 28 | def collater(self, samples): 29 | if self.reduce: 30 | return sum(samples) 31 | else: 32 | return torch.tensor(samples) 33 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/data/tokenize_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | from functools import lru_cache 6 | 7 | import torch 8 | from unicore.data import Dictionary 9 | from functools import lru_cache 10 | from . 
import BaseWrapperDataset 11 | 12 | 13 | class TokenizeDataset(BaseWrapperDataset): 14 | def __init__( 15 | self, 16 | dataset: torch.utils.data.Dataset, 17 | dictionary: Dictionary, 18 | max_seq_len: int=512, 19 | ): 20 | self.dataset = dataset 21 | self.dictionary = dictionary 22 | self.max_seq_len = max_seq_len 23 | 24 | @lru_cache(maxsize=16) 25 | def __getitem__(self, index: int): 26 | raw_data = self.dataset[index] 27 | assert len(raw_data) < self.max_seq_len and len(raw_data) > 0 28 | return torch.from_numpy(self.dictionary.vec_index(raw_data)).long() -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/examples/bert/example_data/preprocess.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import pickle 4 | import lmdb 5 | 6 | 7 | def wirte_to_lmbd(filename, outfilename): 8 | try: 9 | os.remove(outfilename) 10 | except: 11 | pass 12 | env_new = lmdb.open( 13 | outfilename, 14 | subdir=False, 15 | readonly=False, 16 | lock=False, 17 | readahead=False, 18 | meminit=False, 19 | max_readers=1, 20 | map_size=int(100e9), 21 | ) 22 | txn_write = env_new.begin(write = True) 23 | 24 | with open(filename, 'r') as input: 25 | i = 0 26 | for line in input.readlines(): 27 | line = line.strip() 28 | if line: 29 | txn_write.put(f'{i}'.encode("ascii"), pickle.dumps(line)) 30 | i += 1 31 | print('process {} lines'.format(i)) 32 | txn_write.commit() 33 | env_new.close() 34 | 35 | 36 | if __name__ == '__main__': 37 | wirte_to_lmbd(sys.argv[1], sys.argv[2]) -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/losses/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | """isort:skip_file""" 7 | 8 | import importlib 9 | import os 10 | 11 | from unicore import registry 12 | from unicore.losses.unicore_loss import ( # noqa 13 | UnicoreLoss, 14 | ) 15 | 16 | 17 | ( 18 | build_loss_, 19 | register_loss, 20 | CRITERION_REGISTRY, 21 | ) = registry.setup_registry( 22 | "--loss", base_class=UnicoreLoss, default="cross_entropy" 23 | ) 24 | 25 | 26 | def build_loss(args, task): 27 | return build_loss_(args, task) 28 | 29 | 30 | # automatically import any Python files in the losses/ directory 31 | for file in os.listdir(os.path.dirname(__file__)): 32 | if file.endswith(".py") and not file.startswith("_"): 33 | file_name = file[: file.find(".py")] 34 | importlib.import_module("unicore.losses." + file_name) 35 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/examples/bert/train_bert_test.sh: -------------------------------------------------------------------------------- 1 | [ -z "${MASTER_PORT}" ] && MASTER_PORT=10086 2 | [ -z "${n_gpu}" ] && n_gpu=$(nvidia-smi -L | wc -l) 3 | export NCCL_ASYNC_ERROR_HANDLING=1 4 | export OMP_NUM_THREADS=1 5 | python -m torch.distributed.launch --nproc_per_node=$n_gpu --master_port=$MASTER_PORT $(which unicore-train) ./example_data --user-dir . 
--valid-subset valid \ 6 | --num-workers 0 --ddp-backend=c10d \ 7 | --task bert --loss masked_lm --arch bert_base \ 8 | --optimizer adam --adam-betas '(0.9, 0.98)' --adam-eps 1e-6 --clip-norm 1.0 \ 9 | --lr-scheduler polynomial_decay --lr 1e-4 --warmup-updates 100 --total-num-update 10000 --batch-size 4 \ 10 | --update-freq 1 --seed 1 \ 11 | --fp16 --fp16-init-scale 4 --fp16-scale-window 256 --tensorboard-logdir ./tsb/ \ 12 | --max-update 10000 --log-interval 100 --log-format simple \ 13 | --save-interval-updates 5000 --validate-interval-updates 5000 --keep-interval-updates 30 --no-epoch-checkpoints \ 14 | --save-dir ./save 15 | 16 | 17 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/csrc/rounding/interface.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | void fused_fp32_to_bf16_sr_cuda(at::Tensor & input, at::Tensor & output); 10 | 11 | #define CHECK_CUDA(x) AT_ASSERTM(x.is_cuda(), #x " must be a CUDA tensor") 12 | #define CHECK_CONTIGUOUS(x) AT_ASSERTM(x.is_contiguous(), #x " must be contiguous") 13 | #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 14 | 15 | void fused_fp32_to_bf16_sr(at::Tensor & input, at::Tensor & output) { 16 | CHECK_INPUT(input); 17 | CHECK_INPUT(output); 18 | int64_t num_elem = input.numel(); 19 | AT_ASSERTM(output.numel() == num_elem, "number of elements in input ond output tensors should be equal"); 20 | fused_fp32_to_bf16_sr_cuda(input, output); 21 | } 22 | 23 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 24 | m.def("fp32_to_bf16_sr", &fused_fp32_to_bf16_sr, "fused fp32 to bf16 random rounding"); 25 | } -------------------------------------------------------------------------------- /VFN-IF/docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM dptechnology/unicore:latest-pytorch1.11.0-cuda11.3 2 | 3 | # metainformation 4 | LABEL org.opencontainers.image.version = "2.0.0" 5 | LABEL org.opencontainers.image.authors = "DP Technology" 6 | LABEL org.opencontainers.image.source = "https://github.com/dptech-corp/Uni-Fold" 7 | LABEL org.opencontainers.image.licenses = "Apache License 2.0" 8 | 9 | # Use bash to support string substitution. 10 | SHELL ["/bin/bash", "-c"] 11 | 12 | RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ 13 | hmmer \ 14 | kalign 15 | 16 | # Compile HHsuite from source. 17 | RUN git clone --branch v3.3.0 https://github.com/soedinglab/hh-suite.git /tmp/hh-suite \ 18 | && mkdir /tmp/hh-suite/build \ 19 | && pushd /tmp/hh-suite/build \ 20 | && cmake -DCMAKE_INSTALL_PREFIX=/opt/hhsuite .. \ 21 | && make -j 4 && make install \ 22 | && ln -s /opt/hhsuite/bin/* /usr/bin \ 23 | && popd \ 24 | && rm -rf /tmp/hh-suite 25 | 26 | RUN ldconfig && \ 27 | apt-get clean && \ 28 | apt-get autoremove && \ 29 | rm -rf /var/lib/apt/lists/* /tmp/* && \ 30 | conda clean -ya 31 | -------------------------------------------------------------------------------- /VFN-Diff/config/inference.yaml: -------------------------------------------------------------------------------- 1 | # Configuration for inference on SE(3) diffusion experiments. 2 | defaults: 3 | - base 4 | 5 | inference: 6 | name: null 7 | gpu_id: null # CUDA GPU to use 8 | seed: 123 9 | 10 | # Directory of software, weights, and outputs. 
11 | pt_hub_dir: ./.cache/torch/ 12 | pmpnn_dir: ./ProteinMPNN/ 13 | output_dir: ./inference_outputs/ 14 | 15 | # Path to model weights. 16 | weights_path: ./weights/paper_weights.pth 17 | 18 | diffusion: 19 | # Number of steps. 20 | num_t: 500 21 | # Analogous to sampling temperature. 22 | noise_scale: 0.1 23 | # Final t. 24 | min_t: 0.01 25 | 26 | samples: 27 | # Number of backbone samples per sequence length. 28 | samples_per_length: 10 29 | # Number of ESMFold samples per backbone sample. 30 | seq_per_sample: 8 31 | # Minimum sequence length to sample. 32 | min_length: 100 33 | # Maximum sequence length to sample. 34 | max_length: 500 35 | # Gap between lengths to sample, i.e. this script will sample all lengths 36 | # in range(min_length, max_length, length_step). 37 | length_step: 5 38 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2022 DP Technology 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /VFN-Diff/openfold/utils/argparse.py: -------------------------------------------------------------------------------- 1 | from argparse import HelpFormatter 2 | from operator import attrgetter 3 | 4 | class ArgparseAlphabetizer(HelpFormatter): 5 | """ 6 | Sorts the optional arguments of an argparse parser alphabetically 7 | """ 8 | 9 | @staticmethod 10 | def sort_actions(actions): 11 | return sorted(actions, key=attrgetter("option_strings")) 12 | 13 | # Formats the help message 14 | def add_arguments(self, actions): 15 | actions = ArgparseAlphabetizer.sort_actions(actions) 16 | super(ArgparseAlphabetizer, self).add_arguments(actions) 17 | 18 | # Formats the usage message 19 | def add_usage(self, usage, actions, groups, prefix=None): 20 | actions = ArgparseAlphabetizer.sort_actions(actions) 21 | args = usage, actions, groups, prefix 22 | super(ArgparseAlphabetizer, self).add_usage(*args) 23 | 24 | 25 | def remove_arguments(parser, args): 26 | for arg in args: 27 | for action in parser._actions: 28 | opts = vars(action)["option_strings"] 29 | if(arg in opts): 30 | parser._handle_conflict_resolve(None, [(arg, action)]) 31 | -------------------------------------------------------------------------------- /VFN-IF/api/model.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from .modules.denoise import Denosie 3 | from .config import model_config 4 | 5 | 6 | class DeModel(nn.Module): 7 | def __init__(self, args): 8 | super().__init__() 9 | self.args = args 10 | config = model_config( 11 | self.args.model_name, 12 | train=True, 13 | ) 14 | self.model = Denosie(config) 15 | self.config = config 16 | 17 | def half(self): 18 | self.model = self.model.half() 19 | return self 20 | 21 | def bfloat16(self): 22 | self.model = self.model.bfloat16() 23 | return self 24 | 25 | @classmethod 26 | def build_model(cls, args, task): 27 | """Build a new model instance.""" 28 | return cls(args) 29 | 30 | def forward(self, batch, **kwargs): 31 | outputs = self.model.forward(batch) 32 | return outputs, self.config.loss 33 | 34 | @staticmethod 35 | def add_args(parser): 36 | """Add model-specific arguments to the parser.""" 37 | parser.add_argument( 38 | "--model-name", 39 | help="choose the model config", 40 | ) 41 | 42 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/csrc/util.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #define DIV_CELL(a, b) (((a) + (b) - 1) / (b)) 3 | #if __cplusplus >= 201703L 4 | #define IF_CONSTEXPR constexpr 5 | #else 6 | #define IF_CONSTEXPR 7 | #endif 8 | 9 | template 10 | __device__ __forceinline__ T SHFL_XOR(T value, int laneMask, int width, unsigned int mask = 0xffffffff) 11 | { 12 | #if CUDA_VERSION >= 9000 13 | return __shfl_xor_sync(mask, value, laneMask, width); 14 | #else 15 | return __shfl_xor(value, laneMask, width); 16 | #endif 17 | } 18 | 19 | template 20 | struct VecTypeImpl; 21 | 22 | #define DEFINE_VEC_TYPE(t, n, tn) \ 23 | template <> \ 24 | struct VecTypeImpl { \ 25 | using type = tn; \ 26 | }; 27 | 28 | DEFINE_VEC_TYPE(half, 1, half) 29 | DEFINE_VEC_TYPE(__nv_bfloat16, 1, __nv_bfloat16) 30 | DEFINE_VEC_TYPE(float, 1, float) 31 | DEFINE_VEC_TYPE(half, 2, half2) 32 | DEFINE_VEC_TYPE(__nv_bfloat16, 2, __nv_bfloat162) 33 | DEFINE_VEC_TYPE(float, 2, float2) 34 | DEFINE_VEC_TYPE(half, 4, uint64_t) 35 | 
DEFINE_VEC_TYPE(__nv_bfloat16, 4, uint64_t) 36 | DEFINE_VEC_TYPE(float, 4, float4) 37 | 38 | template 39 | using VecType = typename VecTypeImpl::type; -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/optim/lr_scheduler/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | """isort:skip_file""" 7 | 8 | import importlib 9 | import os 10 | 11 | from unicore import registry 12 | from unicore.optim.lr_scheduler.unicore_lr_scheduler import ( # noqa 13 | UnicoreLRScheduler, 14 | ) 15 | 16 | 17 | ( 18 | build_lr_scheduler_, 19 | register_lr_scheduler, 20 | LR_SCHEDULER_REGISTRY, 21 | ) = registry.setup_registry( 22 | "--lr-scheduler", base_class=UnicoreLRScheduler, default="fixed" 23 | ) 24 | 25 | 26 | def build_lr_scheduler(args, optimizer, total_train_steps): 27 | return build_lr_scheduler_(args, optimizer, total_train_steps) 28 | 29 | 30 | # automatically import any Python files in the optim/lr_scheduler/ directory 31 | for file in os.listdir(os.path.dirname(__file__)): 32 | if file.endswith(".py") and not file.startswith("_"): 33 | file_name = file[: file.find(".py")] 34 | importlib.import_module("unicore.optim.lr_scheduler." + file_name) 35 | -------------------------------------------------------------------------------- /VFN-Diff/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Jason Yim, Brian L Trippe, Valentin De Bortoli, Emile Mathieu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | """isort:skip_file""" 7 | 8 | import os 9 | import sys 10 | 11 | try: 12 | from .version import __version__ # noqa 13 | except ImportError: 14 | version_txt = os.path.join(os.path.dirname(__file__), "version.txt") 15 | with open(version_txt) as f: 16 | __version__ = f.read().strip() 17 | 18 | __all__ = ["pdb"] 19 | 20 | # backwards compatibility to support `from unicore.X import Y` 21 | from unicore.distributed import utils as distributed_utils 22 | from unicore.logging import meters, metrics, progress_bar # noqa 23 | 24 | sys.modules["unicore.distributed_utils"] = distributed_utils 25 | sys.modules["unicore.meters"] = meters 26 | sys.modules["unicore.metrics"] = metrics 27 | sys.modules["unicore.progress_bar"] = progress_bar 28 | 29 | import unicore.losses # noqa 30 | import unicore.distributed # noqa 31 | import unicore.models # noqa 32 | import unicore.modules # noqa 33 | import unicore.optim # noqa 34 | import unicore.optim.lr_scheduler # noqa 35 | import unicore.tasks # noqa 36 | 37 | -------------------------------------------------------------------------------- /VFN-Diff/openfold/utils/validation_metrics.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 AlQuraishi Laboratory 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import torch 15 | 16 | 17 | def gdt(p1, p2, mask, cutoffs): 18 | n = torch.sum(mask, dim=-1) 19 | 20 | p1 = p1.float() 21 | p2 = p2.float() 22 | distances = torch.sqrt(torch.sum((p1 - p2)**2, dim=-1)) 23 | 24 | scores = [] 25 | for c in cutoffs: 26 | score = torch.sum((distances <= c) * mask, dim=-1) / n 27 | scores.append(score) 28 | 29 | return sum(scores) / len(scores) 30 | 31 | 32 | def gdt_ts(p1, p2, mask): 33 | return gdt(p1, p2, mask, [1., 2., 4., 8.]) 34 | 35 | 36 | def gdt_ha(p1, p2, mask): 37 | return gdt(p1, p2, mask, [0.5, 1., 2., 4.]) 38 | 39 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/data/bert_tokenize_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | from functools import lru_cache 6 | 7 | import numpy as np 8 | import torch 9 | from tokenizers import BertWordPieceTokenizer 10 | 11 | from . 
import BaseWrapperDataset, LRUCacheDataset 12 | 13 | 14 | class BertTokenizeDataset(BaseWrapperDataset): 15 | def __init__( 16 | self, 17 | dataset: torch.utils.data.Dataset, 18 | dict_path: str, 19 | max_seq_len: int=512, 20 | ): 21 | self.dataset = dataset 22 | self.tokenizer = BertWordPieceTokenizer(dict_path, lowercase=True) 23 | self.max_seq_len = max_seq_len 24 | 25 | @property 26 | def can_reuse_epoch_itr_across_epochs(self): 27 | return True # only the noise changes, not item sizes 28 | 29 | def __getitem__(self, index: int): 30 | raw_str = self.dataset[index] 31 | raw_str = raw_str.replace('', '[UNK]') 32 | output = self.tokenizer.encode(raw_str) 33 | ret = torch.Tensor(output.ids).long() 34 | if ret.size(0) > self.max_seq_len: 35 | ret = ret[:self.max_seq_len] 36 | return ret -------------------------------------------------------------------------------- /VFN-IF/extra.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import json 4 | 5 | def copyfile(src_dir, dest_dir, file_name): 6 | src_file = os.path.join(src_dir, file_name) 7 | dest_file = os.path.join(dest_dir, file_name) 8 | # 尝试复制文件,如果出错则跳过 9 | try: 10 | shutil.copyfile(src_file, dest_file) 11 | except Exception as e: 12 | print(f'Failed to copy file {file_name}. Error: {e}') 13 | 14 | 15 | json_file = './dataset/json/CATH4.2/train_multi_label.json' 16 | data = json.load(open(json_file, 'r')) 17 | 18 | # 源文件夹和目标文件夹 19 | src_dir = '/mnt/nas/datasets/protein/unifold/processed' 20 | dest_dir = './processed/pdb_labels' 21 | 22 | # 确保目标目录存在 23 | if not os.path.exists(dest_dir): 24 | os.makedirs(dest_dir) 25 | 26 | # 遍历JSON数据中的所有键 27 | for key, values in data.items(): 28 | # 构建文件的相对路径 29 | for file_name in values: 30 | # label_name = file_name+'.label.pkl.gz' 31 | # feature_name = file_name+'.feature.pkl.gz' 32 | uniprots = file_name+'.label.pkl.gz' 33 | print(f'Processing file {file_name}...') 34 | 35 | # 复制文件 36 | # copyfile(src_dir, dest_dir, label_name) 37 | # copyfile(src_dir, dest_dir, feature_name) 38 | copyfile(src_dir, dest_dir, uniprots) 39 | 40 | 41 | print('File extraction complete.') -------------------------------------------------------------------------------- /VFN-IF/train_script/vec/x2_16vec_v2.sh: -------------------------------------------------------------------------------- 1 | export NCCL_ASYNC_ERROR_HANDLING=1 2 | export OMP_NUM_THREADS=1 3 | export CUDA_VISIBLE_DEVICES=0 4 | mkdir -p ./output_dir/x2/VFN_baseline_16vec_v2/ 5 | python train.py ./processed/ \ 6 | --user-dir user \ 7 | --num-workers 16 \ 8 | --ddp-backend=no_c10d \ 9 | --task de --loss af2 --arch de \ 10 | --optimizer adam --adam-betas '(0.9, 0.999)' --adam-eps 1e-6 --wd 0.1 --clip-norm 1.0 --allreduce-fp32-grad \ 11 | --lr-scheduler onecycle --lr 1e-3 --warmup-updates 1000 --decay-ratio 0.1 --decay-steps 100000 \ 12 | --batch-size 8 \ 13 | --update-freq 1 --seed 2 \ 14 | --max-update 100000 --log-interval 10 --save-interval-updates 5000 --validate-interval-updates 10000 --keep-interval-updates 5 \ 15 | --log-format simple \ 16 | --tensorboard-logdir ./output_dir/x2/VFN_baseline_16vec_v2/tsb \ 17 | --save-dir ./output_dir/x2/VFN_baseline_16vec_v2/ \ 18 | --tmp-save-dir ./output_dir/x2/VFN_baseline_16vec_v2/tmp/ \ 19 | --required-batch-size-multiple 1 \ 20 | --ema-decay 0.999 \ 21 | --model-name VFN_baseline_16vec_v2 \ 22 | --batch-size-valid 1 \ 23 | --json-prefix CATH4.2 \ 24 | --disable-sd \ 25 | --data-buffer-size 32 >> ./output_dir/x2/VFN_baseline_16vec_v2/output.txt 
2>&1 26 | -------------------------------------------------------------------------------- /VFN-IF/train_script/vec/x2_64vec_v2.sh: -------------------------------------------------------------------------------- 1 | export NCCL_ASYNC_ERROR_HANDLING=1 2 | export OMP_NUM_THREADS=1 3 | export CUDA_VISIBLE_DEVICES=0 4 | mkdir -p ./output_dir/x2/VFN_baseline_64vec_v2/ 5 | python train.py ./processed/ \ 6 | --user-dir user \ 7 | --num-workers 16 \ 8 | --ddp-backend=no_c10d \ 9 | --task de --loss af2 --arch de \ 10 | --optimizer adam --adam-betas '(0.9, 0.999)' --adam-eps 1e-6 --wd 0.1 --clip-norm 1.0 --allreduce-fp32-grad \ 11 | --lr-scheduler onecycle --lr 1e-3 --warmup-updates 1000 --decay-ratio 0.1 --decay-steps 100000 \ 12 | --batch-size 8 \ 13 | --update-freq 1 --seed 2 \ 14 | --max-update 100000 --log-interval 10 --save-interval-updates 5000 --validate-interval-updates 10000 --keep-interval-updates 5 \ 15 | --log-format simple \ 16 | --tensorboard-logdir ./output_dir/x2/VFN_baseline_64vec_v2/tsb \ 17 | --save-dir ./output_dir/x2/VFN_baseline_64vec_v2/ \ 18 | --tmp-save-dir ./output_dir/x2/VFN_baseline_64vec_v2/tmp/ \ 19 | --required-batch-size-multiple 1 \ 20 | --ema-decay 0.999 \ 21 | --model-name VFN_baseline_64vec_v2 \ 22 | --batch-size-valid 1 \ 23 | --json-prefix CATH4.2 \ 24 | --disable-sd \ 25 | --data-buffer-size 32 >> ./output_dir/x2/VFN_baseline_64vec_v2/output.txt 2>&1 26 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/csrc/adam/interface.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void fused_adam_cuda(at::Tensor & p, at::Tensor & m, at::Tensor & v, at::Tensor & g, float lr, float beta1, float beta2, float eps, float grad_scale, int step, int bias_correction, float decay); 4 | 5 | #define CHECK_CUDA(x) AT_ASSERTM(x.is_cuda(), #x " must be a CUDA tensor") 6 | #define CHECK_CONTIGUOUS(x) AT_ASSERTM(x.is_contiguous(), #x " must be contiguous") 7 | #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 8 | 9 | void adam(at::Tensor & p, at::Tensor & m, at::Tensor & v, at::Tensor & g, float lr, float beta1, float beta2, float eps, float grad_scale, int step, int bias_correction, float decay) { 10 | CHECK_INPUT(p); 11 | CHECK_INPUT(m); 12 | CHECK_INPUT(v); 13 | CHECK_INPUT(g); 14 | int64_t num_elem = p.numel(); 15 | AT_ASSERTM(m.numel() == num_elem, "number of elements in m and p tensors should be equal"); 16 | AT_ASSERTM(v.numel() == num_elem, "number of elements in v and p tensors should be equal"); 17 | AT_ASSERTM(g.numel() == num_elem, "number of elements in g and p tensors should be equal"); 18 | fused_adam_cuda(p, m, v, g, lr, beta1, beta2, eps, grad_scale, step, bias_correction, decay); 19 | } 20 | 21 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 22 | m.def("adam", &adam, "Adam optimized CUDA implementation."); 23 | } -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/data/sort_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import numpy as np 8 | 9 | from . 
import BaseWrapperDataset, data_utils 10 | 11 | 12 | class SortDataset(BaseWrapperDataset): 13 | def __init__(self, dataset, sort_order): 14 | super().__init__(dataset) 15 | if not isinstance(sort_order, (list, tuple)): 16 | sort_order = [sort_order] 17 | self.sort_order = sort_order 18 | 19 | assert all(len(so) == len(dataset) for so in sort_order) 20 | 21 | def ordered_indices(self): 22 | return np.lexsort(self.sort_order) 23 | 24 | 25 | class EpochShuffleDataset(BaseWrapperDataset): 26 | def __init__(self, dataset, size, seed): 27 | super().__init__(dataset) 28 | self.size = size 29 | self.seed = seed 30 | self.set_epoch(1) 31 | 32 | def set_epoch(self, epoch): 33 | super().set_epoch(epoch) 34 | with data_utils.numpy_seed(self.seed + epoch - 1): 35 | self.sort_order = np.random.permutation(self.size) 36 | 37 | def ordered_indices(self): 38 | return self.sort_order 39 | 40 | @property 41 | def can_reuse_epoch_itr_across_epochs(self): 42 | return False -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/data/pad_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from unicore.data import data_utils 8 | 9 | from . import BaseWrapperDataset 10 | 11 | 12 | class PadDataset(BaseWrapperDataset): 13 | def __init__(self, dataset, pad_idx, left_pad): 14 | super().__init__(dataset) 15 | self.pad_idx = pad_idx 16 | self.left_pad = left_pad 17 | 18 | def collater(self, samples): 19 | return data_utils.collate_tokens(samples, self.pad_idx, left_pad=self.left_pad, pad_to_multiple=8) 20 | 21 | 22 | class LeftPadDataset(PadDataset): 23 | def __init__(self, dataset, pad_idx): 24 | super().__init__(dataset, pad_idx, left_pad=True) 25 | 26 | 27 | class RightPadDataset(PadDataset): 28 | def __init__(self, dataset, pad_idx): 29 | super().__init__(dataset, pad_idx, left_pad=False) 30 | 31 | 32 | class RightPadDataset2D(BaseWrapperDataset): 33 | def __init__(self, dataset, pad_idx,left_pad=False): 34 | super().__init__(dataset) 35 | self.pad_idx = pad_idx 36 | self.left_pad = left_pad 37 | def collater(self, samples): 38 | return data_utils.collate_tokens_2d(samples, self.pad_idx, left_pad=self.left_pad, pad_to_multiple=8) 39 | -------------------------------------------------------------------------------- /VFN-IF/api/modules/refine_module.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | from typing import Tuple 5 | 6 | from .refine_module_utils import AFRefineStructureModule 7 | 8 | from .featurization import atom14_to_atom37 9 | 10 | class RefineModule(nn.Module): 11 | def __init__(self, config): 12 | super(RefineModule, self).__init__() 13 | self.refine_module_type = config['type'] 14 | if self.refine_module_type== 'baseline': 15 | self.refine_module = AFRefineStructureModule(**config["baseline_module"]) 16 | else: 17 | raise 18 | print('a') 19 | 20 | def forward(self, outputs,feats): 21 | if self.refine_module_type== 'baseline': 22 | sm_output = outputs['sm'] 23 | 24 | s = outputs['single'] 25 | z = outputs["pair"] 26 | aatype = sm_output["aatype"] 27 | mask = sm_output["mask"] 28 | quat_encoder = sm_output["quat_encoder"] 29 | 30 | outputs['rm'] = 
self.refine_module(s,z,aatype,quat_encoder,mask) 31 | else: 32 | raise 33 | 34 | outputs["final_atom_positions_rm"] = atom14_to_atom37( 35 | outputs["rm"]["positions"], feats 36 | ) 37 | outputs["final_atom_mask"] = feats["atom37_atom_exists"] 38 | outputs["pred_frame_tensor"] = outputs["rm"]["frames"][-1] 39 | 40 | return outputs -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/optim/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | """isort:skip_file""" 7 | 8 | import importlib 9 | import os 10 | 11 | from unicore import registry 12 | from unicore.optim.unicore_optimizer import ( # noqa 13 | UnicoreOptimizer, 14 | ) 15 | from unicore.optim.fp16_optimizer import FP16Optimizer 16 | 17 | __all__ = [ 18 | "UnicoreOptimizer", 19 | "FP16Optimizer", 20 | ] 21 | 22 | ( 23 | _build_optimizer, 24 | register_optimizer, 25 | OPTIMIZER_REGISTRY 26 | ) = registry.setup_registry("--optimizer", base_class=UnicoreOptimizer, default='adam') 27 | 28 | 29 | def build_optimizer(args, params, *extra_args, **extra_kwargs): 30 | if all(isinstance(p, dict) for p in params): 31 | params = [t for p in params for t in p.values()] 32 | params = list(filter(lambda p: p.requires_grad, params)) 33 | return _build_optimizer(args, params, *extra_args, **extra_kwargs) 34 | 35 | 36 | # automatically import any Python files in the optim/ directory 37 | for file in os.listdir(os.path.dirname(__file__)): 38 | if file.endswith(".py") and not file.startswith("_"): 39 | file_name = file[: file.find(".py")] 40 | importlib.import_module("unicore.optim." 
+ file_name) 41 | -------------------------------------------------------------------------------- /VFN-IF/train_script/layers/x2_vecdirect_noedge_atom_gbf_nofeat_5l.sh: -------------------------------------------------------------------------------- 1 | export NCCL_ASYNC_ERROR_HANDLING=1 2 | export OMP_NUM_THREADS=1 3 | export CUDA_VISIBLE_DEVICES=0 4 | mkdir -p ./output_dir/nofeat/VFN_baseline_wo_node_feat_5l/ 5 | python train.py ./processed/ \ 6 | --user-dir user \ 7 | --num-workers 4\ 8 | --ddp-backend=no_c10d \ 9 | --task de --loss af2 --arch de \ 10 | --optimizer adam --adam-betas '(0.9, 0.999)' --adam-eps 1e-6 --wd 0.1 --clip-norm 1.0 --allreduce-fp32-grad \ 11 | --lr-scheduler onecycle --lr 1e-3 --warmup-updates 1000 --decay-ratio 0.1 --decay-steps 100000 \ 12 | --batch-size 8 \ 13 | --update-freq 1 --seed 3 \ 14 | --max-update 100000 --log-interval 10 --save-interval-updates 5000 --validate-interval-updates 10000 --keep-interval-updates 5 \ 15 | --log-format simple \ 16 | --tensorboard-logdir ./output_dir/nofeat/VFN_baseline_wo_node_feat_5l/tsb \ 17 | --save-dir ./output_dir/nofeat/VFN_baseline_wo_node_feat_5l/ \ 18 | --tmp-save-dir ./output_dir/nofeat/VFN_baseline_wo_node_feat_5l/tmp/ \ 19 | --required-batch-size-multiple 1 \ 20 | --ema-decay 0.999 \ 21 | --model-name VFN_baseline_wo_node_feat_5l \ 22 | --batch-size-valid 1 \ 23 | --json-prefix CATH4.2 \ 24 | --disable-sd \ 25 | --data-buffer-size 32 >> ./output_dir/nofeat/VFN_baseline_wo_node_feat_5l/output.txt 2>&1 26 | -------------------------------------------------------------------------------- /VFN-IF/train_script/layers/x2_vecdirect_noedge_atom_gbf_nofeat_10l.sh: -------------------------------------------------------------------------------- 1 | export NCCL_ASYNC_ERROR_HANDLING=1 2 | export OMP_NUM_THREADS=1 3 | export CUDA_VISIBLE_DEVICES=0 4 | mkdir -p ./output_dir/nofeat/VFN_baseline_wo_node_feat_10l/ 5 | python train.py ./processed/ \ 6 | --user-dir user \ 7 | --num-workers 16 \ 8 | --ddp-backend=no_c10d \ 9 | --task de --loss af2 --arch de \ 10 | --optimizer adam --adam-betas '(0.9, 0.999)' --adam-eps 1e-6 --wd 0.1 --clip-norm 1.0 --allreduce-fp32-grad \ 11 | --lr-scheduler onecycle --lr 1e-3 --warmup-updates 1000 --decay-ratio 0.1 --decay-steps 100000 \ 12 | --batch-size 8 \ 13 | --update-freq 1 --seed 3 \ 14 | --max-update 100000 --log-interval 10 --save-interval-updates 5000 --validate-interval-updates 10000 --keep-interval-updates 5 \ 15 | --log-format simple \ 16 | --tensorboard-logdir ./output_dir/nofeat/VFN_baseline_wo_node_feat_10l/tsb \ 17 | --save-dir ./output_dir/nofeat/VFN_baseline_wo_node_feat_10l/ \ 18 | --tmp-save-dir ./output_dir/nofeat/VFN_baseline_wo_node_feat_10l/tmp/ \ 19 | --required-batch-size-multiple 1 \ 20 | --ema-decay 0.999 \ 21 | --model-name VFN_baseline_wo_node_feat_10l \ 22 | --batch-size-valid 1 \ 23 | --json-prefix CATH4.2 \ 24 | --disable-sd \ 25 | --data-buffer-size 32 >> ./output_dir/nofeat/VFN_baseline_wo_node_feat_10l/output.txt 2>&1 -------------------------------------------------------------------------------- /VFN-IF/train_script/layers/x2_vecdirect_noedge_atom_gbf_nofeat_12l.sh: -------------------------------------------------------------------------------- 1 | export NCCL_ASYNC_ERROR_HANDLING=1 2 | export OMP_NUM_THREADS=1 3 | export CUDA_VISIBLE_DEVICES=0 4 | mkdir -p ./output_dir/nofeat/VFN_baseline_wo_node_feat_12l/ 5 | python train.py ./processed/ \ 6 | --user-dir user \ 7 | --num-workers 16 \ 8 | --ddp-backend=no_c10d \ 9 | --task de --loss af2 --arch de \ 10 | 
--optimizer adam --adam-betas '(0.9, 0.999)' --adam-eps 1e-6 --wd 0.1 --clip-norm 1.0 --allreduce-fp32-grad \ 11 | --lr-scheduler onecycle --lr 1e-3 --warmup-updates 1000 --decay-ratio 0.1 --decay-steps 100000 \ 12 | --batch-size 8 \ 13 | --update-freq 1 --seed 3 \ 14 | --max-update 100000 --log-interval 10 --save-interval-updates 5000 --validate-interval-updates 10000 --keep-interval-updates 5 \ 15 | --log-format simple \ 16 | --tensorboard-logdir ./output_dir/nofeat/VFN_baseline_wo_node_feat_12l/tsb \ 17 | --save-dir ./output_dir/nofeat/VFN_baseline_wo_node_feat_12l/ \ 18 | --tmp-save-dir ./output_dir/nofeat/VFN_baseline_wo_node_feat_12l/tmp/ \ 19 | --required-batch-size-multiple 1 \ 20 | --ema-decay 0.999 \ 21 | --model-name VFN_baseline_wo_node_feat_12l \ 22 | --batch-size-valid 1 \ 23 | --json-prefix CATH4.2 \ 24 | --disable-sd \ 25 | --data-buffer-size 32 >> ./output_dir/nofeat/VFN_baseline_wo_node_feat_12l/output.txt 2>&1 -------------------------------------------------------------------------------- /VFN-IF/train_script/layers/x2_vecdirect_noedge_atom_gbf_nofeat_8l.sh: -------------------------------------------------------------------------------- 1 | export NCCL_ASYNC_ERROR_HANDLING=1 2 | export OMP_NUM_THREADS=1 3 | export CUDA_VISIBLE_DEVICES=0 4 | mkdir -p ./output_dir/nofeat/VFN_baseline_wo_node_feat_8l/ 5 | python train.py ./processed/ \ 6 | --user-dir user \ 7 | --num-workers 16 \ 8 | --ddp-backend=no_c10d \ 9 | --task de --loss af2 --arch de \ 10 | --optimizer adam --adam-betas '(0.9, 0.999)' --adam-eps 1e-6 --wd 0.1 --clip-norm 1.0 --allreduce-fp32-grad \ 11 | --lr-scheduler onecycle --lr 1e-3 --warmup-updates 1000 --decay-ratio 0.1 --decay-steps 100000 \ 12 | --batch-size 8 \ 13 | --update-freq 1 --seed 3 \ 14 | --max-update 100000 --log-interval 10 --save-interval-updates 5000 --validate-interval-updates 10000 --keep-interval-updates 5 \ 15 | --log-format simple \ 16 | --tensorboard-logdir ./output_dir/nofeat/VFN_baseline_wo_node_feat_8l/tsb \ 17 | --save-dir ./output_dir/nofeat/VFN_baseline_wo_node_feat_8l/ \ 18 | --tmp-save-dir ./output_dir/nofeat/VFN_baseline_wo_node_feat_8l/tmp/ \ 19 | --required-batch-size-multiple 1 \ 20 | --ema-decay 0.999 \ 21 | --model-name VFN_baseline_wo_node_feat_8l \ 22 | --batch-size-valid 1 \ 23 | --json-prefix CATH4.2 \ 24 | --disable-sd \ 25 | --data-buffer-size 32 >> ./output_dir/nofeat/VFN_baseline_wo_node_feat_8l/output.txt 2>&1 26 | -------------------------------------------------------------------------------- /VFN-IF/unifold/modules/refine_module.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | from typing import Tuple 5 | 6 | from .refine_module_utils import AFRefineStructureModule 7 | 8 | from unifold.modules.featurization import atom14_to_atom37 9 | 10 | class RefineModule(nn.Module): 11 | def __init__(self, config): 12 | super(RefineModule, self).__init__() 13 | self.refine_module_type = config['type'] 14 | if self.refine_module_type== 'baseline': 15 | self.refine_module = AFRefineStructureModule(**config["baseline_module"]) 16 | else: 17 | raise 18 | print('a') 19 | 20 | def forward(self, outputs,feats): 21 | if self.refine_module_type== 'baseline': 22 | sm_output = outputs['sm'] 23 | 24 | s = outputs['single'] 25 | z = outputs["pair"] 26 | aatype = sm_output["aatype"] 27 | mask = sm_output["mask"] 28 | quat_encoder = sm_output["quat_encoder"] 29 | 30 | outputs['rm'] = self.refine_module(s,z,aatype,quat_encoder,mask) 31 | 
else: 32 | raise 33 | 34 | outputs["final_atom_positions_rm"] = atom14_to_atom37( 35 | outputs["rm"]["positions"], feats 36 | ) 37 | outputs["final_atom_mask"] = feats["atom37_atom_exists"] 38 | outputs["pred_frame_tensor"] = outputs["rm"]["frames"][-1] 39 | 40 | return outputs -------------------------------------------------------------------------------- /VFN-IF/train_script/layers/x2_vecdirect_noedge_atom_gbf_nofeat_15l.sh: -------------------------------------------------------------------------------- 1 | export NCCL_ASYNC_ERROR_HANDLING=1 2 | export OMP_NUM_THREADS=1 3 | export CUDA_VISIBLE_DEVICES=0 4 | mkdir -p ./output_dir/nofeat/VFN_baseline_wo_node_feat_15l/ 5 | python train.py ./processed/ \ 6 | --user-dir user \ 7 | --num-workers 4 \ 8 | --ddp-backend=no_c10d \ 9 | --task de --loss af2 --arch de \ 10 | --optimizer adam --adam-betas '(0.9, 0.999)' --adam-eps 1e-6 --wd 0.1 --clip-norm 1.0 --allreduce-fp32-grad \ 11 | --lr-scheduler onecycle --lr 1e-3 --warmup-updates 1000 --decay-ratio 0.1 --decay-steps 100000 \ 12 | --batch-size 8 \ 13 | --update-freq 1 --seed 3407 \ 14 | --max-update 100000 --log-interval 10 --save-interval-updates 5000 --validate-interval-updates 10000 --keep-interval-updates 5 \ 15 | --log-format simple \ 16 | --tensorboard-logdir ./output_dir/nofeat/VFN_baseline_wo_node_feat_15l/tsb \ 17 | --save-dir ./output_dir/nofeat/VFN_baseline_wo_node_feat_15l/ \ 18 | --tmp-save-dir ./output_dir/nofeat/VFN_baseline_wo_node_feat_15l/tmp/ \ 19 | --required-batch-size-multiple 1 \ 20 | --ema-decay 0.999 \ 21 | --model-name VFN_baseline_wo_node_feat_15l \ 22 | --batch-size-valid 1 \ 23 | --json-prefix CATH4.2 \ 24 | --disable-sd \ 25 | --data-buffer-size 32 >> ./output_dir/nofeat/VFN_baseline_wo_node_feat_15l/output.txt 2>&1 26 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | """isort:skip_file""" 7 | 8 | 9 | from .unicore_dataset import UnicoreDataset 10 | 11 | from .base_wrapper_dataset import BaseWrapperDataset 12 | 13 | from .append_token_dataset import AppendTokenDataset 14 | from .dictionary import Dictionary 15 | from .lru_cache_dataset import LRUCacheDataset 16 | from .mask_tokens_dataset import MaskTokensDataset 17 | from .bert_tokenize_dataset import BertTokenizeDataset 18 | from .tokenize_dataset import TokenizeDataset 19 | from .nested_dictionary_dataset import NestedDictionaryDataset 20 | from .numel_dataset import NumelDataset 21 | from .num_samples_dataset import NumSamplesDataset 22 | from .pad_dataset import LeftPadDataset, PadDataset, RightPadDataset, RightPadDataset2D 23 | from .prepend_token_dataset import PrependTokenDataset 24 | from .raw_dataset import RawLabelDataset, RawArrayDataset, RawNumpyDataset 25 | from .lmdb_dataset import LMDBDataset 26 | from .sort_dataset import SortDataset, EpochShuffleDataset 27 | from .from_numpy_dataset import FromNumpyDataset 28 | 29 | from .iterators import ( 30 | CountingIterator, 31 | EpochBatchIterator, 32 | GroupedIterator, 33 | ShardedIterator, 34 | ) 35 | 36 | __all__ = [] 37 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/optim/lr_scheduler/pass_through.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from unicore.optim.lr_scheduler import UnicoreLRScheduler, register_lr_scheduler 8 | 9 | 10 | @register_lr_scheduler("pass_through") 11 | class PassThroughScheduleSchedule(UnicoreLRScheduler): 12 | """Delegate lr scheduling to the optimizer.""" 13 | 14 | def __init__(self, args, optimizer, total_train_steps): 15 | super().__init__(args, optimizer, total_train_steps) 16 | assert ( 17 | hasattr(optimizer, "lr_scheduler") and optimizer.lr_scheduler is not None 18 | ), "Pass-through schedule can only be used with optimizers with their own schedulers" 19 | 20 | def state_dict(self): 21 | return self.optimizer.lr_scheduler.state_dict() 22 | 23 | def load_state_dict(self, state_dict): 24 | self.optimizer.lr_scheduler.load_state_dict(state_dict) 25 | 26 | def step_begin_epoch(self, epoch): 27 | """Update the learning rate at the beginning of the given epoch.""" 28 | return self.optimizer.lr_scheduler.step_begin_epoch(epoch) 29 | 30 | def step_update(self, num_updates): 31 | """Update the learning rate after each update.""" 32 | return self.optimizer.lr_scheduler.step_update(num_updates) 33 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/optim/adagrad.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch.optim 8 | 9 | from . 
import UnicoreOptimizer, register_optimizer 10 | 11 | 12 | @register_optimizer("adagrad") 13 | class Adagrad(UnicoreOptimizer): 14 | def __init__(self, args, params): 15 | super().__init__(args) 16 | self._optimizer = torch.optim.Adagrad(params, **self.optimizer_config) 17 | 18 | @staticmethod 19 | def add_args(parser): 20 | """Add optimizer-specific arguments to the parser.""" 21 | # fmt: off 22 | parser.add_argument('--weight-decay', '--wd', default=0.0, type=float, metavar='WD', 23 | help='weight decay') 24 | # fmt: on 25 | 26 | @property 27 | def optimizer_config(self): 28 | """ 29 | Return a kwarg dictionary that will be used to override optimizer 30 | args stored in checkpoints. This allows us to load a checkpoint and 31 | resume training using a different set of optimizer args, e.g., with a 32 | different learning rate. 33 | """ 34 | return { 35 | "lr": self.args.lr[0], 36 | "weight_decay": self.args.weight_decay, 37 | } 38 | 39 | @property 40 | def supports_flat_params(self): 41 | return False 42 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # De novo Protein Design using Geometric Vector Field Networks 2 | 3 | > [**De novo Protein Design using Geometric Vector Field Networks**](https://arxiv.org/pdf/2310.11802), 4 | > Weian Mao\*, Muzhi Zhu\*, Zheng Sun\*, Shuaike Shen, Lin Yuanbo Wu, Hao Chen, Chunhua Shen. 5 | > In: International Conference on Learning Representations (ICLR), 2024. 6 | > **Spotlight Presentation** 7 | > (\* equal contribution) 8 | 9 | The VFN codebase is fairly involved, so we will reorganize the code and add corresponding explanations in the future. All key source code has already been released; for now, please follow the installation instructions of UniFold and SE(3) diffusion. Dedicated installation instructions will be added later. 10 | 11 | Most of the VFN code is built on UniFold and SE(3) diffusion, and we thank the authors of both projects for their contributions to this work. 12 | 13 | ## BibTeX 14 | ``` 15 | @inproceedings{mao2024de, 16 | title={De novo Protein Design Using Geometric Vector Field Networks}, 17 | author={Weian Mao and Muzhi Zhu and Zheng Sun and Shuaike Shen and Lin Yuanbo Wu and Hao Chen and Chunhua Shen}, 18 | booktitle={The Twelfth International Conference on Learning Representations}, 19 | year={2024}, 20 | url={https://openreview.net/forum?id=9UIGyJJpay} 21 | } 22 | ``` 23 | 24 | ## 🎫 License 25 | For non-commercial academic use, this project is licensed under [the 2-clause BSD License](https://opensource.org/license/bsd-2-clause). 26 | For commercial use, please contact [Chunhua Shen](mailto:chhshen@gmail.com). 27 | 28 | -------------------------------------------------------------------------------- /VFN-Diff/openfold/data/tools/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 AlQuraishi Laboratory 2 | # Copyright 2021 DeepMind Technologies Limited 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Common utilities for data pipeline tools.""" 17 | import contextlib 18 | import datetime 19 | import logging 20 | import shutil 21 | import tempfile 22 | import time 23 | from typing import Optional 24 | 25 | 26 | @contextlib.contextmanager 27 | def tmpdir_manager(base_dir: Optional[str] = None): 28 | """Context manager that deletes a temporary directory on exit.""" 29 | tmpdir = tempfile.mkdtemp(dir=base_dir) 30 | try: 31 | yield tmpdir 32 | finally: 33 | shutil.rmtree(tmpdir, ignore_errors=True) 34 | 35 | 36 | @contextlib.contextmanager 37 | def timing(msg: str): 38 | logging.info("Started %s", msg) 39 | tic = time.perf_counter() 40 | yield 41 | toc = time.perf_counter() 42 | logging.info("Finished %s in %.3f seconds", msg, toc - tic) 43 | 44 | 45 | def to_date(s: str): 46 | return datetime.datetime( 47 | year=int(s[:4]), month=int(s[5:7]), day=int(s[8:10]) 48 | ) 49 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/data/lmdb_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | import lmdb 6 | import os 7 | import pickle 8 | import torch 9 | import numpy as np 10 | import collections 11 | from functools import lru_cache 12 | from . import data_utils 13 | import logging 14 | logger = logging.getLogger(__name__) 15 | 16 | class LMDBDataset: 17 | def __init__(self, db_path): 18 | self.db_path = db_path 19 | assert os.path.isfile(self.db_path), "{} not found".format( 20 | self.db_path 21 | ) 22 | env = self.connect_db(self.db_path) 23 | with env.begin() as txn: 24 | self._keys = list(txn.cursor().iternext(values=False)) 25 | 26 | def connect_db(self, lmdb_path, save_to_self=False): 27 | env = lmdb.open( 28 | lmdb_path, 29 | subdir=False, 30 | readonly=True, 31 | lock=False, 32 | readahead=False, 33 | meminit=False, 34 | max_readers=256, 35 | ) 36 | if not save_to_self: 37 | return env 38 | else: 39 | self.env = env 40 | 41 | def __len__(self): 42 | return len(self._keys) 43 | 44 | @lru_cache(maxsize=16) 45 | def __getitem__(self, idx): 46 | if not hasattr(self, 'env'): 47 | self.connect_db(self.db_path, save_to_self=True) 48 | datapoint_pickled = self.env.begin().get(self._keys[idx]) 49 | data = pickle.loads(datapoint_pickled) 50 | return data 51 | 52 | 53 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/optim/sgd.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch.optim 8 | 9 | from . 
import UnicoreOptimizer, register_optimizer 10 | 11 | 12 | @register_optimizer("sgd") 13 | class SGD(UnicoreOptimizer): 14 | def __init__(self, args, params): 15 | super().__init__(args) 16 | self._optimizer = torch.optim.SGD(params, **self.optimizer_config) 17 | 18 | @staticmethod 19 | def add_args(parser): 20 | """Add optimizer-specific arguments to the parser.""" 21 | # fmt: off 22 | parser.add_argument('--momentum', default=0.0, type=float, metavar='M', 23 | help='momentum factor') 24 | parser.add_argument('--weight-decay', '--wd', default=0.0, type=float, metavar='WD', 25 | help='weight decay') 26 | # fmt: on 27 | 28 | @property 29 | def optimizer_config(self): 30 | """ 31 | Return a kwarg dictionary that will be used to override optimizer 32 | args stored in checkpoints. This allows us to load a checkpoint and 33 | resume training using a different set of optimizer args, e.g., with a 34 | different learning rate. 35 | """ 36 | return { 37 | "lr": self.args.lr[0], 38 | "momentum": self.args.momentum, 39 | "weight_decay": self.args.weight_decay, 40 | } 41 | 42 | @property 43 | def supports_flat_params(self): 44 | return True 45 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/models/unicore_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | """ 7 | Base classes for various unicore models. 8 | """ 9 | 10 | import logging 11 | 12 | import torch 13 | import torch.nn as nn 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | class BaseUnicoreModel(nn.Module): 19 | """Base class for unicore models.""" 20 | 21 | def __init__(self): 22 | super().__init__() 23 | 24 | @classmethod 25 | def add_args(cls, parser): 26 | """Add model-specific arguments to the parser.""" 27 | pass 28 | 29 | @classmethod 30 | def build_model(cls, args, task): 31 | """Build a new model instance.""" 32 | raise NotImplementedError("Model must implement the build_model method") 33 | 34 | def extract_features(self, *args, **kwargs): 35 | """Similar to *forward* but only return features.""" 36 | return self(*args, **kwargs) 37 | 38 | def load_state_dict( 39 | self, 40 | state_dict, 41 | strict=True, 42 | model_args = None, 43 | ): 44 | """Copies parameters and buffers from *state_dict* into this module and 45 | its descendants. 46 | 47 | Overrides the method in :class:`nn.Module`. 48 | """ 49 | return super().load_state_dict(state_dict, strict) 50 | 51 | def set_num_updates(self, num_updates): 52 | """State from trainer to pass along to model at every update.""" 53 | 54 | def _apply(m): 55 | if hasattr(m, "set_num_updates") and m != self: 56 | m.set_num_updates(num_updates) 57 | 58 | self.apply(_apply) 59 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/optim/lr_scheduler/unicore_lr_scheduler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 
6 | 7 | from argparse import Namespace 8 | 9 | from unicore.optim import UnicoreOptimizer 10 | 11 | 12 | class UnicoreLRScheduler(object): 13 | def __init__(self, args, optimizer, total_train_steps): 14 | super().__init__() 15 | if optimizer is not None and not isinstance(optimizer, UnicoreOptimizer): 16 | raise ValueError("optimizer must be an instance of UnicoreOptimizer") 17 | self.args = args 18 | self.optimizer = optimizer 19 | self.total_train_steps = total_train_steps 20 | self.best = None 21 | 22 | @classmethod 23 | def add_args(cls, parser): 24 | """Add arguments to the parser for this LR scheduler.""" 25 | pass 26 | 27 | def state_dict(self): 28 | """Return the LR scheduler state dict.""" 29 | return {"best": self.best} 30 | 31 | def load_state_dict(self, state_dict): 32 | """Load an LR scheduler state dict.""" 33 | self.best = state_dict["best"] 34 | 35 | def step_begin_epoch(self, epoch): 36 | """Update the learning rate at the beginning of the given epoch.""" 37 | pass 38 | 39 | def step(self, epoch, val_loss=None): 40 | """Update the learning rate at the end of the given epoch.""" 41 | if val_loss is not None: 42 | if self.best is None: 43 | self.best = val_loss 44 | else: 45 | self.best = min(self.best, val_loss) 46 | 47 | def step_update(self, num_updates): 48 | """Update the learning rate after each update.""" 49 | return self.optimizer.get_lr() 50 | 51 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/data/raw_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | import torch 6 | from torch.utils.data.dataloader import default_collate 7 | from functools import lru_cache 8 | from . import UnicoreDataset 9 | 10 | 11 | class RawLabelDataset(UnicoreDataset): 12 | def __init__(self, labels): 13 | super().__init__() 14 | self.labels = labels 15 | 16 | @lru_cache(maxsize=16) 17 | def __getitem__(self, index): 18 | return self.labels[index] 19 | 20 | def __len__(self): 21 | return len(self.labels) 22 | 23 | def collater(self, samples): 24 | return torch.tensor(samples) 25 | 26 | 27 | class RawArrayDataset(UnicoreDataset): 28 | 29 | def __init__(self, dataset): 30 | super().__init__() 31 | self.dataset = dataset 32 | 33 | @lru_cache(maxsize=16) 34 | def __getitem__(self, index): 35 | return self.dataset[index] 36 | 37 | def __len__(self): 38 | return len(self.dataset) 39 | 40 | def collater(self, samples): 41 | if hasattr(self.dataset, 'collater'): 42 | return self.dataset.collater(samples) 43 | else: 44 | return default_collate(samples) 45 | 46 | 47 | class RawNumpyDataset(UnicoreDataset): 48 | 49 | def __init__(self, dataset): 50 | super().__init__() 51 | self.dataset = dataset 52 | 53 | @lru_cache(maxsize=16) 54 | def __getitem__(self, index): 55 | return torch.from_numpy(self.dataset[index]) 56 | 57 | def __len__(self): 58 | return len(self.dataset) 59 | 60 | def collater(self, samples): 61 | if hasattr(self.dataset, 'collater'): 62 | return self.dataset.collater(samples) 63 | else: 64 | return default_collate(samples) 65 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/data/base_wrapper_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 2 | # Copyright (c) Facebook, Inc. 
and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from torch.utils.data.dataloader import default_collate 8 | 9 | from . import UnicoreDataset 10 | 11 | 12 | class BaseWrapperDataset(UnicoreDataset): 13 | def __init__(self, dataset): 14 | super().__init__() 15 | self.dataset = dataset 16 | 17 | def __getitem__(self, index): 18 | return self.dataset[index] 19 | 20 | def __len__(self): 21 | return len(self.dataset) 22 | 23 | def collater(self, samples): 24 | if hasattr(self.dataset, "collater"): 25 | return self.dataset.collater(samples) 26 | else: 27 | return default_collate(samples) 28 | 29 | def ordered_indices(self): 30 | return self.dataset.ordered_indices() 31 | 32 | @property 33 | def supports_prefetch(self): 34 | return getattr(self.dataset, "supports_prefetch", False) 35 | 36 | def attr(self, attr: str, index: int): 37 | return self.dataset.attr(attr, index) 38 | 39 | def prefetch(self, indices): 40 | self.dataset.prefetch(indices) 41 | 42 | def batch_by_size( 43 | self, 44 | indices, 45 | batch_size=None, 46 | required_batch_size_multiple=1, 47 | ): 48 | return self.dataset.batch_by_size( 49 | indices, 50 | batch_size=batch_size, 51 | required_batch_size_multiple=required_batch_size_multiple, 52 | ) 53 | 54 | @property 55 | def can_reuse_epoch_itr_across_epochs(self): 56 | return self.dataset.can_reuse_epoch_itr_across_epochs 57 | 58 | def set_epoch(self, epoch): 59 | super().set_epoch(epoch) 60 | if hasattr(self.dataset, "set_epoch"): 61 | self.dataset.set_epoch(epoch) 62 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/optim/adadelta.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import torch.optim 8 | 9 | from . import UnicoreOptimizer, register_optimizer 10 | 11 | 12 | @register_optimizer("adadelta") 13 | class Adadelta(UnicoreOptimizer): 14 | def __init__(self, args, params): 15 | super().__init__(args) 16 | self._optimizer = torch.optim.Adadelta(params, **self.optimizer_config) 17 | 18 | @staticmethod 19 | def add_args(parser): 20 | """Add optimizer-specific arguments to the parser.""" 21 | # fmt: off 22 | parser.add_argument('--adadelta-rho', type=float, default=0.9, metavar='RHO', 23 | help='coefficient used for computing a running average of squared gradients') 24 | parser.add_argument('--adadelta-eps', type=float, default=1e-6, metavar='EPS', 25 | help='term added to the denominator to improve numerical stability') 26 | parser.add_argument('--weight-decay', '--wd', default=0.0, type=float, metavar='WD', 27 | help='weight decay') 28 | parser.add_argument('--anneal-eps', action='store_true', help='flag to anneal eps') 29 | # fmt: on 30 | 31 | @property 32 | def optimizer_config(self): 33 | """ 34 | Return a kwarg dictionary that will be used to override optimizer 35 | args stored in checkpoints. This allows us to load a checkpoint and 36 | resume training using a different set of optimizer args, e.g., with a 37 | different learning rate. 
38 | """ 39 | return { 40 | "lr": self.args.lr[0], 41 | "rho": self.args.adadelta_rho, 42 | "eps": self.args.adadelta_eps, 43 | "weight_decay": self.args.weight_decay, 44 | } 45 | 46 | @property 47 | def supports_flat_params(self): 48 | return True 49 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/.gitignore: -------------------------------------------------------------------------------- 1 | *.pt 2 | *.tfevents.* 3 | # JetBrains PyCharm IDE 4 | .idea/ 5 | 6 | # Byte-compiled / optimized / DLL files 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | 11 | # C extensions 12 | *.so 13 | 14 | # macOS dir files 15 | .DS_Store 16 | 17 | # Distribution / packaging 18 | .Python 19 | env/ 20 | build/ 21 | develop-eggs/ 22 | dist/ 23 | downloads/ 24 | eggs/ 25 | .eggs/ 26 | lib/ 27 | lib64/ 28 | parts/ 29 | sdist/ 30 | var/ 31 | wheels/ 32 | *.egg-info/ 33 | .installed.args 34 | *.egg 35 | 36 | # Checkpoints 37 | checkpoints 38 | 39 | # PyInstaller 40 | # Usually these files are written by a python script from a template 41 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 42 | *.manifest 43 | *.spec 44 | 45 | # Installer logs 46 | pip-log.txt 47 | pip-delete-this-directory.txt 48 | 49 | # Unit test / coverage reports 50 | htmlcov/ 51 | .tox/ 52 | .coverage 53 | .coverage.* 54 | .cache 55 | nosetests.xml 56 | coverage.xml 57 | *.cover 58 | .hypothesis/ 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | local_settings.py 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # celery beat schedule file 88 | celerybeat-schedule 89 | 90 | # SageMath parsed files 91 | *.sage.py 92 | 93 | # dotenv 94 | .env 95 | 96 | # virtualenv 97 | .venv 98 | venv/ 99 | ENV/ 100 | 101 | # Spyder project settings 102 | .spyderproject 103 | .spyproject 104 | 105 | # Rope project settings 106 | .ropeproject 107 | 108 | # mypy 109 | .mypy_cache/ 110 | 111 | # VSCODE 112 | .vscode/ftp-sync.json 113 | .vscode/settings.json 114 | 115 | # too big to git 116 | *.lmdb 117 | *.sto 118 | *.pt 119 | *.pkl 120 | 121 | # pytest 122 | .pytest_cache 123 | test/.pytest_cache 124 | /local* 125 | /_* -------------------------------------------------------------------------------- /VFN-IF/unifold/model.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Any 3 | 4 | from unicore.models import BaseUnicoreModel, register_model, register_model_architecture 5 | from unifold.modules.alphafold import AlphaFold 6 | from unifold.modules.denoise import Denosie 7 | from unifold.config import model_config 8 | 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | @register_model("af2") 14 | class AlphafoldModel(BaseUnicoreModel): 15 | @staticmethod 16 | def add_args(parser): 17 | """Add model-specific arguments to the parser.""" 18 | parser.add_argument( 19 | "--model-name", 20 | help="choose the model config", 21 | ) 22 | 23 | def __init__(self, args): 24 | super().__init__() 25 | base_architecture(args) 26 | self.args = args 27 | config = model_config( 28 | self.args.model_name, 29 | train=True, 30 | ) 31 | self.model = AlphaFold(config) 32 | self.config = config 33 | 34 | def half(self): 35 | self.model = 
self.model.half() 36 | return self 37 | 38 | def bfloat16(self): 39 | self.model = self.model.bfloat16() 40 | return self 41 | 42 | @classmethod 43 | def build_model(cls, args, task): 44 | """Build a new model instance.""" 45 | return cls(args) 46 | 47 | def forward(self, batch, **kwargs): 48 | outputs = self.model.forward(batch) 49 | return outputs, self.config.loss 50 | 51 | 52 | @register_model_architecture("af2", "af2") 53 | def base_architecture(args): 54 | args.model_name = getattr(args, "model_name", "model_2") 55 | 56 | 57 | @register_model("de") 58 | class DeModel(AlphafoldModel): 59 | def __init__(self, args): 60 | BaseUnicoreModel.__init__(self) 61 | base_architecture(args) 62 | self.args = args 63 | config = model_config( 64 | self.args.model_name, 65 | train=True, 66 | ) 67 | self.model = Denosie(config) 68 | self.config = config 69 | 70 | @register_model_architecture("de", "de") 71 | def base_architecture(args): 72 | args.model_name = getattr(args, "model_name", "de_base") -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/distributed/module_proxy_wrapper.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from torch import nn 8 | 9 | 10 | class ModuleProxyWrapper(nn.Module): 11 | """ 12 | Wrap a DistributedDataParallel module and forward requests for missing 13 | attributes to the module wrapped by DDP (the twice-wrapped module). 14 | Also forward calls to :func:`state_dict` and :func:`load_state_dict`. 15 | 16 | Usage:: 17 | 18 | module.xyz = "hello world" 19 | wrapped_module = DistributedDataParallel(module, **ddp_args) 20 | wrapped_module = ModuleProxyWrapper(wrapped_module) 21 | assert wrapped_module.xyz == "hello world" 22 | assert wrapped_module.state_dict().keys() == module.state_dict().keys() 23 | 24 | Args: 25 | module (nn.Module): module to wrap 26 | """ 27 | 28 | def __init__(self, module: nn.Module): 29 | super().__init__() 30 | assert hasattr(module, "module"), \ 31 | "ModuleProxyWrapper expects input to wrap another module" 32 | self.module = module 33 | 34 | def __getattr__(self, name): 35 | """Forward missing attributes to twice-wrapped module.""" 36 | try: 37 | # defer to nn.Module's logic 38 | return super().__getattr__(name) 39 | except AttributeError: 40 | try: 41 | # forward to the once-wrapped module 42 | return getattr(self.module, name) 43 | except AttributeError: 44 | # forward to the twice-wrapped module 45 | return getattr(self.module.module, name) 46 | 47 | def state_dict(self, *args, **kwargs): 48 | """Forward to the twice-wrapped module.""" 49 | return self.module.module.state_dict(*args, **kwargs) 50 | 51 | def load_state_dict(self, *args, **kwargs): 52 | """Forward to the twice-wrapped module.""" 53 | return self.module.module.load_state_dict(*args, **kwargs) 54 | 55 | def forward(self, *args, **kwargs): 56 | return self.module(*args, **kwargs) 57 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/optim/lr_scheduler/exponential_decay_schedule.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 2 | # Copyright (c) Facebook, Inc. and its affiliates. 
3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from typing import List 8 | 9 | from unicore.optim.lr_scheduler import UnicoreLRScheduler, register_lr_scheduler 10 | 11 | @register_lr_scheduler("exponential_decay") 12 | class ExponentialDecayLRSchedule(UnicoreLRScheduler): 13 | """Decay the LR on a fixed schedule.""" 14 | 15 | def __init__(self, args, optimizer, total_train_steps): 16 | super().__init__(args, optimizer, total_train_steps) 17 | self.warmup_updates = args.warmup_updates 18 | self.lr = args.lr[0] 19 | if self.warmup_updates > 0: 20 | self.warmup_factor = 1.0 / self.warmup_updates 21 | else: 22 | self.warmup_factor = 1.0 23 | self.decay_ratio = args.decay_ratio 24 | self.decay_steps = args.decay_steps 25 | self.optimizer.set_lr(self.warmup_factor * self.lr) 26 | self.stair_decay = getattr(args, "stair_decay", False) 27 | 28 | @staticmethod 29 | def add_args(parser): 30 | """Add arguments to the parser for this LR scheduler.""" 31 | parser.add_argument('--warmup-updates', default=1000, type=int, metavar='N', 32 | help='warmup the learning rate linearly for the first N updates') 33 | parser.add_argument('--decay-ratio', default=0.95, type=float) 34 | parser.add_argument('--decay-steps', default=500, type=int) 35 | parser.add_argument('--stair-decay', action="store_true") 36 | 37 | def step_update(self, num_updates): 38 | """Update the learning rate after each update.""" 39 | if self.warmup_updates > 0 and num_updates <= self.warmup_updates: 40 | self.warmup_factor = num_updates / float(self.warmup_updates) 41 | lr = self.warmup_factor * self.lr 42 | else: 43 | if self.stair_decay: 44 | step = num_updates 45 | lr = self.lr * float(self.decay_ratio ** (int(step // self.decay_steps))) 46 | else: 47 | step = num_updates - self.warmup_updates 48 | lr = self.lr * float(self.decay_ratio ** (float(step / self.decay_steps))) 49 | self.optimizer.set_lr(lr) 50 | return self.optimizer.get_lr() 51 | -------------------------------------------------------------------------------- /VFN-Diff/data/parsers.py: -------------------------------------------------------------------------------- 1 | """Library for parsing different data structures.""" 2 | from Bio.PDB.Chain import Chain 3 | import numpy as np 4 | 5 | from data import residue_constants 6 | from data import protein 7 | 8 | Protein = protein.Protein 9 | 10 | 11 | def process_chain(chain: Chain, chain_id: str) -> Protein: 12 | """Convert a PDB chain object into a AlphaFold Protein instance. 13 | 14 | Forked from alphafold.common.protein.from_pdb_string 15 | 16 | WARNING: All non-standard residue types will be converted into UNK. All 17 | non-standard atoms will be ignored. 18 | 19 | Took out lines 94-97 which don't allow insertions in the PDB. 20 | Sabdab uses insertions for the chothia numbering so we need to allow them. 21 | 22 | Took out lines 110-112 since that would mess up CDR numbering. 23 | 24 | Args: 25 | chain: Instance of Biopython's chain class. 26 | 27 | Returns: 28 | Protein object with protein features. 
29 | """ 30 | atom_positions = [] 31 | aatype = [] 32 | atom_mask = [] 33 | residue_index = [] 34 | b_factors = [] 35 | chain_ids = [] 36 | for res in chain: 37 | res_shortname = residue_constants.restype_3to1.get(res.resname, 'X') 38 | restype_idx = residue_constants.restype_order.get( 39 | res_shortname, residue_constants.restype_num) 40 | pos = np.zeros((residue_constants.atom_type_num, 3)) 41 | mask = np.zeros((residue_constants.atom_type_num,)) 42 | res_b_factors = np.zeros((residue_constants.atom_type_num,)) 43 | for atom in res: 44 | if atom.name not in residue_constants.atom_types: 45 | continue 46 | pos[residue_constants.atom_order[atom.name]] = atom.coord 47 | mask[residue_constants.atom_order[atom.name]] = 1. 48 | res_b_factors[residue_constants.atom_order[atom.name] 49 | ] = atom.bfactor 50 | aatype.append(restype_idx) 51 | atom_positions.append(pos) 52 | atom_mask.append(mask) 53 | residue_index.append(res.id[1]) 54 | b_factors.append(res_b_factors) 55 | chain_ids.append(chain_id) 56 | 57 | return Protein( 58 | atom_positions=np.array(atom_positions), 59 | atom_mask=np.array(atom_mask), 60 | aatype=np.array(aatype), 61 | residue_index=np.array(residue_index), 62 | chain_index=np.array(chain_ids), 63 | b_factors=np.array(b_factors)) -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/csrc/rounding/fp32_to_bf16.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | union float_int_32 18 | { 19 | uint32_t i; 20 | float f; 21 | }; 22 | 23 | __global__ void fp32_to_bf16( 24 | const float* input, 25 | nv_bfloat16* output, 26 | const int tsize, 27 | uint64_t seed, 28 | uint64_t offset) { 29 | 30 | int i = threadIdx.x + blockIdx.x * blockDim.x; 31 | if (i < tsize) { 32 | float_int_32 d; 33 | d.f = input[i]; 34 | curandStatePhilox4_32_10_t state; 35 | curand_init(seed, i, offset, &state); 36 | d.i += curand(&state) & 0x0000ffff; 37 | output[i] = __float2bfloat16_rz(d.f); 38 | } 39 | } 40 | 41 | void fused_fp32_to_bf16_sr_cuda( 42 | at::Tensor & input, 43 | at::Tensor & output) 44 | { 45 | int tsize = input.numel(); 46 | const int threadsPerBlock = 512; 47 | const int blocks = (tsize + threadsPerBlock - 1) / threadsPerBlock; 48 | AT_ASSERTM(at::cuda::detail::canUse32BitIndexMath(input), "parameter tensor is too large to be indexed with int32"); 49 | AT_ASSERTM(input.scalar_type() == at::ScalarType::Float, "expected input to be float32 tensor"); 50 | AT_ASSERTM(output.scalar_type() == at::ScalarType::BFloat16, "expected output to be bfloat16 tensor"); 51 | auto gen = at::cuda::detail::getDefaultCUDAGenerator(); 52 | std::pair rng_engine_inputs; 53 | { 54 | // See Note [Acquire lock when using random generators] 55 | std::lock_guard lock(gen.mutex()); 56 | rng_engine_inputs = at::check_generator(gen)->philox_engine_inputs(1); 57 | } 58 | uint64_t seed = std::get<0>(rng_engine_inputs); 59 | uint64_t offset = std::get<1>(rng_engine_inputs); 60 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 61 | fp32_to_bf16<<>>( 62 | (const float*)input.data_ptr(), 63 | (nv_bfloat16*)output.data_ptr(), 64 | tsize, 65 | seed, 66 | offset); 67 | AT_CUDA_CHECK(cudaGetLastError()); 68 | } 69 | 70 | -------------------------------------------------------------------------------- /VFN-Diff/openfold/model/dropout.py: 
-------------------------------------------------------------------------------- 1 | # Copyright 2021 AlQuraishi Laboratory 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | import torch 17 | import torch.nn as nn 18 | from functools import partialmethod 19 | from typing import Union, List 20 | 21 | 22 | class Dropout(nn.Module): 23 | """ 24 | Implementation of dropout with the ability to share the dropout mask 25 | along a particular dimension. 26 | 27 | If not in training mode, this module computes the identity function. 28 | """ 29 | 30 | def __init__(self, r: float, batch_dim: Union[int, List[int]]): 31 | """ 32 | Args: 33 | r: 34 | Dropout rate 35 | batch_dim: 36 | Dimension(s) along which the dropout mask is shared 37 | """ 38 | super(Dropout, self).__init__() 39 | 40 | self.r = r 41 | if type(batch_dim) == int: 42 | batch_dim = [batch_dim] 43 | self.batch_dim = batch_dim 44 | self.dropout = nn.Dropout(self.r) 45 | 46 | def forward(self, x: torch.Tensor) -> torch.Tensor: 47 | """ 48 | Args: 49 | x: 50 | Tensor to which dropout is applied. Can have any shape 51 | compatible with self.batch_dim 52 | """ 53 | shape = list(x.shape) 54 | if self.batch_dim is not None: 55 | for bd in self.batch_dim: 56 | shape[bd] = 1 57 | mask = x.new_ones(shape) 58 | mask = self.dropout(mask) 59 | x *= mask 60 | return x 61 | 62 | 63 | class DropoutRowwise(Dropout): 64 | """ 65 | Convenience class for rowwise dropout as described in subsection 66 | 1.11.6. 67 | """ 68 | 69 | __init__ = partialmethod(Dropout.__init__, batch_dim=-3) 70 | 71 | 72 | class DropoutColumnwise(Dropout): 73 | """ 74 | Convenience class for columnwise dropout as described in subsection 75 | 1.11.6. 
76 | """ 77 | 78 | __init__ = partialmethod(Dropout.__init__, batch_dim=-2) 79 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/docker/rdma/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvcr.io/nvidia/pytorch:22.04-py3 2 | 3 | RUN APT_INSTALL="apt-get install -y --no-install-recommends" && \ 4 | rm -rf /var/lib/apt/lists/* \ 5 | /etc/apt/sources.list.d/cuda.list \ 6 | /etc/apt/sources.list.d/nvidia-ml.list && \ 7 | apt-get update && \ 8 | DEBIAN_FRONTEND=noninteractive $APT_INSTALL \ 9 | software-properties-common \ 10 | && \ 11 | apt-get update && \ 12 | DEBIAN_FRONTEND=noninteractive $APT_INSTALL \ 13 | build-essential \ 14 | apt-utils \ 15 | ca-certificates \ 16 | wget \ 17 | git \ 18 | vim \ 19 | libssl-dev \ 20 | curl \ 21 | unzip \ 22 | unrar \ 23 | cmake \ 24 | net-tools \ 25 | sudo \ 26 | autotools-dev \ 27 | rsync \ 28 | jq \ 29 | openssh-server \ 30 | tmux \ 31 | screen \ 32 | htop \ 33 | pdsh \ 34 | openssh-client \ 35 | lshw \ 36 | dmidecode \ 37 | util-linux \ 38 | automake \ 39 | autoconf \ 40 | libtool \ 41 | net-tools \ 42 | pciutils \ 43 | libpci-dev \ 44 | libaio-dev \ 45 | libcap2 \ 46 | libtinfo5 \ 47 | fakeroot \ 48 | devscripts \ 49 | debhelper \ 50 | nfs-common 51 | 52 | RUN pip uninstall -y torch torchvision torchtext && \ 53 | pip uninstall -y torch torchvision torchtext && \ 54 | rm -rf ~/.cache/pip && \ 55 | conda clean -ya 56 | 57 | RUN conda install -y pyyaml tensorboardX && \ 58 | conda clean -ya 59 | 60 | # RUN ldconfig 61 | 62 | # # ================================================================== 63 | # # pytorch 64 | # # ------------------------------------------------------------------ 65 | ENV TORCH_CUDA_ARCH_LIST "7.0;7.5;8.0" 66 | 67 | RUN conda install -y ninja typing && \ 68 | conda clean -ya 69 | 70 | RUN pip3 install --no-cache-dir torch==1.12.1 --extra-index-url https://download.pytorch.org/whl/cu116 && rm -rf ~/.cache/pip 71 | 72 | RUN cd /tmp && \ 73 | git clone https://github.com/dptech-corp/Uni-Core && \ 74 | cd Uni-Core && \ 75 | python setup.py install && \ 76 | rm -rf /tmp/* && rm -rf ~/.cache/pip 77 | 78 | RUN pip3 install --no-cache-dir tokenizers lmdb biopython ml-collections timeout-decorator urllib3 tree dm-tree && rm -rf ~/.cache/pip 79 | 80 | RUN ldconfig && \ 81 | apt-get clean && \ 82 | apt-get autoremove && \ 83 | rm -rf /var/lib/apt/lists/* /tmp/* && \ 84 | conda clean -ya 85 | -------------------------------------------------------------------------------- /VFN-Diff/openfold/utils/exponential_moving_average.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import copy 3 | import torch 4 | import torch.nn as nn 5 | 6 | from openfold.utils.tensor_utils import tensor_tree_map 7 | 8 | 9 | class ExponentialMovingAverage: 10 | """ 11 | Maintains moving averages of parameters with exponential decay 12 | 13 | At each step, the stored copy `copy` of each parameter `param` is 14 | updated as follows: 15 | 16 | `copy = decay * copy + (1 - decay) * param` 17 | 18 | where `decay` is an attribute of the ExponentialMovingAverage object. 19 | """ 20 | 21 | def __init__(self, model: nn.Module, decay: float): 22 | """ 23 | Args: 24 | model: 25 | A torch.nn.Module whose parameters are to be tracked 26 | decay: 27 | A value (usually close to 1.) 
by which updates are 28 | weighted as part of the above formula 29 | """ 30 | super(ExponentialMovingAverage, self).__init__() 31 | 32 | clone_param = lambda t: t.clone().detach() 33 | self.params = tensor_tree_map(clone_param, model.state_dict()) 34 | self.decay = decay 35 | self.device = next(model.parameters()).device 36 | 37 | def to(self, device): 38 | self.params = tensor_tree_map(lambda t: t.to(device), self.params) 39 | self.device = device 40 | 41 | def _update_state_dict_(self, update, state_dict): 42 | with torch.no_grad(): 43 | for k, v in update.items(): 44 | stored = state_dict[k] 45 | if not isinstance(v, torch.Tensor): 46 | self._update_state_dict_(v, stored) 47 | else: 48 | diff = stored - v 49 | diff *= 1 - self.decay 50 | stored -= diff 51 | 52 | def update(self, model: torch.nn.Module) -> None: 53 | """ 54 | Updates the stored parameters using the state dict of the provided 55 | module. The module should have the same structure as that used to 56 | initialize the ExponentialMovingAverage object. 57 | """ 58 | self._update_state_dict_(model.state_dict(), self.params) 59 | 60 | def load_state_dict(self, state_dict: OrderedDict) -> None: 61 | self.params = state_dict["params"] 62 | self.decay = state_dict["decay"] 63 | 64 | def state_dict(self) -> OrderedDict: 65 | return OrderedDict( 66 | { 67 | "params": self.params, 68 | "decay": self.decay, 69 | } 70 | ) 71 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/README.md: -------------------------------------------------------------------------------- 1 | Uni-Core, an efficient distributed PyTorch framework 2 | ==================================================== 3 | 4 | Uni-Core is built for rapidly creating high-performance PyTorch models, especially Transformer-based models. It supports the following features: 5 | - Distributed training over multiple GPUs and nodes 6 | - Mixed-precision training with fp16 and bf16 7 | - High-performance fused CUDA kernels 8 | - Model checkpoint management 9 | - Friendly logging 10 | - Buffered (GPU-CPU overlapping) data loader 11 | - Gradient accumulation 12 | - Commonly used optimizers and LR schedulers 13 | - Easy to create new models 14 | 15 | 16 | Installation 17 | ------------ 18 | 19 | **Build from source** 20 | 21 | You can use `python setup.py install` or `pip install .` to build Uni-Core from source. The CUDA version in the build environment should be the same as the one used by PyTorch. 22 | 23 | 24 | **Use pre-compiled Python wheels** 25 | 26 | We also provide wheels pre-compiled by GitHub Actions. You can download them from the [Release](https://github.com/dptech-corp/Uni-Core/releases) page; check that the Python version, PyTorch version, and CUDA version match your environment. For example, for PyTorch 1.12.1, Python 3.7, and CUDA 11.3, you can install [unicore-0.0.1+cu113torch1.12.1-cp37-cp37m-linux_x86_64.whl](https://github.com/dptech-corp/Uni-Core/releases/download/0.0.1/unicore-0.0.1+cu113torch1.12.1-cp37-cp37m-linux_x86_64.whl). 27 | 28 | **Docker image** 29 | 30 | We also provide a Docker image. You can pull it with `docker pull dptechnology/unicore:0.0.1-pytorch1.11.0-cuda11.3`. To use GPUs within Docker, you need to [install nvidia-docker-2](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#docker) first. 31 | 32 | 33 | Example 34 | ------- 35 | 36 | To build a model, you can refer to [examples/bert](https://github.com/dptech-corp/Uni-Core/tree/main/examples/bert).
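Registering a new component follows the same pattern throughout Uni-Core. As a minimal sketch (the loss name `my_l2` and the class below are illustrative placeholders, not part of the library), a custom loss can be added like this:

```python
# Illustrative sketch: "my_l2" and MyL2Loss are placeholder names.
import torch.nn.functional as F

from unicore.losses import UnicoreLoss, register_loss


@register_loss("my_l2")
class MyL2Loss(UnicoreLoss):
    def forward(self, model, sample, reduce=True):
        # Run the model on the batch and compute a summed MSE loss.
        net_output = model(**sample["net_input"])
        loss = F.mse_loss(
            net_output,
            sample["target"].float(),
            reduction="sum" if reduce else "none",
        )
        sample_size = sample["target"].size(0)
        logging_output = {
            "loss": loss.data,
            "bsz": sample_size,
            "sample_size": sample_size,
        }
        return loss, sample_size, logging_output
```

Once registered, the loss can be selected by its name, in the same way the built-in `cross_entropy` and `masked_lm` losses are.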
37 | 38 | Related projects 39 | ---------------- 40 | 41 | - [Uni-Mol](https://github.com/dptech-corp/Uni-Mol) 42 | - [Uni-Fold](https://github.com/dptech-corp/Uni-Fold) 43 | 44 | Acknowledgement 45 | --------------- 46 | 47 | The main framework is from [facebookresearch/fairseq](https://github.com/facebookresearch/fairseq). 48 | 49 | The fused kernels are from [guolinke/fused_ops](https://github.com/guolinke/fused_ops). 50 | 51 | Dockerfile is from [guolinke/pytorch-docker](https://github.com/guolinke/pytorch-docker). 52 | 53 | License 54 | ------- 55 | 56 | This project is licensed under the terms of the MIT license. See [LICENSE](https://github.com/dptech-corp/Uni-Core/blob/main/LICENSE) for additional details. 57 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/losses/cross_entropy.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import math 8 | import torch 9 | import torch.nn.functional as F 10 | from unicore import metrics 11 | from unicore.losses import UnicoreLoss, register_loss 12 | 13 | @register_loss("cross_entropy") 14 | class CrossEntropyLoss(UnicoreLoss): 15 | def __init__(self, task): 16 | super().__init__(task) 17 | 18 | def forward(self, model, sample, reduce=True): 19 | """Compute the loss for the given sample. 20 | 21 | Returns a tuple with three elements: 22 | 1) the loss 23 | 2) the sample size, which is used as the denominator for the gradient 24 | 3) logging outputs to display while training 25 | """ 26 | net_output = model(**sample["net_input"]) 27 | loss = self.compute_loss(model, net_output, sample, reduce=reduce) 28 | sample_size = sample["target"].size(0) 29 | logging_output = { 30 | "loss": loss.data, 31 | "bsz": sample["target"].size(0), 32 | "sample_size": sample_size, 33 | } 34 | return loss, sample_size, logging_output 35 | 36 | def compute_loss(self, model, net_output, sample, reduce=True): 37 | lprobs = F.log_softmax(net_output.float(), dim=-1) 38 | lprobs = lprobs.view(-1, lprobs.size(-1)) 39 | target = sample['target'].view(-1) 40 | loss = F.nll_loss( 41 | lprobs, 42 | target, 43 | reduction="sum" if reduce else "none", 44 | ) 45 | return loss 46 | 47 | @staticmethod 48 | def reduce_metrics(logging_outputs, split='valid') -> None: 49 | """Aggregate logging outputs from data parallel training.""" 50 | loss_sum = sum(log.get("loss", 0) for log in logging_outputs) 51 | sample_size = sum(log.get("sample_size", 0) for log in logging_outputs) 52 | 53 | # we divide by log(2) to convert the loss from base e to base 2 54 | metrics.log_scalar( 55 | "loss", loss_sum / sample_size / math.log(2), sample_size, round=3 56 | ) 57 | 58 | @staticmethod 59 | def logging_outputs_can_be_summed(is_train) -> bool: 60 | """ 61 | Whether the logging outputs returned by `forward` can be summed 62 | across workers prior to calling `reduce_metrics`. Setting this 63 | to True will improves distributed training speed. 64 | """ 65 | return True 66 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/losses/masked_lm.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 2 | # Copyright (c) Facebook, Inc. and its affiliates. 
3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | import math 7 | import torch 8 | import torch.nn.functional as F 9 | from unicore import metrics 10 | from unicore.losses import UnicoreLoss, register_loss 11 | 12 | @register_loss("masked_lm") 13 | class MaskedLMLoss(UnicoreLoss): 14 | def __init__(self, task): 15 | super().__init__(task) 16 | self.padding_idx = task.dictionary.pad() 17 | 18 | def forward(self, model, sample, reduce=True): 19 | masked_tokens = sample["target"].ne(self.padding_idx) 20 | sample_size = masked_tokens.int().sum() 21 | 22 | masked_tokens = torch.where( 23 | masked_tokens.any(), 24 | masked_tokens, 25 | masked_tokens.new([True]), 26 | ) 27 | logits = model(**sample["net_input"], masked_tokens=masked_tokens) 28 | target = sample['target'] 29 | if masked_tokens is not None: 30 | target = target[masked_tokens] 31 | loss = F.nll_loss( 32 | F.log_softmax(logits, dim=-1, dtype=torch.float32), 33 | target, 34 | ignore_index=self.padding_idx, 35 | reduction='sum', 36 | ) 37 | logging_output = { 38 | "loss": loss.data, 39 | "bsz": sample["target"].size(0), 40 | "sample_size": sample_size, 41 | "seq_len": sample["target"].size(1) * sample["target"].size(0), 42 | } 43 | return loss, sample_size, logging_output 44 | 45 | @staticmethod 46 | def reduce_metrics(logging_outputs, split='valid') -> None: 47 | """Aggregate logging outputs from data parallel training.""" 48 | loss_sum = sum(log.get("loss", 0) for log in logging_outputs) 49 | bsz = sum(log.get("bsz", 0) for log in logging_outputs) 50 | sample_size = sum(log.get("sample_size", 0) for log in logging_outputs) 51 | seq_len = sum(log.get("seq_len", 0) for log in logging_outputs) 52 | # we divide by log(2) to convert the loss from base e to base 2 53 | metrics.log_scalar( 54 | "loss", loss_sum / sample_size / math.log(2), sample_size, round=3 55 | ) 56 | metrics.log_scalar( 57 | "seq_len", seq_len / bsz, 1, round=3 58 | ) 59 | 60 | @staticmethod 61 | def logging_outputs_can_be_summed(is_train) -> bool: 62 | """ 63 | Whether the logging outputs returned by `forward` can be summed 64 | across workers prior to calling `reduce_metrics`. Setting this 65 | to True will improves distributed training speed. 66 | """ 67 | return True 68 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/models/distributed_unicore_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import logging 8 | 9 | import torch 10 | import torch.nn as nn 11 | from torch.nn.parallel import DistributedDataParallel 12 | 13 | from unicore.distributed import ( 14 | ModuleProxyWrapper, LegacyDistributedDataParallel 15 | ) 16 | 17 | 18 | logger = logging.getLogger(__name__) 19 | 20 | def DistributedUnicoreModel(args, model, process_group, device): 21 | """ 22 | Wrap a *model* to support distributed data parallel training. 23 | 24 | This is similar to the built-in DistributedDataParallel, but allows 25 | additional configuration of the DistributedDataParallel class to 26 | use, and also provides easier access to the wrapped model by 27 | forwarding requests for missing attributes to the wrapped model. 
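    A typical call (illustrative; the arguments are exactly those described
    below) is::

        wrapped_model = DistributedUnicoreModel(args, model, process_group, device)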
28 | 29 | Args: 30 | args (argparse.Namespace): unicore args 31 | model (BaseUnicoreModel): model to wrap 32 | process_group: the c10d process group to be used for distributed data 33 | parallel all-reduction. 34 | device: device to move model to 35 | """ 36 | assert isinstance(model, nn.Module) 37 | if args.ddp_backend in {"c10d", "pytorch_ddp"}: 38 | wrapped_model = DistributedDataParallel( 39 | module=model.to(device), 40 | device_ids=[args.device_id], 41 | output_device=args.device_id, 42 | broadcast_buffers=args.broadcast_buffers, 43 | bucket_cap_mb=args.bucket_cap_mb, 44 | process_group=process_group, 45 | find_unused_parameters=args.find_unused_parameters, 46 | ) 47 | # forward missing getattr and state_dict/load_state_dict to orig model 48 | wrapped_model = ModuleProxyWrapper(wrapped_model) 49 | elif args.ddp_backend in {'apex'}: 50 | import apex 51 | wrapped_model = apex.parallel.DistributedDataParallel( 52 | module=model.to(device) 53 | ) 54 | # forward missing getattr and state_dict/load_state_dict to orig model 55 | wrapped_model = ModuleProxyWrapper(wrapped_model) 56 | elif args.ddp_backend in {"no_c10d", "legacy_ddp"}: 57 | wrapped_model = LegacyDistributedDataParallel( 58 | module=model.to(device), 59 | buffer_size=2 ** 28, 60 | process_group=process_group, 61 | ) 62 | # forward missing getattr and state_dict/load_state_dict to orig model 63 | wrapped_model = ModuleProxyWrapper(wrapped_model) 64 | else: 65 | raise ValueError("Unknown --ddp-backend: " + args.ddp_backend) 66 | 67 | return wrapped_model 68 | -------------------------------------------------------------------------------- /VFN-Diff/openfold/utils/superimposition.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 AlQuraishi Laboratory 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from Bio.SVDSuperimposer import SVDSuperimposer 16 | import numpy as np 17 | import torch 18 | 19 | 20 | def _superimpose_np(reference, coords): 21 | """ 22 | Superimposes coordinates onto a reference by minimizing RMSD using SVD. 23 | 24 | Args: 25 | reference: 26 | [N, 3] reference array 27 | coords: 28 | [N, 3] array 29 | Returns: 30 | A tuple of [N, 3] superimposed coords and the final RMSD. 31 | """ 32 | sup = SVDSuperimposer() 33 | sup.set(reference, coords) 34 | sup.run() 35 | return sup.get_transformed(), sup.get_rms() 36 | 37 | 38 | def _superimpose_single(reference, coords): 39 | reference_np = reference.detach().cpu().numpy() 40 | coords_np = coords.detach().cpu().numpy() 41 | superimposed, rmsd = _superimpose_np(reference_np, coords_np) 42 | return coords.new_tensor(superimposed), coords.new_tensor(rmsd) 43 | 44 | 45 | def superimpose(reference, coords): 46 | """ 47 | Superimposes coordinates onto a reference by minimizing RMSD using SVD. 48 | 49 | Args: 50 | reference: 51 | [*, N, 3] reference tensor 52 | coords: 53 | [*, N, 3] tensor 54 | Returns: 55 | A tuple of [*, N, 3] superimposed coords and [*] final RMSDs. 
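    Example (shapes are illustrative)::

        # reference, coords: [B, N, 3] coordinate tensors
        superimposed, rmsds = superimpose(reference, coords)  # rmsds: [B]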
56 | """ 57 | batch_dims = reference.shape[:-2] 58 | flat_reference = reference.reshape((-1,) + reference.shape[-2:]) 59 | flat_coords = coords.reshape((-1,) + reference.shape[-2:]) 60 | superimposed_list = [] 61 | rmsds = [] 62 | for r, c in zip(flat_reference, flat_coords): 63 | superimposed, rmsd = _superimpose_single(r, c) 64 | superimposed_list.append(superimposed) 65 | rmsds.append(rmsd) 66 | 67 | superimposed_stacked = torch.stack(superimposed_list, dim=0) 68 | rmsds_stacked = torch.stack(rmsds, dim=0) 69 | 70 | superimposed_reshaped = superimposed_stacked.reshape( 71 | batch_dims + coords.shape[-2:] 72 | ) 73 | rmsds_reshaped = rmsds_stacked.reshape( 74 | batch_dims 75 | ) 76 | 77 | return superimposed_reshaped, rmsds_reshaped 78 | -------------------------------------------------------------------------------- /VFN-Diff/openfold/utils/checkpointing.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 AlQuraishi Laboratory 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import deepspeed 16 | import torch 17 | import torch.utils.checkpoint 18 | from typing import Any, Tuple, List, Callable, Optional 19 | 20 | 21 | BLOCK_ARG = Any 22 | BLOCK_ARGS = List[BLOCK_ARG] 23 | 24 | 25 | def get_checkpoint_fn(): 26 | if(deepspeed.checkpointing.is_configured()): 27 | checkpoint = deepspeed.checkpointing.checkpoint 28 | else: 29 | checkpoint = torch.utils.checkpoint.checkpoint 30 | 31 | return checkpoint 32 | 33 | 34 | @torch.jit.ignore 35 | def checkpoint_blocks( 36 | blocks: List[Callable], 37 | args: BLOCK_ARGS, 38 | blocks_per_ckpt: Optional[int], 39 | ) -> BLOCK_ARGS: 40 | """ 41 | Chunk a list of blocks and run each chunk with activation 42 | checkpointing. We define a "block" as a callable whose only inputs are 43 | the outputs of the previous block. 44 | 45 | Implements Subsection 1.11.8 46 | 47 | Args: 48 | blocks: 49 | List of blocks 50 | args: 51 | Tuple of arguments for the first block. 52 | blocks_per_ckpt: 53 | Size of each chunk. A higher value corresponds to fewer 54 | checkpoints, and trades memory for speed. If None, no checkpointing 55 | is performed. 
56 | Returns: 57 | The output of the final block 58 | """ 59 | def wrap(a): 60 | return (a,) if type(a) is not tuple else a 61 | 62 | def exec(b, a): 63 | for block in b: 64 | a = wrap(block(*a)) 65 | return a 66 | 67 | def chunker(s, e): 68 | def exec_sliced(*a): 69 | return exec(blocks[s:e], a) 70 | 71 | return exec_sliced 72 | 73 | # Avoids mishaps when the blocks take just one argument 74 | args = wrap(args) 75 | 76 | if blocks_per_ckpt is None: 77 | return exec(blocks, args) 78 | elif blocks_per_ckpt < 1 or blocks_per_ckpt > len(blocks): 79 | raise ValueError("blocks_per_ckpt must be between 1 and len(blocks)") 80 | 81 | checkpoint = get_checkpoint_fn() 82 | 83 | for s in range(0, len(blocks), blocks_per_ckpt): 84 | e = s + blocks_per_ckpt 85 | args = checkpoint(chunker(s, e), *args) 86 | args = wrap(args) 87 | 88 | return args 89 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/optim/lr_scheduler/fixed_schedule.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from typing import List 8 | 9 | from unicore.optim.lr_scheduler import UnicoreLRScheduler, register_lr_scheduler 10 | 11 | 12 | @register_lr_scheduler("fixed") 13 | class FixedLRSchedule(UnicoreLRScheduler): 14 | """Decay the LR on a fixed schedule.""" 15 | 16 | def __init__(self, args, optimizer, total_train_steps): 17 | super().__init__(args, optimizer, total_train_steps) 18 | 19 | self.lr = args.lr[0] 20 | if args.warmup_updates > 0: 21 | self.warmup_factor = 1.0 / args.warmup_updates 22 | else: 23 | self.warmup_factor = 1 24 | 25 | @staticmethod 26 | def add_args(parser): 27 | """Add arguments to the parser for this LR scheduler.""" 28 | # fmt: off 29 | parser.add_argument('--force-anneal', '--fa', type=int, metavar='N', 30 | help='force annealing at specified epoch') 31 | parser.add_argument('--lr-shrink', default=0.1, type=float, metavar='LS', 32 | help='shrink factor for annealing, lr_new = (lr * lr_shrink)') 33 | parser.add_argument('--warmup-updates', default=0, type=int, metavar='N', 34 | help='warmup the learning rate linearly for the first N updates') 35 | # fmt: on 36 | 37 | def state_dict(self): 38 | return {"lr": self.lr} 39 | 40 | def load_state_dict(self, state_dict): 41 | if "lr" in state_dict: 42 | self.lr = state_dict["lr"] 43 | 44 | def get_next_lr(self, epoch): 45 | lrs = self.args.lr 46 | if self.args.force_anneal is None or epoch < self.args.force_anneal: 47 | # use fixed LR schedule 48 | next_lr = lrs[min(epoch - 1, len(lrs) - 1)] 49 | else: 50 | # annneal based on lr_shrink 51 | next_lr = lrs[-1] * self.args.lr_shrink ** ( 52 | epoch + 1 - self.args.force_anneal 53 | ) 54 | return next_lr 55 | 56 | def step_begin_epoch(self, epoch): 57 | """Update the learning rate at the beginning of the given epoch.""" 58 | self.lr = self.get_next_lr(epoch) 59 | self.optimizer.set_lr(self.warmup_factor * self.lr) 60 | return self.optimizer.get_lr() 61 | 62 | def step_update(self, num_updates): 63 | """Update the learning rate after each update.""" 64 | if self.args.warmup_updates > 0 and num_updates < self.args.warmup_updates: 65 | self.warmup_factor = (num_updates + 1) / float(self.args.warmup_updates) 66 | self.optimizer.set_lr(self.warmup_factor * self.lr) 67 | else: 68 | 
self.optimizer.set_lr(self.lr) 69 | return self.optimizer.get_lr() 70 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/optim/dynamic_loss_scaler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | 8 | class DynamicLossScaler(object): 9 | def __init__( 10 | self, 11 | init_scale=2.0 ** 15, 12 | scale_factor=2.0, 13 | scale_window=2000, 14 | tolerance=0.0, 15 | threshold=None, 16 | min_loss_scale=1e-4, 17 | ): 18 | self.loss_scale = init_scale 19 | self.scale_factor = scale_factor 20 | self.scale_window = scale_window 21 | self.tolerance = tolerance 22 | self.threshold = threshold 23 | self._iter = 0 24 | self._last_overflow_iter = -1 25 | self._last_rescale_iter = -1 26 | self._overflows_since_rescale = 0 27 | self.min_loss_scale = min_loss_scale 28 | 29 | def scale(self, outputs): 30 | return self.loss_scale * outputs 31 | 32 | def update(self): 33 | if (self._iter - self._last_overflow_iter) % self.scale_window == 0: 34 | self.loss_scale *= self.scale_factor 35 | self._last_rescale_iter = self._iter 36 | self._iter += 1 37 | 38 | def _decrease_loss_scale(self): 39 | self.loss_scale /= self.scale_factor 40 | if self.threshold is not None: 41 | self.loss_scale = max(self.loss_scale, self.threshold) 42 | 43 | def check_overflow(self, grad_norm): 44 | # detect inf and nan 45 | if grad_norm == float("inf") or grad_norm != grad_norm: 46 | # overflow has occured 47 | prev_scale = self.loss_scale 48 | iter_since_rescale = self._iter - self._last_rescale_iter 49 | 50 | self._last_overflow_iter = self._iter 51 | self._overflows_since_rescale += 1 52 | pct_overflow = self._overflows_since_rescale / float(iter_since_rescale) 53 | if pct_overflow >= self.tolerance: 54 | self._decrease_loss_scale() 55 | self._last_rescale_iter = self._iter 56 | self._overflows_since_rescale = 0 57 | 58 | if self.loss_scale <= self.min_loss_scale: 59 | # Use FloatingPointError as an uncommon error that parent 60 | # functions can safely catch to stop training. 61 | self.loss_scale = prev_scale 62 | raise FloatingPointError( 63 | ( 64 | "Minimum loss scale reached ({}). Your loss is probably exploding. " 65 | "Try lowering the learning rate, using gradient clipping or " 66 | "increasing the batch size." 67 | ).format(self.min_loss_scale) 68 | ) 69 | 70 | self._iter += 1 71 | raise OverflowError("setting loss scale to: " + str(self.loss_scale)) 72 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 
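# Sketch of the intended usage (the registry name and class below are
# illustrative, not definitions from this file):
#
#   build_thing, register_thing, THING_REGISTRY = setup_registry('--thing')
#
#   @register_thing('my_thing')
#   class MyThing:
#       ...
#
# build_x(args) then reads the chosen name from args, looks the class up in
# the registry, applies its argparse defaults via set_defaults, and calls
# either the class itself or its build_<registry_name> classmethod.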
6 | 7 | import argparse 8 | 9 | 10 | REGISTRIES = {} 11 | 12 | 13 | def setup_registry( 14 | registry_name: str, 15 | base_class=None, 16 | default=None, 17 | ): 18 | assert registry_name.startswith('--') 19 | registry_name = registry_name[2:].replace('-', '_') 20 | 21 | REGISTRY = {} 22 | REGISTRY_CLASS_NAMES = set() 23 | 24 | # maintain a registry of all registries 25 | if registry_name in REGISTRIES: 26 | return # registry already exists 27 | REGISTRIES[registry_name] = { 28 | 'registry': REGISTRY, 29 | 'default': default, 30 | } 31 | 32 | def build_x(args, *extra_args, **extra_kwargs): 33 | choice = getattr(args, registry_name, None) 34 | if choice is None: 35 | return None 36 | cls = REGISTRY[choice] 37 | if hasattr(cls, 'build_' + registry_name): 38 | builder = getattr(cls, 'build_' + registry_name) 39 | else: 40 | builder = cls 41 | set_defaults(args, cls) 42 | return builder(args, *extra_args, **extra_kwargs) 43 | 44 | def register_x(name): 45 | 46 | def register_x_cls(cls): 47 | if name in REGISTRY: 48 | raise ValueError('Cannot register duplicate {} ({})'.format(registry_name, name)) 49 | if cls.__name__ in REGISTRY_CLASS_NAMES: 50 | raise ValueError( 51 | 'Cannot register {} with duplicate class name ({})'.format( 52 | registry_name, cls.__name__, 53 | ) 54 | ) 55 | if base_class is not None and not issubclass(cls, base_class): 56 | raise ValueError('{} must extend {}'.format(cls.__name__, base_class.__name__)) 57 | REGISTRY[name] = cls 58 | REGISTRY_CLASS_NAMES.add(cls.__name__) 59 | return cls 60 | 61 | return register_x_cls 62 | 63 | return build_x, register_x, REGISTRY 64 | 65 | 66 | def set_defaults(args, cls): 67 | """Helper to set default arguments based on *add_args*.""" 68 | if not hasattr(cls, 'add_args'): 69 | return 70 | parser = argparse.ArgumentParser(argument_default=argparse.SUPPRESS, allow_abbrev=False) 71 | cls.add_args(parser) 72 | # copied from argparse.py: 73 | defaults = argparse.Namespace() 74 | for action in parser._actions: 75 | if action.dest is not argparse.SUPPRESS: 76 | if not hasattr(defaults, action.dest): 77 | if action.default is not argparse.SUPPRESS: 78 | setattr(defaults, action.dest, action.default) 79 | for key, default_value in vars(defaults).items(): 80 | if not hasattr(args, key): 81 | setattr(args, key, default_value) 82 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/losses/unicore_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import inspect 8 | from typing import Any, Dict, List 9 | 10 | from unicore import metrics, utils 11 | from torch.nn.modules.loss import _Loss 12 | 13 | 14 | class UnicoreLoss(_Loss): 15 | def __init__(self, task): 16 | super().__init__() 17 | self.task = task 18 | if task is not None: 19 | self.args = task.args 20 | if hasattr(task, "target_dictionary"): 21 | tgt_dict = task.target_dictionary 22 | self.padding_idx = tgt_dict.pad() if tgt_dict is not None else -100 23 | 24 | @classmethod 25 | def add_args(cls, parser): 26 | pass 27 | 28 | @classmethod 29 | def build_loss(cls, args, task): 30 | """Construct a loss from command-line args.""" 31 | # arguments in the __init__. 
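        # The loop below inspects this loss class's constructor signature and
        # fills each parameter from the task, from the parsed args, or from the
        # parameter's own default value, so most subclasses never need to
        # override build_loss.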
32 | init_args = {} 33 | for p in inspect.signature(cls).parameters.values(): 34 | if ( 35 | p.kind == p.POSITIONAL_ONLY 36 | or p.kind == p.VAR_POSITIONAL 37 | or p.kind == p.VAR_KEYWORD 38 | ): 39 | # we haven't implemented inference for these argument types, 40 | # but PRs welcome :) 41 | raise NotImplementedError("{} not supported".format(p.kind)) 42 | 43 | assert p.kind in {p.POSITIONAL_OR_KEYWORD, p.KEYWORD_ONLY} 44 | 45 | if p.name == "task": 46 | init_args["task"] = task 47 | elif p.name == "args": 48 | init_args["args"] = args 49 | elif hasattr(args, p.name): 50 | init_args[p.name] = getattr(args, p.name) 51 | elif p.default != p.empty: 52 | pass # we'll use the default value 53 | else: 54 | raise NotImplementedError( 55 | "Unable to infer Loss arguments, please implement " 56 | "{}.build_loss".format(cls.__name__) 57 | ) 58 | return cls(**init_args) 59 | 60 | def forward(self, model, sample, reduce=True): 61 | """Compute the loss for the given sample. 62 | 63 | Returns a tuple with three elements: 64 | 1) the loss 65 | 2) the sample size, which is used as the denominator for the gradient 66 | 3) logging outputs to display while training 67 | """ 68 | raise NotImplementedError 69 | 70 | @staticmethod 71 | def logging_outputs_can_be_summed(is_train: bool) -> bool: 72 | """ 73 | Whether the logging outputs returned by `forward` can be summed 74 | across workers prior to calling `reduce_metrics`. Setting this 75 | to True will improves distributed training speed. 76 | """ 77 | return False 78 | 79 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/tasks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | """isort:skip_file""" 7 | 8 | import argparse 9 | import importlib 10 | import os 11 | 12 | from .unicore_task import UnicoreTask 13 | 14 | 15 | # register dataclass 16 | TASK_REGISTRY = {} 17 | TASK_CLASS_NAMES = set() 18 | 19 | 20 | def setup_task(args, **kwargs): 21 | return TASK_REGISTRY[args.task].setup_task(args, **kwargs) 22 | 23 | 24 | def register_task(name): 25 | """ 26 | New tasks can be added to unicore with the 27 | :func:`~unicore.tasks.register_task` function decorator. 28 | 29 | For example:: 30 | 31 | @register_task('classification') 32 | class ClassificationTask(UnicoreTask): 33 | (...) 34 | 35 | .. note:: 36 | 37 | All Tasks must implement the :class:`~unicore.tasks.UnicoreTask` 38 | interface. 
39 | 40 | Args: 41 | name (str): the name of the task 42 | """ 43 | 44 | def register_task_cls(cls): 45 | if name in TASK_REGISTRY: 46 | raise ValueError("Cannot register duplicate task ({})".format(name)) 47 | if not issubclass(cls, UnicoreTask): 48 | raise ValueError( 49 | "Task ({}: {}) must extend UnicoreTask".format(name, cls.__name__) 50 | ) 51 | if cls.__name__ in TASK_CLASS_NAMES: 52 | raise ValueError( 53 | "Cannot register task with duplicate class name ({})".format( 54 | cls.__name__ 55 | ) 56 | ) 57 | TASK_REGISTRY[name] = cls 58 | TASK_CLASS_NAMES.add(cls.__name__) 59 | return cls 60 | 61 | return register_task_cls 62 | 63 | 64 | # automatically import any Python files in the tasks/ directory 65 | tasks_dir = os.path.dirname(__file__) 66 | for file in os.listdir(tasks_dir): 67 | path = os.path.join(tasks_dir, file) 68 | if ( 69 | not file.startswith("_") 70 | and not file.startswith(".") 71 | and (file.endswith(".py") or os.path.isdir(path)) 72 | ): 73 | task_name = file[: file.find(".py")] if file.endswith(".py") else file 74 | module = importlib.import_module("unicore.tasks." + task_name) 75 | 76 | # expose `task_parser` for sphinx 77 | if task_name in TASK_REGISTRY: 78 | parser = argparse.ArgumentParser(add_help=False) 79 | group_task = parser.add_argument_group("Task name") 80 | # fmt: off 81 | group_task.add_argument('--task', metavar=task_name, 82 | help='Enable this task with: ``--task=' + task_name + '``') 83 | # fmt: on 84 | group_args = parser.add_argument_group("Additional command-line arguments") 85 | TASK_REGISTRY[task_name].add_args(group_args) 86 | globals()[task_name + "_parser"] = parser 87 | -------------------------------------------------------------------------------- /VFN-IF/unifold/task.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | import contextlib 5 | from typing import Optional 6 | 7 | import numpy as np 8 | 9 | from unifold.dataset import UnifoldDataset 10 | from unicore.data import data_utils 11 | from unicore.tasks import UnicoreTask, register_task 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | 16 | @register_task("af2") 17 | class AlphafoldTask(UnicoreTask): 18 | """Task for training masked language models (e.g., BERT).""" 19 | 20 | @staticmethod 21 | def add_args(parser): 22 | """Add task-specific arguments to the parser.""" 23 | parser.add_argument( 24 | "data", 25 | ) 26 | parser.add_argument("--disable-sd", action="store_true") 27 | parser.add_argument( 28 | "--json-prefix", 29 | type=str, 30 | default="", 31 | ) 32 | parser.add_argument( 33 | "--max-chains", 34 | type=int, 35 | default=18, 36 | ) 37 | parser.add_argument( 38 | "--sd-prob", 39 | type=float, 40 | default=0.75, 41 | ) 42 | 43 | def __init__(self, args): 44 | super().__init__(args) 45 | self.seed = args.seed 46 | 47 | @classmethod 48 | def setup_task(cls, args, **kwargs): 49 | return cls(args) 50 | 51 | def load_dataset(self, split, combine=False, **kwargs): 52 | """Load a given dataset split. 
53 | Args: 54 | split (str): name of the split (e.g., train, valid, test) 55 | """ 56 | data_class = UnifoldDataset 57 | if split == "train": 58 | dataset = data_class( 59 | self.args, 60 | self.args.seed + 81, 61 | self.config, 62 | self.args.data, 63 | mode="train", 64 | max_step=self.args.max_update, 65 | disable_sd=self.args.disable_sd, 66 | json_prefix=self.args.json_prefix, 67 | ) 68 | else: 69 | dataset = data_class( 70 | self.args, 71 | self.args.seed + 81, 72 | self.config, 73 | self.args.data, 74 | mode="eval", 75 | max_step=None, 76 | json_prefix=self.args.json_prefix, 77 | ) 78 | 79 | self.datasets[split] = dataset 80 | 81 | def build_model(self, args): 82 | from unicore import models 83 | 84 | model = models.build_model(args, self) 85 | self.config = model.config 86 | 87 | return model 88 | 89 | def disable_shuffling(self) -> bool: 90 | return True 91 | 92 | @register_task("de") 93 | class DeTask(AlphafoldTask): 94 | def build_model(self, args): 95 | from unicore import models 96 | 97 | model = models.build_model(args, self) 98 | self.config = model.config 99 | 100 | return model -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/data/unicore_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import logging 8 | import numpy as np 9 | import torch.utils.data 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | class EpochListening: 15 | """Mixin for receiving updates whenever the epoch increments.""" 16 | 17 | @property 18 | def can_reuse_epoch_itr_across_epochs(self): 19 | """ 20 | Whether we can reuse the :class:`unicore.data.EpochBatchIterator` for 21 | this dataset across epochs. 22 | 23 | This needs to return ``False`` if the sample sizes can change across 24 | epochs, in which case we may need to regenerate batches at each epoch. 25 | If your dataset relies in ``set_epoch`` then you should consider setting 26 | this to ``False``. 27 | """ 28 | return True 29 | 30 | def set_epoch(self, epoch): 31 | """Will receive the updated epoch number at the beginning of the epoch.""" 32 | pass 33 | 34 | 35 | class UnicoreDataset(torch.utils.data.Dataset, EpochListening): 36 | """A dataset that provides helpers for batching.""" 37 | 38 | def __getitem__(self, index): 39 | raise NotImplementedError 40 | 41 | def __len__(self): 42 | raise NotImplementedError 43 | 44 | def collater(self, samples): 45 | """Merge a list of samples to form a mini-batch. 46 | 47 | Args: 48 | samples (List[dict]): samples to collate 49 | 50 | Returns: 51 | dict: a mini-batch suitable for forwarding with a Model 52 | """ 53 | raise NotImplementedError 54 | 55 | def ordered_indices(self): 56 | """Return an ordered list of indices. 
Batches will be constructed based 57 | on this order.""" 58 | return np.arange(len(self), dtype=np.int64) 59 | 60 | @property 61 | def supports_prefetch(self): 62 | """Whether this dataset supports prefetching.""" 63 | return False 64 | 65 | def attr(self, attr: str, index: int): 66 | return getattr(self, attr, None) 67 | 68 | def prefetch(self, indices): 69 | """Prefetch the data required for this epoch.""" 70 | raise NotImplementedError 71 | 72 | def batch_by_size( 73 | self, 74 | indices, 75 | batch_size=None, 76 | required_batch_size_multiple=1, 77 | ): 78 | """ 79 | Given an ordered set of indices 80 | """ 81 | from unicore.data import data_utils 82 | return data_utils.batch_by_size( 83 | indices, 84 | batch_size=batch_size, 85 | required_batch_size_multiple=required_batch_size_multiple, 86 | ) 87 | 88 | @property 89 | def supports_fetch_outside_dataloader(self): 90 | """Whether this dataset supports fetching outside the workers of the dataloader.""" 91 | return True 92 | -------------------------------------------------------------------------------- /VFN-Diff/analysis/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import re 4 | from data import protein 5 | from data import residue_constants 6 | from scipy.spatial.transform import Rotation 7 | from openfold.utils import rigid_utils 8 | 9 | 10 | CA_IDX = residue_constants.atom_order['CA'] 11 | Rigid = rigid_utils.Rigid 12 | 13 | 14 | def create_full_prot( 15 | atom37: np.ndarray, 16 | atom37_mask: np.ndarray, 17 | aatype=None, 18 | b_factors=None, 19 | ): 20 | assert atom37.ndim == 3 21 | assert atom37.shape[-1] == 3 22 | assert atom37.shape[-2] == 37 23 | n = atom37.shape[0] 24 | residue_index = np.arange(n) 25 | chain_index = np.zeros(n) 26 | if b_factors is None: 27 | b_factors = np.zeros([n, 37]) 28 | if aatype is None: 29 | aatype = np.zeros(n, dtype=int) 30 | return protein.Protein( 31 | atom_positions=atom37, 32 | atom_mask=atom37_mask, 33 | aatype=aatype, 34 | residue_index=residue_index, 35 | chain_index=chain_index, 36 | b_factors=b_factors) 37 | 38 | 39 | def write_prot_to_pdb( 40 | prot_pos: np.ndarray, 41 | file_path: str, 42 | aatype: np.ndarray=None, 43 | overwrite=False, 44 | no_indexing=False, 45 | b_factors=None, 46 | ): 47 | if overwrite: 48 | max_existing_idx = 0 49 | else: 50 | file_dir = os.path.dirname(file_path) 51 | file_name = os.path.basename(file_path).strip('.pdb') 52 | existing_files = [x for x in os.listdir(file_dir) if file_name in x] 53 | max_existing_idx = max([ 54 | int(re.findall(r'_(\d+).pdb', x)[0]) for x in existing_files if re.findall(r'_(\d+).pdb', x) 55 | if re.findall(r'_(\d+).pdb', x)] + [0]) 56 | if not no_indexing: 57 | save_path = file_path.replace('.pdb', '') + f'_{max_existing_idx+1}.pdb' 58 | else: 59 | save_path = file_path 60 | with open(save_path, 'w') as f: 61 | if prot_pos.ndim == 4: 62 | for t, pos37 in enumerate(prot_pos): 63 | atom37_mask = np.sum(np.abs(pos37), axis=-1) > 1e-7 64 | prot = create_full_prot( 65 | pos37, atom37_mask, aatype=aatype, b_factors=b_factors) 66 | pdb_prot = protein.to_pdb(prot, model=t + 1, add_end=False) 67 | f.write(pdb_prot) 68 | elif prot_pos.ndim == 3: 69 | atom37_mask = np.sum(np.abs(prot_pos), axis=-1) > 1e-7 70 | prot = create_full_prot( 71 | prot_pos, atom37_mask, aatype=aatype, b_factors=b_factors) 72 | pdb_prot = protein.to_pdb(prot, model=1, add_end=False) 73 | f.write(pdb_prot) 74 | else: 75 | raise ValueError(f'Invalid positions shape {prot_pos.shape}') 
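        # A single END record is written here because protein.to_pdb() above
        # is called with add_end=False for every model.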
76 | f.write('END') 77 | return save_path 78 | 79 | 80 | def rigids_to_se3_vec(frame, scale_factor=1.0): 81 | trans = frame[:, 4:] * scale_factor 82 | rotvec = Rotation.from_quat(frame[:, :4]).as_rotvec() 83 | se3_vec = np.concatenate([rotvec, trans], axis=-1) 84 | return se3_vec 85 | -------------------------------------------------------------------------------- /VFN-Diff/openfold/utils/logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import os 16 | import operator 17 | import time 18 | 19 | import dllogger as logger 20 | import numpy as np 21 | import torch.cuda.profiler as profiler 22 | from dllogger import JSONStreamBackend, StdOutBackend, Verbosity 23 | from pytorch_lightning import Callback 24 | 25 | 26 | def is_main_process(): 27 | return int(os.getenv("LOCAL_RANK", "0")) == 0 28 | 29 | 30 | class PerformanceLoggingCallback(Callback): 31 | def __init__(self, log_file, global_batch_size, warmup_steps: int = 0, profile: bool = False): 32 | logger.init(backends=[JSONStreamBackend(Verbosity.VERBOSE, log_file), StdOutBackend(Verbosity.VERBOSE)]) 33 | self.warmup_steps = warmup_steps 34 | self.global_batch_size = global_batch_size 35 | self.step = 0 36 | self.profile = profile 37 | self.timestamps = [] 38 | 39 | def do_step(self): 40 | self.step += 1 41 | if self.profile and self.step == self.warmup_steps: 42 | profiler.start() 43 | if self.step > self.warmup_steps: 44 | self.timestamps.append(time.time()) 45 | 46 | def on_train_batch_start(self, trainer, pl_module, batch, batch_idx, dataloader_idx): 47 | self.do_step() 48 | 49 | def on_test_batch_start(self, trainer, pl_module, batch, batch_idx, dataloader_idx): 50 | self.do_step() 51 | 52 | def process_performance_stats(self, deltas): 53 | def _round3(val): 54 | return round(val, 3) 55 | 56 | throughput_imgps = _round3(self.global_batch_size / np.mean(deltas)) 57 | timestamps_ms = 1000 * deltas 58 | stats = { 59 | f"throughput": throughput_imgps, 60 | f"latency_mean": _round3(timestamps_ms.mean()), 61 | } 62 | for level in [90, 95, 99]: 63 | stats.update({f"latency_{level}": _round3(np.percentile(timestamps_ms, level))}) 64 | 65 | return stats 66 | 67 | def _log(self): 68 | if is_main_process(): 69 | diffs = list(map(operator.sub, self.timestamps[1:], self.timestamps[:-1])) 70 | deltas = np.array(diffs) 71 | stats = self.process_performance_stats(deltas) 72 | logger.log(step=(), data=stats) 73 | logger.flush() 74 | 75 | def on_train_end(self, trainer, pl_module): 76 | if self.profile: 77 | profiler.stop() 78 | self._log() 79 | 80 | def on_epoch_end(self, trainer, pl_module): 81 | self._log() 82 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/optim/lr_scheduler/triangular_lr_scheduler.py: -------------------------------------------------------------------------------- 1 | # 
Copyright (c) DP Technology. 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | import math 8 | from typing import List 9 | from unicore.optim.lr_scheduler import UnicoreLRScheduler, register_lr_scheduler 10 | 11 | 12 | 13 | @register_lr_scheduler("triangular") 14 | class TriangularLRSchedule(UnicoreLRScheduler): 15 | """Assign LR based on a triangular cyclical schedule. 16 | 17 | See https://arxiv.org/pdf/1506.01186.pdf for details. 18 | """ 19 | 20 | def __init__(self, args, optimizer, total_train_steps): 21 | super().__init__(args, optimizer, total_train_steps) 22 | if len(args.lr) > 1: 23 | raise ValueError( 24 | "Cannot use a fixed learning rate schedule with triangular." 25 | " Consider --lr-scheduler=fixed instead." 26 | ) 27 | 28 | lr = args.lr[0] 29 | 30 | assert args.max_lr > lr, "max_lr must be more than lr" 31 | self.min_lr = lr 32 | self.max_lr = args.max_lr 33 | self.stepsize = args.lr_period_updates // 2 34 | self.lr_shrink = args.lr_shrink 35 | self.shrink_min = args.shrink_min 36 | 37 | # initial learning rate 38 | self.lr = self.min_lr 39 | self.optimizer.set_lr(self.lr) 40 | 41 | @staticmethod 42 | def add_args(parser): 43 | """Add arguments to the parser for this LR scheduler.""" 44 | # fmt: off 45 | parser.add_argument('--max-lr', required=True, type=float, metavar='LR', 46 | help='max learning rate, must be more than args.lr') 47 | parser.add_argument('--lr-period-updates', default=5000, type=float, metavar='LR', 48 | help='initial number of updates per period (cycle length)') 49 | parser.add_argument('--lr-shrink', default=0.1, type=float, metavar='LS', 50 | help='shrink factor for annealing') 51 | parser.add_argument('--shrink-min', action='store_true', 52 | help='if set, also shrinks min lr') 53 | # fmt: on 54 | 55 | def step(self, epoch, val_loss=None): 56 | """Update the learning rate at the end of the given epoch.""" 57 | super().step(epoch, val_loss) 58 | # we don't change the learning rate at epoch boundaries 59 | return self.optimizer.get_lr() 60 | 61 | def step_update(self, num_updates): 62 | """Update the learning rate after each update.""" 63 | cycle = math.floor(num_updates / (2 * self.stepsize)) 64 | 65 | lr_shrink = self.lr_shrink ** cycle 66 | max_lr = self.max_lr * lr_shrink 67 | if self.shrink_min: 68 | min_lr = self.min_lr * lr_shrink 69 | else: 70 | min_lr = self.min_lr 71 | 72 | x = abs(num_updates / self.stepsize - 2 * (cycle + 1) + 1) 73 | self.lr = min_lr + (max_lr - min_lr) * max(0, (1 - x)) 74 | 75 | self.optimizer.set_lr(self.lr) 76 | return self.lr 77 | -------------------------------------------------------------------------------- /VFN-Diff/openfold/model/pair_transition.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 AlQuraishi Laboratory 2 | # Copyright 2021 DeepMind Technologies Limited 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | from typing import Optional 16 | 17 | import torch 18 | import torch.nn as nn 19 | 20 | from openfold.model.primitives import Linear, LayerNorm 21 | from openfold.utils.tensor_utils import chunk_layer 22 | 23 | 24 | class PairTransition(nn.Module): 25 | """ 26 | Implements Algorithm 15. 27 | """ 28 | 29 | def __init__(self, c_z, n): 30 | """ 31 | Args: 32 | c_z: 33 | Pair transition channel dimension 34 | n: 35 | Factor by which c_z is multiplied to obtain hidden channel 36 | dimension 37 | """ 38 | super(PairTransition, self).__init__() 39 | 40 | self.c_z = c_z 41 | self.n = n 42 | 43 | self.layer_norm = LayerNorm(self.c_z) 44 | self.linear_1 = Linear(self.c_z, self.n * self.c_z, init="relu") 45 | self.relu = nn.ReLU() 46 | self.linear_2 = Linear(self.n * self.c_z, c_z, init="final") 47 | 48 | def _transition(self, z, mask): 49 | # [*, N_res, N_res, C_hidden] 50 | z = self.linear_1(z) 51 | z = self.relu(z) 52 | 53 | # [*, N_res, N_res, C_z] 54 | z = self.linear_2(z) * mask 55 | 56 | return z 57 | 58 | @torch.jit.ignore 59 | def _chunk(self, 60 | z: torch.Tensor, 61 | mask: torch.Tensor, 62 | chunk_size: int, 63 | ) -> torch.Tensor: 64 | return chunk_layer( 65 | self._transition, 66 | {"z": z, "mask": mask}, 67 | chunk_size=chunk_size, 68 | no_batch_dims=len(z.shape[:-2]), 69 | ) 70 | 71 | 72 | def forward(self, 73 | z: torch.Tensor, 74 | mask: Optional[torch.Tensor] = None, 75 | chunk_size: Optional[int] = None, 76 | ) -> torch.Tensor: 77 | """ 78 | Args: 79 | z: 80 | [*, N_res, N_res, C_z] pair embedding 81 | Returns: 82 | [*, N_res, N_res, C_z] pair embedding update 83 | """ 84 | # DISCREPANCY: DeepMind forgets to apply the mask in this module. 85 | if mask is None: 86 | mask = z.new_ones(z.shape[:-1]) 87 | 88 | # [*, N_res, N_res, 1] 89 | mask = mask.unsqueeze(-1) 90 | 91 | # [*, N_res, N_res, C_z] 92 | z = self.layer_norm(z) 93 | 94 | if chunk_size is not None: 95 | z = self._chunk(z, mask, chunk_size) 96 | else: 97 | z = self._transition(z=z, mask=mask) 98 | 99 | return z 100 | -------------------------------------------------------------------------------- /VFN-Diff/openfold/np/relax/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 AlQuraishi Laboratory 2 | # Copyright 2021 DeepMind Technologies Limited 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Utils for minimization.""" 17 | import io 18 | from openfold.np import residue_constants 19 | from Bio import PDB 20 | import numpy as np 21 | # simtk.openmm is not supported anymore. Remove simtk. 
22 | # https://github.com/openmm/openmm/releases 23 | from openmm import app as openmm_app 24 | from openmm.app.internal.pdbstructure import PdbStructure 25 | 26 | 27 | def overwrite_pdb_coordinates(pdb_str: str, pos) -> str: 28 | pdb_file = io.StringIO(pdb_str) 29 | structure = PdbStructure(pdb_file) 30 | topology = openmm_app.PDBFile(structure).getTopology() 31 | with io.StringIO() as f: 32 | openmm_app.PDBFile.writeFile(topology, pos, f) 33 | return f.getvalue() 34 | 35 | 36 | def overwrite_b_factors(pdb_str: str, bfactors: np.ndarray) -> str: 37 | """Overwrites the B-factors in pdb_str with contents of bfactors array. 38 | 39 | Args: 40 | pdb_str: An input PDB string. 41 | bfactors: A numpy array with shape [1, n_residues, 37]. We assume that the 42 | B-factors are per residue; i.e. that the nonzero entries are identical in 43 | [0, i, :]. 44 | 45 | Returns: 46 | A new PDB string with the B-factors replaced. 47 | """ 48 | if bfactors.shape[-1] != residue_constants.atom_type_num: 49 | raise ValueError( 50 | f"Invalid final dimension size for bfactors: {bfactors.shape[-1]}." 51 | ) 52 | 53 | parser = PDB.PDBParser(QUIET=True) 54 | handle = io.StringIO(pdb_str) 55 | structure = parser.get_structure("", handle) 56 | 57 | curr_resid = ("", "", "") 58 | idx = -1 59 | for atom in structure.get_atoms(): 60 | atom_resid = atom.parent.get_id() 61 | if atom_resid != curr_resid: 62 | idx += 1 63 | if idx >= bfactors.shape[0]: 64 | raise ValueError( 65 | "Index into bfactors exceeds number of residues. " 66 | "B-factors shape: {shape}, idx: {idx}." 67 | ) 68 | curr_resid = atom_resid 69 | atom.bfactor = bfactors[idx, residue_constants.atom_order["CA"]] 70 | 71 | new_pdb = io.StringIO() 72 | pdb_io = PDB.PDBIO() 73 | pdb_io.set_structure(structure) 74 | pdb_io.save(new_pdb) 75 | return new_pdb.getvalue() 76 | 77 | 78 | def assert_equal_nonterminal_atom_types( 79 | atom_mask: np.ndarray, ref_atom_mask: np.ndarray 80 | ): 81 | """Checks that pre- and post-minimized proteins have same atom set.""" 82 | # Ignore any terminal OXT atoms which may have been added by minimization. 83 | oxt = residue_constants.atom_order["OXT"] 84 | no_oxt_mask = np.ones(shape=atom_mask.shape, dtype=np.bool) 85 | no_oxt_mask[..., oxt] = False 86 | np.testing.assert_almost_equal( 87 | ref_atom_mask[no_oxt_mask], atom_mask[no_oxt_mask] 88 | ) 89 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/optim/lr_scheduler/inverse_square_root_schedule.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from collections.abc import Collection 8 | from typing import List 9 | 10 | from unicore.optim.lr_scheduler import UnicoreLRScheduler, register_lr_scheduler 11 | 12 | 13 | @register_lr_scheduler("inverse_sqrt") 14 | class InverseSquareRootSchedule(UnicoreLRScheduler): 15 | """Decay the LR based on the inverse square root of the update number. 16 | 17 | We also support a warmup phase where we linearly increase the learning rate 18 | from some initial learning rate (``--warmup-init-lr``) until the configured 19 | learning rate (``--lr``). Thereafter we decay proportional to the number of 20 | updates, with a decay factor set to align with the configured learning rate. 
21 | 22 | During warmup:: 23 | 24 | lrs = torch.linspace(args.warmup_init_lr, args.lr, args.warmup_updates) 25 | lr = lrs[update_num] 26 | 27 | After warmup:: 28 | 29 | decay_factor = args.lr * sqrt(args.warmup_updates) 30 | lr = decay_factor / sqrt(update_num) 31 | """ 32 | 33 | def __init__(self, args, optimizer, total_train_steps): 34 | super().__init__(args, optimizer, total_train_steps) 35 | if isinstance(args.lr, Collection) and len(args.lr) > 1: 36 | raise ValueError( 37 | "Cannot use a fixed learning rate schedule with inverse_sqrt." 38 | " Consider --lr-scheduler=fixed instead." 39 | ) 40 | warmup_end_lr = args.lr[0] if isinstance(args.lr, Collection) else args.lr 41 | if args.warmup_init_lr < 0: 42 | args.warmup_init_lr = 0 if args.warmup_updates > 0 else warmup_end_lr 43 | 44 | # linearly warmup for the first args.warmup_updates 45 | self.lr_step = (warmup_end_lr - args.warmup_init_lr) / args.warmup_updates 46 | 47 | # then, decay prop. to the inverse square root of the update number 48 | self.decay_factor = warmup_end_lr * args.warmup_updates ** 0.5 49 | 50 | # initial learning rate 51 | self.lr = args.warmup_init_lr 52 | self.optimizer.set_lr(self.lr) 53 | 54 | @staticmethod 55 | def add_args(parser): 56 | """Add arguments to the parser for this LR scheduler.""" 57 | # fmt: off 58 | parser.add_argument('--warmup-updates', default=4000, type=int, metavar='N', 59 | help='warmup the learning rate linearly for the first N updates') 60 | parser.add_argument('--warmup-init-lr', default=-1, type=float, metavar='LR', 61 | help='initial learning rate during warmup phase; default is args.lr') 62 | # fmt: on 63 | 64 | def step(self, epoch, val_loss=None): 65 | """Update the learning rate at the end of the given epoch.""" 66 | super().step(epoch, val_loss) 67 | # we don't change the learning rate at epoch boundaries 68 | return self.optimizer.get_lr() 69 | 70 | def step_update(self, num_updates): 71 | """Update the learning rate after each update.""" 72 | if num_updates < self.args.warmup_updates: 73 | self.lr = self.args.warmup_init_lr + num_updates * self.lr_step 74 | else: 75 | self.lr = self.decay_factor * num_updates ** -0.5 76 | self.optimizer.set_lr(self.lr) 77 | return self.lr 78 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/modules/transformer_encoder_layer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from typing import Dict, Optional 8 | 9 | import torch 10 | import torch.nn.functional as F 11 | from unicore import utils 12 | from torch import nn 13 | from . import LayerNorm, SelfMultiheadAttention 14 | 15 | class TransformerEncoderLayer(nn.Module): 16 | """ 17 | Implements a Transformer Encoder Layer used in BERT/XLM style pre-trained 18 | models. 
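    A construction sketch (the dimensions are the defaults below, shown for
    illustration)::

        layer = TransformerEncoderLayer(embed_dim=768, ffn_embed_dim=3072,
                                        attention_heads=8)
        out = layer(x, padding_mask=padding_mask)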
19 | """ 20 | 21 | def __init__( 22 | self, 23 | embed_dim: int = 768, 24 | ffn_embed_dim: int = 3072, 25 | attention_heads: int = 8, 26 | dropout: float = 0.1, 27 | attention_dropout: float = 0.1, 28 | activation_dropout: float = 0.0, 29 | activation_fn: str = "gelu", 30 | post_ln = False, 31 | ) -> None: 32 | super().__init__() 33 | 34 | # Initialize parameters 35 | self.embed_dim = embed_dim 36 | self.attention_heads = attention_heads 37 | self.attention_dropout = attention_dropout 38 | 39 | self.dropout = dropout 40 | self.activation_dropout = activation_dropout 41 | self.activation_fn = utils.get_activation_fn(activation_fn) 42 | 43 | self.self_attn = SelfMultiheadAttention( 44 | self.embed_dim, 45 | attention_heads, 46 | dropout=attention_dropout, 47 | ) 48 | # layer norm associated with the self attention layer 49 | self.self_attn_layer_norm = LayerNorm(self.embed_dim) 50 | self.fc1 = nn.Linear(self.embed_dim, ffn_embed_dim) 51 | self.fc2 = nn.Linear(ffn_embed_dim, self.embed_dim) 52 | self.final_layer_norm = LayerNorm(self.embed_dim) 53 | self.post_ln = post_ln 54 | 55 | 56 | def forward( 57 | self, 58 | x: torch.Tensor, 59 | attn_bias: Optional[torch.Tensor] = None, 60 | padding_mask: Optional[torch.Tensor] = None, 61 | return_attn: bool=False, 62 | ) -> torch.Tensor: 63 | """ 64 | LayerNorm is applied either before or after the self-attention/ffn 65 | modules similar to the original Transformer implementation. 66 | """ 67 | residual = x 68 | if not self.post_ln: 69 | x = self.self_attn_layer_norm(x) 70 | # new added 71 | x = self.self_attn( 72 | query=x, 73 | key_padding_mask=padding_mask, 74 | attn_bias=attn_bias, 75 | return_attn=return_attn, 76 | ) 77 | if return_attn: 78 | x, attn_weights, attn_probs = x 79 | x = F.dropout(x, p=self.dropout, training=self.training) 80 | x = residual + x 81 | if self.post_ln: 82 | x = self.self_attn_layer_norm(x) 83 | 84 | residual = x 85 | if not self.post_ln: 86 | x = self.final_layer_norm(x) 87 | x = self.fc1(x) 88 | x = self.activation_fn(x) 89 | x = F.dropout(x, p=self.activation_dropout, training=self.training) 90 | x = self.fc2(x) 91 | x = F.dropout(x, p=self.dropout, training=self.training) 92 | x = residual + x 93 | if self.post_ln: 94 | x = self.final_layer_norm(x) 95 | if not return_attn: 96 | return x 97 | else: 98 | return x, attn_weights, attn_probs 99 | -------------------------------------------------------------------------------- /VFN-IF/api/utils/layer_norm.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 
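# Usage note (illustrative, not part of the original source): this module provides a
# LayerNorm drop-in that dispatches to the fused CUDA kernels from
# unicore_fused_layernorm when they are installed, the GPU has compute capability
# >= 7.0, and the normalized dimension is one of FUSED_LAYER_NORM_SUPPORT_DIM;
# otherwise it falls back to torch.nn.functional.layer_norm. For example:
#
#     ln = LayerNorm(768)   # 768 is a supported fused dimension
#     y = ln(x)             # fused path on CUDA inputs, F.layer_norm elsewhere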
4 | 5 | import torch 6 | import numbers 7 | from torch.nn.parameter import Parameter 8 | from torch.nn import init 9 | from torch.nn import functional as F 10 | 11 | try: 12 | import unicore_fused_layernorm 13 | import unicore_fused_layernorm_backward_gamma_beta 14 | HAS_LAYER_NORM = True 15 | except ImportError: 16 | print("fused_layer_norm is not installed correctly") 17 | HAS_LAYER_NORM = False 18 | 19 | if not torch.cuda.is_available() or torch.cuda.get_device_capability()[0] < 7: 20 | HAS_LAYER_NORM = False 21 | 22 | class FusedLayerNormFastFunction(torch.autograd.Function): 23 | @staticmethod 24 | def forward(ctx, input, weight, bias, normalized_shape, eps): 25 | ctx.normalized_shape = normalized_shape 26 | ctx.eps = eps 27 | input = input.contiguous() 28 | weight = weight.contiguous() 29 | bias = bias.contiguous() 30 | output, mean, invvar = unicore_fused_layernorm.forward( 31 | input, ctx.normalized_shape, weight, bias, ctx.eps) 32 | ctx.save_for_backward(input, weight, bias, mean, invvar) 33 | return output 34 | @staticmethod 35 | def backward(ctx, grad_output): 36 | input_, weight_, bias_, mean, invvar = ctx.saved_tensors 37 | grad_input = grad_weight = grad_bias = None 38 | grad_input = unicore_fused_layernorm.backward( 39 | grad_output.contiguous(), mean, invvar, 40 | input_, ctx.normalized_shape, 41 | weight_, bias_, ctx.eps) 42 | grad_weight, grad_bias = unicore_fused_layernorm_backward_gamma_beta.backward( 43 | grad_output.contiguous(), mean, invvar, 44 | input_, ctx.normalized_shape, 45 | weight_, bias_, ctx.eps) 46 | return grad_input, grad_weight, grad_bias, None, None 47 | 48 | FUSED_LAYER_NORM_SUPPORT_DIM = set([64, 128, 256, 320, 384, 512, 640, 768, 1024, 1280, 1536, 1792, 2048, 2560, 5120]) 49 | 50 | class LayerNorm(torch.nn.Module): 51 | def __init__(self, normalized_shape, eps=1e-5, elementwise_affine=True): 52 | super(LayerNorm, self).__init__() 53 | if isinstance(normalized_shape, numbers.Integral): 54 | normalized_shape = (normalized_shape,) 55 | self.normalized_shape = torch.Size(normalized_shape) 56 | self.eps = eps 57 | assert elementwise_affine 58 | self.weight = Parameter(torch.Tensor(*normalized_shape)) 59 | self.bias = Parameter(torch.Tensor(*normalized_shape)) 60 | self.reset_parameters() 61 | def torch_layer_norm(input): 62 | return F.layer_norm( 63 | input, self.normalized_shape, self.weight.type(input.dtype), self.bias.type(input.dtype), self.eps) 64 | def fused_layer_norm(input): 65 | if input.is_cuda: 66 | return FusedLayerNormFastFunction.apply( 67 | input, self.weight.type(input.dtype), self.bias.type(input.dtype), self.normalized_shape, self.eps) 68 | else: 69 | return F.layer_norm( 70 | input, self.normalized_shape, self.weight.type(input.dtype), self.bias.type(input.dtype), self.eps) 71 | self.func = torch_layer_norm if (not HAS_LAYER_NORM or normalized_shape[0] not in FUSED_LAYER_NORM_SUPPORT_DIM) else fused_layer_norm 72 | 73 | def reset_parameters(self): 74 | init.ones_(self.weight) 75 | init.zeros_(self.bias) 76 | 77 | def forward(self, input): 78 | return self.func(input) 79 | 80 | def extra_repr(self): 81 | return '{normalized_shape}, eps={eps}, ' \ 82 | 'elementwise_affine=True'.format(**self.__dict__) 83 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/modules/layer_norm.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology.
2 | # This source code is licensed under the MIT license found in the 3 | # LICENSE file in the root directory of this source tree. 4 | 5 | import torch 6 | import numbers 7 | from torch.nn.parameter import Parameter 8 | from torch.nn import init 9 | from torch.nn import functional as F 10 | 11 | try: 12 | import unicore_fused_layernorm 13 | import unicore_fused_layernorm_backward_gamma_beta 14 | HAS_LAYER_NORM = True 15 | except ImportError: 16 | print("fused_layer_norm is not installed correctly") 17 | HAS_LAYER_NORM = False 18 | 19 | if not torch.cuda.is_available() or torch.cuda.get_device_capability()[0] < 7: 20 | HAS_LAYER_NORM = False 21 | 22 | class FusedLayerNormFastFunction(torch.autograd.Function): 23 | @staticmethod 24 | def forward(ctx, input, weight, bias, normalized_shape, eps): 25 | ctx.normalized_shape = normalized_shape 26 | ctx.eps = eps 27 | input = input.contiguous() 28 | weight = weight.contiguous() 29 | bias = bias.contiguous() 30 | output, mean, invvar = unicore_fused_layernorm.forward( 31 | input, ctx.normalized_shape, weight, bias, ctx.eps) 32 | ctx.save_for_backward(input, weight, bias, mean, invvar) 33 | return output 34 | @staticmethod 35 | def backward(ctx, grad_output): 36 | input_, weight_, bias_, mean, invvar = ctx.saved_tensors 37 | grad_input = grad_weight = grad_bias = None 38 | grad_input = unicore_fused_layernorm.backward( 39 | grad_output.contiguous(), mean, invvar, 40 | input_, ctx.normalized_shape, 41 | weight_, bias_, ctx.eps) 42 | grad_weight, grad_bias = unicore_fused_layernorm_backward_gamma_beta.backward( 43 | grad_output.contiguous(), mean, invvar, 44 | input_, ctx.normalized_shape, 45 | weight_, bias_, ctx.eps) 46 | return grad_input, grad_weight, grad_bias, None, None 47 | 48 | FUSED_LAYER_NORM_SUPPORT_DIM = set([64, 128, 256, 320, 384, 512, 640, 768, 1024, 1280, 1536, 1792, 2048, 2560, 5120]) 49 | 50 | class LayerNorm(torch.nn.Module): 51 | def __init__(self, normalized_shape, eps=1e-5, elementwise_affine=True): 52 | super(LayerNorm, self).__init__() 53 | if isinstance(normalized_shape, numbers.Integral): 54 | normalized_shape = (normalized_shape,) 55 | self.normalized_shape = torch.Size(normalized_shape) 56 | self.eps = eps 57 | assert elementwise_affine 58 | self.weight = Parameter(torch.Tensor(*normalized_shape)) 59 | self.bias = Parameter(torch.Tensor(*normalized_shape)) 60 | self.reset_parameters() 61 | def torch_layer_norm(input): 62 | return F.layer_norm( 63 | input, self.normalized_shape, self.weight.type(input.dtype), self.bias.type(input.dtype), self.eps) 64 | def fused_layer_norm(input): 65 | if input.is_cuda: 66 | return FusedLayerNormFastFunction.apply( 67 | input, self.weight.type(input.dtype), self.bias.type(input.dtype), self.normalized_shape, self.eps) 68 | else: 69 | return F.layer_norm( 70 | input, self.normalized_shape, self.weight.type(input.dtype), self.bias.type(input.dtype), self.eps) 71 | self.func = torch_layer_norm if (not HAS_LAYER_NORM or normalized_shape[0] not in FUSED_LAYER_NORM_SUPPORT_DIM) else fused_layer_norm 72 | 73 | def reset_parameters(self): 74 | init.ones_(self.weight) 75 | init.zeros_(self.bias) 76 | 77 | def forward(self, input): 78 | return self.func(input) 79 | 80 | def extra_repr(self): 81 | return '{normalized_shape}, eps={eps}, ' \ 82 | 'elementwise_affine=True'.format(**self.__dict__) 83 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/csrc/softmax_dropout/interface.cpp: -------------------------------------------------------------------------------- 1
| #include 2 | #include 3 | #include 4 | #include 5 | 6 | std::vector> fwd_cuda( 7 | bool is_training, 8 | torch::Tensor &input, 9 | const c10::optional &attn_mask, 10 | const c10::optional &bias, 11 | float dropout_prob, 12 | c10::optional gen_); 13 | 14 | torch::Tensor bwd_cuda( 15 | torch::Tensor &output_grads, 16 | const torch::Tensor &softmax_results, 17 | const c10::optional &dropout_mask, 18 | float dropout_prob); 19 | 20 | // C++ interface 21 | 22 | #define CHECK_CUDA(x) AT_ASSERTM(x.is_cuda(), #x " must be a CUDA tensor") 23 | #define CHECK_CONTIGUOUS(x) AT_ASSERTM(x.is_contiguous(), #x " must be contiguous") 24 | #define CHECK_INPUT(x) \ 25 | CHECK_CUDA(x); \ 26 | CHECK_CONTIGUOUS(x) 27 | 28 | std::vector> fwd( 29 | bool is_training, 30 | torch::Tensor &input, 31 | const c10::optional &attn_mask, 32 | const c10::optional &bias, 33 | float dropout_prob, 34 | c10::optional gen_) 35 | { 36 | CHECK_INPUT(input); 37 | if (attn_mask) 38 | { 39 | CHECK_INPUT(attn_mask.value()); 40 | AT_ASSERTM(attn_mask->dim() == 3, "expected 3D tensor"); 41 | } 42 | if (bias) 43 | { 44 | CHECK_INPUT(bias.value()); 45 | AT_ASSERTM(bias->dim() == 3, "expected 3D tensor"); 46 | AT_ASSERTM(input.size(0) % bias->size(0) == 0, "wrong first dim of bias."); 47 | AT_ASSERTM(bias->size(1) == input.size(1) && bias->size(2) == input.size(2), "the last two dims of bias and input should be the same."); 48 | } 49 | AT_ASSERTM(input.dim() == 3, "expected 3D tensor"); 50 | AT_ASSERTM(input.scalar_type() == at::ScalarType::Half || 51 | input.scalar_type() == at::ScalarType::BFloat16 || 52 | input.scalar_type() == at::ScalarType::Float, 53 | "Only HALF/BFloat16/Float is supported"); 54 | return fwd_cuda(is_training, input, attn_mask, bias, dropout_prob, gen_); 55 | } 56 | 57 | torch::Tensor bwd( 58 | torch::Tensor &output_grads, 59 | const torch::Tensor &softmax_results, 60 | const c10::optional &dropout_mask, 61 | float dropout_prob) 62 | { 63 | CHECK_INPUT(output_grads); 64 | CHECK_INPUT(softmax_results); 65 | if (dropout_mask) 66 | { 67 | CHECK_INPUT(dropout_mask.value()); 68 | } 69 | AT_ASSERTM(output_grads.dim() == 3, "expected 3D tensor"); 70 | AT_ASSERTM(softmax_results.dim() == 3, "expected 3D tensor"); 71 | AT_ASSERTM(!dropout_mask || dropout_mask->dim() == 1, "expected 1D tensor"); 72 | 73 | AT_ASSERTM(output_grads.scalar_type() == at::ScalarType::Half || 74 | output_grads.scalar_type() == at::ScalarType::BFloat16 || 75 | output_grads.scalar_type() == at::ScalarType::Float, 76 | "Only HALF/BFloat16/Float is supported"); 77 | AT_ASSERTM(softmax_results.scalar_type() == at::ScalarType::Half || 78 | softmax_results.scalar_type() == at::ScalarType::BFloat16 || 79 | softmax_results.scalar_type() == at::ScalarType::Float, 80 | "Only HALF/BFloat16/Float is supported"); 81 | AT_ASSERTM(output_grads.scalar_type() == softmax_results.scalar_type(), "the types mismatch"); 82 | return bwd_cuda(output_grads, softmax_results, dropout_mask, dropout_prob); 83 | } 84 | 85 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) 86 | { 87 | m.def("forward", &fwd, "softmax dropout -- Forward."); 88 | m.def("backward", &bwd, "softmax dropout -- Backward."); 89 | } 90 | -------------------------------------------------------------------------------- /VFN-Diff/openfold/data/feature_pipeline.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 AlQuraishi Laboratory 2 | # Copyright 2021 DeepMind Technologies Limited 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # 
you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import copy 17 | from typing import Mapping, Tuple, List, Optional, Dict, Sequence 18 | 19 | import ml_collections 20 | import numpy as np 21 | import torch 22 | 23 | from openfold.data import input_pipeline 24 | 25 | 26 | FeatureDict = Mapping[str, np.ndarray] 27 | TensorDict = Dict[str, torch.Tensor] 28 | 29 | 30 | def np_to_tensor_dict( 31 | np_example: Mapping[str, np.ndarray], 32 | features: Sequence[str], 33 | ) -> TensorDict: 34 | """Creates dict of tensors from a dict of NumPy arrays. 35 | 36 | Args: 37 | np_example: A dict of NumPy feature arrays. 38 | features: A list of strings of feature names to be returned in the dataset. 39 | 40 | Returns: 41 | A dictionary of features mapping feature names to features. Only the given 42 | features are returned, all other ones are filtered out. 43 | """ 44 | tensor_dict = { 45 | k: torch.tensor(v) for k, v in np_example.items() if k in features 46 | } 47 | return tensor_dict 48 | 49 | 50 | def make_data_config( 51 | config: ml_collections.ConfigDict, 52 | mode: str, 53 | num_res: int, 54 | ) -> Tuple[ml_collections.ConfigDict, List[str]]: 55 | cfg = copy.deepcopy(config) 56 | mode_cfg = cfg[mode] 57 | with cfg.unlocked(): 58 | if mode_cfg.crop_size is None: 59 | mode_cfg.crop_size = num_res 60 | 61 | feature_names = cfg.common.unsupervised_features 62 | 63 | if cfg.common.use_templates: 64 | feature_names += cfg.common.template_features 65 | 66 | if cfg[mode].supervised: 67 | feature_names += cfg.supervised.supervised_features 68 | 69 | return cfg, feature_names 70 | 71 | 72 | def np_example_to_features( 73 | np_example: FeatureDict, 74 | config: ml_collections.ConfigDict, 75 | mode: str, 76 | ): 77 | np_example = dict(np_example) 78 | num_res = int(np_example["seq_length"][0]) 79 | cfg, feature_names = make_data_config(config, mode=mode, num_res=num_res) 80 | 81 | if "deletion_matrix_int" in np_example: 82 | np_example["deletion_matrix"] = np_example.pop( 83 | "deletion_matrix_int" 84 | ).astype(np.float32) 85 | 86 | tensor_dict = np_to_tensor_dict( 87 | np_example=np_example, features=feature_names 88 | ) 89 | with torch.no_grad(): 90 | features = input_pipeline.process_tensors_from_config( 91 | tensor_dict, 92 | cfg.common, 93 | cfg[mode], 94 | ) 95 | 96 | return {k: v for k, v in features.items()} 97 | 98 | 99 | class FeaturePipeline: 100 | def __init__( 101 | self, 102 | config: ml_collections.ConfigDict, 103 | ): 104 | self.config = config 105 | 106 | def process_features( 107 | self, 108 | raw_features: FeatureDict, 109 | mode: str = "train", 110 | ) -> FeatureDict: 111 | return np_example_to_features( 112 | np_example=raw_features, 113 | config=self.config, 114 | mode=mode, 115 | ) 116 | -------------------------------------------------------------------------------- /VFN-Diff/openfold/np/relax/relax.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 AlQuraishi Laboratory 2 | # Copyright 2021 DeepMind Technologies Limited 3 | # 4 | # Licensed under the Apache 
License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Amber relaxation.""" 17 | from typing import Any, Dict, Sequence, Tuple 18 | from openfold.np import protein 19 | from openfold.np.relax import amber_minimize, utils 20 | import numpy as np 21 | 22 | 23 | class AmberRelaxation(object): 24 | """Amber relaxation.""" 25 | def __init__( 26 | self, 27 | *, 28 | max_iterations: int, 29 | tolerance: float, 30 | stiffness: float, 31 | exclude_residues: Sequence[int], 32 | max_outer_iterations: int, 33 | use_gpu: bool, 34 | ): 35 | """Initialize Amber Relaxer. 36 | 37 | Args: 38 | max_iterations: Maximum number of L-BFGS iterations. 0 means no max. 39 | tolerance: kcal/mol, the energy tolerance of L-BFGS. 40 | stiffness: kcal/mol A**2, spring constant of heavy atom restraining 41 | potential. 42 | exclude_residues: Residues to exclude from per-atom restraining. 43 | Zero-indexed. 44 | max_outer_iterations: Maximum number of violation-informed relax 45 | iterations. A value of 1 will run the non-iterative procedure used in 46 | CASP14. Use 20 so that >95% of the bad cases are relaxed. Relax finishes 47 | as soon as there are no violations, hence in most cases this causes no 48 | slowdown. In the worst case we do 20 outer iterations. 49 | use_gpu: Whether to run on GPU 50 | """ 51 | 52 | self._max_iterations = max_iterations 53 | self._tolerance = tolerance 54 | self._stiffness = stiffness 55 | self._exclude_residues = exclude_residues 56 | self._max_outer_iterations = max_outer_iterations 57 | self._use_gpu = use_gpu 58 | 59 | def process( 60 | self, *, prot: protein.Protein 61 | ) -> Tuple[str, Dict[str, Any], np.ndarray]: 62 | """Runs Amber relax on a prediction, adds hydrogens, returns PDB string.""" 63 | out = amber_minimize.run_pipeline( 64 | prot=prot, 65 | max_iterations=self._max_iterations, 66 | tolerance=self._tolerance, 67 | stiffness=self._stiffness, 68 | exclude_residues=self._exclude_residues, 69 | max_outer_iterations=self._max_outer_iterations, 70 | use_gpu=self._use_gpu, 71 | ) 72 | min_pos = out["pos"] 73 | start_pos = out["posinit"] 74 | rmsd = np.sqrt(np.sum((start_pos - min_pos) ** 2) / start_pos.shape[0]) 75 | debug_data = { 76 | "initial_energy": out["einit"], 77 | "final_energy": out["efinal"], 78 | "attempts": out["min_attempts"], 79 | "rmsd": rmsd, 80 | } 81 | pdb_str = amber_minimize.clean_protein(prot) 82 | min_pdb = utils.overwrite_pdb_coordinates(pdb_str, min_pos) 83 | min_pdb = utils.overwrite_b_factors(min_pdb, prot.b_factors) 84 | utils.assert_equal_nonterminal_atom_types( 85 | protein.from_pdb_string(min_pdb).atom_mask, prot.atom_mask 86 | ) 87 | violations = out["structural_violations"][ 88 | "total_per_residue_violations_mask" 89 | ] 90 | return min_pdb, debug_data, violations 91 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/optim/lr_scheduler/polynomial_decay_schedule.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 
2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from typing import List 8 | 9 | from unicore.optim.lr_scheduler import UnicoreLRScheduler, register_lr_scheduler 10 | 11 | @register_lr_scheduler("polynomial_decay") 12 | class PolynomialDecayLRSchedule(UnicoreLRScheduler): 13 | """Decay the LR on a fixed schedule.""" 14 | 15 | def __init__(self, args, optimizer, total_train_steps): 16 | super().__init__(args, optimizer, total_train_steps) 17 | if self.args.warmup_ratio > 0: 18 | # if warmup_ratio > 0, use external train steps 19 | assert total_train_steps is not None 20 | self.warmup_updates = int(self.args.warmup_ratio * total_train_steps) 21 | self.total_num_update = total_train_steps 22 | else: 23 | assert args.total_num_update > 0 24 | self.warmup_updates = args.warmup_updates 25 | self.total_num_update = args.total_num_update 26 | self.lr = args.lr[0] 27 | if self.warmup_updates > 0: 28 | self.warmup_factor = 1.0 / self.warmup_updates 29 | else: 30 | self.warmup_factor = 1 31 | self.end_learning_rate = args.end_learning_rate 32 | self.power = args.power 33 | self.optimizer.set_lr(self.warmup_factor * self.lr) 34 | 35 | @staticmethod 36 | def add_args(parser): 37 | """Add arguments to the parser for this LR scheduler.""" 38 | parser.add_argument('--force-anneal', '--fa', type=int, metavar='N', 39 | help='force annealing at specified epoch') 40 | parser.add_argument('--warmup-updates', default=0, type=int, metavar='N', 41 | help='warmup the learning rate linearly for the first N updates') 42 | parser.add_argument('--warmup-ratio', default=-1.0, type=float, metavar='N', 43 | help='warmup the learning rate linearly for the first N-percent updates') 44 | parser.add_argument('--end-learning-rate', default=0.0, type=float) 45 | parser.add_argument('--power', default=1.0, type=float) 46 | parser.add_argument('--total-num-update', default=1000000, type=int) 47 | 48 | def get_next_lr(self, epoch): 49 | lrs = self.args.lr 50 | if self.args.force_anneal is None or epoch < self.args.force_anneal: 51 | # use fixed LR schedule 52 | next_lr = lrs[min(epoch, len(lrs) - 1)] 53 | else: 54 | # annneal based on lr_shrink 55 | next_lr = self.optimizer.get_lr() 56 | return next_lr 57 | 58 | def step_begin_epoch(self, epoch): 59 | """Update the learning rate at the beginning of the given epoch.""" 60 | self.lr = self.get_next_lr(epoch) 61 | self.optimizer.set_lr(self.warmup_factor * self.lr) 62 | return self.optimizer.get_lr() 63 | 64 | def step_update(self, num_updates): 65 | """Update the learning rate after each update.""" 66 | if self.warmup_updates > 0 and num_updates <= self.warmup_updates: 67 | self.warmup_factor = num_updates / float(self.warmup_updates) 68 | lr = self.warmup_factor * self.lr 69 | elif num_updates >= self.total_num_update: 70 | lr = self.end_learning_rate 71 | else: 72 | warmup = self.warmup_updates 73 | lr_range = self.lr - self.end_learning_rate 74 | pct_remaining = 1 - (num_updates - warmup) / ( 75 | self.total_num_update - warmup 76 | ) 77 | lr = lr_range * pct_remaining ** (self.power) + self.end_learning_rate 78 | self.optimizer.set_lr(lr) 79 | return self.optimizer.get_lr() 80 | -------------------------------------------------------------------------------- /VFN-Diff/config/base.yaml: -------------------------------------------------------------------------------- 1 | # Default or base configuration for SE(3) diffusion 
experiments. 2 | 3 | defaults: 4 | - override hydra/launcher: joblib 5 | 6 | data: 7 | # CSV for path and metadata to training examples. 8 | csv_path: ./data/processed_pdb/metadata.csv 9 | cluster_path: ./data/processed_pdb/clusters-by-entity-30.txt 10 | filtering: 11 | max_len: 512 12 | min_len: 60 13 | # Selects a subset of examples. Useful for debugging. 14 | subset: null 15 | allowed_oligomer: [monomeric] 16 | max_helix_percent: 1.0 17 | max_loop_percent: 0.5 18 | min_beta_percent: -1.0 19 | rog_quantile: 0.96 20 | min_t: 0.01 21 | samples_per_eval_length: 4 22 | num_eval_lengths: 10 23 | num_t: 100 24 | 25 | diffuser: 26 | diffuse_trans: True 27 | diffuse_rot: True 28 | 29 | # R(3) diffuser arguments 30 | r3: 31 | min_b: 0.1 32 | max_b: 20.0 33 | coordinate_scaling: 0.1 34 | 35 | # SO(3) diffuser arguments 36 | so3: 37 | num_omega: 1000 38 | num_sigma: 1000 39 | min_sigma: 0.1 40 | max_sigma: 1.5 41 | schedule: logarithmic 42 | cache_dir: .cache/ 43 | use_cached_score: False 44 | 45 | model: 46 | node_embed_size: 256 47 | edge_embed_size: 128 48 | dropout: 0.0 49 | model_type: 'vfn' 50 | embed: 51 | index_embed_size: 32 52 | aatype_embed_size: 64 53 | embed_self_conditioning: True 54 | num_bins: 22 55 | min_bin: 1e-5 56 | max_bin: 20.0 57 | ipa: 58 | c_s: ${model.node_embed_size} 59 | c_z: ${model.edge_embed_size} 60 | c_hidden: 256 61 | c_skip: 64 62 | no_heads: 8 63 | no_qk_points: 8 64 | no_v_points: 12 65 | seq_tfmr_num_heads: 4 66 | seq_tfmr_num_layers: 2 67 | num_blocks: 4 68 | coordinate_scaling: ${diffuser.r3.coordinate_scaling} 69 | vfn: 70 | c_s: ${model.node_embed_size} 71 | c_z: ${model.edge_embed_size} 72 | c_hidden: 256 73 | c_skip: 64 74 | no_heads: 8 75 | no_points: 16 76 | gbf_k: 3 77 | g_dim: ${model.edge_embed_size} 78 | seq_tfmr_num_heads: 4 79 | seq_tfmr_num_layers: 2 80 | num_blocks: 4 81 | coordinate_scaling: ${diffuser.r3.coordinate_scaling} 82 | vfn_attn_factor: 1.0 83 | dist_attn_factor: 1.0 84 | experiment: 85 | # Experiment metadata 86 | name: baseline 87 | run_id: null 88 | 89 | #training mode 90 | use_ddp : False 91 | 92 | # Training arguments 93 | log_freq: 1000 94 | batch_size: 256 95 | eval_batch_size: ${data.samples_per_eval_length} 96 | num_loader_workers: 32 97 | num_epoch: 500_000 98 | learning_rate: 0.0001 99 | max_squared_res: 500000 100 | prefetch_factor: 100 101 | use_gpu: True 102 | num_gpus: 2 103 | sample_mode: cluster_time_batch 104 | 105 | # Wandb logging 106 | wandb_dir: ./outputs/ 107 | use_wandb: True 108 | 109 | # How many steps to checkpoint between. 110 | ckpt_freq: 10000 111 | # Take early checkpoint at step 100. Helpful for catching eval bugs early. 112 | early_ckpt: True 113 | 114 | # Checkpoint directory to warm start from. 115 | warm_start: null 116 | use_warm_start_conf: False 117 | ckpt_dir: ./ckpt/ 118 | 119 | # Loss weights. 120 | trans_loss_weight: 1.0 121 | rot_loss_weight: 0.5 122 | rot_loss_t_threshold: 0.2 123 | separate_rot_loss: True 124 | trans_x0_threshold: 1.0 125 | coordinate_scaling: ${diffuser.r3.coordinate_scaling} 126 | bb_atom_loss_weight: 1.0 127 | bb_atom_loss_t_filter: 0.25 128 | dist_mat_loss_weight: 1.0 129 | dist_mat_loss_t_filter: 0.25 130 | aux_loss_weight: 0.25 131 | 132 | # Evaluation. 133 | eval_dir: ./eval_outputs 134 | noise_scale: 1.0 135 | # Filled in during training. 136 | num_parameters: null 137 | 138 | hydra: 139 | sweeper: 140 | params: 141 | # Example of hydra multi run and wandb. 
142 | experiment.name: use_wandb 143 | experiment.use_wandb: True 144 | -------------------------------------------------------------------------------- /VFN-Diff/openfold/utils/lr_schedulers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class AlphaFoldLRScheduler(torch.optim.lr_scheduler._LRScheduler): 5 | """ Implements the learning rate schedule defined in the AlphaFold 2 6 | supplement. A linear warmup is followed by a plateau at the maximum 7 | learning rate and then exponential decay. 8 | 9 | Note that the initial learning rate of the optimizer in question is 10 | ignored; use this class' base_lr parameter to specify the starting 11 | point of the warmup. 12 | """ 13 | def __init__(self, 14 | optimizer, 15 | last_epoch: int = -1, 16 | verbose: bool = False, 17 | base_lr: float = 0., 18 | max_lr: float = 0.001, 19 | warmup_no_steps: int = 1000, 20 | start_decay_after_n_steps: int = 50000, 21 | decay_every_n_steps: int = 50000, 22 | decay_factor: float = 0.95, 23 | ): 24 | step_counts = { 25 | "warmup_no_steps": warmup_no_steps, 26 | "start_decay_after_n_steps": start_decay_after_n_steps, 27 | } 28 | 29 | for k,v in step_counts.items(): 30 | if(v < 0): 31 | raise ValueError(f"{k} must be nonnegative") 32 | 33 | if(warmup_no_steps > start_decay_after_n_steps): 34 | raise ValueError( 35 | "warmup_no_steps must not exceed start_decay_after_n_steps" 36 | ) 37 | 38 | self.optimizer = optimizer 39 | self.last_epoch = last_epoch 40 | self.verbose = verbose 41 | self.base_lr = base_lr 42 | self.max_lr = max_lr 43 | self.warmup_no_steps = warmup_no_steps 44 | self.start_decay_after_n_steps = start_decay_after_n_steps 45 | self.decay_every_n_steps = decay_every_n_steps 46 | self.decay_factor = decay_factor 47 | 48 | super(AlphaFoldLRScheduler, self).__init__( 49 | optimizer, 50 | last_epoch=last_epoch, 51 | verbose=verbose, 52 | ) 53 | 54 | def state_dict(self): 55 | state_dict = { 56 | k:v for k,v in self.__dict__.items() if k not in ["optimizer"] 57 | } 58 | 59 | return state_dict 60 | 61 | def load_state_dict(self, state_dict): 62 | self.__dict__.update(state_dict) 63 | 64 | def get_lr(self): 65 | if(not self._get_lr_called_within_step): 66 | raise RuntimeError( 67 | "To get the last learning rate computed by the scheduler, use " 68 | "get_last_lr()" 69 | ) 70 | 71 | step_no = self.last_epoch 72 | 73 | if(step_no <= self.warmup_no_steps): 74 | lr = self.base_lr + (step_no / self.warmup_no_steps) * self.max_lr 75 | elif(step_no > self.start_decay_after_n_steps): 76 | steps_since_decay = step_no - self.start_decay_after_n_steps 77 | exp = (steps_since_decay // self.decay_every_n_steps) + 1 78 | lr = self.max_lr * (self.decay_factor ** exp) 79 | else: # plateau 80 | lr = self.max_lr 81 | 82 | return [lr for group in self.optimizer.param_groups] 83 | 84 | 85 | class TestAF2LRScheduler(AlphaFoldLRScheduler): 86 | def __init__(self, 87 | optimizer, 88 | last_epoch: int = -1, 89 | verbose: bool = False, 90 | base_lr: float = 0., 91 | max_lr: float = 0.0001, 92 | warmup_no_steps: int = 10, 93 | start_decay_after_n_steps: int = 100, 94 | decay_every_n_steps: int = 10, 95 | decay_factor: float = 0.95, 96 | ): 97 | super().__init__( 98 | optimizer, 99 | last_epoch, 100 | verbose, 101 | base_lr, 102 | max_lr, 103 | warmup_no_steps, 104 | start_decay_after_n_steps, 105 | decay_every_n_steps, 106 | decay_factor, 107 | ) -------------------------------------------------------------------------------- 
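A minimal sketch of the piecewise schedule implemented by AlphaFoldLRScheduler above, rewritten as a pure function for illustration (the helper name and defaults mirror the constructor arguments; it is not part of the repository):

def af2_lr(step_no, base_lr=0.0, max_lr=0.001, warmup_no_steps=1000,
           start_decay_after_n_steps=50000, decay_every_n_steps=50000,
           decay_factor=0.95):
    # Linear warmup from base_lr towards max_lr.
    if step_no <= warmup_no_steps:
        return base_lr + (step_no / warmup_no_steps) * max_lr
    # Exponential decay once the plateau ends.
    if step_no > start_decay_after_n_steps:
        exp = (step_no - start_decay_after_n_steps) // decay_every_n_steps + 1
        return max_lr * decay_factor ** exp
    # Plateau at max_lr in between.
    return max_lr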
/VFN-IF/Uni-Core/unicore/data/nested_dictionary_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | 7 | from collections import OrderedDict 8 | 9 | import torch 10 | from torch.utils.data.dataloader import default_collate 11 | 12 | from . import UnicoreDataset 13 | 14 | 15 | def _flatten(dico, prefix=None): 16 | """Flatten a nested dictionary.""" 17 | new_dico = OrderedDict() 18 | if isinstance(dico, dict): 19 | prefix = prefix + "." if prefix is not None else "" 20 | for k, v in dico.items(): 21 | if v is None: 22 | continue 23 | new_dico.update(_flatten(v, prefix + k)) 24 | elif isinstance(dico, list): 25 | for i, v in enumerate(dico): 26 | new_dico.update(_flatten(v, prefix + ".[" + str(i) + "]")) 27 | else: 28 | new_dico = OrderedDict({prefix: dico}) 29 | return new_dico 30 | 31 | 32 | def _unflatten(dico): 33 | """Unflatten a flattened dictionary into a nested dictionary.""" 34 | new_dico = OrderedDict() 35 | for full_k, v in dico.items(): 36 | full_k = full_k.split(".") 37 | node = new_dico 38 | for k in full_k[:-1]: 39 | if k.startswith("[") and k.endswith("]"): 40 | k = int(k[1:-1]) 41 | if k not in node: 42 | node[k] = OrderedDict() 43 | node = node[k] 44 | node[full_k[-1]] = v 45 | return new_dico 46 | 47 | 48 | class NestedDictionaryDataset(UnicoreDataset): 49 | def __init__(self, defn): 50 | super().__init__() 51 | self.defn = _flatten(defn) 52 | first = None 53 | for v in self.defn.values(): 54 | if not isinstance( 55 | v, 56 | ( 57 | UnicoreDataset, 58 | torch.utils.data.Dataset, 59 | ), 60 | ): 61 | raise ValueError("Expected Dataset but found: {}".format(v.__class__)) 62 | first = first or v 63 | if len(v) > 0: 64 | assert len(v) == len(first), "dataset lengths must match" 65 | 66 | self._len = len(first) 67 | 68 | def __getitem__(self, index): 69 | return OrderedDict((k, ds[index]) for k, ds in self.defn.items()) 70 | 71 | def __len__(self): 72 | return self._len 73 | 74 | def collater(self, samples): 75 | """Merge a list of samples to form a mini-batch. 
76 | 77 | Args: 78 | samples (List[dict]): samples to collate 79 | 80 | Returns: 81 | dict: a mini-batch suitable for forwarding with a Model 82 | """ 83 | if len(samples) == 0: 84 | return {} 85 | sample = OrderedDict() 86 | for k, ds in self.defn.items(): 87 | try: 88 | sample[k] = ds.collater([s[k] for s in samples]) 89 | except NotImplementedError: 90 | sample[k] = default_collate([s[k] for s in samples]) 91 | return _unflatten(sample) 92 | 93 | @property 94 | def supports_prefetch(self): 95 | """Whether this dataset supports prefetching.""" 96 | return any(ds.supports_prefetch for ds in self.defn.values()) 97 | 98 | def prefetch(self, indices): 99 | """Prefetch the data required for this epoch.""" 100 | for ds in self.defn.values(): 101 | if getattr(ds, "supports_prefetch", False): 102 | ds.prefetch(indices) 103 | 104 | @property 105 | def can_reuse_epoch_itr_across_epochs(self): 106 | return all(ds.can_reuse_epoch_itr_across_epochs for ds in self.defn.values()) 107 | 108 | def set_epoch(self, epoch): 109 | super().set_epoch(epoch) 110 | for ds in self.defn.values(): 111 | ds.set_epoch(epoch) 112 | -------------------------------------------------------------------------------- /VFN-Diff/openfold/data/tools/hhsearch.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 AlQuraishi Laboratory 2 | # Copyright 2021 DeepMind Technologies Limited 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | """Library to run HHsearch from Python.""" 17 | import glob 18 | import logging 19 | import os 20 | import subprocess 21 | from typing import Sequence 22 | 23 | from openfold.data.tools import utils 24 | 25 | 26 | class HHSearch: 27 | """Python wrapper of the HHsearch binary.""" 28 | 29 | def __init__( 30 | self, 31 | *, 32 | binary_path: str, 33 | databases: Sequence[str], 34 | n_cpu: int = 2, 35 | maxseq: int = 1_000_000, 36 | ): 37 | """Initializes the Python HHsearch wrapper. 38 | 39 | Args: 40 | binary_path: The path to the HHsearch executable. 41 | databases: A sequence of HHsearch database paths. This should be the 42 | common prefix for the database files (i.e. up to but not including 43 | _hhm.ffindex etc.) 44 | n_cpu: The number of CPUs to use 45 | maxseq: The maximum number of rows in an input alignment. Note that this 46 | parameter is only supported in HHBlits version 3.1 and higher. 47 | 48 | Raises: 49 | RuntimeError: If HHsearch binary not found within the path. 
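Example (the binary and database paths below are placeholders)::

    hhsearch = HHSearch(
        binary_path="/usr/bin/hhsearch",
        databases=["/data/pdb70/pdb70"],
    )
    hhr = hhsearch.query(a3m_string)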
50 | """ 51 | self.binary_path = binary_path 52 | self.databases = databases 53 | self.n_cpu = n_cpu 54 | self.maxseq = maxseq 55 | 56 | for database_path in self.databases: 57 | if not glob.glob(database_path + "_*"): 58 | logging.error( 59 | "Could not find HHsearch database %s", database_path 60 | ) 61 | raise ValueError( 62 | f"Could not find HHsearch database {database_path}" 63 | ) 64 | 65 | def query(self, a3m: str) -> str: 66 | """Queries the database using HHsearch using a given a3m.""" 67 | with utils.tmpdir_manager(base_dir="/tmp") as query_tmp_dir: 68 | input_path = os.path.join(query_tmp_dir, "query.a3m") 69 | hhr_path = os.path.join(query_tmp_dir, "output.hhr") 70 | with open(input_path, "w") as f: 71 | f.write(a3m) 72 | 73 | db_cmd = [] 74 | for db_path in self.databases: 75 | db_cmd.append("-d") 76 | db_cmd.append(db_path) 77 | cmd = [ 78 | self.binary_path, 79 | "-i", 80 | input_path, 81 | "-o", 82 | hhr_path, 83 | "-maxseq", 84 | str(self.maxseq), 85 | "-cpu", 86 | str(self.n_cpu), 87 | ] + db_cmd 88 | 89 | logging.info('Launching subprocess "%s"', " ".join(cmd)) 90 | process = subprocess.Popen( 91 | cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE 92 | ) 93 | with utils.timing("HHsearch query"): 94 | stdout, stderr = process.communicate() 95 | retcode = process.wait() 96 | 97 | if retcode: 98 | # Stderr is truncated to prevent proto size errors in Beam. 99 | raise RuntimeError( 100 | "HHSearch failed:\nstdout:\n%s\n\nstderr:\n%s\n" 101 | % (stdout.decode("utf-8"), stderr[:100_000].decode("utf-8")) 102 | ) 103 | 104 | with open(hhr_path) as f: 105 | hhr = f.read() 106 | return hhr 107 | -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/unicore/nan_detector.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) DP Technology. 2 | # Copyright (c) Facebook, Inc. and its affiliates. 3 | # 4 | # This source code is licensed under the MIT license found in the 5 | # LICENSE file in the root directory of this source tree. 
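# Usage sketch (illustrative, not part of the original source): NanDetector is meant
# to wrap a forward/backward pass as a context manager, e.g.
#
#     with NanDetector(model):
#         loss = model(**sample)
#         loss.backward()
#
# It registers forward/backward hooks on every submodule, logs the first NaN/Inf
# output it sees, and on exit dumps per-parameter gradient norms when a non-finite
# gradient norm is detected.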
6 | 7 | import logging 8 | 9 | import torch 10 | 11 | 12 | logger = logging.getLogger(__name__) 13 | 14 | 15 | class NanDetector: 16 | """ 17 | Detects the first NaN or Inf in forward and/or backward pass and logs, together with the module name 18 | """ 19 | 20 | def __init__(self, model, forward=True, backward=True): 21 | self.bhooks = [] 22 | self.fhooks = [] 23 | self.forward = forward 24 | self.backward = backward 25 | self.named_parameters = list(model.named_parameters()) 26 | self.reset() 27 | 28 | for name, mod in model.named_modules(): 29 | mod.__module_name = name 30 | self.add_hooks(mod) 31 | 32 | def __enter__(self): 33 | return self 34 | 35 | def __exit__(self, exc_type, exc_value, exc_traceback): 36 | # Dump out all model gnorms to enable better debugging 37 | norm = {} 38 | gradients = {} 39 | for name, param in self.named_parameters: 40 | if param.grad is not None: 41 | grad_norm = torch.norm(param.grad.data, p=2, dtype=torch.float32) 42 | norm[name] = grad_norm.item() 43 | if torch.isnan(grad_norm).any() or torch.isinf(grad_norm).any(): 44 | gradients[name] = param.grad.data 45 | if len(gradients) > 0: 46 | logger.info("Detected nan/inf grad norm, dumping norms...") 47 | logger.info(f"norms: {norm}") 48 | logger.info(f"gradients: {gradients}") 49 | 50 | self.close() 51 | 52 | def add_hooks(self, module): 53 | if self.forward: 54 | self.fhooks.append(module.register_forward_hook(self.fhook_fn)) 55 | if self.backward: 56 | self.bhooks.append(module.register_backward_hook(self.bhook_fn)) 57 | 58 | def reset(self): 59 | self.has_printed_f = False 60 | self.has_printed_b = False 61 | 62 | def _detect(self, tensor, name, backward): 63 | err = None 64 | if ( 65 | torch.is_floating_point(tensor) 66 | # single value tensors (like the loss) will not provide much info 67 | and tensor.numel() >= 2 68 | ): 69 | with torch.no_grad(): 70 | if torch.isnan(tensor).any(): 71 | err = "NaN" 72 | elif torch.isinf(tensor).any(): 73 | err = "Inf" 74 | if err is not None: 75 | err = f"{err} detected in output of {name}, shape: {tensor.shape}, {'backward' if backward else 'forward'}" 76 | return err 77 | 78 | def _apply(self, module, inp, x, backward): 79 | if torch.is_tensor(x): 80 | if isinstance(inp, tuple) and len(inp) > 0: 81 | inp = inp[0] 82 | err = self._detect(x, module.__module_name, backward) 83 | if err is not None: 84 | if torch.is_tensor(inp) and not backward: 85 | err += ( 86 | f" input max: {inp.max().item()}, input min: {inp.min().item()}" 87 | ) 88 | 89 | has_printed_attr = "has_printed_b" if backward else "has_printed_f" 90 | logger.warning(err) 91 | setattr(self, has_printed_attr, True) 92 | elif isinstance(x, dict): 93 | for v in x.values(): 94 | self._apply(module, inp, v, backward) 95 | elif isinstance(x, list) or isinstance(x, tuple): 96 | for v in x: 97 | self._apply(module, inp, v, backward) 98 | 99 | def fhook_fn(self, module, inp, output): 100 | if not self.has_printed_f: 101 | self._apply(module, inp, output, backward=False) 102 | 103 | def bhook_fn(self, module, inp, output): 104 | if not self.has_printed_b: 105 | self._apply(module, inp, output, backward=True) 106 | 107 | def close(self): 108 | for hook in self.fhooks + self.bhooks: 109 | hook.remove() 110 | -------------------------------------------------------------------------------- /VFN-Diff/.gitignore: -------------------------------------------------------------------------------- 1 | mmCIF/ 2 | .vscode/ 3 | ckpt/ 4 | multirun/ 5 | wandb/ 6 | *.pdb 7 | *.csv 8 | 9 | inference_outputs/ 10 | # Byte-compiled / 
optimized / DLL files 11 | __pycache__/ 12 | *.py[cod] 13 | *$py.class 14 | 15 | # C extensions 16 | *.so 17 | 18 | # Distribution / packaging 19 | .Python 20 | build/ 21 | develop-eggs/ 22 | dist/ 23 | downloads/ 24 | eggs/ 25 | .eggs/ 26 | lib/ 27 | lib64/ 28 | parts/ 29 | sdist/ 30 | var/ 31 | wheels/ 32 | share/python-wheels/ 33 | *.egg-info/ 34 | .installed.cfg 35 | *.egg 36 | MANIFEST 37 | 38 | # PyInstaller 39 | # Usually these files are written by a python script from a template 40 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 41 | *.manifest 42 | *.spec 43 | 44 | # Installer logs 45 | pip-log.txt 46 | pip-delete-this-directory.txt 47 | 48 | # Unit test / coverage reports 49 | htmlcov/ 50 | .tox/ 51 | .nox/ 52 | .coverage 53 | .coverage.* 54 | .cache 55 | nosetests.xml 56 | coverage.xml 57 | *.cover 58 | *.py,cover 59 | .hypothesis/ 60 | .pytest_cache/ 61 | cover/ 62 | 63 | # Translations 64 | *.mo 65 | *.pot 66 | 67 | # Django stuff: 68 | *.log 69 | local_settings.py 70 | db.sqlite3 71 | db.sqlite3-journal 72 | 73 | # Flask stuff: 74 | instance/ 75 | .webassets-cache 76 | 77 | # Scrapy stuff: 78 | .scrapy 79 | 80 | # Sphinx documentation 81 | docs/_build/ 82 | 83 | # PyBuilder 84 | .pybuilder/ 85 | target/ 86 | 87 | # Jupyter Notebook 88 | .ipynb_checkpoints 89 | 90 | # IPython 91 | profile_default/ 92 | ipython_config.py 93 | 94 | # pyenv 95 | # For a library or package, you might want to ignore these files since the code is 96 | # intended to run in multiple environments; otherwise, check them in: 97 | # .python-version 98 | 99 | # pipenv 100 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 101 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 102 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 103 | # install all needed dependencies. 104 | #Pipfile.lock 105 | 106 | # poetry 107 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 108 | # This is especially recommended for binary packages to ensure reproducibility, and is more 109 | # commonly ignored for libraries. 110 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 111 | #poetry.lock 112 | 113 | # pdm 114 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 115 | #pdm.lock 116 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 117 | # in version control. 118 | # https://pdm.fming.dev/#use-with-ide 119 | .pdm.toml 120 | 121 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 122 | __pypackages__/ 123 | 124 | # Celery stuff 125 | celerybeat-schedule 126 | celerybeat.pid 127 | 128 | # SageMath parsed files 129 | *.sage.py 130 | 131 | # Environments 132 | .env 133 | .venv 134 | env/ 135 | venv/ 136 | ENV/ 137 | env.bak/ 138 | venv.bak/ 139 | 140 | # Spyder project settings 141 | .spyderproject 142 | .spyproject 143 | 144 | # Rope project settings 145 | .ropeproject 146 | 147 | # mkdocs documentation 148 | /site 149 | 150 | # mypy 151 | .mypy_cache/ 152 | .dmypy.json 153 | dmypy.json 154 | 155 | # Pyre type checker 156 | .pyre/ 157 | 158 | # pytype static type analyzer 159 | .pytype/ 160 | 161 | # Cython debug symbols 162 | cython_debug/ 163 | 164 | # PyCharm 165 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 166 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 167 | # and can be added to the global gitignore or merged into this file. For a more nuclear 168 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 169 | #.idea/ 170 | data/data/ 171 | outputs/ 172 | 173 | #PDB processed dataset 174 | data/processed_pdb/ 175 | data/processed_pdb_openfold/ 176 | 177 | #PDBBind dataset 178 | DiffusionProteinLigand/data/PDBBind* 179 | 180 | #pynb for debug code 181 | pynb/ 182 | 183 | #slurm_output for record 184 | slurm_output/ 185 | processed_pdb -------------------------------------------------------------------------------- /VFN-IF/Uni-Core/csrc/layernorm/interface_gamma_beta.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | namespace { 6 | void compute_n1_n2( 7 | at::Tensor input, 8 | at::IntArrayRef normalized_shape, 9 | int& n1, 10 | int& n2) 11 | { 12 | int idiff = input.ndimension() - normalized_shape.size(); 13 | n2 = 1; 14 | for (int i = 0; i < (int)normalized_shape.size(); ++i) { 15 | assert( input.sizes()[i+idiff] == normalized_shape[i] ); 16 | n2 *= normalized_shape[i]; 17 | } 18 | n1 = 1; 19 | for (int i = 0; i < idiff; ++i) { 20 | n1 *= input.sizes()[i]; 21 | } 22 | } 23 | 24 | void check_args( 25 | at::IntArrayRef normalized_shape, 26 | at::Tensor gamma, 27 | at::Tensor beta 28 | ) 29 | { 30 | TORCH_CHECK(!gamma.defined() || gamma.sizes().equals(normalized_shape)); 31 | TORCH_CHECK(!beta.defined() || beta.sizes().equals(normalized_shape)); 32 | } 33 | 34 | void check_args( 35 | at::Tensor input, 36 | at::IntArrayRef normalized_shape, 37 | int& n1, 38 | int& n2 39 | ) 40 | { 41 | int64_t normalized_ndim = normalized_shape.size(); 42 | 43 | if (normalized_ndim < 1) { 44 | std::stringstream ss; 45 | ss << "Expected normalized_shape to be at least 1-dimensional, i.e., " 46 | << "containing at least one element, but got normalized_shape=" 47 | << normalized_shape; 48 | throw std::runtime_error(ss.str()); 49 | } 50 | 51 | auto input_shape = input.sizes(); 52 | auto input_ndim = input.dim(); 53 | 54 | if (input_ndim < normalized_ndim || 55 | !input_shape.slice(input_ndim - normalized_ndim).equals(normalized_shape)) { 56 | std::stringstream ss; 57 | ss << "Given normalized_shape=" << normalized_shape 58 | << ", expected input with shape [*"; 59 | for (auto size : normalized_shape) { 60 | ss << ", " << size; 61 | } 62 | ss << "], but got input of size" << input_shape; 63 | throw std::runtime_error(ss.str()); 64 | } 65 | 66 | compute_n1_n2(input,normalized_shape,n1,n2); 67 | } 68 | 69 | 70 | void 
check_args( 71 | at::Tensor input, 72 | at::IntArrayRef normalized_shape, 73 | at::Tensor gamma, 74 | at::Tensor beta, 75 | int& n1, 76 | int& n2 77 | ) 78 | { 79 | check_args(input,normalized_shape,n1,n2); 80 | check_args(normalized_shape,gamma,beta); 81 | } 82 | } 83 | 84 | #define CHECK_CUDA(x) TORCH_CHECK(x.is_cuda(), #x " must be a CUDA tensor") 85 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous") 86 | #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 87 | 88 | void cuda_layer_norm_gradient( 89 | at::Tensor* dout, 90 | at::Tensor* mean, 91 | at::Tensor* invvar, 92 | at::Tensor* input, 93 | int n1, 94 | int n2, 95 | at::IntArrayRef normalized_shape, 96 | at::Tensor* gamma, 97 | at::Tensor* beta, 98 | double epsilon, 99 | at::Tensor* grad_gamma, 100 | at::Tensor* grad_beta 101 | ); 102 | 103 | std::vector layer_norm_gradient( 104 | at::Tensor dout, 105 | at::Tensor mean, 106 | at::Tensor invvar, 107 | at::Tensor input, 108 | at::IntArrayRef normalized_shape, 109 | at::Tensor gamma, 110 | at::Tensor beta, 111 | double epsilon) { 112 | CHECK_INPUT(dout); 113 | CHECK_INPUT(mean); 114 | CHECK_INPUT(invvar); 115 | CHECK_INPUT(input); 116 | CHECK_INPUT(gamma); 117 | CHECK_INPUT(beta); 118 | int n1,n2; 119 | check_args(input,normalized_shape,gamma,beta,n1,n2); 120 | TORCH_CHECK(n2 == 64 || n2 == 128 || n2 == 256 || n2 == 320 || n2 == 384 || n2 == 512 || n2 == 640 || n2 == 768 || n2 == 1024 || n2 == 1280 || 121 | n2 == 1536 || n2 == 1792 || n2 == 2048 || n2 == 2560 || n2 == 5120, "dimension is not supported"); 122 | at::Tensor grad_gamma = at::empty_like(gamma); 123 | at::Tensor grad_beta = at::empty_like(beta); 124 | cuda_layer_norm_gradient(&dout,&mean,&invvar,&input,n1,n2, 125 | normalized_shape,&gamma,&beta,epsilon, 126 | &grad_gamma,&grad_beta); 127 | return {grad_gamma, grad_beta}; 128 | } 129 | 130 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 131 | m.def("backward", &layer_norm_gradient, 132 | "LayerNorm fast backward for computing gamma and beta (CUDA)"); 133 | } 134 | --------------------------------------------------------------------------------
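For reference, a pure-Python sketch of the shape bookkeeping performed by compute_n1_n2/check_args in interface_gamma_beta.cpp above (a hypothetical helper written for illustration, not part of the extension):

def compute_n1_n2(input_shape, normalized_shape):
    # n2 is the product of the trailing (normalized) dimensions,
    # n1 the product of the remaining leading dimensions.
    idiff = len(input_shape) - len(normalized_shape)
    n2 = 1
    for i, d in enumerate(normalized_shape):
        assert input_shape[i + idiff] == d
        n2 *= d
    n1 = 1
    for d in input_shape[:idiff]:
        n1 *= d
    return n1, n2

# e.g. an input of shape (8, 128, 768) normalized over (768,) gives n1 = 1024, n2 = 768,
# and the backward kernel requires n2 to be one of the supported sizes listed above.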