├── .gitignore ├── LICENSE ├── README.md ├── asset └── benchmark.png ├── config ├── multi_task │ ├── BERT │ │ ├── aav_contact_BERT.yaml │ │ ├── aav_fold_BERT.yaml │ │ ├── aav_ss_BERT.yaml │ │ ├── beta_contact_BERT.yaml │ │ ├── beta_fold_BERT.yaml │ │ ├── beta_ss_BERT.yaml │ │ ├── bindingdb_contact_BERT.yaml │ │ ├── bindingdb_fold_BERT.yaml │ │ ├── bindingdb_ss_BERT.yaml │ │ ├── binloc_contact_BERT.yaml │ │ ├── binloc_fold_BERT.yaml │ │ ├── binloc_ss_BERT.yaml │ │ ├── contact_fold_BERT.yaml │ │ ├── contact_ss_BERT.yaml │ │ ├── fluorescence_contact_BERT.yaml │ │ ├── fluorescence_fold_BERT.yaml │ │ ├── fluorescence_ss_BERT.yaml │ │ ├── fold_contact_BERT.yaml │ │ ├── fold_ss_BERT.yaml │ │ ├── gb1_contact_BERT.yaml │ │ ├── gb1_fold_BERT.yaml │ │ ├── gb1_ss_BERT.yaml │ │ ├── human_contact_BERT.yaml │ │ ├── human_fold_BERT.yaml │ │ ├── human_ss_BERT.yaml │ │ ├── pdbbind_contact_BERT.yaml │ │ ├── pdbbind_fold_BERT.yaml │ │ ├── pdbbind_ss_BERT.yaml │ │ ├── ppi_affinity_contact_BERT.yaml │ │ ├── ppi_affinity_fold_BERT.yaml │ │ ├── ppi_affinity_ss_BERT.yaml │ │ ├── solubility_contact_BERT.yaml │ │ ├── solubility_fold_BERT.yaml │ │ ├── solubility_ss_BERT.yaml │ │ ├── ss_contact_BERT.yaml │ │ ├── ss_fold_BERT.yaml │ │ ├── stability_contact_BERT.yaml │ │ ├── stability_fold_BERT.yaml │ │ ├── stability_ss_BERT.yaml │ │ ├── subloc_contact_BERT.yaml │ │ ├── subloc_fold_BERT.yaml │ │ ├── subloc_ss_BERT.yaml │ │ ├── thermo_contact_BERT.yaml │ │ ├── thermo_fold_BERT.yaml │ │ ├── thermo_ss_BERT.yaml │ │ ├── yeast_contact_BERT.yaml │ │ ├── yeast_fold_BERT.yaml │ │ └── yeast_ss_BERT.yaml │ ├── CNN │ │ ├── aav_contact_CNN.yaml │ │ ├── aav_fold_CNN.yaml │ │ ├── aav_ss_CNN.yaml │ │ ├── beta_contact_CNN.yaml │ │ ├── beta_fold_CNN.yaml │ │ ├── beta_ss_CNN.yaml │ │ ├── bindingdb_contact_CNN.yaml │ │ ├── bindingdb_fold_CNN.yaml │ │ ├── bindingdb_ss_CNN.yaml │ │ ├── binloc_contact_CNN.yaml │ │ ├── binloc_fold_CNN.yaml │ │ ├── binloc_ss_CNN.yaml │ │ ├── contact_fold_CNN.yaml │ │ ├── contact_ss_CNN.yaml │ │ ├── fluorescence_contact_CNN.yaml │ │ ├── fluorescence_fold_CNN.yaml │ │ ├── fluorescence_ss_CNN.yaml │ │ ├── fold_contact_CNN.yaml │ │ ├── fold_ss_CNN.yaml │ │ ├── gb1_contact_CNN.yaml │ │ ├── gb1_fold_CNN.yaml │ │ ├── gb1_ss_CNN.yaml │ │ ├── human_contact_CNN.yaml │ │ ├── human_fold_CNN.yaml │ │ ├── human_ss_CNN.yaml │ │ ├── pdbbind_contact_CNN.yaml │ │ ├── pdbbind_fold_CNN.yaml │ │ ├── pdbbind_ss_CNN.yaml │ │ ├── ppi_affinity_contact_CNN.yaml │ │ ├── ppi_affinity_fold_CNN.yaml │ │ ├── ppi_affinity_ss_CNN.yaml │ │ ├── solubility_contact_CNN.yaml │ │ ├── solubility_fold_CNN.yaml │ │ ├── solubility_ss_CNN.yaml │ │ ├── ss_contact_CNN.yaml │ │ ├── ss_fold_CNN.yaml │ │ ├── stability_contact_CNN.yaml │ │ ├── stability_fold_CNN.yaml │ │ ├── stability_ss_CNN.yaml │ │ ├── subloc_contact_CNN.yaml │ │ ├── subloc_fold_CNN.yaml │ │ ├── subloc_ss_CNN.yaml │ │ ├── thermo_contact_CNN.yaml │ │ ├── thermo_fold_CNN.yaml │ │ ├── thermo_ss_CNN.yaml │ │ ├── yeast_contact_CNN.yaml │ │ ├── yeast_fold_CNN.yaml │ │ └── yeast_ss_CNN.yaml │ └── ESM │ │ ├── aav_contact_ESM.yaml │ │ ├── aav_fold_ESM.yaml │ │ ├── aav_ss_ESM.yaml │ │ ├── beta_contact_ESM.yaml │ │ ├── beta_fold_ESM.yaml │ │ ├── beta_ss_ESM.yaml │ │ ├── bindingdb_contact_ESM.yaml │ │ ├── bindingdb_fold_ESM.yaml │ │ ├── bindingdb_ss_ESM.yaml │ │ ├── binloc_contact_ESM.yaml │ │ ├── binloc_fold_ESM.yaml │ │ ├── binloc_ss_ESM.yaml │ │ ├── contact_fold_ESM.yaml │ │ ├── contact_ss_ESM.yaml │ │ ├── fluorescence_contact_ESM.yaml │ │ ├── fluorescence_fold_ESM.yaml │ │ ├── fluorescence_ss_ESM.yaml │ │ ├── fold_contact_ESM.yaml │ │ ├── fold_ss_ESM.yaml │ │ ├── gb1_contact_ESM.yaml │ │ ├── gb1_fold_ESM.yaml │ │ ├── gb1_ss_ESM.yaml │ │ ├── human_contact_ESM.yaml │ │ ├── human_fold_ESM.yaml │ │ ├── human_ss_ESM.yaml │ │ ├── pdbbind_contact_ESM.yaml │ │ ├── pdbbind_fold_ESM.yaml │ │ ├── pdbbind_ss_ESM.yaml │ │ ├── ppi_affinity_contact_ESM.yaml │ │ ├── ppi_affinity_fold_ESM.yaml │ │ ├── ppi_affinity_ss_ESM.yaml │ │ ├── solubility_contact_ESM.yaml │ │ ├── solubility_fold_ESM.yaml │ │ ├── solubility_ss_ESM.yaml │ │ ├── ss_contact_ESM.yaml │ │ ├── ss_fold_ESM.yaml │ │ ├── stability_contact_ESM.yaml │ │ ├── stability_fold_ESM.yaml │ │ ├── stability_ss_ESM.yaml │ │ ├── subloc_contact_ESM.yaml │ │ ├── subloc_fold_ESM.yaml │ │ ├── subloc_ss_ESM.yaml │ │ ├── thermo_contact_ESM.yaml │ │ ├── thermo_fold_ESM.yaml │ │ ├── thermo_ss_ESM.yaml │ │ ├── yeast_contact_ESM.yaml │ │ ├── yeast_fold_ESM.yaml │ │ └── yeast_ss_ESM.yaml └── single_task │ ├── BERT │ ├── aav_BERT.yaml │ ├── beta_BERT.yaml │ ├── bindingdb_BERT.yaml │ ├── binloc_BERT.yaml │ ├── contact_BERT.yaml │ ├── fluorescence_BERT.yaml │ ├── fold_BERT.yaml │ ├── gb1_BERT.yaml │ ├── human_BERT.yaml │ ├── pdbbind_BERT.yaml │ ├── ppi_affinity_BERT.yaml │ ├── solubility_BERT.yaml │ ├── ss_BERT.yaml │ ├── stability_BERT.yaml │ ├── subloc_BERT.yaml │ ├── thermo_BERT.yaml │ └── yeast_BERT.yaml │ ├── CNN │ ├── aav_CNN.yaml │ ├── beta_CNN.yaml │ ├── bindingdb_CNN.yaml │ ├── binloc_CNN.yaml │ ├── contact_CNN.yaml │ ├── fluorescence_CNN.yaml │ ├── fold_CNN.yaml │ ├── gb1_CNN.yaml │ ├── human_CNN.yaml │ ├── pdbbind_CNN.yaml │ ├── ppi_affinity_CNN.yaml │ ├── solubility_CNN.yaml │ ├── ss_CNN.yaml │ ├── stability_CNN.yaml │ ├── subloc_CNN.yaml │ ├── thermo_CNN.yaml │ └── yeast_CNN.yaml │ ├── DDE │ ├── aav_DDE.yaml │ ├── beta_DDE.yaml │ ├── binloc_DDE.yaml │ ├── fluorescence_DDE.yaml │ ├── fold_DDE.yaml │ ├── gb1_DDE.yaml │ ├── human_DDE.yaml │ ├── ppi_affinity_DDE.yaml │ ├── solubility_DDE.yaml │ ├── stability_DDE.yaml │ ├── subloc_DDE.yaml │ ├── thermo_DDE.yaml │ └── yeast_DDE.yaml │ ├── ESM │ ├── aav_ESM.yaml │ ├── aav_ESM_fix.yaml │ ├── beta_ESM.yaml │ ├── beta_ESM_fix.yaml │ ├── bindingdb_ESM.yaml │ ├── bindingdb_ESM_fix.yaml │ ├── binloc_ESM.yaml │ ├── binloc_ESM_fix.yaml │ ├── contact_ESM.yaml │ ├── contact_ESM_fix.yaml │ ├── fluorescence_ESM.yaml │ ├── fluorescence_ESM_fix.yaml │ ├── fold_ESM.yaml │ ├── fold_ESM_fix.yaml │ ├── gb1_ESM.yaml │ ├── gb1_ESM_fix.yaml │ ├── human_ESM.yaml │ ├── human_ESM_fix.yaml │ ├── pdbbind_ESM.yaml │ ├── pdbbind_ESM_fix.yaml │ ├── ppi_affinity_ESM.yaml │ ├── ppi_affinity_ESM_fix.yaml │ ├── solubility_ESM.yaml │ ├── solubility_ESM_fix.yaml │ ├── ss_ESM.yaml │ ├── ss_ESM_fix.yaml │ ├── stability_ESM.yaml │ ├── stability_ESM_fix.yaml │ ├── subloc_ESM.yaml │ ├── subloc_ESM_fix.yaml │ ├── thermo_ESM.yaml │ ├── thermo_ESM_fix.yaml │ ├── yeast_ESM.yaml │ └── yeast_ESM_fix.yaml │ ├── LSTM │ ├── aav_LSTM.yaml │ ├── beta_LSTM.yaml │ ├── bindingdb_LSTM.yaml │ ├── binloc_LSTM.yaml │ ├── contact_LSTM.yaml │ ├── fluorescence_LSTM.yaml │ ├── fold_LSTM.yaml │ ├── gb1_LSTM.yaml │ ├── human_LSTM.yaml │ ├── pdbbind_LSTM.yaml │ ├── ppi_affinity_LSTM.yaml │ ├── solubility_LSTM.yaml │ ├── ss_LSTM.yaml │ ├── stability_LSTM.yaml │ ├── subloc_LSTM.yaml │ ├── thermo_LSTM.yaml │ └── yeast_LSTM.yaml │ ├── Moran │ ├── aav_Moran.yaml │ ├── beta_Moran.yaml │ ├── binloc_Moran.yaml │ ├── fluorescence_Moran.yaml │ ├── fold_Moran.yaml │ ├── gb1_Moran.yaml │ ├── human_Moran.yaml │ ├── ppi_affinity_Moran.yaml │ ├── solubility_Moran.yaml │ ├── stability_Moran.yaml │ ├── subloc_Moran.yaml │ ├── thermo_Moran.yaml │ └── yeast_Moran.yaml │ ├── ProtBert │ ├── aav_ProtBert.yaml │ ├── aav_ProtBert_fix.yaml │ ├── beta_ProtBert.yaml │ ├── beta_ProtBert_fix.yaml │ ├── bindingdb_ProtBert.yaml │ ├── bindingdb_ProtBert_fix.yaml │ ├── binloc_ProtBert.yaml │ ├── binloc_ProtBert_fix.yaml │ ├── contact_ProtBert.yaml │ ├── contact_ProtBert_fix.yaml │ ├── fluorescence_ProtBert.yaml │ ├── fluorescence_ProtBert_fix.yaml │ ├── fold_ProtBert.yaml │ ├── fold_ProtBert_fix.yaml │ ├── gb1_ProtBert.yaml │ ├── gb1_ProtBert_fix.yaml │ ├── human_ProtBert.yaml │ ├── human_ProtBert_fix.yaml │ ├── pdbbind_ProtBert.yaml │ ├── pdbbind_ProtBert_fix.yaml │ ├── ppi_affinity_ProtBert.yaml │ ├── ppi_affinity_ProtBert_fix.yaml │ ├── solubility_ProtBert.yaml │ ├── solubility_ProtBert_fix.yaml │ ├── ss_ProtBert.yaml │ ├── ss_ProtBert_fix.yaml │ ├── stability_ProtBert.yaml │ ├── stability_ProtBert_fix.yaml │ ├── subloc_ProtBert.yaml │ ├── subloc_ProtBert_fix.yaml │ ├── thermo_ProtBert.yaml │ ├── thermo_ProtBert_fix.yaml │ ├── yeast_ProtBert.yaml │ └── yeast_ProtBert_fix.yaml │ └── ResNet │ ├── aav_ResNet.yaml │ ├── beta_ResNet.yaml │ ├── bindingdb_ResNet.yaml │ ├── binloc_ResNet.yaml │ ├── contact_ResNet.yaml │ ├── fluorescence_ResNet.yaml │ ├── fold_ResNet.yaml │ ├── gb1_ResNet.yaml │ ├── human_ResNet.yaml │ ├── pdbbind_ResNet.yaml │ ├── ppi_affinity_ResNet.yaml │ ├── solubility_ResNet.yaml │ ├── ss_ResNet.yaml │ ├── stability_ResNet.yaml │ ├── subloc_ResNet.yaml │ ├── thermo_ResNet.yaml │ └── yeast_ResNet.yaml ├── peer ├── __init__.py ├── engine.py ├── flip.py ├── protbert.py └── util.py ├── requirements.txt └── script ├── run_multi.py └── run_single.py /.gitignore: -------------------------------------------------------------------------------- 1 | slurm-* 2 | 3 | # PyCharm 4 | .idea/ 5 | 6 | # Sphinx 7 | /doc/build 8 | 9 | # Python 10 | __pycache__ 11 | *.pyc 12 | *.egg-info 13 | 14 | # macOS 15 | */.DS_Store 16 | .DS_Store 17 | -------------------------------------------------------------------------------- /asset/benchmark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DeepGraphLearning/PEER_Benchmark/b40eec914273f204c50acca690df2a892e36c4a5/asset/benchmark.png -------------------------------------------------------------------------------- /config/multi_task/BERT/binloc_fold_BERT.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | datasets: 4 | - class: BinaryLocalization 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | center: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | - class: Fold 15 | path: ~/scratch/protein-datasets/ 16 | test_split: test_fold_holdout 17 | atom_feature: null 18 | bond_feature: null 19 | transform: 20 | class: Compose 21 | transforms: 22 | - class: TruncateProtein 23 | max_length: 200 24 | random: True 25 | - class: ProteinView 26 | view: "residue" 27 | 28 | tasks: 29 | - class: PropertyPrediction 30 | criterion: ce 31 | metric: [ "acc", "mcc" ] 32 | num_mlp_layer: 2 33 | num_class: 2 34 | - class: PropertyPrediction 35 | criterion: ce 36 | metric: [ "acc", "mcc" ] 37 | num_mlp_layer: 2 38 | num_class: 1195 39 | 40 | model: 41 | class: ProteinBERT 42 | input_dim: 21 43 | hidden_dim: 512 44 | num_layers: 4 45 | num_heads: 8 46 | intermediate_dim: 2048 47 | hidden_dropout: 0.1 48 | attention_dropout: 0.1 49 | 50 | eval_metric: Center - BinaryLocalization accuracy 51 | 52 | optimizer: 53 | class: Adam 54 | lr: 2.0e-5 55 | 56 | engine: 57 | gpus: [0, 1, 2, 3] 58 | batch_size: 4 59 | 60 | train: 61 | num_epoch: 100 62 | tradeoff: 1.0 -------------------------------------------------------------------------------- /config/multi_task/BERT/gb1_fold_BERT.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | datasets: 4 | - class: GB1 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | center: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | - class: Fold 15 | path: ~/scratch/protein-datasets/ 16 | test_split: test_fold_holdout 17 | atom_feature: null 18 | bond_feature: null 19 | transform: 20 | class: Compose 21 | transforms: 22 | - class: TruncateProtein 23 | max_length: 200 24 | random: True 25 | - class: ProteinView 26 | view: "residue" 27 | 28 | tasks: 29 | - class: PropertyPrediction 30 | criterion: mse 31 | metric: [ "mae", "rmse", "spearmanr" ] 32 | normalization: False 33 | num_mlp_layer: 2 34 | - class: PropertyPrediction 35 | criterion: ce 36 | metric: [ "acc", "mcc" ] 37 | num_mlp_layer: 2 38 | num_class: 1195 39 | 40 | model: 41 | class: ProteinBERT 42 | input_dim: 21 43 | hidden_dim: 512 44 | num_layers: 4 45 | num_heads: 8 46 | intermediate_dim: 2048 47 | hidden_dropout: 0.1 48 | attention_dropout: 0.1 49 | 50 | eval_metric: Center - GB1 spearmanr 51 | 52 | optimizer: 53 | class: Adam 54 | lr: 2.0e-5 55 | 56 | engine: 57 | gpus: [0, 1, 2, 3] 58 | batch_size: 4 59 | 60 | train: 61 | num_epoch: 100 62 | tradeoff: 1.0 63 | -------------------------------------------------------------------------------- /config/multi_task/BERT/solubility_fold_BERT.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | datasets: 4 | - class: Solubility 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | center: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | - class: Fold 15 | path: ~/scratch/protein-datasets/ 16 | test_split: test_fold_holdout 17 | atom_feature: null 18 | bond_feature: null 19 | transform: 20 | class: Compose 21 | transforms: 22 | - class: TruncateProtein 23 | max_length: 200 24 | random: True 25 | - class: ProteinView 26 | view: "residue" 27 | 28 | tasks: 29 | - class: PropertyPrediction 30 | criterion: ce 31 | metric: [ "acc", "mcc" ] 32 | num_mlp_layer: 2 33 | num_class: 2 34 | - class: PropertyPrediction 35 | criterion: ce 36 | metric: [ "acc", "mcc" ] 37 | num_mlp_layer: 2 38 | num_class: 1195 39 | 40 | model: 41 | class: ProteinBERT 42 | input_dim: 21 43 | hidden_dim: 512 44 | num_layers: 4 45 | num_heads: 8 46 | intermediate_dim: 2048 47 | hidden_dropout: 0.1 48 | attention_dropout: 0.1 49 | 50 | eval_metric: Center - Solubility accuracy 51 | 52 | optimizer: 53 | class: Adam 54 | lr: 2.0e-5 55 | 56 | engine: 57 | gpus: [0, 1, 2, 3] 58 | batch_size: 4 59 | 60 | train: 61 | num_epoch: 50 62 | tradeoff: 1.0 -------------------------------------------------------------------------------- /config/multi_task/BERT/solubility_ss_BERT.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | datasets: 4 | - class: Solubility 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | center: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | - class: SecondaryStructure 15 | path: ~/scratch/protein-datasets/ 16 | atom_feature: null 17 | bond_feature: null 18 | test_split: cb513 19 | transform: 20 | class: Compose 21 | transforms: 22 | - class: TruncateProtein 23 | max_length: 200 24 | random: True 25 | - class: ProteinView 26 | view: "residue" 27 | 28 | tasks: 29 | - class: PropertyPrediction 30 | criterion: ce 31 | metric: [ "acc", "mcc" ] 32 | num_mlp_layer: 2 33 | num_class: 2 34 | - class: NodePropertyPrediction 35 | criterion: ce 36 | metric: [ "micro_acc", "macro_acc" ] 37 | num_mlp_layer: 2 38 | num_class: 3 39 | 40 | model: 41 | class: ProteinBERT 42 | input_dim: 21 43 | hidden_dim: 512 44 | num_layers: 4 45 | num_heads: 8 46 | intermediate_dim: 2048 47 | hidden_dropout: 0.1 48 | attention_dropout: 0.1 49 | 50 | eval_metric: Center - Solubility accuracy 51 | 52 | optimizer: 53 | class: Adam 54 | lr: 2.0e-5 55 | 56 | engine: 57 | gpus: [0, 1, 2, 3] 58 | batch_size: 4 59 | 60 | train: 61 | num_epoch: 50 62 | tradeoff: 1.0 -------------------------------------------------------------------------------- /config/multi_task/CNN/aav_fold_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | datasets: 4 | - class: AAV 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | keep_mutation_region: True 9 | center: True 10 | transform: 11 | class: Compose 12 | transforms: 13 | - class: ProteinView 14 | view: "residue" 15 | - class: Fold 16 | path: ~/scratch/protein-datasets/ 17 | test_split: test_fold_holdout 18 | atom_feature: null 19 | bond_feature: null 20 | transform: 21 | class: Compose 22 | transforms: 23 | - class: TruncateProtein 24 | max_length: 200 25 | random: True 26 | - class: ProteinView 27 | view: "residue" 28 | 29 | tasks: 30 | - class: PropertyPrediction 31 | criterion: mse 32 | metric: [ "mae", "rmse", "spearmanr" ] 33 | normalization: False 34 | num_mlp_layer: 2 35 | - class: PropertyPrediction 36 | criterion: ce 37 | metric: [ "acc", "mcc" ] 38 | num_mlp_layer: 2 39 | num_class: 1195 40 | 41 | model: 42 | class: ProteinConvolutionalNetwork 43 | input_dim: 21 44 | hidden_dims: [1024, 1024] 45 | kernel_size: 5 46 | padding: 2 47 | 48 | eval_metric: Center - AAV spearmanr 49 | 50 | optimizer: 51 | class: Adam 52 | lr: 2.0e-5 53 | 54 | engine: 55 | gpus: [0, 1, 2, 3] 56 | batch_size: 8 57 | 58 | train: 59 | num_epoch: 100 60 | tradeoff: 1.0 61 | -------------------------------------------------------------------------------- /config/multi_task/CNN/aav_ss_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | datasets: 4 | - class: AAV 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | keep_mutation_region: True 9 | center: True 10 | transform: 11 | class: Compose 12 | transforms: 13 | - class: ProteinView 14 | view: "residue" 15 | - class: SecondaryStructure 16 | path: ~/scratch/protein-datasets/ 17 | atom_feature: null 18 | bond_feature: null 19 | test_split: cb513 20 | transform: 21 | class: Compose 22 | transforms: 23 | - class: TruncateProtein 24 | max_length: 200 25 | random: True 26 | - class: ProteinView 27 | view: "residue" 28 | 29 | tasks: 30 | - class: PropertyPrediction 31 | criterion: mse 32 | metric: [ "mae", "rmse", "spearmanr" ] 33 | normalization: False 34 | num_mlp_layer: 2 35 | - class: NodePropertyPrediction 36 | criterion: ce 37 | metric: [ "micro_acc", "macro_acc" ] 38 | num_mlp_layer: 2 39 | num_class: 3 40 | 41 | model: 42 | class: ProteinConvolutionalNetwork 43 | input_dim: 21 44 | hidden_dims: [1024, 1024] 45 | kernel_size: 5 46 | padding: 2 47 | 48 | eval_metric: Center - AAV spearmanr 49 | 50 | optimizer: 51 | class: Adam 52 | lr: 2.0e-5 53 | 54 | engine: 55 | gpus: [0, 1, 2, 3] 56 | batch_size: 8 57 | 58 | train: 59 | num_epoch: 100 60 | tradeoff: 1.0 61 | -------------------------------------------------------------------------------- /config/multi_task/CNN/beta_fold_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | datasets: 4 | - class: BetaLactamase 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | center: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | - class: Fold 15 | path: ~/scratch/protein-datasets/ 16 | test_split: test_fold_holdout 17 | atom_feature: null 18 | bond_feature: null 19 | transform: 20 | class: Compose 21 | transforms: 22 | - class: TruncateProtein 23 | max_length: 200 24 | random: True 25 | - class: ProteinView 26 | view: "residue" 27 | 28 | tasks: 29 | - class: PropertyPrediction 30 | criterion: mse 31 | metric: [ "mae", "rmse", "spearmanr" ] 32 | normalization: False 33 | num_mlp_layer: 2 34 | - class: PropertyPrediction 35 | criterion: ce 36 | metric: [ "acc", "mcc" ] 37 | num_mlp_layer: 2 38 | num_class: 1195 39 | 40 | model: 41 | class: ProteinConvolutionalNetwork 42 | input_dim: 21 43 | hidden_dims: [1024, 1024] 44 | kernel_size: 5 45 | padding: 2 46 | 47 | eval_metric: Center - BetaLactamase spearmanr 48 | 49 | optimizer: 50 | class: Adam 51 | lr: 2.0e-5 52 | 53 | engine: 54 | gpus: [0, 1, 2, 3] 55 | batch_size: 8 56 | 57 | train: 58 | num_epoch: 100 59 | tradeoff: 1.0 -------------------------------------------------------------------------------- /config/multi_task/CNN/beta_ss_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | datasets: 4 | - class: BetaLactamase 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | center: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | - class: SecondaryStructure 15 | path: ~/scratch/protein-datasets/ 16 | atom_feature: null 17 | bond_feature: null 18 | test_split: cb513 19 | transform: 20 | class: Compose 21 | transforms: 22 | - class: TruncateProtein 23 | max_length: 200 24 | random: True 25 | - class: ProteinView 26 | view: "residue" 27 | 28 | tasks: 29 | - class: PropertyPrediction 30 | criterion: mse 31 | metric: [ "mae", "rmse", "spearmanr" ] 32 | normalization: False 33 | num_mlp_layer: 2 34 | - class: NodePropertyPrediction 35 | criterion: ce 36 | metric: [ "micro_acc", "macro_acc" ] 37 | num_mlp_layer: 2 38 | num_class: 3 39 | 40 | model: 41 | class: ProteinConvolutionalNetwork 42 | input_dim: 21 43 | hidden_dims: [1024, 1024] 44 | kernel_size: 5 45 | padding: 2 46 | 47 | eval_metric: Center - BetaLactamase spearmanr 48 | 49 | optimizer: 50 | class: Adam 51 | lr: 2.0e-5 52 | 53 | engine: 54 | gpus: [0, 1, 2, 3] 55 | batch_size: 8 56 | 57 | train: 58 | num_epoch: 100 59 | tradeoff: 1.0 -------------------------------------------------------------------------------- /config/multi_task/CNN/binloc_contact_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | datasets: 4 | - class: BinaryLocalization 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | center: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | - class: ProteinNet 15 | path: ~/scratch/protein-datasets/ 16 | atom_feature: null 17 | bond_feature: null 18 | transform: 19 | class: Compose 20 | transforms: 21 | - class: TruncateProtein 22 | max_length: 1022 23 | random: False 24 | - class: ProteinView 25 | view: "residue" 26 | 27 | tasks: 28 | - class: PropertyPrediction 29 | criterion: ce 30 | metric: [ "acc", "mcc" ] 31 | num_mlp_layer: 2 32 | num_class: 2 33 | - class: ContactPrediction 34 | criterion: bce 35 | metric: [ "accuracy", "prec@L5", "prec@5" ] 36 | max_length: 300 37 | random_truncate: yes 38 | threshold: 8.0 39 | gap: 6 40 | num_mlp_layer: 2 41 | 42 | model: 43 | class: ProteinConvolutionalNetwork 44 | input_dim: 21 45 | hidden_dims: [1024, 1024] 46 | kernel_size: 5 47 | padding: 2 48 | 49 | eval_metric: Center - BinaryLocalization accuracy 50 | 51 | optimizer: 52 | class: Adam 53 | lr: 2.0e-4 54 | 55 | engine: 56 | gpus: [0, 1, 2, 3] 57 | batch_size: 8 58 | 59 | train: 60 | num_epoch: 100 61 | tradeoff: 1.0 -------------------------------------------------------------------------------- /config/multi_task/CNN/binloc_fold_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | datasets: 4 | - class: BinaryLocalization 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | center: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | - class: Fold 15 | path: ~/scratch/protein-datasets/ 16 | test_split: test_fold_holdout 17 | atom_feature: null 18 | bond_feature: null 19 | transform: 20 | class: Compose 21 | transforms: 22 | - class: TruncateProtein 23 | max_length: 200 24 | random: True 25 | - class: ProteinView 26 | view: "residue" 27 | 28 | tasks: 29 | - class: PropertyPrediction 30 | criterion: ce 31 | metric: [ "acc", "mcc" ] 32 | num_mlp_layer: 2 33 | num_class: 2 34 | - class: PropertyPrediction 35 | criterion: ce 36 | metric: [ "acc", "mcc" ] 37 | num_mlp_layer: 2 38 | num_class: 1195 39 | 40 | model: 41 | class: ProteinConvolutionalNetwork 42 | input_dim: 21 43 | hidden_dims: [1024, 1024] 44 | kernel_size: 5 45 | padding: 2 46 | 47 | eval_metric: Center - BinaryLocalization accuracy 48 | 49 | optimizer: 50 | class: Adam 51 | lr: 2.0e-5 52 | 53 | engine: 54 | gpus: [0, 1, 2, 3] 55 | batch_size: 8 56 | 57 | train: 58 | num_epoch: 100 59 | tradeoff: 1.0 -------------------------------------------------------------------------------- /config/multi_task/CNN/binloc_ss_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | datasets: 4 | - class: BinaryLocalization 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | center: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | - class: SecondaryStructure 15 | path: ~/scratch/protein-datasets/ 16 | atom_feature: null 17 | bond_feature: null 18 | test_split: cb513 19 | transform: 20 | class: Compose 21 | transforms: 22 | - class: TruncateProtein 23 | max_length: 200 24 | random: True 25 | - class: ProteinView 26 | view: "residue" 27 | 28 | tasks: 29 | - class: PropertyPrediction 30 | criterion: ce 31 | metric: [ "acc", "mcc" ] 32 | num_mlp_layer: 2 33 | num_class: 2 34 | - class: NodePropertyPrediction 35 | criterion: ce 36 | metric: [ "micro_acc", "macro_acc" ] 37 | num_mlp_layer: 2 38 | num_class: 3 39 | 40 | model: 41 | class: ProteinConvolutionalNetwork 42 | input_dim: 21 43 | hidden_dims: [1024, 1024] 44 | kernel_size: 5 45 | padding: 2 46 | 47 | eval_metric: Center - BinaryLocalization accuracy 48 | 49 | optimizer: 50 | class: Adam 51 | lr: 2.0e-5 52 | 53 | engine: 54 | gpus: [0, 1, 2, 3] 55 | batch_size: 8 56 | 57 | train: 58 | num_epoch: 100 59 | tradeoff: 1.0 -------------------------------------------------------------------------------- /config/multi_task/CNN/fluorescence_fold_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | datasets: 4 | - class: Fluorescence 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | center: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | - class: Fold 15 | path: ~/scratch/protein-datasets/ 16 | test_split: test_fold_holdout 17 | atom_feature: null 18 | bond_feature: null 19 | transform: 20 | class: Compose 21 | transforms: 22 | - class: TruncateProtein 23 | max_length: 200 24 | random: True 25 | - class: ProteinView 26 | view: "residue" 27 | 28 | tasks: 29 | - class: PropertyPrediction 30 | criterion: mse 31 | metric: [ "mae", "rmse", "spearmanr" ] 32 | normalization: False 33 | num_mlp_layer: 2 34 | - class: PropertyPrediction 35 | criterion: ce 36 | metric: [ "acc", "mcc" ] 37 | num_mlp_layer: 2 38 | num_class: 1195 39 | 40 | model: 41 | class: ProteinConvolutionalNetwork 42 | input_dim: 21 43 | hidden_dims: [1024, 1024] 44 | kernel_size: 5 45 | padding: 2 46 | 47 | eval_metric: Center - Fluorescence spearmanr 48 | 49 | optimizer: 50 | class: Adam 51 | lr: 2.0e-5 52 | 53 | engine: 54 | gpus: [0, 1, 2, 3] 55 | batch_size: 8 56 | 57 | train: 58 | num_epoch: 100 59 | tradeoff: 1.0 -------------------------------------------------------------------------------- /config/multi_task/CNN/fluorescence_ss_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | datasets: 4 | - class: Fluorescence 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | center: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | - class: SecondaryStructure 15 | path: ~/scratch/protein-datasets/ 16 | atom_feature: null 17 | bond_feature: null 18 | test_split: cb513 19 | transform: 20 | class: Compose 21 | transforms: 22 | - class: TruncateProtein 23 | max_length: 200 24 | random: True 25 | - class: ProteinView 26 | view: "residue" 27 | 28 | tasks: 29 | - class: PropertyPrediction 30 | criterion: mse 31 | metric: [ "mae", "rmse", "spearmanr" ] 32 | normalization: False 33 | num_mlp_layer: 2 34 | - class: NodePropertyPrediction 35 | criterion: ce 36 | metric: [ "micro_acc", "macro_acc" ] 37 | num_mlp_layer: 2 38 | num_class: 3 39 | 40 | model: 41 | class: ProteinConvolutionalNetwork 42 | input_dim: 21 43 | hidden_dims: [1024, 1024] 44 | kernel_size: 5 45 | padding: 2 46 | 47 | eval_metric: Center - Fluorescence spearmanr 48 | 49 | optimizer: 50 | class: Adam 51 | lr: 2.0e-5 52 | 53 | engine: 54 | gpus: [0, 1, 2, 3] 55 | batch_size: 8 56 | 57 | train: 58 | num_epoch: 100 59 | tradeoff: 1.0 -------------------------------------------------------------------------------- /config/multi_task/CNN/fold_ss_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | datasets: 4 | - class: Fold 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | test_split: test_fold_holdout 9 | center: True 10 | transform: 11 | class: Compose 12 | transforms: 13 | - class: ProteinView 14 | view: "residue" 15 | - class: SecondaryStructure 16 | path: ~/scratch/protein-datasets/ 17 | atom_feature: null 18 | bond_feature: null 19 | test_split: cb513 20 | transform: 21 | class: Compose 22 | transforms: 23 | - class: TruncateProtein 24 | max_length: 200 25 | random: True 26 | - class: ProteinView 27 | view: "residue" 28 | 29 | tasks: 30 | - class: PropertyPrediction 31 | criterion: ce 32 | metric: [ "acc", "mcc" ] 33 | num_mlp_layer: 2 34 | num_class: 1195 35 | - class: NodePropertyPrediction 36 | criterion: ce 37 | metric: [ "micro_acc", "macro_acc" ] 38 | num_mlp_layer: 2 39 | num_class: 3 40 | 41 | model: 42 | class: ProteinConvolutionalNetwork 43 | input_dim: 21 44 | hidden_dims: [1024, 1024] 45 | kernel_size: 5 46 | padding: 2 47 | 48 | eval_metric: Center - Fold accuracy 49 | 50 | optimizer: 51 | class: Adam 52 | lr: 2.0e-5 53 | 54 | engine: 55 | gpus: [0, 1, 2, 3] 56 | batch_size: 8 57 | 58 | train: 59 | num_epoch: 100 60 | tradeoff: 1.0 -------------------------------------------------------------------------------- /config/multi_task/CNN/gb1_contact_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | datasets: 4 | - class: GB1 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | center: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | - class: ProteinNet 15 | path: ~/scratch/protein-datasets/ 16 | atom_feature: null 17 | bond_feature: null 18 | transform: 19 | class: Compose 20 | transforms: 21 | - class: TruncateProtein 22 | max_length: 1022 23 | random: False 24 | - class: ProteinView 25 | view: "residue" 26 | 27 | tasks: 28 | - class: PropertyPrediction 29 | criterion: mse 30 | metric: [ "mae", "rmse", "spearmanr" ] 31 | normalization: False 32 | num_mlp_layer: 2 33 | - class: ContactPrediction 34 | criterion: bce 35 | metric: [ "accuracy", "prec@L5", "prec@5" ] 36 | max_length: 300 37 | random_truncate: yes 38 | threshold: 8.0 39 | gap: 6 40 | num_mlp_layer: 2 41 | 42 | model: 43 | class: ProteinConvolutionalNetwork 44 | input_dim: 21 45 | hidden_dims: [1024, 1024] 46 | kernel_size: 5 47 | padding: 2 48 | 49 | eval_metric: Center - GB1 spearmanr 50 | 51 | optimizer: 52 | class: Adam 53 | lr: 2.0e-4 54 | 55 | engine: 56 | gpus: [0, 1, 2, 3] 57 | batch_size: 8 58 | 59 | train: 60 | num_epoch: 100 61 | tradeoff: 1.0 62 | -------------------------------------------------------------------------------- /config/multi_task/CNN/gb1_fold_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | datasets: 4 | - class: GB1 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | center: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | - class: Fold 15 | path: ~/scratch/protein-datasets/ 16 | test_split: test_fold_holdout 17 | atom_feature: null 18 | bond_feature: null 19 | transform: 20 | class: Compose 21 | transforms: 22 | - class: TruncateProtein 23 | max_length: 200 24 | random: True 25 | - class: ProteinView 26 | view: "residue" 27 | 28 | tasks: 29 | - class: PropertyPrediction 30 | criterion: mse 31 | metric: [ "mae", "rmse", "spearmanr" ] 32 | normalization: False 33 | num_mlp_layer: 2 34 | - class: PropertyPrediction 35 | criterion: ce 36 | metric: [ "acc", "mcc" ] 37 | num_mlp_layer: 2 38 | num_class: 1195 39 | 40 | model: 41 | class: ProteinConvolutionalNetwork 42 | input_dim: 21 43 | hidden_dims: [1024, 1024] 44 | kernel_size: 5 45 | padding: 2 46 | 47 | eval_metric: Center - GB1 spearmanr 48 | 49 | optimizer: 50 | class: Adam 51 | lr: 2.0e-5 52 | 53 | engine: 54 | gpus: [0, 1, 2, 3] 55 | batch_size: 8 56 | 57 | train: 58 | num_epoch: 100 59 | tradeoff: 1.0 60 | -------------------------------------------------------------------------------- /config/multi_task/CNN/gb1_ss_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | datasets: 4 | - class: GB1 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | center: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | - class: SecondaryStructure 15 | path: ~/scratch/protein-datasets/ 16 | atom_feature: null 17 | bond_feature: null 18 | test_split: cb513 19 | transform: 20 | class: Compose 21 | transforms: 22 | - class: TruncateProtein 23 | max_length: 200 24 | random: True 25 | - class: ProteinView 26 | view: "residue" 27 | 28 | tasks: 29 | - class: PropertyPrediction 30 | criterion: mse 31 | metric: [ "mae", "rmse", "spearmanr" ] 32 | normalization: False 33 | num_mlp_layer: 2 34 | - class: NodePropertyPrediction 35 | criterion: ce 36 | metric: [ "micro_acc", "macro_acc" ] 37 | num_mlp_layer: 2 38 | num_class: 3 39 | 40 | model: 41 | class: ProteinConvolutionalNetwork 42 | input_dim: 21 43 | hidden_dims: [1024, 1024] 44 | kernel_size: 5 45 | padding: 2 46 | 47 | eval_metric: Center - GB1 spearmanr 48 | 49 | optimizer: 50 | class: Adam 51 | lr: 2.0e-5 52 | 53 | engine: 54 | gpus: [0, 1, 2, 3] 55 | batch_size: 8 56 | 57 | train: 58 | num_epoch: 100 59 | tradeoff: 1.0 60 | -------------------------------------------------------------------------------- /config/multi_task/CNN/human_fold_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | datasets: 4 | - class: HumanPPI 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | test_split: test 9 | center: True 10 | transform: 11 | class: Compose 12 | transforms: 13 | - class: ProteinView 14 | view: "residue" 15 | keys: [ "graph1", "graph2" ] 16 | - class: Fold 17 | path: ~/scratch/protein-datasets/ 18 | test_split: test_fold_holdout 19 | atom_feature: null 20 | bond_feature: null 21 | transform: 22 | class: Compose 23 | transforms: 24 | - class: TruncateProtein 25 | max_length: 200 26 | random: True 27 | - class: ProteinView 28 | view: "residue" 29 | 30 | tasks: 31 | - class: InteractionPrediction 32 | criterion: ce 33 | metric: [ "acc", "mcc" ] 34 | num_mlp_layer: 2 35 | num_class: 2 36 | - class: PropertyPrediction 37 | criterion: ce 38 | metric: [ "acc", "mcc" ] 39 | num_mlp_layer: 2 40 | num_class: 1195 41 | 42 | model: 43 | class: ProteinConvolutionalNetwork 44 | input_dim: 21 45 | hidden_dims: [1024, 1024] 46 | kernel_size: 5 47 | padding: 2 48 | 49 | eval_metric: Center - HumanPPI accuracy 50 | 51 | optimizer: 52 | class: Adam 53 | lr: 2.0e-5 54 | 55 | engine: 56 | gpus: [0, 1, 2, 3] 57 | batch_size: 8 58 | 59 | train: 60 | num_epoch: 50 61 | tradeoff: 1.0 -------------------------------------------------------------------------------- /config/multi_task/CNN/solubility_contact_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | datasets: 4 | - class: Solubility 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | center: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | - class: ProteinNet 15 | path: ~/scratch/protein-datasets/ 16 | atom_feature: null 17 | bond_feature: null 18 | transform: 19 | class: Compose 20 | transforms: 21 | - class: TruncateProtein 22 | max_length: 1022 23 | random: False 24 | - class: ProteinView 25 | view: "residue" 26 | 27 | tasks: 28 | - class: PropertyPrediction 29 | criterion: ce 30 | metric: [ "acc", "mcc" ] 31 | num_mlp_layer: 2 32 | num_class: 2 33 | - class: ContactPrediction 34 | criterion: bce 35 | metric: [ "accuracy", "prec@L5", "prec@5" ] 36 | max_length: 300 37 | random_truncate: yes 38 | threshold: 8.0 39 | gap: 6 40 | num_mlp_layer: 2 41 | 42 | model: 43 | class: ProteinConvolutionalNetwork 44 | input_dim: 21 45 | hidden_dims: [1024, 1024] 46 | kernel_size: 5 47 | padding: 2 48 | 49 | eval_metric: Center - Solubility accuracy 50 | 51 | optimizer: 52 | class: Adam 53 | lr: 2.0e-4 54 | 55 | engine: 56 | gpus: [0, 1, 2, 3] 57 | batch_size: 8 58 | 59 | train: 60 | num_epoch: 50 61 | tradeoff: 1.0 -------------------------------------------------------------------------------- /config/multi_task/CNN/solubility_fold_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | datasets: 4 | - class: Solubility 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | center: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | - class: Fold 15 | path: ~/scratch/protein-datasets/ 16 | test_split: test_fold_holdout 17 | atom_feature: null 18 | bond_feature: null 19 | transform: 20 | class: Compose 21 | transforms: 22 | - class: TruncateProtein 23 | max_length: 200 24 | random: True 25 | - class: ProteinView 26 | view: "residue" 27 | 28 | tasks: 29 | - class: PropertyPrediction 30 | criterion: ce 31 | metric: [ "acc", "mcc" ] 32 | num_mlp_layer: 2 33 | num_class: 2 34 | - class: PropertyPrediction 35 | criterion: ce 36 | metric: [ "acc", "mcc" ] 37 | num_mlp_layer: 2 38 | num_class: 1195 39 | 40 | model: 41 | class: ProteinConvolutionalNetwork 42 | input_dim: 21 43 | hidden_dims: [1024, 1024] 44 | kernel_size: 5 45 | padding: 2 46 | 47 | eval_metric: Center - Solubility accuracy 48 | 49 | optimizer: 50 | class: Adam 51 | lr: 2.0e-5 52 | 53 | engine: 54 | gpus: [0, 1, 2, 3] 55 | batch_size: 8 56 | 57 | train: 58 | num_epoch: 50 59 | tradeoff: 1.0 -------------------------------------------------------------------------------- /config/multi_task/CNN/solubility_ss_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | datasets: 4 | - class: Solubility 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | center: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | - class: SecondaryStructure 15 | path: ~/scratch/protein-datasets/ 16 | atom_feature: null 17 | bond_feature: null 18 | test_split: cb513 19 | transform: 20 | class: Compose 21 | transforms: 22 | - class: TruncateProtein 23 | max_length: 200 24 | random: True 25 | - class: ProteinView 26 | view: "residue" 27 | 28 | tasks: 29 | - class: PropertyPrediction 30 | criterion: ce 31 | metric: [ "acc", "mcc" ] 32 | num_mlp_layer: 2 33 | num_class: 2 34 | - class: NodePropertyPrediction 35 | criterion: ce 36 | metric: [ "micro_acc", "macro_acc" ] 37 | num_mlp_layer: 2 38 | num_class: 3 39 | 40 | model: 41 | class: ProteinConvolutionalNetwork 42 | input_dim: 21 43 | hidden_dims: [1024, 1024] 44 | kernel_size: 5 45 | padding: 2 46 | 47 | eval_metric: Center - Solubility accuracy 48 | 49 | optimizer: 50 | class: Adam 51 | lr: 2.0e-5 52 | 53 | engine: 54 | gpus: [0, 1, 2, 3] 55 | batch_size: 8 56 | 57 | train: 58 | num_epoch: 50 59 | tradeoff: 1.0 -------------------------------------------------------------------------------- /config/multi_task/CNN/ss_fold_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | datasets: 4 | - class: SecondaryStructure 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | test_split: cb513 9 | center: True 10 | transform: 11 | class: Compose 12 | transforms: 13 | - class: ProteinView 14 | view: "residue" 15 | - class: Fold 16 | path: ~/scratch/protein-datasets/ 17 | test_split: test_fold_holdout 18 | atom_feature: null 19 | bond_feature: null 20 | transform: 21 | class: Compose 22 | transforms: 23 | - class: TruncateProtein 24 | max_length: 200 25 | random: True 26 | - class: ProteinView 27 | view: "residue" 28 | 29 | tasks: 30 | - class: NodePropertyPrediction 31 | criterion: ce 32 | metric: [ "micro_acc", "macro_acc" ] 33 | num_mlp_layer: 2 34 | num_class: 3 35 | - class: PropertyPrediction 36 | criterion: ce 37 | metric: [ "acc", "mcc" ] 38 | num_mlp_layer: 2 39 | num_class: 1195 40 | 41 | model: 42 | class: ProteinConvolutionalNetwork 43 | input_dim: 21 44 | hidden_dims: [1024, 1024] 45 | kernel_size: 5 46 | padding: 2 47 | 48 | eval_metric: Center - SecondaryStructure macro_acc 49 | 50 | optimizer: 51 | class: Adam 52 | lr: 2.0e-5 53 | 54 | engine: 55 | gpus: [0, 1, 2, 3] 56 | batch_size: 8 57 | 58 | train: 59 | num_epoch: 100 60 | tradeoff: 1.0 -------------------------------------------------------------------------------- /config/multi_task/CNN/stability_fold_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | datasets: 4 | - class: Stability 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | center: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | - class: Fold 15 | path: ~/scratch/protein-datasets/ 16 | test_split: test_fold_holdout 17 | atom_feature: null 18 | bond_feature: null 19 | transform: 20 | class: Compose 21 | transforms: 22 | - class: TruncateProtein 23 | max_length: 200 24 | random: True 25 | - class: ProteinView 26 | view: "residue" 27 | 28 | tasks: 29 | - class: PropertyPrediction 30 | criterion: mse 31 | metric: [ "mae", "rmse", "spearmanr" ] 32 | normalization: False 33 | num_mlp_layer: 2 34 | - class: PropertyPrediction 35 | criterion: ce 36 | metric: [ "acc", "mcc" ] 37 | num_mlp_layer: 2 38 | num_class: 1195 39 | 40 | model: 41 | class: ProteinConvolutionalNetwork 42 | input_dim: 21 43 | hidden_dims: [1024, 1024] 44 | kernel_size: 5 45 | padding: 2 46 | 47 | eval_metric: Center - Stability spearmanr 48 | 49 | optimizer: 50 | class: Adam 51 | lr: 2.0e-5 52 | 53 | engine: 54 | gpus: [0, 1, 2, 3] 55 | batch_size: 8 56 | 57 | train: 58 | num_epoch: 100 59 | tradeoff: 1.0 -------------------------------------------------------------------------------- /config/multi_task/CNN/stability_ss_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | datasets: 4 | - class: Stability 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | center: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | - class: SecondaryStructure 15 | path: ~/scratch/protein-datasets/ 16 | atom_feature: null 17 | bond_feature: null 18 | test_split: cb513 19 | transform: 20 | class: Compose 21 | transforms: 22 | - class: TruncateProtein 23 | max_length: 200 24 | random: True 25 | - class: ProteinView 26 | view: "residue" 27 | 28 | tasks: 29 | - class: PropertyPrediction 30 | criterion: mse 31 | metric: [ "mae", "rmse", "spearmanr" ] 32 | normalization: False 33 | num_mlp_layer: 2 34 | - class: NodePropertyPrediction 35 | criterion: ce 36 | metric: [ "micro_acc", "macro_acc" ] 37 | num_mlp_layer: 2 38 | num_class: 3 39 | 40 | model: 41 | class: ProteinConvolutionalNetwork 42 | input_dim: 21 43 | hidden_dims: [1024, 1024] 44 | kernel_size: 5 45 | padding: 2 46 | 47 | eval_metric: Center - Stability spearmanr 48 | 49 | optimizer: 50 | class: Adam 51 | lr: 2.0e-5 52 | 53 | engine: 54 | gpus: [0, 1, 2, 3] 55 | batch_size: 8 56 | 57 | train: 58 | num_epoch: 100 59 | tradeoff: 1.0 -------------------------------------------------------------------------------- /config/multi_task/CNN/subloc_fold_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | datasets: 4 | - class: SubcellularLocalization 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | center: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | - class: Fold 15 | path: ~/scratch/protein-datasets/ 16 | test_split: test_fold_holdout 17 | atom_feature: null 18 | bond_feature: null 19 | transform: 20 | class: Compose 21 | transforms: 22 | - class: TruncateProtein 23 | max_length: 200 24 | random: True 25 | - class: ProteinView 26 | view: "residue" 27 | 28 | tasks: 29 | - class: PropertyPrediction 30 | criterion: ce 31 | metric: [ "acc", "mcc" ] 32 | num_mlp_layer: 2 33 | num_class: 10 34 | - class: PropertyPrediction 35 | criterion: ce 36 | metric: [ "acc", "mcc" ] 37 | num_mlp_layer: 2 38 | num_class: 1195 39 | 40 | model: 41 | class: ProteinConvolutionalNetwork 42 | input_dim: 21 43 | hidden_dims: [1024, 1024] 44 | kernel_size: 5 45 | padding: 2 46 | 47 | eval_metric: Center - SubcellularLocalization accuracy 48 | 49 | optimizer: 50 | class: Adam 51 | lr: 2.0e-5 52 | 53 | engine: 54 | gpus: [0, 1, 2, 3] 55 | batch_size: 8 56 | 57 | train: 58 | num_epoch: 100 59 | tradeoff: 1.0 -------------------------------------------------------------------------------- /config/multi_task/CNN/subloc_ss_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | datasets: 4 | - class: SubcellularLocalization 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | center: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | - class: SecondaryStructure 15 | path: ~/scratch/protein-datasets/ 16 | atom_feature: null 17 | bond_feature: null 18 | test_split: cb513 19 | transform: 20 | class: Compose 21 | transforms: 22 | - class: TruncateProtein 23 | max_length: 200 24 | random: True 25 | - class: ProteinView 26 | view: "residue" 27 | 28 | tasks: 29 | - class: PropertyPrediction 30 | criterion: ce 31 | metric: [ "acc", "mcc" ] 32 | num_mlp_layer: 2 33 | num_class: 10 34 | - class: NodePropertyPrediction 35 | criterion: ce 36 | metric: [ "micro_acc", "macro_acc" ] 37 | num_mlp_layer: 2 38 | num_class: 3 39 | 40 | model: 41 | class: ProteinConvolutionalNetwork 42 | input_dim: 21 43 | hidden_dims: [1024, 1024] 44 | kernel_size: 5 45 | padding: 2 46 | 47 | eval_metric: Center - SubcellularLocalization accuracy 48 | 49 | optimizer: 50 | class: Adam 51 | lr: 2.0e-5 52 | 53 | engine: 54 | gpus: [0, 1, 2, 3] 55 | batch_size: 8 56 | 57 | train: 58 | num_epoch: 100 59 | tradeoff: 1.0 -------------------------------------------------------------------------------- /config/multi_task/CNN/thermo_fold_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | datasets: 4 | - class: Thermostability 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | center: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | - class: Fold 15 | path: ~/scratch/protein-datasets/ 16 | test_split: test_fold_holdout 17 | atom_feature: null 18 | bond_feature: null 19 | transform: 20 | class: Compose 21 | transforms: 22 | - class: TruncateProtein 23 | max_length: 200 24 | random: True 25 | - class: ProteinView 26 | view: "residue" 27 | 28 | tasks: 29 | - class: PropertyPrediction 30 | criterion: mse 31 | metric: [ "mae", "rmse", "spearmanr" ] 32 | normalization: False 33 | num_mlp_layer: 2 34 | - class: PropertyPrediction 35 | criterion: ce 36 | metric: [ "acc", "mcc" ] 37 | num_mlp_layer: 2 38 | num_class: 1195 39 | 40 | model: 41 | class: ProteinConvolutionalNetwork 42 | input_dim: 21 43 | hidden_dims: [1024, 1024] 44 | kernel_size: 5 45 | padding: 2 46 | 47 | eval_metric: Center - Thermostability spearmanr 48 | 49 | optimizer: 50 | class: Adam 51 | lr: 2.0e-5 52 | 53 | engine: 54 | gpus: [0, 1, 2, 3] 55 | batch_size: 8 56 | 57 | train: 58 | num_epoch: 100 59 | tradeoff: 1.0 60 | -------------------------------------------------------------------------------- /config/multi_task/CNN/thermo_ss_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | datasets: 4 | - class: Thermostability 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | center: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | - class: SecondaryStructure 15 | path: ~/scratch/protein-datasets/ 16 | atom_feature: null 17 | bond_feature: null 18 | test_split: cb513 19 | transform: 20 | class: Compose 21 | transforms: 22 | - class: TruncateProtein 23 | max_length: 200 24 | random: True 25 | - class: ProteinView 26 | view: "residue" 27 | 28 | tasks: 29 | - class: PropertyPrediction 30 | criterion: mse 31 | metric: [ "mae", "rmse", "spearmanr" ] 32 | normalization: False 33 | num_mlp_layer: 2 34 | - class: NodePropertyPrediction 35 | criterion: ce 36 | metric: [ "micro_acc", "macro_acc" ] 37 | num_mlp_layer: 2 38 | num_class: 3 39 | 40 | model: 41 | class: ProteinConvolutionalNetwork 42 | input_dim: 21 43 | hidden_dims: [1024, 1024] 44 | kernel_size: 5 45 | padding: 2 46 | 47 | eval_metric: Center - Thermostability spearmanr 48 | 49 | optimizer: 50 | class: Adam 51 | lr: 2.0e-5 52 | 53 | engine: 54 | gpus: [0, 1, 2, 3] 55 | batch_size: 8 56 | 57 | train: 58 | num_epoch: 100 59 | tradeoff: 1.0 60 | -------------------------------------------------------------------------------- /config/multi_task/CNN/yeast_fold_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | datasets: 4 | - class: YeastPPI 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | test_split: test 9 | center: True 10 | transform: 11 | class: Compose 12 | transforms: 13 | - class: ProteinView 14 | view: "residue" 15 | keys: [ "graph1", "graph2" ] 16 | - class: Fold 17 | path: ~/scratch/protein-datasets/ 18 | test_split: test_fold_holdout 19 | atom_feature: null 20 | bond_feature: null 21 | transform: 22 | class: Compose 23 | transforms: 24 | - class: TruncateProtein 25 | max_length: 200 26 | random: True 27 | - class: ProteinView 28 | view: "residue" 29 | 30 | tasks: 31 | - class: InteractionPrediction 32 | criterion: ce 33 | metric: [ "acc", "mcc" ] 34 | num_mlp_layer: 2 35 | num_class: 2 36 | - class: PropertyPrediction 37 | criterion: ce 38 | metric: [ "acc", "mcc" ] 39 | num_mlp_layer: 2 40 | num_class: 1195 41 | 42 | model: 43 | class: ProteinConvolutionalNetwork 44 | input_dim: 21 45 | hidden_dims: [1024, 1024] 46 | kernel_size: 5 47 | padding: 2 48 | 49 | eval_metric: Center - YeastPPI accuracy 50 | 51 | optimizer: 52 | class: Adam 53 | lr: 2.0e-5 54 | 55 | engine: 56 | gpus: [0, 1, 2, 3] 57 | batch_size: 8 58 | 59 | train: 60 | num_epoch: 100 61 | tradeoff: 1.0 -------------------------------------------------------------------------------- /config/multi_task/ESM/aav_fold_ESM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | datasets: 4 | - class: AAV 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | center: True 9 | keep_mutation_region: True 10 | transform: 11 | class: Compose 12 | transforms: 13 | - class: ProteinView 14 | view: "residue" 15 | - class: Fold 16 | path: ~/scratch/protein-datasets/ 17 | test_split: test_fold_holdout 18 | atom_feature: null 19 | bond_feature: null 20 | transform: 21 | class: Compose 22 | transforms: 23 | - class: TruncateProtein 24 | max_length: 200 25 | random: True 26 | - class: ProteinView 27 | view: "residue" 28 | 29 | tasks: 30 | - class: PropertyPrediction 31 | criterion: mse 32 | metric: [ "mae", "rmse", "spearmanr" ] 33 | normalization: False 34 | num_mlp_layer: 2 35 | - class: PropertyPrediction 36 | criterion: ce 37 | metric: [ "acc", "mcc" ] 38 | num_mlp_layer: 2 39 | num_class: 1195 40 | 41 | model: 42 | class: ESM 43 | path: ~/scratch/protein-model-weights/esm-model-weights/ 44 | model: ESM-1b 45 | readout: mean 46 | 47 | eval_metric: Center - AAV spearmanr 48 | 49 | optimizer: 50 | class: Adam 51 | lr: 2.0e-5 52 | 53 | lr_ratio: 0.1 54 | 55 | engine: 56 | gpus: [0, 1, 2, 3] 57 | batch_size: 2 58 | 59 | train: 60 | num_epoch: 100 61 | tradeoff: 1.0 62 | -------------------------------------------------------------------------------- /config/multi_task/ESM/aav_ss_ESM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | datasets: 4 | - class: AAV 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | center: True 9 | keep_mutation_region: True 10 | transform: 11 | class: Compose 12 | transforms: 13 | - class: ProteinView 14 | view: "residue" 15 | - class: SecondaryStructure 16 | path: ~/scratch/protein-datasets/ 17 | atom_feature: null 18 | bond_feature: null 19 | test_split: cb513 20 | transform: 21 | class: Compose 22 | transforms: 23 | - class: TruncateProtein 24 | max_length: 200 25 | random: True 26 | - class: ProteinView 27 | view: "residue" 28 | 29 | tasks: 30 | - class: PropertyPrediction 31 | criterion: mse 32 | metric: [ "mae", "rmse", "spearmanr" ] 33 | normalization: False 34 | num_mlp_layer: 2 35 | - class: NodePropertyPrediction 36 | criterion: ce 37 | metric: [ "micro_acc", "macro_acc" ] 38 | num_mlp_layer: 2 39 | num_class: 3 40 | 41 | model: 42 | class: ESM 43 | path: ~/scratch/protein-model-weights/esm-model-weights/ 44 | model: ESM-1b 45 | readout: mean 46 | 47 | eval_metric: Center - AAV spearmanr 48 | 49 | optimizer: 50 | class: Adam 51 | lr: 2.0e-5 52 | 53 | lr_ratio: 0.1 54 | 55 | engine: 56 | gpus: [0, 1, 2, 3] 57 | batch_size: 2 58 | 59 | train: 60 | num_epoch: 100 61 | tradeoff: 1.0 62 | -------------------------------------------------------------------------------- /config/multi_task/ESM/beta_fold_ESM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | datasets: 4 | - class: BetaLactamase 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | center: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | - class: Fold 15 | path: ~/scratch/protein-datasets/ 16 | test_split: test_fold_holdout 17 | atom_feature: null 18 | bond_feature: null 19 | transform: 20 | class: Compose 21 | transforms: 22 | - class: TruncateProtein 23 | max_length: 200 24 | random: True 25 | - class: ProteinView 26 | view: "residue" 27 | 28 | tasks: 29 | - class: PropertyPrediction 30 | criterion: mse 31 | metric: [ "mae", "rmse", "spearmanr" ] 32 | normalization: False 33 | num_mlp_layer: 2 34 | - class: PropertyPrediction 35 | criterion: ce 36 | metric: [ "acc", "mcc" ] 37 | num_mlp_layer: 2 38 | num_class: 1195 39 | 40 | model: 41 | class: ESM 42 | path: ~/scratch/esm-model-weights/ 43 | model: ESM-1b 44 | readout: mean 45 | 46 | eval_metric: Center - BetaLactamase spearmanr 47 | 48 | optimizer: 49 | class: Adam 50 | lr: 2.0e-5 51 | 52 | lr_ratio: 0.1 53 | 54 | engine: 55 | gpus: [0, 1, 2, 3] 56 | batch_size: 2 57 | 58 | train: 59 | num_epoch: 100 60 | tradeoff: 1.0 -------------------------------------------------------------------------------- /config/multi_task/ESM/beta_ss_ESM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | datasets: 4 | - class: BetaLactamase 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | center: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | - class: SecondaryStructure 15 | path: ~/scratch/protein-datasets/ 16 | atom_feature: null 17 | bond_feature: null 18 | test_split: cb513 19 | transform: 20 | class: Compose 21 | transforms: 22 | - class: TruncateProtein 23 | max_length: 200 24 | random: True 25 | - class: ProteinView 26 | view: "residue" 27 | 28 | tasks: 29 | - class: PropertyPrediction 30 | criterion: mse 31 | metric: [ "mae", "rmse", "spearmanr" ] 32 | normalization: False 33 | num_mlp_layer: 2 34 | - class: NodePropertyPrediction 35 | criterion: ce 36 | metric: [ "micro_acc", "macro_acc" ] 37 | num_mlp_layer: 2 38 | num_class: 3 39 | 40 | model: 41 | class: ESM 42 | path: ~/scratch/esm-model-weights/ 43 | model: ESM-1b 44 | readout: mean 45 | 46 | eval_metric: Center - BetaLactamase spearmanr 47 | 48 | optimizer: 49 | class: Adam 50 | lr: 2.0e-5 51 | 52 | lr_ratio: 0.1 53 | 54 | engine: 55 | gpus: [0, 1, 2, 3] 56 | batch_size: 2 57 | 58 | train: 59 | num_epoch: 100 60 | tradeoff: 1.0 -------------------------------------------------------------------------------- /config/multi_task/ESM/fluorescence_fold_ESM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | datasets: 4 | - class: Fluorescence 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | center: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | - class: Fold 15 | path: ~/scratch/protein-datasets/ 16 | test_split: test_fold_holdout 17 | atom_feature: null 18 | bond_feature: null 19 | transform: 20 | class: Compose 21 | transforms: 22 | - class: TruncateProtein 23 | max_length: 200 24 | random: True 25 | - class: ProteinView 26 | view: "residue" 27 | 28 | tasks: 29 | - class: PropertyPrediction 30 | criterion: mse 31 | metric: [ "mae", "rmse", "spearmanr" ] 32 | normalization: False 33 | num_mlp_layer: 2 34 | - class: PropertyPrediction 35 | criterion: ce 36 | metric: [ "acc", "mcc" ] 37 | num_mlp_layer: 2 38 | num_class: 1195 39 | 40 | model: 41 | class: ESM 42 | path: ~/scratch/esm-model-weights/ 43 | model: ESM-1b 44 | readout: mean 45 | 46 | eval_metric: Center - Fluorescence spearmanr 47 | 48 | optimizer: 49 | class: Adam 50 | lr: 1.0e-5 51 | 52 | lr_ratio: 0.1 53 | 54 | engine: 55 | gpus: [0, 1, 2, 3] 56 | batch_size: 4 57 | 58 | train: 59 | num_epoch: 100 60 | tradeoff: 1.0 -------------------------------------------------------------------------------- /config/multi_task/ESM/fluorescence_ss_ESM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | datasets: 4 | - class: Fluorescence 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | center: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | - class: SecondaryStructure 15 | path: ~/scratch/protein-datasets/ 16 | atom_feature: null 17 | bond_feature: null 18 | test_split: cb513 19 | transform: 20 | class: Compose 21 | transforms: 22 | - class: TruncateProtein 23 | max_length: 200 24 | random: True 25 | - class: ProteinView 26 | view: "residue" 27 | 28 | tasks: 29 | - class: PropertyPrediction 30 | criterion: mse 31 | metric: [ "mae", "rmse", "spearmanr" ] 32 | normalization: False 33 | num_mlp_layer: 2 34 | - class: NodePropertyPrediction 35 | criterion: ce 36 | metric: [ "micro_acc", "macro_acc" ] 37 | num_mlp_layer: 2 38 | num_class: 3 39 | 40 | model: 41 | class: ESM 42 | path: ~/scratch/esm-model-weights/ 43 | model: ESM-1b 44 | readout: mean 45 | 46 | eval_metric: Center - Fluorescence spearmanr 47 | 48 | optimizer: 49 | class: Adam 50 | lr: 1.0e-5 51 | 52 | lr_ratio: 0.1 53 | 54 | engine: 55 | gpus: [0, 1, 2, 3] 56 | batch_size: 4 57 | 58 | train: 59 | num_epoch: 100 60 | tradeoff: 1.0 -------------------------------------------------------------------------------- /config/multi_task/ESM/gb1_fold_ESM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | datasets: 4 | - class: GB1 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | center: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | - class: Fold 15 | path: ~/scratch/protein-datasets/ 16 | test_split: test_fold_holdout 17 | atom_feature: null 18 | bond_feature: null 19 | transform: 20 | class: Compose 21 | transforms: 22 | - class: TruncateProtein 23 | max_length: 200 24 | random: True 25 | - class: ProteinView 26 | view: "residue" 27 | 28 | tasks: 29 | - class: PropertyPrediction 30 | criterion: mse 31 | metric: [ "mae", "rmse", "spearmanr" ] 32 | normalization: False 33 | num_mlp_layer: 2 34 | - class: PropertyPrediction 35 | criterion: ce 36 | metric: [ "acc", "mcc" ] 37 | num_mlp_layer: 2 38 | num_class: 1195 39 | 40 | model: 41 | class: ESM 42 | path: ~/scratch/protein-model-weights/esm-model-weights/ 43 | model: ESM-1b 44 | readout: mean 45 | 46 | eval_metric: Center - GB1 spearmanr 47 | 48 | optimizer: 49 | class: Adam 50 | lr: 2.0e-5 51 | 52 | lr_ratio: 0.1 53 | 54 | engine: 55 | gpus: [0, 1, 2, 3] 56 | batch_size: 2 57 | 58 | train: 59 | num_epoch: 100 60 | tradeoff: 1.0 61 | -------------------------------------------------------------------------------- /config/multi_task/ESM/gb1_ss_ESM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | datasets: 4 | - class: GB1 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | center: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | - class: SecondaryStructure 15 | path: ~/scratch/protein-datasets/ 16 | atom_feature: null 17 | bond_feature: null 18 | test_split: cb513 19 | transform: 20 | class: Compose 21 | transforms: 22 | - class: TruncateProtein 23 | max_length: 200 24 | random: True 25 | - class: ProteinView 26 | view: "residue" 27 | 28 | tasks: 29 | - class: PropertyPrediction 30 | criterion: mse 31 | metric: [ "mae", "rmse", "spearmanr" ] 32 | normalization: False 33 | num_mlp_layer: 2 34 | - class: NodePropertyPrediction 35 | criterion: ce 36 | metric: [ "micro_acc", "macro_acc" ] 37 | num_mlp_layer: 2 38 | num_class: 3 39 | 40 | model: 41 | class: ESM 42 | path: ~/scratch/protein-model-weights/esm-model-weights/ 43 | model: ESM-1b 44 | readout: mean 45 | 46 | eval_metric: Center - GB1 spearmanr 47 | 48 | optimizer: 49 | class: Adam 50 | lr: 2.0e-5 51 | 52 | lr_ratio: 0.1 53 | 54 | engine: 55 | gpus: [0, 1, 2, 3] 56 | batch_size: 2 57 | 58 | train: 59 | num_epoch: 100 60 | tradeoff: 1.0 61 | -------------------------------------------------------------------------------- /config/multi_task/ESM/stability_fold_ESM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | datasets: 4 | - class: Stability 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | center: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | - class: Fold 15 | path: ~/scratch/protein-datasets/ 16 | test_split: test_fold_holdout 17 | atom_feature: null 18 | bond_feature: null 19 | transform: 20 | class: Compose 21 | transforms: 22 | - class: TruncateProtein 23 | max_length: 200 24 | random: True 25 | - class: ProteinView 26 | view: "residue" 27 | 28 | tasks: 29 | - class: PropertyPrediction 30 | criterion: mse 31 | metric: [ "mae", "rmse", "spearmanr" ] 32 | normalization: False 33 | num_mlp_layer: 2 34 | - class: PropertyPrediction 35 | criterion: ce 36 | metric: [ "acc", "mcc" ] 37 | num_mlp_layer: 2 38 | num_class: 1195 39 | 40 | model: 41 | class: ESM 42 | path: ~/scratch/esm-model-weights/ 43 | model: ESM-1b 44 | readout: mean 45 | 46 | eval_metric: Center - Stability spearmanr 47 | 48 | optimizer: 49 | class: Adam 50 | lr: 1.0e-5 51 | 52 | lr_ratio: 0.1 53 | 54 | engine: 55 | gpus: [0, 1, 2, 3] 56 | batch_size: 8 57 | 58 | train: 59 | num_epoch: 100 60 | tradeoff: 1.0 -------------------------------------------------------------------------------- /config/multi_task/ESM/stability_ss_ESM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | datasets: 4 | - class: Stability 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | center: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | - class: SecondaryStructure 15 | path: ~/scratch/protein-datasets/ 16 | atom_feature: null 17 | bond_feature: null 18 | test_split: cb513 19 | transform: 20 | class: Compose 21 | transforms: 22 | - class: TruncateProtein 23 | max_length: 200 24 | random: True 25 | - class: ProteinView 26 | view: "residue" 27 | 28 | tasks: 29 | - class: PropertyPrediction 30 | criterion: mse 31 | metric: [ "mae", "rmse", "spearmanr" ] 32 | normalization: False 33 | num_mlp_layer: 2 34 | - class: NodePropertyPrediction 35 | criterion: ce 36 | metric: [ "micro_acc", "macro_acc" ] 37 | num_mlp_layer: 2 38 | num_class: 3 39 | 40 | model: 41 | class: ESM 42 | path: ~/scratch/esm-model-weights/ 43 | model: ESM-1b 44 | readout: mean 45 | 46 | eval_metric: Center - Stability spearmanr 47 | 48 | optimizer: 49 | class: Adam 50 | lr: 1.0e-5 51 | 52 | lr_ratio: 0.1 53 | 54 | engine: 55 | gpus: [0, 1, 2, 3] 56 | batch_size: 8 57 | 58 | train: 59 | num_epoch: 100 60 | tradeoff: 1.0 -------------------------------------------------------------------------------- /config/multi_task/ESM/thermo_fold_ESM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | datasets: 4 | - class: Thermostability 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | center: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | - class: Fold 15 | path: ~/scratch/protein-datasets/ 16 | test_split: test_fold_holdout 17 | atom_feature: null 18 | bond_feature: null 19 | transform: 20 | class: Compose 21 | transforms: 22 | - class: TruncateProtein 23 | max_length: 200 24 | random: True 25 | - class: ProteinView 26 | view: "residue" 27 | 28 | tasks: 29 | - class: PropertyPrediction 30 | criterion: mse 31 | metric: [ "mae", "rmse", "spearmanr" ] 32 | normalization: False 33 | num_mlp_layer: 2 34 | - class: PropertyPrediction 35 | criterion: ce 36 | metric: [ "acc", "mcc" ] 37 | num_mlp_layer: 2 38 | num_class: 1195 39 | 40 | model: 41 | class: ESM 42 | path: ~/scratch/protein-model-weights/esm-model-weights/ 43 | model: ESM-1b 44 | readout: mean 45 | 46 | eval_metric: Center - Thermostability spearmanr 47 | 48 | optimizer: 49 | class: Adam 50 | lr: 2.0e-5 51 | 52 | lr_ratio: 0.1 53 | 54 | engine: 55 | gpus: [0, 1, 2, 3] 56 | batch_size: 2 57 | 58 | train: 59 | num_epoch: 100 60 | tradeoff: 1.0 61 | -------------------------------------------------------------------------------- /config/multi_task/ESM/thermo_ss_ESM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | datasets: 4 | - class: Thermostability 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | center: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | - class: SecondaryStructure 15 | path: ~/scratch/protein-datasets/ 16 | atom_feature: null 17 | bond_feature: null 18 | test_split: cb513 19 | transform: 20 | class: Compose 21 | transforms: 22 | - class: TruncateProtein 23 | max_length: 200 24 | random: True 25 | - class: ProteinView 26 | view: "residue" 27 | 28 | tasks: 29 | - class: PropertyPrediction 30 | criterion: mse 31 | metric: [ "mae", "rmse", "spearmanr" ] 32 | normalization: False 33 | num_mlp_layer: 2 34 | - class: NodePropertyPrediction 35 | criterion: ce 36 | metric: [ "micro_acc", "macro_acc" ] 37 | num_mlp_layer: 2 38 | num_class: 3 39 | 40 | model: 41 | class: ESM 42 | path: ~/scratch/protein-model-weights/esm-model-weights/ 43 | model: ESM-1b 44 | readout: mean 45 | 46 | eval_metric: Center - Thermostability spearmanr 47 | 48 | optimizer: 49 | class: Adam 50 | lr: 2.0e-5 51 | 52 | lr_ratio: 0.1 53 | 54 | engine: 55 | gpus: [0, 1, 2, 3] 56 | batch_size: 2 57 | 58 | train: 59 | num_epoch: 100 60 | tradeoff: 1.0 61 | -------------------------------------------------------------------------------- /config/single_task/BERT/aav_BERT.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: AAV 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | keep_mutation_region: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | 15 | task: 16 | class: PropertyPrediction 17 | model: 18 | class: ProteinBERT 19 | input_dim: 21 20 | hidden_dim: 512 21 | num_layers: 4 22 | num_heads: 8 23 | intermediate_dim: 2048 24 | hidden_dropout: 0.1 25 | attention_dropout: 0.1 26 | criterion: mse 27 | metric: ["mae", "rmse", "spearmanr"] 28 | normalization: False 29 | num_mlp_layer: 2 30 | 31 | eval_metric: spearmanr 32 | 33 | optimizer: 34 | class: Adam 35 | lr: 5.0e-5 36 | 37 | engine: 38 | gpus: [0, 1, 2, 3] 39 | batch_size: 32 40 | 41 | train: 42 | num_epoch: 100 43 | -------------------------------------------------------------------------------- /config/single_task/BERT/beta_BERT.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: BetaLactamase 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProteinBERT 18 | input_dim: 21 19 | hidden_dim: 512 20 | num_layers: 4 21 | num_heads: 8 22 | intermediate_dim: 2048 23 | hidden_dropout: 0.1 24 | attention_dropout: 0.1 25 | criterion: mse 26 | metric: ["mae", "rmse", "spearmanr"] 27 | normalization: False 28 | num_mlp_layer: 2 29 | 30 | eval_metric: spearmanr 31 | 32 | optimizer: 33 | class: Adam 34 | lr: 5.0e-5 35 | 36 | engine: 37 | gpus: [0, 1, 2, 3] 38 | batch_size: 32 39 | 40 | train: 41 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/BERT/bindingdb_BERT.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: BindingDB 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | keys: "graph1" 14 | 15 | test_split: holdout_test 16 | 17 | task: 18 | class: InteractionPrediction 19 | model: 20 | class: ProteinBERT 21 | input_dim: 21 22 | hidden_dim: 512 23 | num_layers: 4 24 | num_heads: 8 25 | intermediate_dim: 2048 26 | hidden_dropout: 0.1 27 | attention_dropout: 0.1 28 | model2: 29 | class: GIN 30 | input_dim: 66 31 | hidden_dims: [ 256, 256, 256, 256 ] 32 | batch_norm: yes 33 | short_cut: yes 34 | concat_hidden: yes 35 | criterion: mse 36 | metric: [ "mae", "rmse", "spearmanr" ] 37 | num_mlp_layer: 2 38 | normalization: False 39 | 40 | eval_metric: root mean squared error 41 | 42 | optimizer: 43 | class: Adam 44 | lr: 5.0e-5 45 | 46 | engine: 47 | gpus: [0, 1, 2, 3] 48 | batch_size: 8 49 | 50 | train: 51 | num_epoch: 100 52 | -------------------------------------------------------------------------------- /config/single_task/BERT/binloc_BERT.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: BinaryLocalization 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProteinBERT 18 | input_dim: 21 19 | hidden_dim: 512 20 | num_layers: 4 21 | num_heads: 8 22 | intermediate_dim: 2048 23 | hidden_dropout: 0.1 24 | attention_dropout: 0.1 25 | criterion: ce 26 | metric: ["acc", "mcc"] 27 | num_mlp_layer: 2 28 | num_class: 2 29 | 30 | eval_metric: accuracy 31 | 32 | optimizer: 33 | class: Adam 34 | lr: 5.0e-5 35 | 36 | engine: 37 | gpus: [0, 1, 2, 3] 38 | batch_size: 32 39 | 40 | train: 41 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/BERT/contact_BERT.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: ProteinNet 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | test_split: test 15 | 16 | task: 17 | class: ContactPrediction 18 | model: 19 | class: ProteinBERT 20 | input_dim: 21 21 | hidden_dim: 512 22 | num_layers: 4 23 | num_heads: 8 24 | intermediate_dim: 2048 25 | hidden_dropout: 0.1 26 | attention_dropout: 0.1 27 | criterion: bce 28 | metric: ["accuracy", "prec@L5", "prec@5"] 29 | max_length: 400 30 | random_truncate: yes 31 | threshold: 8.0 32 | gap: 6 33 | num_mlp_layer: 2 34 | 35 | eval_metric: prec@L5 36 | 37 | optimizer: 38 | class: Adam 39 | lr: 5.0e-5 40 | 41 | engine: 42 | gpus: [0, 1, 2, 3] 43 | batch_size: 4 44 | 45 | train: 46 | num_epoch: 50 -------------------------------------------------------------------------------- /config/single_task/BERT/fluorescence_BERT.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Fluorescence 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProteinBERT 18 | input_dim: 21 19 | hidden_dim: 512 20 | num_layers: 4 21 | num_heads: 8 22 | intermediate_dim: 2048 23 | hidden_dropout: 0.1 24 | attention_dropout: 0.1 25 | criterion: mse 26 | metric: ["mae", "rmse", "spearmanr"] 27 | normalization: False 28 | num_mlp_layer: 2 29 | 30 | eval_metric: spearmanr 31 | 32 | optimizer: 33 | class: Adam 34 | lr: 5.0e-5 35 | 36 | engine: 37 | gpus: [0, 1, 2, 3] 38 | batch_size: 32 39 | 40 | train: 41 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/BERT/fold_BERT.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Fold 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | test_split: test_fold_holdout 15 | 16 | task: 17 | class: PropertyPrediction 18 | model: 19 | class: ProteinBERT 20 | input_dim: 21 21 | hidden_dim: 512 22 | num_layers: 4 23 | num_heads: 8 24 | intermediate_dim: 2048 25 | hidden_dropout: 0.1 26 | attention_dropout: 0.1 27 | criterion: ce 28 | metric: ["acc", "mcc"] 29 | num_mlp_layer: 2 30 | num_class: 1195 31 | 32 | eval_metric: accuracy 33 | 34 | optimizer: 35 | class: Adam 36 | lr: 5.0e-5 37 | 38 | engine: 39 | gpus: [0, 1, 2, 3] 40 | batch_size: 16 41 | 42 | train: 43 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/BERT/gb1_BERT.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: GB1 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProteinBERT 18 | input_dim: 21 19 | hidden_dim: 512 20 | num_layers: 4 21 | num_heads: 8 22 | intermediate_dim: 2048 23 | hidden_dropout: 0.1 24 | attention_dropout: 0.1 25 | criterion: mse 26 | metric: ["mae", "rmse", "spearmanr"] 27 | normalization: False 28 | num_mlp_layer: 2 29 | 30 | eval_metric: spearmanr 31 | 32 | optimizer: 33 | class: Adam 34 | lr: 5.0e-5 35 | 36 | engine: 37 | gpus: [0, 1, 2, 3] 38 | batch_size: 32 39 | 40 | train: 41 | num_epoch: 100 42 | -------------------------------------------------------------------------------- /config/single_task/BERT/human_BERT.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: HumanPPI 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | keys: [ "graph1", "graph2" ] 14 | 15 | test_split: test 16 | 17 | task: 18 | class: InteractionPrediction 19 | model: 20 | class: ProteinBERT 21 | input_dim: 21 22 | hidden_dim: 512 23 | num_layers: 4 24 | num_heads: 8 25 | intermediate_dim: 2048 26 | hidden_dropout: 0.1 27 | attention_dropout: 0.1 28 | criterion: ce 29 | metric: ["acc", "mcc"] 30 | num_mlp_layer: 2 31 | num_class: 2 32 | 33 | eval_metric: accuracy 34 | 35 | optimizer: 36 | class: Adam 37 | lr: 5.0e-5 38 | 39 | engine: 40 | gpus: [0, 1, 2, 3] 41 | batch_size: 8 42 | 43 | train: 44 | num_epoch: 50 -------------------------------------------------------------------------------- /config/single_task/BERT/pdbbind_BERT.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: PDBBind 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | keys: "graph1" 14 | 15 | test_split: test 16 | 17 | task: 18 | class: InteractionPrediction 19 | model: 20 | class: ProteinBERT 21 | input_dim: 21 22 | hidden_dim: 512 23 | num_layers: 4 24 | num_heads: 8 25 | intermediate_dim: 2048 26 | hidden_dropout: 0.1 27 | attention_dropout: 0.1 28 | model2: 29 | class: GIN 30 | input_dim: 66 31 | hidden_dims: [ 256, 256, 256, 256 ] 32 | batch_norm: yes 33 | short_cut: yes 34 | concat_hidden: yes 35 | criterion: mse 36 | metric: [ "mae", "rmse", "spearmanr" ] 37 | num_mlp_layer: 2 38 | normalization: False 39 | 40 | eval_metric: root mean squared error 41 | 42 | optimizer: 43 | class: Adam 44 | lr: 5.0e-5 45 | 46 | engine: 47 | gpus: [0, 1, 2, 3] 48 | batch_size: 8 49 | 50 | train: 51 | num_epoch: 100 52 | -------------------------------------------------------------------------------- /config/single_task/BERT/ppi_affinity_BERT.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: PPIAffinity 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | keys: [ "graph1", "graph2" ] 14 | 15 | test_split: test 16 | 17 | task: 18 | class: InteractionPrediction 19 | model: 20 | class: ProteinBERT 21 | input_dim: 21 22 | hidden_dim: 512 23 | num_layers: 4 24 | num_heads: 8 25 | intermediate_dim: 2048 26 | hidden_dropout: 0.1 27 | attention_dropout: 0.1 28 | criterion: mse 29 | metric: [ "mae", "rmse", "spearmanr" ] 30 | num_mlp_layer: 2 31 | normalization: False 32 | 33 | eval_metric: root mean squared error 34 | 35 | optimizer: 36 | class: Adam 37 | lr: 5.0e-5 38 | 39 | engine: 40 | gpus: [0, 1, 2, 3] 41 | batch_size: 8 42 | 43 | train: 44 | num_epoch: 100 45 | -------------------------------------------------------------------------------- /config/single_task/BERT/solubility_BERT.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Solubility 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProteinBERT 18 | input_dim: 21 19 | hidden_dim: 512 20 | num_layers: 4 21 | num_heads: 8 22 | intermediate_dim: 2048 23 | hidden_dropout: 0.1 24 | attention_dropout: 0.1 25 | criterion: ce 26 | metric: ["acc", "mcc"] 27 | num_mlp_layer: 2 28 | num_class: 2 29 | 30 | eval_metric: accuracy 31 | 32 | optimizer: 33 | class: Adam 34 | lr: 5.0e-5 35 | 36 | engine: 37 | gpus: [0, 1, 2, 3] 38 | batch_size: 32 39 | 40 | train: 41 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/BERT/ss_BERT.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: SecondaryStructure 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | test_split: cb513 15 | 16 | task: 17 | class: NodePropertyPrediction 18 | model: 19 | class: ProteinBERT 20 | input_dim: 21 21 | hidden_dim: 512 22 | num_layers: 4 23 | num_heads: 8 24 | intermediate_dim: 2048 25 | hidden_dropout: 0.1 26 | attention_dropout: 0.1 27 | criterion: ce 28 | metric: ["micro_acc", "macro_acc"] 29 | num_mlp_layer: 2 30 | num_class: 3 31 | 32 | eval_metric: macro_acc 33 | 34 | optimizer: 35 | class: Adam 36 | lr: 5.0e-5 37 | 38 | engine: 39 | gpus: [0, 1, 2, 3] 40 | batch_size: 16 41 | 42 | train: 43 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/BERT/stability_BERT.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Stability 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProteinBERT 18 | input_dim: 21 19 | hidden_dim: 512 20 | num_layers: 4 21 | num_heads: 8 22 | intermediate_dim: 2048 23 | hidden_dropout: 0.1 24 | attention_dropout: 0.1 25 | criterion: mse 26 | metric: ["mae", "rmse", "spearmanr"] 27 | normalization: False 28 | num_mlp_layer: 2 29 | 30 | eval_metric: spearmanr 31 | 32 | optimizer: 33 | class: Adam 34 | lr: 5.0e-5 35 | 36 | engine: 37 | gpus: [0, 1, 2, 3] 38 | batch_size: 32 39 | 40 | train: 41 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/BERT/subloc_BERT.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: SubcellularLocalization 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProteinBERT 18 | input_dim: 21 19 | hidden_dim: 512 20 | num_layers: 4 21 | num_heads: 8 22 | intermediate_dim: 2048 23 | hidden_dropout: 0.1 24 | attention_dropout: 0.1 25 | criterion: ce 26 | metric: ["acc", "mcc"] 27 | num_mlp_layer: 2 28 | num_class: 10 29 | 30 | eval_metric: accuracy 31 | 32 | optimizer: 33 | class: Adam 34 | lr: 5.0e-5 35 | 36 | engine: 37 | gpus: [0, 1, 2, 3] 38 | batch_size: 32 39 | 40 | train: 41 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/BERT/thermo_BERT.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Thermostability 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: TruncateProtein 12 | max_length: 1024 13 | - class: ProteinView 14 | view: "residue" 15 | 16 | task: 17 | class: PropertyPrediction 18 | model: 19 | class: ProteinBERT 20 | input_dim: 21 21 | hidden_dim: 512 22 | num_layers: 4 23 | num_heads: 8 24 | intermediate_dim: 2048 25 | hidden_dropout: 0.1 26 | attention_dropout: 0.1 27 | criterion: mse 28 | metric: ["mae", "rmse", "spearmanr"] 29 | normalization: False 30 | num_mlp_layer: 2 31 | 32 | eval_metric: spearmanr 33 | 34 | optimizer: 35 | class: Adam 36 | lr: 5.0e-5 37 | 38 | engine: 39 | gpus: [0, 1, 2, 3] 40 | batch_size: 4 41 | 42 | train: 43 | num_epoch: 100 44 | -------------------------------------------------------------------------------- /config/single_task/BERT/yeast_BERT.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: YeastPPI 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | keys: ["graph1", "graph2"] 14 | 15 | test_split: test 16 | 17 | task: 18 | class: InteractionPrediction 19 | model: 20 | class: ProteinBERT 21 | input_dim: 21 22 | hidden_dim: 512 23 | num_layers: 4 24 | num_heads: 8 25 | intermediate_dim: 2048 26 | hidden_dropout: 0.1 27 | attention_dropout: 0.1 28 | criterion: ce 29 | metric: ["acc", "mcc"] 30 | num_mlp_layer: 2 31 | num_class: 2 32 | 33 | eval_metric: accuracy 34 | 35 | optimizer: 36 | class: Adam 37 | lr: 5.0e-5 38 | 39 | engine: 40 | gpus: [0, 1, 2, 3] 41 | batch_size: 8 42 | 43 | train: 44 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/CNN/aav_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: AAV 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | keep_mutation_region: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | 15 | task: 16 | class: PropertyPrediction 17 | model: 18 | class: ProteinConvolutionalNetwork 19 | input_dim: 21 20 | hidden_dims: [1024, 1024] 21 | kernel_size: 5 22 | padding: 2 23 | criterion: mse 24 | metric: ["mae", "rmse", "spearmanr"] 25 | normalization: False 26 | num_mlp_layer: 2 27 | 28 | eval_metric: spearmanr 29 | 30 | optimizer: 31 | class: Adam 32 | lr: 1.0e-4 33 | 34 | engine: 35 | gpus: [0, 1, 2, 3] 36 | batch_size: 64 37 | 38 | train: 39 | num_epoch: 100 40 | -------------------------------------------------------------------------------- /config/single_task/CNN/beta_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: BetaLactamase 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProteinConvolutionalNetwork 18 | input_dim: 21 19 | hidden_dims: [1024, 1024] 20 | kernel_size: 5 21 | padding: 2 22 | criterion: mse 23 | metric: ["mae", "rmse", "spearmanr"] 24 | normalization: False 25 | num_mlp_layer: 2 26 | 27 | eval_metric: spearmanr 28 | 29 | optimizer: 30 | class: Adam 31 | lr: 1.0e-4 32 | 33 | engine: 34 | gpus: [0] 35 | batch_size: 64 36 | 37 | train: 38 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/CNN/bindingdb_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: BindingDB 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: TruncateProtein 12 | max_length: 550 13 | random: True 14 | keys: "graph1" 15 | - class: ProteinView 16 | view: "residue" 17 | keys: "graph1" 18 | 19 | test_split: holdout_test 20 | 21 | task: 22 | class: InteractionPrediction 23 | model: 24 | class: ProteinConvolutionalNetwork 25 | input_dim: 21 26 | hidden_dims: [1024, 1024] 27 | kernel_size: 5 28 | padding: 2 29 | model2: 30 | class: GIN 31 | input_dim: 66 32 | hidden_dims: [256, 256, 256, 256] 33 | batch_norm: yes 34 | short_cut: yes 35 | concat_hidden: yes 36 | criterion: mse 37 | metric: ["mae", "rmse", "spearmanr"] 38 | num_mlp_layer: 2 39 | normalization: False 40 | 41 | eval_metric: root mean squared error 42 | 43 | optimizer: 44 | class: Adam 45 | lr: 2.0e-4 46 | 47 | engine: 48 | gpus: [0, 1, 2, 3] 49 | batch_size: 32 50 | 51 | train: 52 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/CNN/binloc_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: BinaryLocalization 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProteinConvolutionalNetwork 18 | input_dim: 21 19 | hidden_dims: [1024, 1024] 20 | kernel_size: 5 21 | padding: 2 22 | criterion: ce 23 | metric: ["acc", "mcc"] 24 | num_mlp_layer: 2 25 | 26 | eval_metric: accuracy 27 | 28 | optimizer: 29 | class: Adam 30 | lr: 2.0e-4 31 | 32 | engine: 33 | gpus: [0, 1, 2, 3] 34 | batch_size: 256 35 | 36 | train: 37 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/CNN/contact_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: ProteinNet 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | test_split: test 15 | 16 | task: 17 | class: ContactPrediction 18 | model: 19 | class: ProteinConvolutionalNetwork 20 | input_dim: 21 21 | hidden_dims: [1024, 1024] 22 | kernel_size: 5 23 | padding: 2 24 | criterion: bce 25 | metric: ["accuracy", "prec@L5", "prec@5"] 26 | max_length: 400 27 | random_truncate: yes 28 | threshold: 8.0 29 | gap: 6 30 | num_mlp_layer: 2 31 | 32 | eval_metric: prec@L5 33 | 34 | optimizer: 35 | class: Adam 36 | lr: 2.0e-4 37 | 38 | engine: 39 | gpus: [0, 1, 2, 3] 40 | batch_size: 4 41 | 42 | train: 43 | num_epoch: 50 -------------------------------------------------------------------------------- /config/single_task/CNN/fluorescence_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Fluorescence 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProteinConvolutionalNetwork 18 | input_dim: 21 19 | hidden_dims: [1024, 1024] 20 | kernel_size: 5 21 | padding: 2 22 | criterion: mse 23 | metric: ["mae", "rmse", "spearmanr"] 24 | normalization: False 25 | num_mlp_layer: 2 26 | 27 | eval_metric: spearmanr 28 | 29 | optimizer: 30 | class: Adam 31 | lr: 1.0e-4 32 | 33 | engine: 34 | gpus: [0, 1, 2, 3] 35 | batch_size: 32 36 | 37 | train: 38 | num_epoch: 100 39 | -------------------------------------------------------------------------------- /config/single_task/CNN/fold_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Fold 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | test_split: test_fold_holdout 15 | 16 | task: 17 | class: PropertyPrediction 18 | model: 19 | class: ProteinConvolutionalNetwork 20 | input_dim: 21 21 | hidden_dims: [1024, 1024] 22 | kernel_size: 5 23 | padding: 2 24 | criterion: ce 25 | metric: ["acc", "mcc"] 26 | num_class: 1195 27 | num_mlp_layer: 2 28 | 29 | eval_metric: accuracy 30 | 31 | optimizer: 32 | class: Adam 33 | lr: 2.0e-4 34 | 35 | engine: 36 | gpus: [0] 37 | batch_size: 64 38 | 39 | train: 40 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/CNN/gb1_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: GB1 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProteinConvolutionalNetwork 18 | input_dim: 21 19 | hidden_dims: [1024, 1024] 20 | kernel_size: 5 21 | padding: 2 22 | criterion: mse 23 | metric: ["mae", "rmse", "spearmanr"] 24 | normalization: False 25 | num_mlp_layer: 2 26 | 27 | eval_metric: spearmanr 28 | 29 | optimizer: 30 | class: Adam 31 | lr: 1.0e-4 32 | 33 | engine: 34 | gpus: [0, 1, 2, 3] 35 | batch_size: 64 36 | 37 | train: 38 | num_epoch: 100 39 | -------------------------------------------------------------------------------- /config/single_task/CNN/human_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: HumanPPI 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | keys: [ "graph1", "graph2" ] 14 | 15 | test_split: test 16 | 17 | task: 18 | class: InteractionPrediction 19 | model: 20 | class: ProteinConvolutionalNetwork 21 | input_dim: 21 22 | hidden_dims: [1024, 1024] 23 | kernel_size: 5 24 | padding: 2 25 | criterion: ce 26 | metric: ["acc", "mcc"] 27 | num_mlp_layer: 2 28 | num_class: 2 29 | 30 | eval_metric: accuracy 31 | 32 | optimizer: 33 | class: Adam 34 | lr: 2.0e-4 35 | 36 | engine: 37 | gpus: [0] 38 | batch_size: 256 39 | 40 | train: 41 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/CNN/pdbbind_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: PDBBind 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | keys: "graph1" 14 | 15 | test_split: test 16 | 17 | task: 18 | class: InteractionPrediction 19 | model: 20 | class: ProteinConvolutionalNetwork 21 | input_dim: 21 22 | hidden_dims: [1024, 1024] 23 | kernel_size: 5 24 | padding: 2 25 | model2: 26 | class: GIN 27 | input_dim: 66 28 | hidden_dims: [256, 256, 256, 256] 29 | batch_norm: yes 30 | short_cut: yes 31 | concat_hidden: yes 32 | normalization: False 33 | criterion: mse 34 | metric: ["mae", "rmse", "spearmanr"] 35 | num_mlp_layer: 2 36 | 37 | eval_metric: root mean squared error 38 | 39 | optimizer: 40 | class: Adam 41 | lr: 2.0e-4 42 | 43 | engine: 44 | gpus: [0] 45 | batch_size: 256 46 | 47 | train: 48 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/CNN/ppi_affinity_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: PPIAffinity 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | keys: [ "graph1", "graph2" ] 14 | 15 | test_split: test 16 | 17 | task: 18 | class: InteractionPrediction 19 | model: 20 | class: ProteinConvolutionalNetwork 21 | input_dim: 21 22 | hidden_dims: [1024, 1024] 23 | kernel_size: 5 24 | padding: 2 25 | normalization: False 26 | criterion: mse 27 | metric: ["mae", "rmse", "spearmanr"] 28 | num_mlp_layer: 2 29 | 30 | eval_metric: root mean squared error 31 | 32 | optimizer: 33 | class: Adam 34 | lr: 2.0e-4 35 | 36 | engine: 37 | gpus: [0] 38 | batch_size: 256 39 | 40 | train: 41 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/CNN/solubility_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Solubility 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProteinConvolutionalNetwork 18 | input_dim: 21 19 | hidden_dims: [1024, 1024] 20 | kernel_size: 5 21 | padding: 2 22 | criterion: ce 23 | metric: ["acc", "mcc"] 24 | num_class: 2 25 | num_mlp_layer: 2 26 | 27 | eval_metric: accuracy 28 | 29 | optimizer: 30 | class: Adam 31 | lr: 1.0e-4 32 | 33 | engine: 34 | gpus: [0, 1, 2, 3] 35 | batch_size: 32 36 | 37 | train: 38 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/CNN/ss_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: SecondaryStructure 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | test_split: cb513 15 | 16 | task: 17 | class: NodePropertyPrediction 18 | model: 19 | class: ProteinConvolutionalNetwork 20 | input_dim: 21 21 | hidden_dims: [1024, 1024] 22 | kernel_size: 5 23 | padding: 2 24 | criterion: ce 25 | metric: ["micro_acc", "macro_acc"] 26 | num_mlp_layer: 2 27 | num_class: 3 28 | 29 | eval_metric: macro_acc 30 | 31 | optimizer: 32 | class: Adam 33 | lr: 2.0e-4 34 | 35 | engine: 36 | gpus: [0] 37 | batch_size: 256 38 | 39 | train: 40 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/CNN/stability_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Stability 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProteinConvolutionalNetwork 18 | input_dim: 21 19 | hidden_dims: [1024, 1024] 20 | kernel_size: 5 21 | padding: 2 22 | criterion: mse 23 | metric: ["mae", "rmse", "spearmanr"] 24 | normalization: False 25 | num_mlp_layer: 2 26 | 27 | eval_metric: spearmanr 28 | 29 | optimizer: 30 | class: Adam 31 | lr: 1.0e-4 32 | 33 | engine: 34 | gpus: [0, 1, 2, 3] 35 | batch_size: 32 36 | 37 | train: 38 | num_epoch: 100 39 | -------------------------------------------------------------------------------- /config/single_task/CNN/subloc_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: SubcellularLocalization 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProteinConvolutionalNetwork 18 | input_dim: 21 19 | hidden_dims: [1024, 1024] 20 | kernel_size: 5 21 | padding: 2 22 | criterion: ce 23 | metric: ["acc", "mcc"] 24 | num_mlp_layer: 2 25 | 26 | eval_metric: accuracy 27 | 28 | optimizer: 29 | class: Adam 30 | lr: 2.0e-4 31 | 32 | engine: 33 | gpus: [0, 1, 2, 3] 34 | batch_size: 256 35 | 36 | train: 37 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/CNN/thermo_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Thermostability 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProteinConvolutionalNetwork 18 | input_dim: 21 19 | hidden_dims: [1024, 1024] 20 | kernel_size: 5 21 | padding: 2 22 | criterion: mse 23 | metric: ["mae", "rmse", "spearmanr"] 24 | normalization: False 25 | num_mlp_layer: 2 26 | 27 | eval_metric: spearmanr 28 | 29 | optimizer: 30 | class: Adam 31 | lr: 1.0e-4 32 | 33 | engine: 34 | gpus: [0, 1, 2, 3] 35 | batch_size: 16 36 | 37 | train: 38 | num_epoch: 100 39 | -------------------------------------------------------------------------------- /config/single_task/CNN/yeast_CNN.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: YeastPPI 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | keys: [ "graph1", "graph2" ] 14 | 15 | test_split: test 16 | 17 | task: 18 | class: InteractionPrediction 19 | model: 20 | class: ProteinConvolutionalNetwork 21 | input_dim: 21 22 | hidden_dims: [1024, 1024] 23 | kernel_size: 5 24 | padding: 2 25 | criterion: ce 26 | metric: ["acc", "mcc"] 27 | num_mlp_layer: 2 28 | num_class: 2 29 | 30 | eval_metric: accuracy 31 | 32 | optimizer: 33 | class: Adam 34 | lr: 2.0e-4 35 | 36 | engine: 37 | gpus: [0] 38 | batch_size: 256 39 | 40 | train: 41 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/DDE/aav_DDE.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: AAV 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | keep_mutation_region: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | 15 | task: 16 | class: PropertyPrediction 17 | model: 18 | class: Statistic 19 | type: DDE 20 | hidden_dims: [512] 21 | criterion: mse 22 | metric: ["mae", "rmse", "spearmanr"] 23 | normalization: False 24 | num_mlp_layer: 2 25 | 26 | eval_metric: spearmanr 27 | 28 | optimizer: 29 | class: Adam 30 | lr: 1.0e-4 31 | 32 | engine: 33 | gpus: [0, 1, 2, 3] 34 | batch_size: 16 35 | 36 | train: 37 | num_epoch: 100 38 | -------------------------------------------------------------------------------- /config/single_task/DDE/beta_DDE.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: BetaLactamase 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: Statistic 18 | type: DDE 19 | hidden_dims: [512] 20 | criterion: mse 21 | metric: ["mae", "rmse", "spearmanr"] 22 | normalization: False 23 | num_mlp_layer: 2 24 | 25 | eval_metric: spearmanr 26 | 27 | optimizer: 28 | class: Adam 29 | lr: 1.0e-4 30 | 31 | engine: 32 | gpus: [0, 1, 2, 3] 33 | batch_size: 16 34 | 35 | train: 36 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/DDE/binloc_DDE.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: BinaryLocalization 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: Statistic 18 | type: DDE 19 | hidden_dims: [512] 20 | criterion: ce 21 | metric: ["acc", "mcc"] 22 | num_mlp_layer: 2 23 | 24 | eval_metric: accuracy 25 | 26 | optimizer: 27 | class: Adam 28 | lr: 1.0e-4 29 | 30 | engine: 31 | gpus: [0, 1, 2, 3] 32 | batch_size: 16 33 | 34 | train: 35 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/DDE/fluorescence_DDE.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Fluorescence 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: Statistic 18 | type: DDE 19 | hidden_dims: [512] 20 | criterion: mse 21 | metric: ["mae", "rmse", "spearmanr"] 22 | normalization: False 23 | num_mlp_layer: 2 24 | 25 | eval_metric: spearmanr 26 | 27 | optimizer: 28 | class: Adam 29 | lr: 1.0e-4 30 | 31 | engine: 32 | gpus: [0, 1, 2, 3] 33 | batch_size: 16 34 | 35 | train: 36 | num_epoch: 100 37 | -------------------------------------------------------------------------------- /config/single_task/DDE/fold_DDE.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Fold 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | test_split: test_fold_holdout 15 | 16 | task: 17 | class: PropertyPrediction 18 | model: 19 | class: Statistic 20 | type: DDE 21 | hidden_dims: [512] 22 | criterion: ce 23 | metric: ["acc", "mcc"] 24 | num_class: 1195 25 | num_mlp_layer: 2 26 | 27 | eval_metric: accuracy 28 | 29 | optimizer: 30 | class: Adam 31 | lr: 1.0e-4 32 | 33 | engine: 34 | gpus: [0, 1, 2, 3] 35 | batch_size: 16 36 | 37 | train: 38 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/DDE/gb1_DDE.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: GB1 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: Statistic 18 | type: DDE 19 | hidden_dims: [512] 20 | criterion: mse 21 | metric: ["mae", "rmse", "spearmanr"] 22 | normalization: False 23 | num_mlp_layer: 2 24 | 25 | eval_metric: spearmanr 26 | 27 | optimizer: 28 | class: Adam 29 | lr: 1.0e-4 30 | 31 | engine: 32 | gpus: [0, 1, 2, 3] 33 | batch_size: 16 34 | 35 | train: 36 | num_epoch: 100 37 | -------------------------------------------------------------------------------- /config/single_task/DDE/human_DDE.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: HumanPPI 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | keys: [ "graph1", "graph2" ] 14 | 15 | test_split: test 16 | 17 | task: 18 | class: InteractionPrediction 19 | model: 20 | class: Statistic 21 | type: DDE 22 | hidden_dims: [512] 23 | criterion: ce 24 | metric: ["acc", "mcc"] 25 | num_mlp_layer: 2 26 | num_class: 2 27 | 28 | eval_metric: accuracy 29 | 30 | optimizer: 31 | class: Adam 32 | lr: 1.0e-4 33 | 34 | engine: 35 | gpus: [0, 1, 2, 3] 36 | batch_size: 16 37 | 38 | train: 39 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/DDE/ppi_affinity_DDE.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: PPIAffinity 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | keys: [ "graph1", "graph2" ] 14 | 15 | test_split: test 16 | 17 | task: 18 | class: InteractionPrediction 19 | model: 20 | class: Statistic 21 | type: DDE 22 | hidden_dims: [512] 23 | normalization: False 24 | criterion: mse 25 | metric: ["mae", "rmse", "spearmanr"] 26 | num_mlp_layer: 2 27 | 28 | eval_metric: root mean squared error 29 | 30 | optimizer: 31 | class: Adam 32 | lr: 1.0e-4 33 | 34 | engine: 35 | gpus: [0, 1, 2, 3] 36 | batch_size: 16 37 | 38 | train: 39 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/DDE/solubility_DDE.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Solubility 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: Statistic 18 | type: DDE 19 | hidden_dims: [512] 20 | criterion: ce 21 | metric: ["acc", "mcc"] 22 | num_class: 2 23 | num_mlp_layer: 2 24 | 25 | eval_metric: accuracy 26 | 27 | optimizer: 28 | class: Adam 29 | lr: 1.0e-4 30 | 31 | engine: 32 | gpus: [0, 1, 2, 3] 33 | batch_size: 16 34 | 35 | train: 36 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/DDE/stability_DDE.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Stability 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: Statistic 18 | type: DDE 19 | hidden_dims: [512] 20 | criterion: mse 21 | metric: ["mae", "rmse", "spearmanr"] 22 | normalization: False 23 | num_mlp_layer: 2 24 | 25 | eval_metric: spearmanr 26 | 27 | optimizer: 28 | class: Adam 29 | lr: 1.0e-4 30 | 31 | engine: 32 | gpus: [0, 1, 2, 3] 33 | batch_size: 16 34 | 35 | train: 36 | num_epoch: 100 37 | -------------------------------------------------------------------------------- /config/single_task/DDE/subloc_DDE.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: SubcellularLocalization 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: Statistic 18 | type: DDE 19 | hidden_dims: [512] 20 | criterion: ce 21 | metric: ["acc", "mcc"] 22 | num_mlp_layer: 2 23 | 24 | eval_metric: accuracy 25 | 26 | optimizer: 27 | class: Adam 28 | lr: 1.0e-4 29 | 30 | engine: 31 | gpus: [0, 1, 2, 3] 32 | batch_size: 16 33 | 34 | train: 35 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/DDE/thermo_DDE.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Thermostability 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: Statistic 18 | type: DDE 19 | hidden_dims: [512] 20 | criterion: mse 21 | metric: ["mae", "rmse", "spearmanr"] 22 | normalization: False 23 | num_mlp_layer: 2 24 | 25 | eval_metric: spearmanr 26 | 27 | optimizer: 28 | class: Adam 29 | lr: 1.0e-4 30 | 31 | engine: 32 | gpus: [0, 1, 2, 3] 33 | batch_size: 16 34 | 35 | train: 36 | num_epoch: 100 37 | -------------------------------------------------------------------------------- /config/single_task/DDE/yeast_DDE.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: YeastPPI 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | keys: [ "graph1", "graph2" ] 14 | 15 | test_split: test 16 | 17 | task: 18 | class: InteractionPrediction 19 | model: 20 | class: Statistic 21 | type: DDE 22 | hidden_dims: [512] 23 | criterion: ce 24 | metric: ["acc", "mcc"] 25 | num_mlp_layer: 2 26 | num_class: 2 27 | 28 | eval_metric: accuracy 29 | 30 | optimizer: 31 | class: Adam 32 | lr: 1.0e-4 33 | 34 | engine: 35 | gpus: [0, 1, 2, 3] 36 | batch_size: 16 37 | 38 | train: 39 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ESM/aav_ESM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: AAV 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | keep_mutation_region: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | 15 | task: 16 | class: PropertyPrediction 17 | model: 18 | class: ESM 19 | path: ~/scratch/protein-model-weights/esm-model-weights/ 20 | model: ESM-1b 21 | criterion: mse 22 | metric: ["mae", "rmse", "spearmanr"] 23 | normalization: False 24 | num_mlp_layer: 2 25 | 26 | eval_metric: spearmanr 27 | 28 | optimizer: 29 | class: Adam 30 | lr: 2.0e-4 31 | 32 | lr_ratio: 0.1 33 | 34 | engine: 35 | gpus: [0, 1, 2, 3] 36 | batch_size: 8 37 | 38 | train: 39 | num_epoch: 100 40 | -------------------------------------------------------------------------------- /config/single_task/ESM/aav_ESM_fix.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: AAV 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | keep_mutation_region: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | 15 | task: 16 | class: PropertyPrediction 17 | model: 18 | class: ESM 19 | path: ~/scratch/protein-model-weights/esm-model-weights/ 20 | model: ESM-1b 21 | readout: mean 22 | criterion: mse 23 | metric: ["mae", "rmse", "spearmanr"] 24 | normalization: False 25 | num_mlp_layer: 2 26 | 27 | eval_metric: spearmanr 28 | 29 | optimizer: 30 | class: Adam 31 | lr: 5.0e-5 32 | 33 | fix_encoder: True 34 | 35 | engine: 36 | gpus: [0, 1, 2, 3] 37 | batch_size: 32 38 | 39 | train: 40 | num_epoch: 100 41 | -------------------------------------------------------------------------------- /config/single_task/ESM/beta_ESM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: BetaLactamase 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ESM 18 | path: ~/scratch/esm-model-weights/ 19 | model: ESM-1b 20 | criterion: mse 21 | metric: ["mae", "rmse", "spearmanr"] 22 | normalization: False 23 | num_mlp_layer: 1 24 | 25 | eval_metric: spearmanr 26 | 27 | optimizer: 28 | class: Adam 29 | lr: 2.0e-4 30 | 31 | lr_ratio: 0.1 32 | 33 | engine: 34 | gpus: [0] 35 | batch_size: 8 36 | 37 | train: 38 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ESM/beta_ESM_fix.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: BetaLactamase 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ESM 18 | path: ~/scratch/esm-model-weights/ 19 | model: ESM-1b 20 | readout: mean 21 | criterion: mse 22 | metric: ["mae", "rmse", "spearmanr"] 23 | normalization: False 24 | num_mlp_layer: 2 25 | 26 | eval_metric: spearmanr 27 | 28 | optimizer: 29 | class: Adam 30 | lr: 5.0e-5 31 | 32 | fix_encoder: True 33 | 34 | engine: 35 | gpus: [0, 1, 2, 3] 36 | batch_size: 32 37 | 38 | train: 39 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ESM/bindingdb_ESM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: BindingDB 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: TruncateProtein 12 | max_length: 550 13 | random: True 14 | keys: "graph1" 15 | - class: ProteinView 16 | view: "residue" 17 | keys: "graph1" 18 | 19 | test_split: holdout_test 20 | 21 | task: 22 | class: InteractionPrediction 23 | model: 24 | class: ESM 25 | path: ~/scratch/esm-model-weights/ 26 | model: ESM-1b 27 | model2: 28 | class: GIN 29 | input_dim: 66 30 | hidden_dims: [256, 256, 256, 256] 31 | batch_norm: yes 32 | short_cut: yes 33 | concat_hidden: yes 34 | normalization: False 35 | criterion: mse 36 | metric: ["mae", "rmse", "spearmanr"] 37 | num_mlp_layer: 2 38 | 39 | eval_metric: root mean squared error 40 | 41 | optimizer: 42 | class: Adam 43 | lr: 2.0e-4 44 | 45 | engine: 46 | gpus: [0, 1, 2, 3] 47 | batch_size: 4 48 | 49 | lr_ratio: 0.1 50 | 51 | train: 52 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ESM/bindingdb_ESM_fix.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: BindingDB 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | keys: "graph1" 14 | 15 | test_split: holdout_test 16 | 17 | task: 18 | class: InteractionPrediction 19 | model: 20 | class: ESM 21 | path: ~/scratch/esm-model-weights/ 22 | model: ESM-1b 23 | readout: mean 24 | model2: 25 | class: GIN 26 | input_dim: 66 27 | hidden_dims: [ 256, 256, 256, 256 ] 28 | batch_norm: yes 29 | short_cut: yes 30 | concat_hidden: yes 31 | criterion: mse 32 | metric: [ "mae", "rmse", "spearmanr" ] 33 | num_mlp_layer: 2 34 | normalization: False 35 | 36 | eval_metric: root mean squared error 37 | 38 | optimizer: 39 | class: Adam 40 | lr: 5.0e-5 41 | 42 | fix_encoder: True 43 | 44 | engine: 45 | gpus: [0, 1, 2, 3] 46 | batch_size: 32 47 | 48 | train: 49 | num_epoch: 100 50 | -------------------------------------------------------------------------------- /config/single_task/ESM/binloc_ESM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: BinaryLocalization 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: TruncateProtein 12 | max_length: 550 13 | random: True 14 | - class: ProteinView 15 | view: "residue" 16 | 17 | task: 18 | class: PropertyPrediction 19 | model: 20 | class: ESM 21 | path: ~/scratch/esm-model-weights/ 22 | model: ESM-1b 23 | criterion: ce 24 | metric: ["acc", "mcc"] 25 | num_mlp_layer: 2 26 | 27 | eval_metric: accuracy 28 | 29 | optimizer: 30 | class: Adam 31 | lr: 2.0e-4 32 | 33 | lr_ratio: 0.1 34 | 35 | engine: 36 | gpus: [0, 1, 2, 3] 37 | batch_size: 4 38 | 39 | train: 40 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ESM/binloc_ESM_fix.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: BinaryLocalization 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ESM 18 | path: ~/scratch/esm-model-weights/ 19 | model: ESM-1b 20 | readout: mean 21 | criterion: ce 22 | metric: ["acc", "mcc"] 23 | num_mlp_layer: 2 24 | num_class: 2 25 | 26 | eval_metric: accuracy 27 | 28 | optimizer: 29 | class: Adam 30 | lr: 5.0e-5 31 | 32 | fix_encoder: True 33 | 34 | engine: 35 | gpus: [0, 1, 2, 3] 36 | batch_size: 32 37 | 38 | train: 39 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ESM/contact_ESM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: ProteinNet 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: TruncateProtein 12 | max_length: 1022 13 | random: False 14 | - class: ProteinView 15 | view: "residue" 16 | 17 | test_split: test 18 | 19 | task: 20 | class: ContactPrediction 21 | model: 22 | class: ESM 23 | path: ~/scratch/esm-model-weights/ 24 | model: ESM-1b 25 | criterion: bce 26 | metric: ["accuracy", "prec@L5", "prec@5"] 27 | max_length: 350 28 | random_truncate: yes 29 | threshold: 8.0 30 | gap: 6 31 | num_mlp_layer: 2 32 | 33 | eval_metric: prec@L5 34 | 35 | optimizer: 36 | class: Adam 37 | lr: 2.0e-4 38 | 39 | engine: 40 | gpus: [0, 1, 2, 3] 41 | batch_size: 2 42 | 43 | lr_ratio: 0.1 44 | 45 | train: 46 | num_epoch: 50 47 | -------------------------------------------------------------------------------- /config/single_task/ESM/contact_ESM_fix.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: ProteinNet 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: TruncateProtein 12 | max_length: 1022 13 | random: False 14 | - class: ProteinView 15 | view: "residue" 16 | 17 | test_split: test 18 | test_batch_size: 1 19 | 20 | task: 21 | class: ContactPrediction 22 | model: 23 | class: ESM 24 | path: ~/scratch/esm-model-weights/ 25 | model: ESM-1b 26 | readout: mean 27 | criterion: bce 28 | metric: ["accuracy", "prec@L5", "prec@5"] 29 | max_length: 400 30 | random_truncate: yes 31 | threshold: 8.0 32 | gap: 6 33 | num_mlp_layer: 2 34 | 35 | eval_metric: prec@L5 36 | 37 | optimizer: 38 | class: Adam 39 | lr: 5.0e-5 40 | 41 | fix_encoder: True 42 | 43 | engine: 44 | gpus: [0, 1, 2, 3] 45 | batch_size: 6 46 | 47 | train: 48 | num_epoch: 50 -------------------------------------------------------------------------------- /config/single_task/ESM/fluorescence_ESM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Fluorescence 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ESM 18 | path: ~/scratch/esm-model-weights/ 19 | model: ESM-1b 20 | criterion: mse 21 | metric: ["mae", "rmse", "spearmanr"] 22 | normalization: False 23 | num_mlp_layer: 2 24 | 25 | eval_metric: spearmanr 26 | 27 | optimizer: 28 | class: Adam 29 | lr: 2.0e-4 30 | 31 | lr_ratio: 0.1 32 | 33 | engine: 34 | gpus: [0, 1, 2, 3] 35 | batch_size: 8 36 | 37 | train: 38 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ESM/fluorescence_ESM_fix.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Fluorescence 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ESM 18 | path: ~/scratch/esm-model-weights/ 19 | model: ESM-1b 20 | readout: mean 21 | criterion: mse 22 | metric: ["mae", "rmse", "spearmanr"] 23 | normalization: False 24 | num_mlp_layer: 2 25 | 26 | eval_metric: spearmanr 27 | 28 | optimizer: 29 | class: Adam 30 | lr: 5.0e-5 31 | 32 | fix_encoder: True 33 | 34 | engine: 35 | gpus: [0, 1, 2, 3] 36 | batch_size: 32 37 | 38 | train: 39 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ESM/fold_ESM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Fold 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: TruncateProtein 12 | max_length: 550 13 | random: True 14 | - class: ProteinView 15 | view: "residue" 16 | 17 | test_split: test_fold_holdout 18 | 19 | task: 20 | class: PropertyPrediction 21 | model: 22 | class: ESM 23 | path: ~/scratch/esm-model-weights/ 24 | model: ESM-1b 25 | criterion: ce 26 | metric: ["acc", "mcc"] 27 | num_mlp_layer: 2 28 | 29 | eval_metric: accuracy 30 | 31 | optimizer: 32 | class: Adam 33 | lr: 2.0e-4 34 | 35 | engine: 36 | gpus: [0, 1, 2, 3] 37 | batch_size: 4 38 | 39 | lr_ratio: 0.1 40 | 41 | train: 42 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ESM/fold_ESM_fix.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Fold 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | test_split: test_fold_holdout 15 | 16 | task: 17 | class: PropertyPrediction 18 | model: 19 | class: ESM 20 | path: ~/scratch/esm-model-weights/ 21 | model: ESM-1b 22 | readout: mean 23 | criterion: ce 24 | metric: ["acc", "mcc"] 25 | num_mlp_layer: 2 26 | num_class: 1195 27 | 28 | eval_metric: accuracy 29 | 30 | optimizer: 31 | class: Adam 32 | lr: 5.0e-5 33 | 34 | fix_encoder: True 35 | 36 | engine: 37 | gpus: [0, 1, 2, 3] 38 | batch_size: 32 39 | 40 | train: 41 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ESM/gb1_ESM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: GB1 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ESM 18 | path: ~/scratch/protein-model-weights/esm-model-weights/ 19 | model: ESM-1b 20 | criterion: mse 21 | metric: ["mae", "rmse", "spearmanr"] 22 | normalization: False 23 | num_mlp_layer: 2 24 | 25 | eval_metric: spearmanr 26 | 27 | optimizer: 28 | class: Adam 29 | lr: 2.0e-4 30 | 31 | lr_ratio: 0.1 32 | 33 | engine: 34 | gpus: [0, 1, 2, 3] 35 | batch_size: 8 36 | 37 | train: 38 | num_epoch: 100 39 | -------------------------------------------------------------------------------- /config/single_task/ESM/gb1_ESM_fix.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: GB1 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ESM 18 | path: ~/scratch/protein-model-weights/esm-model-weights/ 19 | model: ESM-1b 20 | readout: mean 21 | criterion: mse 22 | metric: ["mae", "rmse", "spearmanr"] 23 | normalization: False 24 | num_mlp_layer: 2 25 | 26 | eval_metric: spearmanr 27 | 28 | optimizer: 29 | class: Adam 30 | lr: 5.0e-5 31 | 32 | fix_encoder: True 33 | 34 | engine: 35 | gpus: [0, 1, 2, 3] 36 | batch_size: 32 37 | 38 | train: 39 | num_epoch: 100 40 | -------------------------------------------------------------------------------- /config/single_task/ESM/human_ESM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: HumanPPI 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: TruncateProtein 12 | max_length: 550 13 | random: True 14 | keys: [ "graph1", "graph2" ] 15 | - class: ProteinView 16 | view: "residue" 17 | keys: [ "graph1", "graph2" ] 18 | 19 | test_split: test 20 | 21 | task: 22 | class: InteractionPrediction 23 | model: 24 | class: ESM 25 | path: ~/scratch/esm-model-weights/ 26 | model: ESM-1b 27 | criterion: ce 28 | metric: ["acc", "mcc"] 29 | num_mlp_layer: 2 30 | num_class: 2 31 | 32 | eval_metric: accuracy 33 | 34 | optimizer: 35 | class: Adam 36 | lr: 2.0e-4 37 | 38 | engine: 39 | gpus: [0, 1, 2, 3] 40 | batch_size: 2 41 | 42 | lr_ratio: 0.1 43 | 44 | train: 45 | num_epoch: 50 -------------------------------------------------------------------------------- /config/single_task/ESM/human_ESM_fix.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: HumanPPI 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | keys: [ "graph1", "graph2" ] 14 | 15 | test_split: test 16 | 17 | task: 18 | class: InteractionPrediction 19 | model: 20 | class: ESM 21 | path: ~/scratch/esm-model-weights/ 22 | model: ESM-1b 23 | readout: mean 24 | criterion: ce 25 | metric: ["acc", "mcc"] 26 | num_mlp_layer: 2 27 | num_class: 2 28 | 29 | eval_metric: accuracy 30 | 31 | optimizer: 32 | class: Adam 33 | lr: 5.0e-5 34 | 35 | fix_encoder: True 36 | fix_encoder2: True 37 | 38 | engine: 39 | gpus: [0, 1, 2, 3] 40 | batch_size: 8 41 | 42 | train: 43 | num_epoch: 50 -------------------------------------------------------------------------------- /config/single_task/ESM/pdbbind_ESM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: PDBBind 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: TruncateProtein 12 | max_length: 550 13 | random: True 14 | keys: "graph1" 15 | - class: ProteinView 16 | view: "residue" 17 | keys: "graph1" 18 | 19 | test_split: test 20 | 21 | task: 22 | class: InteractionPrediction 23 | model: 24 | class: ESM 25 | path: ~/scratch/esm-model-weights/ 26 | model: ESM-1b 27 | model2: 28 | class: GIN 29 | input_dim: 66 30 | hidden_dims: [256, 256, 256, 256] 31 | batch_norm: yes 32 | short_cut: yes 33 | concat_hidden: yes 34 | criterion: mse 35 | metric: ["mae", "rmse", "spearmanr"] 36 | num_mlp_layer: 2 37 | normalization: False 38 | 39 | eval_metric: root mean squared error 40 | 41 | optimizer: 42 | class: Adam 43 | lr: 2.0e-4 44 | 45 | engine: 46 | gpus: [0, 1, 2, 3] 47 | batch_size: 4 48 | 49 | lr_ratio: 0.1 50 | 51 | train: 52 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ESM/pdbbind_ESM_fix.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: PDBBind 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | keys: "graph1" 14 | 15 | test_split: test 16 | 17 | task: 18 | class: InteractionPrediction 19 | model: 20 | class: ESM 21 | path: ~/scratch/esm-model-weights/ 22 | model: ESM-1b 23 | readout: mean 24 | model2: 25 | class: GIN 26 | input_dim: 66 27 | hidden_dims: [ 256, 256, 256, 256 ] 28 | batch_norm: yes 29 | short_cut: yes 30 | concat_hidden: yes 31 | criterion: mse 32 | metric: [ "mae", "rmse", "spearmanr" ] 33 | num_mlp_layer: 2 34 | normalization: False 35 | 36 | eval_metric: root mean squared error 37 | 38 | optimizer: 39 | class: Adam 40 | lr: 5.0e-5 41 | 42 | fix_encoder: True 43 | 44 | engine: 45 | gpus: [0, 1, 2, 3] 46 | batch_size: 32 47 | 48 | train: 49 | num_epoch: 100 50 | -------------------------------------------------------------------------------- /config/single_task/ESM/ppi_affinity_ESM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: PPIAffinity 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: TruncateProtein 12 | max_length: 550 13 | random: True 14 | keys: [ "graph1", "graph2" ] 15 | - class: ProteinView 16 | view: "residue" 17 | keys: [ "graph1", "graph2" ] 18 | 19 | test_split: test 20 | 21 | task: 22 | class: InteractionPrediction 23 | model: 24 | class: ESM 25 | path: ~/scratch/esm-model-weights/ 26 | model: ESM-1b 27 | normalization: False 28 | criterion: mse 29 | metric: ["mae", "rmse", "spearmanr"] 30 | num_mlp_layer: 2 31 | 32 | eval_metric: root mean squared error 33 | 34 | optimizer: 35 | class: Adam 36 | lr: 2.0e-4 37 | 38 | engine: 39 | gpus: [0, 1, 2, 3] 40 | batch_size: 2 41 | 42 | lr_ratio: 0.1 43 | 44 | train: 45 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ESM/ppi_affinity_ESM_fix.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: PPIAffinity 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | keys: [ "graph1", "graph2" ] 14 | 15 | test_split: test 16 | 17 | task: 18 | class: InteractionPrediction 19 | model: 20 | class: ESM 21 | path: ~/scratch/esm-model-weights/ 22 | model: ESM-1b 23 | readout: mean 24 | criterion: mse 25 | metric: [ "mae", "rmse", "spearmanr" ] 26 | num_mlp_layer: 2 27 | normalization: False 28 | 29 | eval_metric: root mean squared error 30 | 31 | optimizer: 32 | class: Adam 33 | lr: 5.0e-5 34 | 35 | fix_encoder: True 36 | fix_encoder2: True 37 | 38 | engine: 39 | gpus: [0, 1, 2, 3] 40 | batch_size: 8 41 | 42 | train: 43 | num_epoch: 100 44 | -------------------------------------------------------------------------------- /config/single_task/ESM/solubility_ESM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Solubility 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: TruncateProtein 12 | max_length: 550 13 | random: False 14 | - class: ProteinView 15 | view: "residue" 16 | 17 | task: 18 | class: PropertyPrediction 19 | model: 20 | class: ESM 21 | path: ~/scratch/esm-model-weights/ 22 | model: ESM-1b 23 | criterion: ce 24 | metric: ["acc", "mcc"] 25 | num_class: 2 26 | num_mlp_layer: 2 27 | 28 | eval_metric: accuracy 29 | 30 | optimizer: 31 | class: Adam 32 | lr: 2.0e-4 33 | 34 | lr_ratio: 0.1 35 | 36 | engine: 37 | gpus: [0, 1, 2, 3] 38 | batch_size: 4 39 | 40 | train: 41 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ESM/solubility_ESM_fix.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Solubility 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ESM 18 | path: ~/scratch/esm-model-weights/ 19 | model: ESM-1b 20 | readout: mean 21 | criterion: ce 22 | metric: ["acc", "mcc"] 23 | num_mlp_layer: 2 24 | num_class: 2 25 | 26 | eval_metric: accuracy 27 | 28 | optimizer: 29 | class: Adam 30 | lr: 5.0e-5 31 | 32 | fix_encoder: True 33 | 34 | engine: 35 | gpus: [0, 1, 2, 3] 36 | batch_size: 32 37 | 38 | train: 39 | num_epoch: 50 -------------------------------------------------------------------------------- /config/single_task/ESM/ss_ESM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: SecondaryStructure 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: TruncateProtein 12 | max_length: 550 13 | random: True 14 | - class: ProteinView 15 | view: "residue" 16 | 17 | test_split: cb513 18 | 19 | task: 20 | class: NodePropertyPrediction 21 | model: 22 | class: ESM 23 | path: ~/scratch/esm-model-weights/ 24 | model: ESM-1b 25 | criterion: ce 26 | metric: ["micro_acc", "macro_acc"] 27 | num_mlp_layer: 2 28 | num_class: 3 29 | 30 | eval_metric: macro_acc 31 | 32 | optimizer: 33 | class: Adam 34 | lr: 2.0e-4 35 | 36 | lr_ratio: 0.1 37 | 38 | engine: 39 | gpus: [0, 1, 2, 3] 40 | batch_size: 4 41 | 42 | train: 43 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ESM/ss_ESM_fix.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: SecondaryStructure 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: TruncateProtein 12 | max_length: 1022 13 | random: False 14 | - class: ProteinView 15 | view: "residue" 16 | 17 | test_split: cb513 18 | 19 | task: 20 | class: NodePropertyPrediction 21 | model: 22 | class: ESM 23 | path: ~/scratch/esm-model-weights/ 24 | model: ESM-1b 25 | readout: mean 26 | criterion: ce 27 | metric: ["micro_acc", "macro_acc"] 28 | num_mlp_layer: 2 29 | num_class: 3 30 | 31 | eval_metric: macro_acc 32 | 33 | optimizer: 34 | class: Adam 35 | lr: 5.0e-5 36 | 37 | fix_encoder: True 38 | 39 | engine: 40 | gpus: [0, 1, 2, 3] 41 | batch_size: 8 42 | 43 | train: 44 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ESM/stability_ESM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Stability 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ESM 18 | path: ~/scratch/esm-model-weights/ 19 | model: ESM-1b 20 | criterion: mse 21 | metric: ["mae", "rmse", "spearmanr"] 22 | normalization: False 23 | num_mlp_layer: 2 24 | 25 | eval_metric: spearmanr 26 | 27 | optimizer: 28 | class: Adam 29 | lr: 2.0e-4 30 | 31 | lr_ratio: 0.1 32 | 33 | engine: 34 | gpus: [0, 1, 2, 3] 35 | batch_size: 8 36 | 37 | train: 38 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ESM/stability_ESM_fix.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Stability 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ESM 18 | path: ~/scratch/esm-model-weights/ 19 | model: ESM-1b 20 | readout: mean 21 | criterion: mse 22 | metric: ["mae", "rmse", "spearmanr"] 23 | normalization: False 24 | num_mlp_layer: 2 25 | 26 | eval_metric: spearmanr 27 | 28 | optimizer: 29 | class: Adam 30 | lr: 5.0e-5 31 | 32 | fix_encoder: True 33 | 34 | engine: 35 | gpus: [0, 1, 2, 3] 36 | batch_size: 32 37 | 38 | train: 39 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ESM/subloc_ESM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: SubcellularLocalization 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: TruncateProtein 12 | max_length: 550 13 | random: True 14 | - class: ProteinView 15 | view: "residue" 16 | 17 | task: 18 | class: PropertyPrediction 19 | model: 20 | class: ESM 21 | path: ~/scratch/esm-model-weights/ 22 | model: ESM-1b 23 | criterion: ce 24 | metric: ["acc", "mcc"] 25 | num_mlp_layer: 2 26 | 27 | eval_metric: accuracy 28 | 29 | optimizer: 30 | class: Adam 31 | lr: 2.0e-4 32 | 33 | engine: 34 | gpus: [0, 1, 2, 3] 35 | batch_size: 4 36 | 37 | lr_ratio: 0.1 38 | 39 | train: 40 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ESM/subloc_ESM_fix.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: SubcellularLocalization 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ESM 18 | path: ~/scratch/esm-model-weights/ 19 | model: ESM-1b 20 | readout: mean 21 | criterion: ce 22 | metric: ["acc", "mcc"] 23 | num_mlp_layer: 2 24 | num_class: 10 25 | 26 | eval_metric: accuracy 27 | 28 | optimizer: 29 | class: Adam 30 | lr: 5.0e-5 31 | 32 | fix_encoder: True 33 | 34 | engine: 35 | gpus: [0, 1, 2, 3] 36 | batch_size: 32 37 | 38 | train: 39 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ESM/thermo_ESM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Thermostability 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ESM 18 | path: ~/scratch/protein-model-weights/esm-model-weights/ 19 | model: ESM-1b 20 | criterion: mse 21 | metric: ["mae", "rmse", "spearmanr"] 22 | normalization: False 23 | num_mlp_layer: 2 24 | 25 | eval_metric: spearmanr 26 | 27 | optimizer: 28 | class: Adam 29 | lr: 2.0e-4 30 | 31 | lr_ratio: 0.1 32 | 33 | engine: 34 | gpus: [0, 1, 2, 3] 35 | batch_size: 2 36 | 37 | train: 38 | num_epoch: 100 39 | -------------------------------------------------------------------------------- /config/single_task/ESM/thermo_ESM_fix.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Thermostability 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ESM 18 | path: ~/scratch/protein-model-weights/esm-model-weights/ 19 | model: ESM-1b 20 | readout: mean 21 | criterion: mse 22 | metric: ["mae", "rmse", "spearmanr"] 23 | normalization: False 24 | num_mlp_layer: 2 25 | 26 | eval_metric: spearmanr 27 | 28 | optimizer: 29 | class: Adam 30 | lr: 5.0e-5 31 | 32 | fix_encoder: True 33 | 34 | engine: 35 | gpus: [0, 1, 2, 3] 36 | batch_size: 8 37 | 38 | train: 39 | num_epoch: 100 40 | -------------------------------------------------------------------------------- /config/single_task/ESM/yeast_ESM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: YeastPPI 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: TruncateProtein 12 | max_length: 550 13 | random: True 14 | keys: [ "graph1", "graph2" ] 15 | - class: ProteinView 16 | view: "residue" 17 | keys: [ "graph1", "graph2" ] 18 | 19 | test_split: test 20 | 21 | task: 22 | class: InteractionPrediction 23 | model: 24 | class: ESM 25 | path: ~/scratch/esm-model-weights/ 26 | model: ESM-1b 27 | criterion: ce 28 | metric: ["acc", "mcc"] 29 | num_mlp_layer: 2 30 | num_class: 2 31 | 32 | eval_metric: accuracy 33 | 34 | optimizer: 35 | class: Adam 36 | lr: 2.0e-4 37 | 38 | engine: 39 | gpus: [0, 1, 2, 3] 40 | batch_size: 2 41 | 42 | lr_ratio: 0.1 43 | 44 | train: 45 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ESM/yeast_ESM_fix.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: YeastPPI 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | keys: [ "graph1", "graph2" ] 14 | 15 | test_split: test 16 | 17 | task: 18 | class: InteractionPrediction 19 | model: 20 | class: ESM 21 | path: ~/scratch/esm-model-weights/ 22 | model: ESM-1b 23 | readout: mean 24 | criterion: ce 25 | metric: ["acc", "mcc"] 26 | num_mlp_layer: 2 27 | num_class: 2 28 | 29 | eval_metric: accuracy 30 | 31 | optimizer: 32 | class: Adam 33 | lr: 5.0e-5 34 | 35 | fix_encoder: True 36 | fix_encoder2: True 37 | 38 | engine: 39 | gpus: [0, 1, 2, 3] 40 | batch_size: 8 41 | 42 | train: 43 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/LSTM/aav_LSTM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: AAV 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | keep_mutation_region: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | 15 | task: 16 | class: PropertyPrediction 17 | model: 18 | class: ProteinLSTM 19 | input_dim: 21 20 | hidden_dim: 640 21 | num_layers: 3 22 | criterion: mse 23 | metric: ["mae", "rmse", "spearmanr"] 24 | normalization: False 25 | num_mlp_layer: 2 26 | 27 | eval_metric: spearmanr 28 | 29 | optimizer: 30 | class: Adam 31 | lr: 5.0e-5 32 | 33 | engine: 34 | gpus: [0, 1, 2, 3] 35 | batch_size: 32 36 | 37 | train: 38 | num_epoch: 100 39 | -------------------------------------------------------------------------------- /config/single_task/LSTM/beta_LSTM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: BetaLactamase 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProteinLSTM 18 | input_dim: 21 19 | hidden_dim: 640 20 | num_layers: 3 21 | criterion: mse 22 | metric: ["mae", "rmse", "spearmanr"] 23 | normalization: False 24 | num_mlp_layer: 2 25 | 26 | eval_metric: spearmanr 27 | 28 | optimizer: 29 | class: Adam 30 | lr: 5.0e-5 31 | 32 | engine: 33 | gpus: [0, 1, 2, 3] 34 | batch_size: 32 35 | 36 | train: 37 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/LSTM/bindingdb_LSTM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: BindingDB 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | keys: "graph1" 14 | 15 | test_split: holdout_test 16 | 17 | task: 18 | class: InteractionPrediction 19 | model: 20 | class: ProteinLSTM 21 | input_dim: 21 22 | hidden_dim: 640 23 | num_layers: 3 24 | model2: 25 | class: GIN 26 | input_dim: 66 27 | hidden_dims: [ 256, 256, 256, 256 ] 28 | batch_norm: yes 29 | short_cut: yes 30 | concat_hidden: yes 31 | criterion: mse 32 | metric: [ "mae", "rmse", "spearmanr" ] 33 | num_mlp_layer: 2 34 | normalization: False 35 | 36 | eval_metric: root mean squared error 37 | 38 | optimizer: 39 | class: Adam 40 | lr: 5.0e-5 41 | 42 | engine: 43 | gpus: [0, 1, 2, 3] 44 | batch_size: 32 45 | 46 | train: 47 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/LSTM/binloc_LSTM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: BinaryLocalization 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProteinLSTM 18 | input_dim: 21 19 | hidden_dim: 640 20 | num_layers: 3 21 | criterion: ce 22 | metric: ["acc", "mcc"] 23 | num_mlp_layer: 2 24 | num_class: 2 25 | 26 | eval_metric: accuracy 27 | 28 | optimizer: 29 | class: Adam 30 | lr: 5.0e-5 31 | 32 | engine: 33 | gpus: [0, 1, 2, 3] 34 | batch_size: 32 35 | 36 | train: 37 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/LSTM/contact_LSTM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: ProteinNet 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | test_split: test 15 | 16 | task: 17 | class: ContactPrediction 18 | model: 19 | class: ProteinLSTM 20 | input_dim: 21 21 | hidden_dim: 640 22 | num_layers: 3 23 | criterion: bce 24 | metric: ["accuracy", "prec@L5", "prec@5"] 25 | max_length: 400 26 | random_truncate: yes 27 | threshold: 8.0 28 | gap: 6 29 | num_mlp_layer: 2 30 | 31 | eval_metric: prec@L5 32 | 33 | optimizer: 34 | class: Adam 35 | lr: 5.0e-5 36 | 37 | engine: 38 | gpus: [0, 1, 2, 3] 39 | batch_size: 3 40 | 41 | train: 42 | num_epoch: 50 -------------------------------------------------------------------------------- /config/single_task/LSTM/fluorescence_LSTM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Fluorescence 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProteinLSTM 18 | input_dim: 21 19 | hidden_dim: 640 20 | num_layers: 3 21 | criterion: mse 22 | metric: ["mae", "rmse", "spearmanr"] 23 | normalization: False 24 | num_mlp_layer: 2 25 | 26 | eval_metric: spearmanr 27 | 28 | optimizer: 29 | class: Adam 30 | lr: 5.0e-5 31 | 32 | engine: 33 | gpus: [0, 1, 2, 3] 34 | batch_size: 32 35 | 36 | train: 37 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/LSTM/fold_LSTM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Fold 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | test_split: test_fold_holdout 15 | 16 | task: 17 | class: PropertyPrediction 18 | model: 19 | class: ProteinLSTM 20 | input_dim: 21 21 | hidden_dim: 640 22 | num_layers: 3 23 | criterion: ce 24 | metric: ["acc", "mcc"] 25 | num_mlp_layer: 2 26 | num_class: 1195 27 | 28 | eval_metric: accuracy 29 | 30 | optimizer: 31 | class: Adam 32 | lr: 5.0e-5 33 | 34 | engine: 35 | gpus: [0, 1, 2, 3] 36 | batch_size: 16 37 | 38 | train: 39 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/LSTM/gb1_LSTM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: GB1 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProteinLSTM 18 | input_dim: 21 19 | hidden_dim: 640 20 | num_layers: 3 21 | criterion: mse 22 | metric: ["mae", "rmse", "spearmanr"] 23 | normalization: False 24 | num_mlp_layer: 2 25 | 26 | eval_metric: spearmanr 27 | 28 | optimizer: 29 | class: Adam 30 | lr: 5.0e-5 31 | 32 | engine: 33 | gpus: [0, 1, 2, 3] 34 | batch_size: 32 35 | 36 | train: 37 | num_epoch: 100 38 | -------------------------------------------------------------------------------- /config/single_task/LSTM/human_LSTM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: HumanPPI 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | keys: [ "graph1", "graph2" ] 14 | 15 | test_split: test 16 | 17 | task: 18 | class: InteractionPrediction 19 | model: 20 | class: ProteinLSTM 21 | input_dim: 21 22 | hidden_dim: 640 23 | num_layers: 3 24 | criterion: ce 25 | metric: ["acc", "mcc"] 26 | num_mlp_layer: 2 27 | num_class: 2 28 | 29 | eval_metric: accuracy 30 | 31 | optimizer: 32 | class: Adam 33 | lr: 5.0e-5 34 | 35 | engine: 36 | gpus: [0, 1, 2, 3] 37 | batch_size: 32 38 | 39 | train: 40 | num_epoch: 50 -------------------------------------------------------------------------------- /config/single_task/LSTM/pdbbind_LSTM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: PDBBind 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | keys: "graph1" 14 | 15 | test_split: test 16 | 17 | task: 18 | class: InteractionPrediction 19 | model: 20 | class: ProteinLSTM 21 | input_dim: 21 22 | hidden_dim: 640 23 | num_layers: 3 24 | model2: 25 | class: GIN 26 | input_dim: 66 27 | hidden_dims: [ 256, 256, 256, 256 ] 28 | batch_norm: yes 29 | short_cut: yes 30 | concat_hidden: yes 31 | criterion: mse 32 | metric: [ "mae", "rmse", "spearmanr" ] 33 | num_mlp_layer: 2 34 | normalization: False 35 | 36 | eval_metric: root mean squared error 37 | 38 | optimizer: 39 | class: Adam 40 | lr: 5.0e-5 41 | 42 | engine: 43 | gpus: [0, 1, 2, 3] 44 | batch_size: 32 45 | 46 | train: 47 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/LSTM/ppi_affinity_LSTM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: PPIAffinity 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | keys: [ "graph1", "graph2" ] 14 | 15 | test_split: test 16 | 17 | task: 18 | class: InteractionPrediction 19 | model: 20 | class: ProteinLSTM 21 | input_dim: 21 22 | hidden_dim: 640 23 | num_layers: 3 24 | criterion: mse 25 | metric: [ "mae", "rmse", "spearmanr" ] 26 | num_mlp_layer: 2 27 | normalization: False 28 | 29 | eval_metric: root mean squared error 30 | 31 | optimizer: 32 | class: Adam 33 | lr: 5.0e-5 34 | 35 | engine: 36 | gpus: [0, 1, 2, 3] 37 | batch_size: 32 38 | 39 | train: 40 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/LSTM/solubility_LSTM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Solubility 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProteinLSTM 18 | input_dim: 21 19 | hidden_dim: 640 20 | num_layers: 3 21 | criterion: ce 22 | metric: ["acc", "mcc"] 23 | num_mlp_layer: 2 24 | num_class: 2 25 | 26 | eval_metric: accuracy 27 | 28 | optimizer: 29 | class: Adam 30 | lr: 5.0e-5 31 | 32 | engine: 33 | gpus: [0, 1, 2, 3] 34 | batch_size: 32 35 | 36 | train: 37 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/LSTM/ss_LSTM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: SecondaryStructure 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | test_split: cb513 15 | 16 | task: 17 | class: NodePropertyPrediction 18 | model: 19 | class: ProteinLSTM 20 | input_dim: 21 21 | hidden_dim: 640 22 | num_layers: 3 23 | criterion: ce 24 | metric: ["micro_acc", "macro_acc"] 25 | num_mlp_layer: 2 26 | num_class: 3 27 | 28 | eval_metric: macro_acc 29 | 30 | optimizer: 31 | class: Adam 32 | lr: 5.0e-5 33 | 34 | engine: 35 | gpus: [0, 1, 2, 3] 36 | batch_size: 16 37 | 38 | train: 39 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/LSTM/stability_LSTM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Stability 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProteinLSTM 18 | input_dim: 21 19 | hidden_dim: 640 20 | num_layers: 3 21 | criterion: mse 22 | metric: ["mae", "rmse", "spearmanr"] 23 | normalization: False 24 | num_mlp_layer: 2 25 | 26 | eval_metric: spearmanr 27 | 28 | optimizer: 29 | class: Adam 30 | lr: 5.0e-5 31 | 32 | engine: 33 | gpus: [0, 1, 2, 3] 34 | batch_size: 32 35 | 36 | train: 37 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/LSTM/subloc_LSTM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: SubcellularLocalization 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProteinLSTM 18 | input_dim: 21 19 | hidden_dim: 640 20 | num_layers: 3 21 | criterion: ce 22 | metric: ["acc", "mcc"] 23 | num_mlp_layer: 2 24 | num_class: 10 25 | 26 | eval_metric: accuracy 27 | 28 | optimizer: 29 | class: Adam 30 | lr: 5.0e-5 31 | 32 | engine: 33 | gpus: [0, 1, 2, 3] 34 | batch_size: 32 35 | 36 | train: 37 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/LSTM/thermo_LSTM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Thermostability 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProteinLSTM 18 | input_dim: 21 19 | hidden_dim: 640 20 | num_layers: 3 21 | criterion: mse 22 | metric: ["mae", "rmse", "spearmanr"] 23 | normalization: False 24 | num_mlp_layer: 2 25 | 26 | eval_metric: spearmanr 27 | 28 | optimizer: 29 | class: Adam 30 | lr: 5.0e-5 31 | 32 | engine: 33 | gpus: [0, 1, 2, 3] 34 | batch_size: 4 35 | 36 | train: 37 | num_epoch: 100 38 | -------------------------------------------------------------------------------- /config/single_task/LSTM/yeast_LSTM.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: YeastPPI 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | keys: [ "graph1", "graph2" ] 14 | 15 | test_split: test 16 | 17 | task: 18 | class: InteractionPrediction 19 | model: 20 | class: ProteinLSTM 21 | input_dim: 21 22 | hidden_dim: 640 23 | num_layers: 3 24 | criterion: ce 25 | metric: ["acc", "mcc"] 26 | num_mlp_layer: 2 27 | num_class: 2 28 | 29 | eval_metric: accuracy 30 | 31 | optimizer: 32 | class: Adam 33 | lr: 5.0e-5 34 | 35 | engine: 36 | gpus: [0, 1, 2, 3] 37 | batch_size: 32 38 | 39 | train: 40 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/Moran/aav_Moran.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: AAV 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | keep_mutation_region: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | 15 | task: 16 | class: PropertyPrediction 17 | model: 18 | class: Physicochemical 19 | path: ~/scratch/protein-model-weights/physicochemical 20 | type: moran 21 | nlag: 30 22 | hidden_dims: [512] 23 | criterion: mse 24 | metric: ["mae", "rmse", "spearmanr"] 25 | normalization: False 26 | num_mlp_layer: 2 27 | 28 | eval_metric: spearmanr 29 | 30 | optimizer: 31 | class: Adam 32 | lr: 1.0e-4 33 | 34 | engine: 35 | gpus: [0, 1, 2, 3] 36 | batch_size: 16 37 | 38 | train: 39 | num_epoch: 100 40 | -------------------------------------------------------------------------------- /config/single_task/Moran/beta_Moran.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: BetaLactamase 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: Physicochemical 18 | path: ~/scratch/physicochemical 19 | type: moran 20 | nlag: 30 21 | hidden_dims: [512] 22 | criterion: mse 23 | metric: ["mae", "rmse", "spearmanr"] 24 | normalization: False 25 | num_mlp_layer: 2 26 | 27 | eval_metric: spearmanr 28 | 29 | optimizer: 30 | class: Adam 31 | lr: 1.0e-4 32 | 33 | engine: 34 | gpus: [0, 1, 2, 3] 35 | batch_size: 16 36 | 37 | train: 38 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/Moran/binloc_Moran.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: BinaryLocalization 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: Physicochemical 18 | path: ~/scratch/physicochemical 19 | type: moran 20 | nlag: 30 21 | hidden_dims: [512] 22 | criterion: ce 23 | metric: ["acc", "mcc"] 24 | num_mlp_layer: 2 25 | 26 | eval_metric: accuracy 27 | 28 | optimizer: 29 | class: Adam 30 | lr: 1.0e-4 31 | 32 | engine: 33 | gpus: [0, 1, 2, 3] 34 | batch_size: 16 35 | 36 | train: 37 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/Moran/fluorescence_Moran.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Fluorescence 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: Physicochemical 18 | path: ~/scratch/physicochemical 19 | type: moran 20 | nlag: 30 21 | hidden_dims: [512] 22 | criterion: mse 23 | metric: ["mae", "rmse", "spearmanr"] 24 | normalization: False 25 | num_mlp_layer: 2 26 | 27 | eval_metric: spearmanr 28 | 29 | optimizer: 30 | class: Adam 31 | lr: 1.0e-4 32 | 33 | engine: 34 | gpus: [0, 1, 2, 3] 35 | batch_size: 16 36 | 37 | train: 38 | num_epoch: 100 39 | -------------------------------------------------------------------------------- /config/single_task/Moran/fold_Moran.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Fold 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | test_split: test_fold_holdout 15 | 16 | task: 17 | class: PropertyPrediction 18 | model: 19 | class: Physicochemical 20 | path: ~/scratch/physicochemical 21 | type: moran 22 | nlag: 30 23 | hidden_dims: [512] 24 | criterion: ce 25 | metric: ["acc", "mcc"] 26 | num_class: 1195 27 | num_mlp_layer: 2 28 | 29 | eval_metric: accuracy 30 | 31 | optimizer: 32 | class: Adam 33 | lr: 1.0e-4 34 | 35 | engine: 36 | gpus: [0, 1, 2, 3] 37 | batch_size: 16 38 | 39 | train: 40 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/Moran/gb1_Moran.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: GB1 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: Physicochemical 18 | path: ~/scratch/protein-model-weights/physicochemical 19 | type: moran 20 | nlag: 30 21 | hidden_dims: [512] 22 | criterion: mse 23 | metric: ["mae", "rmse", "spearmanr"] 24 | normalization: False 25 | num_mlp_layer: 2 26 | 27 | eval_metric: spearmanr 28 | 29 | optimizer: 30 | class: Adam 31 | lr: 1.0e-4 32 | 33 | engine: 34 | gpus: [0, 1, 2, 3] 35 | batch_size: 16 36 | 37 | train: 38 | num_epoch: 100 39 | -------------------------------------------------------------------------------- /config/single_task/Moran/human_Moran.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: HumanPPI 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | keys: [ "graph1", "graph2" ] 14 | 15 | test_split: test 16 | 17 | task: 18 | class: InteractionPrediction 19 | model: 20 | class: Physicochemical 21 | path: ~/scratch/physicochemical 22 | type: moran 23 | nlag: 30 24 | hidden_dims: [512] 25 | criterion: ce 26 | metric: ["acc", "mcc"] 27 | num_mlp_layer: 2 28 | num_class: 2 29 | 30 | eval_metric: accuracy 31 | 32 | optimizer: 33 | class: Adam 34 | lr: 1.0e-4 35 | 36 | engine: 37 | gpus: [0, 1, 2, 3] 38 | batch_size: 16 39 | 40 | train: 41 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/Moran/ppi_affinity_Moran.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: PPIAffinity 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | keys: [ "graph1", "graph2" ] 14 | 15 | test_split: test 16 | 17 | task: 18 | class: InteractionPrediction 19 | model: 20 | class: Physicochemical 21 | path: ~/scratch/physicochemical 22 | type: moran 23 | nlag: 30 24 | hidden_dims: [512] 25 | normalization: False 26 | criterion: mse 27 | metric: ["mae", "rmse", "spearmanr"] 28 | num_mlp_layer: 2 29 | 30 | eval_metric: root mean squared error 31 | 32 | optimizer: 33 | class: Adam 34 | lr: 1.0e-4 35 | 36 | engine: 37 | gpus: [0, 1, 2, 3] 38 | batch_size: 16 39 | 40 | train: 41 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/Moran/solubility_Moran.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Solubility 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: Physicochemical 18 | path: ~/scratch/physicochemical 19 | type: moran 20 | nlag: 30 21 | hidden_dims: [512] 22 | criterion: ce 23 | metric: ["acc", "mcc"] 24 | num_class: 2 25 | num_mlp_layer: 2 26 | 27 | eval_metric: accuracy 28 | 29 | optimizer: 30 | class: Adam 31 | lr: 1.0e-4 32 | 33 | engine: 34 | gpus: [0, 1, 2, 3] 35 | batch_size: 16 36 | 37 | train: 38 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/Moran/stability_Moran.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Stability 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: Physicochemical 18 | path: ~/scratch/physicochemical 19 | type: moran 20 | nlag: 30 21 | hidden_dims: [512] 22 | criterion: mse 23 | metric: ["mae", "rmse", "spearmanr"] 24 | normalization: False 25 | num_mlp_layer: 2 26 | 27 | eval_metric: spearmanr 28 | 29 | optimizer: 30 | class: Adam 31 | lr: 1.0e-4 32 | 33 | engine: 34 | gpus: [0, 1, 2, 3] 35 | batch_size: 16 36 | 37 | train: 38 | num_epoch: 100 39 | -------------------------------------------------------------------------------- /config/single_task/Moran/subloc_Moran.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: SubcellularLocalization 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: Physicochemical 18 | path: ~/scratch/physicochemical 19 | type: moran 20 | nlag: 30 21 | hidden_dims: [512] 22 | criterion: ce 23 | metric: ["acc", "mcc"] 24 | num_mlp_layer: 2 25 | 26 | eval_metric: accuracy 27 | 28 | optimizer: 29 | class: Adam 30 | lr: 1.0e-4 31 | 32 | engine: 33 | gpus: [0, 1, 2, 3] 34 | batch_size: 16 35 | 36 | train: 37 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/Moran/thermo_Moran.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Thermostability 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: Physicochemical 18 | path: ~/scratch/protein-model-weights/physicochemical 19 | type: moran 20 | nlag: 30 21 | hidden_dims: [512] 22 | criterion: mse 23 | metric: ["mae", "rmse", "spearmanr"] 24 | normalization: False 25 | num_mlp_layer: 2 26 | 27 | eval_metric: spearmanr 28 | 29 | optimizer: 30 | class: Adam 31 | lr: 1.0e-4 32 | 33 | engine: 34 | gpus: [0, 1, 2, 3] 35 | batch_size: 16 36 | 37 | train: 38 | num_epoch: 100 39 | -------------------------------------------------------------------------------- /config/single_task/Moran/yeast_Moran.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: YeastPPI 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | keys: [ "graph1", "graph2" ] 14 | 15 | test_split: test 16 | 17 | task: 18 | class: InteractionPrediction 19 | model: 20 | class: Physicochemical 21 | path: ~/scratch/physicochemical 22 | type: moran 23 | nlag: 30 24 | hidden_dims: [512] 25 | criterion: ce 26 | metric: ["acc", "mcc"] 27 | num_mlp_layer: 2 28 | num_class: 2 29 | 30 | eval_metric: accuracy 31 | 32 | optimizer: 33 | class: Adam 34 | lr: 1.0e-4 35 | 36 | engine: 37 | gpus: [0, 1, 2, 3] 38 | batch_size: 16 39 | 40 | train: 41 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ProtBert/aav_ProtBert.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: AAV 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | keep_mutation_region: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | 15 | task: 16 | class: PropertyPrediction 17 | model: 18 | class: ProtBert 19 | path: ~/scratch/protbert-model-weights/ 20 | criterion: mse 21 | metric: ["mae", "rmse", "spearmanr"] 22 | normalization: False 23 | num_mlp_layer: 2 24 | 25 | eval_metric: spearmanr 26 | 27 | optimizer: 28 | class: Adam 29 | lr: 5.0e-5 30 | 31 | lr_ratio: 0.1 32 | 33 | engine: 34 | gpus: [0, 1, 2, 3] 35 | batch_size: 16 36 | 37 | train: 38 | num_epoch: 100 39 | -------------------------------------------------------------------------------- /config/single_task/ProtBert/aav_ProtBert_fix.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: AAV 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | keep_mutation_region: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | 15 | task: 16 | class: PropertyPrediction 17 | model: 18 | class: ProtBert 19 | path: ~/scratch/protein-model-weights/protbert-model-weights/ 20 | readout: mean 21 | criterion: mse 22 | metric: ["mae", "rmse", "spearmanr"] 23 | normalization: False 24 | num_mlp_layer: 2 25 | 26 | eval_metric: spearmanr 27 | 28 | optimizer: 29 | class: Adam 30 | lr: 5.0e-5 31 | 32 | fix_encoder: True 33 | 34 | engine: 35 | gpus: [0, 1, 2, 3] 36 | batch_size: 64 37 | 38 | train: 39 | num_epoch: 100 40 | -------------------------------------------------------------------------------- /config/single_task/ProtBert/beta_ProtBert.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: BetaLactamase 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProtBert 18 | path: ~/scratch/protbert-model-weights/ 19 | criterion: mse 20 | metric: ["mae", "rmse", "spearmanr"] 21 | normalization: False 22 | num_mlp_layer: 2 23 | 24 | eval_metric: spearmanr 25 | 26 | optimizer: 27 | class: Adam 28 | lr: 5.0e-5 29 | 30 | lr_ratio: 0.1 31 | 32 | engine: 33 | gpus: [0, 1, 2, 3] 34 | batch_size: 16 35 | 36 | train: 37 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ProtBert/beta_ProtBert_fix.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: BetaLactamase 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProtBert 18 | path: ~/scratch/protbert-model-weights/ 19 | readout: mean 20 | criterion: mse 21 | metric: ["mae", "rmse", "spearmanr"] 22 | normalization: False 23 | num_mlp_layer: 2 24 | 25 | eval_metric: spearmanr 26 | 27 | optimizer: 28 | class: Adam 29 | lr: 5.0e-5 30 | 31 | fix_encoder: True 32 | 33 | engine: 34 | gpus: [0, 1, 2, 3] 35 | batch_size: 64 36 | 37 | train: 38 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ProtBert/bindingdb_ProtBert.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: BindingDB 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: TruncateProtein 12 | max_length: 550 13 | random: False 14 | keys: "graph1" 15 | - class: ProteinView 16 | view: "residue" 17 | keys: "graph1" 18 | 19 | test_split: holdout_test 20 | 21 | task: 22 | class: InteractionPrediction 23 | model: 24 | class: ProtBert 25 | path: ~/scratch/protbert-model-weights/ 26 | model2: 27 | class: GIN 28 | input_dim: 66 29 | hidden_dims: [ 256, 256, 256, 256 ] 30 | batch_norm: yes 31 | short_cut: yes 32 | concat_hidden: yes 33 | criterion: mse 34 | metric: [ "mae", "rmse", "spearmanr" ] 35 | num_mlp_layer: 2 36 | normalization: False 37 | 38 | eval_metric: root mean squared error 39 | 40 | optimizer: 41 | class: Adam 42 | lr: 5.0e-5 43 | 44 | lr_ratio: 0.1 45 | 46 | engine: 47 | gpus: [0, 1, 2, 3] 48 | batch_size: 2 49 | 50 | train: 51 | num_epoch: 100 52 | -------------------------------------------------------------------------------- /config/single_task/ProtBert/bindingdb_ProtBert_fix.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: BindingDB 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: TruncateProtein 12 | max_length: 550 13 | random: False 14 | keys: "graph1" 15 | - class: ProteinView 16 | view: "residue" 17 | keys: "graph1" 18 | 19 | test_split: holdout_test 20 | 21 | task: 22 | class: InteractionPrediction 23 | model: 24 | class: ProtBert 25 | path: ~/scratch/protbert-model-weights/ 26 | readout: mean 27 | model2: 28 | class: GIN 29 | input_dim: 66 30 | hidden_dims: [ 256, 256, 256, 256 ] 31 | batch_norm: yes 32 | short_cut: yes 33 | concat_hidden: yes 34 | criterion: mse 35 | metric: [ "mae", "rmse", "spearmanr" ] 36 | num_mlp_layer: 2 37 | normalization: False 38 | 39 | eval_metric: root mean squared error 40 | 41 | optimizer: 42 | class: Adam 43 | lr: 5.0e-5 44 | 45 | fix_encoder: True 46 | 47 | engine: 48 | gpus: [0, 1, 2, 3] 49 | batch_size: 32 50 | 51 | train: 52 | num_epoch: 100 53 | -------------------------------------------------------------------------------- /config/single_task/ProtBert/binloc_ProtBert.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: BinaryLocalization 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProtBert 18 | path: ~/scratch/protbert-model-weights/ 19 | criterion: ce 20 | metric: ["acc", "mcc"] 21 | num_mlp_layer: 2 22 | num_class: 2 23 | 24 | eval_metric: accuracy 25 | 26 | optimizer: 27 | class: Adam 28 | lr: 5.0e-5 29 | 30 | lr_ratio: 0.1 31 | 32 | engine: 33 | gpus: [0, 1, 2, 3] 34 | batch_size: 2 35 | 36 | train: 37 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ProtBert/binloc_ProtBert_fix.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: BinaryLocalization 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProtBert 18 | path: ~/scratch/protbert-model-weights/ 19 | readout: mean 20 | criterion: ce 21 | metric: ["acc", "mcc"] 22 | num_mlp_layer: 2 23 | num_class: 2 24 | 25 | eval_metric: accuracy 26 | 27 | optimizer: 28 | class: Adam 29 | lr: 5.0e-5 30 | 31 | fix_encoder: True 32 | 33 | engine: 34 | gpus: [0, 1, 2, 3] 35 | batch_size: 64 36 | 37 | train: 38 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ProtBert/contact_ProtBert.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: ProteinNet 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | test_split: test 15 | test_batch_size: 1 16 | 17 | task: 18 | class: ContactPrediction 19 | model: 20 | class: ProtBert 21 | path: ~/scratch/protbert-model-weights/ 22 | criterion: bce 23 | metric: ["accuracy", "prec@L5", "prec@5"] 24 | max_length: 400 25 | random_truncate: yes 26 | threshold: 8.0 27 | gap: 6 28 | num_mlp_layer: 2 29 | 30 | eval_metric: prec@L5 31 | 32 | optimizer: 33 | class: Adam 34 | lr: 5.0e-5 35 | 36 | lr_ratio: 0.1 37 | 38 | engine: 39 | gpus: [0, 1, 2, 3] 40 | batch_size: 2 41 | 42 | train: 43 | num_epoch: 50 -------------------------------------------------------------------------------- /config/single_task/ProtBert/contact_ProtBert_fix.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: ProteinNet 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: TruncateProtein 12 | max_length: 1022 13 | random: False 14 | - class: ProteinView 15 | view: "residue" 16 | 17 | test_split: test 18 | test_batch_size: 1 19 | 20 | task: 21 | class: ContactPrediction 22 | model: 23 | class: ProtBert 24 | path: ~/scratch/protbert-model-weights/ 25 | readout: mean 26 | criterion: bce 27 | metric: ["accuracy", "prec@L5", "prec@5"] 28 | max_length: 400 29 | random_truncate: yes 30 | threshold: 8.0 31 | gap: 6 32 | num_mlp_layer: 2 33 | 34 | eval_metric: prec@L5 35 | 36 | optimizer: 37 | class: Adam 38 | lr: 5.0e-5 39 | 40 | fix_encoder: True 41 | 42 | engine: 43 | gpus: [0, 1, 2, 3] 44 | batch_size: 6 45 | 46 | train: 47 | num_epoch: 50 -------------------------------------------------------------------------------- /config/single_task/ProtBert/fluorescence_ProtBert.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Fluorescence 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProtBert 18 | path: ~/scratch/protbert-model-weights/ 19 | criterion: mse 20 | metric: ["mae", "rmse", "spearmanr"] 21 | normalization: False 22 | num_mlp_layer: 2 23 | 24 | eval_metric: spearmanr 25 | 26 | optimizer: 27 | class: Adam 28 | lr: 5.0e-5 29 | 30 | lr_ratio: 0.1 31 | 32 | engine: 33 | gpus: [0, 1, 2, 3] 34 | batch_size: 16 35 | 36 | train: 37 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ProtBert/fluorescence_ProtBert_fix.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Fluorescence 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProtBert 18 | path: ~/scratch/protbert-model-weights/ 19 | readout: mean 20 | criterion: mse 21 | metric: ["mae", "rmse", "spearmanr"] 22 | normalization: False 23 | num_mlp_layer: 2 24 | 25 | eval_metric: spearmanr 26 | 27 | optimizer: 28 | class: Adam 29 | lr: 5.0e-5 30 | 31 | fix_encoder: True 32 | 33 | engine: 34 | gpus: [0, 1, 2, 3] 35 | batch_size: 64 36 | 37 | train: 38 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ProtBert/fold_ProtBert.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Fold 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | test_split: test_fold_holdout 15 | 16 | task: 17 | class: PropertyPrediction 18 | model: 19 | class: ProtBert 20 | path: ~/scratch/protbert-model-weights/ 21 | criterion: ce 22 | metric: ["acc", "mcc"] 23 | num_mlp_layer: 2 24 | num_class: 1195 25 | 26 | eval_metric: accuracy 27 | 28 | optimizer: 29 | class: Adam 30 | lr: 5.0e-5 31 | 32 | lr_ratio: 0.1 33 | 34 | engine: 35 | gpus: [0, 1, 2, 3] 36 | batch_size: 2 37 | 38 | train: 39 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ProtBert/fold_ProtBert_fix.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Fold 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | test_split: test_fold_holdout 15 | 16 | task: 17 | class: PropertyPrediction 18 | model: 19 | class: ProtBert 20 | path: ~/scratch/protbert-model-weights/ 21 | readout: mean 22 | criterion: ce 23 | metric: ["acc", "mcc"] 24 | num_mlp_layer: 2 25 | num_class: 1195 26 | 27 | eval_metric: accuracy 28 | 29 | optimizer: 30 | class: Adam 31 | lr: 5.0e-5 32 | 33 | fix_encoder: True 34 | 35 | engine: 36 | gpus: [0, 1, 2, 3] 37 | batch_size: 64 38 | 39 | train: 40 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ProtBert/gb1_ProtBert.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: GB1 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProtBert 18 | path: ~/scratch/protbert-model-weights/ 19 | criterion: mse 20 | metric: ["mae", "rmse", "spearmanr"] 21 | normalization: False 22 | num_mlp_layer: 2 23 | 24 | eval_metric: spearmanr 25 | 26 | optimizer: 27 | class: Adam 28 | lr: 5.0e-5 29 | 30 | lr_ratio: 0.1 31 | 32 | engine: 33 | gpus: [0, 1, 2, 3] 34 | batch_size: 16 35 | 36 | train: 37 | num_epoch: 100 38 | -------------------------------------------------------------------------------- /config/single_task/ProtBert/gb1_ProtBert_fix.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: GB1 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProtBert 18 | path: ~/scratch/protein-model-weights/protbert-model-weights/ 19 | readout: mean 20 | criterion: mse 21 | metric: ["mae", "rmse", "spearmanr"] 22 | normalization: False 23 | num_mlp_layer: 2 24 | 25 | eval_metric: spearmanr 26 | 27 | optimizer: 28 | class: Adam 29 | lr: 5.0e-5 30 | 31 | fix_encoder: True 32 | 33 | engine: 34 | gpus: [0, 1, 2, 3] 35 | batch_size: 64 36 | 37 | train: 38 | num_epoch: 100 39 | -------------------------------------------------------------------------------- /config/single_task/ProtBert/human_ProtBert.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: HumanPPI 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | keys: [ "graph1", "graph2" ] 14 | 15 | test_split: test 16 | 17 | task: 18 | class: InteractionPrediction 19 | model: 20 | class: ProtBert 21 | path: ~/scratch/protbert-model-weights/ 22 | criterion: ce 23 | metric: ["acc", "mcc"] 24 | num_mlp_layer: 2 25 | num_class: 2 26 | 27 | eval_metric: accuracy 28 | 29 | optimizer: 30 | class: Adam 31 | lr: 5.0e-5 32 | 33 | lr_ratio: 0.1 34 | 35 | engine: 36 | gpus: [0, 1, 2, 3] 37 | batch_size: 1 38 | 39 | train: 40 | num_epoch: 50 -------------------------------------------------------------------------------- /config/single_task/ProtBert/human_ProtBert_fix.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: HumanPPI 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | keys: [ "graph1", "graph2" ] 14 | 15 | test_split: test 16 | 17 | task: 18 | class: InteractionPrediction 19 | model: 20 | class: ProtBert 21 | path: ~/scratch/protbert-model-weights/ 22 | readout: mean 23 | criterion: ce 24 | metric: ["acc", "mcc"] 25 | num_mlp_layer: 2 26 | num_class: 2 27 | 28 | eval_metric: accuracy 29 | 30 | optimizer: 31 | class: Adam 32 | lr: 5.0e-5 33 | 34 | fix_encoder: True 35 | fix_encoder2: True 36 | 37 | engine: 38 | gpus: [0, 1, 2, 3] 39 | batch_size: 8 40 | 41 | train: 42 | num_epoch: 50 -------------------------------------------------------------------------------- /config/single_task/ProtBert/pdbbind_ProtBert.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: PDBBind 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: TruncateProtein 12 | max_length: 550 13 | random: False 14 | keys: "graph1" 15 | - class: ProteinView 16 | view: "residue" 17 | keys: "graph1" 18 | 19 | test_split: test 20 | 21 | task: 22 | class: InteractionPrediction 23 | model: 24 | class: ProtBert 25 | path: ~/scratch/protbert-model-weights/ 26 | model2: 27 | class: GIN 28 | input_dim: 66 29 | hidden_dims: [ 256, 256, 256, 256 ] 30 | batch_norm: yes 31 | short_cut: yes 32 | concat_hidden: yes 33 | criterion: mse 34 | metric: [ "mae", "rmse", "spearmanr" ] 35 | num_mlp_layer: 2 36 | normalization: False 37 | 38 | eval_metric: root mean squared error 39 | 40 | optimizer: 41 | class: Adam 42 | lr: 5.0e-5 43 | 44 | lr_ratio: 0.1 45 | 46 | engine: 47 | gpus: [0, 1, 2, 3] 48 | batch_size: 2 49 | 50 | train: 51 | num_epoch: 100 52 | -------------------------------------------------------------------------------- /config/single_task/ProtBert/pdbbind_ProtBert_fix.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: PDBBind 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: TruncateProtein 12 | max_length: 550 13 | random: False 14 | keys: "graph1" 15 | - class: ProteinView 16 | view: "residue" 17 | keys: "graph1" 18 | 19 | test_split: test 20 | 21 | task: 22 | class: InteractionPrediction 23 | model: 24 | class: ProtBert 25 | path: ~/scratch/protbert-model-weights/ 26 | readout: mean 27 | model2: 28 | class: GIN 29 | input_dim: 66 30 | hidden_dims: [ 256, 256, 256, 256 ] 31 | batch_norm: yes 32 | short_cut: yes 33 | concat_hidden: yes 34 | criterion: mse 35 | metric: [ "mae", "rmse", "spearmanr" ] 36 | num_mlp_layer: 2 37 | normalization: False 38 | 39 | eval_metric: root mean squared error 40 | 41 | optimizer: 42 | class: Adam 43 | lr: 5.0e-5 44 | 45 | fix_encoder: True 46 | 47 | engine: 48 | gpus: [0, 1, 2, 3] 49 | batch_size: 32 50 | 51 | train: 52 | num_epoch: 100 53 | -------------------------------------------------------------------------------- /config/single_task/ProtBert/ppi_affinity_ProtBert.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: PPIAffinity 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: TruncateProtein 12 | max_length: 550 13 | random: False 14 | keys: [ "graph1", "graph2" ] 15 | - class: ProteinView 16 | view: "residue" 17 | keys: [ "graph1", "graph2" ] 18 | 19 | test_split: test 20 | 21 | task: 22 | class: InteractionPrediction 23 | model: 24 | class: ProtBert 25 | path: ~/scratch/protbert-model-weights/ 26 | criterion: mse 27 | metric: [ "mae", "rmse", "spearmanr" ] 28 | num_mlp_layer: 2 29 | normalization: False 30 | 31 | eval_metric: root mean squared error 32 | 33 | optimizer: 34 | class: Adam 35 | lr: 5.0e-5 36 | 37 | lr_ratio: 0.1 38 | 39 | engine: 40 | gpus: [0, 1, 2, 3] 41 | batch_size: 2 42 | 43 | train: 44 | num_epoch: 100 45 | -------------------------------------------------------------------------------- /config/single_task/ProtBert/ppi_affinity_ProtBert_fix.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: PPIAffinity 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: TruncateProtein 12 | max_length: 550 13 | random: False 14 | keys: [ "graph1", "graph2" ] 15 | - class: ProteinView 16 | view: "residue" 17 | keys: [ "graph1", "graph2" ] 18 | 19 | test_split: test 20 | 21 | task: 22 | class: InteractionPrediction 23 | model: 24 | class: ProtBert 25 | path: ~/scratch/protbert-model-weights/ 26 | readout: mean 27 | criterion: mse 28 | metric: [ "mae", "rmse", "spearmanr" ] 29 | num_mlp_layer: 2 30 | normalization: False 31 | 32 | eval_metric: root mean squared error 33 | 34 | optimizer: 35 | class: Adam 36 | lr: 5.0e-5 37 | 38 | fix_encoder: True 39 | fix_encoder2: True 40 | 41 | engine: 42 | gpus: [0, 1, 2, 3] 43 | batch_size: 8 44 | 45 | train: 46 | num_epoch: 100 47 | -------------------------------------------------------------------------------- /config/single_task/ProtBert/solubility_ProtBert.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Solubility 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProtBert 18 | path: ~/scratch/protbert-model-weights/ 19 | criterion: ce 20 | metric: ["acc", "mcc"] 21 | num_mlp_layer: 2 22 | num_class: 2 23 | 24 | eval_metric: accuracy 25 | 26 | optimizer: 27 | class: Adam 28 | lr: 5.0e-5 29 | 30 | lr_ratio: 0.1 31 | 32 | engine: 33 | gpus: [0, 1, 2, 3] 34 | batch_size: 2 35 | 36 | train: 37 | num_epoch: 50 -------------------------------------------------------------------------------- /config/single_task/ProtBert/solubility_ProtBert_fix.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Solubility 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProtBert 18 | path: ~/scratch/protbert-model-weights/ 19 | readout: mean 20 | criterion: ce 21 | metric: ["acc", "mcc"] 22 | num_mlp_layer: 2 23 | num_class: 2 24 | 25 | eval_metric: accuracy 26 | 27 | optimizer: 28 | class: Adam 29 | lr: 5.0e-5 30 | 31 | fix_encoder: True 32 | 33 | engine: 34 | gpus: [0, 1, 2, 3] 35 | batch_size: 8 36 | 37 | train: 38 | num_epoch: 50 -------------------------------------------------------------------------------- /config/single_task/ProtBert/ss_ProtBert.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: SecondaryStructure 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | test_split: cb513 15 | 16 | task: 17 | class: NodePropertyPrediction 18 | model: 19 | class: ProtBert 20 | path: ~/scratch/protbert-model-weights/ 21 | criterion: ce 22 | metric: ["micro_acc", "macro_acc"] 23 | num_mlp_layer: 2 24 | num_class: 3 25 | 26 | eval_metric: macro_acc 27 | 28 | optimizer: 29 | class: Adam 30 | lr: 5.0e-5 31 | 32 | lr_ratio: 0.1 33 | 34 | engine: 35 | gpus: [0, 1, 2, 3] 36 | batch_size: 1 37 | 38 | train: 39 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ProtBert/ss_ProtBert_fix.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: SecondaryStructure 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | test_split: cb513 15 | 16 | task: 17 | class: NodePropertyPrediction 18 | model: 19 | class: ProtBert 20 | path: ~/scratch/protbert-model-weights/ 21 | readout: mean 22 | criterion: ce 23 | metric: ["micro_acc", "macro_acc"] 24 | num_mlp_layer: 2 25 | num_class: 3 26 | 27 | eval_metric: macro_acc 28 | 29 | optimizer: 30 | class: Adam 31 | lr: 5.0e-5 32 | 33 | fix_encoder: True 34 | 35 | engine: 36 | gpus: [0, 1, 2, 3] 37 | batch_size: 8 38 | 39 | train: 40 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ProtBert/stability_ProtBert.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Stability 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProtBert 18 | path: ~/scratch/protbert-model-weights/ 19 | criterion: mse 20 | metric: ["mae", "rmse", "spearmanr"] 21 | normalization: False 22 | num_mlp_layer: 2 23 | 24 | eval_metric: spearmanr 25 | 26 | optimizer: 27 | class: Adam 28 | lr: 5.0e-5 29 | 30 | lr_ratio: 0.1 31 | 32 | engine: 33 | gpus: [0, 1, 2, 3] 34 | batch_size: 16 35 | 36 | train: 37 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ProtBert/stability_ProtBert_fix.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Stability 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProtBert 18 | path: ~/scratch/protbert-model-weights/ 19 | readout: mean 20 | criterion: mse 21 | metric: ["mae", "rmse", "spearmanr"] 22 | normalization: False 23 | num_mlp_layer: 2 24 | 25 | eval_metric: spearmanr 26 | 27 | optimizer: 28 | class: Adam 29 | lr: 5.0e-5 30 | 31 | fix_encoder: True 32 | 33 | engine: 34 | gpus: [0, 1, 2, 3] 35 | batch_size: 64 36 | 37 | train: 38 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ProtBert/subloc_ProtBert.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: SubcellularLocalization 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProtBert 18 | path: ~/scratch/protbert-model-weights/ 19 | criterion: ce 20 | metric: ["acc", "mcc"] 21 | num_mlp_layer: 2 22 | num_class: 10 23 | 24 | eval_metric: accuracy 25 | 26 | optimizer: 27 | class: Adam 28 | lr: 5.0e-5 29 | 30 | lr_ratio: 0.1 31 | 32 | engine: 33 | gpus: [0, 1, 2, 3] 34 | batch_size: 2 35 | 36 | train: 37 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ProtBert/subloc_ProtBert_fix.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: SubcellularLocalization 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProtBert 18 | path: ~/scratch/protbert-model-weights/ 19 | readout: mean 20 | criterion: ce 21 | metric: ["acc", "mcc"] 22 | num_mlp_layer: 2 23 | num_class: 10 24 | 25 | eval_metric: accuracy 26 | 27 | optimizer: 28 | class: Adam 29 | lr: 5.0e-5 30 | 31 | fix_encoder: True 32 | 33 | engine: 34 | gpus: [0, 1, 2, 3] 35 | batch_size: 64 36 | 37 | train: 38 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ProtBert/thermo_ProtBert.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Thermostability 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: TruncateProtein 12 | max_length: 1024 13 | random: False 14 | - class: ProteinView 15 | view: "residue" 16 | 17 | task: 18 | class: PropertyPrediction 19 | model: 20 | class: ProtBert 21 | path: ~/scratch/protbert-model-weights/ 22 | criterion: mse 23 | metric: ["mae", "rmse", "spearmanr"] 24 | normalization: False 25 | num_mlp_layer: 2 26 | 27 | eval_metric: spearmanr 28 | 29 | optimizer: 30 | class: Adam 31 | lr: 5.0e-5 32 | 33 | lr_ratio: 0.1 34 | 35 | engine: 36 | gpus: [0, 1, 2, 3] 37 | batch_size: 2 38 | 39 | train: 40 | num_epoch: 100 41 | -------------------------------------------------------------------------------- /config/single_task/ProtBert/thermo_ProtBert_fix.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Thermostability 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: TruncateProtein 12 | max_length: 1022 13 | - class: ProteinView 14 | view: "residue" 15 | 16 | task: 17 | class: PropertyPrediction 18 | model: 19 | class: ProtBert 20 | path: ~/scratch/protein-model-weights/protbert-model-weights/ 21 | readout: mean 22 | criterion: mse 23 | metric: ["mae", "rmse", "spearmanr"] 24 | normalization: False 25 | num_mlp_layer: 2 26 | 27 | eval_metric: spearmanr 28 | 29 | optimizer: 30 | class: Adam 31 | lr: 5.0e-5 32 | 33 | fix_encoder: True 34 | 35 | engine: 36 | gpus: [0, 1, 2, 3] 37 | batch_size: 16 38 | 39 | train: 40 | num_epoch: 100 41 | -------------------------------------------------------------------------------- /config/single_task/ProtBert/yeast_ProtBert.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: YeastPPI 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | keys: [ "graph1", "graph2" ] 14 | 15 | test_split: test 16 | 17 | task: 18 | class: InteractionPrediction 19 | model: 20 | class: ProtBert 21 | path: ~/scratch/protbert-model-weights/ 22 | criterion: ce 23 | metric: ["acc", "mcc"] 24 | num_mlp_layer: 2 25 | num_class: 2 26 | 27 | eval_metric: accuracy 28 | 29 | optimizer: 30 | class: Adam 31 | lr: 5.0e-5 32 | 33 | lr_ratio: 0.1 34 | 35 | engine: 36 | gpus: [0, 1, 2, 3] 37 | batch_size: 1 38 | 39 | train: 40 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ProtBert/yeast_ProtBert_fix.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: YeastPPI 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | keys: [ "graph1", "graph2" ] 14 | 15 | test_split: test 16 | 17 | task: 18 | class: InteractionPrediction 19 | model: 20 | class: ProtBert 21 | path: ~/scratch/protbert-model-weights/ 22 | readout: mean 23 | criterion: ce 24 | metric: ["acc", "mcc"] 25 | num_mlp_layer: 2 26 | num_class: 2 27 | 28 | eval_metric: accuracy 29 | 30 | optimizer: 31 | class: Adam 32 | lr: 5.0e-5 33 | 34 | fix_encoder: True 35 | fix_encoder2: True 36 | 37 | engine: 38 | gpus: [0, 1, 2, 3] 39 | batch_size: 8 40 | 41 | train: 42 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ResNet/aav_ResNet.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: AAV 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | keep_mutation_region: True 9 | transform: 10 | class: Compose 11 | transforms: 12 | - class: ProteinView 13 | view: "residue" 14 | 15 | task: 16 | class: PropertyPrediction 17 | model: 18 | class: ProteinResNet 19 | input_dim: 21 20 | hidden_dims: [512, 512, 512, 512, 512, 512, 512, 512] 21 | layer_norm: True 22 | dropout: 0.1 23 | criterion: mse 24 | metric: ["mae", "rmse", "spearmanr"] 25 | normalization: False 26 | num_mlp_layer: 2 27 | 28 | eval_metric: spearmanr 29 | 30 | optimizer: 31 | class: Adam 32 | lr: 1.0e-4 33 | 34 | engine: 35 | gpus: [0, 1, 2, 3] 36 | batch_size: 32 37 | 38 | train: 39 | num_epoch: 100 40 | -------------------------------------------------------------------------------- /config/single_task/ResNet/beta_ResNet.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: BetaLactamase 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProteinResNet 18 | input_dim: 21 19 | hidden_dims: [512, 512, 512, 512, 512, 512, 512, 512] 20 | layer_norm: True 21 | dropout: 0.1 22 | criterion: mse 23 | metric: ["mae", "rmse", "spearmanr"] 24 | normalization: False 25 | num_mlp_layer: 2 26 | 27 | eval_metric: spearmanr 28 | 29 | optimizer: 30 | class: Adam 31 | lr: 1.0e-4 32 | 33 | engine: 34 | gpus: [0, 1, 2, 3] 35 | batch_size: 32 36 | 37 | train: 38 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ResNet/bindingdb_ResNet.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: BindingDB 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | keys: "graph1" 14 | 15 | test_split: holdout_test 16 | 17 | task: 18 | class: InteractionPrediction 19 | model: 20 | class: ProteinResNet 21 | input_dim: 21 22 | hidden_dims: [ 512, 512, 512, 512, 512, 512, 512, 512 ] 23 | layer_norm: True 24 | dropout: 0.1 25 | model2: 26 | class: GIN 27 | input_dim: 66 28 | hidden_dims: [ 256, 256, 256, 256 ] 29 | batch_norm: yes 30 | short_cut: yes 31 | concat_hidden: yes 32 | criterion: mse 33 | metric: [ "mae", "rmse", "spearmanr" ] 34 | num_mlp_layer: 2 35 | normalization: False 36 | 37 | eval_metric: root mean squared error 38 | 39 | optimizer: 40 | class: Adam 41 | lr: 1.0e-4 42 | 43 | engine: 44 | gpus: [0, 1, 2, 3] 45 | batch_size: 32 46 | 47 | train: 48 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ResNet/binloc_ResNet.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: BinaryLocalization 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProteinResNet 18 | input_dim: 21 19 | hidden_dims: [ 512, 512, 512, 512, 512, 512, 512, 512 ] 20 | layer_norm: True 21 | dropout: 0.1 22 | criterion: ce 23 | metric: ["acc", "mcc"] 24 | num_mlp_layer: 2 25 | num_class: 2 26 | 27 | eval_metric: accuracy 28 | 29 | optimizer: 30 | class: Adam 31 | lr: 1.0e-4 32 | 33 | engine: 34 | gpus: [0, 1, 2, 3] 35 | batch_size: 32 36 | 37 | train: 38 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ResNet/contact_ResNet.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: ProteinNet 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | test_split: test 15 | 16 | task: 17 | class: ContactPrediction 18 | model: 19 | class: ProteinResNet 20 | input_dim: 21 21 | hidden_dims: [ 512, 512, 512, 512, 512, 512, 512, 512 ] 22 | layer_norm: True 23 | dropout: 0.1 24 | criterion: bce 25 | metric: ["accuracy", "prec@L5", "prec@5"] 26 | max_length: 400 27 | random_truncate: yes 28 | threshold: 8.0 29 | gap: 6 30 | num_mlp_layer: 2 31 | 32 | eval_metric: prec@L5 33 | 34 | optimizer: 35 | class: Adam 36 | lr: 1.0e-4 37 | 38 | engine: 39 | gpus: [0, 1, 2, 3] 40 | batch_size: 4 41 | 42 | train: 43 | num_epoch: 50 -------------------------------------------------------------------------------- /config/single_task/ResNet/fluorescence_ResNet.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Fluorescence 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProteinResNet 18 | input_dim: 21 19 | hidden_dims: [ 512, 512, 512, 512, 512, 512, 512, 512 ] 20 | layer_norm: True 21 | dropout: 0.1 22 | criterion: mse 23 | metric: ["mae", "rmse", "spearmanr"] 24 | normalization: False 25 | num_mlp_layer: 2 26 | 27 | eval_metric: spearmanr 28 | 29 | optimizer: 30 | class: Adam 31 | lr: 1.0e-4 32 | 33 | engine: 34 | gpus: [0, 1, 2, 3] 35 | batch_size: 32 36 | 37 | train: 38 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ResNet/fold_ResNet.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Fold 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | test_split: test_fold_holdout 15 | 16 | task: 17 | class: PropertyPrediction 18 | model: 19 | class: ProteinResNet 20 | input_dim: 21 21 | hidden_dims: [ 512, 512, 512, 512, 512, 512, 512, 512 ] 22 | layer_norm: True 23 | dropout: 0.1 24 | criterion: ce 25 | metric: ["acc", "mcc"] 26 | num_mlp_layer: 2 27 | num_class: 1195 28 | 29 | eval_metric: accuracy 30 | 31 | optimizer: 32 | class: Adam 33 | lr: 1.0e-4 34 | 35 | engine: 36 | gpus: [0, 1, 2, 3] 37 | batch_size: 16 38 | 39 | train: 40 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ResNet/gb1_ResNet.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: GB1 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProteinResNet 18 | input_dim: 21 19 | hidden_dims: [512, 512, 512, 512, 512, 512, 512, 512] 20 | layer_norm: True 21 | dropout: 0.1 22 | criterion: mse 23 | metric: ["mae", "rmse", "spearmanr"] 24 | normalization: False 25 | num_mlp_layer: 2 26 | 27 | eval_metric: spearmanr 28 | 29 | optimizer: 30 | class: Adam 31 | lr: 1.0e-4 32 | 33 | engine: 34 | gpus: [0, 1, 2, 3] 35 | batch_size: 32 36 | 37 | train: 38 | num_epoch: 100 39 | -------------------------------------------------------------------------------- /config/single_task/ResNet/human_ResNet.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: HumanPPI 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | keys: [ "graph1", "graph2" ] 14 | 15 | test_split: test 16 | 17 | task: 18 | class: InteractionPrediction 19 | model: 20 | class: ProteinResNet 21 | input_dim: 21 22 | hidden_dims: [ 512, 512, 512, 512, 512, 512, 512, 512 ] 23 | layer_norm: True 24 | dropout: 0.1 25 | criterion: ce 26 | metric: ["acc", "mcc"] 27 | num_mlp_layer: 2 28 | num_class: 2 29 | 30 | eval_metric: accuracy 31 | 32 | optimizer: 33 | class: Adam 34 | lr: 1.0e-4 35 | 36 | engine: 37 | gpus: [0, 1, 2, 3] 38 | batch_size: 32 39 | 40 | train: 41 | num_epoch: 50 -------------------------------------------------------------------------------- /config/single_task/ResNet/pdbbind_ResNet.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: PDBBind 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | keys: "graph1" 14 | 15 | test_split: test 16 | 17 | task: 18 | class: InteractionPrediction 19 | model: 20 | class: ProteinResNet 21 | input_dim: 21 22 | hidden_dims: [ 512, 512, 512, 512, 512, 512, 512, 512 ] 23 | layer_norm: True 24 | dropout: 0.1 25 | model2: 26 | class: GIN 27 | input_dim: 66 28 | hidden_dims: [ 256, 256, 256, 256 ] 29 | batch_norm: yes 30 | short_cut: yes 31 | concat_hidden: yes 32 | criterion: mse 33 | metric: [ "mae", "rmse", "spearmanr" ] 34 | num_mlp_layer: 2 35 | normalization: False 36 | 37 | eval_metric: root mean squared error 38 | 39 | optimizer: 40 | class: Adam 41 | lr: 1.0e-4 42 | 43 | engine: 44 | gpus: [0, 1, 2, 3] 45 | batch_size: 32 46 | 47 | train: 48 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ResNet/ppi_affinity_ResNet.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: PPIAffinity 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | keys: [ "graph1", "graph2" ] 14 | 15 | test_split: test 16 | 17 | task: 18 | class: InteractionPrediction 19 | model: 20 | class: ProteinResNet 21 | input_dim: 21 22 | hidden_dims: [ 512, 512, 512, 512, 512, 512, 512, 512 ] 23 | layer_norm: True 24 | dropout: 0.1 25 | criterion: mse 26 | metric: [ "mae", "rmse", "spearmanr" ] 27 | num_mlp_layer: 2 28 | normalization: False 29 | 30 | eval_metric: root mean squared error 31 | 32 | optimizer: 33 | class: Adam 34 | lr: 1.0e-4 35 | 36 | engine: 37 | gpus: [0, 1, 2, 3] 38 | batch_size: 32 39 | 40 | train: 41 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ResNet/solubility_ResNet.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Solubility 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProteinResNet 18 | input_dim: 21 19 | hidden_dims: [ 512, 512, 512, 512, 512, 512, 512, 512 ] 20 | layer_norm: True 21 | dropout: 0.1 22 | criterion: ce 23 | metric: ["acc", "mcc"] 24 | num_mlp_layer: 2 25 | num_class: 2 26 | 27 | eval_metric: accuracy 28 | 29 | optimizer: 30 | class: Adam 31 | lr: 1.0e-4 32 | 33 | engine: 34 | gpus: [0, 1, 2, 3] 35 | batch_size: 32 36 | 37 | train: 38 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ResNet/ss_ResNet.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: SecondaryStructure 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | test_split: cb513 15 | 16 | task: 17 | class: NodePropertyPrediction 18 | model: 19 | class: ProteinResNet 20 | input_dim: 21 21 | hidden_dims: [ 512, 512, 512, 512, 512, 512, 512, 512 ] 22 | layer_norm: True 23 | dropout: 0.1 24 | criterion: ce 25 | metric: ["micro_acc", "macro_acc"] 26 | num_mlp_layer: 2 27 | num_class: 3 28 | 29 | eval_metric: macro_acc 30 | 31 | optimizer: 32 | class: Adam 33 | lr: 1.0e-4 34 | 35 | engine: 36 | gpus: [0, 1, 2, 3] 37 | batch_size: 16 38 | 39 | train: 40 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ResNet/stability_ResNet.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Stability 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProteinResNet 18 | input_dim: 21 19 | hidden_dims: [ 512, 512, 512, 512, 512, 512, 512, 512 ] 20 | layer_norm: True 21 | dropout: 0.1 22 | criterion: mse 23 | metric: ["mae", "rmse", "spearmanr"] 24 | normalization: False 25 | num_mlp_layer: 2 26 | 27 | eval_metric: spearmanr 28 | 29 | optimizer: 30 | class: Adam 31 | lr: 2.0e-4 32 | 33 | engine: 34 | gpus: [0, 1, 2, 3] 35 | batch_size: 32 36 | 37 | train: 38 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ResNet/subloc_ResNet.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: SubcellularLocalization 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProteinResNet 18 | input_dim: 21 19 | hidden_dims: [ 512, 512, 512, 512, 512, 512, 512, 512 ] 20 | layer_norm: True 21 | dropout: 0.1 22 | criterion: ce 23 | metric: ["acc", "mcc"] 24 | num_mlp_layer: 2 25 | num_class: 10 26 | 27 | eval_metric: accuracy 28 | 29 | optimizer: 30 | class: Adam 31 | lr: 5.0e-5 32 | 33 | engine: 34 | gpus: [0, 1, 2, 3] 35 | batch_size: 32 36 | 37 | train: 38 | num_epoch: 100 -------------------------------------------------------------------------------- /config/single_task/ResNet/thermo_ResNet.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: Thermostability 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | 14 | task: 15 | class: PropertyPrediction 16 | model: 17 | class: ProteinResNet 18 | input_dim: 21 19 | hidden_dims: [512, 512, 512, 512, 512, 512, 512, 512] 20 | layer_norm: True 21 | dropout: 0.1 22 | criterion: mse 23 | metric: ["mae", "rmse", "spearmanr"] 24 | normalization: False 25 | num_mlp_layer: 2 26 | 27 | eval_metric: spearmanr 28 | 29 | optimizer: 30 | class: Adam 31 | lr: 1.0e-4 32 | 33 | engine: 34 | gpus: [0, 1, 2, 3] 35 | batch_size: 8 36 | 37 | train: 38 | num_epoch: 100 39 | -------------------------------------------------------------------------------- /config/single_task/ResNet/yeast_ResNet.yaml: -------------------------------------------------------------------------------- 1 | output_dir: ~/scratch/torchprotein_output/ 2 | 3 | dataset: 4 | class: YeastPPI 5 | path: ~/scratch/protein-datasets/ 6 | atom_feature: null 7 | bond_feature: null 8 | transform: 9 | class: Compose 10 | transforms: 11 | - class: ProteinView 12 | view: "residue" 13 | keys: [ "graph1", "graph2" ] 14 | 15 | test_split: test 16 | 17 | task: 18 | class: InteractionPrediction 19 | model: 20 | class: ProteinResNet 21 | input_dim: 21 22 | hidden_dims: [ 512, 512, 512, 512, 512, 512, 512, 512 ] 23 | layer_norm: True 24 | dropout: 0.1 25 | criterion: ce 26 | metric: ["acc", "mcc"] 27 | num_mlp_layer: 2 28 | num_class: 2 29 | 30 | eval_metric: accuracy 31 | 32 | optimizer: 33 | class: Adam 34 | lr: 1.0e-4 35 | 36 | engine: 37 | gpus: [0, 1, 2, 3] 38 | batch_size: 32 39 | 40 | train: 41 | num_epoch: 100 -------------------------------------------------------------------------------- /peer/__init__.py: -------------------------------------------------------------------------------- 1 | from .protbert import ProtBert 2 | from .engine import MultiTaskEngine 3 | from . import util 4 | 5 | __all__ = [ 6 | "ProtBert", "MultiTaskEngine", "util", 7 | ] -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=1.8.0 2 | torch-scatter>=2.0.8 3 | torch-cluster>=1.5.9 4 | decorator 5 | numpy>=1.11 6 | rdkit-pypi>=2020.9 7 | matplotlib 8 | tqdm 9 | networkx 10 | ninja 11 | jinja2 12 | easydict 13 | pyyaml 14 | lmdb 15 | fair-esm 16 | transformers>=4.18.0 --------------------------------------------------------------------------------