├── .DS_Store ├── .idea ├── .gitignore ├── .name ├── inspectionProfiles │ └── profiles_settings.xml ├── misc.xml ├── modules.xml ├── protease-gcnn-pytorch.iml └── vcs.xml ├── README.md ├── __init__.py ├── __pycache__ ├── layers.cpython-37.pyc ├── models.cpython-37.pyc └── utils.cpython-37.pyc ├── analysis ├── BenchmarkMLTrainigAfterPGCN.ipynb ├── MetricCalculationAfterTrain.ipynb ├── PlotLogoPlotSeqIdentityDataSummary.ipynb ├── PlotSankeyBarplot_Mutation.ipynb ├── PostAnalysisCrossTrainTest.ipynb ├── TestIndexSelection.ipynb ├── node_edge_weight_analysis_Joey.ipynb └── suppl │ ├── Table-S1A-HCV_sequence_protease_label.xlsx │ ├── Table-S1B-TEV_sequence_protease_label.xlsx │ ├── Table-S2-MetricSummary_ML_PGCN_suppl.xlsx │ ├── Table-S4-node_edge_importance_binary.xlsx │ ├── Table-S5-TEV_Daivd_Liu_Mutation_Sites_Numbering.xlsx │ ├── Table-S8-Yeast-assay_PGCN_predicted_results_TEV_designs .xlsx │ └── Table-S9-CrossTestSummary.xlsx ├── bin ├── graph_generation.sh ├── ml_benchmark.sh ├── test.sh ├── train.sh └── variable_importance.sh ├── data └── .ipynb_checkpoints │ └── ind.None-checkpoint.pose_indices ├── design_protease.py ├── graph ├── __pycache__ │ └── protein_graph.cpython-38.pyc ├── classifications │ ├── .ipynb_checkpoints │ │ ├── HCV_A171T-checkpoint.txt │ │ ├── PDZ_class-checkpoint.txt │ │ └── tev_design_for_validation_dual_directions_cleavage-checkpoint.txt │ ├── 20220911_tev_design_successes_p1pA.txt │ ├── 20220922_tev_design_dual_directions_cleavage.txt │ ├── 20220925_tev_design_p3.txt │ ├── 2bof-ER-summarized_label_singlePDB.txt │ ├── 2yol-ER-summarized_label_singlePDB.txt │ ├── 5gj4-ER-summarized_label_singlePDB.txt │ ├── 5y4l-ER-summarized_label_singlePDB.txt │ ├── HCV.txt │ ├── HCV_A171T.txt │ ├── HCV_D183A.txt │ ├── HCV_R170K_A171T_D183A.txt │ ├── PDZ_class.txt │ ├── TEV.txt │ ├── TEV_David_Liu_mutants_sequences_labels_no_single_three_libraries_duplicates_used4structGen_05_10_2022.csv │ ├── TEV_WT_balanced.txt │ ├── TEV_WT_not_exclusive.txt │ ├── 
TEV_final_all_var_noDup.txt │ ├── dvdar_design.txt │ ├── oydv-ER-summarized_label_singlePDB.txt │ ├── pgcn_rbd_class_file.txt │ ├── pgcn_rbd_class_file_ternary.txt │ ├── protease_3c_designs.txt │ ├── protease_3c_designs_2bof.txt │ ├── protease_3c_designs_5y4l.txt │ ├── protease_3c_designs_oydv.txt │ ├── tev-ER-summarized_label_singlePDB.txt │ ├── tev_design_20220912.txt │ ├── tev_design_for_validation_dual_directions_cleavage.txt │ ├── tev_design_negpool.txt │ ├── tev_oydv_design_candidates.txt │ ├── tev_oydv_expt_library_best_decoys.txt │ └── tev_oydv_raw_designs.txt ├── crystal_structures │ ├── HCV.pdb │ └── TEV_QS.pdb └── protein_graph.py ├── helper ├── .ipynb_checkpoints │ ├── 2yol-ER-summarized_label-checkpoint.txt │ ├── 2yol-ER-summarized_label_singlePDB-checkpoint.txt │ ├── RAAVGRG-checkpoint.fasc │ ├── Untitled-checkpoint.ipynb │ ├── generate_class_singlePDB-checkpoint.py │ └── make_modeling_commands-checkpoint.py ├── BenchmarkMLTrainAfterPGCN.py ├── generate_class_singlePDB.py ├── make_modeling_commands.py └── text_to_slurm.py ├── model ├── findBestAcc.py ├── findBestAcc_from_log.py ├── importance.py ├── layers.py ├── models.py ├── outputs │ ├── HCV_A171T_model.pth │ ├── HCV_Combined_model.pth │ ├── HCV_D183A_model.pth │ ├── HCV_Triple_model.pth │ ├── HCV_WT_model.pth │ └── TEV_model.pth ├── test.py ├── train.py └── utils.py └── pipeline.png /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nucleus2014/protease-gcnn-pytorch/c769a0eb53f3b3ec99d53da738ad380376be9bcf/.DS_Store -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | # Editor-based HTTP Client requests 5 | /httpRequests/ 6 | # Datasource local storage ignored files 7 | /dataSources/ 8 | /dataSources.local.xml 9 | 
-------------------------------------------------------------------------------- /.idea/.name: -------------------------------------------------------------------------------- 1 | train.py -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/protease-gcnn-pytorch.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 12 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # protease-gcnn-pytorch 2 | This project is to present a graph-based convolutional neural network, called protein convolutional neural network (PGCN) to predict protease specificity. We propose a new creation of feature set that holds natural energy information for proteins, which could best represent protein activities. 
3 | 4 | ![](https://github.com/Nucleus2014/protease-gcnn-pytorch/blob/master/pipeline.png) 5 | 6 | To use our method, first download this repository by using the following command: 7 | ```git clone https://github.com/Nucleus2014/protease-gcnn-pytorch``` 8 | 9 | Rosetta models were generated using the *design_protease.py* script. If you need source structures for pre-trained HCV/TEV models, please contact us. 10 | Pre-trained models for HCV/TEV are in [model/outputs](https://github.com/Nucleus2014/protease-gcnn-pytorch/tree/master/model/outputs), and cleavage information for HCV/TEV/TEV_design is in the [graph/classifications folder](https://github.com/Nucleus2014/protease-gcnn-pytorch/tree/master/graph/classifications). 11 | Analysis scripts are in the [analysis folder](https://github.com/Nucleus2014/protease-gcnn-pytorch/tree/master/analysis). 12 | 13 | ## Step 1: Generation of graphs 14 | Go to the *graph* folder and execute *protein_graph.py*: 15 | ``` 16 | cd graph 17 | python protein_graph.py -o <output_name> -pr_path /projects/f_sdk94_1/EnzymeModelling/TEVFinalStructures -class TEV.txt -prot TEV_QS.pdb -d 10 18 | ``` 19 | ### Description of generated data 20 | Each generated file is identified by its suffix: 21 | `.graph`: the edge feature tensor in the dimension of (K,N,N,M) 22 | `.x`: the node feature matrix in the dimension of (K,N,F) 23 | `.y`: labels in the dimension of $(K,2)$; CLEAVED if $[1,0]$, UNCLEAVED if $[0,1]$ 24 | `.labelorder`: indicates which class each column of `.y` corresponds to 25 | `.sequences`: the list of sample names 26 | Where K is the number of samples (graphs), N is the number of nodes, M is the number of edge features, F is the number of node features. 27 | 28 | Split the data and save the original indices in: 29 | `.trisplit.val.index` and `.trisplit.test.index`: indices of the validation and test samples when the data are split three ways into training, validation and test sets. Indices start from 0. 
30 | 31 | ## Step 2: Train, validate and test 32 | Go to the *model* folder and execute *train.py*: 33 | ``` 34 | cd model 35 | python train.py --dataset TEV_all_binary_10_ang_aa_energy_7_energyedge_5_hbond 36 | --test_dataset TEV_all_binary_10_ang_aa_energy_7_energyedge_5_hbond 37 | --val_dataset TEV_all_binary_10_ang_aa_energy_7_energyedge_5_hbond 38 | --seed 1 --epochs 500 --hidden1 20 --depth 2 --linear 0 --att 0 39 | --model gcn --batch_size 100 --lr 0.005 --dropout 0.2 --weight_decay 0.0005 40 | --save "outputs/tev/TEV_all_binary_10_ang_aa_energy_7_energyedge_5_hbond/bs_100/" 41 | ``` 42 | Hyperparameter tuning options: 43 | ``` 44 | weight_decay=(1e-3 5e-3 1e-4 5e-4) 45 | learning_rate=(1e-2 5e-2 1e-3 5e-3 1e-4 5e-4) 46 | dropout=(0.01 0.05 0.1 0.2 0.3 0.4 0.5) 47 | batch_size=(500 100 1000 50 10) 48 | ``` 49 | 50 | Once the model has been trained, *train.py* saves it to the directory given by the *--save* flag. 51 | The script *findBestAcc.py* (in the *model* folder) finds the model with the best accuracy. 52 | 53 | ## Test with the pre-trained model 54 | If you would like to test with an already-trained PGCN model, you can use *importance.py* in the *model* folder. It loads the existing PyTorch model file and the test data that you specify. Currently, we offer pre-trained models located at *model/outputs* for HCV wild type, HCV A171T, HCV D183A, HCV R170K_A171T_D183A, HCV Combined and TEV Combined. 55 | ``` 56 | cd model 57 | python importance.py --dataset ${data} --hidden1 20 --depth 2 --linear 0 --att 0 58 | --batch_size ${bs} --lr ${lr} --dropout ${dt} --weight_decay ${wd} --seed ${seed} 59 | --save <save_path> --data_path <data_path> --new 60 | --test_logits_path <test_logits_path> 61 | ``` 62 | 63 | ## Variable Importance Analysis (Alternative) 64 | Here we propose a method to represent the importance of nodes and edges. 
You can use it with the following command: 65 | ``` 66 | cd model 67 | python importance.py --importance --dataset HCV_ternary_10_ang_aa_energy_7_energyedge_5_hbond --test_dataset HCV_ternary_10_ang_aa_energy_7_energyedge_5_hbond --hidden1 20 --depth 2 --linear 0 --att 0 --batch_size 500 --lr 0.005 --dropout 0.05 --weight_decay 5e-4 --save <save_path> 68 | ``` 69 | ## Comparison with other machine learning methods 70 | In the paper, we compare the GCNN with the newly generated feature set against five machine learning models. To reproduce those results (parameter tuning + train and test) with the machine learning models, run: 71 | ``` 72 | cd helper 73 | python BenchmarkMLTrainAfterPGCN.py -data HCV_all_10_ang_aa_energy_7_energyedge_5_hbond_flattened -feature complete -model ann -save outputs/hcv_ann 74 | ``` 75 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import division 3 | 4 | from .layers import * 5 | from .models import * 6 | from .utils import * -------------------------------------------------------------------------------- /__pycache__/layers.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nucleus2014/protease-gcnn-pytorch/c769a0eb53f3b3ec99d53da738ad380376be9bcf/__pycache__/layers.cpython-37.pyc -------------------------------------------------------------------------------- /__pycache__/models.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nucleus2014/protease-gcnn-pytorch/c769a0eb53f3b3ec99d53da738ad380376be9bcf/__pycache__/models.cpython-37.pyc -------------------------------------------------------------------------------- /__pycache__/utils.cpython-37.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nucleus2014/protease-gcnn-pytorch/c769a0eb53f3b3ec99d53da738ad380376be9bcf/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /analysis/TestIndexSelection.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pickle as pkl\n", 10 | "import numpy as np\n", 11 | "import os\n", 12 | "os.chdir('/scratch/cl1205/protease-gcnn-pytorch/model')\n", 13 | "from utils import *\n", 14 | "import torch\n", 15 | "from torch import nn" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 3, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "def TestIndexSave(dataset_str):\n", 25 | " cwd = os.getcwd()\n", 26 | " names = ['x', 'y', 'graph', 'sequences', 'proteases', 'labelorder']\n", 27 | " features, y_arr, adj_ls, sequences, proteases, labelorder = tuple(load_input(dataset_str, names, input_type='train'))\n", 28 | "\n", 29 | " idx = np.arange(y_arr.shape[0])\n", 30 | " print(y_arr.shape[0])\n", 31 | " np.random.shuffle(idx)\n", 32 | " cutoff_2 = int(0.7 * len(idx)) # 10% of the benchmark set as testing data\n", 33 | " idx_test = idx[cutoff_2:]\n", 34 | " idx_train = idx[:cutoff_2]\n", 35 | " print(len(idx_test))\n", 36 | " np.savetxt('../data/ind.' 
+ dataset_str + '.test.index', idx_test, fmt='%d')\n", 37 | " return idx_test\n" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 4, 43 | "metadata": {}, 44 | "outputs": [ 45 | { 46 | "name": "stdout", 47 | "output_type": "stream", 48 | "text": [ 49 | "5425\n", 50 | "1628\n" 51 | ] 52 | } 53 | ], 54 | "source": [ 55 | "idx = TestIndexSave('TEV_all_binary_10_ang_aa_energy_7_energyedge_5_hbond')" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 5, 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "data": { 65 | "text/plain": [ 66 | "array([3947, 4140, 777, ..., 224, 5388, 1575])" 67 | ] 68 | }, 69 | "execution_count": 5, 70 | "metadata": {}, 71 | "output_type": "execute_result" 72 | } 73 | ], 74 | "source": [ 75 | "idx" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "# Validation - Test Dataset" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 14, 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [ 91 | "def ValTestIndex(dataset_str):\n", 92 | " cwd = os.getcwd()\n", 93 | " names = ['x', 'y', 'graph', 'sequences', 'proteases', 'labelorder']\n", 94 | " features, y_arr, adj_ls, sequences, proteases, labelorder = tuple(load_input(dataset_str, names, input_type='train'))\n", 95 | "\n", 96 | " idx = np.arange(y_arr.shape[0])\n", 97 | " np.random.shuffle(idx)\n", 98 | " cutoff = int(0.8 * len(idx)) # 10% of the benchmark set as testing data\n", 99 | " cutoff_2 = int(0.9 * len(idx))\n", 100 | " idx_test = idx[cutoff_2:]\n", 101 | " idx_train = idx[:cutoff]\n", 102 | " idx_val = idx[cutoff: cutoff_2]\n", 103 | " print(len(idx_train), len(idx_val), len(idx_test))\n", 104 | " np.savetxt('../data/ind.' + dataset_str + '.trisplit.test.index', idx_test, fmt='%d')\n", 105 | " np.savetxt('../data/ind.' 
+ dataset_str + '.trisplit.val.index', idx_val, fmt='%d')\n", 106 | " return idx_val, idx_test\n" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 15, 112 | "metadata": {}, 113 | "outputs": [ 114 | { 115 | "name": "stdout", 116 | "output_type": "stream", 117 | "text": [ 118 | "TEV_all_binary_10_ang_aa_energy_7_energyedge_5_hbond\n", 119 | "4340 542 543\n", 120 | "HCV_all_binary_10_ang_aa_energy_7_energyedge_5_hbond\n", 121 | "31399 3925 3925\n", 122 | "HCV_WT_binary_10_ang_aa_energy_7_energyedge_5_hbond\n", 123 | "5873 734 735\n", 124 | "HCV_A171T_binary_10_ang_aa_energy_7_energyedge_5_hbond\n", 125 | "10564 1320 1321\n", 126 | "HCV_D183A_binary_10_ang_aa_energy_7_energyedge_5_hbond\n", 127 | "9491 1186 1187\n", 128 | "HCV_Triple_binary_10_ang_aa_energy_7_energyedge_5_hbond\n", 129 | "5470 684 684\n" 130 | ] 131 | } 132 | ], 133 | "source": [ 134 | "for data in ['TEV_all_binary_10_ang_aa_energy_7_energyedge_5_hbond',\n", 135 | " 'HCV_all_binary_10_ang_aa_energy_7_energyedge_5_hbond',\n", 136 | " 'HCV_WT_binary_10_ang_aa_energy_7_energyedge_5_hbond',\n", 137 | " 'HCV_A171T_binary_10_ang_aa_energy_7_energyedge_5_hbond',\n", 138 | " 'HCV_D183A_binary_10_ang_aa_energy_7_energyedge_5_hbond',\n", 139 | " 'HCV_Triple_binary_10_ang_aa_energy_7_energyedge_5_hbond']:\n", 140 | " print(data)\n", 141 | " idx_val, idx_test = ValTestIndex(data)\n", 142 | " " 143 | ] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": {}, 148 | "source": [ 149 | "# Training/Val/Test Data Simple Statistics" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 2, 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [ 158 | "def raw_data_statistics(dataset):\n", 159 | " idy = pkl.load(open('/scratch/cl1205/protease-gcnn-pytorch/data/ind.{}.y'.format(dataset), 'rb'))\n", 160 | " test_index = np.loadtxt('/scratch/cl1205/protease-gcnn-pytorch/data/ind.{}.test.index'.format(dataset), dtype=int)\n", 161 | " sequences = 
pkl.load(open('/scratch/cl1205/protease-gcnn-pytorch/data/ind.{}.sequences'.format(dataset), 'rb'))\n", 162 | " test_index = np.loadtxt('/scratch/cl1205/protease-gcnn-pytorch/data/ind.{}.trisplit.test.index'.format(dataset), dtype=int)\n", 163 | " val_index = np.loadtxt('/scratch/cl1205/protease-gcnn-pytorch/data/ind.{}.trisplit.val.index'.format(dataset), dtype=int)\n", 164 | " test_index = np.sort(test_index)\n", 165 | " val_index = np.sort(val_index)\n", 166 | " y_val = idy[val_index]\n", 167 | " y_test = idy[test_index]\n", 168 | " \n", 169 | " train_mask = np.array([i not in test_index and i not in val_index for i in range(idy.shape[0])]) \n", 170 | " y_train = idy[train_mask]\n", 171 | " print(np.array(sequences)[train_mask][0]) # 1 0 means cleaved\n", 172 | " print('Train:| Cleaved {} | Uncleaved {} | Total {} |'.format(np.sum(y_train==[1,0],axis=0)[0], \n", 173 | " np.sum(y_train==[0,1],axis=0)[0], \n", 174 | " y_train.shape[0]))\n", 175 | " print('Val:| Cleaved {} | Uncleaved {} | Total {} |'.format(np.sum(y_val==[1,0],axis=0)[0], \n", 176 | " np.sum(y_val==[0,1],axis=0)[0], \n", 177 | " y_val.shape[0]))\n", 178 | " print('Test:| Cleaved {} | Uncleaved {} | Total {} |'.format(np.sum(y_test==[1,0],axis=0)[0], \n", 179 | " np.sum(y_test==[0,1],axis=0)[0], \n", 180 | " y_test.shape[0]))\n", 181 | " print('Total: {}'.format(y_train.shape[0] + y_val.shape[0] + y_test.shape[0]))\n", 182 | " return sequences, test_index, val_index" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 32, 188 | "metadata": {}, 189 | "outputs": [ 190 | { 191 | "name": "stdout", 192 | "output_type": "stream", 193 | "text": [ 194 | "N176I_TAHLYFQSGT.pdb\n", 195 | "Train:| Cleaved 2111 | Uncleaved 2229 | Total 4340 |\n", 196 | "Val:| Cleaved 259 | Uncleaved 283 | Total 542 |\n", 197 | "Test:| Cleaved 238 | Uncleaved 305 | Total 543 |\n", 198 | "Total: 5425\n" 199 | ] 200 | } 201 | ], 202 | "source": [ 203 | 
"raw_data_statistics('TEV_all_binary_10_ang_aa_energy_7_energyedge_5_hbond')" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": 3, 209 | "metadata": {}, 210 | "outputs": [ 211 | { 212 | "name": "stdout", 213 | "output_type": "stream", 214 | "text": [ 215 | "N176I_TAHLYFQSGT.pdb\n", 216 | "Train:| Cleaved 2111 | Uncleaved 2229 | Total 4340 |\n", 217 | "Val:| Cleaved 259 | Uncleaved 283 | Total 542 |\n", 218 | "Test:| Cleaved 238 | Uncleaved 305 | Total 543 |\n", 219 | "Total: 5425\n" 220 | ] 221 | } 222 | ], 223 | "source": [ 224 | "sequences, test_index, val_index = raw_data_statistics('TEV_all_binary_10_ang_aa_energy_7_energyedge_5_hbond')" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": 7, 230 | "metadata": {}, 231 | "outputs": [], 232 | "source": [ 233 | "test_sequences = np.array(sequences)[test_index]" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 9, 239 | "metadata": {}, 240 | "outputs": [], 241 | "source": [ 242 | "for seq in test_sequences:\n", 243 | " if seq == 'WT_TENLYFQSGT.pdb':\n", 244 | " print('in test')\n", 245 | "val_sequences = np.array(sequences)[val_index]\n", 246 | "for seq in val_sequences:\n", 247 | " if seq == 'WT_TENLYFQSGT.pdb':\n", 248 | " print('in_val')" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": 27, 254 | "metadata": {}, 255 | "outputs": [ 256 | { 257 | "data": { 258 | "text/plain": [ 259 | "2111" 260 | ] 261 | }, 262 | "execution_count": 27, 263 | "metadata": {}, 264 | "output_type": "execute_result" 265 | } 266 | ], 267 | "source": [ 268 | "np.sum(y_train==[1,0], axis=0)[0]" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": 33, 274 | "metadata": {}, 275 | "outputs": [ 276 | { 277 | "name": "stdout", 278 | "output_type": "stream", 279 | "text": [ 280 | "TEV_all_binary_10_ang_aa_energy_7_energyedge_5_hbond\n", 281 | "N176I_TAHLYFQSGT.pdb\n", 282 | "Train:| Cleaved 2111 | Uncleaved 2229 | 
Total 4340 |\n", 283 | "Val:| Cleaved 259 | Uncleaved 283 | Total 542 |\n", 284 | "Test:| Cleaved 238 | Uncleaved 305 | Total 543 |\n", 285 | "Total: 5425\n", 286 | "HCV_all_binary_10_ang_aa_energy_7_energyedge_5_hbond\n", 287 | "AYYYEPC.ASHL\n", 288 | "Train:| Cleaved 10404 | Uncleaved 20995 | Total 31399 |\n", 289 | "Val:| Cleaved 1319 | Uncleaved 2606 | Total 3925 |\n", 290 | "Test:| Cleaved 1338 | Uncleaved 2587 | Total 3925 |\n", 291 | "Total: 39249\n", 292 | "HCV_WT_binary_10_ang_aa_energy_7_energyedge_5_hbond\n", 293 | "AYYYEPC.ASHL\n", 294 | "Train:| Cleaved 1566 | Uncleaved 4307 | Total 5873 |\n", 295 | "Val:| Cleaved 175 | Uncleaved 559 | Total 734 |\n", 296 | "Test:| Cleaved 191 | Uncleaved 544 | Total 735 |\n", 297 | "Total: 7342\n", 298 | "HCV_A171T_binary_10_ang_aa_energy_7_energyedge_5_hbond\n", 299 | "AETMLLC.ASHL\n", 300 | "Train:| Cleaved 2905 | Uncleaved 7659 | Total 10564 |\n", 301 | "Val:| Cleaved 366 | Uncleaved 954 | Total 1320 |\n", 302 | "Test:| Cleaved 373 | Uncleaved 948 | Total 1321 |\n", 303 | "Total: 13205\n", 304 | "HCV_D183A_binary_10_ang_aa_energy_7_energyedge_5_hbond\n", 305 | "ADLMDDC.ASHL\n", 306 | "Train:| Cleaved 3538 | Uncleaved 5953 | Total 9491 |\n", 307 | "Val:| Cleaved 422 | Uncleaved 764 | Total 1186 |\n", 308 | "Test:| Cleaved 390 | Uncleaved 797 | Total 1187 |\n", 309 | "Total: 11864\n", 310 | "HCV_Triple_binary_10_ang_aa_energy_7_energyedge_5_hbond\n", 311 | "AKQTISC.ASHL\n", 312 | "Train:| Cleaved 2496 | Uncleaved 2974 | Total 5470 |\n", 313 | "Val:| Cleaved 315 | Uncleaved 369 | Total 684 |\n", 314 | "Test:| Cleaved 324 | Uncleaved 360 | Total 684 |\n", 315 | "Total: 6838\n" 316 | ] 317 | } 318 | ], 319 | "source": [ 320 | "for data in ['TEV_all_binary_10_ang_aa_energy_7_energyedge_5_hbond',\n", 321 | " 'HCV_all_binary_10_ang_aa_energy_7_energyedge_5_hbond',\n", 322 | " 'HCV_WT_binary_10_ang_aa_energy_7_energyedge_5_hbond',\n", 323 | " 'HCV_A171T_binary_10_ang_aa_energy_7_energyedge_5_hbond',\n", 324 | " 
'HCV_D183A_binary_10_ang_aa_energy_7_energyedge_5_hbond',\n", 325 | " 'HCV_Triple_binary_10_ang_aa_energy_7_energyedge_5_hbond']:\n", 326 | " print(data)\n", 327 | " raw_data_statistics(data)" 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": null, 333 | "metadata": {}, 334 | "outputs": [], 335 | "source": [] 336 | } 337 | ], 338 | "metadata": { 339 | "kernelspec": { 340 | "display_name": "Python 3", 341 | "language": "python", 342 | "name": "python3" 343 | }, 344 | "language_info": { 345 | "codemirror_mode": { 346 | "name": "ipython", 347 | "version": 3 348 | }, 349 | "file_extension": ".py", 350 | "mimetype": "text/x-python", 351 | "name": "python", 352 | "nbconvert_exporter": "python", 353 | "pygments_lexer": "ipython3", 354 | "version": "3.6.4" 355 | } 356 | }, 357 | "nbformat": 4, 358 | "nbformat_minor": 2 359 | } 360 | -------------------------------------------------------------------------------- /analysis/suppl/Table-S1A-HCV_sequence_protease_label.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nucleus2014/protease-gcnn-pytorch/c769a0eb53f3b3ec99d53da738ad380376be9bcf/analysis/suppl/Table-S1A-HCV_sequence_protease_label.xlsx -------------------------------------------------------------------------------- /analysis/suppl/Table-S1B-TEV_sequence_protease_label.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nucleus2014/protease-gcnn-pytorch/c769a0eb53f3b3ec99d53da738ad380376be9bcf/analysis/suppl/Table-S1B-TEV_sequence_protease_label.xlsx -------------------------------------------------------------------------------- /analysis/suppl/Table-S2-MetricSummary_ML_PGCN_suppl.xlsx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Nucleus2014/protease-gcnn-pytorch/c769a0eb53f3b3ec99d53da738ad380376be9bcf/analysis/suppl/Table-S2-MetricSummary_ML_PGCN_suppl.xlsx -------------------------------------------------------------------------------- /analysis/suppl/Table-S4-node_edge_importance_binary.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nucleus2014/protease-gcnn-pytorch/c769a0eb53f3b3ec99d53da738ad380376be9bcf/analysis/suppl/Table-S4-node_edge_importance_binary.xlsx -------------------------------------------------------------------------------- /analysis/suppl/Table-S5-TEV_Daivd_Liu_Mutation_Sites_Numbering.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nucleus2014/protease-gcnn-pytorch/c769a0eb53f3b3ec99d53da738ad380376be9bcf/analysis/suppl/Table-S5-TEV_Daivd_Liu_Mutation_Sites_Numbering.xlsx -------------------------------------------------------------------------------- /analysis/suppl/Table-S8-Yeast-assay_PGCN_predicted_results_TEV_designs .xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nucleus2014/protease-gcnn-pytorch/c769a0eb53f3b3ec99d53da738ad380376be9bcf/analysis/suppl/Table-S8-Yeast-assay_PGCN_predicted_results_TEV_designs .xlsx -------------------------------------------------------------------------------- /analysis/suppl/Table-S9-CrossTestSummary.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nucleus2014/protease-gcnn-pytorch/c769a0eb53f3b3ec99d53da738ad380376be9bcf/analysis/suppl/Table-S9-CrossTestSummary.xlsx -------------------------------------------------------------------------------- /bin/graph_generation.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #SBATCH --partition=main # 
Partition (job queue) 4 | #SBATCH --requeue # Return job to the queue if preempted 5 | #SBATCH --job-name=TEV_all # Assign an short name to your job 6 | #SBATCH --nodes=1 # Number of nodes you require 7 | #SBATCH --ntasks=1 # Total # of tasks across all nodes 8 | #SBATCH --cpus-per-task=1 # Cores per task (>1 if multithread tasks) 9 | #SBATCH --mem=32000 # Real memory (RAM) required (MB) 10 | #SBATCH --time=3-00:00:00 # Total run time limit (HH:MM:SS) 11 | #SBATCH --output=gg.tev_all.10_ang_aa_energy_7_energyedge_5_hbond.%N.%j.out # STDOUT output file 12 | #SBATCH --error=gg.tev_all.10_ang_aa_energy_7_energyedge_5_hbond.%N.%j.err # STDERR output file (optional) 13 | #SBATCH --export=ALL # Export you current env to the job env 14 | 15 | cd /scratch/cl1205/protease-gcnn-pytorch/graph/ 16 | 17 | srun python protein_graph.py -o TEV_all_binary_10_ang_aa_energy_7_energyedge_5_hbond -pr_path /projects/f_sdk94_1/EnzymeModelling/TEVFinalStructures -class TEV_final_all_var_noDup.txt -prot TEV_QS.pdb -d 10 18 | 19 | -------------------------------------------------------------------------------- /bin/ml_benchmark.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #SBATCH --partition=main # Partition (job queue) 4 | #SBATCH --requeue # Return job to the queue if preempted 5 | #SBATCH --job-name=ml # Assign an short name to your job 6 | #SBATCH --ntasks=1 # Total # of tasks across all nodes 7 | #SBATCH --cpus-per-task=1 # Cores per task (>1 if multithread tasks) 8 | #SBATCH --mem=32000 # Real memory (RAM) required (MB) 9 | #SBATCH --time=3-00:00:00 # Total run time limit (HH:MM:SS) 10 | #SBATCH --output=tt.HCV_flatten.%a.%N.%j.out # STDOUT output file 11 | #SBATCH --error=tt.HCV_flatten.%a.%N.%j.err # STDERR output file (optional) 12 | #SBATCH --export=ALL # Export you current env to the job env 13 | data=$1 14 | feature=$2 15 | model=$3 16 | 17 | cd /scratch/cl1205/ml-cleavage/scripts 18 | python 
BenchmarkMLTrainAfterPGCN.py -data $data -feature $feature -model $model -save "/scratch/cl1205/ml-cleavage/outputs/hcv_noProtID_trisplit_20220705" 19 | 20 | -------------------------------------------------------------------------------- /bin/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #SBATCH --partition=main # Partition (job queue) 4 | #SBATCH --requeue # Return job to the queue if preempted 5 | #SBATCH --job-name=new1 # Assign an short name to your job 6 | #SBATCH --nodes=1 # Number of nodes you require 7 | #SBATCH --ntasks=1 # Total # of tasks across all nodes 8 | #SBATCH --cpus-per-task=1 # Cores per task (>1 if multithread tasks) 9 | #SBATCH --mem=32000 # Real memory (RAM) required (MB) 10 | #SBATCH --time=02:00:00 # Total run time limit (HH:MM:SS) 11 | #SBATCH --output=testnew.binary.%N.%j.out # STDOUT output file 12 | #SBATCH --error=testnew.binary.%N.%j.err # STDERR output file (optional) 13 | #SBATCH --export=ALL # Export you current env to the job env 14 | 15 | cd /scratch/cl1205/protease-gcnn-pytorch/model 16 | data=$1 17 | seed=$2 18 | feature=$3 19 | wd=$4 20 | lr=$5 21 | dt=$6 22 | bs=$7 23 | ind=$8 24 | #echo "data: $data" 25 | #echo "seed: $seed" 26 | #echo "feature: $feature" 27 | #echo "weight_decay: $wd" 28 | #echo "learning_rate: $lr" 29 | #echo "dropout: $dt" 30 | #echo "batch_size: $bs" 31 | if [ ${feature} == _ ] 32 | then 33 | flag=--energy_only 34 | #rerun='_rerun/' 35 | else 36 | flag= 37 | #rerun='/' 38 | fi 39 | # call coord, but actually no coord in it 40 | python importance.py --dataset HCV_${data}_binary_new_10_ang_aa_energy_7_coord_energyedge_5_hbond_${ind} --hidden1 20 --depth 2 --linear 0 --att 0 --batch_size ${bs} --lr ${lr} --dropout ${dt} --weight_decay ${wd} --seed ${seed} --save "outputs/tt_finalize_20210413/HCV_${data}_binary_10_ang${feature}energy_7_energyedge_5_hbond_bs_${bs}/" ${flag} --new #&> tt.log 41 | 42 | 
-------------------------------------------------------------------------------- /bin/train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #SBATCH --partition=main # Partition (job queue) 4 | #SBATCH --requeue # Return job to the queue if preempted 5 | #SBATCH --job-name=tt1 # Assign a short name to your job 6 | #SBATCH --array=0-167 7 | #SBATCH --ntasks=1 # Total # of tasks across all nodes 8 | #SBATCH --cpus-per-task=1 # Cores per task (>1 if multithread tasks) 9 | #SBATCH --mem=32000 # Real memory (RAM) required (MB) 10 | #SBATCH --time=24:00:00 # Total run time limit (HH:MM:SS) 11 | #SBATCH --output=tt.HCV_binary_10_ang_aa_energy_7_energyedge_5_hbond.%a.%N.%j.out # STDOUT output file 12 | #SBATCH --error=tt.HCV_binary_10_ang_aa_energy_7_energyedge_5_hbond.%a.%N.%j.err # STDERR output file (optional) 13 | #SBATCH --export=ALL # Export your current env to the job env 14 | # Usage: sbatch train.sh <data> <seed> <feature> 15 | data=$1 16 | seed=$2 17 | feature=$3 18 | 19 | echo "data: $data" 20 | echo "seed: $seed" 21 | echo "feature: $feature" 22 | cd /scratch/cl1205/protease-gcnn-pytorch/model/ 23 | weight_decay=(1e-3 5e-3 1e-4 5e-4) 24 | learning_rate=(1e-2 5e-2 1e-3 5e-3 1e-4 5e-4) 25 | dropout=(0.01 0.05 0.1 0.2 0.3 0.4 0.5) 26 | wd=() # wd/lr/dt hold the flattened 4 x 6 x 7 grid (168 entries), one entry per array task id 0-167 27 | lr=() 28 | dt=() 29 | for i in {0..3} 30 | do 31 | for j in {0..5} 32 | do 33 | for k in {0..6} 34 | do 35 | wd+=(${weight_decay[$i]}) 36 | lr+=(${learning_rate[$j]}) 37 | dt+=(${dropout[$k]}) 38 | done 39 | done 40 | done 41 | echo "array id: $SLURM_ARRAY_TASK_ID" 42 | echo "weight decay: ${wd[$SLURM_ARRAY_TASK_ID]}" 43 | echo "learning rate: ${lr[$SLURM_ARRAY_TASK_ID]}" 44 | echo "dropout rate: ${dt[$SLURM_ARRAY_TASK_ID]}" 45 | tmp_wd=${wd[$SLURM_ARRAY_TASK_ID]} 46 | tmp_lr=${lr[$SLURM_ARRAY_TASK_ID]} 47 | tmp_dt=${dt[$SLURM_ARRAY_TASK_ID]} 48 | # A bare "_" feature tag adds --energy_only to every run below; any other tag adds no extra flag 49 | if [ "${feature}" == _ ] 50 | then 51 | flag=--energy_only 52 | else 53 | flag= 54 | fi 55 | 56 | echo "batch_size: 500" 57 | python test.py --dataset
HCV_${data}_binary_10_ang_aa_energy_7_energyedge_5_hbond --test_dataset HCV_aa_binary_10_ang_aa_energy_7_energyedge_5_hbond --seed ${seed} --epochs 500 --hidden1 20 --depth 2 --linear 0 --att 0 --model gcn --batch_size 500 --lr $tmp_lr --dropout $tmp_dt --weight_decay $tmp_wd --save "outputs/tt_finalize_20220211/HCV_${data}_binary_10_ang${feature}energy_7_energyedge_5_hbond/bs_500/" ${flag} #&> tt.log 58 | # NOTE(review): the batch_size 500 run hard-codes --test_dataset HCV_aa_..., while the runs below use HCV_${data}_... - confirm this is intended 59 | echo "batch_size: 100" 60 | python test.py --dataset HCV_${data}_binary_10_ang_aa_energy_7_energyedge_5_hbond --test_dataset HCV_${data}_binary_10_ang_aa_energy_7_energyedge_5_hbond --seed ${seed} --epochs 500 --hidden1 20 --depth 2 --linear 0 --att 0 --model gcn --batch_size 100 --lr $tmp_lr --dropout $tmp_dt --weight_decay $tmp_wd --save "outputs/tt_finalize_20220211/HCV_${data}_binary_10_ang${feature}energy_7_energyedge_5_hbond/bs_100/" ${flag} 61 | 62 | echo "batch_size: 1000" 63 | python test.py --dataset HCV_${data}_binary_10_ang_aa_energy_7_energyedge_5_hbond --test_dataset HCV_${data}_binary_10_ang_aa_energy_7_energyedge_5_hbond --seed ${seed} --epochs 500 --hidden1 20 --depth 2 --linear 0 --att 0 --model gcn --batch_size 1000 --lr $tmp_lr --dropout $tmp_dt --weight_decay $tmp_wd --save "outputs/tt_finalize_20220211/HCV_${data}_binary_10_ang${feature}energy_7_energyedge_5_hbond/bs_1000/" ${flag} 64 | 65 | echo "batch_size: 50" 66 | python test.py --dataset HCV_${data}_binary_10_ang_aa_energy_7_energyedge_5_hbond --test_dataset HCV_${data}_binary_10_ang_aa_energy_7_energyedge_5_hbond --seed ${seed} --epochs 500 --hidden1 20 --depth 2 --linear 0 --att 0 --model gcn --batch_size 50 --lr $tmp_lr --dropout $tmp_dt --weight_decay $tmp_wd --save "outputs/tt_finalize_20220211/HCV_${data}_binary_10_ang${feature}energy_7_energyedge_5_hbond/bs_50/" ${flag} 67 | 68 | echo "batch_size: 10" 69 | python test.py --dataset HCV_${data}_binary_10_ang_aa_energy_7_energyedge_5_hbond --test_dataset HCV_${data}_binary_10_ang_aa_energy_7_energyedge_5_hbond --seed ${seed}
--epochs 500 --hidden1 20 --depth 2 --linear 0 --att 0 --model gcn --batch_size 10 --lr $tmp_lr --dropout $tmp_dt --weight_decay $tmp_wd --save "outputs/tt_finalize_20220211/HCV_${data}_binary_10_ang${feature}energy_7_energyedge_5_hbond/bs_10/" ${flag} 70 | 71 | 72 | -------------------------------------------------------------------------------- /bin/variable_importance.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #SBATCH --partition=main # Partition (job queue) 4 | #SBATCH --requeue # Return job to the queue if preempted 5 | #SBATCH --job-name=vi1 # Assign a short name to your job 6 | #SBATCH --nodes=1 # Number of nodes you require 7 | #SBATCH --ntasks=1 # Total # of tasks across all nodes 8 | #SBATCH --cpus-per-task=1 # Cores per task (>1 if multithread tasks) 9 | #SBATCH --mem=32000 # Real memory (RAM) required (MB) 10 | #SBATCH --time=2-00:00:00 # Total run time limit (D-HH:MM:SS) 11 | #SBATCH --output=varimport.binary.%N.%j.out # STDOUT output file 12 | #SBATCH --error=varimport.binary.%N.%j.err # STDERR output file (optional) 13 | #SBATCH --export=ALL # Export your current env to the job env 14 | # Usage: variable_importance.sh <data> <seed> <feature> <weight_decay> <learning_rate> <dropout> <batch_size> 15 | cd /scratch/cl1205/protease-gcnn-pytorch/model 16 | data=$1 17 | seed=$2 18 | feature=$3 19 | wd=$4 20 | lr=$5 21 | dt=$6 22 | bs=$7 23 | echo "data: $data" 24 | echo "seed: $seed" 25 | echo "feature: $feature" 26 | echo "weight_decay: $wd" 27 | echo "learning_rate: $lr" 28 | echo "dropout: $dt" 29 | echo "batch_size: $bs" 30 | 31 | if [ "${feature}" == _ ] 32 | then 33 | label=--energy_only 34 | rerun='_rerun/' 35 | else 36 | label= 37 | rerun='/' 38 | fi 39 | srun python importance.py --importance --dataset HCV_${data}_binary_10_ang_aa_energy_7_energyedge_5_hbond --test_dataset HCV_${data}_binary_10_ang_aa_energy_7_energyedge_5_hbond --hidden1 20 --depth 2 --linear 0 --att 0 --batch_size ${bs} --lr ${lr} --dropout ${dt} --weight_decay ${wd} --seed ${seed} --save
"outputs/tt_finalize_20210413/HCV_${data}_binary_10_ang${feature}energy_7_energyedge_5_hbond${rerun}bs_${bs}/" ${label} #&> tt.log 40 | 41 | -------------------------------------------------------------------------------- /data/.ipynb_checkpoints/ind.None-checkpoint.pose_indices: -------------------------------------------------------------------------------- 1 | ,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39 2 | GRIP2_4_DAAHHDESNC_0_afd.pdb,104,105,106,107,108,109,110,111,112,113,12,13,14,15,16,17,22,24,49,50,51,52,53,54,55,56,57,90,91,92,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 3 | MPP2_1_QPCFNKLFPL_0_afd.pdb,87,88,89,90,91,92,93,94,95,96,10,14,15,16,17,18,19,20,21,22,23,24,27,29,30,31,32,33,38,44,50,62,63,65,66,68,69,70,72,78 4 | -------------------------------------------------------------------------------- /graph/__pycache__/protein_graph.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nucleus2014/protease-gcnn-pytorch/c769a0eb53f3b3ec99d53da738ad380376be9bcf/graph/__pycache__/protein_graph.cpython-38.pyc -------------------------------------------------------------------------------- /graph/classifications/.ipynb_checkpoints/tev_design_for_validation_dual_directions_cleavage-checkpoint.txt: -------------------------------------------------------------------------------- 1 | Sequence Result 2 | p2_c2_5.pdb CLEAVED 3 | p2_c2_2.pdb CLEAVED 4 | p2_c2_4.pdb CLEAVED 5 | p2_c2_7.pdb CLEAVED 6 | p2_c2_8.pdb CLEAVED 7 | p2_c2_1.pdb CLEAVED 8 | p2_c2_0.pdb CLEAVED 9 | p2_c2_3.pdb CLEAVED 10 | p2_c2_6.pdb CLEAVED 11 | p2_c2_9.pdb CLEAVED 12 | p6_c5_6.pdb CLEAVED 13 | p6_c5_9.pdb CLEAVED 14 | p6_c5_7.pdb CLEAVED 15 | p6_c5_8.pdb CLEAVED 16 | p6_c5_5.pdb CLEAVED 17 | p6_c5_1.pdb CLEAVED 18 | p6_c5_0.pdb CLEAVED 19 | p6_c5_2.pdb CLEAVED 20 | p6_c5_4.pdb CLEAVED 21 | p6_c5_3.pdb CLEAVED 22 | p2_c9_4.pdb CLEAVED 23 | p2_c9_3.pdb CLEAVED 24 
| p2_c9_5.pdb CLEAVED 25 | p2_c9_9.pdb CLEAVED 26 | p2_c9_6.pdb CLEAVED 27 | p2_c9_8.pdb CLEAVED 28 | p2_c9_0.pdb CLEAVED 29 | p2_c9_2.pdb CLEAVED 30 | p2_c9_7.pdb CLEAVED 31 | p2_c9_1.pdb CLEAVED 32 | P6_c3_9.pdb CLEAVED 33 | P6_c3_3.pdb CLEAVED 34 | P6_c3_1.pdb CLEAVED 35 | P6_c3_0.pdb CLEAVED 36 | P6_c3_4.pdb CLEAVED 37 | P6_c3_5.pdb CLEAVED 38 | P6_c3_2.pdb CLEAVED 39 | P6_c3_7.pdb CLEAVED 40 | P6_c3_6.pdb CLEAVED 41 | P6_c3_8.pdb CLEAVED 42 | p6_c7_7.pdb CLEAVED 43 | p6_c7_3.pdb CLEAVED 44 | p6_c7_0.pdb CLEAVED 45 | p6_c7_5.pdb CLEAVED 46 | p6_c7_9.pdb CLEAVED 47 | p6_c7_1.pdb CLEAVED 48 | p6_c7_8.pdb CLEAVED 49 | p6_c7_2.pdb CLEAVED 50 | p6_c7_4.pdb CLEAVED 51 | p6_c7_6.pdb CLEAVED 52 | P6_c2_7.pdb CLEAVED 53 | P6_c2_0.pdb CLEAVED 54 | P6_c2_4.pdb CLEAVED 55 | P6_c2_9.pdb CLEAVED 56 | P6_c2_5.pdb CLEAVED 57 | P6_c2_1.pdb CLEAVED 58 | P6_c2_3.pdb CLEAVED 59 | P6_c2_8.pdb CLEAVED 60 | P6_c2_6.pdb CLEAVED 61 | P6_c2_2.pdb CLEAVED 62 | p2_c10_2.pdb CLEAVED 63 | p2_c10_8.pdb CLEAVED 64 | p2_c10_1.pdb CLEAVED 65 | p2_c10_0.pdb CLEAVED 66 | p2_c10_9.pdb CLEAVED 67 | p2_c10_5.pdb CLEAVED 68 | p2_c10_6.pdb CLEAVED 69 | p2_c10_4.pdb CLEAVED 70 | p2_c10_7.pdb CLEAVED 71 | p2_c10_3.pdb CLEAVED 72 | p6_c6_1.pdb CLEAVED 73 | p6_c6_6.pdb CLEAVED 74 | p6_c6_9.pdb CLEAVED 75 | p6_c6_4.pdb CLEAVED 76 | p6_c6_2.pdb CLEAVED 77 | p6_c6_5.pdb CLEAVED 78 | p6_c6_3.pdb CLEAVED 79 | p6_c6_7.pdb CLEAVED 80 | p6_c6_8.pdb CLEAVED 81 | p6_c6_0.pdb CLEAVED 82 | p6_c8_5.pdb CLEAVED 83 | p6_c8_6.pdb CLEAVED 84 | p6_c8_1.pdb CLEAVED 85 | p6_c8_8.pdb CLEAVED 86 | p6_c8_4.pdb CLEAVED 87 | p6_c8_9.pdb CLEAVED 88 | p6_c8_3.pdb CLEAVED 89 | p6_c8_0.pdb CLEAVED 90 | p6_c8_2.pdb CLEAVED 91 | p6_c8_7.pdb CLEAVED 92 | p3_c2_5.pdb UNCLEAVED 93 | p3_c2_2.pdb UNCLEAVED 94 | p3_c2_4.pdb UNCLEAVED 95 | p3_c2_7.pdb UNCLEAVED 96 | p3_c2_8.pdb UNCLEAVED 97 | p3_c2_1.pdb UNCLEAVED 98 | p3_c2_0.pdb UNCLEAVED 99 | p3_c2_3.pdb UNCLEAVED 100 | p3_c2_6.pdb UNCLEAVED 101 | p3_c2_9.pdb UNCLEAVED 102 | p3_c3_9.pdb 
UNCLEAVED 103 | p3_c3_3.pdb UNCLEAVED 104 | p3_c3_1.pdb UNCLEAVED 105 | p3_c3_0.pdb UNCLEAVED 106 | p3_c3_4.pdb UNCLEAVED 107 | p3_c3_5.pdb UNCLEAVED 108 | p3_c3_2.pdb UNCLEAVED 109 | p3_c3_7.pdb UNCLEAVED 110 | p3_c3_6.pdb UNCLEAVED 111 | p3_c3_8.pdb UNCLEAVED 112 | p3_c1_2.pdb UNCLEAVED 113 | p3_c1_8.pdb UNCLEAVED 114 | p3_c1_1.pdb UNCLEAVED 115 | p3_c1_0.pdb UNCLEAVED 116 | p3_c1_5.pdb UNCLEAVED 117 | p3_c1_7.pdb UNCLEAVED 118 | p3_c1_4.pdb UNCLEAVED 119 | p3_c1_3.pdb UNCLEAVED 120 | p3_c1_9.pdb UNCLEAVED 121 | p3_c1_6.pdb UNCLEAVED 122 | tev_p2_V209I_W211L_M218D_relaxed_0.pdb UNCLEAVED 123 | tev_p2_V209I_W211L_M218D_relaxed_1.pdb UNCLEAVED 124 | tev_p2_V209I_W211L_M218D_relaxed_2.pdb UNCLEAVED 125 | tev_p2_V209I_W211L_M218D_relaxed_3.pdb UNCLEAVED 126 | tev_p2_V209I_W211L_M218D_relaxed_4.pdb UNCLEAVED 127 | tev_p2_V209I_W211L_M218D_relaxed_5.pdb UNCLEAVED 128 | tev_p2_V209I_W211L_M218D_relaxed_6.pdb UNCLEAVED 129 | tev_p2_V209I_W211L_M218D_relaxed_7.pdb UNCLEAVED 130 | tev_p2_V209I_W211L_M218D_relaxed_8.pdb UNCLEAVED 131 | tev_p2_V209I_W211L_M218D_relaxed_9.pdb UNCLEAVED 132 | tev_p2_V209S_W211R_G213R_relaxed_0.pdb UNCLEAVED 133 | tev_p2_V209S_W211R_G213R_relaxed_1.pdb UNCLEAVED 134 | tev_p2_V209S_W211R_G213R_relaxed_2.pdb UNCLEAVED 135 | tev_p2_V209S_W211R_G213R_relaxed_3.pdb UNCLEAVED 136 | tev_p2_V209S_W211R_G213R_relaxed_4.pdb UNCLEAVED 137 | tev_p2_V209S_W211R_G213R_relaxed_5.pdb UNCLEAVED 138 | tev_p2_V209S_W211R_G213R_relaxed_6.pdb UNCLEAVED 139 | tev_p2_V209S_W211R_G213R_relaxed_7.pdb UNCLEAVED 140 | tev_p2_V209S_W211R_G213R_relaxed_8.pdb UNCLEAVED 141 | tev_p2_V209S_W211R_G213R_relaxed_9.pdb UNCLEAVED 142 | tev_p2_V209S_W211R_M218D_relaxed_0.pdb UNCLEAVED 143 | tev_p2_V209S_W211R_M218D_relaxed_1.pdb UNCLEAVED 144 | tev_p2_V209S_W211R_M218D_relaxed_2.pdb UNCLEAVED 145 | tev_p2_V209S_W211R_M218D_relaxed_3.pdb UNCLEAVED 146 | tev_p2_V209S_W211R_M218D_relaxed_4.pdb UNCLEAVED 147 | tev_p2_V209S_W211R_M218D_relaxed_5.pdb UNCLEAVED 148 | 
tev_p2_V209S_W211R_M218D_relaxed_6.pdb UNCLEAVED 149 | tev_p2_V209S_W211R_M218D_relaxed_7.pdb UNCLEAVED 150 | tev_p2_V209S_W211R_M218D_relaxed_8.pdb UNCLEAVED 151 | tev_p2_V209S_W211R_M218D_relaxed_9.pdb UNCLEAVED 152 | tev_p2_V209S_W211R_relaxed_0.pdb UNCLEAVED 153 | tev_p2_V209S_W211R_relaxed_1.pdb UNCLEAVED 154 | tev_p2_V209S_W211R_relaxed_2.pdb UNCLEAVED 155 | tev_p2_V209S_W211R_relaxed_3.pdb UNCLEAVED 156 | tev_p2_V209S_W211R_relaxed_4.pdb UNCLEAVED 157 | tev_p2_V209S_W211R_relaxed_5.pdb UNCLEAVED 158 | tev_p2_V209S_W211R_relaxed_6.pdb UNCLEAVED 159 | tev_p2_V209S_W211R_relaxed_7.pdb UNCLEAVED 160 | tev_p2_V209S_W211R_relaxed_8.pdb UNCLEAVED 161 | tev_p2_V209S_W211R_relaxed_9.pdb UNCLEAVED 162 | tev_p6_F172Y_N174H_relaxed_0.pdb UNCLEAVED 163 | tev_p6_F172Y_N174H_relaxed_1.pdb UNCLEAVED 164 | tev_p6_F172Y_N174H_relaxed_2.pdb UNCLEAVED 165 | tev_p6_F172Y_N174H_relaxed_3.pdb UNCLEAVED 166 | tev_p6_F172Y_N174H_relaxed_4.pdb UNCLEAVED 167 | tev_p6_F172Y_N174H_relaxed_5.pdb UNCLEAVED 168 | tev_p6_F172Y_N174H_relaxed_6.pdb UNCLEAVED 169 | tev_p6_F172Y_N174H_relaxed_7.pdb UNCLEAVED 170 | tev_p6_F172Y_N174H_relaxed_8.pdb UNCLEAVED 171 | tev_p6_F172Y_N174H_relaxed_9.pdb UNCLEAVED 172 | tev_p6_K141E_T175P_relaxed_0.pdb UNCLEAVED 173 | tev_p6_K141E_T175P_relaxed_1.pdb UNCLEAVED 174 | tev_p6_K141E_T175P_relaxed_2.pdb UNCLEAVED 175 | tev_p6_K141E_T175P_relaxed_3.pdb UNCLEAVED 176 | tev_p6_K141E_T175P_relaxed_4.pdb UNCLEAVED 177 | tev_p6_K141E_T175P_relaxed_5.pdb UNCLEAVED 178 | tev_p6_K141E_T175P_relaxed_6.pdb UNCLEAVED 179 | tev_p6_K141E_T175P_relaxed_7.pdb UNCLEAVED 180 | tev_p6_K141E_T175P_relaxed_8.pdb UNCLEAVED 181 | tev_p6_K141E_T175P_relaxed_9.pdb UNCLEAVED 182 | tev_p6_T173A_T175A_N176D_relaxed_0.pdb UNCLEAVED 183 | tev_p6_T173A_T175A_N176D_relaxed_1.pdb UNCLEAVED 184 | tev_p6_T173A_T175A_N176D_relaxed_2.pdb UNCLEAVED 185 | tev_p6_T173A_T175A_N176D_relaxed_3.pdb UNCLEAVED 186 | tev_p6_T173A_T175A_N176D_relaxed_4.pdb UNCLEAVED 187 | 
tev_p6_T173A_T175A_N176D_relaxed_5.pdb UNCLEAVED 188 | tev_p6_T173A_T175A_N176D_relaxed_6.pdb UNCLEAVED 189 | tev_p6_T173A_T175A_N176D_relaxed_7.pdb UNCLEAVED 190 | tev_p6_T173A_T175A_N176D_relaxed_8.pdb UNCLEAVED 191 | tev_p6_T173A_T175A_N176D_relaxed_9.pdb UNCLEAVED 192 | -------------------------------------------------------------------------------- /graph/classifications/20220911_tev_design_successes_p1pA.txt: -------------------------------------------------------------------------------- 1 | Sequence Result 2 | tev_p2_wt_relaxed_3.pdb UNCLEAVED 3 | tev_p2_wt_relaxed_6.pdb UNCLEAVED 4 | tev_p2_wt_relaxed_0.pdb UNCLEAVED 5 | tev_p2_wt_relaxed_8.pdb UNCLEAVED 6 | tev_p2_wt_relaxed_7.pdb UNCLEAVED 7 | tev_p2_wt_relaxed_5.pdb UNCLEAVED 8 | tev_p2_wt_relaxed_1.pdb UNCLEAVED 9 | tev_p2_wt_relaxed_2.pdb UNCLEAVED 10 | tev_p2_wt_relaxed_4.pdb UNCLEAVED 11 | tev_p2_wt_relaxed_9.pdb UNCLEAVED 12 | tev_p6_c3_relaxed_2.pdb CLEAVED 13 | tev_p6_c3_relaxed_6.pdb CLEAVED 14 | tev_p6_c3_relaxed_7.pdb CLEAVED 15 | tev_p6_c3_relaxed_3.pdb CLEAVED 16 | tev_p6_c3_relaxed_4.pdb CLEAVED 17 | tev_p6_c3_relaxed_0.pdb CLEAVED 18 | tev_p6_c3_relaxed_5.pdb CLEAVED 19 | tev_p6_c3_relaxed_8.pdb CLEAVED 20 | tev_p6_c3_relaxed_9.pdb CLEAVED 21 | tev_p6_c3_relaxed_1.pdb CLEAVED 22 | tev_p6_wt_relaxed_7.pdb UNCLEAVED 23 | tev_p6_wt_relaxed_2.pdb UNCLEAVED 24 | tev_p6_wt_relaxed_9.pdb UNCLEAVED 25 | tev_p6_wt_relaxed_8.pdb UNCLEAVED 26 | tev_p6_wt_relaxed_6.pdb UNCLEAVED 27 | tev_p6_wt_relaxed_1.pdb UNCLEAVED 28 | tev_p6_wt_relaxed_3.pdb UNCLEAVED 29 | tev_p6_wt_relaxed_5.pdb UNCLEAVED 30 | tev_p6_wt_relaxed_0.pdb UNCLEAVED 31 | tev_p6_wt_relaxed_4.pdb UNCLEAVED 32 | tev_p6_c2_relaxed_4.pdb CLEAVED 33 | tev_p6_c2_relaxed_6.pdb CLEAVED 34 | tev_p6_c2_relaxed_1.pdb CLEAVED 35 | tev_p6_c2_relaxed_7.pdb CLEAVED 36 | tev_p6_c2_relaxed_8.pdb CLEAVED 37 | tev_p6_c2_relaxed_5.pdb CLEAVED 38 | tev_p6_c2_relaxed_2.pdb CLEAVED 39 | tev_p6_c2_relaxed_0.pdb CLEAVED 40 | tev_p6_c2_relaxed_3.pdb CLEAVED 41 | 
tev_p6_c2_relaxed_9.pdb CLEAVED 42 | tev_p6_c5_relaxed_0.pdb CLEAVED 43 | tev_p6_c5_relaxed_5.pdb CLEAVED 44 | tev_p6_c5_relaxed_7.pdb CLEAVED 45 | tev_p6_c5_relaxed_9.pdb CLEAVED 46 | tev_p6_c5_relaxed_6.pdb CLEAVED 47 | tev_p6_c5_relaxed_8.pdb CLEAVED 48 | tev_p6_c5_relaxed_3.pdb CLEAVED 49 | tev_p6_c5_relaxed_1.pdb CLEAVED 50 | tev_p6_c5_relaxed_4.pdb CLEAVED 51 | tev_p6_c5_relaxed_2.pdb CLEAVED 52 | tev_p2_c10_relaxed_5.pdb CLEAVED 53 | tev_p2_c10_relaxed_7.pdb CLEAVED 54 | tev_p2_c10_relaxed_9.pdb CLEAVED 55 | tev_p2_c10_relaxed_4.pdb CLEAVED 56 | tev_p2_c10_relaxed_6.pdb CLEAVED 57 | tev_p2_c10_relaxed_2.pdb CLEAVED 58 | tev_p2_c10_relaxed_8.pdb CLEAVED 59 | tev_p2_c10_relaxed_1.pdb CLEAVED 60 | tev_p2_c10_relaxed_0.pdb CLEAVED 61 | tev_p2_c10_relaxed_3.pdb CLEAVED 62 | tev_p2_c9_relaxed_9.pdb CLEAVED 63 | tev_p2_c9_relaxed_4.pdb CLEAVED 64 | tev_p2_c9_relaxed_3.pdb CLEAVED 65 | tev_p2_c9_relaxed_7.pdb CLEAVED 66 | tev_p2_c9_relaxed_0.pdb CLEAVED 67 | tev_p2_c9_relaxed_5.pdb CLEAVED 68 | tev_p2_c9_relaxed_8.pdb CLEAVED 69 | tev_p2_c9_relaxed_1.pdb CLEAVED 70 | tev_p2_c9_relaxed_6.pdb CLEAVED 71 | tev_p2_c9_relaxed_2.pdb CLEAVED 72 | tev_wt_wt_relaxed_0.pdb CLEAVED 73 | tev_wt_wt_relaxed_1.pdb CLEAVED 74 | tev_wt_wt_relaxed_9.pdb CLEAVED 75 | tev_wt_wt_relaxed_5.pdb CLEAVED 76 | tev_wt_wt_relaxed_8.pdb CLEAVED 77 | tev_wt_wt_relaxed_7.pdb CLEAVED 78 | tev_wt_wt_relaxed_2.pdb CLEAVED 79 | tev_wt_wt_relaxed_6.pdb CLEAVED 80 | tev_wt_wt_relaxed_4.pdb CLEAVED 81 | tev_wt_wt_relaxed_3.pdb CLEAVED 82 | tev_p2_c2_relaxed_8.pdb CLEAVED 83 | tev_p2_c2_relaxed_1.pdb CLEAVED 84 | tev_p2_c2_relaxed_5.pdb CLEAVED 85 | tev_p2_c2_relaxed_0.pdb CLEAVED 86 | tev_p2_c2_relaxed_4.pdb CLEAVED 87 | tev_p2_c2_relaxed_3.pdb CLEAVED 88 | tev_p2_c2_relaxed_9.pdb CLEAVED 89 | tev_p2_c2_relaxed_2.pdb CLEAVED 90 | tev_p2_c2_relaxed_6.pdb CLEAVED 91 | tev_p2_c2_relaxed_7.pdb CLEAVED 92 | tev_p6_c7_relaxed_7.pdb CLEAVED 93 | tev_p6_c7_relaxed_6.pdb CLEAVED 94 | tev_p6_c7_relaxed_9.pdb 
CLEAVED 95 | tev_p6_c7_relaxed_8.pdb CLEAVED 96 | tev_p6_c7_relaxed_3.pdb CLEAVED 97 | tev_p6_c7_relaxed_0.pdb CLEAVED 98 | tev_p6_c7_relaxed_5.pdb CLEAVED 99 | tev_p6_c7_relaxed_1.pdb CLEAVED 100 | tev_p6_c7_relaxed_4.pdb CLEAVED 101 | tev_p6_c7_relaxed_2.pdb CLEAVED 102 | tev_p6_c8_relaxed_1.pdb CLEAVED 103 | tev_p6_c8_relaxed_5.pdb CLEAVED 104 | tev_p6_c8_relaxed_4.pdb CLEAVED 105 | tev_p6_c8_relaxed_0.pdb CLEAVED 106 | tev_p6_c8_relaxed_3.pdb CLEAVED 107 | tev_p6_c8_relaxed_9.pdb CLEAVED 108 | tev_p6_c8_relaxed_7.pdb CLEAVED 109 | tev_p6_c8_relaxed_6.pdb CLEAVED 110 | tev_p6_c8_relaxed_2.pdb CLEAVED 111 | tev_p6_c8_relaxed_8.pdb CLEAVED 112 | tev_p6_c6_relaxed_0.pdb CLEAVED 113 | tev_p6_c6_relaxed_1.pdb CLEAVED 114 | tev_p6_c6_relaxed_5.pdb CLEAVED 115 | tev_p6_c6_relaxed_2.pdb CLEAVED 116 | tev_p6_c6_relaxed_8.pdb CLEAVED 117 | tev_p6_c6_relaxed_3.pdb CLEAVED 118 | tev_p6_c6_relaxed_7.pdb CLEAVED 119 | tev_p6_c6_relaxed_9.pdb CLEAVED 120 | tev_p6_c6_relaxed_6.pdb CLEAVED 121 | tev_p6_c6_relaxed_4.pdb CLEAVED 122 | -------------------------------------------------------------------------------- /graph/classifications/20220922_tev_design_dual_directions_cleavage.txt: -------------------------------------------------------------------------------- 1 | Sequence Result 2 | p2_c2_5.pdb CLEAVED 3 | p2_c2_2.pdb CLEAVED 4 | p2_c2_4.pdb CLEAVED 5 | p2_c2_7.pdb CLEAVED 6 | p2_c2_8.pdb CLEAVED 7 | p2_c2_1.pdb CLEAVED 8 | p2_c2_0.pdb CLEAVED 9 | p2_c2_3.pdb CLEAVED 10 | p2_c2_6.pdb CLEAVED 11 | p2_c2_9.pdb CLEAVED 12 | wt_wt_2.pdb CLEAVED 13 | wt_wt_4.pdb CLEAVED 14 | wt_wt_5.pdb CLEAVED 15 | wt_wt_6.pdb CLEAVED 16 | wt_wt_1.pdb CLEAVED 17 | wt_wt_8.pdb CLEAVED 18 | wt_wt_0.pdb CLEAVED 19 | wt_wt_9.pdb CLEAVED 20 | wt_wt_7.pdb CLEAVED 21 | wt_wt_3.pdb CLEAVED 22 | p6_c5_6.pdb CLEAVED 23 | p6_c5_9.pdb CLEAVED 24 | p6_c5_7.pdb CLEAVED 25 | p6_c5_8.pdb CLEAVED 26 | p6_c5_5.pdb CLEAVED 27 | p6_c5_1.pdb CLEAVED 28 | p6_c5_0.pdb CLEAVED 29 | p6_c5_2.pdb CLEAVED 30 | p6_c5_4.pdb 
CLEAVED 31 | p6_c5_3.pdb CLEAVED 32 | p2_c9_4.pdb CLEAVED 33 | p2_c9_3.pdb CLEAVED 34 | p2_c9_5.pdb CLEAVED 35 | p2_c9_9.pdb CLEAVED 36 | p2_c9_6.pdb CLEAVED 37 | p2_c9_8.pdb CLEAVED 38 | p2_c9_0.pdb CLEAVED 39 | p2_c9_2.pdb CLEAVED 40 | p2_c9_7.pdb CLEAVED 41 | p2_c9_1.pdb CLEAVED 42 | P6_c3_9.pdb CLEAVED 43 | P6_c3_3.pdb CLEAVED 44 | P6_c3_1.pdb CLEAVED 45 | P6_c3_0.pdb CLEAVED 46 | P6_c3_4.pdb CLEAVED 47 | P6_c3_5.pdb CLEAVED 48 | P6_c3_2.pdb CLEAVED 49 | P6_c3_7.pdb CLEAVED 50 | P6_c3_6.pdb CLEAVED 51 | P6_c3_8.pdb CLEAVED 52 | p6_c7_7.pdb CLEAVED 53 | p6_c7_3.pdb CLEAVED 54 | p6_c7_0.pdb CLEAVED 55 | p6_c7_5.pdb CLEAVED 56 | p6_c7_9.pdb CLEAVED 57 | p6_c7_1.pdb CLEAVED 58 | p6_c7_8.pdb CLEAVED 59 | p6_c7_2.pdb CLEAVED 60 | p6_c7_4.pdb CLEAVED 61 | p6_c7_6.pdb CLEAVED 62 | p2_wt_8.pdb UNCLEAVED 63 | p2_wt_4.pdb UNCLEAVED 64 | p2_wt_1.pdb UNCLEAVED 65 | p2_wt_0.pdb UNCLEAVED 66 | p2_wt_2.pdb UNCLEAVED 67 | p2_wt_6.pdb UNCLEAVED 68 | p2_wt_5.pdb UNCLEAVED 69 | p2_wt_7.pdb UNCLEAVED 70 | p2_wt_3.pdb UNCLEAVED 71 | p2_wt_9.pdb UNCLEAVED 72 | P6_c2_7.pdb CLEAVED 73 | P6_c2_0.pdb CLEAVED 74 | P6_c2_4.pdb CLEAVED 75 | P6_c2_9.pdb CLEAVED 76 | P6_c2_5.pdb CLEAVED 77 | P6_c2_1.pdb CLEAVED 78 | P6_c2_3.pdb CLEAVED 79 | P6_c2_8.pdb CLEAVED 80 | P6_c2_6.pdb CLEAVED 81 | P6_c2_2.pdb CLEAVED 82 | P6_wt_1.pdb UNCLEAVED 83 | P6_wt_2.pdb UNCLEAVED 84 | P6_wt_6.pdb UNCLEAVED 85 | P6_wt_5.pdb UNCLEAVED 86 | P6_wt_3.pdb UNCLEAVED 87 | P6_wt_8.pdb UNCLEAVED 88 | P6_wt_4.pdb UNCLEAVED 89 | P6_wt_7.pdb UNCLEAVED 90 | P6_wt_0.pdb UNCLEAVED 91 | P6_wt_9.pdb UNCLEAVED 92 | p6_c1_2.pdb UNCLEAVED 93 | p6_c1_4.pdb UNCLEAVED 94 | p6_c1_3.pdb UNCLEAVED 95 | p6_c1_7.pdb UNCLEAVED 96 | p6_c1_5.pdb UNCLEAVED 97 | p6_c1_8.pdb UNCLEAVED 98 | p6_c1_0.pdb UNCLEAVED 99 | p6_c1_9.pdb UNCLEAVED 100 | p6_c1_6.pdb UNCLEAVED 101 | p6_c1_1.pdb UNCLEAVED 102 | p2_c10_2.pdb CLEAVED 103 | p2_c10_8.pdb CLEAVED 104 | p2_c10_1.pdb CLEAVED 105 | p2_c10_0.pdb CLEAVED 106 | p2_c10_9.pdb CLEAVED 107 | p2_c10_5.pdb 
CLEAVED 108 | p2_c10_6.pdb CLEAVED 109 | p2_c10_4.pdb CLEAVED 110 | p2_c10_7.pdb CLEAVED 111 | p2_c10_3.pdb CLEAVED 112 | p6_c6_1.pdb CLEAVED 113 | p6_c6_6.pdb CLEAVED 114 | p6_c6_9.pdb CLEAVED 115 | p6_c6_4.pdb CLEAVED 116 | p6_c6_2.pdb CLEAVED 117 | p6_c6_5.pdb CLEAVED 118 | p6_c6_3.pdb CLEAVED 119 | p6_c6_7.pdb CLEAVED 120 | p6_c6_8.pdb CLEAVED 121 | p6_c6_0.pdb CLEAVED 122 | p2_c5_4.pdb UNCLEAVED 123 | p2_c5_7.pdb UNCLEAVED 124 | p2_c5_2.pdb UNCLEAVED 125 | p2_c5_3.pdb UNCLEAVED 126 | p2_c5_1.pdb UNCLEAVED 127 | p2_c5_0.pdb UNCLEAVED 128 | p2_c5_5.pdb UNCLEAVED 129 | p2_c5_8.pdb UNCLEAVED 130 | p2_c5_6.pdb UNCLEAVED 131 | p2_c5_9.pdb UNCLEAVED 132 | p6_c8_5.pdb CLEAVED 133 | p6_c8_6.pdb CLEAVED 134 | p6_c8_1.pdb CLEAVED 135 | p6_c8_8.pdb CLEAVED 136 | p6_c8_4.pdb CLEAVED 137 | p6_c8_9.pdb CLEAVED 138 | p6_c8_3.pdb CLEAVED 139 | p6_c8_0.pdb CLEAVED 140 | p6_c8_2.pdb CLEAVED 141 | p6_c8_7.pdb CLEAVED 142 | p2_c1_2.pdb UNCLEAVED 143 | p2_c1_8.pdb UNCLEAVED 144 | p2_c1_1.pdb UNCLEAVED 145 | p2_c1_0.pdb UNCLEAVED 146 | p2_c1_5.pdb UNCLEAVED 147 | p2_c1_7.pdb UNCLEAVED 148 | p2_c1_4.pdb UNCLEAVED 149 | p2_c1_3.pdb UNCLEAVED 150 | p2_c1_9.pdb UNCLEAVED 151 | p2_c1_6.pdb UNCLEAVED 152 | -------------------------------------------------------------------------------- /graph/classifications/20220925_tev_design_p3.txt: -------------------------------------------------------------------------------- 1 | Sequence Result 2 | p3_c2_5.pdb UNCLEAVED 3 | p3_c2_2.pdb UNCLEAVED 4 | p3_c2_4.pdb UNCLEAVED 5 | p3_c2_7.pdb UNCLEAVED 6 | p3_c2_8.pdb UNCLEAVED 7 | p3_c2_1.pdb UNCLEAVED 8 | p3_c2_0.pdb UNCLEAVED 9 | p3_c2_3.pdb UNCLEAVED 10 | p3_c2_6.pdb UNCLEAVED 11 | p3_c2_9.pdb UNCLEAVED 12 | p3_c3_9.pdb UNCLEAVED 13 | p3_c3_3.pdb UNCLEAVED 14 | p3_c3_1.pdb UNCLEAVED 15 | p3_c3_0.pdb UNCLEAVED 16 | p3_c3_4.pdb UNCLEAVED 17 | p3_c3_5.pdb UNCLEAVED 18 | p3_c3_2.pdb UNCLEAVED 19 | p3_c3_7.pdb UNCLEAVED 20 | p3_c3_6.pdb UNCLEAVED 21 | p3_c3_8.pdb UNCLEAVED 22 | p3_wt_8.pdb UNCLEAVED 23 | 
p3_wt_4.pdb UNCLEAVED 24 | p3_wt_1.pdb UNCLEAVED 25 | p3_wt_0.pdb UNCLEAVED 26 | p3_wt_2.pdb UNCLEAVED 27 | p3_wt_6.pdb UNCLEAVED 28 | p3_wt_5.pdb UNCLEAVED 29 | p3_wt_7.pdb UNCLEAVED 30 | p3_wt_3.pdb UNCLEAVED 31 | p3_wt_9.pdb UNCLEAVED 32 | p3_c1_2.pdb UNCLEAVED 33 | p3_c1_8.pdb UNCLEAVED 34 | p3_c1_1.pdb UNCLEAVED 35 | p3_c1_0.pdb UNCLEAVED 36 | p3_c1_5.pdb UNCLEAVED 37 | p3_c1_7.pdb UNCLEAVED 38 | p3_c1_4.pdb UNCLEAVED 39 | p3_c1_3.pdb UNCLEAVED 40 | p3_c1_9.pdb UNCLEAVED 41 | p3_c1_6.pdb UNCLEAVED 42 | -------------------------------------------------------------------------------- /graph/classifications/dvdar_design.txt: -------------------------------------------------------------------------------- 1 | Sequence Result 2 | hcv_no_swap_sub_1_31_relaxed_0.pdb CLEAVED 3 | hcv_all_swap_sub_1_11_relaxed_1.pdb CLEAVED 4 | hcv_all_swap_design_1_81_designed_1.pdb CLEAVED 5 | hcv_all_swap_design_3_9_designed_4.pdb CLEAVED 6 | hcv_all_swap_design_1_94_designed_3.pdb CLEAVED 7 | hcv_all_swap_design_3_67_designed_3.pdb CLEAVED 8 | hcv_all_swap_design_3_21_designed_4.pdb CLEAVED 9 | hcv_all_swap_design_3_7_designed_1.pdb CLEAVED 10 | hcv_all_swap_design_0_57_designed_0.pdb CLEAVED 11 | hcv_all_swap_design_3_47_designed_1.pdb CLEAVED 12 | hcv_all_swap_design_3_0_designed_7.pdb CLEAVED 13 | hcv_all_swap_design_3_29_designed_8.pdb CLEAVED 14 | hcv_all_swap_design_1_48_designed_3.pdb CLEAVED 15 | hcv_all_swap_design_3_55_designed_6.pdb CLEAVED 16 | hcv_all_swap_design_3_60_designed_9.pdb CLEAVED 17 | hcv_all_swap_design_3_93_designed_0.pdb CLEAVED 18 | hcv_all_swap_design_1_87_designed_3.pdb CLEAVED 19 | hcv_all_swap_design_3_84_designed_3.pdb CLEAVED 20 | hcv_all_swap_design_3_59_designed_1.pdb CLEAVED 21 | hcv_all_swap_design_3_20_designed_8.pdb CLEAVED 22 | hcv_all_swap_design_3_35_designed_4.pdb CLEAVED 23 | hcv_all_swap_design_3_56_designed_2.pdb CLEAVED 24 | hcv_all_swap_design_3_0_designed_6.pdb CLEAVED 25 | hcv_all_swap_design_0_97_designed_2.pdb CLEAVED 26 | 
hcv_all_swap_design_3_71_designed_3.pdb CLEAVED 27 | hcv_all_swap_design_3_62_designed_5.pdb CLEAVED 28 | hcv_all_swap_design_3_52_designed_3.pdb CLEAVED 29 | hcv_all_swap_design_3_14_designed_7.pdb CLEAVED 30 | hcv_all_swap_design_3_98_designed_4.pdb CLEAVED 31 | hcv_all_swap_design_3_48_designed_3.pdb CLEAVED 32 | hcv_all_swap_design_3_83_designed_6.pdb CLEAVED 33 | hcv_all_swap_design_1_82_designed_8.pdb CLEAVED 34 | hcv_all_swap_design_2_77_designed_5.pdb CLEAVED 35 | hcv_all_swap_design_3_59_designed_4.pdb CLEAVED 36 | hcv_all_swap_design_3_80_designed_4.pdb CLEAVED 37 | hcv_all_swap_design_3_28_designed_6.pdb CLEAVED 38 | hcv_all_swap_design_3_48_designed_6.pdb CLEAVED 39 | hcv_all_swap_design_3_70_designed_4.pdb CLEAVED 40 | hcv_all_swap_design_3_58_designed_4.pdb CLEAVED 41 | hcv_all_swap_design_0_45_designed_5.pdb CLEAVED 42 | hcv_all_swap_design_3_86_designed_2.pdb CLEAVED 43 | hcv_all_swap_design_3_61_designed_6.pdb CLEAVED 44 | hcv_all_swap_design_3_24_designed_6.pdb CLEAVED 45 | hcv_all_swap_design_3_59_designed_6.pdb CLEAVED 46 | hcv_all_swap_design_3_56_designed_6.pdb CLEAVED 47 | hcv_all_swap_design_3_89_designed_1.pdb CLEAVED 48 | hcv_all_swap_design_2_16_designed_7.pdb CLEAVED 49 | hcv_all_swap_design_2_89_designed_4.pdb CLEAVED 50 | hcv_all_swap_design_2_39_designed_1.pdb CLEAVED 51 | hcv_all_swap_design_2_23_designed_3.pdb CLEAVED 52 | hcv_all_swap_design_2_12_designed_0.pdb CLEAVED 53 | hcv_all_swap_design_2_90_designed_7.pdb CLEAVED 54 | hcv_all_swap_design_4_54_designed_3.pdb CLEAVED 55 | hcv_all_swap_design_2_69_designed_7.pdb CLEAVED 56 | hcv_all_swap_design_2_59_designed_3.pdb CLEAVED 57 | hcv_all_swap_design_4_74_designed_9.pdb CLEAVED 58 | hcv_all_swap_design_2_75_designed_2.pdb CLEAVED 59 | hcv_all_swap_design_2_11_designed_8.pdb CLEAVED 60 | hcv_all_swap_design_4_63_designed_0.pdb CLEAVED 61 | hcv_all_swap_design_2_38_designed_9.pdb CLEAVED 62 | hcv_all_swap_design_4_41_designed_8.pdb CLEAVED 63 | 
hcv_all_swap_design_4_67_designed_0.pdb CLEAVED 64 | hcv_all_swap_design_3_82_designed_0.pdb CLEAVED 65 | hcv_all_swap_design_2_40_designed_8.pdb CLEAVED 66 | hcv_all_swap_design_4_39_designed_9.pdb CLEAVED 67 | hcv_all_swap_design_0_41_designed_3.pdb CLEAVED 68 | hcv_all_swap_design_1_6_designed_8.pdb CLEAVED 69 | hcv_all_swap_design_2_65_designed_7.pdb CLEAVED 70 | hcv_all_swap_design_0_63_designed_6.pdb CLEAVED 71 | hcv_all_swap_design_2_41_designed_5.pdb CLEAVED 72 | hcv_all_swap_design_4_97_designed_4.pdb CLEAVED 73 | hcv_all_swap_design_0_78_designed_3.pdb CLEAVED 74 | hcv_all_swap_design_3_21_designed_8.pdb CLEAVED 75 | hcv_all_swap_design_4_58_designed_5.pdb CLEAVED 76 | hcv_all_swap_design_3_44_designed_7.pdb CLEAVED 77 | hcv_all_swap_design_2_1_designed_8.pdb CLEAVED 78 | hcv_all_swap_design_2_91_designed_6.pdb CLEAVED 79 | hcv_all_swap_design_3_65_designed_3.pdb CLEAVED 80 | hcv_all_swap_design_2_67_designed_4.pdb CLEAVED 81 | hcv_all_swap_design_4_14_designed_4.pdb CLEAVED 82 | hcv_all_swap_design_0_38_designed_0.pdb CLEAVED 83 | hcv_all_swap_design_2_23_designed_5.pdb CLEAVED 84 | hcv_all_swap_design_4_82_designed_1.pdb CLEAVED 85 | hcv_all_swap_design_4_22_designed_2.pdb CLEAVED 86 | hcv_all_swap_design_2_75_designed_8.pdb CLEAVED 87 | hcv_all_swap_design_3_20_designed_1.pdb CLEAVED 88 | hcv_all_swap_design_4_73_designed_7.pdb CLEAVED 89 | hcv_all_swap_design_2_46_designed_5.pdb CLEAVED 90 | hcv_all_swap_design_3_69_designed_8.pdb CLEAVED 91 | hcv_all_swap_design_4_75_designed_7.pdb CLEAVED 92 | hcv_all_swap_design_4_53_designed_1.pdb CLEAVED 93 | hcv_no_swap_sub_2_17_relaxed_1.pdb CLEAVED 94 | hcv_no_swap_sub_3_32_relaxed_1.pdb CLEAVED 95 | hcv_all_swap_sub_3_24_relaxed_1.pdb CLEAVED 96 | hcv_all_swap_design_0_16_designed_8.pdb CLEAVED 97 | hcv_all_swap_design_3_84_designed_1.pdb CLEAVED 98 | hcv_all_swap_design_4_90_designed_3.pdb CLEAVED 99 | hcv_all_swap_design_3_90_designed_1.pdb CLEAVED 100 | hcv_all_swap_design_3_60_designed_2.pdb CLEAVED 101 | 
hcv_all_swap_design_3_52_designed_4.pdb CLEAVED 102 | hcv_all_swap_design_3_86_designed_8.pdb CLEAVED 103 | hcv_all_swap_design_1_38_designed_3.pdb CLEAVED 104 | hcv_all_swap_design_0_9_designed_2.pdb CLEAVED 105 | hcv_all_swap_design_3_17_designed_2.pdb CLEAVED 106 | hcv_all_swap_design_3_93_designed_9.pdb CLEAVED 107 | hcv_all_swap_design_0_40_designed_0.pdb CLEAVED 108 | hcv_all_swap_design_0_94_designed_1.pdb CLEAVED 109 | hcv_all_swap_design_3_62_designed_8.pdb CLEAVED 110 | hcv_all_swap_design_3_86_designed_0.pdb CLEAVED 111 | hcv_all_swap_design_3_87_designed_0.pdb CLEAVED 112 | hcv_all_swap_design_3_34_designed_2.pdb CLEAVED 113 | hcv_all_swap_design_3_0_designed_8.pdb CLEAVED 114 | hcv_all_swap_design_3_55_designed_7.pdb CLEAVED 115 | hcv_all_swap_design_3_9_designed_7.pdb CLEAVED 116 | hcv_all_swap_design_0_94_designed_3.pdb CLEAVED 117 | hcv_all_swap_design_3_98_designed_8.pdb CLEAVED 118 | hcv_all_swap_design_1_48_designed_9.pdb CLEAVED 119 | hcv_all_swap_design_1_76_designed_9.pdb CLEAVED 120 | hcv_all_swap_design_3_34_designed_3.pdb CLEAVED 121 | hcv_all_swap_design_3_96_designed_8.pdb CLEAVED 122 | hcv_all_swap_design_4_5_designed_9.pdb CLEAVED 123 | hcv_all_swap_design_0_93_designed_0.pdb CLEAVED 124 | hcv_all_swap_design_3_73_designed_4.pdb CLEAVED 125 | hcv_all_swap_design_4_61_designed_4.pdb CLEAVED 126 | hcv_all_swap_design_2_59_designed_9.pdb CLEAVED 127 | hcv_all_swap_design_4_75_designed_5.pdb CLEAVED 128 | hcv_all_swap_design_3_58_designed_6.pdb CLEAVED 129 | hcv_all_swap_design_1_61_designed_8.pdb CLEAVED 130 | hcv_all_swap_design_3_28_designed_4.pdb CLEAVED 131 | hcv_all_swap_design_4_40_designed_0.pdb CLEAVED 132 | hcv_all_swap_design_3_0_designed_1.pdb CLEAVED 133 | hcv_all_swap_design_3_78_designed_5.pdb CLEAVED 134 | hcv_all_swap_design_4_85_designed_9.pdb CLEAVED 135 | hcv_all_swap_design_4_64_designed_5.pdb CLEAVED 136 | hcv_all_swap_design_3_39_designed_1.pdb CLEAVED 137 | hcv_all_swap_design_2_34_designed_7.pdb CLEAVED 138 | 
hcv_all_swap_design_2_53_designed_6.pdb CLEAVED 139 | hcv_all_swap_design_4_93_designed_2.pdb CLEAVED 140 | hcv_all_swap_design_4_4_designed_9.pdb CLEAVED 141 | hcv_all_swap_design_4_3_designed_5.pdb CLEAVED 142 | hcv_all_swap_design_3_12_designed_5.pdb CLEAVED 143 | hcv_all_swap_design_3_84_designed_0.pdb CLEAVED 144 | hcv_all_swap_design_3_6_designed_7.pdb CLEAVED 145 | hcv_all_swap_design_2_90_designed_4.pdb CLEAVED 146 | hcv_all_swap_design_4_61_designed_0.pdb CLEAVED 147 | hcv_all_swap_design_3_94_designed_4.pdb CLEAVED 148 | hcv_all_swap_design_2_1_designed_5.pdb CLEAVED 149 | hcv_all_swap_design_4_33_designed_3.pdb CLEAVED 150 | DVDAR_rand_des_00_2_relaxed_1.pdb UNCLEAVED 151 | DVDAR_rand_des_01_0_relaxed_1.pdb UNCLEAVED 152 | DVDAR_rand_des_02_0_relaxed_0.pdb UNCLEAVED 153 | DVDAR_rand_des_03_1_relaxed_0.pdb UNCLEAVED 154 | DVDAR_rand_des_04_4_relaxed_1.pdb UNCLEAVED 155 | DVDAR_rand_des_05_4_relaxed_1.pdb UNCLEAVED 156 | DVDAR_rand_des_06_3_relaxed_1.pdb UNCLEAVED 157 | DVDAR_rand_des_07_4_relaxed_0.pdb UNCLEAVED 158 | DVDAR_rand_des_08_0_relaxed_1.pdb UNCLEAVED 159 | DVDAR_rand_des_09_3_relaxed_1.pdb UNCLEAVED 160 | DVDAR_rand_des_19_3_relaxed_1.pdb UNCLEAVED 161 | DVDAR_rand_des_20_2_relaxed_0.pdb UNCLEAVED 162 | DVDAR_rand_des_21_3_relaxed_1.pdb UNCLEAVED 163 | DVDAR_rand_des_22_0_relaxed_1.pdb UNCLEAVED 164 | DVDAR_rand_des_23_0_relaxed_1.pdb UNCLEAVED 165 | DVDAR_rand_des_24_2_relaxed_1.pdb UNCLEAVED 166 | DVDAR_rand_des_25_0_relaxed_1.pdb UNCLEAVED 167 | DVDAR_rand_des_26_3_relaxed_0.pdb UNCLEAVED 168 | DVDAR_rand_des_27_0_relaxed_1.pdb UNCLEAVED 169 | DVDAR_rand_des_28_1_relaxed_1.pdb UNCLEAVED 170 | DVDAR_rand_des_29_4_relaxed_1.pdb UNCLEAVED 171 | DVDAR_rand_des_31_1_relaxed_0.pdb UNCLEAVED 172 | DVDAR_rand_des_32_2_relaxed_1.pdb UNCLEAVED 173 | DVDAR_rand_des_33_2_relaxed_0.pdb UNCLEAVED 174 | DVDAR_rand_des_34_0_relaxed_1.pdb UNCLEAVED 175 | DVDAR_rand_des_35_1_relaxed_0.pdb UNCLEAVED 176 | DVDAR_rand_des_36_1_relaxed_1.pdb UNCLEAVED 177 | 
DVDAR_rand_des_37_0_relaxed_1.pdb UNCLEAVED 178 | DVDAR_rand_des_38_0_relaxed_0.pdb UNCLEAVED 179 | DVDAR_rand_des_39_1_relaxed_0.pdb UNCLEAVED 180 | DVDAR_rand_des_40_2_relaxed_0.pdb UNCLEAVED 181 | DVDAR_rand_des_41_4_relaxed_1.pdb UNCLEAVED 182 | DVDAR_rand_des_42_1_relaxed_0.pdb UNCLEAVED 183 | DVDAR_rand_des_43_2_relaxed_0.pdb UNCLEAVED 184 | DVDAR_rand_des_44_0_relaxed_0.pdb UNCLEAVED 185 | DVDAR_rand_des_45_4_relaxed_1.pdb UNCLEAVED 186 | DVDAR_rand_des_46_2_relaxed_1.pdb UNCLEAVED 187 | DVDAR_rand_des_47_1_relaxed_0.pdb UNCLEAVED 188 | DVDAR_rand_des_48_1_relaxed_1.pdb UNCLEAVED 189 | DVDAR_rand_des_49_2_relaxed_0.pdb UNCLEAVED 190 | DVDAR_rand_des_50_3_relaxed_1.pdb UNCLEAVED 191 | DVDAR_rand_des_51_0_relaxed_1.pdb UNCLEAVED 192 | DVDAR_rand_des_52_0_relaxed_0.pdb UNCLEAVED 193 | DVDAR_rand_des_53_4_relaxed_1.pdb UNCLEAVED 194 | DVDAR_rand_des_54_0_relaxed_0.pdb UNCLEAVED 195 | DVDAR_rand_des_55_4_relaxed_0.pdb UNCLEAVED 196 | DVDAR_rand_des_56_1_relaxed_0.pdb UNCLEAVED 197 | DVDAR_rand_des_57_2_relaxed_0.pdb UNCLEAVED 198 | DVDAR_rand_des_58_3_relaxed_0.pdb UNCLEAVED 199 | DVDAR_rand_des_59_2_relaxed_1.pdb UNCLEAVED 200 | DVDAR_rand_des_60_4_relaxed_1.pdb UNCLEAVED 201 | DVDAR_rand_des_61_0_relaxed_1.pdb UNCLEAVED 202 | DVDAR_rand_des_62_4_relaxed_1.pdb UNCLEAVED 203 | DVDAR_rand_des_63_0_relaxed_0.pdb UNCLEAVED 204 | DVDAR_rand_des_64_2_relaxed_1.pdb UNCLEAVED 205 | DVDAR_rand_des_65_2_relaxed_1.pdb UNCLEAVED 206 | DVDAR_rand_des_66_3_relaxed_0.pdb UNCLEAVED 207 | DVDAR_rand_des_67_1_relaxed_0.pdb UNCLEAVED 208 | DVDAR_rand_des_68_0_relaxed_1.pdb UNCLEAVED 209 | DVDAR_rand_des_69_0_relaxed_1.pdb UNCLEAVED 210 | DVDAR_rand_des_70_1_relaxed_0.pdb UNCLEAVED 211 | DVDAR_rand_des_71_0_relaxed_0.pdb UNCLEAVED 212 | DVDAR_rand_des_72_4_relaxed_0.pdb UNCLEAVED 213 | DVDAR_rand_des_73_1_relaxed_1.pdb UNCLEAVED 214 | DVDAR_rand_des_74_2_relaxed_0.pdb UNCLEAVED 215 | DVDAR_rand_des_75_1_relaxed_0.pdb UNCLEAVED 216 | DVDAR_rand_des_76_1_relaxed_0.pdb UNCLEAVED 217 | 
DVDAR_rand_des_77_4_relaxed_1.pdb UNCLEAVED 218 | DVDAR_rand_des_78_4_relaxed_0.pdb UNCLEAVED 219 | DVDAR_rand_des_79_0_relaxed_0.pdb UNCLEAVED 220 | DVDAR_rand_des_80_4_relaxed_0.pdb UNCLEAVED 221 | DVDAR_rand_des_81_1_relaxed_1.pdb UNCLEAVED 222 | DVDAR_rand_des_82_1_relaxed_0.pdb UNCLEAVED 223 | DVDAR_rand_des_83_0_relaxed_1.pdb UNCLEAVED 224 | DVDAR_rand_des_84_3_relaxed_1.pdb UNCLEAVED 225 | DVDAR_rand_des_85_2_relaxed_0.pdb UNCLEAVED 226 | DVDAR_rand_des_86_4_relaxed_0.pdb UNCLEAVED 227 | DVDAR_rand_des_87_3_relaxed_1.pdb UNCLEAVED 228 | DVDAR_rand_des_88_1_relaxed_1.pdb UNCLEAVED 229 | DVDAR_rand_des_89_0_relaxed_0.pdb UNCLEAVED 230 | DVDAR_rand_des_90_2_relaxed_1.pdb UNCLEAVED 231 | DVDAR_rand_des_91_0_relaxed_0.pdb UNCLEAVED 232 | DVDAR_rand_des_92_2_relaxed_0.pdb UNCLEAVED 233 | DVDAR_rand_des_93_1_relaxed_0.pdb UNCLEAVED 234 | DVDAR_rand_des_94_1_relaxed_1.pdb UNCLEAVED 235 | DVDAR_rand_des_95_0_relaxed_0.pdb UNCLEAVED 236 | DVDAR_rand_des_96_1_relaxed_0.pdb UNCLEAVED 237 | DVDAR_rand_des_97_3_relaxed_0.pdb UNCLEAVED 238 | DVDAR_rand_des_98_4_relaxed_1.pdb UNCLEAVED 239 | DVDAR_rand_des_99_2_relaxed_0.pdb UNCLEAVED 240 | -------------------------------------------------------------------------------- /graph/classifications/protease_3c_designs.txt: -------------------------------------------------------------------------------- 1 | Sequence Result 2 | 2bof_KENVVQS_4.pdb UNCLEAVED 3 | 2bof_KENVVQS_0.pdb UNCLEAVED 4 | 2bof_KENVVQS_1.pdb UNCLEAVED 5 | 2bof_KENVVQS_3.pdb UNCLEAVED 6 | 2bof_KENVVQS_2.pdb UNCLEAVED 7 | 5y4l_LPSAREG_4.pdb UNCLEAVED 8 | 5y4l_LPSAREG_1.pdb UNCLEAVED 9 | 5y4l_LPSAREG_2.pdb UNCLEAVED 10 | 5y4l_LPSAREG_0.pdb UNCLEAVED 11 | 5y4l_LPSAREG_3.pdb UNCLEAVED 12 | 5y4l_FSIAKEG_0.pdb UNCLEAVED 13 | 5y4l_FSIAKEG_3.pdb UNCLEAVED 14 | 5y4l_FSIAKEG_2.pdb UNCLEAVED 15 | 5y4l_FSIAKEG_1.pdb UNCLEAVED 16 | 5y4l_FSIAKEG_4.pdb UNCLEAVED 17 | 5y4l_LNIREEG_4.pdb CLEAVED 18 | 5y4l_LNIREEG_0.pdb CLEAVED 19 | 5y4l_LNIREEG_3.pdb CLEAVED 20 | 
5y4l_LNIREEG_1.pdb CLEAVED 21 | 5y4l_LNIREEG_2.pdb CLEAVED 22 | 5y4l_LSMAKEG_3.pdb UNCLEAVED 23 | 5y4l_LSMAKEG_1.pdb UNCLEAVED 24 | 5y4l_LSMAKEG_0.pdb UNCLEAVED 25 | 5y4l_LSMAKEG_4.pdb UNCLEAVED 26 | 5y4l_LSMAKEG_2.pdb UNCLEAVED 27 | 5y4l_AGKTKEG_2.pdb UNCLEAVED 28 | 5y4l_AGKTKEG_4.pdb UNCLEAVED 29 | 5y4l_AGKTKEG_0.pdb UNCLEAVED 30 | 5y4l_AGKTKEG_1.pdb UNCLEAVED 31 | 5y4l_AGKTKEG_3.pdb UNCLEAVED 32 | 5y4l_AEKTKEG_3.pdb UNCLEAVED 33 | 5y4l_AEKTKEG_0.pdb UNCLEAVED 34 | 5y4l_AEKTKEG_4.pdb UNCLEAVED 35 | 5y4l_AEKTKEG_2.pdb UNCLEAVED 36 | 5y4l_AEKTKEG_1.pdb UNCLEAVED 37 | 5y4l_VAPLKEG_1.pdb UNCLEAVED 38 | 5y4l_VAPLKEG_0.pdb UNCLEAVED 39 | 5y4l_VAPLKEG_3.pdb UNCLEAVED 40 | 5y4l_VAPLKEG_4.pdb UNCLEAVED 41 | 5y4l_VAPLKEG_2.pdb UNCLEAVED 42 | 2b0f_VAEEAQS_3.pdb UNCLEAVED 43 | 2b0f_VAEEAQS_0.pdb UNCLEAVED 44 | 2b0f_VAEEAQS_4.pdb UNCLEAVED 45 | 2b0f_VAEEAQS_1.pdb UNCLEAVED 46 | 2b0f_VAEEAQS_2.pdb UNCLEAVED 47 | 5y4l_RFFAREG_1.pdb CLEAVED 48 | 5y4l_RFFAREG_4.pdb CLEAVED 49 | 5y4l_RFFAREG_2.pdb CLEAVED 50 | 5y4l_RFFAREG_0.pdb CLEAVED 51 | 5y4l_RFFAREG_3.pdb CLEAVED 52 | 5y4l_LGKNEEG_2.pdb UNCLEAVED 53 | 5y4l_LGKNEEG_0.pdb UNCLEAVED 54 | 5y4l_LGKNEEG_1.pdb UNCLEAVED 55 | 5y4l_LGKNEEG_4.pdb UNCLEAVED 56 | 5y4l_LGKNEEG_3.pdb UNCLEAVED 57 | -------------------------------------------------------------------------------- /graph/classifications/protease_3c_designs_2bof.txt: -------------------------------------------------------------------------------- 1 | Sequence Result 2 | 2bof_KENVVQS_4.pdb UNCLEAVED 3 | 2bof_KENVVQS_0.pdb UNCLEAVED 4 | 2bof_KENVVQS_1.pdb UNCLEAVED 5 | 2bof_KENVVQS_3.pdb UNCLEAVED 6 | 2bof_KENVVQS_2.pdb UNCLEAVED 7 | 2b0f_VAEEAQS_3.pdb UNCLEAVED 8 | 2b0f_VAEEAQS_0.pdb UNCLEAVED 9 | 2b0f_VAEEAQS_4.pdb UNCLEAVED 10 | 2b0f_VAEEAQS_1.pdb UNCLEAVED 11 | 2b0f_VAEEAQS_2.pdb UNCLEAVED 12 | -------------------------------------------------------------------------------- /graph/classifications/protease_3c_designs_5y4l.txt: 
-------------------------------------------------------------------------------- 1 | Sequence Result 2 | 5y4l_LPSAREG_4.pdb UNCLEAVED 3 | 5y4l_LPSAREG_1.pdb UNCLEAVED 4 | 5y4l_LPSAREG_2.pdb UNCLEAVED 5 | 5y4l_LPSAREG_0.pdb UNCLEAVED 6 | 5y4l_LPSAREG_3.pdb UNCLEAVED 7 | 5y4l_FSIAKEG_0.pdb UNCLEAVED 8 | 5y4l_FSIAKEG_3.pdb UNCLEAVED 9 | 5y4l_FSIAKEG_2.pdb UNCLEAVED 10 | 5y4l_FSIAKEG_1.pdb UNCLEAVED 11 | 5y4l_FSIAKEG_4.pdb UNCLEAVED 12 | 5y4l_LNIREEG_4.pdb CLEAVED 13 | 5y4l_LNIREEG_0.pdb CLEAVED 14 | 5y4l_LNIREEG_3.pdb CLEAVED 15 | 5y4l_LNIREEG_1.pdb CLEAVED 16 | 5y4l_LNIREEG_2.pdb CLEAVED 17 | 5y4l_LSMAKEG_3.pdb UNCLEAVED 18 | 5y4l_LSMAKEG_1.pdb UNCLEAVED 19 | 5y4l_LSMAKEG_0.pdb UNCLEAVED 20 | 5y4l_LSMAKEG_4.pdb UNCLEAVED 21 | 5y4l_LSMAKEG_2.pdb UNCLEAVED 22 | 5y4l_AGKTKEG_2.pdb UNCLEAVED 23 | 5y4l_AGKTKEG_4.pdb UNCLEAVED 24 | 5y4l_AGKTKEG_0.pdb UNCLEAVED 25 | 5y4l_AGKTKEG_1.pdb UNCLEAVED 26 | 5y4l_AGKTKEG_3.pdb UNCLEAVED 27 | 5y4l_AEKTKEG_3.pdb UNCLEAVED 28 | 5y4l_AEKTKEG_0.pdb UNCLEAVED 29 | 5y4l_AEKTKEG_4.pdb UNCLEAVED 30 | 5y4l_AEKTKEG_2.pdb UNCLEAVED 31 | 5y4l_AEKTKEG_1.pdb UNCLEAVED 32 | 5y4l_VAPLKEG_1.pdb UNCLEAVED 33 | 5y4l_VAPLKEG_0.pdb UNCLEAVED 34 | 5y4l_VAPLKEG_3.pdb UNCLEAVED 35 | 5y4l_VAPLKEG_4.pdb UNCLEAVED 36 | 5y4l_VAPLKEG_2.pdb UNCLEAVED 37 | 5y4l_RFFAREG_1.pdb CLEAVED 38 | 5y4l_RFFAREG_4.pdb CLEAVED 39 | 5y4l_RFFAREG_2.pdb CLEAVED 40 | 5y4l_RFFAREG_0.pdb CLEAVED 41 | 5y4l_RFFAREG_3.pdb CLEAVED 42 | 5y4l_LGKNEEG_2.pdb UNCLEAVED 43 | 5y4l_LGKNEEG_0.pdb UNCLEAVED 44 | 5y4l_LGKNEEG_1.pdb UNCLEAVED 45 | 5y4l_LGKNEEG_4.pdb UNCLEAVED 46 | 5y4l_LGKNEEG_3.pdb UNCLEAVED 47 | -------------------------------------------------------------------------------- /graph/classifications/protease_3c_designs_oydv.txt: -------------------------------------------------------------------------------- 1 | Sequence Result 2 | oydv_EKTKEQA_2.pdb UNCLEAVED 3 | oydv_EKTKEQA_1.pdb UNCLEAVED 4 | oydv_EKTKEQA_4.pdb UNCLEAVED 5 | oydv_EKTKEQA_0.pdb UNCLEAVED 6 | oydv_EKTKEQA_3.pdb 
UNCLEAVED 7 | -------------------------------------------------------------------------------- /graph/classifications/tev-ER-summarized_label_singlePDB.txt: -------------------------------------------------------------------------------- 1 | Sequence Result 2 | LTQQCQA_0.pdb CLEAVED 3 | RFVVRQA_2.pdb CLEAVED 4 | LGLIYQA_3.pdb CLEAVED 5 | CVNPFQA_0.pdb CLEAVED 6 | CVVKRQA_2.pdb CLEAVED 7 | RVVKMQA_1.pdb CLEAVED 8 | INGCYQA_0.pdb CLEAVED 9 | HESVTQA_2.pdb CLEAVED 10 | LLALMQA_0.pdb CLEAVED 11 | WRISGQA_2.pdb CLEAVED 12 | GNQISQA_1.pdb CLEAVED 13 | DPRIIQA_3.pdb CLEAVED 14 | NHKSCQA_2.pdb CLEAVED 15 | QPFVKQA_0.pdb CLEAVED 16 | TCWGGQA_2.pdb CLEAVED 17 | KGQTRQA_2.pdb CLEAVED 18 | YMKNVQA_3.pdb CLEAVED 19 | GVMIGQA_2.pdb CLEAVED 20 | GKMYMQA_1.pdb CLEAVED 21 | WFAKVQA_1.pdb CLEAVED 22 | QLDVWQA_0.pdb CLEAVED 23 | RWKVGQA_1.pdb CLEAVED 24 | LILCLQA_1.pdb CLEAVED 25 | WTVNTQA_1.pdb CLEAVED 26 | AYGIEQA_1.pdb CLEAVED 27 | ELMYSQA_0.pdb CLEAVED 28 | IHYLLQA_2.pdb CLEAVED 29 | EDWYVQA_0.pdb CLEAVED 30 | DDEQSQA_1.pdb CLEAVED 31 | IRGVQQA_2.pdb CLEAVED 32 | TLLEGQA_2.pdb CLEAVED 33 | APCTGQA_2.pdb CLEAVED 34 | QSKHSQA_1.pdb CLEAVED 35 | FCFWMQA_2.pdb CLEAVED 36 | MYVDFQA_2.pdb CLEAVED 37 | KFSVAQA_2.pdb CLEAVED 38 | VNISHQA_4.pdb CLEAVED 39 | CYTGKQA_2.pdb CLEAVED 40 | GIGDVQA_3.pdb CLEAVED 41 | MTVIRQA_4.pdb CLEAVED 42 | GNLVMQA_4.pdb CLEAVED 43 | RFSTYQA_0.pdb CLEAVED 44 | LHILRQA_0.pdb CLEAVED 45 | MTWCTQA_2.pdb CLEAVED 46 | CLWCCQA_1.pdb CLEAVED 47 | MHTSSQA_3.pdb CLEAVED 48 | LQLRLQA_0.pdb CLEAVED 49 | LRIAVQA_4.pdb CLEAVED 50 | KRLTVQA_4.pdb CLEAVED 51 | IWVILQA_2.pdb CLEAVED 52 | MYVCGQA_4.pdb CLEAVED 53 | WSVVCQA_1.pdb CLEAVED 54 | MPMVLQA_4.pdb CLEAVED 55 | GPMARQA_0.pdb CLEAVED 56 | ESSRTQA_3.pdb CLEAVED 57 | WDRYTQA_2.pdb CLEAVED 58 | QGGSRQA_4.pdb CLEAVED 59 | LSTCLQA_4.pdb CLEAVED 60 | GDMVTQA_4.pdb CLEAVED 61 | MGCVSQA_0.pdb CLEAVED 62 | SVDTSQA_3.pdb CLEAVED 63 | RIRRRQA_0.pdb CLEAVED 64 | GERSMQA_4.pdb CLEAVED 65 | KIGTSQA_1.pdb CLEAVED 66 | TAMCAQA_3.pdb 
CLEAVED 67 | VDRWEQA_3.pdb CLEAVED 68 | KLRPRQA_2.pdb CLEAVED 69 | LNSVSQA_0.pdb CLEAVED 70 | HHFGRQA_2.pdb CLEAVED 71 | VRGSVQA_0.pdb CLEAVED 72 | PGGSRQA_1.pdb CLEAVED 73 | YTCLQQA_2.pdb CLEAVED 74 | LGDLPQA_1.pdb CLEAVED 75 | QCLGSQA_1.pdb CLEAVED 76 | NDGLNQA_0.pdb CLEAVED 77 | RDMSGQA_1.pdb CLEAVED 78 | WTSIVQA_3.pdb CLEAVED 79 | SRFLQQA_2.pdb CLEAVED 80 | ALWKSQA_4.pdb CLEAVED 81 | LSRRMQA_0.pdb CLEAVED 82 | LGGCSQA_2.pdb CLEAVED 83 | SVIPYQA_2.pdb CLEAVED 84 | NLVHCQA_4.pdb CLEAVED 85 | AERRQQA_1.pdb CLEAVED 86 | CAYVIQA_2.pdb CLEAVED 87 | KRAPLQA_0.pdb CLEAVED 88 | LSPVSQA_3.pdb CLEAVED 89 | TPRGTQA_2.pdb CLEAVED 90 | SENCTQA_0.pdb CLEAVED 91 | YLLIFQA_3.pdb CLEAVED 92 | QVGSMQA_3.pdb CLEAVED 93 | ISLDYQA_0.pdb CLEAVED 94 | RPSFGQA_4.pdb CLEAVED 95 | CRWEGQA_3.pdb CLEAVED 96 | CLCRYQA_1.pdb CLEAVED 97 | RRWRRQA_1.pdb CLEAVED 98 | PRAVKQA_0.pdb CLEAVED 99 | RKLWTQA_0.pdb CLEAVED 100 | RVSRRQA_4.pdb CLEAVED 101 | CTNHVQA_1.pdb CLEAVED 102 | LWQFNQA_4.pdb CLEAVED 103 | SVNSWQA_0.pdb CLEAVED 104 | IFSFMQA_0.pdb CLEAVED 105 | HMRCLQA_1.pdb CLEAVED 106 | LSTKYQA_1.pdb CLEAVED 107 | LPDLIQA_0.pdb CLEAVED 108 | KLGPSQA_4.pdb UNCLEAVED 109 | SVMACQA_0.pdb UNCLEAVED 110 | TCTPKQA_4.pdb UNCLEAVED 111 | FCALTQA_0.pdb UNCLEAVED 112 | CHLRYQA_2.pdb UNCLEAVED 113 | SLLRGQA_4.pdb UNCLEAVED 114 | YMFMIQA_4.pdb UNCLEAVED 115 | IRTRVQA_2.pdb UNCLEAVED 116 | SHGQAQA_3.pdb UNCLEAVED 117 | TGILSQA_0.pdb UNCLEAVED 118 | SCNGRQA_4.pdb UNCLEAVED 119 | IVLLIQA_3.pdb UNCLEAVED 120 | EYCACQA_0.pdb UNCLEAVED 121 | WIEWCQA_2.pdb UNCLEAVED 122 | SSMFIQA_4.pdb UNCLEAVED 123 | ERLEWQA_2.pdb UNCLEAVED 124 | CERLCQA_1.pdb UNCLEAVED 125 | DLFSLQA_0.pdb UNCLEAVED 126 | LCCMLQA_0.pdb UNCLEAVED 127 | DRRQIQA_4.pdb UNCLEAVED 128 | SFISMQA_4.pdb UNCLEAVED 129 | GWGYHQA_0.pdb UNCLEAVED 130 | FLAVSQA_2.pdb UNCLEAVED 131 | PAWSFQA_0.pdb UNCLEAVED 132 | SMNFVQA_1.pdb UNCLEAVED 133 | KVWVFQA_3.pdb UNCLEAVED 134 | DLTIIQA_3.pdb UNCLEAVED 135 | ARHAVQA_4.pdb UNCLEAVED 136 | GVNSRQA_2.pdb UNCLEAVED 137 | 
QLPGKQA_1.pdb UNCLEAVED 138 | PAGWEQA_4.pdb UNCLEAVED 139 | WAFPSQA_2.pdb UNCLEAVED 140 | NCTESQA_4.pdb UNCLEAVED 141 | DLALTQA_3.pdb UNCLEAVED 142 | CSLHCQA_2.pdb UNCLEAVED 143 | YGTIIQA_4.pdb UNCLEAVED 144 | CMYSAQA_3.pdb UNCLEAVED 145 | SNAQGQA_2.pdb UNCLEAVED 146 | RSEVGQA_3.pdb UNCLEAVED 147 | YDRHGQA_2.pdb UNCLEAVED 148 | WVNGLQA_2.pdb UNCLEAVED 149 | NHSLGQA_1.pdb UNCLEAVED 150 | PYLAYQA_3.pdb UNCLEAVED 151 | LTVASQA_0.pdb UNCLEAVED 152 | LTKLMQA_4.pdb UNCLEAVED 153 | TRASNQA_1.pdb UNCLEAVED 154 | FAPMHQA_4.pdb UNCLEAVED 155 | VCSGVQA_0.pdb UNCLEAVED 156 | RELYPQA_2.pdb UNCLEAVED 157 | QSFHSQA_4.pdb UNCLEAVED 158 | LLVGIQA_0.pdb UNCLEAVED 159 | PANIEQA_2.pdb UNCLEAVED 160 | MTRENQA_2.pdb UNCLEAVED 161 | WLGCMQA_2.pdb UNCLEAVED 162 | SGEAYQA_4.pdb UNCLEAVED 163 | RYGCSQA_1.pdb UNCLEAVED 164 | FAISAQA_1.pdb UNCLEAVED 165 | HLRSAQA_3.pdb UNCLEAVED 166 | ACGLDQA_4.pdb UNCLEAVED 167 | RCGPEQA_0.pdb UNCLEAVED 168 | PSDAPQA_0.pdb UNCLEAVED 169 | SNWMHQA_0.pdb UNCLEAVED 170 | CYVVSQA_1.pdb UNCLEAVED 171 | IPGNDQA_1.pdb UNCLEAVED 172 | YWGRFQA_3.pdb UNCLEAVED 173 | RKPGGQA_1.pdb UNCLEAVED 174 | PSHMFQA_2.pdb UNCLEAVED 175 | CPSNYQA_0.pdb UNCLEAVED 176 | IRFVGQA_1.pdb UNCLEAVED 177 | NRLYSQA_2.pdb UNCLEAVED 178 | VSDLLQA_1.pdb UNCLEAVED 179 | NWSFRQA_2.pdb UNCLEAVED 180 | AASGRQA_0.pdb UNCLEAVED 181 | YESRRQA_0.pdb UNCLEAVED 182 | TGPIGQA_1.pdb UNCLEAVED 183 | RIQSIQA_1.pdb UNCLEAVED 184 | CVEKDQA_0.pdb UNCLEAVED 185 | FPAAGQA_2.pdb UNCLEAVED 186 | WMLSPQA_2.pdb UNCLEAVED 187 | SSCVSQA_3.pdb UNCLEAVED 188 | GSRMYQA_2.pdb UNCLEAVED 189 | MEASCQA_3.pdb UNCLEAVED 190 | MWMCGQA_1.pdb UNCLEAVED 191 | YSNRMQA_0.pdb UNCLEAVED 192 | PGRTRQA_2.pdb UNCLEAVED 193 | LPRGEQA_1.pdb UNCLEAVED 194 | PQGAYQA_4.pdb UNCLEAVED 195 | SLMPDQA_4.pdb UNCLEAVED 196 | ITSRPQA_4.pdb UNCLEAVED 197 | LRNTMQA_1.pdb UNCLEAVED 198 | LNFSVQA_3.pdb UNCLEAVED 199 | INGCFQA_3.pdb UNCLEAVED 200 | SLDSHQA_4.pdb UNCLEAVED 201 | RSTLGQA_0.pdb UNCLEAVED 202 | LIIQGQA_4.pdb UNCLEAVED 203 | TLVAAQA_4.pdb 
UNCLEAVED 204 | YLRMGQA_4.pdb UNCLEAVED 205 | GCMIHQA_1.pdb UNCLEAVED 206 | CGALVQA_2.pdb UNCLEAVED 207 | FGKGNQA_0.pdb UNCLEAVED 208 | RRPVCQA_1.pdb UNCLEAVED 209 | SGVGSQA_3.pdb UNCLEAVED 210 | MMFKGQA_3.pdb UNCLEAVED 211 | LVYLGQA_0.pdb UNCLEAVED 212 | GSCCVQA_1.pdb UNCLEAVED 213 | MYWNGQA_2.pdb UNCLEAVED 214 | VGLPNQA_2.pdb UNCLEAVED 215 | THCPFQA_1.pdb UNCLEAVED 216 | PSYHQQA_3.pdb UNCLEAVED 217 | SSRPRQA_4.pdb UNCLEAVED 218 | DAISRQA_0.pdb UNCLEAVED 219 | WLFYIQA_3.pdb UNCLEAVED 220 | LFRAWQA_1.pdb UNCLEAVED 221 | CGTVVQA_3.pdb UNCLEAVED 222 | YQTTGQA_4.pdb UNCLEAVED 223 | LCSTNQA_3.pdb UNCLEAVED 224 | RPVASQA_1.pdb UNCLEAVED 225 | PYITYQA_2.pdb UNCLEAVED 226 | THGHSQA_2.pdb UNCLEAVED 227 | RRYHDQA_3.pdb UNCLEAVED 228 | HFSLFQA_0.pdb UNCLEAVED 229 | HAQAHQA_2.pdb UNCLEAVED 230 | ILRAHQA_4.pdb UNCLEAVED 231 | RLMVFQA_4.pdb UNCLEAVED 232 | ASWPPQA_0.pdb UNCLEAVED 233 | SSGRSQA_4.pdb UNCLEAVED 234 | WRGSEQA_4.pdb UNCLEAVED 235 | FCVLYQA_2.pdb UNCLEAVED 236 | STLSYQA_1.pdb UNCLEAVED 237 | PPGGIQA_0.pdb UNCLEAVED 238 | HYSESQA_4.pdb UNCLEAVED 239 | SEGANQA_3.pdb UNCLEAVED 240 | ERGFFQA_2.pdb UNCLEAVED 241 | TRTVAQA_4.pdb UNCLEAVED 242 | GGVRWQA_1.pdb UNCLEAVED 243 | SYPVRQA_4.pdb UNCLEAVED 244 | EFSDVQA_1.pdb UNCLEAVED 245 | FYHTGQA_0.pdb UNCLEAVED 246 | MSVKSQA_0.pdb UNCLEAVED 247 | GECVSQA_4.pdb UNCLEAVED 248 | RSHRGQA_2.pdb UNCLEAVED 249 | RHYRSQA_3.pdb UNCLEAVED 250 | RLMLGQA_2.pdb UNCLEAVED 251 | FEGTSQA_3.pdb UNCLEAVED 252 | VWMGFQA_1.pdb UNCLEAVED 253 | SRRVSQA_0.pdb UNCLEAVED 254 | NEVEVQA_4.pdb UNCLEAVED 255 | TVSTSQA_2.pdb UNCLEAVED 256 | CQSYDQA_4.pdb UNCLEAVED 257 | LRNRTQA_3.pdb UNCLEAVED 258 | YTSGSQA_2.pdb UNCLEAVED 259 | KCTWCQA_2.pdb UNCLEAVED 260 | YWDPSQA_0.pdb UNCLEAVED 261 | ACNHPQA_0.pdb UNCLEAVED 262 | DYYNRQA_0.pdb UNCLEAVED 263 | SRFCIQA_2.pdb UNCLEAVED 264 | MMSDSQA_2.pdb UNCLEAVED 265 | GQLKWQA_0.pdb UNCLEAVED 266 | CSRWVQA_1.pdb UNCLEAVED 267 | STLYSQA_2.pdb UNCLEAVED 268 | LVLSVQA_3.pdb UNCLEAVED 269 | RGWLGQA_1.pdb UNCLEAVED 270 | 
HAYVLQA_4.pdb UNCLEAVED 271 | RSISSQA_4.pdb UNCLEAVED 272 | PRWKAQA_0.pdb UNCLEAVED 273 | SWHMIQA_3.pdb UNCLEAVED 274 | DRRFTQA_0.pdb UNCLEAVED 275 | LNGRGQA_0.pdb UNCLEAVED 276 | LAKQQQA_0.pdb UNCLEAVED 277 | YWLCRQA_4.pdb UNCLEAVED 278 | TNRAYQA_4.pdb UNCLEAVED 279 | SLRMIQA_1.pdb UNCLEAVED 280 | YFVCSQA_0.pdb UNCLEAVED 281 | DFFQVQA_2.pdb UNCLEAVED 282 | RKIQNQA_0.pdb UNCLEAVED 283 | TLFPCQA_4.pdb UNCLEAVED 284 | SWKMGQA_0.pdb UNCLEAVED 285 | TCNLRQA_2.pdb UNCLEAVED 286 | GDMPSQA_1.pdb UNCLEAVED 287 | RCAGMQA_1.pdb UNCLEAVED 288 | PRYCDQA_2.pdb UNCLEAVED 289 | VWTVHQA_2.pdb UNCLEAVED 290 | RLWLYQA_0.pdb UNCLEAVED 291 | LGSLWQA_4.pdb UNCLEAVED 292 | NWRCVQA_1.pdb UNCLEAVED 293 | WLLRTQA_4.pdb UNCLEAVED 294 | WPGSFQA_3.pdb UNCLEAVED 295 | HDSETQA_3.pdb UNCLEAVED 296 | LLVSCQA_0.pdb UNCLEAVED 297 | GRLVGQA_3.pdb UNCLEAVED 298 | TAVYFQA_2.pdb UNCLEAVED 299 | SGSHDQA_4.pdb UNCLEAVED 300 | TGFADQA_4.pdb UNCLEAVED 301 | RQANSQA_0.pdb UNCLEAVED 302 | NQRSAQA_0.pdb UNCLEAVED 303 | -------------------------------------------------------------------------------- /graph/classifications/tev_design_20220912.txt: -------------------------------------------------------------------------------- 1 | Sequence Result 2 | tev_p2_c10_1_relaxed_2.pdb CLEAVED 3 | tev_p2_c10_1_relaxed_0.pdb CLEAVED 4 | tev_p2_c10_1_relaxed_5.pdb CLEAVED 5 | tev_p2_c10_1_relaxed_3.pdb CLEAVED 6 | tev_p2_c10_1_relaxed_6.pdb CLEAVED 7 | tev_p2_c10_1_relaxed_4.pdb CLEAVED 8 | tev_p2_c10_1_relaxed_1.pdb CLEAVED 9 | tev_p2_wt_relaxed_1.pdb UNCLEAVED 10 | tev_p2_wt_relaxed_0.pdb UNCLEAVED 11 | tev_p2_wt_relaxed_2.pdb UNCLEAVED 12 | tev_p2_c9_relaxed_0.pdb CLEAVED 13 | tev_p2_c2_relaxed_2.pdb CLEAVED 14 | tev_p2_c2_relaxed_1.pdb CLEAVED 15 | tev_p2_c2_relaxed_0.pdb CLEAVED 16 | tev_p2_c9_relaxed_2.pdb CLEAVED 17 | tev_p2_c10_relaxed_1.pdb CLEAVED 18 | tev_p2_c10_relaxed_2.pdb CLEAVED 19 | tev_p2_c10_relaxed_0.pdb CLEAVED 20 | tev_p2_c9_relaxed_1.pdb CLEAVED 21 | tev_p2_c2_1_relaxed_6.pdb CLEAVED 22 | 
tev_p2_c2_1_relaxed_3.pdb CLEAVED 23 | tev_p2_c2_1_relaxed_0.pdb CLEAVED 24 | tev_p2_c2_1_relaxed_5.pdb CLEAVED 25 | tev_p2_c2_1_relaxed_1.pdb CLEAVED 26 | tev_p2_c2_1_relaxed_4.pdb CLEAVED 27 | tev_p2_c2_1_relaxed_2.pdb CLEAVED 28 | tev_p2_c9_1_relaxed_4.pdb CLEAVED 29 | tev_p2_c9_1_relaxed_5.pdb CLEAVED 30 | tev_p2_c9_1_relaxed_6.pdb CLEAVED 31 | tev_p2_c9_1_relaxed_1.pdb CLEAVED 32 | tev_p2_c9_1_relaxed_2.pdb CLEAVED 33 | tev_p2_c9_1_relaxed_0.pdb CLEAVED 34 | tev_p2_c9_1_relaxed_3.pdb CLEAVED 35 | tev_p2_wt_1_relaxed_0.pdb UNCLEAVED 36 | tev_p2_wt_1_relaxed_1.pdb UNCLEAVED 37 | tev_p2_wt_1_relaxed_4.pdb UNCLEAVED 38 | tev_p2_wt_1_relaxed_5.pdb UNCLEAVED 39 | tev_p2_wt_1_relaxed_6.pdb UNCLEAVED 40 | tev_p2_wt_1_relaxed_2.pdb UNCLEAVED 41 | tev_p2_wt_1_relaxed_3.pdb UNCLEAVED 42 | tev_p6_c2_1_relaxed_1.pdb CLEAVED 43 | tev_p6_c2_1_relaxed_5.pdb CLEAVED 44 | tev_p6_c2_1_relaxed_6.pdb CLEAVED 45 | tev_p6_c2_1_relaxed_4.pdb CLEAVED 46 | tev_p6_c2_1_relaxed_0.pdb CLEAVED 47 | tev_p6_c2_1_relaxed_2.pdb CLEAVED 48 | tev_p6_c2_1_relaxed_3.pdb CLEAVED 49 | tev_p6_c6_relaxed_2.pdb CLEAVED 50 | tev_p6_c6_relaxed_1.pdb CLEAVED 51 | tev_p6_c6_relaxed_0.pdb CLEAVED 52 | tev_p6_c7_relaxed_2.pdb CLEAVED 53 | tev_p6_c7_relaxed_0.pdb CLEAVED 54 | tev_p6_c7_relaxed_1.pdb CLEAVED 55 | tev_p6_c2_relaxed_0.pdb CLEAVED 56 | tev_p6_c2_relaxed_1.pdb CLEAVED 57 | tev_p6_c2_relaxed_2.pdb CLEAVED 58 | tev_p6_c3_relaxed_0.pdb CLEAVED 59 | tev_p6_c3_relaxed_1.pdb CLEAVED 60 | tev_p6_c5_relaxed_0.pdb CLEAVED 61 | tev_p6_c5_relaxed_2.pdb CLEAVED 62 | tev_p6_c5_relaxed_1.pdb CLEAVED 63 | tev_p6_wt_relaxed_1.pdb UNCLEAVED 64 | tev_p6_wt_relaxed_0.pdb UNCLEAVED 65 | tev_p6_wt_relaxed_2.pdb UNCLEAVED 66 | tev_p6_c3_relaxed_2.pdb CLEAVED 67 | tev_p6_c8_relaxed_2.pdb CLEAVED 68 | tev_p6_c8_relaxed_1.pdb CLEAVED 69 | tev_p6_c8_relaxed_0.pdb CLEAVED 70 | tev_p6_c3_1_relaxed_2.pdb CLEAVED 71 | tev_p6_c3_1_relaxed_5.pdb CLEAVED 72 | tev_p6_c3_1_relaxed_6.pdb CLEAVED 73 | tev_p6_c3_1_relaxed_3.pdb 
CLEAVED 74 | tev_p6_c3_1_relaxed_1.pdb CLEAVED 75 | tev_p6_c3_1_relaxed_0.pdb CLEAVED 76 | tev_p6_c3_1_relaxed_4.pdb CLEAVED 77 | tev_p6_c5_1_relaxed_4.pdb CLEAVED 78 | tev_p6_c5_1_relaxed_2.pdb CLEAVED 79 | tev_p6_c5_1_relaxed_0.pdb CLEAVED 80 | tev_p6_c5_1_relaxed_1.pdb CLEAVED 81 | tev_p6_c5_1_relaxed_6.pdb CLEAVED 82 | tev_p6_c5_1_relaxed_5.pdb CLEAVED 83 | tev_p6_c5_1_relaxed_3.pdb CLEAVED 84 | tev_p6_c6_1_relaxed_5.pdb CLEAVED 85 | tev_p6_c6_1_relaxed_4.pdb CLEAVED 86 | tev_p6_c6_1_relaxed_0.pdb CLEAVED 87 | tev_p6_c6_1_relaxed_6.pdb CLEAVED 88 | tev_p6_c6_1_relaxed_2.pdb CLEAVED 89 | tev_p6_c6_1_relaxed_1.pdb CLEAVED 90 | tev_p6_c6_1_relaxed_3.pdb CLEAVED 91 | tev_p6_c7_1_relaxed_1.pdb CLEAVED 92 | tev_p6_c7_1_relaxed_6.pdb CLEAVED 93 | tev_p6_c7_1_relaxed_4.pdb CLEAVED 94 | tev_p6_c7_1_relaxed_0.pdb CLEAVED 95 | tev_p6_c7_1_relaxed_2.pdb CLEAVED 96 | tev_p6_c7_1_relaxed_5.pdb CLEAVED 97 | tev_p6_c7_1_relaxed_3.pdb CLEAVED 98 | tev_p6_c8_1_relaxed_5.pdb CLEAVED 99 | tev_p6_c8_1_relaxed_6.pdb CLEAVED 100 | tev_p6_c8_1_relaxed_2.pdb CLEAVED 101 | tev_p6_c8_1_relaxed_1.pdb CLEAVED 102 | tev_p6_c8_1_relaxed_0.pdb CLEAVED 103 | tev_p6_c8_1_relaxed_4.pdb CLEAVED 104 | tev_p6_c8_1_relaxed_3.pdb CLEAVED 105 | tev_p6_wt_1_relaxed_1.pdb UNCLEAVED 106 | tev_p6_wt_1_relaxed_0.pdb UNCLEAVED 107 | tev_p6_wt_1_relaxed_5.pdb UNCLEAVED 108 | tev_p6_wt_1_relaxed_6.pdb UNCLEAVED 109 | tev_p6_wt_1_relaxed_2.pdb UNCLEAVED 110 | tev_p6_wt_1_relaxed_4.pdb UNCLEAVED 111 | tev_p6_wt_1_relaxed_3.pdb UNCLEAVED 112 | tev_wt_wt_relaxed_7.pdb CLEAVED 113 | tev_wt_wt_relaxed_5.pdb CLEAVED 114 | tev_wt_wt_relaxed_0.pdb CLEAVED 115 | tev_wt_wt_relaxed_4.pdb CLEAVED 116 | tev_wt_wt_relaxed_6.pdb CLEAVED 117 | tev_wt_wt_relaxed_9.pdb CLEAVED 118 | tev_wt_wt_relaxed_1.pdb CLEAVED 119 | tev_wt_wt_relaxed_2.pdb CLEAVED 120 | tev_wt_wt_relaxed_3.pdb CLEAVED 121 | tev_wt_wt_relaxed_8.pdb CLEAVED 122 | -------------------------------------------------------------------------------- 
/graph/classifications/tev_design_for_validation_dual_directions_cleavage.txt: -------------------------------------------------------------------------------- 1 | Sequence Result 2 | p2_c2_5.pdb CLEAVED 3 | p2_c2_2.pdb CLEAVED 4 | p2_c2_4.pdb CLEAVED 5 | p2_c2_7.pdb CLEAVED 6 | p2_c2_8.pdb CLEAVED 7 | p2_c2_1.pdb CLEAVED 8 | p2_c2_0.pdb CLEAVED 9 | p2_c2_3.pdb CLEAVED 10 | p2_c2_6.pdb CLEAVED 11 | p2_c2_9.pdb CLEAVED 12 | p6_c5_6.pdb CLEAVED 13 | p6_c5_9.pdb CLEAVED 14 | p6_c5_7.pdb CLEAVED 15 | p6_c5_8.pdb CLEAVED 16 | p6_c5_5.pdb CLEAVED 17 | p6_c5_1.pdb CLEAVED 18 | p6_c5_0.pdb CLEAVED 19 | p6_c5_2.pdb CLEAVED 20 | p6_c5_4.pdb CLEAVED 21 | p6_c5_3.pdb CLEAVED 22 | p2_c9_4.pdb CLEAVED 23 | p2_c9_3.pdb CLEAVED 24 | p2_c9_5.pdb CLEAVED 25 | p2_c9_9.pdb CLEAVED 26 | p2_c9_6.pdb CLEAVED 27 | p2_c9_8.pdb CLEAVED 28 | p2_c9_0.pdb CLEAVED 29 | p2_c9_2.pdb CLEAVED 30 | p2_c9_7.pdb CLEAVED 31 | p2_c9_1.pdb CLEAVED 32 | P6_c3_9.pdb CLEAVED 33 | P6_c3_3.pdb CLEAVED 34 | P6_c3_1.pdb CLEAVED 35 | P6_c3_0.pdb CLEAVED 36 | P6_c3_4.pdb CLEAVED 37 | P6_c3_5.pdb CLEAVED 38 | P6_c3_2.pdb CLEAVED 39 | P6_c3_7.pdb CLEAVED 40 | P6_c3_6.pdb CLEAVED 41 | P6_c3_8.pdb CLEAVED 42 | p6_c7_7.pdb CLEAVED 43 | p6_c7_3.pdb CLEAVED 44 | p6_c7_0.pdb CLEAVED 45 | p6_c7_5.pdb CLEAVED 46 | p6_c7_9.pdb CLEAVED 47 | p6_c7_1.pdb CLEAVED 48 | p6_c7_8.pdb CLEAVED 49 | p6_c7_2.pdb CLEAVED 50 | p6_c7_4.pdb CLEAVED 51 | p6_c7_6.pdb CLEAVED 52 | P6_c2_7.pdb CLEAVED 53 | P6_c2_0.pdb CLEAVED 54 | P6_c2_4.pdb CLEAVED 55 | P6_c2_9.pdb CLEAVED 56 | P6_c2_5.pdb CLEAVED 57 | P6_c2_1.pdb CLEAVED 58 | P6_c2_3.pdb CLEAVED 59 | P6_c2_8.pdb CLEAVED 60 | P6_c2_6.pdb CLEAVED 61 | P6_c2_2.pdb CLEAVED 62 | p2_c10_2.pdb CLEAVED 63 | p2_c10_8.pdb CLEAVED 64 | p2_c10_1.pdb CLEAVED 65 | p2_c10_0.pdb CLEAVED 66 | p2_c10_9.pdb CLEAVED 67 | p2_c10_5.pdb CLEAVED 68 | p2_c10_6.pdb CLEAVED 69 | p2_c10_4.pdb CLEAVED 70 | p2_c10_7.pdb CLEAVED 71 | p2_c10_3.pdb CLEAVED 72 | p6_c6_1.pdb CLEAVED 73 | p6_c6_6.pdb CLEAVED 74 | p6_c6_9.pdb 
CLEAVED 75 | p6_c6_4.pdb CLEAVED 76 | p6_c6_2.pdb CLEAVED 77 | p6_c6_5.pdb CLEAVED 78 | p6_c6_3.pdb CLEAVED 79 | p6_c6_7.pdb CLEAVED 80 | p6_c6_8.pdb CLEAVED 81 | p6_c6_0.pdb CLEAVED 82 | p6_c8_5.pdb CLEAVED 83 | p6_c8_6.pdb CLEAVED 84 | p6_c8_1.pdb CLEAVED 85 | p6_c8_8.pdb CLEAVED 86 | p6_c8_4.pdb CLEAVED 87 | p6_c8_9.pdb CLEAVED 88 | p6_c8_3.pdb CLEAVED 89 | p6_c8_0.pdb CLEAVED 90 | p6_c8_2.pdb CLEAVED 91 | p6_c8_7.pdb CLEAVED 92 | p3_c2_5.pdb UNCLEAVED 93 | p3_c2_2.pdb UNCLEAVED 94 | p3_c2_4.pdb UNCLEAVED 95 | p3_c2_7.pdb UNCLEAVED 96 | p3_c2_8.pdb UNCLEAVED 97 | p3_c2_1.pdb UNCLEAVED 98 | p3_c2_0.pdb UNCLEAVED 99 | p3_c2_3.pdb UNCLEAVED 100 | p3_c2_6.pdb UNCLEAVED 101 | p3_c2_9.pdb UNCLEAVED 102 | p3_c3_9.pdb UNCLEAVED 103 | p3_c3_3.pdb UNCLEAVED 104 | p3_c3_1.pdb UNCLEAVED 105 | p3_c3_0.pdb UNCLEAVED 106 | p3_c3_4.pdb UNCLEAVED 107 | p3_c3_5.pdb UNCLEAVED 108 | p3_c3_2.pdb UNCLEAVED 109 | p3_c3_7.pdb UNCLEAVED 110 | p3_c3_6.pdb UNCLEAVED 111 | p3_c3_8.pdb UNCLEAVED 112 | p3_c1_2.pdb UNCLEAVED 113 | p3_c1_8.pdb UNCLEAVED 114 | p3_c1_1.pdb UNCLEAVED 115 | p3_c1_0.pdb UNCLEAVED 116 | p3_c1_5.pdb UNCLEAVED 117 | p3_c1_7.pdb UNCLEAVED 118 | p3_c1_4.pdb UNCLEAVED 119 | p3_c1_3.pdb UNCLEAVED 120 | p3_c1_9.pdb UNCLEAVED 121 | p3_c1_6.pdb UNCLEAVED 122 | tev_p2_V209I_W211L_M218D_relaxed_0.pdb UNCLEAVED 123 | tev_p2_V209I_W211L_M218D_relaxed_1.pdb UNCLEAVED 124 | tev_p2_V209I_W211L_M218D_relaxed_2.pdb UNCLEAVED 125 | tev_p2_V209I_W211L_M218D_relaxed_3.pdb UNCLEAVED 126 | tev_p2_V209I_W211L_M218D_relaxed_4.pdb UNCLEAVED 127 | tev_p2_V209I_W211L_M218D_relaxed_5.pdb UNCLEAVED 128 | tev_p2_V209I_W211L_M218D_relaxed_6.pdb UNCLEAVED 129 | tev_p2_V209I_W211L_M218D_relaxed_7.pdb UNCLEAVED 130 | tev_p2_V209I_W211L_M218D_relaxed_8.pdb UNCLEAVED 131 | tev_p2_V209I_W211L_M218D_relaxed_9.pdb UNCLEAVED 132 | tev_p2_V209S_W211R_G213R_relaxed_0.pdb UNCLEAVED 133 | tev_p2_V209S_W211R_G213R_relaxed_1.pdb UNCLEAVED 134 | tev_p2_V209S_W211R_G213R_relaxed_2.pdb UNCLEAVED 135 | 
tev_p2_V209S_W211R_G213R_relaxed_3.pdb UNCLEAVED 136 | tev_p2_V209S_W211R_G213R_relaxed_4.pdb UNCLEAVED 137 | tev_p2_V209S_W211R_G213R_relaxed_5.pdb UNCLEAVED 138 | tev_p2_V209S_W211R_G213R_relaxed_6.pdb UNCLEAVED 139 | tev_p2_V209S_W211R_G213R_relaxed_7.pdb UNCLEAVED 140 | tev_p2_V209S_W211R_G213R_relaxed_8.pdb UNCLEAVED 141 | tev_p2_V209S_W211R_G213R_relaxed_9.pdb UNCLEAVED 142 | tev_p2_V209S_W211R_M218D_relaxed_0.pdb UNCLEAVED 143 | tev_p2_V209S_W211R_M218D_relaxed_1.pdb UNCLEAVED 144 | tev_p2_V209S_W211R_M218D_relaxed_2.pdb UNCLEAVED 145 | tev_p2_V209S_W211R_M218D_relaxed_3.pdb UNCLEAVED 146 | tev_p2_V209S_W211R_M218D_relaxed_4.pdb UNCLEAVED 147 | tev_p2_V209S_W211R_M218D_relaxed_5.pdb UNCLEAVED 148 | tev_p2_V209S_W211R_M218D_relaxed_6.pdb UNCLEAVED 149 | tev_p2_V209S_W211R_M218D_relaxed_7.pdb UNCLEAVED 150 | tev_p2_V209S_W211R_M218D_relaxed_8.pdb UNCLEAVED 151 | tev_p2_V209S_W211R_M218D_relaxed_9.pdb UNCLEAVED 152 | tev_p2_V209S_W211R_relaxed_0.pdb UNCLEAVED 153 | tev_p2_V209S_W211R_relaxed_1.pdb UNCLEAVED 154 | tev_p2_V209S_W211R_relaxed_2.pdb UNCLEAVED 155 | tev_p2_V209S_W211R_relaxed_3.pdb UNCLEAVED 156 | tev_p2_V209S_W211R_relaxed_4.pdb UNCLEAVED 157 | tev_p2_V209S_W211R_relaxed_5.pdb UNCLEAVED 158 | tev_p2_V209S_W211R_relaxed_6.pdb UNCLEAVED 159 | tev_p2_V209S_W211R_relaxed_7.pdb UNCLEAVED 160 | tev_p2_V209S_W211R_relaxed_8.pdb UNCLEAVED 161 | tev_p2_V209S_W211R_relaxed_9.pdb UNCLEAVED 162 | tev_p6_F172Y_N174H_relaxed_0.pdb UNCLEAVED 163 | tev_p6_F172Y_N174H_relaxed_1.pdb UNCLEAVED 164 | tev_p6_F172Y_N174H_relaxed_2.pdb UNCLEAVED 165 | tev_p6_F172Y_N174H_relaxed_3.pdb UNCLEAVED 166 | tev_p6_F172Y_N174H_relaxed_4.pdb UNCLEAVED 167 | tev_p6_F172Y_N174H_relaxed_5.pdb UNCLEAVED 168 | tev_p6_F172Y_N174H_relaxed_6.pdb UNCLEAVED 169 | tev_p6_F172Y_N174H_relaxed_7.pdb UNCLEAVED 170 | tev_p6_F172Y_N174H_relaxed_8.pdb UNCLEAVED 171 | tev_p6_F172Y_N174H_relaxed_9.pdb UNCLEAVED 172 | tev_p6_K141E_T175P_relaxed_0.pdb UNCLEAVED 173 | tev_p6_K141E_T175P_relaxed_1.pdb 
UNCLEAVED 174 | tev_p6_K141E_T175P_relaxed_2.pdb UNCLEAVED 175 | tev_p6_K141E_T175P_relaxed_3.pdb UNCLEAVED 176 | tev_p6_K141E_T175P_relaxed_4.pdb UNCLEAVED 177 | tev_p6_K141E_T175P_relaxed_5.pdb UNCLEAVED 178 | tev_p6_K141E_T175P_relaxed_6.pdb UNCLEAVED 179 | tev_p6_K141E_T175P_relaxed_7.pdb UNCLEAVED 180 | tev_p6_K141E_T175P_relaxed_8.pdb UNCLEAVED 181 | tev_p6_K141E_T175P_relaxed_9.pdb UNCLEAVED 182 | tev_p6_T173A_T175A_N176D_relaxed_0.pdb UNCLEAVED 183 | tev_p6_T173A_T175A_N176D_relaxed_1.pdb UNCLEAVED 184 | tev_p6_T173A_T175A_N176D_relaxed_2.pdb UNCLEAVED 185 | tev_p6_T173A_T175A_N176D_relaxed_3.pdb UNCLEAVED 186 | tev_p6_T173A_T175A_N176D_relaxed_4.pdb UNCLEAVED 187 | tev_p6_T173A_T175A_N176D_relaxed_5.pdb UNCLEAVED 188 | tev_p6_T173A_T175A_N176D_relaxed_6.pdb UNCLEAVED 189 | tev_p6_T173A_T175A_N176D_relaxed_7.pdb UNCLEAVED 190 | tev_p6_T173A_T175A_N176D_relaxed_8.pdb UNCLEAVED 191 | tev_p6_T173A_T175A_N176D_relaxed_9.pdb UNCLEAVED 192 | -------------------------------------------------------------------------------- /graph/classifications/tev_design_negpool.txt: -------------------------------------------------------------------------------- 1 | Sequence Result 2 | tev_p2_V209I_W211L_M218D_relaxed_0.pdb None 3 | tev_p2_V209I_W211L_M218D_relaxed_1.pdb None 4 | tev_p2_V209I_W211L_M218D_relaxed_2.pdb None 5 | tev_p2_V209I_W211L_M218D_relaxed_3.pdb None 6 | tev_p2_V209I_W211L_M218D_relaxed_4.pdb None 7 | tev_p2_V209I_W211L_M218D_relaxed_5.pdb None 8 | tev_p2_V209I_W211L_M218D_relaxed_6.pdb None 9 | tev_p2_V209I_W211L_M218D_relaxed_7.pdb None 10 | tev_p2_V209I_W211L_M218D_relaxed_8.pdb None 11 | tev_p2_V209I_W211L_M218D_relaxed_9.pdb None 12 | tev_p2_V209S_W211R_G213R_relaxed_0.pdb None 13 | tev_p2_V209S_W211R_G213R_relaxed_1.pdb None 14 | tev_p2_V209S_W211R_G213R_relaxed_2.pdb None 15 | tev_p2_V209S_W211R_G213R_relaxed_3.pdb None 16 | tev_p2_V209S_W211R_G213R_relaxed_4.pdb None 17 | tev_p2_V209S_W211R_G213R_relaxed_5.pdb None 18 | 
tev_p2_V209S_W211R_G213R_relaxed_6.pdb None 19 | tev_p2_V209S_W211R_G213R_relaxed_7.pdb None 20 | tev_p2_V209S_W211R_G213R_relaxed_8.pdb None 21 | tev_p2_V209S_W211R_G213R_relaxed_9.pdb None 22 | tev_p2_V209S_W211R_M218D_relaxed_0.pdb None 23 | tev_p2_V209S_W211R_M218D_relaxed_1.pdb None 24 | tev_p2_V209S_W211R_M218D_relaxed_2.pdb None 25 | tev_p2_V209S_W211R_M218D_relaxed_3.pdb None 26 | tev_p2_V209S_W211R_M218D_relaxed_4.pdb None 27 | tev_p2_V209S_W211R_M218D_relaxed_5.pdb None 28 | tev_p2_V209S_W211R_M218D_relaxed_6.pdb None 29 | tev_p2_V209S_W211R_M218D_relaxed_7.pdb None 30 | tev_p2_V209S_W211R_M218D_relaxed_8.pdb None 31 | tev_p2_V209S_W211R_M218D_relaxed_9.pdb None 32 | tev_p2_V209S_W211R_relaxed_0.pdb None 33 | tev_p2_V209S_W211R_relaxed_1.pdb None 34 | tev_p2_V209S_W211R_relaxed_2.pdb None 35 | tev_p2_V209S_W211R_relaxed_3.pdb None 36 | tev_p2_V209S_W211R_relaxed_4.pdb None 37 | tev_p2_V209S_W211R_relaxed_5.pdb None 38 | tev_p2_V209S_W211R_relaxed_6.pdb None 39 | tev_p2_V209S_W211R_relaxed_7.pdb None 40 | tev_p2_V209S_W211R_relaxed_8.pdb None 41 | tev_p2_V209S_W211R_relaxed_9.pdb None 42 | tev_p6_F172Y_N174H_relaxed_0.pdb None 43 | tev_p6_F172Y_N174H_relaxed_1.pdb None 44 | tev_p6_F172Y_N174H_relaxed_2.pdb None 45 | tev_p6_F172Y_N174H_relaxed_3.pdb None 46 | tev_p6_F172Y_N174H_relaxed_4.pdb None 47 | tev_p6_F172Y_N174H_relaxed_5.pdb None 48 | tev_p6_F172Y_N174H_relaxed_6.pdb None 49 | tev_p6_F172Y_N174H_relaxed_7.pdb None 50 | tev_p6_F172Y_N174H_relaxed_8.pdb None 51 | tev_p6_F172Y_N174H_relaxed_9.pdb None 52 | tev_p6_K141E_T175P_relaxed_0.pdb None 53 | tev_p6_K141E_T175P_relaxed_1.pdb None 54 | tev_p6_K141E_T175P_relaxed_2.pdb None 55 | tev_p6_K141E_T175P_relaxed_3.pdb None 56 | tev_p6_K141E_T175P_relaxed_4.pdb None 57 | tev_p6_K141E_T175P_relaxed_5.pdb None 58 | tev_p6_K141E_T175P_relaxed_6.pdb None 59 | tev_p6_K141E_T175P_relaxed_7.pdb None 60 | tev_p6_K141E_T175P_relaxed_8.pdb None 61 | tev_p6_K141E_T175P_relaxed_9.pdb None 62 | 
tev_p6_T173A_T175A_N176D_relaxed_0.pdb None 63 | tev_p6_T173A_T175A_N176D_relaxed_1.pdb None 64 | tev_p6_T173A_T175A_N176D_relaxed_2.pdb None 65 | tev_p6_T173A_T175A_N176D_relaxed_3.pdb None 66 | tev_p6_T173A_T175A_N176D_relaxed_4.pdb None 67 | tev_p6_T173A_T175A_N176D_relaxed_5.pdb None 68 | tev_p6_T173A_T175A_N176D_relaxed_6.pdb None 69 | tev_p6_T173A_T175A_N176D_relaxed_7.pdb None 70 | tev_p6_T173A_T175A_N176D_relaxed_8.pdb None 71 | tev_p6_T173A_T175A_N176D_relaxed_9.pdb None 72 | -------------------------------------------------------------------------------- /graph/classifications/tev_oydv_raw_designs.txt: -------------------------------------------------------------------------------- 1 | Sequence Result 2 | tev_o_s2_single_47_designed_6.pdb None 3 | tev_o_s2_single_59_designed_1.pdb None 4 | tev_o_s2_single_60_designed_5.pdb None 5 | tev_o_s2_single_64_designed_8.pdb None 6 | tev_o_s2_single_81_designed_0.pdb None 7 | tev_o_s2_single_81_designed_6.pdb None 8 | tev_o_s2_single_81_designed_7.pdb None 9 | tev_o_s2_single_86_designed_2.pdb None 10 | tev_o_s2_single_88_designed_2.pdb None 11 | tev_o_s2_single_91_designed_5.pdb None 12 | tev_o_s2_single_92_designed_8.pdb None 13 | tev_o_s2_single_96_designed_5.pdb None 14 | tev_o_s3_single_0_designed_7.pdb None 15 | tev_o_s3_single_1_designed_1.pdb None 16 | tev_o_s3_single_1_designed_6.pdb None 17 | tev_o_s3_single_1_designed_9.pdb None 18 | tev_o_s3_single_20_designed_0.pdb None 19 | tev_o_s3_single_20_designed_4.pdb None 20 | tev_o_s3_single_22_designed_3.pdb None 21 | tev_o_s3_single_22_designed_9.pdb None 22 | tev_o_s3_single_23_designed_7.pdb None 23 | tev_o_s3_single_25_designed_4.pdb None 24 | tev_o_s3_single_25_designed_9.pdb None 25 | tev_o_s3_single_29_designed_6.pdb None 26 | tev_o_s3_single_2_designed_7.pdb None 27 | tev_o_s3_single_34_designed_1.pdb None 28 | tev_o_s3_single_35_designed_8.pdb None 29 | tev_o_s3_single_36_designed_1.pdb None 30 | tev_o_s3_single_36_designed_3.pdb None 31 | 
tev_o_s3_single_41_designed_7.pdb None 32 | tev_o_s3_single_41_designed_8.pdb None 33 | tev_o_s3_single_42_designed_1.pdb None 34 | tev_o_s3_single_42_designed_2.pdb None 35 | tev_o_s3_single_42_designed_8.pdb None 36 | tev_o_s3_single_43_designed_3.pdb None 37 | tev_o_s3_single_45_designed_0.pdb None 38 | tev_o_s3_single_46_designed_6.pdb None 39 | tev_o_s3_single_4_designed_6.pdb None 40 | tev_o_s3_single_4_designed_9.pdb None 41 | tev_o_s3_single_50_designed_5.pdb None 42 | tev_o_s3_single_51_designed_9.pdb None 43 | tev_o_s3_single_55_designed_6.pdb None 44 | tev_o_s3_single_56_designed_3.pdb None 45 | tev_o_s3_single_56_designed_9.pdb None 46 | tev_o_s3_single_58_designed_5.pdb None 47 | tev_o_s3_single_59_designed_9.pdb None 48 | tev_o_s3_single_5_designed_0.pdb None 49 | tev_o_s3_single_60_designed_2.pdb None 50 | tev_o_s3_single_60_designed_9.pdb None 51 | tev_o_s3_single_61_designed_2.pdb None 52 | tev_o_s3_single_61_designed_4.pdb None 53 | tev_o_s3_single_62_designed_4.pdb None 54 | tev_o_s3_single_65_designed_9.pdb None 55 | tev_o_s3_single_66_designed_0.pdb None 56 | tev_o_s3_single_66_designed_4.pdb None 57 | tev_o_s3_single_66_designed_6.pdb None 58 | tev_o_s3_single_69_designed_4.pdb None 59 | tev_o_s3_single_69_designed_9.pdb None 60 | tev_o_s3_single_76_designed_3.pdb None 61 | tev_o_s3_single_77_designed_9.pdb None 62 | tev_o_s3_single_78_designed_0.pdb None 63 | tev_o_s3_single_78_designed_2.pdb None 64 | tev_o_s3_single_81_designed_7.pdb None 65 | tev_o_s3_single_81_designed_9.pdb None 66 | tev_o_s3_single_82_designed_7.pdb None 67 | tev_o_s3_single_83_designed_4.pdb None 68 | tev_o_s3_single_84_designed_6.pdb None 69 | tev_o_s3_single_86_designed_4.pdb None 70 | tev_o_s3_single_89_designed_4.pdb None 71 | tev_o_s3_single_8_designed_0.pdb None 72 | tev_o_s3_single_8_designed_7.pdb None 73 | tev_o_s3_single_91_designed_6.pdb None 74 | tev_o_s3_single_95_designed_5.pdb None 75 | tev_o_s3_single_96_designed_1.pdb None 76 | 
tev_o_s3_single_97_designed_8.pdb None 77 | tev_o_s3_single_9_designed_5.pdb None 78 | tev_o_s4_single_94_designed_0.pdb None 79 | tev_o_s5_single_50_designed_1.pdb None 80 | tev_o_s5_single_50_designed_9.pdb None 81 | tev_o_s5_single_90_designed_4.pdb None 82 | tev_o_s6_single_6_designed_0.pdb None 83 | tev_o_s6_single_74_designed_0.pdb None 84 | tev_o_s6_single_95_designed_3.pdb None 85 | tev_o_s6_single_96_designed_3.pdb None 86 | tev_w_full_0_designed_0.pdb None 87 | tev_w_full_102_designed_0.pdb None 88 | tev_w_full_102_designed_1.pdb None 89 | tev_w_full_102_designed_2.pdb None 90 | tev_w_full_103_designed_1.pdb None 91 | tev_w_full_106_designed_3.pdb None 92 | tev_w_full_107_designed_1.pdb None 93 | tev_w_full_107_designed_3.pdb None 94 | tev_w_full_109_designed_1.pdb None 95 | tev_w_full_10_designed_0.pdb None 96 | tev_w_full_114_designed_1.pdb None 97 | tev_w_full_114_designed_2.pdb None 98 | tev_w_full_114_designed_3.pdb None 99 | tev_w_full_116_designed_2.pdb None 100 | tev_w_full_116_designed_3.pdb None 101 | tev_w_full_121_designed_2.pdb None 102 | tev_w_full_122_designed_0.pdb None 103 | tev_w_full_125_designed_1.pdb None 104 | tev_w_full_126_designed_0.pdb None 105 | tev_w_full_126_designed_3.pdb None 106 | tev_w_full_129_designed_2.pdb None 107 | tev_w_full_130_designed_3.pdb None 108 | tev_w_full_131_designed_1.pdb None 109 | tev_w_full_132_designed_2.pdb None 110 | tev_w_full_132_designed_3.pdb None 111 | tev_w_full_134_designed_0.pdb None 112 | tev_w_full_136_designed_2.pdb None 113 | tev_w_full_137_designed_0.pdb None 114 | tev_w_full_138_designed_0.pdb None 115 | tev_w_full_138_designed_2.pdb None 116 | tev_w_full_139_designed_0.pdb None 117 | tev_w_full_139_designed_1.pdb None 118 | tev_w_full_13_designed_0.pdb None 119 | tev_w_full_13_designed_3.pdb None 120 | tev_w_full_141_designed_2.pdb None 121 | tev_w_full_143_designed_0.pdb None 122 | tev_w_full_144_designed_0.pdb None 123 | tev_w_full_146_designed_0.pdb None 124 | 
tev_w_full_153_designed_2.pdb None 125 | tev_w_full_155_designed_2.pdb None 126 | tev_w_full_158_designed_1.pdb None 127 | tev_w_full_161_designed_1.pdb None 128 | tev_w_full_170_designed_1.pdb None 129 | tev_w_full_173_designed_0.pdb None 130 | tev_w_full_173_designed_3.pdb None 131 | tev_w_full_178_designed_1.pdb None 132 | tev_w_full_182_designed_0.pdb None 133 | tev_w_full_187_designed_0.pdb None 134 | tev_w_full_188_designed_0.pdb None 135 | tev_w_full_188_designed_3.pdb None 136 | tev_w_full_190_designed_2.pdb None 137 | tev_w_full_200_designed_0.pdb None 138 | tev_w_full_205_designed_2.pdb None 139 | tev_w_full_206_designed_0.pdb None 140 | tev_w_full_206_designed_3.pdb None 141 | tev_w_full_20_designed_3.pdb None 142 | tev_w_full_210_designed_2.pdb None 143 | tev_w_full_211_designed_0.pdb None 144 | tev_w_full_211_designed_3.pdb None 145 | tev_w_full_212_designed_2.pdb None 146 | tev_w_full_213_designed_0.pdb None 147 | tev_w_full_214_designed_0.pdb None 148 | tev_w_full_220_designed_3.pdb None 149 | tev_w_full_221_designed_1.pdb None 150 | tev_w_full_222_designed_2.pdb None 151 | tev_w_full_225_designed_3.pdb None 152 | tev_w_full_226_designed_0.pdb None 153 | tev_w_full_226_designed_1.pdb None 154 | tev_w_full_229_designed_1.pdb None 155 | tev_w_full_230_designed_2.pdb None 156 | tev_w_full_232_designed_1.pdb None 157 | tev_w_full_232_designed_3.pdb None 158 | tev_w_full_233_designed_1.pdb None 159 | tev_w_full_234_designed_3.pdb None 160 | tev_w_full_236_designed_3.pdb None 161 | tev_w_full_238_designed_1.pdb None 162 | tev_w_full_23_designed_2.pdb None 163 | tev_w_full_23_designed_3.pdb None 164 | tev_w_full_241_designed_2.pdb None 165 | tev_w_full_242_designed_3.pdb None 166 | tev_w_full_244_designed_1.pdb None 167 | tev_w_full_244_designed_2.pdb None 168 | tev_w_full_249_designed_0.pdb None 169 | tev_w_full_24_designed_0.pdb None 170 | tev_w_full_253_designed_3.pdb None 171 | tev_w_full_254_designed_2.pdb None 172 | tev_w_full_256_designed_1.pdb None 
173 | tev_w_full_261_designed_2.pdb None 174 | tev_w_full_265_designed_3.pdb None 175 | tev_w_full_267_designed_0.pdb None 176 | tev_w_full_267_designed_1.pdb None 177 | tev_w_full_269_designed_1.pdb None 178 | tev_w_full_270_designed_2.pdb None 179 | tev_w_full_271_designed_1.pdb None 180 | tev_w_full_273_designed_1.pdb None 181 | tev_w_full_274_designed_0.pdb None 182 | tev_w_full_276_designed_3.pdb None 183 | tev_w_full_277_designed_1.pdb None 184 | tev_w_full_278_designed_1.pdb None 185 | tev_w_full_279_designed_0.pdb None 186 | tev_w_full_279_designed_1.pdb None 187 | tev_w_full_279_designed_3.pdb None 188 | tev_w_full_289_designed_1.pdb None 189 | tev_w_full_295_designed_0.pdb None 190 | tev_w_full_296_designed_0.pdb None 191 | tev_w_full_296_designed_3.pdb None 192 | tev_w_full_299_designed_3.pdb None 193 | tev_w_full_303_designed_3.pdb None 194 | tev_w_full_304_designed_2.pdb None 195 | tev_w_full_306_designed_0.pdb None 196 | tev_w_full_309_designed_1.pdb None 197 | tev_w_full_30_designed_2.pdb None 198 | tev_w_full_310_designed_1.pdb None 199 | tev_w_full_311_designed_3.pdb None 200 | tev_w_full_313_designed_2.pdb None 201 | tev_w_full_322_designed_3.pdb None 202 | tev_w_full_323_designed_1.pdb None 203 | tev_w_full_326_designed_3.pdb None 204 | tev_w_full_328_designed_0.pdb None 205 | tev_w_full_331_designed_3.pdb None 206 | tev_w_full_333_designed_2.pdb None 207 | tev_w_full_348_designed_1.pdb None 208 | tev_w_full_34_designed_3.pdb None 209 | tev_w_full_351_designed_0.pdb None 210 | tev_w_full_355_designed_2.pdb None 211 | tev_w_full_356_designed_1.pdb None 212 | tev_w_full_362_designed_3.pdb None 213 | tev_w_full_366_designed_2.pdb None 214 | tev_w_full_375_designed_1.pdb None 215 | tev_w_full_380_designed_3.pdb None 216 | tev_w_full_383_designed_0.pdb None 217 | tev_w_full_389_designed_0.pdb None 218 | tev_w_full_38_designed_0.pdb None 219 | tev_w_full_38_designed_3.pdb None 220 | tev_w_full_392_designed_3.pdb None 221 | tev_w_full_394_designed_3.pdb 
None 222 | tev_w_full_396_designed_1.pdb None 223 | tev_w_full_397_designed_3.pdb None 224 | tev_w_full_398_designed_2.pdb None 225 | tev_w_full_39_designed_0.pdb None 226 | tev_w_full_3_designed_0.pdb None 227 | tev_w_full_400_designed_1.pdb None 228 | tev_w_full_401_designed_0.pdb None 229 | tev_w_full_401_designed_2.pdb None 230 | tev_w_full_409_designed_2.pdb None 231 | tev_w_full_420_designed_1.pdb None 232 | tev_w_full_421_designed_3.pdb None 233 | tev_w_full_423_designed_0.pdb None 234 | tev_w_full_427_designed_0.pdb None 235 | tev_w_full_428_designed_1.pdb None 236 | tev_w_full_428_designed_2.pdb None 237 | tev_w_full_429_designed_0.pdb None 238 | tev_w_full_433_designed_2.pdb None 239 | tev_w_full_434_designed_1.pdb None 240 | tev_w_full_438_designed_1.pdb None 241 | tev_w_full_445_designed_0.pdb None 242 | tev_w_full_446_designed_0.pdb None 243 | tev_w_full_449_designed_3.pdb None 244 | tev_w_full_44_designed_1.pdb None 245 | tev_w_full_453_designed_2.pdb None 246 | tev_w_full_455_designed_3.pdb None 247 | tev_w_full_457_designed_2.pdb None 248 | tev_w_full_458_designed_3.pdb None 249 | tev_w_full_459_designed_2.pdb None 250 | tev_w_full_460_designed_1.pdb None 251 | tev_w_full_464_designed_0.pdb None 252 | tev_w_full_464_designed_3.pdb None 253 | tev_w_full_465_designed_2.pdb None 254 | tev_w_full_466_designed_1.pdb None 255 | tev_w_full_467_designed_0.pdb None 256 | tev_w_full_468_designed_0.pdb None 257 | tev_w_full_474_designed_1.pdb None 258 | tev_w_full_476_designed_3.pdb None 259 | tev_w_full_478_designed_0.pdb None 260 | tev_w_full_47_designed_0.pdb None 261 | tev_w_full_480_designed_1.pdb None 262 | tev_w_full_485_designed_1.pdb None 263 | tev_w_full_48_designed_3.pdb None 264 | tev_w_full_491_designed_3.pdb None 265 | tev_w_full_492_designed_2.pdb None 266 | tev_w_full_494_designed_3.pdb None 267 | tev_w_full_499_designed_2.pdb None 268 | tev_w_full_49_designed_0.pdb None 269 | tev_w_full_4_designed_2.pdb None 270 | tev_w_full_59_designed_0.pdb 
None 271 | tev_w_full_5_designed_1.pdb None 272 | tev_w_full_60_designed_0.pdb None 273 | tev_w_full_68_designed_3.pdb None 274 | tev_w_full_71_designed_0.pdb None 275 | tev_w_full_73_designed_0.pdb None 276 | tev_w_full_75_designed_1.pdb None 277 | tev_w_full_77_designed_2.pdb None 278 | tev_w_full_78_designed_3.pdb None 279 | tev_w_full_80_designed_3.pdb None 280 | tev_w_full_83_designed_3.pdb None 281 | tev_w_full_85_designed_1.pdb None 282 | tev_w_full_86_designed_0.pdb None 283 | tev_w_full_86_designed_3.pdb None 284 | tev_w_full_89_designed_1.pdb None 285 | tev_w_full_8_designed_0.pdb None 286 | tev_w_full_93_designed_2.pdb None 287 | tev_w_full_94_designed_2.pdb None 288 | tev_w_full_96_designed_0.pdb None 289 | tev_w_full_96_designed_1.pdb None 290 | tev_w_full_97_designed_2.pdb None 291 | tev_w_full_97_designed_3.pdb None 292 | tev_w_full_98_designed_1.pdb None 293 | tev_w_s2_single_26_designed_0.pdb None 294 | tev_w_s2_single_31_designed_7.pdb None 295 | tev_w_s2_single_43_designed_2.pdb None 296 | tev_w_s2_single_51_designed_4.pdb None 297 | tev_w_s2_single_67_designed_6.pdb None 298 | tev_w_s2_single_96_designed_4.pdb None 299 | tev_w_s2_single_98_designed_8.pdb None 300 | tev_w_s3_single_0_designed_3.pdb None 301 | tev_w_s3_single_10_designed_4.pdb None 302 | tev_w_s3_single_10_designed_6.pdb None 303 | tev_w_s3_single_10_designed_9.pdb None 304 | tev_w_s3_single_11_designed_4.pdb None 305 | tev_w_s3_single_14_designed_6.pdb None 306 | tev_w_s3_single_14_designed_8.pdb None 307 | tev_w_s3_single_15_designed_3.pdb None 308 | tev_w_s3_single_16_designed_5.pdb None 309 | tev_w_s3_single_18_designed_8.pdb None 310 | tev_w_s3_single_19_designed_7.pdb None 311 | tev_w_s3_single_1_designed_6.pdb None 312 | tev_w_s3_single_22_designed_0.pdb None 313 | tev_w_s3_single_22_designed_3.pdb None 314 | tev_w_s3_single_23_designed_0.pdb None 315 | tev_w_s3_single_24_designed_7.pdb None 316 | tev_w_s3_single_25_designed_6.pdb None 317 | 
tev_w_s3_single_26_designed_9.pdb None 318 | tev_w_s3_single_28_designed_1.pdb None 319 | tev_w_s3_single_29_designed_1.pdb None 320 | tev_w_s3_single_2_designed_2.pdb None 321 | tev_w_s3_single_30_designed_0.pdb None 322 | tev_w_s3_single_30_designed_5.pdb None 323 | tev_w_s3_single_31_designed_1.pdb None 324 | tev_w_s3_single_31_designed_2.pdb None 325 | tev_w_s3_single_33_designed_8.pdb None 326 | tev_w_s3_single_33_designed_9.pdb None 327 | tev_w_s3_single_3_designed_2.pdb None 328 | tev_w_s3_single_3_designed_3.pdb None 329 | tev_w_s3_single_3_designed_7.pdb None 330 | tev_w_s3_single_45_designed_3.pdb None 331 | tev_w_s3_single_47_designed_1.pdb None 332 | tev_w_s3_single_48_designed_5.pdb None 333 | tev_w_s3_single_49_designed_6.pdb None 334 | tev_w_s3_single_49_designed_7.pdb None 335 | tev_w_s3_single_51_designed_2.pdb None 336 | tev_w_s3_single_52_designed_5.pdb None 337 | tev_w_s3_single_52_designed_6.pdb None 338 | tev_w_s3_single_55_designed_2.pdb None 339 | tev_w_s3_single_56_designed_6.pdb None 340 | tev_w_s3_single_56_designed_8.pdb None 341 | tev_w_s3_single_57_designed_1.pdb None 342 | tev_w_s3_single_57_designed_5.pdb None 343 | tev_w_s3_single_57_designed_7.pdb None 344 | tev_w_s3_single_58_designed_3.pdb None 345 | tev_w_s3_single_5_designed_6.pdb None 346 | tev_w_s3_single_60_designed_0.pdb None 347 | tev_w_s3_single_62_designed_2.pdb None 348 | tev_w_s3_single_62_designed_4.pdb None 349 | tev_w_s3_single_64_designed_2.pdb None 350 | tev_w_s3_single_64_designed_8.pdb None 351 | tev_w_s3_single_65_designed_8.pdb None 352 | tev_w_s3_single_66_designed_2.pdb None 353 | tev_w_s3_single_66_designed_3.pdb None 354 | tev_w_s3_single_68_designed_2.pdb None 355 | tev_w_s3_single_6_designed_1.pdb None 356 | tev_w_s3_single_6_designed_6.pdb None 357 | tev_w_s3_single_70_designed_0.pdb None 358 | tev_w_s3_single_72_designed_9.pdb None 359 | tev_w_s3_single_77_designed_8.pdb None 360 | tev_w_s3_single_78_designed_4.pdb None 361 | 
tev_w_s3_single_79_designed_8.pdb None 362 | tev_w_s3_single_82_designed_7.pdb None 363 | tev_w_s3_single_82_designed_9.pdb None 364 | tev_w_s3_single_83_designed_7.pdb None 365 | tev_w_s3_single_85_designed_5.pdb None 366 | tev_w_s3_single_87_designed_1.pdb None 367 | tev_w_s3_single_91_designed_7.pdb None 368 | tev_w_s3_single_92_designed_0.pdb None 369 | tev_w_s3_single_94_designed_5.pdb None 370 | tev_w_s3_single_95_designed_3.pdb None 371 | tev_w_s3_single_97_designed_7.pdb None 372 | tev_w_s3_single_98_designed_4.pdb None 373 | tev_w_s4_single_61_designed_3.pdb None 374 | tev_w_s5_single_15_designed_8.pdb None 375 | tev_w_s5_single_18_designed_9.pdb None 376 | tev_w_s5_single_80_designed_2.pdb None 377 | tev_w_s6_single_2_designed_2.pdb None 378 | tev_w_s6_single_34_designed_5.pdb None 379 | tev_w_s6_single_55_designed_9.pdb None 380 | tev_w_s6_single_98_designed_1.pdb None 381 | -------------------------------------------------------------------------------- /helper/.ipynb_checkpoints/2yol-ER-summarized_label_singlePDB-checkpoint.txt: -------------------------------------------------------------------------------- 1 | Sequence Result 2 | RAAVGRG CLEAVED 3 | -------------------------------------------------------------------------------- /helper/.ipynb_checkpoints/RAAVGRG-checkpoint.fasc: -------------------------------------------------------------------------------- 1 | {"pdb_name": "/projectsp/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/crystal_structures/2yol_class_generated_structures/RAAVGRG", "decoy": "/projectsp/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/crystal_structures/2yol_class_generated_structures/RAAVGRG_1.pdb", "filename": "/projectsp/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/crystal_structures/2yol_class_generated_structures/RAAVGRG_1.pdb", "nstruct": 5, "angle_constraint": 1.8514584177562103, "atom_pair_constraint": 25.208273437267593, "coordinate_constraint": 7.039481860880478, "dihedral_constraint": 44.36198268834136, "dslf_fa13": 0.0, 
"fa_atr": -1200.1931602312552, "fa_dun": 278.70603817440116, "fa_elec": -361.65087748482324, "fa_intra_rep": 2.2782667001091976, "fa_intra_sol_xover4": 38.17452307210722, "fa_rep": 189.1203012738916, "fa_sol": 678.8920947635634, "hbond_bb_sc": -57.45066322268294, "hbond_lr_bb": -78.10956195511554, "hbond_sc": -33.406312620142714, "hbond_sr_bb": -23.77954779304183, "lk_ball_wtd": -18.34982674161976, "omega": 33.779462545409125, "p_aa_pp": -59.62864800911472, "pro_close": 0.7566121679053337, "rama_prepro": 26.595517386945655, "ref": 78.28747000000007, "total_score": -427.29691210847625, "yhh_planarity": 0.2202034607410545} 2 | {"pdb_name": "/projectsp/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/crystal_structures/2yol_class_generated_structures/RAAVGRG", "decoy": "/projectsp/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/crystal_structures/2yol_class_generated_structures/RAAVGRG_3.pdb", "filename": "/projectsp/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/crystal_structures/2yol_class_generated_structures/RAAVGRG_3.pdb", "nstruct": 5, "angle_constraint": 1.72319730851421, "atom_pair_constraint": 26.10647597741368, "coordinate_constraint": 3.631492624038999, "dihedral_constraint": 43.80581752738587, "dslf_fa13": 0.0, "fa_atr": -1200.3024862464708, "fa_dun": 268.0296932876475, "fa_elec": -359.6974162960206, "fa_intra_rep": 2.2745507677680545, "fa_intra_sol_xover4": 38.176624450479586, "fa_rep": 180.17539585703207, "fa_sol": 679.556153222227, "hbond_bb_sc": -57.00730357738894, "hbond_lr_bb": -78.11313854792967, "hbond_sc": -33.46744638015357, "hbond_sr_bb": -23.77954779304183, "lk_ball_wtd": -18.228640183703895, "omega": 34.17595287994555, "p_aa_pp": -59.589653722017594, "pro_close": 0.7616550100113841, "rama_prepro": 26.748175556904066, "ref": 78.28747000000007, "total_score": -446.51831372803525, "yhh_planarity": 0.21466454932361487} 3 | {"pdb_name": "/projectsp/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/crystal_structures/2yol_class_generated_structures/RAAVGRG", "decoy": 
"/projectsp/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/crystal_structures/2yol_class_generated_structures/RAAVGRG_0.pdb", "filename": "/projectsp/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/crystal_structures/2yol_class_generated_structures/RAAVGRG_0.pdb", "nstruct": 5, "angle_constraint": 1.7188536887926098, "atom_pair_constraint": 26.11225011795318, "coordinate_constraint": 3.6498371696450906, "dihedral_constraint": 43.881292204641206, "dslf_fa13": 0.0, "fa_atr": -1200.2814323816085, "fa_dun": 267.96129620017757, "fa_elec": -359.70300877059645, "fa_intra_rep": 2.2751948755380518, "fa_intra_sol_xover4": 38.17580997214633, "fa_rep": 180.12315240624406, "fa_sol": 679.5198732337798, "hbond_bb_sc": -56.99580894636125, "hbond_lr_bb": -78.1356658702019, "hbond_sc": -33.453014190918154, "hbond_sr_bb": -23.77954779304183, "lk_ball_wtd": -18.241547482541108, "omega": 34.18772081734078, "p_aa_pp": -59.59111293933735, "pro_close": 0.7516880755150057, "rama_prepro": 26.771567833890604, "ref": 78.28747000000007, "total_score": -446.5404916720142, "yhh_planarity": 0.224640106927815} 4 | {"pdb_name": "/projectsp/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/crystal_structures/2yol_class_generated_structures/RAAVGRG", "decoy": "/projectsp/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/crystal_structures/2yol_class_generated_structures/RAAVGRG_2.pdb", "filename": "/projectsp/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/crystal_structures/2yol_class_generated_structures/RAAVGRG_2.pdb", "nstruct": 5, "angle_constraint": 1.7005644160223428, "atom_pair_constraint": 26.28851282762922, "coordinate_constraint": 3.7912000090448896, "dihedral_constraint": 43.83768130020485, "dslf_fa13": 0.0, "fa_atr": -1199.8164258318577, "fa_dun": 267.2926721654119, "fa_elec": -359.2641771887585, "fa_intra_rep": 2.2754266717328373, "fa_intra_sol_xover4": 38.16627324154184, "fa_rep": 179.78007657794114, "fa_sol": 678.9609377666117, "hbond_bb_sc": -57.04690699040846, "hbond_lr_bb": -78.14498841336332, "hbond_sc": 
-32.92357507398168, "hbond_sr_bb": -23.77954779304183, "lk_ball_wtd": -18.368511777597316, "omega": 34.310327060005726, "p_aa_pp": -59.58881596162057, "pro_close": 0.7553744995982864, "rama_prepro": 26.802237753557165, "ref": 78.28747000000007, "total_score": -446.4661692154433, "yhh_planarity": 0.21802552588414464} 5 | {"pdb_name": "/projectsp/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/crystal_structures/2yol_class_generated_structures/RAAVGRG", "decoy": "/projectsp/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/crystal_structures/2yol_class_generated_structures/RAAVGRG_4.pdb", "filename": "/projectsp/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/crystal_structures/2yol_class_generated_structures/RAAVGRG_4.pdb", "nstruct": 5, "angle_constraint": 1.7005644160223428, "atom_pair_constraint": 26.28851282762922, "coordinate_constraint": 3.7912000090448896, "dihedral_constraint": 43.83768130020485, "dslf_fa13": 0.0, "fa_atr": -1199.8164258318577, "fa_dun": 267.2926721654119, "fa_elec": -359.2641771887585, "fa_intra_rep": 2.2754266717328373, "fa_intra_sol_xover4": 38.16627324154184, "fa_rep": 179.78007657794114, "fa_sol": 678.9609377666117, "hbond_bb_sc": -57.04690699040846, "hbond_lr_bb": -78.14498841336332, "hbond_sc": -32.92357507398168, "hbond_sr_bb": -23.77954779304183, "lk_ball_wtd": -18.368511777597316, "omega": 34.310327060005726, "p_aa_pp": -59.58881596162057, "pro_close": 0.7553744995982864, "rama_prepro": 26.802237753557165, "ref": 78.28747000000007, "total_score": -446.4661692154433, "yhh_planarity": 0.21802552588414464} 6 | -------------------------------------------------------------------------------- /helper/.ipynb_checkpoints/Untitled-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import argparse" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 3, 15 
| "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "def parse_args():\n", 19 | " parser = argparse.ArgumentParser()\n", 20 | " parser.add_argument('-s', '--score_folder_path', type=str,\n", 21 | " default='/projects/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/crystal_structures/2yol_class_generated_structures',\n", 22 | " help='Directory of generated structures.')\n", 23 | " parser.add_argument('-class', '--classification_file', type=str,\n", 24 | " default='/projects/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/classifications/3cProt_class/2yol-ER-summarized_label.txt',\n", 25 | " help='Directory of generated structures.')\n", 26 | " return parser.parse_args()" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "def main(args):\n", 36 | " score_path = Path(args.score_folder_path)\n", 37 | " class_file = Path(args.classification_file)\n", 38 | " " 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 36, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "from pathlib import Path\n", 48 | "import pandas as pd\n", 49 | "import numpy as np\n", 50 | "import json\n", 51 | "from collections import defaultdict" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 5, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "score_path = Path('./')\n", 61 | "class_file = Path('2yol-ER-summarized_label.txt')" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 9, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "df_class = pd.read_csv(class_file, delimiter='\\t')" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 59, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "# edit based on Vidur's code\n", 80 | "new_sequences = []\n", 81 | "for seq in ['RAAVGRG']: #df_class['Sequence']\n", 82 | " fasc = score_path / (seq + '.fasc')\n", 83 | " with open(fasc, 'r') 
as fp:\n", 84 | " for i, line in enumerate(fp):\n", 85 | " js = json.loads(line)\n", 86 | " if i == 0:\n", 87 | " dic_scores = defaultdict(list, { k:[v] for k,v in js.items()})\n", 88 | " else:\n", 89 | " for k in js.keys():\n", 90 | " dic_scores[k].append(js[k])\n", 91 | " df = pd.DataFrame(dic_scores)\n", 92 | " pdb = df.loc[df['total_score'].idxmin(),['filename']].values[0].split('/')[-1]\n", 93 | " new_sequences.append(seq)\n", 94 | "df = pd.DataFrame({'Sequence': new_sequences, 'Result': ['CLEAVED']}) #df_class['Result']\n", 95 | "df.to_csv(class_file.parent / (class_file.stem.split('.')[0] + '_singlePDB.txt'), sep='\\t', index=None)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": {}, 102 | "outputs": [], 103 | "source": [] 104 | } 105 | ], 106 | "metadata": { 107 | "kernelspec": { 108 | "display_name": "Python 3", 109 | "language": "python", 110 | "name": "python3" 111 | }, 112 | "language_info": { 113 | "codemirror_mode": { 114 | "name": "ipython", 115 | "version": 3 116 | }, 117 | "file_extension": ".py", 118 | "mimetype": "text/x-python", 119 | "name": "python", 120 | "nbconvert_exporter": "python", 121 | "pygments_lexer": "ipython3", 122 | "version": "3.7.1" 123 | } 124 | }, 125 | "nbformat": 4, 126 | "nbformat_minor": 4 127 | } 128 | -------------------------------------------------------------------------------- /helper/.ipynb_checkpoints/generate_class_singlePDB-checkpoint.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This script is to generate classification file if generated structures are in single PDB format. 
import argparse
from pathlib import Path
import pandas as pd
import numpy as np
import json
from collections import defaultdict


def parse_args(argv=None):
    """Parse the command-line options of the single-PDB classification helper.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) lets
            argparse read ``sys.argv[1:]``, which is the original behaviour.

    Returns:
        argparse.Namespace with ``score_folder_path`` and
        ``classification_file``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--score_folder_path', type=str,
                        default='/projects/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/crystal_structures/2yol_class_generated_structures',
                        help='Directory of generated structures.')
    # The original help text here was a copy of the -s help; this option is
    # read with pd.read_csv(..., delimiter='\t') and its 'Sequence' and
    # 'Result' columns are used in main().
    parser.add_argument('-class', '--classification_file', type=str,
                        default='/projects/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/classifications/3cProt_class/2yol-ER-summarized_label.txt',
                        help='Tab-delimited classification file with Sequence and Result columns.')
    return parser.parse_args(argv)


def _best_decoy_name(fasc_path):
    """Return the file name of the lowest ``total_score`` record in a .fasc file.

    A .fasc file holds one JSON object per line. Blank lines are ignored.

    Raises:
        ValueError: if the file contains no score record at all. The original
            loop either hit a NameError or silently reused the scores of the
            previous sequence in that case.
    """
    records = []
    with open(fasc_path, 'r') as fp:
        for line in fp:
            line = line.strip()
            if line:
                records.append(json.loads(line))
    if not records:
        raise ValueError('No score records found in {}'.format(fasc_path))
    # min() keeps the first record on ties, like DataFrame.idxmin() did.
    best = min(records, key=lambda record: record['total_score'])
    return best['filename'].split('/')[-1]


def main(args):
    """Write ``<classification stem>_singlePDB.txt`` next to the classification file.

    For every entry of the ``Sequence`` column the matching
    ``<sequence>.fasc`` score file in ``args.score_folder_path`` is read, then a
    tab-delimited table with the columns ``Sequence`` and ``Result`` is
    written.

    Args:
        args: Namespace providing ``score_folder_path`` and
            ``classification_file``.
    """
    score_path = Path(args.score_folder_path)
    class_file = Path(args.classification_file)
    df_class = pd.read_csv(class_file, delimiter='\t')

    new_sequences = []
    for seq in df_class['Sequence']:
        # NOTE(review): the original computed the best-scoring decoy name here
        # but then appended the raw sequence, so the decoy name never reached
        # the output. That output is kept as is; the lookup still runs so that
        # a missing or empty score file is reported. Confirm whether the decoy
        # name was meant to be written instead.
        _best_decoy_name(score_path / (seq + '.fasc'))
        new_sequences.append(seq)

    df = pd.DataFrame({'Sequence': new_sequences, 'Result': df_class['Result']})
    out_file = class_file.parent / (class_file.stem.split('.')[0] + '_singlePDB.txt')
    df.to_csv(out_file, sep='\t', index=False)
import numpy as np
import pandas as pd
import os
import subprocess
from pathlib import Path
import argparse


def parse_args(argv=None):
    """Parse the command-line options of the modeling-command generator.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) lets
            argparse read ``sys.argv[1:]``, which is the original behaviour.

    Returns:
        argparse.Namespace holding every option defined below.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--info_file', type=str,
                        default='/projects/f_sdk94_1/protease_3C/data_ngs_enrichment/2bof-ER-summarized.csv',
                        help='Directory of the information for all structures to be generated. \
                        It should consist of three columns, (currently, the program cannot support multiple proteases)\
                        protease_name or protease_mutations, substrate_sequence, and label.')
    # The original help text was a copy of the -p1_ind help. The value is
    # handed to createCrys() as the wild-type residue pair to look for.
    parser.add_argument('-p1p11', '--p1p11_wt', type=str,
                        default='QS',
                        help='One-letter codes of the wild-type P1 and P1 prime residues in the \
                        starting structure, e.g., QS. These are the residues that get replaced \
                        when a starting structure for another P1/P1 prime pair is created.')
    parser.add_argument('-p1_ind', '--p1_index_substrate', type=int,
                        default=888,
                        help='index of p1, can be either negative or positive indices. \
                        e.g., p1=0 means p1 is the first of the substrate; \
                        p1=-2 means p1 is the last second of the substrate sequence. \
                        If you use . as a delimiter between P1 and P1 prime position, ignore this flag.')
    parser.add_argument('-p1_pdb', '--p1_index_pdb', type=int,
                        default=7,
                        help='pdb index of p1.')
    parser.add_argument('-struct', '--starting_structures', type=str,
                        default='/projects/f_sdk94_1/protease_3C/final_3C_protease_peptide_structures/2b0f_wt_pep.pdb',
                        help='Directory of starting structure(s). It currently cannot handle multiple starting structures. \
                        If multiple starting strctures, make sure names of starting structures match \
                        protease_name in the info_file.')
    parser.add_argument('-script_path', '--script_path', type=str,
                        default='/projects/f_sdk94_1/EnzymeModelling/Protease-Substrate-Design',
                        help='Where to save output file for all commands')
    parser.add_argument('-o', '--output_name', type=str,
                        default='new.command.txt',
                        help='output command file name')
    parser.add_argument('-f', '--output_format', choices=['sequence', 'silent'],
                        default='sequence',
                        help='two options of output format, either sequence, or silent files. \
                        Silent file mode will concatenate sequences which have same patterns into one same file.')
    parser.add_argument('-os', '--output_structure_directory', type=str,
                        default='/projects/f_sdk94_1/EnzymeModelling/Protease3C/2bof',
                        help='where to put generated Rosetta structures')
    parser.add_argument('-constraint', '--constraint_suffix', type=str,
                        default="-site 215 -cons tev.cst -cr 39 74 144 -dprot 0 -dpep 0",
                        help='Specify all flags for design_protease.py, e.g., -site 215 -cons tev.cst -cr 39 74 144 -dprot 0 -dpep 0 \
                        -site specifies the starting pose index of threading, -cr specifies three catalytic residues.')
    parser.add_argument('-jn', '--job_name', type=str,
                        default=None,
                        help='job name for Rosetta modeling')
    parser.add_argument('-bs', '--batch_size', type=int,
                        default=5,
                        help='')
    parser.add_argument('-cd', '--command_directory', type=str,
                        default='/projects/f_sdk94_1/EnzymeModelling/Commands_OYDV')
    # Bug fix: the help sentence used to be passed as ``default``. argparse
    # applies ``type`` to string defaults, so every run without -mem exited
    # with "invalid int value". 12000 is the value that printToBatchCommand()
    # previously hard-coded on the text_to_slurm.py command line.
    parser.add_argument('-mem', '--memory', type=int,
                        default=12000,
                        help='Memory assigned to the processor')
    return parser.parse_args(argv)


def createCrys(p_wt, p, ind, root):
    """Write a copy of the starting structure with a new P1/P1 prime residue pair.

    The copy is saved next to ``root`` as ``<root stem>_<p>.pdb``. In every
    line that does not contain ``REMARK``, the first occurrence of
    ``<wild-type P1 three-letter code> <ind>`` and of
    ``<wild-type P1 prime three-letter code> <ind + 1>`` has its residue name
    replaced by the three-letter code of ``p[0]`` and ``p[1]`` respectively.

    Args:
        p_wt: Two one-letter codes, wild-type P1 and P1 prime.
        p: Two one-letter codes, the new P1 and P1 prime.
        ind: Residue number of P1 as written in the PDB file.
        root: pathlib.Path of the starting structure.

    Raises:
        KeyError: if a one-letter code is not in the lookup table.
        OSError: if ``root`` cannot be read or the copy cannot be written.
    """
    letter1 = 'ARNDBCEQZGHILKMFPSTWYV'
    letter3 = ['ALA', 'ARG', 'ASN', 'ASP', 'ASX', 'CYS', 'GLU', 'GLN', 'GLX', 'GLY', 'HIS', 'ILE', 'LEU', 'LYS',
               'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL']
    letterMap = dict(zip(letter1, letter3))

    p1_motif = letterMap[p_wt[0]] + ' ' + str(ind)
    p2_motif = letterMap[p_wt[1]] + ' ' + str(ind + 1)
    swaps = ((p1_motif, letterMap[p[0]]), (p2_motif, letterMap[p[1]]))

    # Read the input completely before the output is created. The original
    # opened the output first and never closed the input, so a failed run left
    # an empty <stem>_<p>.pdb behind that main() later mistook for a finished
    # starting structure.
    with open(root, 'r') as fp:
        lines = fp.readlines()

    swapped = []
    for line in lines:
        # The original tested ``line.find('REMARK') != 1``, which is true for
        # practically every line and therefore never skipped REMARK records.
        # NOTE(review): skipping REMARK lines is the evident intent - confirm.
        if 'REMARK' not in line:
            for motif, new_residue in swaps:
                pos = line.find(motif)
                if pos != -1:
                    line = line[:pos] + new_residue + line[pos + 3:]
        swapped.append(line)

    with open(root.parent / (root.stem + '_' + p + '.pdb'), 'w') as gp:
        gp.writelines(swapped)


def toCommands(args, info_set, constraint, mode='silent'):
    """Write one ``design_protease.py`` command per substrate sequence.

    The commands go to ``<args.script_path>/<args.output_name>``.

    Args:
        args: Namespace providing ``output_name``, ``script_path``,
            ``p1_index_substrate``, ``starting_structures`` and
            ``output_structure_directory``.
        info_set: Pair ``(sequences, mutant_list)`` of equally long iterables.
            An empty-string mutant means "no mutant prefix in the name".
        constraint: Flags appended verbatim to every command.
        mode: Accepted for backward compatibility; it is not used. Only
            per-sequence commands are written.
    """
    output_name = args.output_name
    script_path = args.script_path
    p1_ind = args.p1_index_substrate
    root = Path(args.starting_structures)
    outStructFolder = args.output_structure_directory

    sequences = info_set[0]
    mutant_list = info_set[1]
    with open(os.path.join(script_path, output_name), 'w') as fp:
        # zip() pairs the items by position. The original used mutant_list[i],
        # which for a pandas Series indexed by sequence strings relies on the
        # deprecated integer-as-position fallback.
        for seq, mutant in zip(sequences, mutant_list):
            p1p11, newSeq = locate_p1p11(seq, p1_ind)
            newStructPath = root.parent / (root.stem + '_' + p1p11 + '.pdb')
            name = newSeq if mutant == '' else mutant + '_' + newSeq
            fp.write('python design_protease.py -s ' + str(newStructPath) +
                     ' -od ' + outStructFolder + ' -seq ' + newSeq + ' -name ' + name +
                     " " + constraint + '\n')


def locate_p1p11(seq, p1_ind=None):
    """Return the P1/P1 prime residue pair and the delimiter-free sequence.

    If ``seq`` contains a ``.`` it marks the boundary between P1 and P1 prime
    and is removed from the returned sequence. Otherwise ``p1_ind`` is used as
    the index of P1 and ``seq`` is returned unchanged.

    Args:
        seq: Substrate sequence, optionally with one ``.`` delimiter.
        p1_ind: Index of P1 in ``seq`` (negative indices allowed) when there
            is no delimiter.

    Returns:
        Tuple ``(p1p11, sequence_without_delimiter)``.

    Raises:
        ValueError: if the delimiter is the first or last character, or if
            there is no delimiter and ``p1_ind`` is ``None`` or ``-1`` (the
            last residue has no P1 prime after it).
        IndexError: if ``p1_ind`` points outside ``seq``.
    """
    dotInd = seq.find('.')
    if dotInd != -1:
        if dotInd == 0 or dotInd == len(seq) - 1:
            raise ValueError("'.' must separate P1 and P1 prime inside the sequence: {}".format(seq))
        p1p11 = seq[dotInd - 1] + seq[dotInd + 1]
        oriSeq = seq[:dotInd] + seq[dotInd + 1:]
        return p1p11, oriSeq

    # No delimiter: fall back to the explicit index. The original validated
    # this with ``assert``, which is stripped under ``python -O``.
    if p1_ind is None or p1_ind == -1:
        raise ValueError('p1_ind must be given (and not -1) when the sequence has no "." delimiter')
    return seq[p1_ind] + seq[p1_ind + 1], seq


def printToBatchCommand(args):
    """Run ``text_to_slurm.py`` on the generated command file.

    Args:
        args: Namespace providing ``info_file``, ``memory``, ``job_name``,
            ``command_directory``, ``batch_size``, ``script_path`` and
            ``output_name``.
    """
    jobName = Path(args.info_file).stem
    mem = args.memory
    if args.job_name is not None:
        jobName = args.job_name
    commandPath = args.command_directory
    nBatch = args.batch_size
    scriptPath = args.script_path
    output_name = args.output_name

    # An argument list (no shell) keeps paths containing spaces intact.
    # ``-mem`` now carries the user's value; the original read args.memory and
    # then hard-coded 12000.
    splitCommand = ["python", scriptPath + "/text_to_slurm.py",
                    "-txt", os.path.join(scriptPath, output_name),
                    "-job_name", jobName,
                    "-mem", str(mem),
                    "-path_operation", scriptPath,
                    "-path_sh", commandPath,
                    "-batch", str(nBatch),
                    "-time", "3-00:00:00"]
    try:
        finished = subprocess.run(splitCommand)
    except OSError as err:
        # os.system() did not raise when the interpreter was missing; stay
        # non-fatal but say what happened.
        print('Could not run text_to_slurm.py: {}'.format(err))
        return
    if finished.returncode != 0:
        print('text_to_slurm.py exited with status {}'.format(finished.returncode))


def mkdir(path):
    """Create ``path`` (a pathlib.Path) with its parents if it does not exist."""
    # exist_ok avoids the gap between the existence check and the creation.
    path.mkdir(parents=True, exist_ok=True)


def main(args):
    """Create missing P1/P1 prime starting structures, then write and submit commands.

    Args:
        args: Namespace as returned by ``parse_args``.
    """
    mutSeqLabel = Path(args.info_file)
    p1_ind = args.p1_index_substrate
    p1_ind_pdb = args.p1_index_pdb
    starting_structure_path = Path(args.starting_structures)
    structure_save_path = starting_structure_path.parent
    p1p11_wt = args.p1p11_wt
    output_format = args.output_format
    constraintSuffix = args.constraint_suffix
    commandPath = Path(args.command_directory)
    mkdir(commandPath)

    # Use intermediate output from CleavEX as the input. Need to update in the future
    df = pd.read_csv(mutSeqLabel, index_col=0)
    mutant_list = [''] * df.shape[0]
    for column_name in df.columns:
        if str(column_name).lower().find('mutant') != -1:
            # A plain list is independent of the sequence-string index.
            mutant_list = df[column_name].tolist()
    sequences = df.index.values

    new_c = 0
    for seq in sequences:
        p1p11, _ = locate_p1p11(seq, p1_ind)
        target = structure_save_path / (starting_structure_path.stem + '_' + p1p11 + '.pdb')
        if target.is_file():
            # A starting structure for this pair already exists.
            continue
        createCrys(p1p11_wt, p1p11, p1_ind_pdb, starting_structure_path)
        new_c += 1
    print('Swapping {} number of P1P11 combinations'.format(new_c))
    toCommands(args, (sequences, mutant_list), constraintSuffix, mode=output_format)
    printToBatchCommand(args)


if __name__ == '__main__':
    args = parse_args()
    main(args)
def trainSeqOnly(dataset, save = '/scratch/cl1205/ml-cleavage/outputs/seqOnly_20220217', model = 'logistic_regression',
                 encoding = 'energy', split=2):
    """Train one baseline classifier on a feature subset and save its output.

    Parameters
    ----------
    dataset : str
        Dataset name handed to ``load_data``. Names containing ``TEV_all``
        or ``HCV`` also decide how columns are divided into sequence and
        energy features.
    save : str
        Directory that receives the ``logits_*`` files written with
        ``np.savetxt``.
    model : str
        ``logistic_regression``, ``random_forest``, ``decision_tree``,
        ``svm`` or ``ann``. Any other value trains nothing.
    encoding : str
        ``energy`` or ``seq`` keeps only that column group; any other value
        keeps every column.
    split : int
        2 for train/test data, 3 for train/validation/test data.

    Raises
    ------
    ValueError
        If *split* is neither 2 nor 3.
    """
    classes = 2
    if split == 2:
        X_train, y_train, X_test, y_test = load_data(dataset, classes)
        X_val = y_val = None
    elif split == 3:
        X_train, y_train, X_val, y_val, X_test, y_test = load_data(dataset, classes, 3)
    else:
        raise ValueError('split must be 2 or 3, got {}'.format(split))

    # Divide the columns into the two feature groups. Columns at or beyond
    # the per-dataset boundary are energy features; before it, positions
    # 20..27 of every run of 28 columns are energy and the rest sequence.
    energy_indices = []
    seq_indices = []
    for tag, boundary in (('TEV_all', 1316), ('HCV', 952)):
        if dataset.find(tag) == -1:
            continue
        for i in range(X_train.shape[1]):
            # if having identifier, need to minus 10
            if i >= boundary or i % 28 >= 20:
                energy_indices.append(i)
            else:
                seq_indices.append(i)

    if encoding == 'energy':
        columns = energy_indices
    elif encoding == 'seq':
        columns = seq_indices
    else:
        columns = None
    if columns is not None:
        X_train = X_train.iloc[:, columns].copy()
        X_test = X_test.iloc[:, columns].copy()
        if split == 3:
            X_val = X_val.iloc[:, columns].copy()

    X_train = scale(X_train)
    X_test = scale(X_test)
    if split == 3:
        X_val = scale(X_val)

    def logits_file():
        # Built lazily so the name is only formed once a model has trained.
        return os.path.join(save, 'logits_' + model + '_' + str(dataset) + '_' + encoding)

    if model == 'logistic_regression':
        from sklearn import linear_model
        lg = linear_model.LogisticRegression(C = 1, max_iter = 500)
        prob, acc = train_test(lg, X_train, y_train, X_test, y_test)
        print('Test Accuracy:{:.4f}'.format(acc))
        np.savetxt(logits_file(), prob)
    elif model == 'random_forest':
        from sklearn.ensemble import RandomForestClassifier
        n_repeats = 20
        av_acc = 0
        for _ in range(n_repeats):
            rf = RandomForestClassifier()
            prob, acc = train_test(rf, X_train, y_train, X_test, y_test)
            av_acc += acc
        av_acc = av_acc / n_repeats
        print('Test Accuracy:{:.4f}'.format(av_acc))
        # Accuracy is averaged over the repeats; the saved output is the
        # one from the last repeat only.
        np.savetxt(logits_file(), prob)
    elif model == 'decision_tree':
        from sklearn.tree import DecisionTreeClassifier
        dt = DecisionTreeClassifier()
        prob, acc = train_test(dt, X_train, y_train, X_test, y_test)
        print('Test Accuracy:{:.4f}'.format(acc))
        np.savetxt(logits_file(), prob)
    elif model == 'svm':
        from sklearn import svm
        svmsvc = svm.SVC(C = 1, probability=True)
        prob, acc = train_test(svmsvc, X_train, y_train, X_test, y_test)
        print('Test Accuracy:{:.4f}'.format(acc))
        np.savetxt(logits_file(), prob)
    elif model == 'ann':
        import tensorflow as tf
        from tensorflow import keras
        os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
        accs = []
        test_accs = []
        combinations = []
        dropout_list = [0.5,0.4,0.3,0.2,0.1,0.05,0.01]
        learning_rate = [0.01,0.05,1e-3,5e-3,1e-4,5e-4]
        n_class = 2
        # Grid search over dropout x learning rate.
        for dropout in dropout_list:
            for lr in learning_rate:
                print('dropout: {}; learning rate: {}'.format(dropout, lr))
                combinations.append([dropout, lr])
                ann = keras.Sequential([keras.layers.Dense(1024, activation=tf.nn.relu),
                                        keras.layers.Dropout(dropout, input_shape = (1024,)),
                                        keras.layers.Dense(n_class, activation=tf.nn.softmax)])
                # NOTE(review): tf.train.AdamOptimizer is kept from the
                # original call; confirm it exists in the installed
                # TensorFlow version.
                ann.compile(optimizer=tf.train.AdamOptimizer(learning_rate = lr),
                            loss='sparse_categorical_crossentropy', metrics=['accuracy'])
                suffix = ('_' + model + '_' + str(dataset) + '_' + encoding +
                          '_dropout_' + str(dropout) + '_lr_' + str(lr) + '_epoch_100')
                if split == 2:
                    prob, acc = train_test_ann(ann, n_class, X_train, y_train, X_test, y_test)
                    # There is no separate validation set, so the single
                    # held-out result serves as both the selection metric
                    # and the reported test metric. These names were
                    # previously left unassigned on this path.
                    test_prob, test_acc = prob, acc
                else:
                    prob, acc, test_prob, test_acc = train_test_ann_split(ann, n_class, X_train, y_train,
                                                                          X_test, y_test,
                                                                          X_val, y_val)
                    np.savetxt(os.path.join(save, 'logits_val' + suffix), prob)
                np.savetxt(os.path.join(save, 'logits_test' + suffix), test_prob)
                accs.append(acc)
                test_accs.append(test_acc)
        print('Validation Accuracy:{:.4f}'.format(max(accs)))
        # Report the test accuracy of the combination that scored best on
        # the selection metric.
        i = np.argmax(np.array(accs))
        print('Test Accuracy:{:.4f}'.format(test_accs[i]))
        print('Dropout: {:f}; Learning Rate: {:f}'.format(combinations[i][0], combinations[i][1]))
def main(args):
    """Replace each sequence in the classification file by its best-scoring PDB.

    For every entry of the ``Sequence`` column the file
    ``<score_folder_path>/<sequence>.fasc`` is read as one JSON object per
    line. The ``filename`` of the record with the lowest ``total_score``
    (reduced to its last ``/``-separated component) replaces the sequence.
    The result is written next to the classification file as
    ``<stem>_singlePDB.txt`` (tab separated, columns ``Sequence`` and
    ``Result``).

    Parameters
    ----------
    args : argparse.Namespace
        Reads ``score_folder_path`` and ``classification_file``.

    Raises
    ------
    ValueError
        If a score file contains no records.
    """
    score_path = Path(args.score_folder_path)
    class_file = Path(args.classification_file)
    df_class = pd.read_csv(class_file, delimiter='\t')

    new_sequences = []
    for seq in df_class['Sequence']:
        fasc = score_path / (seq + '.fasc')
        # Records are collected afresh for every sequence so that an empty
        # score file can never fall back on the previous sequence's scores.
        records = []
        with open(fasc, 'r') as fp:
            for line in fp:
                line = line.strip()
                if line:  # tolerate blank lines, e.g. a trailing newline
                    records.append(json.loads(line))
        if not records:
            raise ValueError('no score records found in {}'.format(fasc))
        # A list of dicts lets pandas line the keys up itself, so records
        # carrying extra or missing score terms do not break the table.
        scores = pd.DataFrame(records)
        best = scores.loc[scores['total_score'].idxmin(), 'filename']
        new_sequences.append(best.split('/')[-1])

    out = pd.DataFrame({'Sequence': new_sequences, 'Result': df_class['Result']})
    out.to_csv(class_file.parent / (class_file.stem.split('.')[0] + '_singlePDB.txt'), sep='\t', index=None)
def createCrys(p_wt, p, ind, root):
    """Write a copy of a PDB file with the P1/P1' residue names replaced.

    Parameters
    ----------
    p_wt : str
        One-letter codes of the two residues currently in the structure.
    p : str
        One-letter codes of the two residues to write instead.
    ind : int
        Residue number of the first position; the second is ``ind + 1``.
    root : pathlib.Path
        Input structure. The copy is written beside it as
        ``<stem>_<p>.pdb``.

    Raises
    ------
    KeyError
        If a one-letter code is not in the residue table.
    """
    letter1 = 'ARNDBCEQZGHILKMFPSTWYV'
    letter3 = ['ALA', 'ARG', 'ASN', 'ASP', 'ASX', 'CYS', 'GLU', 'GLN', 'GLX', 'GLY', 'HIS', 'ILE', 'LEU', 'LYS',
               'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL']
    letterMap = dict(zip(letter1, letter3))

    # Resolve every residue name before any file is touched, so a bad code
    # cannot leave a half-written structure behind.
    # NOTE(review): the motifs are matched as plain substrings, so a motif
    # ending in "7" would also match inside "70" - confirm the inputs
    # cannot contain such residues.
    swaps = (
        (letterMap[p_wt[0]] + ' ' + str(ind), letterMap[p[0]]),
        (letterMap[p_wt[1]] + ' ' + str(ind + 1), letterMap[p[1]]),
    )

    out_path = root.parent / (root.stem + '_' + p + '.pdb')
    # The input is opened first: if it is missing, no empty output file is
    # created that a later existence check could take for a finished
    # structure. Both handles are closed by the with-statement.
    with open(root, 'r') as fp, open(out_path, 'w') as gp:
        for line in fp:
            # REMARK records are free text and are copied through unchanged.
            if not line.startswith('REMARK'):
                for motif, new_name in swaps:
                    pos = line.find(motif)
                    if pos != -1:
                        # Overwrite only the three-letter residue name.
                        line = line[:pos] + new_name + line[pos + 3:]
            gp.write(line)
def locate_p1p11(seq, p1_ind=None):
    """Return the P1/P1' residue pair and the delimiter-free sequence.

    If *seq* contains a ``.``, the residues on either side of the first one
    are P1 and P1' and the dot is removed from the returned sequence.
    Otherwise *p1_ind* is used as the index of P1 in *seq*.

    Parameters
    ----------
    seq : str
        Substrate sequence, optionally with a ``.`` between P1 and P1'.
    p1_ind : int, optional
        Index of P1; only consulted when *seq* has no ``.``. It may be
        negative, but not -1, because P1' would then wrap to the start.

    Returns
    -------
    tuple of (str, str)
        The two-letter P1P1' string and the sequence without the delimiter.

    Raises
    ------
    ValueError
        If the ``.`` is the first or last character, or if there is no
        ``.`` and *p1_ind* is ``None`` or -1.
    IndexError
        If there is no ``.`` and *p1_ind* leaves no room for P1 and P1'.
    """
    dot = seq.find('.')
    if dot != -1:
        # A dot at either end has no residue on one side. Indexing would
        # wrap around to the other end or run off the string.
        if dot == 0 or dot == len(seq) - 1:
            raise ValueError("'.' must sit between two residues: {!r}".format(seq))
        return seq[dot - 1] + seq[dot + 1], seq[:dot] + seq[dot + 1:]

    # Explicit checks rather than assert, which is stripped under -O.
    if p1_ind is None or p1_ind == -1:
        raise ValueError(
            "sequence {!r} has no '.' delimiter, so a P1 index other than "
            "None or -1 is required".format(seq))
    if not -len(seq) <= p1_ind <= len(seq) - 2:
        raise IndexError(
            'P1 index {} is out of range for sequence {!r}'.format(p1_ind, seq))
    return seq[p1_ind] + seq[p1_ind + 1], seq
163 | # Use intermediate output from CleavEX as the input. Need to update in the future 164 | df = pd.read_csv(mutSeqLabel, index_col=0) 165 | mutant_list = [''] * df.shape[0] 166 | for column_name in df.columns: 167 | if column_name.lower().find('mutant') != -1: 168 | mutant_list = df[column_name] 169 | sequences = df.index.values 170 | p1p11s = [] 171 | new_c = 0 172 | for seq in sequences: 173 | # protease = df.iloc[i, 0] 174 | p1p11,_ = locate_p1p11(seq, p1_ind) 175 | # check whether file exists or not 176 | if (structure_save_path / (starting_structure_path.stem + '_' + p1p11 + '.pdb')).is_file(): #, protease + '_' + p1p11 + '.pdb' 177 | # print('starting structure for {} exists! Skip it....'.format(p1p11)) 178 | continue 179 | else: 180 | createCrys(p1p11_wt, p1p11, p1_ind_pdb, starting_structure_path) 181 | new_c += 1 182 | print('Swapping {} number of P1P11 combinations'.format(new_c)) 183 | # if format == 'silent': 184 | toCommands(args, (sequences, mutant_list), constraintSuffix, mode=format) 185 | printToBatchCommand(args) 186 | 187 | if __name__ == '__main__': 188 | args = parse_args() 189 | main(args) 190 | -------------------------------------------------------------------------------- /helper/text_to_slurm.py: -------------------------------------------------------------------------------- 1 | # This lets you read a list of commands from a text file given in a flag and does all the slurming for you. 2 | # By default they are run at /scratch/ss3410/GCNN. Additionally, you can specify where to put the .sh output file. 
# By default they go down on file directory ex) /scratch/ss3410/GCNN/

"""
python text_to_slurm.py -txt /projects/f_sdk94_1/EnzymeModelling/Protease-Substrate-Design/HCV_D183A_commands.txt -job_name HCV_D183A -mem 12000 -path_operation /projects/f_sdk94_1/EnzymeModelling/Protease-Substrate-Design -path_sh /projects/f_sdk94_1/EnzymeModelling/Commands -batch 20 -time 2-00:00:00
"""

import argparse
import os

parser = argparse.ArgumentParser()
parser.add_argument("-txt", type=str)
parser.add_argument("-job_name", type=str)
parser.add_argument("-path_operation", type=str)
parser.add_argument("-path_sh", type=str)
parser.add_argument("-mem", type=str)
parser.add_argument("-delay", type=int)
parser.add_argument("-batch", type=int)
parser.add_argument("-np",type=int, help="ratio in each batch that should be parallel")
parser.add_argument("-time", type=str)

args = parser.parse_args()

filename = args.txt
job_name = args.job_name
path = args.path_operation
sh = args.path_sh
delay = args.delay  # parsed for compatibility; not used below
mem = args.mem
batch = args.batch
np = args.np
time = args.time

# Fill in defaults for every flag that was left out.
if np is None:
    np = 1

if batch is None:
    # This used to be the comparison `batch == 1`, which left batch as None
    # and made the arithmetic below fail.
    batch = 1

if delay is None:
    delay = ""

if mem is None:
    mem = 2000

if path is None:
    path = "/projects/f_SDK94_1/EnymeModelling/Commands"

if job_name is None:
    raise ValueError("no name given")

if time is None:
    time = "3-00:00:00"

if batch < 1 or np < 1:
    raise ValueError("-batch and -np must be positive")

if filename is None or not os.path.exists(filename):
    raise ValueError("file specified not found")

with open(filename) as f:
    # Blank lines would otherwise become an `srun` with no command.
    lineList = [x.strip() for x in f if x.strip()]

# Positional fields: 0 job name, 1 script number, 2 ntasks, 3 memory,
# 4 working directory, 5 time limit.
header ="""#!/bin/bash
#SBATCH --export=ALL
#SBATCH --job-name {0}.{1}
#SBATCH --partition main
#SBATCH --ntasks {2}
#SBATCH --cpus-per-task 1
#SBATCH --mem {3}
#SBATCH --output {0}.{1}.log
#SBATCH --error {0}.err
#SBATCH --time {5}
#SBATCH --begin now

cd {4}

"""

if sh is None:
    sh = "../Commands/"
else:
    sh += "/"

# One script per batch of commands. Stepping through the list in strides of
# `batch` stops after the last command, so no trailing script without any
# command is produced.
for counter, start in enumerate(range(0, len(lineList), batch), start=1):
    command = r"{}{}_{}.sh".format(sh, job_name, counter)
    # Mode "w" truncates an existing script; the with-statement closes it
    # even if a write fails.
    with open(command, "w") as f:
        f.write(header.format(job_name, counter, np, mem, path, time))
        for idx in range(start, min(start + batch, len(lineList))):
            if idx % np == 0:
                # Every np-th command runs in the foreground.
                f.write("\nsrun {}\n".format(lineList[idx]))
            else:
                # The others are sent to the background.
                f.write("\nsrun {} &\n".format(lineList[idx]))
        f.write("printf done\n")
'HCV_WT_binary_10_ang_aa_energy_7_energyedge_5_hbond', 28 | is_energy_only = True, hidden = 20, valset = None, 29 | modelPath = '/scratch/cl1205/protease-gcnn-pytorch/model/outputs/tt_finalize_20220211/HCV_WT_binary_10_ang_energy_7_energyedge_5_hbond'): 30 | if valset == None: 31 | adj_ls, features, labels, sequences, proteases, labelorder, train_mask, test_mask = load_data(dataset, is_test=testset, norm_type=True, test_format = 'split', energy_only=is_energy_only, noenergy=False) 32 | else: 33 | adj_ls, features, labels, sequences, proteases, labelorder, train_mask, val_mask, test_mask = load_data(dataset, is_test=testset, is_val=valset, norm_type=True, test_format = 'split', energy_only=is_energy_only, noenergy=False) 34 | 35 | folder = modelPath # /projects/f_sdk94_1/PGCN/outputs/tt_finalize_20210413 36 | max_acc = [0,0,0] 37 | path_fin = ["","",""] 38 | for root, dirs, files in os.walk(folder): 39 | for name in files: 40 | if name.split('.')[-1] == 'pth': 41 | path = root + os.sep + name 42 | # /projects/f_sdk94_1/PGCN/outputs/tt_finalize_20210413/HCV_A171T_binary_10_ang_aa_energy_7_energyedge_5_hbond/bs_50/ 43 | # model_for_test_seed_3_hidden_20_linear_0_lr_0.001_wd_0.005_bs_50_dt_0.3.pth 44 | bs = int(name.split('_')[-3]) 45 | wd = float(name.split('_')[-5]) 46 | lr = float(name.split('_')[-7]) 47 | dt = float(name.split('_')[-1][0:-4]) 48 | seed = int(name.split('_')[4]) 49 | model = GCN(nnode=features.shape[1], 50 | nfeat=features.shape[2], 51 | mfeat=adj_ls.shape[3], 52 | # ngcn=args.ngcn, 53 | hidden1=hidden, 54 | depth=2, 55 | # hidden2=args.hidden2, 56 | natt=0, # one layer 57 | linear=0, 58 | weight='pre', 59 | is_des=False, 60 | nclass=2, #labels.shape[1], 61 | dropout=dt, 62 | cheby=None) 63 | 64 | logit_test, acc_test = test(X=features, graph=adj_ls, y=labels, testmask=test_mask, model_for_test=model, 65 | hidden1=hidden, linear=0, learning_rate=lr, weight_decay=wd, batch_size=bs, 66 | dropout=dt, 67 | path_save=path, 68 | new=False) 69 | if 
def test(X, graph, y, testmask, model_for_test, hidden1, linear, learning_rate, weight_decay, batch_size, dropout, path_save,new=False):
    """Load a checkpoint into *model_for_test* and run one forward pass.

    Parameters
    ----------
    X, graph, y
        Inputs and labels; indexed with *testmask* unless *new* is set.
    testmask
        Selector applied to ``X``, ``graph`` and ``y`` when *new* is false.
    model_for_test
        Model that receives ``checkpoint['state_dict']``.
    hidden1, linear, learning_rate, weight_decay, batch_size, dropout
        Not used here; kept so existing call sites keep working.
    path_save
        Path of the checkpoint file passed to ``torch.load``.
    new : bool
        When true, the inputs are used as given and only logits are
        returned.

    Returns
    -------
    ``(logits, accuracy)`` when *new* is false, ``logits`` when it is true,
    and ``(None, 0)`` when the checkpoint cannot be loaded.
    """
    try:
        checkpoint = torch.load(path_save)
    except Exception:
        # Best effort: report the unreadable checkpoint and let the caller
        # move on. `except Exception` instead of a bare `except` so that
        # Ctrl-C and interpreter exit are no longer swallowed here.
        # NOTE(review): this tuple is returned even when new=True, where the
        # success path returns bare logits - confirm callers handle that.
        print(path_save)
        return None,0

    model_for_test.load_state_dict(checkpoint['state_dict'])
    if not new:
        X = X[testmask]
        graph = graph[testmask]
        y = y[testmask]

    with torch.no_grad():
        model_for_test.eval()
        logits_test = model_for_test(X, graph)
        if new:
            return logits_test
        # y is compared through its argmax along axis 1.
        test_acc = accuracy(logits_test, torch.argmax(y,axis=1))
    return logits_test, test_acc
'/projects/f_sdk94_1/PGCN/TEV/WT/outputs/tt_finalize_energy_only/') 118 | #print(acc, path) 119 | 120 | # logit, acc, path = findBestAcc('HCV_' + i + '_binary_10_ang_aa_energy_7_energyedge_5_hbond', 121 | # 'HCV_' + i + '_binary_10_ang_aa_energy_7_energyedge_5_hbond', 122 | # False, 123 | # '/scratch/cl1205/protease-gcnn-pytorch/model/outputs/tt_finalize_20220211/HCV_' \ 124 | # + i + '_binary_10_ang_aa_energy_7_energyedge_5_hbond/') 125 | #'/projects/f_sdk94_1/PGCN/TEV/WT/outputs/tt_finalize_aa/') 126 | #for i in ['all']: #['WT','A171T','D183A','Triple', 'all']: 127 | # logit, acc, path = findBestAcc('TEV_' + 'WT' + '_binary_10_ang_aa_energy_7_energyedge_5_hbond', 128 | # 'TEV_' + 'WT' + '_binary_10_ang_aa_energy_7_energyedge_5_hbond', 129 | # False, 10, 130 | # '/scratch/cl1205/protease-gcnn-pytorch/model/outputs/tt_tev_20220403/TEV_' \ 131 | # + 'WT' + '_binary_10_ang_aa_energy_7_energyedge_5_hbond_epoch_' + i + '/') 132 | #for model in ['all']: 133 | # logit, acc, path = findBestAcc('TEV_' + model + '_binary_10_ang_aa_energy_7_energyedge_5_hbond', 134 | # 'TEV_' + model + '_binary_10_ang_aa_energy_7_energyedge_5_hbond', 135 | # True, 20, 136 | # 'TEV_' + model + '_binary_10_ang_aa_energy_7_energyedge_5_hbond', 137 | # '/scratch/cl1205/protease-gcnn-pytorch/model/outputs/tt_tev_20220629_trisplit/TEV_' + model + '_binary_10_ang_energy_7_energyedge_5_hbond/') 138 | # print(acc, path) 139 | 140 | #for model in ['all']: 141 | # logit, acc, path = findBestAcc('TEV_' + model + '_binary_10_ang_aa', 142 | # 'TEV_' + model + '_binary_10_ang_aa', 143 | # False, 20, 144 | # 'TEV_' + model + '_binary_10_ang_aa', 145 | # '/scratch/cl1205/protease-gcnn-pytorch/model/outputs/tt_tev_20220629_trisplit/TEV_' + model + '_binary_10_ang_aa_/') 146 | # print(acc, path) 147 | -------------------------------------------------------------------------------- /model/findBestAcc_from_log.py: -------------------------------------------------------------------------------- 1 | import os 2 | 
import numpy as np


def read_best_val_acc(log_path):
    """Return the accuracy recorded for the best epoch in one training log.

    The last line of the log is expected to end in ``: <best epoch>``.  The
    first line whose value after the final ``:`` equals that epoch number is
    taken as the start of the epoch's record, and the accuracy is read from
    the line four positions below it.

    Returns ``None`` when the file is empty, truncated, or does not follow
    this layout, so that one broken log cannot abort a scan over many logs.
    """
    with open(log_path, 'r') as fp:
        lines = fp.readlines()
    if not lines:
        return None
    try:
        best_epoch = str(int(lines[-1].split(':')[-1].strip()))
    except ValueError:
        return None
    for idx, line in enumerate(lines):
        if line.split(':')[-1].strip() == best_epoch:
            break
    else:
        return None
    try:
        return float(lines[idx + 4].split(':')[-1].strip())
    except (IndexError, ValueError):
        return None


def find_best_log(log_dir='./'):
    """Scan ``log_dir`` for ``tt.*.out`` logs and return ``(file name, accuracy)`` of the best.

    Only files whose first dot-separated component is ``tt`` and whose last
    is ``out`` are considered.  Returns ``(None, 0)`` if no log reports an
    accuracy above zero.
    """
    best_file, best_acc = None, 0
    for fname in os.listdir(log_dir):
        parts = fname.split('.')
        if parts[0] != 'tt' or parts[-1] != 'out':
            continue
        val_acc = read_best_val_acc(os.path.join(log_dir, fname))
        if val_acc is not None and val_acc > best_acc:
            best_file, best_acc = fname, val_acc
    return best_file, best_acc


def main():
    """Print the best log in the current directory and its accuracy."""
    good_f, max_acc = find_best_log('./')
    if good_f is None:
        # Previously this situation ended in a NameError on `good_f`.
        print('no usable tt.*.out log found')
        return
    print(good_f)
    print('accuracy: {}'.format(max_acc))


if __name__ == '__main__':
    main()
help='number of gpus.') 34 | #parser.add_argument('--fastmode', action='store_true', default=False, 35 | # help='Validate during training pass.') 36 | parser.add_argument('--seed', type=int, default=42, help='Random seed.') 37 | parser.add_argument('--epochs', type=int, default=200, 38 | help='Number of epochs to train.') 39 | parser.add_argument('--lr', type=float, default=0.01, 40 | help='Initial learning rate.') 41 | parser.add_argument('--weight_decay', type=float, default=5e-4, 42 | help='Weight decay (L2 loss on parameters).') 43 | parser.add_argument('--hidden1', type=int, default=10, 44 | help='Number of hidden units for nodes.') 45 | parser.add_argument('--depth', type=int, default=10, help='Number of gcnn layers') 46 | parser.add_argument('--att', type=int, default=0, help='the dimension of weight matrices for key and query') 47 | parser.add_argument('--linear', type=int, default=0) 48 | parser.add_argument('--dropout', type=float, default=0.1, 49 | help='Dropout rate (1 - keep probability).') 50 | parser.add_argument('--no_energy', action='store_true', default=False) 51 | parser.add_argument('--test_dataset',type=str) 52 | parser.add_argument('--data_path', default= None, type=str) #'/projects/f_sdk94_1/PGCN/Data/new_subs' 53 | parser.add_argument('--test_logits_path', type=str) 54 | parser.add_argument('--val_dataset', type=str, default=None) 55 | parser.add_argument('--dataset',type=str, help='input dataset string') 56 | parser.add_argument('--model', type = str, default = 'gcn',choices=['gcn','chebyshev']) 57 | parser.add_argument('--scale_type', choices = ['exp','minmax'], default='exp') 58 | parser.add_argument('--max_degree',type=int, default = 3, help='number of supports') 59 | parser.add_argument('--batch_size',type=int, default=8) 60 | parser.add_argument('--weight', type=str, default='pre',choices=['pre','post']) 61 | parser.add_argument('--dim_des',action='store_true',default=False) 62 | parser.add_argument('--new', action='store_true', 
default=False) 63 | parser.add_argument('--energy_only', action='store_true', default=False) 64 | parser.add_argument('--seq_only',action='store_true',default=False) 65 | parser.add_argument('--feature',choices=['d','s+d','s','e','s+e','s+e+d'],default='s+e') 66 | parser.add_argument('--save', type=str, default='./experiment1') 67 | parser.add_argument('--importance',action='store_true', default = False, help='Whether calculate each variable''s importance.') 68 | args = parser.parse_args() 69 | 70 | makedirs(args.save) 71 | logger = get_logger(logpath=os.path.join('logs'), filepath=os.path.abspath(__file__)) 72 | logger.info(args) 73 | 74 | # test 75 | def test(X, graph, y, testmask, model_for_test, hidden1, linear, learning_rate, weight_decay, batch_size, dropout, path_save,new=False): 76 | #checkpoint = torch.load(os.path.join(path_save, 'model_for_test_seed_' + str(args.seed) + '_hidden_' + str(hidden1) + '_linear_' + str(linear) +'_lr_'+str(learning_rate)+'_wd_'+str(weight_decay)+'_bs_'+str(batch_size)+ '_dt_' + str(dropout) + '.pth')) 77 | checkpoint = torch.load(path_save) 78 | logger.info("best epoch is:" + str(checkpoint['epoch'])) 79 | model_for_test.load_state_dict(checkpoint['state_dict']) 80 | print('model loaded') 81 | if new == False: 82 | #if testmask != None: 83 | print('testmask is not none. good.') 84 | X = X[testmask] 85 | graph = graph[testmask] 86 | y = y[testmask] 87 | #else: 88 | # print('testmask is none. 
def _permuted_over_samples(values):
    """Return a copy of the 1-D tensor `values` with its entries in random order.

    The permutation is drawn from NumPy's global RNG.  Indexing with the
    permutation yields a fresh tensor, so the result can safely be written
    back into the tensor `values` was sliced from.
    """
    order = torch.from_numpy(np.random.permutation(values.shape[0])).long().to(values.device)
    return values[order]


def importance(all_features, all_graph, ys, full_test_mask, trained_model, hidden1, linear, learning_rate, \
        weight_decay, batch_size, dropout, path_save):
    """Permutation importance of every node and every node pair (edge).

    For each variable in turn, the samples selected by `full_test_mask` are
    copied, the variable's feature channels are permuted across samples
    (each channel with its own permutation), and the accuracy of
    `trained_model` on the perturbed copy is measured through `test()`.

    Variables are ordered as all nodes first (dimension 1 of `all_graph`),
    then the unordered node pairs (0,1), (0,2), ..., (n-2,n-1).  For a pair
    (a, b) the permuted values are written to both [a, b] and [b, a] of the
    adjacency tensor so it stays symmetric.

    Args:
        all_features: node-feature tensor, indexed [sample, node, channel].
        all_graph: edge-feature tensor, indexed [sample, node, node, channel].
        ys: label tensor, indexed by sample along dimension 0.
        full_test_mask: mask/index selecting the samples to evaluate on.
        trained_model: model instance handed to `test()`.
        hidden1, linear, learning_rate, weight_decay, batch_size, dropout:
            passed through unchanged to `test()`.
        path_save: checkpoint path passed to `test()`.

    Returns:
        1-D numpy array with one accuracy per variable, in the order above.
    """
    num_node = all_graph.shape[1]
    num_edge = num_node * (num_node - 1) // 2
    var = num_node + num_edge  # the number of nodes and edges
    acc_arr = np.zeros(var)
    logger.info('number of candidate node/edges:{}'.format(var))
    logger.info('number of nodes:{}'.format(num_node))
    logger.info('number of edges:{}'.format(var - num_node))

    edge_ind = [(a, b) for a in range(num_node) for b in range(a + 1, num_node)]

    for i in range(var):  # for each variable
        tmp_adj_ls = all_graph[full_test_mask].clone()
        tmp_features = all_features[full_test_mask].clone()
        tmp_y = ys[full_test_mask].clone()
        # The copies above already hold only the selected samples, so test()
        # must keep all of them.  (`np.bool` no longer exists in NumPy >= 1.24.)
        OOB_mask = np.ones(tmp_features.shape[0], dtype=bool)
        if i < num_node:
            for j in range(tmp_features.shape[2]):  # for each node feature
                tmp_features[:, i, j] = _permuted_over_samples(tmp_features[:, i, j])
            print("Shuffling Node Feature: {}".format(i+1))
        else:
            edge_node = i - num_node
            a, b = edge_ind[edge_node]
            for j in range(tmp_adj_ls.shape[3]):  # for each edge feature
                # np.random.shuffle must not be applied to a tensor slice: its
                # element swaps operate on views and duplicate values instead
                # of permuting them.
                shuffled = _permuted_over_samples(tmp_adj_ls[:, a, b, j])
                tmp_adj_ls[:, a, b, j] = shuffled
                tmp_adj_ls[:, b, a, j] = shuffled
            print("Shuffling Edge Feature: {}".format(edge_node + 1))
        logit_vi, acc_vi = test(X=tmp_features, graph=tmp_adj_ls, y=tmp_y, testmask=OOB_mask, model_for_test=trained_model, \
                                hidden1=hidden1, linear=linear, learning_rate=learning_rate, \
                                weight_decay=weight_decay, batch_size=batch_size, dropout=dropout, path_save=path_save)
        if i < num_node:
            logger.info("Node {:04d} | Test Accuracy: {:.4f}".format(i+1, acc_vi))
        else:
            logger.info("Edge {:04d} | Test Accuracy: {:.4f}".format(i-num_node+1, acc_vi))
        acc_arr[i] = acc_vi
    return acc_arr
{}".format(np.sum(tmp_mask), np.sum(val_mask))) 162 | else: 163 | adj_ls, features, sequences, labelorder = load_data(args.dataset, norm_type=True, energy_only=is_energy_only, seq_only=args.seq_only, noenergy=args.no_energy, data_path=args.data_path, feature_type=args.feature) 164 | 165 | cheby_params = args.max_degree if args.model == 'chebyshev' else None 166 | weight_mode = args.weight 167 | dim_des = args.dim_des 168 | 169 | model = GCN(nnode=features.shape[1], 170 | nfeat=features.shape[2], 171 | mfeat=adj_ls.shape[3], 172 | # ngcn=args.ngcn, 173 | hidden1=args.hidden1, 174 | depth=args.depth, 175 | # hidden2=args.hidden2, 176 | natt=args.att, # one layer 177 | linear=args.linear, 178 | weight=weight_mode, 179 | is_des=dim_des, 180 | nclass=len(labelorder), 181 | dropout=args.dropout, 182 | cheby=cheby_params) 183 | logger.info(model) 184 | logger.info('Number of parameters: {}'.format(count_parameters(model))) 185 | 186 | batch_size = args.batch_size 187 | 188 | # load trained model and test first 189 | if args.new == False: 190 | logit_test, acc_test = test(X=features, graph=adj_ls, y=labels, testmask=test_mask, model_for_test=model, hidden1=args.hidden1, linear=args.linear, learning_rate=args.lr, weight_decay=args.weight_decay, batch_size=args.batch_size, dropout=args.dropout, path_save=args.save, new=False) 191 | print("Original Test Accuracy is:" + str(acc_test)) 192 | else: 193 | logger.info('testing begin') 194 | logit_test = test(X=features, graph=adj_ls, y=None, testmask=None, model_for_test=model, hidden1=args.hidden1, linear=args.linear, learning_rate=args.lr, weight_decay=args.weight_decay, batch_size=args.batch_size, dropout=args.dropout, path_save=args.save, new=True) 195 | 196 | logger.info('logits printing') 197 | logger.info(args.save.split('/')[-1][:-4]) 198 | dump_path = os.path.join(args.test_logits_path, '-'.join(args.save.split('/')[1:])) 199 | logger.info('dump path set up') 200 | logger.info(dump_path) 201 | makedirs(dump_path) 202 | 
class GraphConvolution(Module):
    """
    Basic graph-convolution layer in the spirit of https://arxiv.org/abs/1609.02907.

    The forward pass computes ``adj @ input @ weight`` and adds ``bias`` when
    the layer was built with one.
    """

    def __init__(self, in_features, out_features, bias=True):
        super().__init__()
        self.in_features, self.out_features = in_features, out_features
        self.weight = Parameter(torch.FloatTensor(in_features, out_features))
        # Registering None still creates a `bias` attribute, so the other
        # methods can simply test it against None.
        bias_param = Parameter(torch.FloatTensor(out_features)) if bias else None
        self.register_parameter('bias', bias_param)
        self.reset_parameters()

    def reset_parameters(self):
        """Fill weight, then bias, uniformly in +-1/sqrt(out_features)."""
        bound = 1. / math.sqrt(self.weight.size(1))
        for param in (self.weight, self.bias):
            if param is not None:
                param.data.uniform_(-bound, bound)

    def forward(self, input, adj):
        aggregated = torch.matmul(adj, input)  # mix node features through the graph
        projected = torch.matmul(aggregated, self.weight)
        if self.bias is None:
            return projected
        return projected + self.bias

    def __repr__(self):
        return '{} ({} -> {})'.format(
            self.__class__.__name__, self.in_features, self.out_features)
class SelfAttention(Module):
    """Single-head dot-product self-attention over the node dimension.

    Keys and queries are projected to `w_features` dimensions; values keep
    `in_features` dimensions, so the output has the same shape as the input.
    Scores are ``softmax(Q K^T)`` over the key axis (no 1/sqrt(d) scaling).

    Args:
        in_features: size of the last dimension of the input.
        w_features: size of the key/query projections.
    """

    def __init__(self, in_features, w_features):
        super(SelfAttention, self).__init__()
        self.in_features = in_features
        self.w_features = w_features
        self.w_key = Parameter(torch.empty(in_features, w_features))
        self.w_value = Parameter(torch.empty(in_features, in_features))
        self.w_query = Parameter(torch.empty(in_features, w_features))
        # The projections used to be left as uninitialised memory, which can
        # hold arbitrary values including NaN/inf.
        self.reset_parameters()

    def reset_parameters(self):
        """Initialise each projection uniformly in +-1/sqrt(its output width)."""
        with torch.no_grad():
            for weight in (self.w_key, self.w_value, self.w_query):
                stdv = 1. / math.sqrt(weight.size(1))
                weight.uniform_(-stdv, stdv)

    def forward(self, x, adj):
        """Attend over dimension -2 of `x`; `adj` is accepted for a uniform layer signature and unused."""
        keys = torch.matmul(x, self.w_key)      # ? x N x W
        querys = torch.matmul(x, self.w_query)  # ? x N x W
        values = torch.matmul(x, self.w_value)  # ? x N x F
        # Batched form of the former per-sample loop; every tensor now lives
        # on x's device and dtype instead of a CPU float32 scratch buffer.
        attn_scores = softmax(torch.matmul(querys, keys.transpose(-1, -2)), dim=-1)  # ? x N x N
        return torch.matmul(attn_scores, values)  # ? x N x F
= GraphConvolutionChebyshev(hidden1, hidden1, cheby) 36 | if natt != 0: 37 | self.att = SelfAttention(self.hidden[-1], natt) # dimension of value keeps the same with input size 38 | # single head for now 39 | self.is_att = True 40 | else: 41 | self.is_att = False 42 | self.gc = nn.Sequential(*gcn_layers) 43 | self.flatten = Flatten() 44 | full_layers = [] 45 | nin_full = self.hidden[-1] * nnode # in_features for fully connected layers, which is gcn output * number of node 46 | if linear != 0: 47 | full_layers.append(ConcatLinear(nin_full, int(linear))) 48 | full_layers.append(ConcatLinear(int(linear), nclass)) 49 | else: 50 | full_layers.append(ConcatLinear(nin_full, nclass)) 51 | self.linear = nn.Sequential(*full_layers) 52 | # modification here: replace FloatTensor with rand to make sure values in adjacency matrix are more than zero. 53 | self.edgeweight = Parameter(torch.rand(mfeat,1)) 54 | 55 | def forward(self, x, adj): 56 | if self.weight == 'pre': 57 | adj = torch.matmul(adj, self.edgeweight).view(adj.shape[0],adj.shape[1],-1) 58 | elif self.weight == 'post': 59 | x = x.view(x.shape[0],x.shape[1],-1,1).expand(x.shape[0],x.shape[1],x.shape[2],self.mfeat) 60 | 61 | x = torch.transpose(x,-1,-3) 62 | x = torch.transpose(x,-1,-2) 63 | adj = torch.transpose(adj,-1,-3) 64 | adj = torch.transpose(adj,-1,-2) 65 | for func in self.gc: 66 | x = func(x,adj) 67 | if self.is_att == True: 68 | x = self.att(x,adj) 69 | if self.weight == 'post': 70 | x = torch.transpose(x,-1,-3) 71 | x = torch.transpose(x,-3,-2) 72 | x = torch.matmul(x, self.edgeweight).view(x.shape[0],x.shape[1],-1) 73 | x = F.dropout(x, self.dropout, training=self.training) 74 | x = self.flatten(x,adj) 75 | #x = self.linear(x,adj) 76 | for func_full in self.linear: 77 | x = func_full(x,adj) 78 | return x #F.log_softmax(x, dim=1) 79 | -------------------------------------------------------------------------------- /model/outputs/HCV_A171T_model.pth: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nucleus2014/protease-gcnn-pytorch/c769a0eb53f3b3ec99d53da738ad380376be9bcf/model/outputs/HCV_A171T_model.pth -------------------------------------------------------------------------------- /model/outputs/HCV_Combined_model.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nucleus2014/protease-gcnn-pytorch/c769a0eb53f3b3ec99d53da738ad380376be9bcf/model/outputs/HCV_Combined_model.pth -------------------------------------------------------------------------------- /model/outputs/HCV_D183A_model.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nucleus2014/protease-gcnn-pytorch/c769a0eb53f3b3ec99d53da738ad380376be9bcf/model/outputs/HCV_D183A_model.pth -------------------------------------------------------------------------------- /model/outputs/HCV_Triple_model.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nucleus2014/protease-gcnn-pytorch/c769a0eb53f3b3ec99d53da738ad380376be9bcf/model/outputs/HCV_Triple_model.pth -------------------------------------------------------------------------------- /model/outputs/HCV_WT_model.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nucleus2014/protease-gcnn-pytorch/c769a0eb53f3b3ec99d53da738ad380376be9bcf/model/outputs/HCV_WT_model.pth -------------------------------------------------------------------------------- /model/outputs/TEV_model.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nucleus2014/protease-gcnn-pytorch/c769a0eb53f3b3ec99d53da738ad380376be9bcf/model/outputs/TEV_model.pth -------------------------------------------------------------------------------- 
/model/test.py: -------------------------------------------------------------------------------- 1 | # This script is to test GCNN with already-trained gcnn model 2 | # Author: Changpeng Lu 3 | # Usage: 4 | # python test.py --dataset HCV_binary_10_ang_aa_sinusoidal_encoding_4_energy_7_energyedge_5_hbond --test_dataset HCV_binary_10_ang_aa_sinusoidal_encoding_2_energy_7_energyedge_5_hbond --epochs 500 --hidden1 20 --depth 2 --linear 1024 --att 0 --model gcn --batch_size 500 --lr $tmp_lr --dropout $tmp_dt --weight_decay $tmp_wd --save 'outputs/tt/HCV_binary_10_ang_aa_sinusoidal_encoding_4_energy_7_energyedge_5_hbond/bs_500/' 5 | # Train and test each epoch for this new version, instead of testing only after all training are done. Changes below aren't applied to wider_deeper and more epoch trials with the base setting. Replace Adam with SGD, also add lr_scheduler. Also calculate average train accuracy and loss instead of the last batch. Also, we use earlystop to let the model train enough epochs. 1) if test accuracy always go smaller, then the model will stop; 2) if the test accuracy always the same as the former accuracy, then it means converges, then the model will stop as well. 
6 | 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import os 11 | import time 12 | import logging 13 | import argparse 14 | import numpy as np 15 | import random 16 | 17 | from comet_ml import Experiment 18 | import torch 19 | import torch.nn as nn 20 | import torch.nn.functional as F 21 | import torch.optim as optim 22 | from torch.utils.data import DataLoader 23 | import torchvision.datasets as datasets 24 | import torchvision.transforms as transforms 25 | import math 26 | import scipy.sparse as sp 27 | from torch.nn.parameter import Parameter 28 | 29 | from utils import * 30 | from models import * 31 | 32 | # Training settings 33 | parser = argparse.ArgumentParser() 34 | parser.add_argument('--gpu', type=int, default=0, 35 | help='number of gpus.') 36 | #parser.add_argument('--fastmode', action='store_true', default=False, 37 | # help='Validate during training pass.') 38 | parser.add_argument('--seed', type=int, default=1, help='Random seed.') 39 | parser.add_argument('--epochs', type=int, default=200, 40 | help='Number of epochs to train.') 41 | parser.add_argument('--lr', type=float, default=0.01, 42 | help='Initial learning rate.') 43 | parser.add_argument('--weight_decay', type=float, default=5e-4, 44 | help='Weight decay (L2 loss on parameters).') 45 | parser.add_argument('--hidden1', type=int, default=10, 46 | help='Number of hidden units for nodes.') 47 | parser.add_argument('--depth', type=int, default=10, help='Number of gcnn layers') 48 | parser.add_argument('--att', type=int, default=0, help='the dimension of weight matrices for key and query') 49 | parser.add_argument('--linear', type=int, default=0) 50 | parser.add_argument('--dropout', type=float, default=0.1, 51 | help='Dropout rate (1 - keep probability).') 52 | parser.add_argument('--no_energy', action='store_true', default=False) 53 | parser.add_argument('--energy_only', action='store_true', default=False) 54 | parser.add_argument('--scale_type', choices 
= ['exp','minmax'], default='exp') 55 | parser.add_argument('--test_dataset',type=str) 56 | parser.add_argument('--dataset',type=str, help='input dataset string') 57 | parser.add_argument('--model', type = str, default = 'gcn',choices=['gcn','chebyshev']) 58 | parser.add_argument('--max_degree',type=int, default = 3, help='number of supports') 59 | parser.add_argument('--batch_size',type=int, default=8) 60 | parser.add_argument('--weight', type=str, default='pre',choices=['pre','post']) 61 | parser.add_argument('--dim_des',action='store_true',default=False) 62 | parser.add_argument('--save', type=str, default='./experiment1') 63 | args = parser.parse_args() 64 | 65 | makedirs(args.save) 66 | logger = get_logger(logpath=os.path.join(args.save, 'logs'), filepath=os.path.abspath(__file__)) 67 | logger.info(args) 68 | experiment = Experiment(project_name = args.dataset, api_key="yOMD8snU8WrOgVJM6jTErziMh", workspace="hcvpgcntrain") 69 | hyper_params = {"seed": args.seed, "weight_decay": args.weight_decay, "learning_rate": args.lr, "dropout": args.dropout, "batch_size": args.batch_size} 70 | experiment.log_parameters(hyper_params) 71 | 72 | # test 73 | #def test(): 74 | # checkpoint = torch.load(os.path.join(args.save, 'model_for_test_seed_' + str(args.seed) + '_hidden_' + str(args.hidden1) + '_linear_' + str(args.linear) +'_lr_'+str(args.lr)+'_wd_'+str(args.weight_decay)+'_bs_'+str(args.batch_size)+ '_dt_' + str(args.dropout) + '.pth')) 75 | # print("best epoch is:" + str(checkpoint['epoch'])) 76 | # model.load_state_dict(checkpoint['state_dict']) 77 | # max_acc = 0 78 | # with torch.no_grad(): 79 | # model.eval() 80 | #for j in range(100): 81 | # logits_test = model(features[test_mask], adj_ls[test_mask]) 82 | # test_acc = accuracy(logits_test, torch.argmax(labels[test_mask],axis=1)) 83 | # if test_acc > max_acc: 84 | #logits_test_fin = logits_test 85 | # max_acc = test_acc 86 | # logger.info("Test accuracy is:" + str(test_acc)) 87 | # 
pkl.dump(logits_test,open(os.path.join(args.save, 'logits_test_seed_' + str(args.seed) + '_hidden_' + str(args.hidden1) + '_linear_' + str(args.linear) +'_lr_'+str(args.lr)+'_wd_'+str(args.weight_decay)+'_bs_'+str(args.batch_size)+ '_dt_' + str(args.dropout)),'wb')) 88 | 89 | torch.manual_seed(args.seed) 90 | np.random.seed(args.seed) 91 | random.seed(args.seed) 92 | # cheby is no longer useful 93 | is_cheby = True if args.model == 'chebyshev' else False 94 | no_energy = True if args.no_energy == True else False 95 | adj_ls, features, labels, sequences, proteases, labelorder, train_mask, test_mask = load_data(args.dataset, is_test=args.test_dataset, norm_type=True, scale_type=args.scale_type, test_format = 'index', energy_only = args.energy_only) #scale_type determines node feature scale 96 | cheby_params = args.max_degree if args.model == 'chebyshev' else None 97 | weight_mode = args.weight 98 | dim_des = args.dim_des 99 | tmp_mask = np.array([(not idx) for idx in test_mask], dtype=np.bool) 100 | 101 | # Size of Different Sets 102 | logger.info("|Training| {},|Testing| {}".format(np.sum(tmp_mask), np.sum(test_mask))) 103 | 104 | model = GCN(nnode=features.shape[1], 105 | nfeat=features.shape[2], 106 | mfeat=adj_ls.shape[3], 107 | # ngcn=args.ngcn, 108 | hidden1=args.hidden1, 109 | depth=args.depth, 110 | # hidden2=args.hidden2, 111 | natt=args.att, # one layer 112 | linear=args.linear, 113 | weight=weight_mode, 114 | is_des=dim_des, 115 | nclass=labels.shape[1], 116 | dropout=args.dropout, 117 | cheby=cheby_params) 118 | logger.info(model) 119 | logger.info('Number of parameters: {}'.format(count_parameters(model))) 120 | 121 | batch_size = args.batch_size 122 | 123 | criterion = nn.CrossEntropyLoss() 124 | optimizer = optim.Adam(model.parameters(),lr=args.lr, weight_decay=args.weight_decay) 125 | #optimizer = optim.SGD(model.parameters(), lr=args.lr, weight_decay=args.weight_decay) 126 | nepoch = args.epochs #willbe useless if set earlystop 127 | #scheduler = 
optim.lr_scheduler.CosineAnnealingLR(optimizer, int(nepoch / 10)) 128 | #scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, 10000) 129 | 130 | best_acc = 0 131 | print("Total number of forward processes:" + str(args.epochs * args.batch_size)) 132 | with experiment.train(): 133 | for i in range(nepoch): 134 | #i = 0 # epoch 135 | #converge_detect = 0 136 | #former_acc = 0 137 | #while True: 138 | #n = 0 139 | model.train() 140 | tmp_accs = [] 141 | tmp_losses = [] 142 | for batch_mask in get_batch_iterator(tmp_mask, batch_size): 143 | optimizer.zero_grad() 144 | #n = n + 1 145 | x = features[batch_mask] 146 | y = labels[batch_mask] 147 | y = torch.argmax(y,axis=1) 148 | adj = adj_ls[batch_mask] 149 | logits = model(x, adj) 150 | loss = criterion(logits,y) 151 | train_acc = accuracy(logits,y) 152 | loss.backward() 153 | optimizer.step() 154 | tmp_losses.append(loss.item()) 155 | tmp_accs.append(train_acc.item()) 156 | # scheduler.step() 157 | #train_acc = accuracy(logits, y) # only record the last batch accuracy for each epoch 158 | #experiment.log_metric("epoch_loss", sum(tmp_losses) / len(tmp_losses), step=i+1) 159 | #experiment.log_metric("epoch_accuracy", sum(tmp_accs) / len(tmp_accs), step=i+1) 160 | #print("train accuracy for {0}th epoch is: {1}".format(i+1, train_acc)) 161 | # print("train loss for {0}th epoch is : {1}".format(i+1, loss)) 162 | print("epoch: " + str(i+1)) 163 | print("train_loss: " + str(sum(tmp_losses) / len(tmp_losses))) #loss.item())) 164 | print("train_acc: " + str(sum(tmp_accs) / len(tmp_accs))) #train_acc.item())) 165 | with torch.no_grad(): 166 | with experiment.validate(): 167 | model.eval() 168 | #for j in range(100): 169 | logits_test = model(features[test_mask], adj_ls[test_mask]) 170 | test_loss = criterion(logits_test, torch.argmax(labels[test_mask],axis=1)) 171 | test_acc = accuracy(logits_test, torch.argmax(labels[test_mask],axis=1)) 172 | print("test_loss: " + str(test_loss.item())) 173 | print("test_acc: " + 
str(test_acc.item())) 174 | #experiment.log_metric("test_accuracy", test_acc.item(), step=i+1) 175 | #experiment.log_metric("test_loss", test_loss.item(), step=i+1) 176 | # if test_acc > max_acc: 177 | #logits_test_fin = logits_test 178 | # max_acc = test_acc 179 | # logger.info("Test accuracy is:" + str(test_acc)) 180 | if test_acc > best_acc: 181 | torch.save({'epoch': i+1,'state_dict': model.state_dict()}, os.path.join(args.save, 'model_for_test_seed_' + str(args.seed) + '_hidden_' + str(args.hidden1) + '_linear_' + str(args.linear) +'_lr_'+str(args.lr)+'_wd_'+str(args.weight_decay)+'_bs_'+str(args.batch_size)+ '_dt_' + str(args.dropout) + '.pth')) 182 | pkl.dump(logits_test,open(os.path.join(args.save, 'logits_test_seed_' + str(args.seed) + '_hidden_' + str(args.hidden1) + '_linear_' + str(args.linear) +'_lr_'+str(args.lr)+'_wd_'+str(args.weight_decay)+'_bs_'+str(args.batch_size)+ '_dt_' + str(args.dropout)),'wb')) 183 | #print('save successfully') 184 | best_acc = test_acc 185 | best_epo = i 186 | #if abs(test_acc - former_acc) < 0.0001: 187 | # converge_detect += 1 188 | # if converge_detect == 100: 189 | # break 190 | #elif test_acc < former_acc : 191 | # overfit_detect += 1 192 | # if overfit_detect >= 100: 193 | # break 194 | #i += 1 195 | print("best_test_acc: " + str(best_acc.item())) 196 | #logger.info( 197 | # "Epoch {:04d} | " 198 | # "Best Acc {:.4f}".format( 199 | # best_epo, best_acc 200 | # )) 201 | #makedirs(args.save) 202 | #logger = get_logger(logpath=os.path.join(args.save, 'logs'), filepath=os.path.abspath(__file__)) 203 | #logger.info(args) 204 | #device = torch.device('cuda:' + str(args.gpu) if torch.cuda.is_available() else 'cpu') 205 | # 206 | ### Determine Number of Supports and Assign Model Function 207 | ##if args.model == 'gcn': 208 | ## num_supports = 1 209 | ## model_func = GCN 210 | ##elif args.model == 'gcn_cheby': 211 | ## num_supports = 1 + args.max_degree 212 | ## model_func = GCN 213 | ##else: 214 | ## raise 
ValueError('Invalid argument for model: ' + str(FLAGS.model)) 215 | # 216 | ## Load data 217 | #adj_ls, features, labels, sequences, proteases, labelorder, train_mask, val_mask, test_mask = load_data(args.dataset, is_test=args.test_dataset) 218 | # 219 | ## Size of Different Sets 220 | #print("|Training| {}, |Validation| {}, |Testing| {}".format(np.sum(train_mask), np.sum(val_mask), np.sum(test_mask))) 221 | # 222 | ## Model and optimizer 223 | #model = GCN(nnode=features.shape[1], 224 | # nfeat=features.shape[2], 225 | # mfeat=adj_ls.shape[3], 226 | # nhid1=args.hidden1, 227 | # nhid2=args.hidden2, 228 | # nclass=labels.shape[1], 229 | # dropout=args.dropout).to(device) 230 | #logger.info(model) 231 | #logger.info('Number of parameters: {}'.format(count_parameters(model))) 232 | # 233 | #criterion = nn.NLLLoss().to(device) 234 | # 235 | #optimizer = optim.Adam(model.parameters(),lr=args.lr, weight_decay=args.weight_decay) 236 | # 237 | #best_acc = 0 238 | #batch_time_meter = RunningAverageMeter() 239 | #end = time.time() 240 | #print("Total number of forward processes:" + str(args.epochs * args.batch_size)) 241 | # 242 | ##batches_per_epoch = int(sum(train_mask) / args.batch_size) 243 | ##print("Batches per epoch is:" + str(batches_per_epoch)) 244 | #batch_size = args.batch_size 245 | #epochs_num = args.epochs 246 | # 247 | #if args.save_validation == True: 248 | # val_df = np.zeros([args.epochs*sum(val_mask),labels.shape[1]]) 249 | # 250 | ##mask = np.array([x or y for (x,y) in zip(train_mask, val_mask)], dtype = np.bool) 251 | #for epoch in range(epochs_num): 252 | # n = 0 253 | # for batch_mask in get_batch_iterator(train_mask, batch_size): 254 | # optimizer.zero_grad() 255 | # n = n + 1 256 | # print('this is the {}th batch'.format(n)) 257 | # x = features[batch_mask].to(device) 258 | # y = labels[batch_mask] 259 | # y = torch.argmax(y,axis=1).to(device) 260 | # adj = adj_ls[batch_mask].to(device) 261 | # model.train() 262 | # logits = model(x, adj) 263 | # 
loss = criterion(logits,y) 264 | # loss.backward() 265 | # optimizer.step() 266 | # train_acc = accuracy(logits, y) 267 | # print("train loss is {}".format(loss)) 268 | # print("train accuracy is {}".format(train_acc)) 269 | # batch_time_meter.update(time.time() - end) 270 | # end = time.time() 271 | # with torch.no_grad(): 272 | # #train_acc = accuracy(model, logits, labels[train_mask]) 273 | # model.eval() 274 | # logits_val = model(features[val_mask], adj_ls[val_mask]) 275 | # loss_val = criterion(logits_val,torch.argmax(labels[val_mask],axis=1)) 276 | # val_acc = accuracy(logits_val, torch.argmax(labels[val_mask],axis=1)) 277 | # print("accuracy for {0}th epoch is: {1}".format(epoch,val_acc)) 278 | # print("loss is {0}:".format(loss_val)) 279 | # if val_acc > best_acc: 280 | # torch.save({'epoch': epoch,'state_dict': model.state_dict(), 'args': args}, os.path.join(args.save, 'model.pth')) 281 | # best_acc = val_acc 282 | # best_epo = epoch 283 | # logger.info( 284 | # "Epoch {:04d} | Time {:.3f} ({:.3f}) | " 285 | # "Val Acc {:.4f}".format( 286 | # epoch, batch_time_meter.val, batch_time_meter.avg, val_acc 287 | # ) 288 | # ) 289 | # f = open(args.save + "epoch_record.txt","a") 290 | # f.write("batch_size_{0}_lr_{1}_gc_{2}_decay_{3}_epoch_{4}\tacc:{5}".format(batch_size,args.lr,args.hidden1,args.weight_decay,epoch,val_acc)) 291 | # f.close() 292 | # val_df[(epoch)*sum(val_mask):(epoch + 1) * sum(val_mask), :] = logits_val 293 | #pkl.dump(val_df, open(os.path.join(args.save, args.dataset + '_validation.csv'),'wb')) 294 | #test() 295 | 296 | 297 | -------------------------------------------------------------------------------- /model/train.py: -------------------------------------------------------------------------------- 1 | # This script is to test GCNN with already-trained gcnn model 2 | # Author: Changpeng Lu 3 | # Usage: 4 | # python test.py --dataset HCV_binary_10_ang_aa_sinusoidal_encoding_4_energy_7_energyedge_5_hbond --test_dataset 
HCV_binary_10_ang_aa_sinusoidal_encoding_2_energy_7_energyedge_5_hbond --epochs 500 --hidden1 20 --depth 2 --linear 1024 --att 0 --model gcn --batch_size 500 --lr $tmp_lr --dropout $tmp_dt --weight_decay $tmp_wd --save 'outputs/tt/HCV_binary_10_ang_aa_sinusoidal_encoding_4_energy_7_energyedge_5_hbond/bs_500/' 5 | # Train and test each epoch for this new version, instead of testing only after all training are done. Changes below aren't applied to wider_deeper and more epoch trials with the base setting. Replace Adam with SGD, also add lr_scheduler. Also calculate average train accuracy and loss instead of the last batch. Also, we use earlystop to let the model train enough epochs. 1) if test accuracy always go smaller, then the model will stop; 2) if the test accuracy always the same as the former accuracy, then it means converges, then the model will stop as well. 6 | 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import os 11 | import time 12 | import logging 13 | import argparse 14 | import numpy as np 15 | import random 16 | 17 | from comet_ml import Experiment 18 | import torch 19 | import torch.nn as nn 20 | import torch.nn.functional as F 21 | import torch.optim as optim 22 | from torch.utils.data import DataLoader 23 | import torchvision.datasets as datasets 24 | import torchvision.transforms as transforms 25 | import math 26 | import scipy.sparse as sp 27 | from torch.nn.parameter import Parameter 28 | 29 | from utils import * 30 | from models import * 31 | 32 | # Training settings 33 | parser = argparse.ArgumentParser() 34 | parser.add_argument('--gpu', type=int, default=0, 35 | help='number of gpus.') 36 | #parser.add_argument('--fastmode', action='store_true', default=False, 37 | # help='Validate during training pass.') 38 | parser.add_argument('--seed', type=int, default=1, help='Random seed.') 39 | parser.add_argument('--epochs', type=int, default=200, 40 | help='Number of epochs to train.') 41 | 
parser.add_argument('--lr', type=float, default=0.01, 42 | help='Initial learning rate.') 43 | parser.add_argument('--weight_decay', type=float, default=5e-4, 44 | help='Weight decay (L2 loss on parameters).') 45 | parser.add_argument('--hidden1', type=int, default=10, 46 | help='Number of hidden units for nodes.') 47 | parser.add_argument('--depth', type=int, default=10, help='Number of gcnn layers') 48 | parser.add_argument('--att', type=int, default=0, help='the dimension of weight matrices for key and query') 49 | parser.add_argument('--linear', type=int, default=0) 50 | parser.add_argument('--dropout', type=float, default=0.1, 51 | help='Dropout rate (1 - keep probability).') 52 | parser.add_argument('--no_energy', action='store_true', default=False) 53 | parser.add_argument('--energy_only', action='store_true', default=False) 54 | parser.add_argument('--seq_only', action='store_true', default=False) 55 | parser.add_argument('--feature',choices=['d','s+d','s','e','s+e','s+e+d'],default='s+e') 56 | parser.add_argument('--scale_type', choices = ['exp','minmax'], default='exp') 57 | parser.add_argument('--test_dataset',type=str, default=None) 58 | parser.add_argument('--val_dataset', type=str, default=None) 59 | parser.add_argument('--resampling', type=str, default=None) 60 | parser.add_argument('--dataset',type=str, help='input dataset string') 61 | parser.add_argument('--model', type = str, default = 'gcn',choices=['gcn','chebyshev']) 62 | parser.add_argument('--max_degree',type=int, default = 3, help='number of supports') 63 | parser.add_argument('--batch_size',type=int, default=8) 64 | parser.add_argument('--weight', type=str, default='pre',choices=['pre','post']) 65 | parser.add_argument('--dim_des',action='store_true',default=False) 66 | parser.add_argument('--save', type=str, default='./experiment1') 67 | args = parser.parse_args() 68 | 69 | makedirs(args.save) 70 | logger = get_logger(logpath=os.path.join(args.save, 'logs'), 
filepath=os.path.abspath(__file__)) 71 | logger.info(args) 72 | experiment = Experiment(project_name = args.dataset, api_key="yOMD8snU8WrOgVJM6jTErziMh", workspace="tevtrisplit") 73 | hyper_params = {"seed": args.seed, "weight_decay": args.weight_decay, "learning_rate": args.lr, "dropout": args.dropout, "batch_size": args.batch_size} 74 | experiment.log_parameters(hyper_params) 75 | 76 | torch.manual_seed(args.seed) 77 | np.random.seed(args.seed) 78 | random.seed(args.seed) 79 | # cheby is no longer useful 80 | is_cheby = True if args.model == 'chebyshev' else False 81 | no_energy = True if args.no_energy == True else False 82 | if args.val_dataset != None: 83 | logger.info('TripleSplit!') 84 | adj_ls, features, labels, sequences, labelorder, train_mask, val_mask, test_mask = load_data(args.dataset, is_test=args.test_dataset, is_val=args.val_dataset, norm_type=True, scale_type=args.scale_type, test_format = 'index', energy_only = args.energy_only, seq_only = args.seq_only, feature_type=args.feature) 85 | logger.info("|Training| {},|Validation| {}, |Testing| {}".format(np.sum(train_mask), np.sum(val_mask), np.sum(test_mask))) 86 | tmp_mask = train_mask 87 | else: 88 | adj_ls, features, labels, sequences, labelorder, train_mask, val_mask = load_data(args.dataset, is_test=args.test_dataset, is_val=args.val_dataset, norm_type=True, scale_type=args.scale_type, test_format = 'index', energy_only = args.energy_only, seq_only = args.seq_only, feature_type=args.feature) #scale_type determines node feature scale 89 | tmp_mask = np.array([(not idx) for idx in val_mask], dtype=np.bool) 90 | # Size of Different Sets 91 | logger.info("|Training| {},|Testing| {}".format(np.sum(tmp_mask), np.sum(val_mask))) 92 | 93 | cheby_params = args.max_degree if args.model == 'chebyshev' else None 94 | weight_mode = args.weight 95 | dim_des = args.dim_des 96 | 97 | model = GCN(nnode=features.shape[1], 98 | nfeat=features.shape[2], 99 | mfeat=adj_ls.shape[3], 100 | # ngcn=args.ngcn, 101 | 
hidden1=args.hidden1, 102 | depth=args.depth, 103 | # hidden2=args.hidden2, 104 | natt=args.att, # one layer 105 | linear=args.linear, 106 | weight=weight_mode, 107 | is_des=dim_des, 108 | nclass=labels.shape[1], 109 | dropout=args.dropout, 110 | cheby=cheby_params) 111 | logger.info(model) 112 | logger.info('Number of parameters: {}'.format(count_parameters(model))) 113 | 114 | batch_size = args.batch_size 115 | 116 | criterion = nn.CrossEntropyLoss() 117 | #criterion = nn.NLLLoss() 118 | optimizer = optim.Adam(model.parameters(),lr=args.lr, weight_decay=args.weight_decay) 119 | #optimizer = optim.SGD(model.parameters(), lr=args.lr, weight_decay=args.weight_decay) 120 | nepoch = args.epochs #willbe useless if set earlystop 121 | #scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, int(nepoch / 10)) 122 | #scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, 10000) 123 | resampling = args.resampling 124 | best_acc = 0 125 | print("Total number of forward processes:" + str(args.epochs * args.batch_size)) 126 | #patience = 100 127 | #last_loss = 100 128 | #triggertimes = 0 129 | with experiment.train(): 130 | for i in range(nepoch): 131 | #i = 0 # epoch 132 | #converge_detect = 0 133 | #former_acc = 0 134 | #while True: 135 | #n = 0 136 | model.train() 137 | tmp_accs = [] 138 | tmp_losses = [] 139 | batch_mask_ls, count_mask_ls = get_batch_iterator(tmp_mask, batch_size, sampling=resampling) 140 | for batch_mask, count_mask in zip(batch_mask_ls, count_mask_ls): 141 | optimizer.zero_grad() 142 | x = features[batch_mask] 143 | y = labels[batch_mask] 144 | adj = adj_ls[batch_mask] 145 | for i, count in enumerate(count_mask): 146 | if count > 1: 147 | while count - 1 > 0: 148 | x = torch.concat([x, features[i,:,:].reshape(1,features.shape[1],-1)]) 149 | y = torch.concat([y, labels[i,:].reshape(1, -1)]) 150 | adj = torch.concat([adj, adj_ls[i,:,:,:].reshape(1,adj_ls.shape[1], adj_ls.shape[2],-1)]) 151 | count -= 1 152 | assert x.shape[0] == 
y.shape[0] 153 | y = torch.argmax(y,axis=1) 154 | logits = model(x, adj) 155 | loss = criterion(logits,y) 156 | train_acc = accuracy(logits,y) 157 | loss.backward() 158 | optimizer.step() 159 | tmp_losses.append(loss.item()) 160 | tmp_accs.append(train_acc.item()) 161 | # scheduler.step() 162 | #train_acc = accuracy(logits, y) # only record the last batch accuracy for each epoch 163 | experiment.log_metric("epoch_loss", sum(tmp_losses) / len(tmp_losses), step=i+1) 164 | experiment.log_metric("epoch_accuracy", sum(tmp_accs) / len(tmp_accs), step=i+1) 165 | #print("train accuracy for {0}th epoch is: {1}".format(i+1, train_acc)) 166 | # print("train loss for {0}th epoch is : {1}".format(i+1, loss)) 167 | print("epoch: " + str(i+1)) 168 | print("train_loss: " + str(sum(tmp_losses) / len(tmp_losses))) #loss.item())) 169 | print("train_acc: " + str(sum(tmp_accs) / len(tmp_accs))) #train_acc.item())) 170 | with torch.no_grad(): 171 | with experiment.validate(): 172 | model.eval() 173 | logits_test = model(features[val_mask], adj_ls[val_mask]) 174 | val_loss = criterion(logits_test, torch.argmax(labels[val_mask],axis=1)) 175 | val_acc = accuracy(logits_test, torch.argmax(labels[val_mask],axis=1)) 176 | print("val_loss: " + str(val_loss.item())) 177 | print("val_acc: " + str(val_acc.item())) 178 | experiment.log_metric("val_accuracy", val_acc.item(), step=i+1) 179 | experiment.log_metric("val_loss", val_loss.item(), step=i+1) 180 | # if test_acc > max_acc: 181 | #logits_test_fin = logits_test 182 | # max_acc = test_acc 183 | # logger.info("Test accuracy is:" + str(test_acc)) 184 | 185 | if val_acc > best_acc: 186 | torch.save({'epoch': i+1,'state_dict': model.state_dict(),'optimizer_state_dict': optimizer.state_dict(), 'loss':loss}, os.path.join(args.save, 'model_for_test_seed_' + str(args.seed) + '_hidden_' + str(args.hidden1) + '_linear_' + str(args.linear) +'_lr_'+str(args.lr)+'_wd_'+str(args.weight_decay)+'_bs_'+str(args.batch_size)+ '_dt_' + str(args.dropout) + 
'.pth')) 187 | pkl.dump(logits_test,open(os.path.join(args.save, 'logits_val_seed_' + str(args.seed) + '_hidden_' + str(args.hidden1) + '_linear_' + str(args.linear) +'_lr_'+str(args.lr)+'_wd_'+str(args.weight_decay)+'_bs_'+str(args.batch_size)+ '_dt_' + str(args.dropout)),'wb')) 188 | best_acc = val_acc 189 | best_epo = i 190 | #if abs(test_acc - former_acc) < 0.0001: 191 | # converge_detect += 1 192 | # if converge_detect == 100: 193 | # break 194 | #elif test_acc < former_acc : 195 | # overfit_detect += 1 196 | # if overfit_detect >= 100: 197 | # break 198 | #i += 1 199 | logger.info("best_val_acc: " + str(best_acc.item())) 200 | #logger.info( 201 | # "Epoch {:04d} | " 202 | # "Best Acc {:.4f}".format( 203 | # best_epo, best_acc 204 | # )) 205 | 206 | # test 207 | if args.val_dataset != None: 208 | checkpoint = torch.load(os.path.join(args.save, 'model_for_test_seed_' + str(args.seed) + '_hidden_' + str(args.hidden1) + '_linear_' + str(args.linear) +'_lr_'+str(args.lr)+'_wd_'+str(args.weight_decay)+'_bs_'+str(args.batch_size)+ '_dt_' + str(args.dropout) + '.pth')) 209 | print("best epoch is:" + str(checkpoint['epoch'])) 210 | model.load_state_dict(checkpoint['state_dict']) 211 | max_acc = 0 212 | with torch.no_grad(): 213 | model.eval() 214 | logits_test = model(features[test_mask], adj_ls[test_mask]) 215 | test_acc = accuracy(logits_test, torch.argmax(labels[test_mask],axis=1)) 216 | #if test_acc > max_acc: 217 | #logits_test_fin = logits_test 218 | # max_acc = test_acc 219 | logger.info("Test accuracy is:" + str(test_acc)) 220 | pkl.dump(logits_test,open(os.path.join(args.save, 'logits_test_seed_' + str(args.seed) + '_hidden_' + str(args.hidden1) + '_linear_' + str(args.linear) +'_lr_'+str(args.lr)+'_wd_'+str(args.weight_decay)+'_bs_'+str(args.batch_size)+ '_dt_' + str(args.dropout)),'wb')) 221 | 222 | 223 | -------------------------------------------------------------------------------- /pipeline.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Nucleus2014/protease-gcnn-pytorch/c769a0eb53f3b3ec99d53da738ad380376be9bcf/pipeline.png --------------------------------------------------------------------------------