├── .DS_Store
├── .idea
├── .gitignore
├── .name
├── inspectionProfiles
│ └── profiles_settings.xml
├── misc.xml
├── modules.xml
├── protease-gcnn-pytorch.iml
└── vcs.xml
├── README.md
├── __init__.py
├── __pycache__
├── layers.cpython-37.pyc
├── models.cpython-37.pyc
└── utils.cpython-37.pyc
├── analysis
├── BenchmarkMLTrainigAfterPGCN.ipynb
├── MetricCalculationAfterTrain.ipynb
├── PlotLogoPlotSeqIdentityDataSummary.ipynb
├── PlotSankeyBarplot_Mutation.ipynb
├── PostAnalysisCrossTrainTest.ipynb
├── TestIndexSelection.ipynb
├── node_edge_weight_analysis_Joey.ipynb
└── suppl
│ ├── Table-S1A-HCV_sequence_protease_label.xlsx
│ ├── Table-S1B-TEV_sequence_protease_label.xlsx
│ ├── Table-S2-MetricSummary_ML_PGCN_suppl.xlsx
│ ├── Table-S4-node_edge_importance_binary.xlsx
│ ├── Table-S5-TEV_Daivd_Liu_Mutation_Sites_Numbering.xlsx
│ ├── Table-S8-Yeast-assay_PGCN_predicted_results_TEV_designs .xlsx
│ └── Table-S9-CrossTestSummary.xlsx
├── bin
├── graph_generation.sh
├── ml_benchmark.sh
├── test.sh
├── train.sh
└── variable_importance.sh
├── data
└── .ipynb_checkpoints
│ └── ind.None-checkpoint.pose_indices
├── design_protease.py
├── graph
├── __pycache__
│ └── protein_graph.cpython-38.pyc
├── classifications
│ ├── .ipynb_checkpoints
│ │ ├── HCV_A171T-checkpoint.txt
│ │ ├── PDZ_class-checkpoint.txt
│ │ └── tev_design_for_validation_dual_directions_cleavage-checkpoint.txt
│ ├── 20220911_tev_design_successes_p1pA.txt
│ ├── 20220922_tev_design_dual_directions_cleavage.txt
│ ├── 20220925_tev_design_p3.txt
│ ├── 2bof-ER-summarized_label_singlePDB.txt
│ ├── 2yol-ER-summarized_label_singlePDB.txt
│ ├── 5gj4-ER-summarized_label_singlePDB.txt
│ ├── 5y4l-ER-summarized_label_singlePDB.txt
│ ├── HCV.txt
│ ├── HCV_A171T.txt
│ ├── HCV_D183A.txt
│ ├── HCV_R170K_A171T_D183A.txt
│ ├── PDZ_class.txt
│ ├── TEV.txt
│ ├── TEV_David_Liu_mutants_sequences_labels_no_single_three_libraries_duplicates_used4structGen_05_10_2022.csv
│ ├── TEV_WT_balanced.txt
│ ├── TEV_WT_not_exclusive.txt
│ ├── TEV_final_all_var_noDup.txt
│ ├── dvdar_design.txt
│ ├── oydv-ER-summarized_label_singlePDB.txt
│ ├── pgcn_rbd_class_file.txt
│ ├── pgcn_rbd_class_file_ternary.txt
│ ├── protease_3c_designs.txt
│ ├── protease_3c_designs_2bof.txt
│ ├── protease_3c_designs_5y4l.txt
│ ├── protease_3c_designs_oydv.txt
│ ├── tev-ER-summarized_label_singlePDB.txt
│ ├── tev_design_20220912.txt
│ ├── tev_design_for_validation_dual_directions_cleavage.txt
│ ├── tev_design_negpool.txt
│ ├── tev_oydv_design_candidates.txt
│ ├── tev_oydv_expt_library_best_decoys.txt
│ └── tev_oydv_raw_designs.txt
├── crystal_structures
│ ├── HCV.pdb
│ └── TEV_QS.pdb
└── protein_graph.py
├── helper
├── .ipynb_checkpoints
│ ├── 2yol-ER-summarized_label-checkpoint.txt
│ ├── 2yol-ER-summarized_label_singlePDB-checkpoint.txt
│ ├── RAAVGRG-checkpoint.fasc
│ ├── Untitled-checkpoint.ipynb
│ ├── generate_class_singlePDB-checkpoint.py
│ └── make_modeling_commands-checkpoint.py
├── BenchmarkMLTrainAfterPGCN.py
├── generate_class_singlePDB.py
├── make_modeling_commands.py
└── text_to_slurm.py
├── model
├── findBestAcc.py
├── findBestAcc_from_log.py
├── importance.py
├── layers.py
├── models.py
├── outputs
│ ├── HCV_A171T_model.pth
│ ├── HCV_Combined_model.pth
│ ├── HCV_D183A_model.pth
│ ├── HCV_Triple_model.pth
│ ├── HCV_WT_model.pth
│ └── TEV_model.pth
├── test.py
├── train.py
└── utils.py
└── pipeline.png
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nucleus2014/protease-gcnn-pytorch/c769a0eb53f3b3ec99d53da738ad380376be9bcf/.DS_Store
--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Editor-based HTTP Client requests
5 | /httpRequests/
6 | # Datasource local storage ignored files
7 | /dataSources/
8 | /dataSources.local.xml
9 |
--------------------------------------------------------------------------------
/.idea/.name:
--------------------------------------------------------------------------------
1 | train.py
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/protease-gcnn-pytorch.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # protease-gcnn-pytorch
2 | This project presents a graph-based convolutional neural network, called the protein convolutional neural network (PGCN), for predicting protease specificity. We propose a new feature set that captures natural energy information for proteins, which could best represent protein activities.
3 |
4 | 
5 |
6 | To use our method, first download this repository by using the following command:
7 | ```git clone https://github.com/Nucleus2014/protease-gcnn-pytorch```
8 |
9 | Rosetta models were generated using *design_protease.py* script. If you need source structures for pre-trained HCV/TEV models, please contact us.
10 | Pre-trained models for HCV/TEV are in [model/outputs](https://github.com/Nucleus2014/protease-gcnn-pytorch/tree/master/model/outputs), and cleavage information for HCV/TEV/TEV_design are in [graph/classifications folder](https://github.com/Nucleus2014/protease-gcnn-pytorch/tree/master/graph/classifications).
11 | Analysis scripts are in [analysis folder](https://github.com/Nucleus2014/protease-gcnn-pytorch/tree/master/analysis)
12 |
13 | ## Step 1: Generation of graphs
14 | Go to the *graph* folder and execute *protein_graph.py*:
15 | ```
16 | cd graph
17 | python protein_graph.py -o ${output_name} -pr_path /projects/f_sdk94_1/EnzymeModelling/TEVFinalStructures -class TEV.txt -prot TEV_QS.pdb -d 10
18 | ```
19 | ### Description of generated data
20 | The generated files are distinguished by their suffix:
21 | `graph`: the edge feature tensor in the dimension of (K,N,N,M)
22 | `x`: the node feature matrix in the dimension of (K,N,F)
23 | `y`: labels in the dimension of $(K,2)$; CLEAVED if $[1,0]$, UNCLEAVED if $[0,1]$
24 | `labelorder`: indicates which class each column of `y` corresponds to
25 | `sequences`: the list of sample names
26 | Where K is the number of samples (graphs), N is the number of nodes, M is the number of edge features, F is the number of node features.
27 |
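28 | Split the data and save the original indices of the held-out samples in:
29 | `trisplit.val.index` and `trisplit.test.index`: indices of the validation and test samples when the data are split three ways into training, validation and test sets. Indices start from 0.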
30 |
31 | ## Step 2: Train, validate and test
32 | Go to the *model* folder and execute *train.py*:
33 | ```
34 | cd model
35 | python train.py --dataset TEV_all_binary_10_ang_aa_energy_7_energyedge_5_hbond \
36 | --test_dataset TEV_all_binary_10_ang_aa_energy_7_energyedge_5_hbond \
37 | --val_dataset TEV_all_binary_10_ang_aa_energy_7_energyedge_5_hbond \
38 | --seed 1 --epochs 500 --hidden1 20 --depth 2 --linear 0 --att 0 \
39 | --model gcn --batch_size 100 --lr 0.005 --dropout 0.2 --weight_decay 0.0005 \
40 | --save "outputs/tev/TEV_all_binary_10_ang_aa_energy_7_energyedge_5_hbond/bs_100/"
41 | ```
42 | Options of hyperparameter tuning:
43 | ```
44 | weight_decay=(1e-3 5e-3 1e-4 5e-4)
45 | learning_rate=(1e-2 5e-2 1e-3 5e-3 1e-4 5e-4)
46 | dropout=(0.01 0.05 0.1 0.2 0.3 0.4 0.5)
47 | batch_size=(500 100 1000 50 10)
48 | ```
49 |
50 | Once the model has been trained, *train.py* saves it to the directory given by the *--save* flag.
51 | The script *findBestAcc.py* in the *model* folder finds the model with the best accuracy.
52 |
53 | ## Test with the pre-trained model
54 | If you would like to test with an already-trained PGCN model, you can use *importance.py* in the *model* folder. It loads an existing PyTorch model file and the test data that you specify. Currently, we offer pre-trained models located at *model/outputs* for HCV wild type, HCV A171T, HCV D183A, HCV R170K_A171T_D183A, HCV Combined and TEV Combined.
55 | ```
56 | cd model
57 | python importance.py --dataset ${data} --hidden1 20 --depth 2 --linear 0 --att 0 \
58 | --batch_size ${bs} --lr ${lr} --dropout ${dt} --weight_decay ${wd} --seed ${seed} \
59 | --save ${model_dir} --data_path ${data_dir} --new \
60 | --test_logits_path ${logits_path}
61 | ```
62 |
63 | ## Variable Importance Analysis (Alternative)
64 | Here we propose a method to quantify the importance of nodes and edges. You can run it with the following command:
65 | ```
66 | cd model
67 | python importance.py --importance --dataset HCV_ternary_10_ang_aa_energy_7_energyedge_5_hbond --test_dataset HCV_ternary_10_ang_aa_energy_7_energyedge_5_hbond --hidden1 20 --depth 2 --linear 0 --att 0 --batch_size 500 --lr 0.005 --dropout 0.05 --weight_decay 5e-4 --save ${model_dir}
68 | ```
69 | ## Comparison with other machine learning methods
70 | In the paper, we compare GCNN + the newly generated feature set with five machine learning models. To reproduce those results (parameter tuning + train and test) with the machine learning models, run:
71 | ```
72 | cd helper
73 | python BenchmarkMLTrainAfterPGCN.py -data HCV_all_10_ang_aa_energy_7_energyedge_5_hbond_flattened -feature complete -model ann -save outputs/hcv_ann
74 | ```
75 |
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | from __future__ import division
3 |
4 | from .layers import *
5 | from .models import *
6 | from .utils import *
--------------------------------------------------------------------------------
/__pycache__/layers.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nucleus2014/protease-gcnn-pytorch/c769a0eb53f3b3ec99d53da738ad380376be9bcf/__pycache__/layers.cpython-37.pyc
--------------------------------------------------------------------------------
/__pycache__/models.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nucleus2014/protease-gcnn-pytorch/c769a0eb53f3b3ec99d53da738ad380376be9bcf/__pycache__/models.cpython-37.pyc
--------------------------------------------------------------------------------
/__pycache__/utils.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nucleus2014/protease-gcnn-pytorch/c769a0eb53f3b3ec99d53da738ad380376be9bcf/__pycache__/utils.cpython-37.pyc
--------------------------------------------------------------------------------
/analysis/TestIndexSelection.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pickle as pkl\n",
10 | "import numpy as np\n",
11 | "import os\n",
12 | "os.chdir('/scratch/cl1205/protease-gcnn-pytorch/model')\n",
13 | "from utils import *\n",
14 | "import torch\n",
15 | "from torch import nn"
16 | ]
17 | },
18 | {
19 | "cell_type": "code",
20 | "execution_count": 3,
21 | "metadata": {},
22 | "outputs": [],
23 | "source": [
24 | "def TestIndexSave(dataset_str):\n",
25 | " cwd = os.getcwd()\n",
26 | " names = ['x', 'y', 'graph', 'sequences', 'proteases', 'labelorder']\n",
27 | " features, y_arr, adj_ls, sequences, proteases, labelorder = tuple(load_input(dataset_str, names, input_type='train'))\n",
28 | "\n",
29 | " idx = np.arange(y_arr.shape[0])\n",
30 | " print(y_arr.shape[0])\n",
31 | " np.random.shuffle(idx)\n",
32 | " cutoff_2 = int(0.7 * len(idx)) # 30% of the benchmark set as testing data\n",
33 | " idx_test = idx[cutoff_2:]\n",
34 | " idx_train = idx[:cutoff_2]\n",
35 | " print(len(idx_test))\n",
36 | " np.savetxt('../data/ind.' + dataset_str + '.test.index', idx_test, fmt='%d')\n",
37 | " return idx_test\n"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": 4,
43 | "metadata": {},
44 | "outputs": [
45 | {
46 | "name": "stdout",
47 | "output_type": "stream",
48 | "text": [
49 | "5425\n",
50 | "1628\n"
51 | ]
52 | }
53 | ],
54 | "source": [
55 | "idx = TestIndexSave('TEV_all_binary_10_ang_aa_energy_7_energyedge_5_hbond')"
56 | ]
57 | },
58 | {
59 | "cell_type": "code",
60 | "execution_count": 5,
61 | "metadata": {},
62 | "outputs": [
63 | {
64 | "data": {
65 | "text/plain": [
66 | "array([3947, 4140, 777, ..., 224, 5388, 1575])"
67 | ]
68 | },
69 | "execution_count": 5,
70 | "metadata": {},
71 | "output_type": "execute_result"
72 | }
73 | ],
74 | "source": [
75 | "idx"
76 | ]
77 | },
78 | {
79 | "cell_type": "markdown",
80 | "metadata": {},
81 | "source": [
82 | "# Validation - Test Dataset"
83 | ]
84 | },
85 | {
86 | "cell_type": "code",
87 | "execution_count": 14,
88 | "metadata": {},
89 | "outputs": [],
90 | "source": [
91 | "def ValTestIndex(dataset_str):\n",
92 | " cwd = os.getcwd()\n",
93 | " names = ['x', 'y', 'graph', 'sequences', 'proteases', 'labelorder']\n",
94 | " features, y_arr, adj_ls, sequences, proteases, labelorder = tuple(load_input(dataset_str, names, input_type='train'))\n",
95 | "\n",
96 | " idx = np.arange(y_arr.shape[0])\n",
97 | " np.random.shuffle(idx)\n",
98 | " cutoff = int(0.8 * len(idx)) # first 80% for training; the next 10% is validation and the last 10% is testing\n",
99 | " cutoff_2 = int(0.9 * len(idx))\n",
100 | " idx_test = idx[cutoff_2:]\n",
101 | " idx_train = idx[:cutoff]\n",
102 | " idx_val = idx[cutoff: cutoff_2]\n",
103 | " print(len(idx_train), len(idx_val), len(idx_test))\n",
104 | " np.savetxt('../data/ind.' + dataset_str + '.trisplit.test.index', idx_test, fmt='%d')\n",
105 | " np.savetxt('../data/ind.' + dataset_str + '.trisplit.val.index', idx_val, fmt='%d')\n",
106 | " return idx_val, idx_test\n"
107 | ]
108 | },
109 | {
110 | "cell_type": "code",
111 | "execution_count": 15,
112 | "metadata": {},
113 | "outputs": [
114 | {
115 | "name": "stdout",
116 | "output_type": "stream",
117 | "text": [
118 | "TEV_all_binary_10_ang_aa_energy_7_energyedge_5_hbond\n",
119 | "4340 542 543\n",
120 | "HCV_all_binary_10_ang_aa_energy_7_energyedge_5_hbond\n",
121 | "31399 3925 3925\n",
122 | "HCV_WT_binary_10_ang_aa_energy_7_energyedge_5_hbond\n",
123 | "5873 734 735\n",
124 | "HCV_A171T_binary_10_ang_aa_energy_7_energyedge_5_hbond\n",
125 | "10564 1320 1321\n",
126 | "HCV_D183A_binary_10_ang_aa_energy_7_energyedge_5_hbond\n",
127 | "9491 1186 1187\n",
128 | "HCV_Triple_binary_10_ang_aa_energy_7_energyedge_5_hbond\n",
129 | "5470 684 684\n"
130 | ]
131 | }
132 | ],
133 | "source": [
134 | "for data in ['TEV_all_binary_10_ang_aa_energy_7_energyedge_5_hbond',\n",
135 | " 'HCV_all_binary_10_ang_aa_energy_7_energyedge_5_hbond',\n",
136 | " 'HCV_WT_binary_10_ang_aa_energy_7_energyedge_5_hbond',\n",
137 | " 'HCV_A171T_binary_10_ang_aa_energy_7_energyedge_5_hbond',\n",
138 | " 'HCV_D183A_binary_10_ang_aa_energy_7_energyedge_5_hbond',\n",
139 | " 'HCV_Triple_binary_10_ang_aa_energy_7_energyedge_5_hbond']:\n",
140 | " print(data)\n",
141 | " idx_val, idx_test = ValTestIndex(data)\n",
142 | " "
143 | ]
144 | },
145 | {
146 | "cell_type": "markdown",
147 | "metadata": {},
148 | "source": [
149 | "# Training/Val/Test Data Simple Statistics"
150 | ]
151 | },
152 | {
153 | "cell_type": "code",
154 | "execution_count": 2,
155 | "metadata": {},
156 | "outputs": [],
157 | "source": [
158 | "def raw_data_statistics(dataset):\n",
159 | " idy = pkl.load(open('/scratch/cl1205/protease-gcnn-pytorch/data/ind.{}.y'.format(dataset), 'rb'))\n",
160 | " test_index = np.loadtxt('/scratch/cl1205/protease-gcnn-pytorch/data/ind.{}.test.index'.format(dataset), dtype=int)\n",
161 | " sequences = pkl.load(open('/scratch/cl1205/protease-gcnn-pytorch/data/ind.{}.sequences'.format(dataset), 'rb'))\n",
162 | " test_index = np.loadtxt('/scratch/cl1205/protease-gcnn-pytorch/data/ind.{}.trisplit.test.index'.format(dataset), dtype=int)\n",
163 | " val_index = np.loadtxt('/scratch/cl1205/protease-gcnn-pytorch/data/ind.{}.trisplit.val.index'.format(dataset), dtype=int)\n",
164 | " test_index = np.sort(test_index)\n",
165 | " val_index = np.sort(val_index)\n",
166 | " y_val = idy[val_index]\n",
167 | " y_test = idy[test_index]\n",
168 | " \n",
169 | " train_mask = np.array([i not in test_index and i not in val_index for i in range(idy.shape[0])]) \n",
170 | " y_train = idy[train_mask]\n",
171 | " print(np.array(sequences)[train_mask][0]) # 1 0 means cleaved\n",
172 | " print('Train:| Cleaved {} | Uncleaved {} | Total {} |'.format(np.sum(y_train==[1,0],axis=0)[0], \n",
173 | " np.sum(y_train==[0,1],axis=0)[0], \n",
174 | " y_train.shape[0]))\n",
175 | " print('Val:| Cleaved {} | Uncleaved {} | Total {} |'.format(np.sum(y_val==[1,0],axis=0)[0], \n",
176 | " np.sum(y_val==[0,1],axis=0)[0], \n",
177 | " y_val.shape[0]))\n",
178 | " print('Test:| Cleaved {} | Uncleaved {} | Total {} |'.format(np.sum(y_test==[1,0],axis=0)[0], \n",
179 | " np.sum(y_test==[0,1],axis=0)[0], \n",
180 | " y_test.shape[0]))\n",
181 | " print('Total: {}'.format(y_train.shape[0] + y_val.shape[0] + y_test.shape[0]))\n",
182 | " return sequences, test_index, val_index"
183 | ]
184 | },
185 | {
186 | "cell_type": "code",
187 | "execution_count": 32,
188 | "metadata": {},
189 | "outputs": [
190 | {
191 | "name": "stdout",
192 | "output_type": "stream",
193 | "text": [
194 | "N176I_TAHLYFQSGT.pdb\n",
195 | "Train:| Cleaved 2111 | Uncleaved 2229 | Total 4340 |\n",
196 | "Val:| Cleaved 259 | Uncleaved 283 | Total 542 |\n",
197 | "Test:| Cleaved 238 | Uncleaved 305 | Total 543 |\n",
198 | "Total: 5425\n"
199 | ]
200 | }
201 | ],
202 | "source": [
203 | "raw_data_statistics('TEV_all_binary_10_ang_aa_energy_7_energyedge_5_hbond')"
204 | ]
205 | },
206 | {
207 | "cell_type": "code",
208 | "execution_count": 3,
209 | "metadata": {},
210 | "outputs": [
211 | {
212 | "name": "stdout",
213 | "output_type": "stream",
214 | "text": [
215 | "N176I_TAHLYFQSGT.pdb\n",
216 | "Train:| Cleaved 2111 | Uncleaved 2229 | Total 4340 |\n",
217 | "Val:| Cleaved 259 | Uncleaved 283 | Total 542 |\n",
218 | "Test:| Cleaved 238 | Uncleaved 305 | Total 543 |\n",
219 | "Total: 5425\n"
220 | ]
221 | }
222 | ],
223 | "source": [
224 | "sequences, test_index, val_index = raw_data_statistics('TEV_all_binary_10_ang_aa_energy_7_energyedge_5_hbond')"
225 | ]
226 | },
227 | {
228 | "cell_type": "code",
229 | "execution_count": 7,
230 | "metadata": {},
231 | "outputs": [],
232 | "source": [
233 | "test_sequences = np.array(sequences)[test_index]"
234 | ]
235 | },
236 | {
237 | "cell_type": "code",
238 | "execution_count": 9,
239 | "metadata": {},
240 | "outputs": [],
241 | "source": [
242 | "for seq in test_sequences:\n",
243 | " if seq == 'WT_TENLYFQSGT.pdb':\n",
244 | " print('in test')\n",
245 | "val_sequences = np.array(sequences)[val_index]\n",
246 | "for seq in val_sequences:\n",
247 | " if seq == 'WT_TENLYFQSGT.pdb':\n",
248 | " print('in_val')"
249 | ]
250 | },
251 | {
252 | "cell_type": "code",
253 | "execution_count": 27,
254 | "metadata": {},
255 | "outputs": [
256 | {
257 | "data": {
258 | "text/plain": [
259 | "2111"
260 | ]
261 | },
262 | "execution_count": 27,
263 | "metadata": {},
264 | "output_type": "execute_result"
265 | }
266 | ],
267 | "source": [
268 | "np.sum(y_train==[1,0], axis=0)[0]"
269 | ]
270 | },
271 | {
272 | "cell_type": "code",
273 | "execution_count": 33,
274 | "metadata": {},
275 | "outputs": [
276 | {
277 | "name": "stdout",
278 | "output_type": "stream",
279 | "text": [
280 | "TEV_all_binary_10_ang_aa_energy_7_energyedge_5_hbond\n",
281 | "N176I_TAHLYFQSGT.pdb\n",
282 | "Train:| Cleaved 2111 | Uncleaved 2229 | Total 4340 |\n",
283 | "Val:| Cleaved 259 | Uncleaved 283 | Total 542 |\n",
284 | "Test:| Cleaved 238 | Uncleaved 305 | Total 543 |\n",
285 | "Total: 5425\n",
286 | "HCV_all_binary_10_ang_aa_energy_7_energyedge_5_hbond\n",
287 | "AYYYEPC.ASHL\n",
288 | "Train:| Cleaved 10404 | Uncleaved 20995 | Total 31399 |\n",
289 | "Val:| Cleaved 1319 | Uncleaved 2606 | Total 3925 |\n",
290 | "Test:| Cleaved 1338 | Uncleaved 2587 | Total 3925 |\n",
291 | "Total: 39249\n",
292 | "HCV_WT_binary_10_ang_aa_energy_7_energyedge_5_hbond\n",
293 | "AYYYEPC.ASHL\n",
294 | "Train:| Cleaved 1566 | Uncleaved 4307 | Total 5873 |\n",
295 | "Val:| Cleaved 175 | Uncleaved 559 | Total 734 |\n",
296 | "Test:| Cleaved 191 | Uncleaved 544 | Total 735 |\n",
297 | "Total: 7342\n",
298 | "HCV_A171T_binary_10_ang_aa_energy_7_energyedge_5_hbond\n",
299 | "AETMLLC.ASHL\n",
300 | "Train:| Cleaved 2905 | Uncleaved 7659 | Total 10564 |\n",
301 | "Val:| Cleaved 366 | Uncleaved 954 | Total 1320 |\n",
302 | "Test:| Cleaved 373 | Uncleaved 948 | Total 1321 |\n",
303 | "Total: 13205\n",
304 | "HCV_D183A_binary_10_ang_aa_energy_7_energyedge_5_hbond\n",
305 | "ADLMDDC.ASHL\n",
306 | "Train:| Cleaved 3538 | Uncleaved 5953 | Total 9491 |\n",
307 | "Val:| Cleaved 422 | Uncleaved 764 | Total 1186 |\n",
308 | "Test:| Cleaved 390 | Uncleaved 797 | Total 1187 |\n",
309 | "Total: 11864\n",
310 | "HCV_Triple_binary_10_ang_aa_energy_7_energyedge_5_hbond\n",
311 | "AKQTISC.ASHL\n",
312 | "Train:| Cleaved 2496 | Uncleaved 2974 | Total 5470 |\n",
313 | "Val:| Cleaved 315 | Uncleaved 369 | Total 684 |\n",
314 | "Test:| Cleaved 324 | Uncleaved 360 | Total 684 |\n",
315 | "Total: 6838\n"
316 | ]
317 | }
318 | ],
319 | "source": [
320 | "for data in ['TEV_all_binary_10_ang_aa_energy_7_energyedge_5_hbond',\n",
321 | " 'HCV_all_binary_10_ang_aa_energy_7_energyedge_5_hbond',\n",
322 | " 'HCV_WT_binary_10_ang_aa_energy_7_energyedge_5_hbond',\n",
323 | " 'HCV_A171T_binary_10_ang_aa_energy_7_energyedge_5_hbond',\n",
324 | " 'HCV_D183A_binary_10_ang_aa_energy_7_energyedge_5_hbond',\n",
325 | " 'HCV_Triple_binary_10_ang_aa_energy_7_energyedge_5_hbond']:\n",
326 | " print(data)\n",
327 | " raw_data_statistics(data)"
328 | ]
329 | },
330 | {
331 | "cell_type": "code",
332 | "execution_count": null,
333 | "metadata": {},
334 | "outputs": [],
335 | "source": []
336 | }
337 | ],
338 | "metadata": {
339 | "kernelspec": {
340 | "display_name": "Python 3",
341 | "language": "python",
342 | "name": "python3"
343 | },
344 | "language_info": {
345 | "codemirror_mode": {
346 | "name": "ipython",
347 | "version": 3
348 | },
349 | "file_extension": ".py",
350 | "mimetype": "text/x-python",
351 | "name": "python",
352 | "nbconvert_exporter": "python",
353 | "pygments_lexer": "ipython3",
354 | "version": "3.6.4"
355 | }
356 | },
357 | "nbformat": 4,
358 | "nbformat_minor": 2
359 | }
360 |
--------------------------------------------------------------------------------
/analysis/suppl/Table-S1A-HCV_sequence_protease_label.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nucleus2014/protease-gcnn-pytorch/c769a0eb53f3b3ec99d53da738ad380376be9bcf/analysis/suppl/Table-S1A-HCV_sequence_protease_label.xlsx
--------------------------------------------------------------------------------
/analysis/suppl/Table-S1B-TEV_sequence_protease_label.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nucleus2014/protease-gcnn-pytorch/c769a0eb53f3b3ec99d53da738ad380376be9bcf/analysis/suppl/Table-S1B-TEV_sequence_protease_label.xlsx
--------------------------------------------------------------------------------
/analysis/suppl/Table-S2-MetricSummary_ML_PGCN_suppl.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nucleus2014/protease-gcnn-pytorch/c769a0eb53f3b3ec99d53da738ad380376be9bcf/analysis/suppl/Table-S2-MetricSummary_ML_PGCN_suppl.xlsx
--------------------------------------------------------------------------------
/analysis/suppl/Table-S4-node_edge_importance_binary.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nucleus2014/protease-gcnn-pytorch/c769a0eb53f3b3ec99d53da738ad380376be9bcf/analysis/suppl/Table-S4-node_edge_importance_binary.xlsx
--------------------------------------------------------------------------------
/analysis/suppl/Table-S5-TEV_Daivd_Liu_Mutation_Sites_Numbering.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nucleus2014/protease-gcnn-pytorch/c769a0eb53f3b3ec99d53da738ad380376be9bcf/analysis/suppl/Table-S5-TEV_Daivd_Liu_Mutation_Sites_Numbering.xlsx
--------------------------------------------------------------------------------
/analysis/suppl/Table-S8-Yeast-assay_PGCN_predicted_results_TEV_designs .xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nucleus2014/protease-gcnn-pytorch/c769a0eb53f3b3ec99d53da738ad380376be9bcf/analysis/suppl/Table-S8-Yeast-assay_PGCN_predicted_results_TEV_designs .xlsx
--------------------------------------------------------------------------------
/analysis/suppl/Table-S9-CrossTestSummary.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nucleus2014/protease-gcnn-pytorch/c769a0eb53f3b3ec99d53da738ad380376be9bcf/analysis/suppl/Table-S9-CrossTestSummary.xlsx
--------------------------------------------------------------------------------
/bin/graph_generation.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Slurm batch job: build the TEV protein graphs by running protein_graph.py.
3 | #SBATCH --partition=main # Partition (job queue)
4 | #SBATCH --requeue # Return job to the queue if preempted
5 | #SBATCH --job-name=TEV_all # Assign a short name to your job
6 | #SBATCH --nodes=1 # Number of nodes you require
7 | #SBATCH --ntasks=1 # Total # of tasks across all nodes
8 | #SBATCH --cpus-per-task=1 # Cores per task (>1 if multithread tasks)
9 | #SBATCH --mem=32000 # Real memory (RAM) required (MB)
10 | #SBATCH --time=3-00:00:00 # Total run time limit (D-HH:MM:SS)
11 | #SBATCH --output=gg.tev_all.10_ang_aa_energy_7_energyedge_5_hbond.%N.%j.out # STDOUT output file
12 | #SBATCH --error=gg.tev_all.10_ang_aa_energy_7_energyedge_5_hbond.%N.%j.err # STDERR output file (optional)
13 | #SBATCH --export=ALL # Export your current env to the job env
14 | # Abort if the graph directory is missing, so srun never runs from the wrong place.
15 | cd /scratch/cl1205/protease-gcnn-pytorch/graph/ || exit 1
16 |
17 | srun python protein_graph.py -o TEV_all_binary_10_ang_aa_energy_7_energyedge_5_hbond -pr_path /projects/f_sdk94_1/EnzymeModelling/TEVFinalStructures -class TEV_final_all_var_noDup.txt -prot TEV_QS.pdb -d 10
18 |
19 |
--------------------------------------------------------------------------------
/bin/ml_benchmark.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Slurm batch job: run one BenchmarkMLTrainAfterPGCN.py benchmark for the given data/feature/model.
3 | #SBATCH --partition=main # Partition (job queue)
4 | #SBATCH --requeue # Return job to the queue if preempted
5 | #SBATCH --job-name=ml # Assign a short name to your job
6 | #SBATCH --ntasks=1 # Total # of tasks across all nodes
7 | #SBATCH --cpus-per-task=1 # Cores per task (>1 if multithread tasks)
8 | #SBATCH --mem=32000 # Real memory (RAM) required (MB)
9 | #SBATCH --time=3-00:00:00 # Total run time limit (D-HH:MM:SS)
10 | #SBATCH --output=tt.HCV_flatten.%a.%N.%j.out # STDOUT output file
11 | #SBATCH --error=tt.HCV_flatten.%a.%N.%j.err # STDERR output file (optional)
12 | #SBATCH --export=ALL # Export your current env to the job env
13 | data=$1 # forwarded to -data
14 | feature=$2 # forwarded to -feature
15 | model=$3 # forwarded to -model
16 | # Abort if the scripts directory is missing rather than running python from the wrong place.
17 | cd /scratch/cl1205/ml-cleavage/scripts || exit 1
18 | python BenchmarkMLTrainAfterPGCN.py -data "$data" -feature "$feature" -model "$model" -save "/scratch/cl1205/ml-cleavage/outputs/hcv_noProtID_trisplit_20220705"
19 |
20 |
--------------------------------------------------------------------------------
/bin/test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Slurm batch job: run importance.py on an HCV dataset with the given hyperparameters.
3 | #SBATCH --partition=main # Partition (job queue)
4 | #SBATCH --requeue # Return job to the queue if preempted
5 | #SBATCH --job-name=new1 # Assign a short name to your job
6 | #SBATCH --nodes=1 # Number of nodes you require
7 | #SBATCH --ntasks=1 # Total # of tasks across all nodes
8 | #SBATCH --cpus-per-task=1 # Cores per task (>1 if multithread tasks)
9 | #SBATCH --mem=32000 # Real memory (RAM) required (MB)
10 | #SBATCH --time=02:00:00 # Total run time limit (HH:MM:SS)
11 | #SBATCH --output=testnew.binary.%N.%j.out # STDOUT output file
12 | #SBATCH --error=testnew.binary.%N.%j.err # STDERR output file (optional)
13 | #SBATCH --export=ALL # Export your current env to the job env
14 | # Abort if the model directory is missing rather than running python from the wrong place.
15 | cd /scratch/cl1205/protease-gcnn-pytorch/model || exit 1
16 | data=$1
17 | seed=$2
18 | feature=$3
19 | wd=$4
20 | lr=$5
21 | dt=$6
22 | bs=$7
23 | ind=$8
24 | #echo "data: $data"
25 | #echo "seed: $seed"
26 | #echo "feature: $feature"
27 | #echo "weight_decay: $wd"
28 | #echo "learning_rate: $lr"
29 | #echo "dropout: $dt"
30 | #echo "batch_size: $bs"
31 | if [ "${feature}" == "_" ]
32 | then
33 | flag=--energy_only
34 | #rerun='_rerun/'
35 | else
36 | flag=
37 | #rerun='/'
38 | fi
39 | # The dataset name says "coord", but per the original note no coord is actually in it. ${flag} is left unquoted on purpose so an empty value adds no argument.
40 | python importance.py --dataset "HCV_${data}_binary_new_10_ang_aa_energy_7_coord_energyedge_5_hbond_${ind}" --hidden1 20 --depth 2 --linear 0 --att 0 --batch_size "${bs}" --lr "${lr}" --dropout "${dt}" --weight_decay "${wd}" --seed "${seed}" --save "outputs/tt_finalize_20210413/HCV_${data}_binary_10_ang${feature}energy_7_energyedge_5_hbond_bs_${bs}/" ${flag} --new #&> tt.log
41 |
42 |
--------------------------------------------------------------------------------
/bin/train.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | #SBATCH --partition=main # Partition (job queue)
4 | #SBATCH --requeue # Return job to the queue if preempted
5 | #SBATCH --job-name=tt1 # Assign an short name to your job
6 | #SBATCH --array=0-167
7 | #SBATCH --ntasks=1 # Total # of tasks across all nodes
8 | #SBATCH --cpus-per-task=1 # Cores per task (>1 if multithread tasks)
9 | #SBATCH --mem=32000 # Real memory (RAM) required (MB)
10 | #SBATCH --time=24:00:00 # Total run time limit (HH:MM:SS)
11 | #SBATCH --output=tt.HCV_binary_10_ang_aa_energy_7_energyedge_5_hbond.%a.%N.%j.out # STDOUT output file
12 | #SBATCH --error=tt.HCV_binary_10_ang_aa_energy_7_energyedge_5_hbond.%a.%N.%j.err # STDERR output file (optional)
13 | #SBATCH --export=ALL # Export you current env to the job env
14 |
15 | data=$1
16 | seed=$2
17 | feature=$3
18 |
19 | echo "data: $data"
20 | echo "seed: $seed"
21 | echo "feature: $feature"
22 | cd /scratch/cl1205/protease-gcnn-pytorch/model/ || exit 1 # never launch training from the wrong directory
23 | weight_decay=(1e-3 5e-3 1e-4 5e-4)
24 | learning_rate=(1e-2 5e-2 1e-3 5e-3 1e-4 5e-4)
25 | dropout=(0.01 0.05 0.1 0.2 0.3 0.4 0.5)
26 | wd=() # flattened grid: entry n holds the weight decay for array task n
27 | lr=() # flattened grid: entry n holds the learning rate for array task n
28 | dt=() # flattened grid: entry n holds the dropout for array task n
29 | for i in {0..3}
30 | do
31 | for j in {0..5}
32 | do
33 | for k in {0..6}
34 | do
35 | wd+=("${weight_decay[$i]}")
36 | lr+=("${learning_rate[$j]}")
37 | dt+=("${dropout[$k]}")
38 | done
39 | done
40 | done
41 | echo "array id: $SLURM_ARRAY_TASK_ID"
42 | echo "weight decay: ${wd[$SLURM_ARRAY_TASK_ID]}"
43 | echo "learning rate: ${lr[$SLURM_ARRAY_TASK_ID]}"
44 | echo "dropout rate: ${dt[$SLURM_ARRAY_TASK_ID]}"
45 | tmp_wd=${wd[$SLURM_ARRAY_TASK_ID]}
46 | tmp_lr=${lr[$SLURM_ARRAY_TASK_ID]}
47 | tmp_dt=${dt[$SLURM_ARRAY_TASK_ID]}
48 | # A feature argument of "_" selects the energy-only run; the test is quoted so an empty value cannot break it.
49 | if [ "${feature}" == "_" ]
50 | then
51 | flag=--energy_only
52 | else
53 | flag=
54 | fi
55 |
56 | echo "batch_size: 500"
57 | python test.py --dataset HCV_${data}_binary_10_ang_aa_energy_7_energyedge_5_hbond --test_dataset HCV_aa_binary_10_ang_aa_energy_7_energyedge_5_hbond --seed ${seed} --epochs 500 --hidden1 20 --depth 2 --linear 0 --att 0 --model gcn --batch_size 500 --lr $tmp_lr --dropout $tmp_dt --weight_decay $tmp_wd --save "outputs/tt_finalize_20220211/HCV_${data}_binary_10_ang${feature}energy_7_energyedge_5_hbond/bs_500/" ${flag} #&> tt.log
58 |
59 | echo "batch_size: 100"
60 | python test.py --dataset HCV_${data}_binary_10_ang_aa_energy_7_energyedge_5_hbond --test_dataset HCV_${data}_binary_10_ang_aa_energy_7_energyedge_5_hbond --seed ${seed} --epochs 500 --hidden1 20 --depth 2 --linear 0 --att 0 --model gcn --batch_size 100 --lr $tmp_lr --dropout $tmp_dt --weight_decay $tmp_wd --save "outputs/tt_finalize_20220211/HCV_${data}_binary_10_ang${feature}energy_7_energyedge_5_hbond/bs_100/" ${flag}
61 |
62 | echo "batch_size: 1000"
63 | python test.py --dataset HCV_${data}_binary_10_ang_aa_energy_7_energyedge_5_hbond --test_dataset HCV_${data}_binary_10_ang_aa_energy_7_energyedge_5_hbond --seed ${seed} --epochs 500 --hidden1 20 --depth 2 --linear 0 --att 0 --model gcn --batch_size 1000 --lr $tmp_lr --dropout $tmp_dt --weight_decay $tmp_wd --save "outputs/tt_finalize_20220211/HCV_${data}_binary_10_ang${feature}energy_7_energyedge_5_hbond/bs_1000/" ${flag}
64 |
65 | echo "batch_size: 50"
66 | python test.py --dataset HCV_${data}_binary_10_ang_aa_energy_7_energyedge_5_hbond --test_dataset HCV_${data}_binary_10_ang_aa_energy_7_energyedge_5_hbond --seed ${seed} --epochs 500 --hidden1 20 --depth 2 --linear 0 --att 0 --model gcn --batch_size 50 --lr $tmp_lr --dropout $tmp_dt --weight_decay $tmp_wd --save "outputs/tt_finalize_20220211/HCV_${data}_binary_10_ang${feature}energy_7_energyedge_5_hbond/bs_50/" ${flag}
67 |
68 | echo "batch_size: 10"
69 | python test.py --dataset HCV_${data}_binary_10_ang_aa_energy_7_energyedge_5_hbond --test_dataset HCV_${data}_binary_10_ang_aa_energy_7_energyedge_5_hbond --seed ${seed} --epochs 500 --hidden1 20 --depth 2 --linear 0 --att 0 --model gcn --batch_size 10 --lr $tmp_lr --dropout $tmp_dt --weight_decay $tmp_wd --save "outputs/tt_finalize_20220211/HCV_${data}_binary_10_ang${feature}energy_7_energyedge_5_hbond/bs_10/" ${flag}
70 |
71 |
72 |
--------------------------------------------------------------------------------
/bin/variable_importance.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | #SBATCH --partition=main # Partition (job queue)
4 | #SBATCH --requeue # Return job to the queue if preempted
5 | #SBATCH --job-name=vi1 # Assign a short name to your job
6 | #SBATCH --nodes=1 # Number of nodes you require
7 | #SBATCH --ntasks=1 # Total # of tasks across all nodes
8 | #SBATCH --cpus-per-task=1 # Cores per task (>1 if multithread tasks)
9 | #SBATCH --mem=32000 # Real memory (RAM) required (MB)
10 | #SBATCH --time=2-00:00:00 # Total run time limit (D-HH:MM:SS)
11 | #SBATCH --output=varimport.binary.%N.%j.out # STDOUT output file
12 | #SBATCH --error=varimport.binary.%N.%j.err # STDERR output file (optional)
13 | #SBATCH --export=ALL # Export your current env to the job env
14 |
15 | # Runs importance.py --importance for one fixed hyper-parameter setting.
16 | #
17 | # Usage: sbatch variable_importance.sh <data> <seed> <feature> <wd> <lr> <dt> <bs>
18 | #   data     tag spliced into the HCV_<data>_binary_... dataset name
19 | #   seed     value forwarded as --seed
20 | #   feature  infix of the output directory name; the value "_" additionally
21 | #            passes --energy_only and selects the "_rerun" output directory
22 | #   wd/lr/dt weight decay, learning rate and dropout forwarded to importance.py
23 | #   bs       batch size; also names the bs_<bs>/ output sub-directory
24 |
25 | # Abort instead of launching importance.py from the wrong working directory.
26 | cd /scratch/cl1205/protease-gcnn-pytorch/model || exit 1
27 |
28 | data=$1
29 | seed=$2
30 | feature=$3
31 | wd=$4
32 | lr=$5
33 | dt=$6
34 | bs=$7
35 | echo "data: $data"
36 | echo "seed: $seed"
37 | echo "feature: $feature"
38 | echo "weight_decay: $wd"
39 | echo "learning_rate: $lr"
40 | echo "dropout: $dt"
41 | echo "batch_size: $bs"
42 |
43 | # Quoted so that an empty or missing <feature> does not break the test.
44 | if [ "${feature}" = "_" ]
45 | then
46 |     label=--energy_only
47 |     rerun='_rerun/'
48 | else
49 |     label=
50 |     rerun='/'
51 | fi
52 |
53 | # The same dataset is used for both --dataset and --test_dataset.
54 | dataset="HCV_${data}_binary_10_ang_aa_energy_7_energyedge_5_hbond"
55 |
56 | # ${label} stays unquoted on purpose: when empty it must expand to no argument.
57 | srun python importance.py --importance --dataset "${dataset}" --test_dataset "${dataset}" --hidden1 20 --depth 2 --linear 0 --att 0 --batch_size ${bs} --lr ${lr} --dropout ${dt} --weight_decay ${wd} --seed ${seed} --save "outputs/tt_finalize_20210413/HCV_${data}_binary_10_ang${feature}energy_7_energyedge_5_hbond${rerun}bs_${bs}/" ${label}
58 |
--------------------------------------------------------------------------------
/data/.ipynb_checkpoints/ind.None-checkpoint.pose_indices:
--------------------------------------------------------------------------------
1 | ,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39
2 | GRIP2_4_DAAHHDESNC_0_afd.pdb,104,105,106,107,108,109,110,111,112,113,12,13,14,15,16,17,22,24,49,50,51,52,53,54,55,56,57,90,91,92,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
3 | MPP2_1_QPCFNKLFPL_0_afd.pdb,87,88,89,90,91,92,93,94,95,96,10,14,15,16,17,18,19,20,21,22,23,24,27,29,30,31,32,33,38,44,50,62,63,65,66,68,69,70,72,78
4 |
--------------------------------------------------------------------------------
/graph/__pycache__/protein_graph.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nucleus2014/protease-gcnn-pytorch/c769a0eb53f3b3ec99d53da738ad380376be9bcf/graph/__pycache__/protein_graph.cpython-38.pyc
--------------------------------------------------------------------------------
/graph/classifications/.ipynb_checkpoints/tev_design_for_validation_dual_directions_cleavage-checkpoint.txt:
--------------------------------------------------------------------------------
1 | Sequence Result
2 | p2_c2_5.pdb CLEAVED
3 | p2_c2_2.pdb CLEAVED
4 | p2_c2_4.pdb CLEAVED
5 | p2_c2_7.pdb CLEAVED
6 | p2_c2_8.pdb CLEAVED
7 | p2_c2_1.pdb CLEAVED
8 | p2_c2_0.pdb CLEAVED
9 | p2_c2_3.pdb CLEAVED
10 | p2_c2_6.pdb CLEAVED
11 | p2_c2_9.pdb CLEAVED
12 | p6_c5_6.pdb CLEAVED
13 | p6_c5_9.pdb CLEAVED
14 | p6_c5_7.pdb CLEAVED
15 | p6_c5_8.pdb CLEAVED
16 | p6_c5_5.pdb CLEAVED
17 | p6_c5_1.pdb CLEAVED
18 | p6_c5_0.pdb CLEAVED
19 | p6_c5_2.pdb CLEAVED
20 | p6_c5_4.pdb CLEAVED
21 | p6_c5_3.pdb CLEAVED
22 | p2_c9_4.pdb CLEAVED
23 | p2_c9_3.pdb CLEAVED
24 | p2_c9_5.pdb CLEAVED
25 | p2_c9_9.pdb CLEAVED
26 | p2_c9_6.pdb CLEAVED
27 | p2_c9_8.pdb CLEAVED
28 | p2_c9_0.pdb CLEAVED
29 | p2_c9_2.pdb CLEAVED
30 | p2_c9_7.pdb CLEAVED
31 | p2_c9_1.pdb CLEAVED
32 | P6_c3_9.pdb CLEAVED
33 | P6_c3_3.pdb CLEAVED
34 | P6_c3_1.pdb CLEAVED
35 | P6_c3_0.pdb CLEAVED
36 | P6_c3_4.pdb CLEAVED
37 | P6_c3_5.pdb CLEAVED
38 | P6_c3_2.pdb CLEAVED
39 | P6_c3_7.pdb CLEAVED
40 | P6_c3_6.pdb CLEAVED
41 | P6_c3_8.pdb CLEAVED
42 | p6_c7_7.pdb CLEAVED
43 | p6_c7_3.pdb CLEAVED
44 | p6_c7_0.pdb CLEAVED
45 | p6_c7_5.pdb CLEAVED
46 | p6_c7_9.pdb CLEAVED
47 | p6_c7_1.pdb CLEAVED
48 | p6_c7_8.pdb CLEAVED
49 | p6_c7_2.pdb CLEAVED
50 | p6_c7_4.pdb CLEAVED
51 | p6_c7_6.pdb CLEAVED
52 | P6_c2_7.pdb CLEAVED
53 | P6_c2_0.pdb CLEAVED
54 | P6_c2_4.pdb CLEAVED
55 | P6_c2_9.pdb CLEAVED
56 | P6_c2_5.pdb CLEAVED
57 | P6_c2_1.pdb CLEAVED
58 | P6_c2_3.pdb CLEAVED
59 | P6_c2_8.pdb CLEAVED
60 | P6_c2_6.pdb CLEAVED
61 | P6_c2_2.pdb CLEAVED
62 | p2_c10_2.pdb CLEAVED
63 | p2_c10_8.pdb CLEAVED
64 | p2_c10_1.pdb CLEAVED
65 | p2_c10_0.pdb CLEAVED
66 | p2_c10_9.pdb CLEAVED
67 | p2_c10_5.pdb CLEAVED
68 | p2_c10_6.pdb CLEAVED
69 | p2_c10_4.pdb CLEAVED
70 | p2_c10_7.pdb CLEAVED
71 | p2_c10_3.pdb CLEAVED
72 | p6_c6_1.pdb CLEAVED
73 | p6_c6_6.pdb CLEAVED
74 | p6_c6_9.pdb CLEAVED
75 | p6_c6_4.pdb CLEAVED
76 | p6_c6_2.pdb CLEAVED
77 | p6_c6_5.pdb CLEAVED
78 | p6_c6_3.pdb CLEAVED
79 | p6_c6_7.pdb CLEAVED
80 | p6_c6_8.pdb CLEAVED
81 | p6_c6_0.pdb CLEAVED
82 | p6_c8_5.pdb CLEAVED
83 | p6_c8_6.pdb CLEAVED
84 | p6_c8_1.pdb CLEAVED
85 | p6_c8_8.pdb CLEAVED
86 | p6_c8_4.pdb CLEAVED
87 | p6_c8_9.pdb CLEAVED
88 | p6_c8_3.pdb CLEAVED
89 | p6_c8_0.pdb CLEAVED
90 | p6_c8_2.pdb CLEAVED
91 | p6_c8_7.pdb CLEAVED
92 | p3_c2_5.pdb UNCLEAVED
93 | p3_c2_2.pdb UNCLEAVED
94 | p3_c2_4.pdb UNCLEAVED
95 | p3_c2_7.pdb UNCLEAVED
96 | p3_c2_8.pdb UNCLEAVED
97 | p3_c2_1.pdb UNCLEAVED
98 | p3_c2_0.pdb UNCLEAVED
99 | p3_c2_3.pdb UNCLEAVED
100 | p3_c2_6.pdb UNCLEAVED
101 | p3_c2_9.pdb UNCLEAVED
102 | p3_c3_9.pdb UNCLEAVED
103 | p3_c3_3.pdb UNCLEAVED
104 | p3_c3_1.pdb UNCLEAVED
105 | p3_c3_0.pdb UNCLEAVED
106 | p3_c3_4.pdb UNCLEAVED
107 | p3_c3_5.pdb UNCLEAVED
108 | p3_c3_2.pdb UNCLEAVED
109 | p3_c3_7.pdb UNCLEAVED
110 | p3_c3_6.pdb UNCLEAVED
111 | p3_c3_8.pdb UNCLEAVED
112 | p3_c1_2.pdb UNCLEAVED
113 | p3_c1_8.pdb UNCLEAVED
114 | p3_c1_1.pdb UNCLEAVED
115 | p3_c1_0.pdb UNCLEAVED
116 | p3_c1_5.pdb UNCLEAVED
117 | p3_c1_7.pdb UNCLEAVED
118 | p3_c1_4.pdb UNCLEAVED
119 | p3_c1_3.pdb UNCLEAVED
120 | p3_c1_9.pdb UNCLEAVED
121 | p3_c1_6.pdb UNCLEAVED
122 | tev_p2_V209I_W211L_M218D_relaxed_0.pdb UNCLEAVED
123 | tev_p2_V209I_W211L_M218D_relaxed_1.pdb UNCLEAVED
124 | tev_p2_V209I_W211L_M218D_relaxed_2.pdb UNCLEAVED
125 | tev_p2_V209I_W211L_M218D_relaxed_3.pdb UNCLEAVED
126 | tev_p2_V209I_W211L_M218D_relaxed_4.pdb UNCLEAVED
127 | tev_p2_V209I_W211L_M218D_relaxed_5.pdb UNCLEAVED
128 | tev_p2_V209I_W211L_M218D_relaxed_6.pdb UNCLEAVED
129 | tev_p2_V209I_W211L_M218D_relaxed_7.pdb UNCLEAVED
130 | tev_p2_V209I_W211L_M218D_relaxed_8.pdb UNCLEAVED
131 | tev_p2_V209I_W211L_M218D_relaxed_9.pdb UNCLEAVED
132 | tev_p2_V209S_W211R_G213R_relaxed_0.pdb UNCLEAVED
133 | tev_p2_V209S_W211R_G213R_relaxed_1.pdb UNCLEAVED
134 | tev_p2_V209S_W211R_G213R_relaxed_2.pdb UNCLEAVED
135 | tev_p2_V209S_W211R_G213R_relaxed_3.pdb UNCLEAVED
136 | tev_p2_V209S_W211R_G213R_relaxed_4.pdb UNCLEAVED
137 | tev_p2_V209S_W211R_G213R_relaxed_5.pdb UNCLEAVED
138 | tev_p2_V209S_W211R_G213R_relaxed_6.pdb UNCLEAVED
139 | tev_p2_V209S_W211R_G213R_relaxed_7.pdb UNCLEAVED
140 | tev_p2_V209S_W211R_G213R_relaxed_8.pdb UNCLEAVED
141 | tev_p2_V209S_W211R_G213R_relaxed_9.pdb UNCLEAVED
142 | tev_p2_V209S_W211R_M218D_relaxed_0.pdb UNCLEAVED
143 | tev_p2_V209S_W211R_M218D_relaxed_1.pdb UNCLEAVED
144 | tev_p2_V209S_W211R_M218D_relaxed_2.pdb UNCLEAVED
145 | tev_p2_V209S_W211R_M218D_relaxed_3.pdb UNCLEAVED
146 | tev_p2_V209S_W211R_M218D_relaxed_4.pdb UNCLEAVED
147 | tev_p2_V209S_W211R_M218D_relaxed_5.pdb UNCLEAVED
148 | tev_p2_V209S_W211R_M218D_relaxed_6.pdb UNCLEAVED
149 | tev_p2_V209S_W211R_M218D_relaxed_7.pdb UNCLEAVED
150 | tev_p2_V209S_W211R_M218D_relaxed_8.pdb UNCLEAVED
151 | tev_p2_V209S_W211R_M218D_relaxed_9.pdb UNCLEAVED
152 | tev_p2_V209S_W211R_relaxed_0.pdb UNCLEAVED
153 | tev_p2_V209S_W211R_relaxed_1.pdb UNCLEAVED
154 | tev_p2_V209S_W211R_relaxed_2.pdb UNCLEAVED
155 | tev_p2_V209S_W211R_relaxed_3.pdb UNCLEAVED
156 | tev_p2_V209S_W211R_relaxed_4.pdb UNCLEAVED
157 | tev_p2_V209S_W211R_relaxed_5.pdb UNCLEAVED
158 | tev_p2_V209S_W211R_relaxed_6.pdb UNCLEAVED
159 | tev_p2_V209S_W211R_relaxed_7.pdb UNCLEAVED
160 | tev_p2_V209S_W211R_relaxed_8.pdb UNCLEAVED
161 | tev_p2_V209S_W211R_relaxed_9.pdb UNCLEAVED
162 | tev_p6_F172Y_N174H_relaxed_0.pdb UNCLEAVED
163 | tev_p6_F172Y_N174H_relaxed_1.pdb UNCLEAVED
164 | tev_p6_F172Y_N174H_relaxed_2.pdb UNCLEAVED
165 | tev_p6_F172Y_N174H_relaxed_3.pdb UNCLEAVED
166 | tev_p6_F172Y_N174H_relaxed_4.pdb UNCLEAVED
167 | tev_p6_F172Y_N174H_relaxed_5.pdb UNCLEAVED
168 | tev_p6_F172Y_N174H_relaxed_6.pdb UNCLEAVED
169 | tev_p6_F172Y_N174H_relaxed_7.pdb UNCLEAVED
170 | tev_p6_F172Y_N174H_relaxed_8.pdb UNCLEAVED
171 | tev_p6_F172Y_N174H_relaxed_9.pdb UNCLEAVED
172 | tev_p6_K141E_T175P_relaxed_0.pdb UNCLEAVED
173 | tev_p6_K141E_T175P_relaxed_1.pdb UNCLEAVED
174 | tev_p6_K141E_T175P_relaxed_2.pdb UNCLEAVED
175 | tev_p6_K141E_T175P_relaxed_3.pdb UNCLEAVED
176 | tev_p6_K141E_T175P_relaxed_4.pdb UNCLEAVED
177 | tev_p6_K141E_T175P_relaxed_5.pdb UNCLEAVED
178 | tev_p6_K141E_T175P_relaxed_6.pdb UNCLEAVED
179 | tev_p6_K141E_T175P_relaxed_7.pdb UNCLEAVED
180 | tev_p6_K141E_T175P_relaxed_8.pdb UNCLEAVED
181 | tev_p6_K141E_T175P_relaxed_9.pdb UNCLEAVED
182 | tev_p6_T173A_T175A_N176D_relaxed_0.pdb UNCLEAVED
183 | tev_p6_T173A_T175A_N176D_relaxed_1.pdb UNCLEAVED
184 | tev_p6_T173A_T175A_N176D_relaxed_2.pdb UNCLEAVED
185 | tev_p6_T173A_T175A_N176D_relaxed_3.pdb UNCLEAVED
186 | tev_p6_T173A_T175A_N176D_relaxed_4.pdb UNCLEAVED
187 | tev_p6_T173A_T175A_N176D_relaxed_5.pdb UNCLEAVED
188 | tev_p6_T173A_T175A_N176D_relaxed_6.pdb UNCLEAVED
189 | tev_p6_T173A_T175A_N176D_relaxed_7.pdb UNCLEAVED
190 | tev_p6_T173A_T175A_N176D_relaxed_8.pdb UNCLEAVED
191 | tev_p6_T173A_T175A_N176D_relaxed_9.pdb UNCLEAVED
192 |
--------------------------------------------------------------------------------
/graph/classifications/20220911_tev_design_successes_p1pA.txt:
--------------------------------------------------------------------------------
1 | Sequence Result
2 | tev_p2_wt_relaxed_3.pdb UNCLEAVED
3 | tev_p2_wt_relaxed_6.pdb UNCLEAVED
4 | tev_p2_wt_relaxed_0.pdb UNCLEAVED
5 | tev_p2_wt_relaxed_8.pdb UNCLEAVED
6 | tev_p2_wt_relaxed_7.pdb UNCLEAVED
7 | tev_p2_wt_relaxed_5.pdb UNCLEAVED
8 | tev_p2_wt_relaxed_1.pdb UNCLEAVED
9 | tev_p2_wt_relaxed_2.pdb UNCLEAVED
10 | tev_p2_wt_relaxed_4.pdb UNCLEAVED
11 | tev_p2_wt_relaxed_9.pdb UNCLEAVED
12 | tev_p6_c3_relaxed_2.pdb CLEAVED
13 | tev_p6_c3_relaxed_6.pdb CLEAVED
14 | tev_p6_c3_relaxed_7.pdb CLEAVED
15 | tev_p6_c3_relaxed_3.pdb CLEAVED
16 | tev_p6_c3_relaxed_4.pdb CLEAVED
17 | tev_p6_c3_relaxed_0.pdb CLEAVED
18 | tev_p6_c3_relaxed_5.pdb CLEAVED
19 | tev_p6_c3_relaxed_8.pdb CLEAVED
20 | tev_p6_c3_relaxed_9.pdb CLEAVED
21 | tev_p6_c3_relaxed_1.pdb CLEAVED
22 | tev_p6_wt_relaxed_7.pdb UNCLEAVED
23 | tev_p6_wt_relaxed_2.pdb UNCLEAVED
24 | tev_p6_wt_relaxed_9.pdb UNCLEAVED
25 | tev_p6_wt_relaxed_8.pdb UNCLEAVED
26 | tev_p6_wt_relaxed_6.pdb UNCLEAVED
27 | tev_p6_wt_relaxed_1.pdb UNCLEAVED
28 | tev_p6_wt_relaxed_3.pdb UNCLEAVED
29 | tev_p6_wt_relaxed_5.pdb UNCLEAVED
30 | tev_p6_wt_relaxed_0.pdb UNCLEAVED
31 | tev_p6_wt_relaxed_4.pdb UNCLEAVED
32 | tev_p6_c2_relaxed_4.pdb CLEAVED
33 | tev_p6_c2_relaxed_6.pdb CLEAVED
34 | tev_p6_c2_relaxed_1.pdb CLEAVED
35 | tev_p6_c2_relaxed_7.pdb CLEAVED
36 | tev_p6_c2_relaxed_8.pdb CLEAVED
37 | tev_p6_c2_relaxed_5.pdb CLEAVED
38 | tev_p6_c2_relaxed_2.pdb CLEAVED
39 | tev_p6_c2_relaxed_0.pdb CLEAVED
40 | tev_p6_c2_relaxed_3.pdb CLEAVED
41 | tev_p6_c2_relaxed_9.pdb CLEAVED
42 | tev_p6_c5_relaxed_0.pdb CLEAVED
43 | tev_p6_c5_relaxed_5.pdb CLEAVED
44 | tev_p6_c5_relaxed_7.pdb CLEAVED
45 | tev_p6_c5_relaxed_9.pdb CLEAVED
46 | tev_p6_c5_relaxed_6.pdb CLEAVED
47 | tev_p6_c5_relaxed_8.pdb CLEAVED
48 | tev_p6_c5_relaxed_3.pdb CLEAVED
49 | tev_p6_c5_relaxed_1.pdb CLEAVED
50 | tev_p6_c5_relaxed_4.pdb CLEAVED
51 | tev_p6_c5_relaxed_2.pdb CLEAVED
52 | tev_p2_c10_relaxed_5.pdb CLEAVED
53 | tev_p2_c10_relaxed_7.pdb CLEAVED
54 | tev_p2_c10_relaxed_9.pdb CLEAVED
55 | tev_p2_c10_relaxed_4.pdb CLEAVED
56 | tev_p2_c10_relaxed_6.pdb CLEAVED
57 | tev_p2_c10_relaxed_2.pdb CLEAVED
58 | tev_p2_c10_relaxed_8.pdb CLEAVED
59 | tev_p2_c10_relaxed_1.pdb CLEAVED
60 | tev_p2_c10_relaxed_0.pdb CLEAVED
61 | tev_p2_c10_relaxed_3.pdb CLEAVED
62 | tev_p2_c9_relaxed_9.pdb CLEAVED
63 | tev_p2_c9_relaxed_4.pdb CLEAVED
64 | tev_p2_c9_relaxed_3.pdb CLEAVED
65 | tev_p2_c9_relaxed_7.pdb CLEAVED
66 | tev_p2_c9_relaxed_0.pdb CLEAVED
67 | tev_p2_c9_relaxed_5.pdb CLEAVED
68 | tev_p2_c9_relaxed_8.pdb CLEAVED
69 | tev_p2_c9_relaxed_1.pdb CLEAVED
70 | tev_p2_c9_relaxed_6.pdb CLEAVED
71 | tev_p2_c9_relaxed_2.pdb CLEAVED
72 | tev_wt_wt_relaxed_0.pdb CLEAVED
73 | tev_wt_wt_relaxed_1.pdb CLEAVED
74 | tev_wt_wt_relaxed_9.pdb CLEAVED
75 | tev_wt_wt_relaxed_5.pdb CLEAVED
76 | tev_wt_wt_relaxed_8.pdb CLEAVED
77 | tev_wt_wt_relaxed_7.pdb CLEAVED
78 | tev_wt_wt_relaxed_2.pdb CLEAVED
79 | tev_wt_wt_relaxed_6.pdb CLEAVED
80 | tev_wt_wt_relaxed_4.pdb CLEAVED
81 | tev_wt_wt_relaxed_3.pdb CLEAVED
82 | tev_p2_c2_relaxed_8.pdb CLEAVED
83 | tev_p2_c2_relaxed_1.pdb CLEAVED
84 | tev_p2_c2_relaxed_5.pdb CLEAVED
85 | tev_p2_c2_relaxed_0.pdb CLEAVED
86 | tev_p2_c2_relaxed_4.pdb CLEAVED
87 | tev_p2_c2_relaxed_3.pdb CLEAVED
88 | tev_p2_c2_relaxed_9.pdb CLEAVED
89 | tev_p2_c2_relaxed_2.pdb CLEAVED
90 | tev_p2_c2_relaxed_6.pdb CLEAVED
91 | tev_p2_c2_relaxed_7.pdb CLEAVED
92 | tev_p6_c7_relaxed_7.pdb CLEAVED
93 | tev_p6_c7_relaxed_6.pdb CLEAVED
94 | tev_p6_c7_relaxed_9.pdb CLEAVED
95 | tev_p6_c7_relaxed_8.pdb CLEAVED
96 | tev_p6_c7_relaxed_3.pdb CLEAVED
97 | tev_p6_c7_relaxed_0.pdb CLEAVED
98 | tev_p6_c7_relaxed_5.pdb CLEAVED
99 | tev_p6_c7_relaxed_1.pdb CLEAVED
100 | tev_p6_c7_relaxed_4.pdb CLEAVED
101 | tev_p6_c7_relaxed_2.pdb CLEAVED
102 | tev_p6_c8_relaxed_1.pdb CLEAVED
103 | tev_p6_c8_relaxed_5.pdb CLEAVED
104 | tev_p6_c8_relaxed_4.pdb CLEAVED
105 | tev_p6_c8_relaxed_0.pdb CLEAVED
106 | tev_p6_c8_relaxed_3.pdb CLEAVED
107 | tev_p6_c8_relaxed_9.pdb CLEAVED
108 | tev_p6_c8_relaxed_7.pdb CLEAVED
109 | tev_p6_c8_relaxed_6.pdb CLEAVED
110 | tev_p6_c8_relaxed_2.pdb CLEAVED
111 | tev_p6_c8_relaxed_8.pdb CLEAVED
112 | tev_p6_c6_relaxed_0.pdb CLEAVED
113 | tev_p6_c6_relaxed_1.pdb CLEAVED
114 | tev_p6_c6_relaxed_5.pdb CLEAVED
115 | tev_p6_c6_relaxed_2.pdb CLEAVED
116 | tev_p6_c6_relaxed_8.pdb CLEAVED
117 | tev_p6_c6_relaxed_3.pdb CLEAVED
118 | tev_p6_c6_relaxed_7.pdb CLEAVED
119 | tev_p6_c6_relaxed_9.pdb CLEAVED
120 | tev_p6_c6_relaxed_6.pdb CLEAVED
121 | tev_p6_c6_relaxed_4.pdb CLEAVED
122 |
--------------------------------------------------------------------------------
/graph/classifications/20220922_tev_design_dual_directions_cleavage.txt:
--------------------------------------------------------------------------------
1 | Sequence Result
2 | p2_c2_5.pdb CLEAVED
3 | p2_c2_2.pdb CLEAVED
4 | p2_c2_4.pdb CLEAVED
5 | p2_c2_7.pdb CLEAVED
6 | p2_c2_8.pdb CLEAVED
7 | p2_c2_1.pdb CLEAVED
8 | p2_c2_0.pdb CLEAVED
9 | p2_c2_3.pdb CLEAVED
10 | p2_c2_6.pdb CLEAVED
11 | p2_c2_9.pdb CLEAVED
12 | wt_wt_2.pdb CLEAVED
13 | wt_wt_4.pdb CLEAVED
14 | wt_wt_5.pdb CLEAVED
15 | wt_wt_6.pdb CLEAVED
16 | wt_wt_1.pdb CLEAVED
17 | wt_wt_8.pdb CLEAVED
18 | wt_wt_0.pdb CLEAVED
19 | wt_wt_9.pdb CLEAVED
20 | wt_wt_7.pdb CLEAVED
21 | wt_wt_3.pdb CLEAVED
22 | p6_c5_6.pdb CLEAVED
23 | p6_c5_9.pdb CLEAVED
24 | p6_c5_7.pdb CLEAVED
25 | p6_c5_8.pdb CLEAVED
26 | p6_c5_5.pdb CLEAVED
27 | p6_c5_1.pdb CLEAVED
28 | p6_c5_0.pdb CLEAVED
29 | p6_c5_2.pdb CLEAVED
30 | p6_c5_4.pdb CLEAVED
31 | p6_c5_3.pdb CLEAVED
32 | p2_c9_4.pdb CLEAVED
33 | p2_c9_3.pdb CLEAVED
34 | p2_c9_5.pdb CLEAVED
35 | p2_c9_9.pdb CLEAVED
36 | p2_c9_6.pdb CLEAVED
37 | p2_c9_8.pdb CLEAVED
38 | p2_c9_0.pdb CLEAVED
39 | p2_c9_2.pdb CLEAVED
40 | p2_c9_7.pdb CLEAVED
41 | p2_c9_1.pdb CLEAVED
42 | P6_c3_9.pdb CLEAVED
43 | P6_c3_3.pdb CLEAVED
44 | P6_c3_1.pdb CLEAVED
45 | P6_c3_0.pdb CLEAVED
46 | P6_c3_4.pdb CLEAVED
47 | P6_c3_5.pdb CLEAVED
48 | P6_c3_2.pdb CLEAVED
49 | P6_c3_7.pdb CLEAVED
50 | P6_c3_6.pdb CLEAVED
51 | P6_c3_8.pdb CLEAVED
52 | p6_c7_7.pdb CLEAVED
53 | p6_c7_3.pdb CLEAVED
54 | p6_c7_0.pdb CLEAVED
55 | p6_c7_5.pdb CLEAVED
56 | p6_c7_9.pdb CLEAVED
57 | p6_c7_1.pdb CLEAVED
58 | p6_c7_8.pdb CLEAVED
59 | p6_c7_2.pdb CLEAVED
60 | p6_c7_4.pdb CLEAVED
61 | p6_c7_6.pdb CLEAVED
62 | p2_wt_8.pdb UNCLEAVED
63 | p2_wt_4.pdb UNCLEAVED
64 | p2_wt_1.pdb UNCLEAVED
65 | p2_wt_0.pdb UNCLEAVED
66 | p2_wt_2.pdb UNCLEAVED
67 | p2_wt_6.pdb UNCLEAVED
68 | p2_wt_5.pdb UNCLEAVED
69 | p2_wt_7.pdb UNCLEAVED
70 | p2_wt_3.pdb UNCLEAVED
71 | p2_wt_9.pdb UNCLEAVED
72 | P6_c2_7.pdb CLEAVED
73 | P6_c2_0.pdb CLEAVED
74 | P6_c2_4.pdb CLEAVED
75 | P6_c2_9.pdb CLEAVED
76 | P6_c2_5.pdb CLEAVED
77 | P6_c2_1.pdb CLEAVED
78 | P6_c2_3.pdb CLEAVED
79 | P6_c2_8.pdb CLEAVED
80 | P6_c2_6.pdb CLEAVED
81 | P6_c2_2.pdb CLEAVED
82 | P6_wt_1.pdb UNCLEAVED
83 | P6_wt_2.pdb UNCLEAVED
84 | P6_wt_6.pdb UNCLEAVED
85 | P6_wt_5.pdb UNCLEAVED
86 | P6_wt_3.pdb UNCLEAVED
87 | P6_wt_8.pdb UNCLEAVED
88 | P6_wt_4.pdb UNCLEAVED
89 | P6_wt_7.pdb UNCLEAVED
90 | P6_wt_0.pdb UNCLEAVED
91 | P6_wt_9.pdb UNCLEAVED
92 | p6_c1_2.pdb UNCLEAVED
93 | p6_c1_4.pdb UNCLEAVED
94 | p6_c1_3.pdb UNCLEAVED
95 | p6_c1_7.pdb UNCLEAVED
96 | p6_c1_5.pdb UNCLEAVED
97 | p6_c1_8.pdb UNCLEAVED
98 | p6_c1_0.pdb UNCLEAVED
99 | p6_c1_9.pdb UNCLEAVED
100 | p6_c1_6.pdb UNCLEAVED
101 | p6_c1_1.pdb UNCLEAVED
102 | p2_c10_2.pdb CLEAVED
103 | p2_c10_8.pdb CLEAVED
104 | p2_c10_1.pdb CLEAVED
105 | p2_c10_0.pdb CLEAVED
106 | p2_c10_9.pdb CLEAVED
107 | p2_c10_5.pdb CLEAVED
108 | p2_c10_6.pdb CLEAVED
109 | p2_c10_4.pdb CLEAVED
110 | p2_c10_7.pdb CLEAVED
111 | p2_c10_3.pdb CLEAVED
112 | p6_c6_1.pdb CLEAVED
113 | p6_c6_6.pdb CLEAVED
114 | p6_c6_9.pdb CLEAVED
115 | p6_c6_4.pdb CLEAVED
116 | p6_c6_2.pdb CLEAVED
117 | p6_c6_5.pdb CLEAVED
118 | p6_c6_3.pdb CLEAVED
119 | p6_c6_7.pdb CLEAVED
120 | p6_c6_8.pdb CLEAVED
121 | p6_c6_0.pdb CLEAVED
122 | p2_c5_4.pdb UNCLEAVED
123 | p2_c5_7.pdb UNCLEAVED
124 | p2_c5_2.pdb UNCLEAVED
125 | p2_c5_3.pdb UNCLEAVED
126 | p2_c5_1.pdb UNCLEAVED
127 | p2_c5_0.pdb UNCLEAVED
128 | p2_c5_5.pdb UNCLEAVED
129 | p2_c5_8.pdb UNCLEAVED
130 | p2_c5_6.pdb UNCLEAVED
131 | p2_c5_9.pdb UNCLEAVED
132 | p6_c8_5.pdb CLEAVED
133 | p6_c8_6.pdb CLEAVED
134 | p6_c8_1.pdb CLEAVED
135 | p6_c8_8.pdb CLEAVED
136 | p6_c8_4.pdb CLEAVED
137 | p6_c8_9.pdb CLEAVED
138 | p6_c8_3.pdb CLEAVED
139 | p6_c8_0.pdb CLEAVED
140 | p6_c8_2.pdb CLEAVED
141 | p6_c8_7.pdb CLEAVED
142 | p2_c1_2.pdb UNCLEAVED
143 | p2_c1_8.pdb UNCLEAVED
144 | p2_c1_1.pdb UNCLEAVED
145 | p2_c1_0.pdb UNCLEAVED
146 | p2_c1_5.pdb UNCLEAVED
147 | p2_c1_7.pdb UNCLEAVED
148 | p2_c1_4.pdb UNCLEAVED
149 | p2_c1_3.pdb UNCLEAVED
150 | p2_c1_9.pdb UNCLEAVED
151 | p2_c1_6.pdb UNCLEAVED
152 |
--------------------------------------------------------------------------------
/graph/classifications/20220925_tev_design_p3.txt:
--------------------------------------------------------------------------------
1 | Sequence Result
2 | p3_c2_5.pdb UNCLEAVED
3 | p3_c2_2.pdb UNCLEAVED
4 | p3_c2_4.pdb UNCLEAVED
5 | p3_c2_7.pdb UNCLEAVED
6 | p3_c2_8.pdb UNCLEAVED
7 | p3_c2_1.pdb UNCLEAVED
8 | p3_c2_0.pdb UNCLEAVED
9 | p3_c2_3.pdb UNCLEAVED
10 | p3_c2_6.pdb UNCLEAVED
11 | p3_c2_9.pdb UNCLEAVED
12 | p3_c3_9.pdb UNCLEAVED
13 | p3_c3_3.pdb UNCLEAVED
14 | p3_c3_1.pdb UNCLEAVED
15 | p3_c3_0.pdb UNCLEAVED
16 | p3_c3_4.pdb UNCLEAVED
17 | p3_c3_5.pdb UNCLEAVED
18 | p3_c3_2.pdb UNCLEAVED
19 | p3_c3_7.pdb UNCLEAVED
20 | p3_c3_6.pdb UNCLEAVED
21 | p3_c3_8.pdb UNCLEAVED
22 | p3_wt_8.pdb UNCLEAVED
23 | p3_wt_4.pdb UNCLEAVED
24 | p3_wt_1.pdb UNCLEAVED
25 | p3_wt_0.pdb UNCLEAVED
26 | p3_wt_2.pdb UNCLEAVED
27 | p3_wt_6.pdb UNCLEAVED
28 | p3_wt_5.pdb UNCLEAVED
29 | p3_wt_7.pdb UNCLEAVED
30 | p3_wt_3.pdb UNCLEAVED
31 | p3_wt_9.pdb UNCLEAVED
32 | p3_c1_2.pdb UNCLEAVED
33 | p3_c1_8.pdb UNCLEAVED
34 | p3_c1_1.pdb UNCLEAVED
35 | p3_c1_0.pdb UNCLEAVED
36 | p3_c1_5.pdb UNCLEAVED
37 | p3_c1_7.pdb UNCLEAVED
38 | p3_c1_4.pdb UNCLEAVED
39 | p3_c1_3.pdb UNCLEAVED
40 | p3_c1_9.pdb UNCLEAVED
41 | p3_c1_6.pdb UNCLEAVED
42 |
--------------------------------------------------------------------------------
/graph/classifications/dvdar_design.txt:
--------------------------------------------------------------------------------
1 | Sequence Result
2 | hcv_no_swap_sub_1_31_relaxed_0.pdb CLEAVED
3 | hcv_all_swap_sub_1_11_relaxed_1.pdb CLEAVED
4 | hcv_all_swap_design_1_81_designed_1.pdb CLEAVED
5 | hcv_all_swap_design_3_9_designed_4.pdb CLEAVED
6 | hcv_all_swap_design_1_94_designed_3.pdb CLEAVED
7 | hcv_all_swap_design_3_67_designed_3.pdb CLEAVED
8 | hcv_all_swap_design_3_21_designed_4.pdb CLEAVED
9 | hcv_all_swap_design_3_7_designed_1.pdb CLEAVED
10 | hcv_all_swap_design_0_57_designed_0.pdb CLEAVED
11 | hcv_all_swap_design_3_47_designed_1.pdb CLEAVED
12 | hcv_all_swap_design_3_0_designed_7.pdb CLEAVED
13 | hcv_all_swap_design_3_29_designed_8.pdb CLEAVED
14 | hcv_all_swap_design_1_48_designed_3.pdb CLEAVED
15 | hcv_all_swap_design_3_55_designed_6.pdb CLEAVED
16 | hcv_all_swap_design_3_60_designed_9.pdb CLEAVED
17 | hcv_all_swap_design_3_93_designed_0.pdb CLEAVED
18 | hcv_all_swap_design_1_87_designed_3.pdb CLEAVED
19 | hcv_all_swap_design_3_84_designed_3.pdb CLEAVED
20 | hcv_all_swap_design_3_59_designed_1.pdb CLEAVED
21 | hcv_all_swap_design_3_20_designed_8.pdb CLEAVED
22 | hcv_all_swap_design_3_35_designed_4.pdb CLEAVED
23 | hcv_all_swap_design_3_56_designed_2.pdb CLEAVED
24 | hcv_all_swap_design_3_0_designed_6.pdb CLEAVED
25 | hcv_all_swap_design_0_97_designed_2.pdb CLEAVED
26 | hcv_all_swap_design_3_71_designed_3.pdb CLEAVED
27 | hcv_all_swap_design_3_62_designed_5.pdb CLEAVED
28 | hcv_all_swap_design_3_52_designed_3.pdb CLEAVED
29 | hcv_all_swap_design_3_14_designed_7.pdb CLEAVED
30 | hcv_all_swap_design_3_98_designed_4.pdb CLEAVED
31 | hcv_all_swap_design_3_48_designed_3.pdb CLEAVED
32 | hcv_all_swap_design_3_83_designed_6.pdb CLEAVED
33 | hcv_all_swap_design_1_82_designed_8.pdb CLEAVED
34 | hcv_all_swap_design_2_77_designed_5.pdb CLEAVED
35 | hcv_all_swap_design_3_59_designed_4.pdb CLEAVED
36 | hcv_all_swap_design_3_80_designed_4.pdb CLEAVED
37 | hcv_all_swap_design_3_28_designed_6.pdb CLEAVED
38 | hcv_all_swap_design_3_48_designed_6.pdb CLEAVED
39 | hcv_all_swap_design_3_70_designed_4.pdb CLEAVED
40 | hcv_all_swap_design_3_58_designed_4.pdb CLEAVED
41 | hcv_all_swap_design_0_45_designed_5.pdb CLEAVED
42 | hcv_all_swap_design_3_86_designed_2.pdb CLEAVED
43 | hcv_all_swap_design_3_61_designed_6.pdb CLEAVED
44 | hcv_all_swap_design_3_24_designed_6.pdb CLEAVED
45 | hcv_all_swap_design_3_59_designed_6.pdb CLEAVED
46 | hcv_all_swap_design_3_56_designed_6.pdb CLEAVED
47 | hcv_all_swap_design_3_89_designed_1.pdb CLEAVED
48 | hcv_all_swap_design_2_16_designed_7.pdb CLEAVED
49 | hcv_all_swap_design_2_89_designed_4.pdb CLEAVED
50 | hcv_all_swap_design_2_39_designed_1.pdb CLEAVED
51 | hcv_all_swap_design_2_23_designed_3.pdb CLEAVED
52 | hcv_all_swap_design_2_12_designed_0.pdb CLEAVED
53 | hcv_all_swap_design_2_90_designed_7.pdb CLEAVED
54 | hcv_all_swap_design_4_54_designed_3.pdb CLEAVED
55 | hcv_all_swap_design_2_69_designed_7.pdb CLEAVED
56 | hcv_all_swap_design_2_59_designed_3.pdb CLEAVED
57 | hcv_all_swap_design_4_74_designed_9.pdb CLEAVED
58 | hcv_all_swap_design_2_75_designed_2.pdb CLEAVED
59 | hcv_all_swap_design_2_11_designed_8.pdb CLEAVED
60 | hcv_all_swap_design_4_63_designed_0.pdb CLEAVED
61 | hcv_all_swap_design_2_38_designed_9.pdb CLEAVED
62 | hcv_all_swap_design_4_41_designed_8.pdb CLEAVED
63 | hcv_all_swap_design_4_67_designed_0.pdb CLEAVED
64 | hcv_all_swap_design_3_82_designed_0.pdb CLEAVED
65 | hcv_all_swap_design_2_40_designed_8.pdb CLEAVED
66 | hcv_all_swap_design_4_39_designed_9.pdb CLEAVED
67 | hcv_all_swap_design_0_41_designed_3.pdb CLEAVED
68 | hcv_all_swap_design_1_6_designed_8.pdb CLEAVED
69 | hcv_all_swap_design_2_65_designed_7.pdb CLEAVED
70 | hcv_all_swap_design_0_63_designed_6.pdb CLEAVED
71 | hcv_all_swap_design_2_41_designed_5.pdb CLEAVED
72 | hcv_all_swap_design_4_97_designed_4.pdb CLEAVED
73 | hcv_all_swap_design_0_78_designed_3.pdb CLEAVED
74 | hcv_all_swap_design_3_21_designed_8.pdb CLEAVED
75 | hcv_all_swap_design_4_58_designed_5.pdb CLEAVED
76 | hcv_all_swap_design_3_44_designed_7.pdb CLEAVED
77 | hcv_all_swap_design_2_1_designed_8.pdb CLEAVED
78 | hcv_all_swap_design_2_91_designed_6.pdb CLEAVED
79 | hcv_all_swap_design_3_65_designed_3.pdb CLEAVED
80 | hcv_all_swap_design_2_67_designed_4.pdb CLEAVED
81 | hcv_all_swap_design_4_14_designed_4.pdb CLEAVED
82 | hcv_all_swap_design_0_38_designed_0.pdb CLEAVED
83 | hcv_all_swap_design_2_23_designed_5.pdb CLEAVED
84 | hcv_all_swap_design_4_82_designed_1.pdb CLEAVED
85 | hcv_all_swap_design_4_22_designed_2.pdb CLEAVED
86 | hcv_all_swap_design_2_75_designed_8.pdb CLEAVED
87 | hcv_all_swap_design_3_20_designed_1.pdb CLEAVED
88 | hcv_all_swap_design_4_73_designed_7.pdb CLEAVED
89 | hcv_all_swap_design_2_46_designed_5.pdb CLEAVED
90 | hcv_all_swap_design_3_69_designed_8.pdb CLEAVED
91 | hcv_all_swap_design_4_75_designed_7.pdb CLEAVED
92 | hcv_all_swap_design_4_53_designed_1.pdb CLEAVED
93 | hcv_no_swap_sub_2_17_relaxed_1.pdb CLEAVED
94 | hcv_no_swap_sub_3_32_relaxed_1.pdb CLEAVED
95 | hcv_all_swap_sub_3_24_relaxed_1.pdb CLEAVED
96 | hcv_all_swap_design_0_16_designed_8.pdb CLEAVED
97 | hcv_all_swap_design_3_84_designed_1.pdb CLEAVED
98 | hcv_all_swap_design_4_90_designed_3.pdb CLEAVED
99 | hcv_all_swap_design_3_90_designed_1.pdb CLEAVED
100 | hcv_all_swap_design_3_60_designed_2.pdb CLEAVED
101 | hcv_all_swap_design_3_52_designed_4.pdb CLEAVED
102 | hcv_all_swap_design_3_86_designed_8.pdb CLEAVED
103 | hcv_all_swap_design_1_38_designed_3.pdb CLEAVED
104 | hcv_all_swap_design_0_9_designed_2.pdb CLEAVED
105 | hcv_all_swap_design_3_17_designed_2.pdb CLEAVED
106 | hcv_all_swap_design_3_93_designed_9.pdb CLEAVED
107 | hcv_all_swap_design_0_40_designed_0.pdb CLEAVED
108 | hcv_all_swap_design_0_94_designed_1.pdb CLEAVED
109 | hcv_all_swap_design_3_62_designed_8.pdb CLEAVED
110 | hcv_all_swap_design_3_86_designed_0.pdb CLEAVED
111 | hcv_all_swap_design_3_87_designed_0.pdb CLEAVED
112 | hcv_all_swap_design_3_34_designed_2.pdb CLEAVED
113 | hcv_all_swap_design_3_0_designed_8.pdb CLEAVED
114 | hcv_all_swap_design_3_55_designed_7.pdb CLEAVED
115 | hcv_all_swap_design_3_9_designed_7.pdb CLEAVED
116 | hcv_all_swap_design_0_94_designed_3.pdb CLEAVED
117 | hcv_all_swap_design_3_98_designed_8.pdb CLEAVED
118 | hcv_all_swap_design_1_48_designed_9.pdb CLEAVED
119 | hcv_all_swap_design_1_76_designed_9.pdb CLEAVED
120 | hcv_all_swap_design_3_34_designed_3.pdb CLEAVED
121 | hcv_all_swap_design_3_96_designed_8.pdb CLEAVED
122 | hcv_all_swap_design_4_5_designed_9.pdb CLEAVED
123 | hcv_all_swap_design_0_93_designed_0.pdb CLEAVED
124 | hcv_all_swap_design_3_73_designed_4.pdb CLEAVED
125 | hcv_all_swap_design_4_61_designed_4.pdb CLEAVED
126 | hcv_all_swap_design_2_59_designed_9.pdb CLEAVED
127 | hcv_all_swap_design_4_75_designed_5.pdb CLEAVED
128 | hcv_all_swap_design_3_58_designed_6.pdb CLEAVED
129 | hcv_all_swap_design_1_61_designed_8.pdb CLEAVED
130 | hcv_all_swap_design_3_28_designed_4.pdb CLEAVED
131 | hcv_all_swap_design_4_40_designed_0.pdb CLEAVED
132 | hcv_all_swap_design_3_0_designed_1.pdb CLEAVED
133 | hcv_all_swap_design_3_78_designed_5.pdb CLEAVED
134 | hcv_all_swap_design_4_85_designed_9.pdb CLEAVED
135 | hcv_all_swap_design_4_64_designed_5.pdb CLEAVED
136 | hcv_all_swap_design_3_39_designed_1.pdb CLEAVED
137 | hcv_all_swap_design_2_34_designed_7.pdb CLEAVED
138 | hcv_all_swap_design_2_53_designed_6.pdb CLEAVED
139 | hcv_all_swap_design_4_93_designed_2.pdb CLEAVED
140 | hcv_all_swap_design_4_4_designed_9.pdb CLEAVED
141 | hcv_all_swap_design_4_3_designed_5.pdb CLEAVED
142 | hcv_all_swap_design_3_12_designed_5.pdb CLEAVED
143 | hcv_all_swap_design_3_84_designed_0.pdb CLEAVED
144 | hcv_all_swap_design_3_6_designed_7.pdb CLEAVED
145 | hcv_all_swap_design_2_90_designed_4.pdb CLEAVED
146 | hcv_all_swap_design_4_61_designed_0.pdb CLEAVED
147 | hcv_all_swap_design_3_94_designed_4.pdb CLEAVED
148 | hcv_all_swap_design_2_1_designed_5.pdb CLEAVED
149 | hcv_all_swap_design_4_33_designed_3.pdb CLEAVED
150 | DVDAR_rand_des_00_2_relaxed_1.pdb UNCLEAVED
151 | DVDAR_rand_des_01_0_relaxed_1.pdb UNCLEAVED
152 | DVDAR_rand_des_02_0_relaxed_0.pdb UNCLEAVED
153 | DVDAR_rand_des_03_1_relaxed_0.pdb UNCLEAVED
154 | DVDAR_rand_des_04_4_relaxed_1.pdb UNCLEAVED
155 | DVDAR_rand_des_05_4_relaxed_1.pdb UNCLEAVED
156 | DVDAR_rand_des_06_3_relaxed_1.pdb UNCLEAVED
157 | DVDAR_rand_des_07_4_relaxed_0.pdb UNCLEAVED
158 | DVDAR_rand_des_08_0_relaxed_1.pdb UNCLEAVED
159 | DVDAR_rand_des_09_3_relaxed_1.pdb UNCLEAVED
160 | DVDAR_rand_des_19_3_relaxed_1.pdb UNCLEAVED
161 | DVDAR_rand_des_20_2_relaxed_0.pdb UNCLEAVED
162 | DVDAR_rand_des_21_3_relaxed_1.pdb UNCLEAVED
163 | DVDAR_rand_des_22_0_relaxed_1.pdb UNCLEAVED
164 | DVDAR_rand_des_23_0_relaxed_1.pdb UNCLEAVED
165 | DVDAR_rand_des_24_2_relaxed_1.pdb UNCLEAVED
166 | DVDAR_rand_des_25_0_relaxed_1.pdb UNCLEAVED
167 | DVDAR_rand_des_26_3_relaxed_0.pdb UNCLEAVED
168 | DVDAR_rand_des_27_0_relaxed_1.pdb UNCLEAVED
169 | DVDAR_rand_des_28_1_relaxed_1.pdb UNCLEAVED
170 | DVDAR_rand_des_29_4_relaxed_1.pdb UNCLEAVED
171 | DVDAR_rand_des_31_1_relaxed_0.pdb UNCLEAVED
172 | DVDAR_rand_des_32_2_relaxed_1.pdb UNCLEAVED
173 | DVDAR_rand_des_33_2_relaxed_0.pdb UNCLEAVED
174 | DVDAR_rand_des_34_0_relaxed_1.pdb UNCLEAVED
175 | DVDAR_rand_des_35_1_relaxed_0.pdb UNCLEAVED
176 | DVDAR_rand_des_36_1_relaxed_1.pdb UNCLEAVED
177 | DVDAR_rand_des_37_0_relaxed_1.pdb UNCLEAVED
178 | DVDAR_rand_des_38_0_relaxed_0.pdb UNCLEAVED
179 | DVDAR_rand_des_39_1_relaxed_0.pdb UNCLEAVED
180 | DVDAR_rand_des_40_2_relaxed_0.pdb UNCLEAVED
181 | DVDAR_rand_des_41_4_relaxed_1.pdb UNCLEAVED
182 | DVDAR_rand_des_42_1_relaxed_0.pdb UNCLEAVED
183 | DVDAR_rand_des_43_2_relaxed_0.pdb UNCLEAVED
184 | DVDAR_rand_des_44_0_relaxed_0.pdb UNCLEAVED
185 | DVDAR_rand_des_45_4_relaxed_1.pdb UNCLEAVED
186 | DVDAR_rand_des_46_2_relaxed_1.pdb UNCLEAVED
187 | DVDAR_rand_des_47_1_relaxed_0.pdb UNCLEAVED
188 | DVDAR_rand_des_48_1_relaxed_1.pdb UNCLEAVED
189 | DVDAR_rand_des_49_2_relaxed_0.pdb UNCLEAVED
190 | DVDAR_rand_des_50_3_relaxed_1.pdb UNCLEAVED
191 | DVDAR_rand_des_51_0_relaxed_1.pdb UNCLEAVED
192 | DVDAR_rand_des_52_0_relaxed_0.pdb UNCLEAVED
193 | DVDAR_rand_des_53_4_relaxed_1.pdb UNCLEAVED
194 | DVDAR_rand_des_54_0_relaxed_0.pdb UNCLEAVED
195 | DVDAR_rand_des_55_4_relaxed_0.pdb UNCLEAVED
196 | DVDAR_rand_des_56_1_relaxed_0.pdb UNCLEAVED
197 | DVDAR_rand_des_57_2_relaxed_0.pdb UNCLEAVED
198 | DVDAR_rand_des_58_3_relaxed_0.pdb UNCLEAVED
199 | DVDAR_rand_des_59_2_relaxed_1.pdb UNCLEAVED
200 | DVDAR_rand_des_60_4_relaxed_1.pdb UNCLEAVED
201 | DVDAR_rand_des_61_0_relaxed_1.pdb UNCLEAVED
202 | DVDAR_rand_des_62_4_relaxed_1.pdb UNCLEAVED
203 | DVDAR_rand_des_63_0_relaxed_0.pdb UNCLEAVED
204 | DVDAR_rand_des_64_2_relaxed_1.pdb UNCLEAVED
205 | DVDAR_rand_des_65_2_relaxed_1.pdb UNCLEAVED
206 | DVDAR_rand_des_66_3_relaxed_0.pdb UNCLEAVED
207 | DVDAR_rand_des_67_1_relaxed_0.pdb UNCLEAVED
208 | DVDAR_rand_des_68_0_relaxed_1.pdb UNCLEAVED
209 | DVDAR_rand_des_69_0_relaxed_1.pdb UNCLEAVED
210 | DVDAR_rand_des_70_1_relaxed_0.pdb UNCLEAVED
211 | DVDAR_rand_des_71_0_relaxed_0.pdb UNCLEAVED
212 | DVDAR_rand_des_72_4_relaxed_0.pdb UNCLEAVED
213 | DVDAR_rand_des_73_1_relaxed_1.pdb UNCLEAVED
214 | DVDAR_rand_des_74_2_relaxed_0.pdb UNCLEAVED
215 | DVDAR_rand_des_75_1_relaxed_0.pdb UNCLEAVED
216 | DVDAR_rand_des_76_1_relaxed_0.pdb UNCLEAVED
217 | DVDAR_rand_des_77_4_relaxed_1.pdb UNCLEAVED
218 | DVDAR_rand_des_78_4_relaxed_0.pdb UNCLEAVED
219 | DVDAR_rand_des_79_0_relaxed_0.pdb UNCLEAVED
220 | DVDAR_rand_des_80_4_relaxed_0.pdb UNCLEAVED
221 | DVDAR_rand_des_81_1_relaxed_1.pdb UNCLEAVED
222 | DVDAR_rand_des_82_1_relaxed_0.pdb UNCLEAVED
223 | DVDAR_rand_des_83_0_relaxed_1.pdb UNCLEAVED
224 | DVDAR_rand_des_84_3_relaxed_1.pdb UNCLEAVED
225 | DVDAR_rand_des_85_2_relaxed_0.pdb UNCLEAVED
226 | DVDAR_rand_des_86_4_relaxed_0.pdb UNCLEAVED
227 | DVDAR_rand_des_87_3_relaxed_1.pdb UNCLEAVED
228 | DVDAR_rand_des_88_1_relaxed_1.pdb UNCLEAVED
229 | DVDAR_rand_des_89_0_relaxed_0.pdb UNCLEAVED
230 | DVDAR_rand_des_90_2_relaxed_1.pdb UNCLEAVED
231 | DVDAR_rand_des_91_0_relaxed_0.pdb UNCLEAVED
232 | DVDAR_rand_des_92_2_relaxed_0.pdb UNCLEAVED
233 | DVDAR_rand_des_93_1_relaxed_0.pdb UNCLEAVED
234 | DVDAR_rand_des_94_1_relaxed_1.pdb UNCLEAVED
235 | DVDAR_rand_des_95_0_relaxed_0.pdb UNCLEAVED
236 | DVDAR_rand_des_96_1_relaxed_0.pdb UNCLEAVED
237 | DVDAR_rand_des_97_3_relaxed_0.pdb UNCLEAVED
238 | DVDAR_rand_des_98_4_relaxed_1.pdb UNCLEAVED
239 | DVDAR_rand_des_99_2_relaxed_0.pdb UNCLEAVED
240 |
--------------------------------------------------------------------------------
/graph/classifications/protease_3c_designs.txt:
--------------------------------------------------------------------------------
1 | Sequence Result
2 | 2bof_KENVVQS_4.pdb UNCLEAVED
3 | 2bof_KENVVQS_0.pdb UNCLEAVED
4 | 2bof_KENVVQS_1.pdb UNCLEAVED
5 | 2bof_KENVVQS_3.pdb UNCLEAVED
6 | 2bof_KENVVQS_2.pdb UNCLEAVED
7 | 5y4l_LPSAREG_4.pdb UNCLEAVED
8 | 5y4l_LPSAREG_1.pdb UNCLEAVED
9 | 5y4l_LPSAREG_2.pdb UNCLEAVED
10 | 5y4l_LPSAREG_0.pdb UNCLEAVED
11 | 5y4l_LPSAREG_3.pdb UNCLEAVED
12 | 5y4l_FSIAKEG_0.pdb UNCLEAVED
13 | 5y4l_FSIAKEG_3.pdb UNCLEAVED
14 | 5y4l_FSIAKEG_2.pdb UNCLEAVED
15 | 5y4l_FSIAKEG_1.pdb UNCLEAVED
16 | 5y4l_FSIAKEG_4.pdb UNCLEAVED
17 | 5y4l_LNIREEG_4.pdb CLEAVED
18 | 5y4l_LNIREEG_0.pdb CLEAVED
19 | 5y4l_LNIREEG_3.pdb CLEAVED
20 | 5y4l_LNIREEG_1.pdb CLEAVED
21 | 5y4l_LNIREEG_2.pdb CLEAVED
22 | 5y4l_LSMAKEG_3.pdb UNCLEAVED
23 | 5y4l_LSMAKEG_1.pdb UNCLEAVED
24 | 5y4l_LSMAKEG_0.pdb UNCLEAVED
25 | 5y4l_LSMAKEG_4.pdb UNCLEAVED
26 | 5y4l_LSMAKEG_2.pdb UNCLEAVED
27 | 5y4l_AGKTKEG_2.pdb UNCLEAVED
28 | 5y4l_AGKTKEG_4.pdb UNCLEAVED
29 | 5y4l_AGKTKEG_0.pdb UNCLEAVED
30 | 5y4l_AGKTKEG_1.pdb UNCLEAVED
31 | 5y4l_AGKTKEG_3.pdb UNCLEAVED
32 | 5y4l_AEKTKEG_3.pdb UNCLEAVED
33 | 5y4l_AEKTKEG_0.pdb UNCLEAVED
34 | 5y4l_AEKTKEG_4.pdb UNCLEAVED
35 | 5y4l_AEKTKEG_2.pdb UNCLEAVED
36 | 5y4l_AEKTKEG_1.pdb UNCLEAVED
37 | 5y4l_VAPLKEG_1.pdb UNCLEAVED
38 | 5y4l_VAPLKEG_0.pdb UNCLEAVED
39 | 5y4l_VAPLKEG_3.pdb UNCLEAVED
40 | 5y4l_VAPLKEG_4.pdb UNCLEAVED
41 | 5y4l_VAPLKEG_2.pdb UNCLEAVED
42 | 2b0f_VAEEAQS_3.pdb UNCLEAVED
43 | 2b0f_VAEEAQS_0.pdb UNCLEAVED
44 | 2b0f_VAEEAQS_4.pdb UNCLEAVED
45 | 2b0f_VAEEAQS_1.pdb UNCLEAVED
46 | 2b0f_VAEEAQS_2.pdb UNCLEAVED
47 | 5y4l_RFFAREG_1.pdb CLEAVED
48 | 5y4l_RFFAREG_4.pdb CLEAVED
49 | 5y4l_RFFAREG_2.pdb CLEAVED
50 | 5y4l_RFFAREG_0.pdb CLEAVED
51 | 5y4l_RFFAREG_3.pdb CLEAVED
52 | 5y4l_LGKNEEG_2.pdb UNCLEAVED
53 | 5y4l_LGKNEEG_0.pdb UNCLEAVED
54 | 5y4l_LGKNEEG_1.pdb UNCLEAVED
55 | 5y4l_LGKNEEG_4.pdb UNCLEAVED
56 | 5y4l_LGKNEEG_3.pdb UNCLEAVED
57 |
--------------------------------------------------------------------------------
/graph/classifications/protease_3c_designs_2bof.txt:
--------------------------------------------------------------------------------
1 | Sequence Result
2 | 2bof_KENVVQS_4.pdb UNCLEAVED
3 | 2bof_KENVVQS_0.pdb UNCLEAVED
4 | 2bof_KENVVQS_1.pdb UNCLEAVED
5 | 2bof_KENVVQS_3.pdb UNCLEAVED
6 | 2bof_KENVVQS_2.pdb UNCLEAVED
7 | 2b0f_VAEEAQS_3.pdb UNCLEAVED
8 | 2b0f_VAEEAQS_0.pdb UNCLEAVED
9 | 2b0f_VAEEAQS_4.pdb UNCLEAVED
10 | 2b0f_VAEEAQS_1.pdb UNCLEAVED
11 | 2b0f_VAEEAQS_2.pdb UNCLEAVED
12 |
--------------------------------------------------------------------------------
/graph/classifications/protease_3c_designs_5y4l.txt:
--------------------------------------------------------------------------------
1 | Sequence Result
2 | 5y4l_LPSAREG_4.pdb UNCLEAVED
3 | 5y4l_LPSAREG_1.pdb UNCLEAVED
4 | 5y4l_LPSAREG_2.pdb UNCLEAVED
5 | 5y4l_LPSAREG_0.pdb UNCLEAVED
6 | 5y4l_LPSAREG_3.pdb UNCLEAVED
7 | 5y4l_FSIAKEG_0.pdb UNCLEAVED
8 | 5y4l_FSIAKEG_3.pdb UNCLEAVED
9 | 5y4l_FSIAKEG_2.pdb UNCLEAVED
10 | 5y4l_FSIAKEG_1.pdb UNCLEAVED
11 | 5y4l_FSIAKEG_4.pdb UNCLEAVED
12 | 5y4l_LNIREEG_4.pdb CLEAVED
13 | 5y4l_LNIREEG_0.pdb CLEAVED
14 | 5y4l_LNIREEG_3.pdb CLEAVED
15 | 5y4l_LNIREEG_1.pdb CLEAVED
16 | 5y4l_LNIREEG_2.pdb CLEAVED
17 | 5y4l_LSMAKEG_3.pdb UNCLEAVED
18 | 5y4l_LSMAKEG_1.pdb UNCLEAVED
19 | 5y4l_LSMAKEG_0.pdb UNCLEAVED
20 | 5y4l_LSMAKEG_4.pdb UNCLEAVED
21 | 5y4l_LSMAKEG_2.pdb UNCLEAVED
22 | 5y4l_AGKTKEG_2.pdb UNCLEAVED
23 | 5y4l_AGKTKEG_4.pdb UNCLEAVED
24 | 5y4l_AGKTKEG_0.pdb UNCLEAVED
25 | 5y4l_AGKTKEG_1.pdb UNCLEAVED
26 | 5y4l_AGKTKEG_3.pdb UNCLEAVED
27 | 5y4l_AEKTKEG_3.pdb UNCLEAVED
28 | 5y4l_AEKTKEG_0.pdb UNCLEAVED
29 | 5y4l_AEKTKEG_4.pdb UNCLEAVED
30 | 5y4l_AEKTKEG_2.pdb UNCLEAVED
31 | 5y4l_AEKTKEG_1.pdb UNCLEAVED
32 | 5y4l_VAPLKEG_1.pdb UNCLEAVED
33 | 5y4l_VAPLKEG_0.pdb UNCLEAVED
34 | 5y4l_VAPLKEG_3.pdb UNCLEAVED
35 | 5y4l_VAPLKEG_4.pdb UNCLEAVED
36 | 5y4l_VAPLKEG_2.pdb UNCLEAVED
37 | 5y4l_RFFAREG_1.pdb CLEAVED
38 | 5y4l_RFFAREG_4.pdb CLEAVED
39 | 5y4l_RFFAREG_2.pdb CLEAVED
40 | 5y4l_RFFAREG_0.pdb CLEAVED
41 | 5y4l_RFFAREG_3.pdb CLEAVED
42 | 5y4l_LGKNEEG_2.pdb UNCLEAVED
43 | 5y4l_LGKNEEG_0.pdb UNCLEAVED
44 | 5y4l_LGKNEEG_1.pdb UNCLEAVED
45 | 5y4l_LGKNEEG_4.pdb UNCLEAVED
46 | 5y4l_LGKNEEG_3.pdb UNCLEAVED
47 |
--------------------------------------------------------------------------------
/graph/classifications/protease_3c_designs_oydv.txt:
--------------------------------------------------------------------------------
1 | Sequence Result
2 | oydv_EKTKEQA_2.pdb UNCLEAVED
3 | oydv_EKTKEQA_1.pdb UNCLEAVED
4 | oydv_EKTKEQA_4.pdb UNCLEAVED
5 | oydv_EKTKEQA_0.pdb UNCLEAVED
6 | oydv_EKTKEQA_3.pdb UNCLEAVED
7 |
--------------------------------------------------------------------------------
/graph/classifications/tev-ER-summarized_label_singlePDB.txt:
--------------------------------------------------------------------------------
1 | Sequence Result
2 | LTQQCQA_0.pdb CLEAVED
3 | RFVVRQA_2.pdb CLEAVED
4 | LGLIYQA_3.pdb CLEAVED
5 | CVNPFQA_0.pdb CLEAVED
6 | CVVKRQA_2.pdb CLEAVED
7 | RVVKMQA_1.pdb CLEAVED
8 | INGCYQA_0.pdb CLEAVED
9 | HESVTQA_2.pdb CLEAVED
10 | LLALMQA_0.pdb CLEAVED
11 | WRISGQA_2.pdb CLEAVED
12 | GNQISQA_1.pdb CLEAVED
13 | DPRIIQA_3.pdb CLEAVED
14 | NHKSCQA_2.pdb CLEAVED
15 | QPFVKQA_0.pdb CLEAVED
16 | TCWGGQA_2.pdb CLEAVED
17 | KGQTRQA_2.pdb CLEAVED
18 | YMKNVQA_3.pdb CLEAVED
19 | GVMIGQA_2.pdb CLEAVED
20 | GKMYMQA_1.pdb CLEAVED
21 | WFAKVQA_1.pdb CLEAVED
22 | QLDVWQA_0.pdb CLEAVED
23 | RWKVGQA_1.pdb CLEAVED
24 | LILCLQA_1.pdb CLEAVED
25 | WTVNTQA_1.pdb CLEAVED
26 | AYGIEQA_1.pdb CLEAVED
27 | ELMYSQA_0.pdb CLEAVED
28 | IHYLLQA_2.pdb CLEAVED
29 | EDWYVQA_0.pdb CLEAVED
30 | DDEQSQA_1.pdb CLEAVED
31 | IRGVQQA_2.pdb CLEAVED
32 | TLLEGQA_2.pdb CLEAVED
33 | APCTGQA_2.pdb CLEAVED
34 | QSKHSQA_1.pdb CLEAVED
35 | FCFWMQA_2.pdb CLEAVED
36 | MYVDFQA_2.pdb CLEAVED
37 | KFSVAQA_2.pdb CLEAVED
38 | VNISHQA_4.pdb CLEAVED
39 | CYTGKQA_2.pdb CLEAVED
40 | GIGDVQA_3.pdb CLEAVED
41 | MTVIRQA_4.pdb CLEAVED
42 | GNLVMQA_4.pdb CLEAVED
43 | RFSTYQA_0.pdb CLEAVED
44 | LHILRQA_0.pdb CLEAVED
45 | MTWCTQA_2.pdb CLEAVED
46 | CLWCCQA_1.pdb CLEAVED
47 | MHTSSQA_3.pdb CLEAVED
48 | LQLRLQA_0.pdb CLEAVED
49 | LRIAVQA_4.pdb CLEAVED
50 | KRLTVQA_4.pdb CLEAVED
51 | IWVILQA_2.pdb CLEAVED
52 | MYVCGQA_4.pdb CLEAVED
53 | WSVVCQA_1.pdb CLEAVED
54 | MPMVLQA_4.pdb CLEAVED
55 | GPMARQA_0.pdb CLEAVED
56 | ESSRTQA_3.pdb CLEAVED
57 | WDRYTQA_2.pdb CLEAVED
58 | QGGSRQA_4.pdb CLEAVED
59 | LSTCLQA_4.pdb CLEAVED
60 | GDMVTQA_4.pdb CLEAVED
61 | MGCVSQA_0.pdb CLEAVED
62 | SVDTSQA_3.pdb CLEAVED
63 | RIRRRQA_0.pdb CLEAVED
64 | GERSMQA_4.pdb CLEAVED
65 | KIGTSQA_1.pdb CLEAVED
66 | TAMCAQA_3.pdb CLEAVED
67 | VDRWEQA_3.pdb CLEAVED
68 | KLRPRQA_2.pdb CLEAVED
69 | LNSVSQA_0.pdb CLEAVED
70 | HHFGRQA_2.pdb CLEAVED
71 | VRGSVQA_0.pdb CLEAVED
72 | PGGSRQA_1.pdb CLEAVED
73 | YTCLQQA_2.pdb CLEAVED
74 | LGDLPQA_1.pdb CLEAVED
75 | QCLGSQA_1.pdb CLEAVED
76 | NDGLNQA_0.pdb CLEAVED
77 | RDMSGQA_1.pdb CLEAVED
78 | WTSIVQA_3.pdb CLEAVED
79 | SRFLQQA_2.pdb CLEAVED
80 | ALWKSQA_4.pdb CLEAVED
81 | LSRRMQA_0.pdb CLEAVED
82 | LGGCSQA_2.pdb CLEAVED
83 | SVIPYQA_2.pdb CLEAVED
84 | NLVHCQA_4.pdb CLEAVED
85 | AERRQQA_1.pdb CLEAVED
86 | CAYVIQA_2.pdb CLEAVED
87 | KRAPLQA_0.pdb CLEAVED
88 | LSPVSQA_3.pdb CLEAVED
89 | TPRGTQA_2.pdb CLEAVED
90 | SENCTQA_0.pdb CLEAVED
91 | YLLIFQA_3.pdb CLEAVED
92 | QVGSMQA_3.pdb CLEAVED
93 | ISLDYQA_0.pdb CLEAVED
94 | RPSFGQA_4.pdb CLEAVED
95 | CRWEGQA_3.pdb CLEAVED
96 | CLCRYQA_1.pdb CLEAVED
97 | RRWRRQA_1.pdb CLEAVED
98 | PRAVKQA_0.pdb CLEAVED
99 | RKLWTQA_0.pdb CLEAVED
100 | RVSRRQA_4.pdb CLEAVED
101 | CTNHVQA_1.pdb CLEAVED
102 | LWQFNQA_4.pdb CLEAVED
103 | SVNSWQA_0.pdb CLEAVED
104 | IFSFMQA_0.pdb CLEAVED
105 | HMRCLQA_1.pdb CLEAVED
106 | LSTKYQA_1.pdb CLEAVED
107 | LPDLIQA_0.pdb CLEAVED
108 | KLGPSQA_4.pdb UNCLEAVED
109 | SVMACQA_0.pdb UNCLEAVED
110 | TCTPKQA_4.pdb UNCLEAVED
111 | FCALTQA_0.pdb UNCLEAVED
112 | CHLRYQA_2.pdb UNCLEAVED
113 | SLLRGQA_4.pdb UNCLEAVED
114 | YMFMIQA_4.pdb UNCLEAVED
115 | IRTRVQA_2.pdb UNCLEAVED
116 | SHGQAQA_3.pdb UNCLEAVED
117 | TGILSQA_0.pdb UNCLEAVED
118 | SCNGRQA_4.pdb UNCLEAVED
119 | IVLLIQA_3.pdb UNCLEAVED
120 | EYCACQA_0.pdb UNCLEAVED
121 | WIEWCQA_2.pdb UNCLEAVED
122 | SSMFIQA_4.pdb UNCLEAVED
123 | ERLEWQA_2.pdb UNCLEAVED
124 | CERLCQA_1.pdb UNCLEAVED
125 | DLFSLQA_0.pdb UNCLEAVED
126 | LCCMLQA_0.pdb UNCLEAVED
127 | DRRQIQA_4.pdb UNCLEAVED
128 | SFISMQA_4.pdb UNCLEAVED
129 | GWGYHQA_0.pdb UNCLEAVED
130 | FLAVSQA_2.pdb UNCLEAVED
131 | PAWSFQA_0.pdb UNCLEAVED
132 | SMNFVQA_1.pdb UNCLEAVED
133 | KVWVFQA_3.pdb UNCLEAVED
134 | DLTIIQA_3.pdb UNCLEAVED
135 | ARHAVQA_4.pdb UNCLEAVED
136 | GVNSRQA_2.pdb UNCLEAVED
137 | QLPGKQA_1.pdb UNCLEAVED
138 | PAGWEQA_4.pdb UNCLEAVED
139 | WAFPSQA_2.pdb UNCLEAVED
140 | NCTESQA_4.pdb UNCLEAVED
141 | DLALTQA_3.pdb UNCLEAVED
142 | CSLHCQA_2.pdb UNCLEAVED
143 | YGTIIQA_4.pdb UNCLEAVED
144 | CMYSAQA_3.pdb UNCLEAVED
145 | SNAQGQA_2.pdb UNCLEAVED
146 | RSEVGQA_3.pdb UNCLEAVED
147 | YDRHGQA_2.pdb UNCLEAVED
148 | WVNGLQA_2.pdb UNCLEAVED
149 | NHSLGQA_1.pdb UNCLEAVED
150 | PYLAYQA_3.pdb UNCLEAVED
151 | LTVASQA_0.pdb UNCLEAVED
152 | LTKLMQA_4.pdb UNCLEAVED
153 | TRASNQA_1.pdb UNCLEAVED
154 | FAPMHQA_4.pdb UNCLEAVED
155 | VCSGVQA_0.pdb UNCLEAVED
156 | RELYPQA_2.pdb UNCLEAVED
157 | QSFHSQA_4.pdb UNCLEAVED
158 | LLVGIQA_0.pdb UNCLEAVED
159 | PANIEQA_2.pdb UNCLEAVED
160 | MTRENQA_2.pdb UNCLEAVED
161 | WLGCMQA_2.pdb UNCLEAVED
162 | SGEAYQA_4.pdb UNCLEAVED
163 | RYGCSQA_1.pdb UNCLEAVED
164 | FAISAQA_1.pdb UNCLEAVED
165 | HLRSAQA_3.pdb UNCLEAVED
166 | ACGLDQA_4.pdb UNCLEAVED
167 | RCGPEQA_0.pdb UNCLEAVED
168 | PSDAPQA_0.pdb UNCLEAVED
169 | SNWMHQA_0.pdb UNCLEAVED
170 | CYVVSQA_1.pdb UNCLEAVED
171 | IPGNDQA_1.pdb UNCLEAVED
172 | YWGRFQA_3.pdb UNCLEAVED
173 | RKPGGQA_1.pdb UNCLEAVED
174 | PSHMFQA_2.pdb UNCLEAVED
175 | CPSNYQA_0.pdb UNCLEAVED
176 | IRFVGQA_1.pdb UNCLEAVED
177 | NRLYSQA_2.pdb UNCLEAVED
178 | VSDLLQA_1.pdb UNCLEAVED
179 | NWSFRQA_2.pdb UNCLEAVED
180 | AASGRQA_0.pdb UNCLEAVED
181 | YESRRQA_0.pdb UNCLEAVED
182 | TGPIGQA_1.pdb UNCLEAVED
183 | RIQSIQA_1.pdb UNCLEAVED
184 | CVEKDQA_0.pdb UNCLEAVED
185 | FPAAGQA_2.pdb UNCLEAVED
186 | WMLSPQA_2.pdb UNCLEAVED
187 | SSCVSQA_3.pdb UNCLEAVED
188 | GSRMYQA_2.pdb UNCLEAVED
189 | MEASCQA_3.pdb UNCLEAVED
190 | MWMCGQA_1.pdb UNCLEAVED
191 | YSNRMQA_0.pdb UNCLEAVED
192 | PGRTRQA_2.pdb UNCLEAVED
193 | LPRGEQA_1.pdb UNCLEAVED
194 | PQGAYQA_4.pdb UNCLEAVED
195 | SLMPDQA_4.pdb UNCLEAVED
196 | ITSRPQA_4.pdb UNCLEAVED
197 | LRNTMQA_1.pdb UNCLEAVED
198 | LNFSVQA_3.pdb UNCLEAVED
199 | INGCFQA_3.pdb UNCLEAVED
200 | SLDSHQA_4.pdb UNCLEAVED
201 | RSTLGQA_0.pdb UNCLEAVED
202 | LIIQGQA_4.pdb UNCLEAVED
203 | TLVAAQA_4.pdb UNCLEAVED
204 | YLRMGQA_4.pdb UNCLEAVED
205 | GCMIHQA_1.pdb UNCLEAVED
206 | CGALVQA_2.pdb UNCLEAVED
207 | FGKGNQA_0.pdb UNCLEAVED
208 | RRPVCQA_1.pdb UNCLEAVED
209 | SGVGSQA_3.pdb UNCLEAVED
210 | MMFKGQA_3.pdb UNCLEAVED
211 | LVYLGQA_0.pdb UNCLEAVED
212 | GSCCVQA_1.pdb UNCLEAVED
213 | MYWNGQA_2.pdb UNCLEAVED
214 | VGLPNQA_2.pdb UNCLEAVED
215 | THCPFQA_1.pdb UNCLEAVED
216 | PSYHQQA_3.pdb UNCLEAVED
217 | SSRPRQA_4.pdb UNCLEAVED
218 | DAISRQA_0.pdb UNCLEAVED
219 | WLFYIQA_3.pdb UNCLEAVED
220 | LFRAWQA_1.pdb UNCLEAVED
221 | CGTVVQA_3.pdb UNCLEAVED
222 | YQTTGQA_4.pdb UNCLEAVED
223 | LCSTNQA_3.pdb UNCLEAVED
224 | RPVASQA_1.pdb UNCLEAVED
225 | PYITYQA_2.pdb UNCLEAVED
226 | THGHSQA_2.pdb UNCLEAVED
227 | RRYHDQA_3.pdb UNCLEAVED
228 | HFSLFQA_0.pdb UNCLEAVED
229 | HAQAHQA_2.pdb UNCLEAVED
230 | ILRAHQA_4.pdb UNCLEAVED
231 | RLMVFQA_4.pdb UNCLEAVED
232 | ASWPPQA_0.pdb UNCLEAVED
233 | SSGRSQA_4.pdb UNCLEAVED
234 | WRGSEQA_4.pdb UNCLEAVED
235 | FCVLYQA_2.pdb UNCLEAVED
236 | STLSYQA_1.pdb UNCLEAVED
237 | PPGGIQA_0.pdb UNCLEAVED
238 | HYSESQA_4.pdb UNCLEAVED
239 | SEGANQA_3.pdb UNCLEAVED
240 | ERGFFQA_2.pdb UNCLEAVED
241 | TRTVAQA_4.pdb UNCLEAVED
242 | GGVRWQA_1.pdb UNCLEAVED
243 | SYPVRQA_4.pdb UNCLEAVED
244 | EFSDVQA_1.pdb UNCLEAVED
245 | FYHTGQA_0.pdb UNCLEAVED
246 | MSVKSQA_0.pdb UNCLEAVED
247 | GECVSQA_4.pdb UNCLEAVED
248 | RSHRGQA_2.pdb UNCLEAVED
249 | RHYRSQA_3.pdb UNCLEAVED
250 | RLMLGQA_2.pdb UNCLEAVED
251 | FEGTSQA_3.pdb UNCLEAVED
252 | VWMGFQA_1.pdb UNCLEAVED
253 | SRRVSQA_0.pdb UNCLEAVED
254 | NEVEVQA_4.pdb UNCLEAVED
255 | TVSTSQA_2.pdb UNCLEAVED
256 | CQSYDQA_4.pdb UNCLEAVED
257 | LRNRTQA_3.pdb UNCLEAVED
258 | YTSGSQA_2.pdb UNCLEAVED
259 | KCTWCQA_2.pdb UNCLEAVED
260 | YWDPSQA_0.pdb UNCLEAVED
261 | ACNHPQA_0.pdb UNCLEAVED
262 | DYYNRQA_0.pdb UNCLEAVED
263 | SRFCIQA_2.pdb UNCLEAVED
264 | MMSDSQA_2.pdb UNCLEAVED
265 | GQLKWQA_0.pdb UNCLEAVED
266 | CSRWVQA_1.pdb UNCLEAVED
267 | STLYSQA_2.pdb UNCLEAVED
268 | LVLSVQA_3.pdb UNCLEAVED
269 | RGWLGQA_1.pdb UNCLEAVED
270 | HAYVLQA_4.pdb UNCLEAVED
271 | RSISSQA_4.pdb UNCLEAVED
272 | PRWKAQA_0.pdb UNCLEAVED
273 | SWHMIQA_3.pdb UNCLEAVED
274 | DRRFTQA_0.pdb UNCLEAVED
275 | LNGRGQA_0.pdb UNCLEAVED
276 | LAKQQQA_0.pdb UNCLEAVED
277 | YWLCRQA_4.pdb UNCLEAVED
278 | TNRAYQA_4.pdb UNCLEAVED
279 | SLRMIQA_1.pdb UNCLEAVED
280 | YFVCSQA_0.pdb UNCLEAVED
281 | DFFQVQA_2.pdb UNCLEAVED
282 | RKIQNQA_0.pdb UNCLEAVED
283 | TLFPCQA_4.pdb UNCLEAVED
284 | SWKMGQA_0.pdb UNCLEAVED
285 | TCNLRQA_2.pdb UNCLEAVED
286 | GDMPSQA_1.pdb UNCLEAVED
287 | RCAGMQA_1.pdb UNCLEAVED
288 | PRYCDQA_2.pdb UNCLEAVED
289 | VWTVHQA_2.pdb UNCLEAVED
290 | RLWLYQA_0.pdb UNCLEAVED
291 | LGSLWQA_4.pdb UNCLEAVED
292 | NWRCVQA_1.pdb UNCLEAVED
293 | WLLRTQA_4.pdb UNCLEAVED
294 | WPGSFQA_3.pdb UNCLEAVED
295 | HDSETQA_3.pdb UNCLEAVED
296 | LLVSCQA_0.pdb UNCLEAVED
297 | GRLVGQA_3.pdb UNCLEAVED
298 | TAVYFQA_2.pdb UNCLEAVED
299 | SGSHDQA_4.pdb UNCLEAVED
300 | TGFADQA_4.pdb UNCLEAVED
301 | RQANSQA_0.pdb UNCLEAVED
302 | NQRSAQA_0.pdb UNCLEAVED
303 |
--------------------------------------------------------------------------------
/graph/classifications/tev_design_20220912.txt:
--------------------------------------------------------------------------------
1 | Sequence Result
2 | tev_p2_c10_1_relaxed_2.pdb CLEAVED
3 | tev_p2_c10_1_relaxed_0.pdb CLEAVED
4 | tev_p2_c10_1_relaxed_5.pdb CLEAVED
5 | tev_p2_c10_1_relaxed_3.pdb CLEAVED
6 | tev_p2_c10_1_relaxed_6.pdb CLEAVED
7 | tev_p2_c10_1_relaxed_4.pdb CLEAVED
8 | tev_p2_c10_1_relaxed_1.pdb CLEAVED
9 | tev_p2_wt_relaxed_1.pdb UNCLEAVED
10 | tev_p2_wt_relaxed_0.pdb UNCLEAVED
11 | tev_p2_wt_relaxed_2.pdb UNCLEAVED
12 | tev_p2_c9_relaxed_0.pdb CLEAVED
13 | tev_p2_c2_relaxed_2.pdb CLEAVED
14 | tev_p2_c2_relaxed_1.pdb CLEAVED
15 | tev_p2_c2_relaxed_0.pdb CLEAVED
16 | tev_p2_c9_relaxed_2.pdb CLEAVED
17 | tev_p2_c10_relaxed_1.pdb CLEAVED
18 | tev_p2_c10_relaxed_2.pdb CLEAVED
19 | tev_p2_c10_relaxed_0.pdb CLEAVED
20 | tev_p2_c9_relaxed_1.pdb CLEAVED
21 | tev_p2_c2_1_relaxed_6.pdb CLEAVED
22 | tev_p2_c2_1_relaxed_3.pdb CLEAVED
23 | tev_p2_c2_1_relaxed_0.pdb CLEAVED
24 | tev_p2_c2_1_relaxed_5.pdb CLEAVED
25 | tev_p2_c2_1_relaxed_1.pdb CLEAVED
26 | tev_p2_c2_1_relaxed_4.pdb CLEAVED
27 | tev_p2_c2_1_relaxed_2.pdb CLEAVED
28 | tev_p2_c9_1_relaxed_4.pdb CLEAVED
29 | tev_p2_c9_1_relaxed_5.pdb CLEAVED
30 | tev_p2_c9_1_relaxed_6.pdb CLEAVED
31 | tev_p2_c9_1_relaxed_1.pdb CLEAVED
32 | tev_p2_c9_1_relaxed_2.pdb CLEAVED
33 | tev_p2_c9_1_relaxed_0.pdb CLEAVED
34 | tev_p2_c9_1_relaxed_3.pdb CLEAVED
35 | tev_p2_wt_1_relaxed_0.pdb UNCLEAVED
36 | tev_p2_wt_1_relaxed_1.pdb UNCLEAVED
37 | tev_p2_wt_1_relaxed_4.pdb UNCLEAVED
38 | tev_p2_wt_1_relaxed_5.pdb UNCLEAVED
39 | tev_p2_wt_1_relaxed_6.pdb UNCLEAVED
40 | tev_p2_wt_1_relaxed_2.pdb UNCLEAVED
41 | tev_p2_wt_1_relaxed_3.pdb UNCLEAVED
42 | tev_p6_c2_1_relaxed_1.pdb CLEAVED
43 | tev_p6_c2_1_relaxed_5.pdb CLEAVED
44 | tev_p6_c2_1_relaxed_6.pdb CLEAVED
45 | tev_p6_c2_1_relaxed_4.pdb CLEAVED
46 | tev_p6_c2_1_relaxed_0.pdb CLEAVED
47 | tev_p6_c2_1_relaxed_2.pdb CLEAVED
48 | tev_p6_c2_1_relaxed_3.pdb CLEAVED
49 | tev_p6_c6_relaxed_2.pdb CLEAVED
50 | tev_p6_c6_relaxed_1.pdb CLEAVED
51 | tev_p6_c6_relaxed_0.pdb CLEAVED
52 | tev_p6_c7_relaxed_2.pdb CLEAVED
53 | tev_p6_c7_relaxed_0.pdb CLEAVED
54 | tev_p6_c7_relaxed_1.pdb CLEAVED
55 | tev_p6_c2_relaxed_0.pdb CLEAVED
56 | tev_p6_c2_relaxed_1.pdb CLEAVED
57 | tev_p6_c2_relaxed_2.pdb CLEAVED
58 | tev_p6_c3_relaxed_0.pdb CLEAVED
59 | tev_p6_c3_relaxed_1.pdb CLEAVED
60 | tev_p6_c5_relaxed_0.pdb CLEAVED
61 | tev_p6_c5_relaxed_2.pdb CLEAVED
62 | tev_p6_c5_relaxed_1.pdb CLEAVED
63 | tev_p6_wt_relaxed_1.pdb UNCLEAVED
64 | tev_p6_wt_relaxed_0.pdb UNCLEAVED
65 | tev_p6_wt_relaxed_2.pdb UNCLEAVED
66 | tev_p6_c3_relaxed_2.pdb CLEAVED
67 | tev_p6_c8_relaxed_2.pdb CLEAVED
68 | tev_p6_c8_relaxed_1.pdb CLEAVED
69 | tev_p6_c8_relaxed_0.pdb CLEAVED
70 | tev_p6_c3_1_relaxed_2.pdb CLEAVED
71 | tev_p6_c3_1_relaxed_5.pdb CLEAVED
72 | tev_p6_c3_1_relaxed_6.pdb CLEAVED
73 | tev_p6_c3_1_relaxed_3.pdb CLEAVED
74 | tev_p6_c3_1_relaxed_1.pdb CLEAVED
75 | tev_p6_c3_1_relaxed_0.pdb CLEAVED
76 | tev_p6_c3_1_relaxed_4.pdb CLEAVED
77 | tev_p6_c5_1_relaxed_4.pdb CLEAVED
78 | tev_p6_c5_1_relaxed_2.pdb CLEAVED
79 | tev_p6_c5_1_relaxed_0.pdb CLEAVED
80 | tev_p6_c5_1_relaxed_1.pdb CLEAVED
81 | tev_p6_c5_1_relaxed_6.pdb CLEAVED
82 | tev_p6_c5_1_relaxed_5.pdb CLEAVED
83 | tev_p6_c5_1_relaxed_3.pdb CLEAVED
84 | tev_p6_c6_1_relaxed_5.pdb CLEAVED
85 | tev_p6_c6_1_relaxed_4.pdb CLEAVED
86 | tev_p6_c6_1_relaxed_0.pdb CLEAVED
87 | tev_p6_c6_1_relaxed_6.pdb CLEAVED
88 | tev_p6_c6_1_relaxed_2.pdb CLEAVED
89 | tev_p6_c6_1_relaxed_1.pdb CLEAVED
90 | tev_p6_c6_1_relaxed_3.pdb CLEAVED
91 | tev_p6_c7_1_relaxed_1.pdb CLEAVED
92 | tev_p6_c7_1_relaxed_6.pdb CLEAVED
93 | tev_p6_c7_1_relaxed_4.pdb CLEAVED
94 | tev_p6_c7_1_relaxed_0.pdb CLEAVED
95 | tev_p6_c7_1_relaxed_2.pdb CLEAVED
96 | tev_p6_c7_1_relaxed_5.pdb CLEAVED
97 | tev_p6_c7_1_relaxed_3.pdb CLEAVED
98 | tev_p6_c8_1_relaxed_5.pdb CLEAVED
99 | tev_p6_c8_1_relaxed_6.pdb CLEAVED
100 | tev_p6_c8_1_relaxed_2.pdb CLEAVED
101 | tev_p6_c8_1_relaxed_1.pdb CLEAVED
102 | tev_p6_c8_1_relaxed_0.pdb CLEAVED
103 | tev_p6_c8_1_relaxed_4.pdb CLEAVED
104 | tev_p6_c8_1_relaxed_3.pdb CLEAVED
105 | tev_p6_wt_1_relaxed_1.pdb UNCLEAVED
106 | tev_p6_wt_1_relaxed_0.pdb UNCLEAVED
107 | tev_p6_wt_1_relaxed_5.pdb UNCLEAVED
108 | tev_p6_wt_1_relaxed_6.pdb UNCLEAVED
109 | tev_p6_wt_1_relaxed_2.pdb UNCLEAVED
110 | tev_p6_wt_1_relaxed_4.pdb UNCLEAVED
111 | tev_p6_wt_1_relaxed_3.pdb UNCLEAVED
112 | tev_wt_wt_relaxed_7.pdb CLEAVED
113 | tev_wt_wt_relaxed_5.pdb CLEAVED
114 | tev_wt_wt_relaxed_0.pdb CLEAVED
115 | tev_wt_wt_relaxed_4.pdb CLEAVED
116 | tev_wt_wt_relaxed_6.pdb CLEAVED
117 | tev_wt_wt_relaxed_9.pdb CLEAVED
118 | tev_wt_wt_relaxed_1.pdb CLEAVED
119 | tev_wt_wt_relaxed_2.pdb CLEAVED
120 | tev_wt_wt_relaxed_3.pdb CLEAVED
121 | tev_wt_wt_relaxed_8.pdb CLEAVED
122 |
--------------------------------------------------------------------------------
/graph/classifications/tev_design_for_validation_dual_directions_cleavage.txt:
--------------------------------------------------------------------------------
1 | Sequence Result
2 | p2_c2_5.pdb CLEAVED
3 | p2_c2_2.pdb CLEAVED
4 | p2_c2_4.pdb CLEAVED
5 | p2_c2_7.pdb CLEAVED
6 | p2_c2_8.pdb CLEAVED
7 | p2_c2_1.pdb CLEAVED
8 | p2_c2_0.pdb CLEAVED
9 | p2_c2_3.pdb CLEAVED
10 | p2_c2_6.pdb CLEAVED
11 | p2_c2_9.pdb CLEAVED
12 | p6_c5_6.pdb CLEAVED
13 | p6_c5_9.pdb CLEAVED
14 | p6_c5_7.pdb CLEAVED
15 | p6_c5_8.pdb CLEAVED
16 | p6_c5_5.pdb CLEAVED
17 | p6_c5_1.pdb CLEAVED
18 | p6_c5_0.pdb CLEAVED
19 | p6_c5_2.pdb CLEAVED
20 | p6_c5_4.pdb CLEAVED
21 | p6_c5_3.pdb CLEAVED
22 | p2_c9_4.pdb CLEAVED
23 | p2_c9_3.pdb CLEAVED
24 | p2_c9_5.pdb CLEAVED
25 | p2_c9_9.pdb CLEAVED
26 | p2_c9_6.pdb CLEAVED
27 | p2_c9_8.pdb CLEAVED
28 | p2_c9_0.pdb CLEAVED
29 | p2_c9_2.pdb CLEAVED
30 | p2_c9_7.pdb CLEAVED
31 | p2_c9_1.pdb CLEAVED
32 | P6_c3_9.pdb CLEAVED
33 | P6_c3_3.pdb CLEAVED
34 | P6_c3_1.pdb CLEAVED
35 | P6_c3_0.pdb CLEAVED
36 | P6_c3_4.pdb CLEAVED
37 | P6_c3_5.pdb CLEAVED
38 | P6_c3_2.pdb CLEAVED
39 | P6_c3_7.pdb CLEAVED
40 | P6_c3_6.pdb CLEAVED
41 | P6_c3_8.pdb CLEAVED
42 | p6_c7_7.pdb CLEAVED
43 | p6_c7_3.pdb CLEAVED
44 | p6_c7_0.pdb CLEAVED
45 | p6_c7_5.pdb CLEAVED
46 | p6_c7_9.pdb CLEAVED
47 | p6_c7_1.pdb CLEAVED
48 | p6_c7_8.pdb CLEAVED
49 | p6_c7_2.pdb CLEAVED
50 | p6_c7_4.pdb CLEAVED
51 | p6_c7_6.pdb CLEAVED
52 | P6_c2_7.pdb CLEAVED
53 | P6_c2_0.pdb CLEAVED
54 | P6_c2_4.pdb CLEAVED
55 | P6_c2_9.pdb CLEAVED
56 | P6_c2_5.pdb CLEAVED
57 | P6_c2_1.pdb CLEAVED
58 | P6_c2_3.pdb CLEAVED
59 | P6_c2_8.pdb CLEAVED
60 | P6_c2_6.pdb CLEAVED
61 | P6_c2_2.pdb CLEAVED
62 | p2_c10_2.pdb CLEAVED
63 | p2_c10_8.pdb CLEAVED
64 | p2_c10_1.pdb CLEAVED
65 | p2_c10_0.pdb CLEAVED
66 | p2_c10_9.pdb CLEAVED
67 | p2_c10_5.pdb CLEAVED
68 | p2_c10_6.pdb CLEAVED
69 | p2_c10_4.pdb CLEAVED
70 | p2_c10_7.pdb CLEAVED
71 | p2_c10_3.pdb CLEAVED
72 | p6_c6_1.pdb CLEAVED
73 | p6_c6_6.pdb CLEAVED
74 | p6_c6_9.pdb CLEAVED
75 | p6_c6_4.pdb CLEAVED
76 | p6_c6_2.pdb CLEAVED
77 | p6_c6_5.pdb CLEAVED
78 | p6_c6_3.pdb CLEAVED
79 | p6_c6_7.pdb CLEAVED
80 | p6_c6_8.pdb CLEAVED
81 | p6_c6_0.pdb CLEAVED
82 | p6_c8_5.pdb CLEAVED
83 | p6_c8_6.pdb CLEAVED
84 | p6_c8_1.pdb CLEAVED
85 | p6_c8_8.pdb CLEAVED
86 | p6_c8_4.pdb CLEAVED
87 | p6_c8_9.pdb CLEAVED
88 | p6_c8_3.pdb CLEAVED
89 | p6_c8_0.pdb CLEAVED
90 | p6_c8_2.pdb CLEAVED
91 | p6_c8_7.pdb CLEAVED
92 | p3_c2_5.pdb UNCLEAVED
93 | p3_c2_2.pdb UNCLEAVED
94 | p3_c2_4.pdb UNCLEAVED
95 | p3_c2_7.pdb UNCLEAVED
96 | p3_c2_8.pdb UNCLEAVED
97 | p3_c2_1.pdb UNCLEAVED
98 | p3_c2_0.pdb UNCLEAVED
99 | p3_c2_3.pdb UNCLEAVED
100 | p3_c2_6.pdb UNCLEAVED
101 | p3_c2_9.pdb UNCLEAVED
102 | p3_c3_9.pdb UNCLEAVED
103 | p3_c3_3.pdb UNCLEAVED
104 | p3_c3_1.pdb UNCLEAVED
105 | p3_c3_0.pdb UNCLEAVED
106 | p3_c3_4.pdb UNCLEAVED
107 | p3_c3_5.pdb UNCLEAVED
108 | p3_c3_2.pdb UNCLEAVED
109 | p3_c3_7.pdb UNCLEAVED
110 | p3_c3_6.pdb UNCLEAVED
111 | p3_c3_8.pdb UNCLEAVED
112 | p3_c1_2.pdb UNCLEAVED
113 | p3_c1_8.pdb UNCLEAVED
114 | p3_c1_1.pdb UNCLEAVED
115 | p3_c1_0.pdb UNCLEAVED
116 | p3_c1_5.pdb UNCLEAVED
117 | p3_c1_7.pdb UNCLEAVED
118 | p3_c1_4.pdb UNCLEAVED
119 | p3_c1_3.pdb UNCLEAVED
120 | p3_c1_9.pdb UNCLEAVED
121 | p3_c1_6.pdb UNCLEAVED
122 | tev_p2_V209I_W211L_M218D_relaxed_0.pdb UNCLEAVED
123 | tev_p2_V209I_W211L_M218D_relaxed_1.pdb UNCLEAVED
124 | tev_p2_V209I_W211L_M218D_relaxed_2.pdb UNCLEAVED
125 | tev_p2_V209I_W211L_M218D_relaxed_3.pdb UNCLEAVED
126 | tev_p2_V209I_W211L_M218D_relaxed_4.pdb UNCLEAVED
127 | tev_p2_V209I_W211L_M218D_relaxed_5.pdb UNCLEAVED
128 | tev_p2_V209I_W211L_M218D_relaxed_6.pdb UNCLEAVED
129 | tev_p2_V209I_W211L_M218D_relaxed_7.pdb UNCLEAVED
130 | tev_p2_V209I_W211L_M218D_relaxed_8.pdb UNCLEAVED
131 | tev_p2_V209I_W211L_M218D_relaxed_9.pdb UNCLEAVED
132 | tev_p2_V209S_W211R_G213R_relaxed_0.pdb UNCLEAVED
133 | tev_p2_V209S_W211R_G213R_relaxed_1.pdb UNCLEAVED
134 | tev_p2_V209S_W211R_G213R_relaxed_2.pdb UNCLEAVED
135 | tev_p2_V209S_W211R_G213R_relaxed_3.pdb UNCLEAVED
136 | tev_p2_V209S_W211R_G213R_relaxed_4.pdb UNCLEAVED
137 | tev_p2_V209S_W211R_G213R_relaxed_5.pdb UNCLEAVED
138 | tev_p2_V209S_W211R_G213R_relaxed_6.pdb UNCLEAVED
139 | tev_p2_V209S_W211R_G213R_relaxed_7.pdb UNCLEAVED
140 | tev_p2_V209S_W211R_G213R_relaxed_8.pdb UNCLEAVED
141 | tev_p2_V209S_W211R_G213R_relaxed_9.pdb UNCLEAVED
142 | tev_p2_V209S_W211R_M218D_relaxed_0.pdb UNCLEAVED
143 | tev_p2_V209S_W211R_M218D_relaxed_1.pdb UNCLEAVED
144 | tev_p2_V209S_W211R_M218D_relaxed_2.pdb UNCLEAVED
145 | tev_p2_V209S_W211R_M218D_relaxed_3.pdb UNCLEAVED
146 | tev_p2_V209S_W211R_M218D_relaxed_4.pdb UNCLEAVED
147 | tev_p2_V209S_W211R_M218D_relaxed_5.pdb UNCLEAVED
148 | tev_p2_V209S_W211R_M218D_relaxed_6.pdb UNCLEAVED
149 | tev_p2_V209S_W211R_M218D_relaxed_7.pdb UNCLEAVED
150 | tev_p2_V209S_W211R_M218D_relaxed_8.pdb UNCLEAVED
151 | tev_p2_V209S_W211R_M218D_relaxed_9.pdb UNCLEAVED
152 | tev_p2_V209S_W211R_relaxed_0.pdb UNCLEAVED
153 | tev_p2_V209S_W211R_relaxed_1.pdb UNCLEAVED
154 | tev_p2_V209S_W211R_relaxed_2.pdb UNCLEAVED
155 | tev_p2_V209S_W211R_relaxed_3.pdb UNCLEAVED
156 | tev_p2_V209S_W211R_relaxed_4.pdb UNCLEAVED
157 | tev_p2_V209S_W211R_relaxed_5.pdb UNCLEAVED
158 | tev_p2_V209S_W211R_relaxed_6.pdb UNCLEAVED
159 | tev_p2_V209S_W211R_relaxed_7.pdb UNCLEAVED
160 | tev_p2_V209S_W211R_relaxed_8.pdb UNCLEAVED
161 | tev_p2_V209S_W211R_relaxed_9.pdb UNCLEAVED
162 | tev_p6_F172Y_N174H_relaxed_0.pdb UNCLEAVED
163 | tev_p6_F172Y_N174H_relaxed_1.pdb UNCLEAVED
164 | tev_p6_F172Y_N174H_relaxed_2.pdb UNCLEAVED
165 | tev_p6_F172Y_N174H_relaxed_3.pdb UNCLEAVED
166 | tev_p6_F172Y_N174H_relaxed_4.pdb UNCLEAVED
167 | tev_p6_F172Y_N174H_relaxed_5.pdb UNCLEAVED
168 | tev_p6_F172Y_N174H_relaxed_6.pdb UNCLEAVED
169 | tev_p6_F172Y_N174H_relaxed_7.pdb UNCLEAVED
170 | tev_p6_F172Y_N174H_relaxed_8.pdb UNCLEAVED
171 | tev_p6_F172Y_N174H_relaxed_9.pdb UNCLEAVED
172 | tev_p6_K141E_T175P_relaxed_0.pdb UNCLEAVED
173 | tev_p6_K141E_T175P_relaxed_1.pdb UNCLEAVED
174 | tev_p6_K141E_T175P_relaxed_2.pdb UNCLEAVED
175 | tev_p6_K141E_T175P_relaxed_3.pdb UNCLEAVED
176 | tev_p6_K141E_T175P_relaxed_4.pdb UNCLEAVED
177 | tev_p6_K141E_T175P_relaxed_5.pdb UNCLEAVED
178 | tev_p6_K141E_T175P_relaxed_6.pdb UNCLEAVED
179 | tev_p6_K141E_T175P_relaxed_7.pdb UNCLEAVED
180 | tev_p6_K141E_T175P_relaxed_8.pdb UNCLEAVED
181 | tev_p6_K141E_T175P_relaxed_9.pdb UNCLEAVED
182 | tev_p6_T173A_T175A_N176D_relaxed_0.pdb UNCLEAVED
183 | tev_p6_T173A_T175A_N176D_relaxed_1.pdb UNCLEAVED
184 | tev_p6_T173A_T175A_N176D_relaxed_2.pdb UNCLEAVED
185 | tev_p6_T173A_T175A_N176D_relaxed_3.pdb UNCLEAVED
186 | tev_p6_T173A_T175A_N176D_relaxed_4.pdb UNCLEAVED
187 | tev_p6_T173A_T175A_N176D_relaxed_5.pdb UNCLEAVED
188 | tev_p6_T173A_T175A_N176D_relaxed_6.pdb UNCLEAVED
189 | tev_p6_T173A_T175A_N176D_relaxed_7.pdb UNCLEAVED
190 | tev_p6_T173A_T175A_N176D_relaxed_8.pdb UNCLEAVED
191 | tev_p6_T173A_T175A_N176D_relaxed_9.pdb UNCLEAVED
192 |
--------------------------------------------------------------------------------
/graph/classifications/tev_design_negpool.txt:
--------------------------------------------------------------------------------
1 | Sequence Result
2 | tev_p2_V209I_W211L_M218D_relaxed_0.pdb None
3 | tev_p2_V209I_W211L_M218D_relaxed_1.pdb None
4 | tev_p2_V209I_W211L_M218D_relaxed_2.pdb None
5 | tev_p2_V209I_W211L_M218D_relaxed_3.pdb None
6 | tev_p2_V209I_W211L_M218D_relaxed_4.pdb None
7 | tev_p2_V209I_W211L_M218D_relaxed_5.pdb None
8 | tev_p2_V209I_W211L_M218D_relaxed_6.pdb None
9 | tev_p2_V209I_W211L_M218D_relaxed_7.pdb None
10 | tev_p2_V209I_W211L_M218D_relaxed_8.pdb None
11 | tev_p2_V209I_W211L_M218D_relaxed_9.pdb None
12 | tev_p2_V209S_W211R_G213R_relaxed_0.pdb None
13 | tev_p2_V209S_W211R_G213R_relaxed_1.pdb None
14 | tev_p2_V209S_W211R_G213R_relaxed_2.pdb None
15 | tev_p2_V209S_W211R_G213R_relaxed_3.pdb None
16 | tev_p2_V209S_W211R_G213R_relaxed_4.pdb None
17 | tev_p2_V209S_W211R_G213R_relaxed_5.pdb None
18 | tev_p2_V209S_W211R_G213R_relaxed_6.pdb None
19 | tev_p2_V209S_W211R_G213R_relaxed_7.pdb None
20 | tev_p2_V209S_W211R_G213R_relaxed_8.pdb None
21 | tev_p2_V209S_W211R_G213R_relaxed_9.pdb None
22 | tev_p2_V209S_W211R_M218D_relaxed_0.pdb None
23 | tev_p2_V209S_W211R_M218D_relaxed_1.pdb None
24 | tev_p2_V209S_W211R_M218D_relaxed_2.pdb None
25 | tev_p2_V209S_W211R_M218D_relaxed_3.pdb None
26 | tev_p2_V209S_W211R_M218D_relaxed_4.pdb None
27 | tev_p2_V209S_W211R_M218D_relaxed_5.pdb None
28 | tev_p2_V209S_W211R_M218D_relaxed_6.pdb None
29 | tev_p2_V209S_W211R_M218D_relaxed_7.pdb None
30 | tev_p2_V209S_W211R_M218D_relaxed_8.pdb None
31 | tev_p2_V209S_W211R_M218D_relaxed_9.pdb None
32 | tev_p2_V209S_W211R_relaxed_0.pdb None
33 | tev_p2_V209S_W211R_relaxed_1.pdb None
34 | tev_p2_V209S_W211R_relaxed_2.pdb None
35 | tev_p2_V209S_W211R_relaxed_3.pdb None
36 | tev_p2_V209S_W211R_relaxed_4.pdb None
37 | tev_p2_V209S_W211R_relaxed_5.pdb None
38 | tev_p2_V209S_W211R_relaxed_6.pdb None
39 | tev_p2_V209S_W211R_relaxed_7.pdb None
40 | tev_p2_V209S_W211R_relaxed_8.pdb None
41 | tev_p2_V209S_W211R_relaxed_9.pdb None
42 | tev_p6_F172Y_N174H_relaxed_0.pdb None
43 | tev_p6_F172Y_N174H_relaxed_1.pdb None
44 | tev_p6_F172Y_N174H_relaxed_2.pdb None
45 | tev_p6_F172Y_N174H_relaxed_3.pdb None
46 | tev_p6_F172Y_N174H_relaxed_4.pdb None
47 | tev_p6_F172Y_N174H_relaxed_5.pdb None
48 | tev_p6_F172Y_N174H_relaxed_6.pdb None
49 | tev_p6_F172Y_N174H_relaxed_7.pdb None
50 | tev_p6_F172Y_N174H_relaxed_8.pdb None
51 | tev_p6_F172Y_N174H_relaxed_9.pdb None
52 | tev_p6_K141E_T175P_relaxed_0.pdb None
53 | tev_p6_K141E_T175P_relaxed_1.pdb None
54 | tev_p6_K141E_T175P_relaxed_2.pdb None
55 | tev_p6_K141E_T175P_relaxed_3.pdb None
56 | tev_p6_K141E_T175P_relaxed_4.pdb None
57 | tev_p6_K141E_T175P_relaxed_5.pdb None
58 | tev_p6_K141E_T175P_relaxed_6.pdb None
59 | tev_p6_K141E_T175P_relaxed_7.pdb None
60 | tev_p6_K141E_T175P_relaxed_8.pdb None
61 | tev_p6_K141E_T175P_relaxed_9.pdb None
62 | tev_p6_T173A_T175A_N176D_relaxed_0.pdb None
63 | tev_p6_T173A_T175A_N176D_relaxed_1.pdb None
64 | tev_p6_T173A_T175A_N176D_relaxed_2.pdb None
65 | tev_p6_T173A_T175A_N176D_relaxed_3.pdb None
66 | tev_p6_T173A_T175A_N176D_relaxed_4.pdb None
67 | tev_p6_T173A_T175A_N176D_relaxed_5.pdb None
68 | tev_p6_T173A_T175A_N176D_relaxed_6.pdb None
69 | tev_p6_T173A_T175A_N176D_relaxed_7.pdb None
70 | tev_p6_T173A_T175A_N176D_relaxed_8.pdb None
71 | tev_p6_T173A_T175A_N176D_relaxed_9.pdb None
72 |
--------------------------------------------------------------------------------
/graph/classifications/tev_oydv_raw_designs.txt:
--------------------------------------------------------------------------------
1 | Sequence Result
2 | tev_o_s2_single_47_designed_6.pdb None
3 | tev_o_s2_single_59_designed_1.pdb None
4 | tev_o_s2_single_60_designed_5.pdb None
5 | tev_o_s2_single_64_designed_8.pdb None
6 | tev_o_s2_single_81_designed_0.pdb None
7 | tev_o_s2_single_81_designed_6.pdb None
8 | tev_o_s2_single_81_designed_7.pdb None
9 | tev_o_s2_single_86_designed_2.pdb None
10 | tev_o_s2_single_88_designed_2.pdb None
11 | tev_o_s2_single_91_designed_5.pdb None
12 | tev_o_s2_single_92_designed_8.pdb None
13 | tev_o_s2_single_96_designed_5.pdb None
14 | tev_o_s3_single_0_designed_7.pdb None
15 | tev_o_s3_single_1_designed_1.pdb None
16 | tev_o_s3_single_1_designed_6.pdb None
17 | tev_o_s3_single_1_designed_9.pdb None
18 | tev_o_s3_single_20_designed_0.pdb None
19 | tev_o_s3_single_20_designed_4.pdb None
20 | tev_o_s3_single_22_designed_3.pdb None
21 | tev_o_s3_single_22_designed_9.pdb None
22 | tev_o_s3_single_23_designed_7.pdb None
23 | tev_o_s3_single_25_designed_4.pdb None
24 | tev_o_s3_single_25_designed_9.pdb None
25 | tev_o_s3_single_29_designed_6.pdb None
26 | tev_o_s3_single_2_designed_7.pdb None
27 | tev_o_s3_single_34_designed_1.pdb None
28 | tev_o_s3_single_35_designed_8.pdb None
29 | tev_o_s3_single_36_designed_1.pdb None
30 | tev_o_s3_single_36_designed_3.pdb None
31 | tev_o_s3_single_41_designed_7.pdb None
32 | tev_o_s3_single_41_designed_8.pdb None
33 | tev_o_s3_single_42_designed_1.pdb None
34 | tev_o_s3_single_42_designed_2.pdb None
35 | tev_o_s3_single_42_designed_8.pdb None
36 | tev_o_s3_single_43_designed_3.pdb None
37 | tev_o_s3_single_45_designed_0.pdb None
38 | tev_o_s3_single_46_designed_6.pdb None
39 | tev_o_s3_single_4_designed_6.pdb None
40 | tev_o_s3_single_4_designed_9.pdb None
41 | tev_o_s3_single_50_designed_5.pdb None
42 | tev_o_s3_single_51_designed_9.pdb None
43 | tev_o_s3_single_55_designed_6.pdb None
44 | tev_o_s3_single_56_designed_3.pdb None
45 | tev_o_s3_single_56_designed_9.pdb None
46 | tev_o_s3_single_58_designed_5.pdb None
47 | tev_o_s3_single_59_designed_9.pdb None
48 | tev_o_s3_single_5_designed_0.pdb None
49 | tev_o_s3_single_60_designed_2.pdb None
50 | tev_o_s3_single_60_designed_9.pdb None
51 | tev_o_s3_single_61_designed_2.pdb None
52 | tev_o_s3_single_61_designed_4.pdb None
53 | tev_o_s3_single_62_designed_4.pdb None
54 | tev_o_s3_single_65_designed_9.pdb None
55 | tev_o_s3_single_66_designed_0.pdb None
56 | tev_o_s3_single_66_designed_4.pdb None
57 | tev_o_s3_single_66_designed_6.pdb None
58 | tev_o_s3_single_69_designed_4.pdb None
59 | tev_o_s3_single_69_designed_9.pdb None
60 | tev_o_s3_single_76_designed_3.pdb None
61 | tev_o_s3_single_77_designed_9.pdb None
62 | tev_o_s3_single_78_designed_0.pdb None
63 | tev_o_s3_single_78_designed_2.pdb None
64 | tev_o_s3_single_81_designed_7.pdb None
65 | tev_o_s3_single_81_designed_9.pdb None
66 | tev_o_s3_single_82_designed_7.pdb None
67 | tev_o_s3_single_83_designed_4.pdb None
68 | tev_o_s3_single_84_designed_6.pdb None
69 | tev_o_s3_single_86_designed_4.pdb None
70 | tev_o_s3_single_89_designed_4.pdb None
71 | tev_o_s3_single_8_designed_0.pdb None
72 | tev_o_s3_single_8_designed_7.pdb None
73 | tev_o_s3_single_91_designed_6.pdb None
74 | tev_o_s3_single_95_designed_5.pdb None
75 | tev_o_s3_single_96_designed_1.pdb None
76 | tev_o_s3_single_97_designed_8.pdb None
77 | tev_o_s3_single_9_designed_5.pdb None
78 | tev_o_s4_single_94_designed_0.pdb None
79 | tev_o_s5_single_50_designed_1.pdb None
80 | tev_o_s5_single_50_designed_9.pdb None
81 | tev_o_s5_single_90_designed_4.pdb None
82 | tev_o_s6_single_6_designed_0.pdb None
83 | tev_o_s6_single_74_designed_0.pdb None
84 | tev_o_s6_single_95_designed_3.pdb None
85 | tev_o_s6_single_96_designed_3.pdb None
86 | tev_w_full_0_designed_0.pdb None
87 | tev_w_full_102_designed_0.pdb None
88 | tev_w_full_102_designed_1.pdb None
89 | tev_w_full_102_designed_2.pdb None
90 | tev_w_full_103_designed_1.pdb None
91 | tev_w_full_106_designed_3.pdb None
92 | tev_w_full_107_designed_1.pdb None
93 | tev_w_full_107_designed_3.pdb None
94 | tev_w_full_109_designed_1.pdb None
95 | tev_w_full_10_designed_0.pdb None
96 | tev_w_full_114_designed_1.pdb None
97 | tev_w_full_114_designed_2.pdb None
98 | tev_w_full_114_designed_3.pdb None
99 | tev_w_full_116_designed_2.pdb None
100 | tev_w_full_116_designed_3.pdb None
101 | tev_w_full_121_designed_2.pdb None
102 | tev_w_full_122_designed_0.pdb None
103 | tev_w_full_125_designed_1.pdb None
104 | tev_w_full_126_designed_0.pdb None
105 | tev_w_full_126_designed_3.pdb None
106 | tev_w_full_129_designed_2.pdb None
107 | tev_w_full_130_designed_3.pdb None
108 | tev_w_full_131_designed_1.pdb None
109 | tev_w_full_132_designed_2.pdb None
110 | tev_w_full_132_designed_3.pdb None
111 | tev_w_full_134_designed_0.pdb None
112 | tev_w_full_136_designed_2.pdb None
113 | tev_w_full_137_designed_0.pdb None
114 | tev_w_full_138_designed_0.pdb None
115 | tev_w_full_138_designed_2.pdb None
116 | tev_w_full_139_designed_0.pdb None
117 | tev_w_full_139_designed_1.pdb None
118 | tev_w_full_13_designed_0.pdb None
119 | tev_w_full_13_designed_3.pdb None
120 | tev_w_full_141_designed_2.pdb None
121 | tev_w_full_143_designed_0.pdb None
122 | tev_w_full_144_designed_0.pdb None
123 | tev_w_full_146_designed_0.pdb None
124 | tev_w_full_153_designed_2.pdb None
125 | tev_w_full_155_designed_2.pdb None
126 | tev_w_full_158_designed_1.pdb None
127 | tev_w_full_161_designed_1.pdb None
128 | tev_w_full_170_designed_1.pdb None
129 | tev_w_full_173_designed_0.pdb None
130 | tev_w_full_173_designed_3.pdb None
131 | tev_w_full_178_designed_1.pdb None
132 | tev_w_full_182_designed_0.pdb None
133 | tev_w_full_187_designed_0.pdb None
134 | tev_w_full_188_designed_0.pdb None
135 | tev_w_full_188_designed_3.pdb None
136 | tev_w_full_190_designed_2.pdb None
137 | tev_w_full_200_designed_0.pdb None
138 | tev_w_full_205_designed_2.pdb None
139 | tev_w_full_206_designed_0.pdb None
140 | tev_w_full_206_designed_3.pdb None
141 | tev_w_full_20_designed_3.pdb None
142 | tev_w_full_210_designed_2.pdb None
143 | tev_w_full_211_designed_0.pdb None
144 | tev_w_full_211_designed_3.pdb None
145 | tev_w_full_212_designed_2.pdb None
146 | tev_w_full_213_designed_0.pdb None
147 | tev_w_full_214_designed_0.pdb None
148 | tev_w_full_220_designed_3.pdb None
149 | tev_w_full_221_designed_1.pdb None
150 | tev_w_full_222_designed_2.pdb None
151 | tev_w_full_225_designed_3.pdb None
152 | tev_w_full_226_designed_0.pdb None
153 | tev_w_full_226_designed_1.pdb None
154 | tev_w_full_229_designed_1.pdb None
155 | tev_w_full_230_designed_2.pdb None
156 | tev_w_full_232_designed_1.pdb None
157 | tev_w_full_232_designed_3.pdb None
158 | tev_w_full_233_designed_1.pdb None
159 | tev_w_full_234_designed_3.pdb None
160 | tev_w_full_236_designed_3.pdb None
161 | tev_w_full_238_designed_1.pdb None
162 | tev_w_full_23_designed_2.pdb None
163 | tev_w_full_23_designed_3.pdb None
164 | tev_w_full_241_designed_2.pdb None
165 | tev_w_full_242_designed_3.pdb None
166 | tev_w_full_244_designed_1.pdb None
167 | tev_w_full_244_designed_2.pdb None
168 | tev_w_full_249_designed_0.pdb None
169 | tev_w_full_24_designed_0.pdb None
170 | tev_w_full_253_designed_3.pdb None
171 | tev_w_full_254_designed_2.pdb None
172 | tev_w_full_256_designed_1.pdb None
173 | tev_w_full_261_designed_2.pdb None
174 | tev_w_full_265_designed_3.pdb None
175 | tev_w_full_267_designed_0.pdb None
176 | tev_w_full_267_designed_1.pdb None
177 | tev_w_full_269_designed_1.pdb None
178 | tev_w_full_270_designed_2.pdb None
179 | tev_w_full_271_designed_1.pdb None
180 | tev_w_full_273_designed_1.pdb None
181 | tev_w_full_274_designed_0.pdb None
182 | tev_w_full_276_designed_3.pdb None
183 | tev_w_full_277_designed_1.pdb None
184 | tev_w_full_278_designed_1.pdb None
185 | tev_w_full_279_designed_0.pdb None
186 | tev_w_full_279_designed_1.pdb None
187 | tev_w_full_279_designed_3.pdb None
188 | tev_w_full_289_designed_1.pdb None
189 | tev_w_full_295_designed_0.pdb None
190 | tev_w_full_296_designed_0.pdb None
191 | tev_w_full_296_designed_3.pdb None
192 | tev_w_full_299_designed_3.pdb None
193 | tev_w_full_303_designed_3.pdb None
194 | tev_w_full_304_designed_2.pdb None
195 | tev_w_full_306_designed_0.pdb None
196 | tev_w_full_309_designed_1.pdb None
197 | tev_w_full_30_designed_2.pdb None
198 | tev_w_full_310_designed_1.pdb None
199 | tev_w_full_311_designed_3.pdb None
200 | tev_w_full_313_designed_2.pdb None
201 | tev_w_full_322_designed_3.pdb None
202 | tev_w_full_323_designed_1.pdb None
203 | tev_w_full_326_designed_3.pdb None
204 | tev_w_full_328_designed_0.pdb None
205 | tev_w_full_331_designed_3.pdb None
206 | tev_w_full_333_designed_2.pdb None
207 | tev_w_full_348_designed_1.pdb None
208 | tev_w_full_34_designed_3.pdb None
209 | tev_w_full_351_designed_0.pdb None
210 | tev_w_full_355_designed_2.pdb None
211 | tev_w_full_356_designed_1.pdb None
212 | tev_w_full_362_designed_3.pdb None
213 | tev_w_full_366_designed_2.pdb None
214 | tev_w_full_375_designed_1.pdb None
215 | tev_w_full_380_designed_3.pdb None
216 | tev_w_full_383_designed_0.pdb None
217 | tev_w_full_389_designed_0.pdb None
218 | tev_w_full_38_designed_0.pdb None
219 | tev_w_full_38_designed_3.pdb None
220 | tev_w_full_392_designed_3.pdb None
221 | tev_w_full_394_designed_3.pdb None
222 | tev_w_full_396_designed_1.pdb None
223 | tev_w_full_397_designed_3.pdb None
224 | tev_w_full_398_designed_2.pdb None
225 | tev_w_full_39_designed_0.pdb None
226 | tev_w_full_3_designed_0.pdb None
227 | tev_w_full_400_designed_1.pdb None
228 | tev_w_full_401_designed_0.pdb None
229 | tev_w_full_401_designed_2.pdb None
230 | tev_w_full_409_designed_2.pdb None
231 | tev_w_full_420_designed_1.pdb None
232 | tev_w_full_421_designed_3.pdb None
233 | tev_w_full_423_designed_0.pdb None
234 | tev_w_full_427_designed_0.pdb None
235 | tev_w_full_428_designed_1.pdb None
236 | tev_w_full_428_designed_2.pdb None
237 | tev_w_full_429_designed_0.pdb None
238 | tev_w_full_433_designed_2.pdb None
239 | tev_w_full_434_designed_1.pdb None
240 | tev_w_full_438_designed_1.pdb None
241 | tev_w_full_445_designed_0.pdb None
242 | tev_w_full_446_designed_0.pdb None
243 | tev_w_full_449_designed_3.pdb None
244 | tev_w_full_44_designed_1.pdb None
245 | tev_w_full_453_designed_2.pdb None
246 | tev_w_full_455_designed_3.pdb None
247 | tev_w_full_457_designed_2.pdb None
248 | tev_w_full_458_designed_3.pdb None
249 | tev_w_full_459_designed_2.pdb None
250 | tev_w_full_460_designed_1.pdb None
251 | tev_w_full_464_designed_0.pdb None
252 | tev_w_full_464_designed_3.pdb None
253 | tev_w_full_465_designed_2.pdb None
254 | tev_w_full_466_designed_1.pdb None
255 | tev_w_full_467_designed_0.pdb None
256 | tev_w_full_468_designed_0.pdb None
257 | tev_w_full_474_designed_1.pdb None
258 | tev_w_full_476_designed_3.pdb None
259 | tev_w_full_478_designed_0.pdb None
260 | tev_w_full_47_designed_0.pdb None
261 | tev_w_full_480_designed_1.pdb None
262 | tev_w_full_485_designed_1.pdb None
263 | tev_w_full_48_designed_3.pdb None
264 | tev_w_full_491_designed_3.pdb None
265 | tev_w_full_492_designed_2.pdb None
266 | tev_w_full_494_designed_3.pdb None
267 | tev_w_full_499_designed_2.pdb None
268 | tev_w_full_49_designed_0.pdb None
269 | tev_w_full_4_designed_2.pdb None
270 | tev_w_full_59_designed_0.pdb None
271 | tev_w_full_5_designed_1.pdb None
272 | tev_w_full_60_designed_0.pdb None
273 | tev_w_full_68_designed_3.pdb None
274 | tev_w_full_71_designed_0.pdb None
275 | tev_w_full_73_designed_0.pdb None
276 | tev_w_full_75_designed_1.pdb None
277 | tev_w_full_77_designed_2.pdb None
278 | tev_w_full_78_designed_3.pdb None
279 | tev_w_full_80_designed_3.pdb None
280 | tev_w_full_83_designed_3.pdb None
281 | tev_w_full_85_designed_1.pdb None
282 | tev_w_full_86_designed_0.pdb None
283 | tev_w_full_86_designed_3.pdb None
284 | tev_w_full_89_designed_1.pdb None
285 | tev_w_full_8_designed_0.pdb None
286 | tev_w_full_93_designed_2.pdb None
287 | tev_w_full_94_designed_2.pdb None
288 | tev_w_full_96_designed_0.pdb None
289 | tev_w_full_96_designed_1.pdb None
290 | tev_w_full_97_designed_2.pdb None
291 | tev_w_full_97_designed_3.pdb None
292 | tev_w_full_98_designed_1.pdb None
293 | tev_w_s2_single_26_designed_0.pdb None
294 | tev_w_s2_single_31_designed_7.pdb None
295 | tev_w_s2_single_43_designed_2.pdb None
296 | tev_w_s2_single_51_designed_4.pdb None
297 | tev_w_s2_single_67_designed_6.pdb None
298 | tev_w_s2_single_96_designed_4.pdb None
299 | tev_w_s2_single_98_designed_8.pdb None
300 | tev_w_s3_single_0_designed_3.pdb None
301 | tev_w_s3_single_10_designed_4.pdb None
302 | tev_w_s3_single_10_designed_6.pdb None
303 | tev_w_s3_single_10_designed_9.pdb None
304 | tev_w_s3_single_11_designed_4.pdb None
305 | tev_w_s3_single_14_designed_6.pdb None
306 | tev_w_s3_single_14_designed_8.pdb None
307 | tev_w_s3_single_15_designed_3.pdb None
308 | tev_w_s3_single_16_designed_5.pdb None
309 | tev_w_s3_single_18_designed_8.pdb None
310 | tev_w_s3_single_19_designed_7.pdb None
311 | tev_w_s3_single_1_designed_6.pdb None
312 | tev_w_s3_single_22_designed_0.pdb None
313 | tev_w_s3_single_22_designed_3.pdb None
314 | tev_w_s3_single_23_designed_0.pdb None
315 | tev_w_s3_single_24_designed_7.pdb None
316 | tev_w_s3_single_25_designed_6.pdb None
317 | tev_w_s3_single_26_designed_9.pdb None
318 | tev_w_s3_single_28_designed_1.pdb None
319 | tev_w_s3_single_29_designed_1.pdb None
320 | tev_w_s3_single_2_designed_2.pdb None
321 | tev_w_s3_single_30_designed_0.pdb None
322 | tev_w_s3_single_30_designed_5.pdb None
323 | tev_w_s3_single_31_designed_1.pdb None
324 | tev_w_s3_single_31_designed_2.pdb None
325 | tev_w_s3_single_33_designed_8.pdb None
326 | tev_w_s3_single_33_designed_9.pdb None
327 | tev_w_s3_single_3_designed_2.pdb None
328 | tev_w_s3_single_3_designed_3.pdb None
329 | tev_w_s3_single_3_designed_7.pdb None
330 | tev_w_s3_single_45_designed_3.pdb None
331 | tev_w_s3_single_47_designed_1.pdb None
332 | tev_w_s3_single_48_designed_5.pdb None
333 | tev_w_s3_single_49_designed_6.pdb None
334 | tev_w_s3_single_49_designed_7.pdb None
335 | tev_w_s3_single_51_designed_2.pdb None
336 | tev_w_s3_single_52_designed_5.pdb None
337 | tev_w_s3_single_52_designed_6.pdb None
338 | tev_w_s3_single_55_designed_2.pdb None
339 | tev_w_s3_single_56_designed_6.pdb None
340 | tev_w_s3_single_56_designed_8.pdb None
341 | tev_w_s3_single_57_designed_1.pdb None
342 | tev_w_s3_single_57_designed_5.pdb None
343 | tev_w_s3_single_57_designed_7.pdb None
344 | tev_w_s3_single_58_designed_3.pdb None
345 | tev_w_s3_single_5_designed_6.pdb None
346 | tev_w_s3_single_60_designed_0.pdb None
347 | tev_w_s3_single_62_designed_2.pdb None
348 | tev_w_s3_single_62_designed_4.pdb None
349 | tev_w_s3_single_64_designed_2.pdb None
350 | tev_w_s3_single_64_designed_8.pdb None
351 | tev_w_s3_single_65_designed_8.pdb None
352 | tev_w_s3_single_66_designed_2.pdb None
353 | tev_w_s3_single_66_designed_3.pdb None
354 | tev_w_s3_single_68_designed_2.pdb None
355 | tev_w_s3_single_6_designed_1.pdb None
356 | tev_w_s3_single_6_designed_6.pdb None
357 | tev_w_s3_single_70_designed_0.pdb None
358 | tev_w_s3_single_72_designed_9.pdb None
359 | tev_w_s3_single_77_designed_8.pdb None
360 | tev_w_s3_single_78_designed_4.pdb None
361 | tev_w_s3_single_79_designed_8.pdb None
362 | tev_w_s3_single_82_designed_7.pdb None
363 | tev_w_s3_single_82_designed_9.pdb None
364 | tev_w_s3_single_83_designed_7.pdb None
365 | tev_w_s3_single_85_designed_5.pdb None
366 | tev_w_s3_single_87_designed_1.pdb None
367 | tev_w_s3_single_91_designed_7.pdb None
368 | tev_w_s3_single_92_designed_0.pdb None
369 | tev_w_s3_single_94_designed_5.pdb None
370 | tev_w_s3_single_95_designed_3.pdb None
371 | tev_w_s3_single_97_designed_7.pdb None
372 | tev_w_s3_single_98_designed_4.pdb None
373 | tev_w_s4_single_61_designed_3.pdb None
374 | tev_w_s5_single_15_designed_8.pdb None
375 | tev_w_s5_single_18_designed_9.pdb None
376 | tev_w_s5_single_80_designed_2.pdb None
377 | tev_w_s6_single_2_designed_2.pdb None
378 | tev_w_s6_single_34_designed_5.pdb None
379 | tev_w_s6_single_55_designed_9.pdb None
380 | tev_w_s6_single_98_designed_1.pdb None
381 |
--------------------------------------------------------------------------------
/helper/.ipynb_checkpoints/2yol-ER-summarized_label_singlePDB-checkpoint.txt:
--------------------------------------------------------------------------------
1 | Sequence Result
2 | RAAVGRG CLEAVED
3 |
--------------------------------------------------------------------------------
/helper/.ipynb_checkpoints/RAAVGRG-checkpoint.fasc:
--------------------------------------------------------------------------------
1 | {"pdb_name": "/projectsp/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/crystal_structures/2yol_class_generated_structures/RAAVGRG", "decoy": "/projectsp/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/crystal_structures/2yol_class_generated_structures/RAAVGRG_1.pdb", "filename": "/projectsp/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/crystal_structures/2yol_class_generated_structures/RAAVGRG_1.pdb", "nstruct": 5, "angle_constraint": 1.8514584177562103, "atom_pair_constraint": 25.208273437267593, "coordinate_constraint": 7.039481860880478, "dihedral_constraint": 44.36198268834136, "dslf_fa13": 0.0, "fa_atr": -1200.1931602312552, "fa_dun": 278.70603817440116, "fa_elec": -361.65087748482324, "fa_intra_rep": 2.2782667001091976, "fa_intra_sol_xover4": 38.17452307210722, "fa_rep": 189.1203012738916, "fa_sol": 678.8920947635634, "hbond_bb_sc": -57.45066322268294, "hbond_lr_bb": -78.10956195511554, "hbond_sc": -33.406312620142714, "hbond_sr_bb": -23.77954779304183, "lk_ball_wtd": -18.34982674161976, "omega": 33.779462545409125, "p_aa_pp": -59.62864800911472, "pro_close": 0.7566121679053337, "rama_prepro": 26.595517386945655, "ref": 78.28747000000007, "total_score": -427.29691210847625, "yhh_planarity": 0.2202034607410545}
2 | {"pdb_name": "/projectsp/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/crystal_structures/2yol_class_generated_structures/RAAVGRG", "decoy": "/projectsp/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/crystal_structures/2yol_class_generated_structures/RAAVGRG_3.pdb", "filename": "/projectsp/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/crystal_structures/2yol_class_generated_structures/RAAVGRG_3.pdb", "nstruct": 5, "angle_constraint": 1.72319730851421, "atom_pair_constraint": 26.10647597741368, "coordinate_constraint": 3.631492624038999, "dihedral_constraint": 43.80581752738587, "dslf_fa13": 0.0, "fa_atr": -1200.3024862464708, "fa_dun": 268.0296932876475, "fa_elec": -359.6974162960206, "fa_intra_rep": 2.2745507677680545, "fa_intra_sol_xover4": 38.176624450479586, "fa_rep": 180.17539585703207, "fa_sol": 679.556153222227, "hbond_bb_sc": -57.00730357738894, "hbond_lr_bb": -78.11313854792967, "hbond_sc": -33.46744638015357, "hbond_sr_bb": -23.77954779304183, "lk_ball_wtd": -18.228640183703895, "omega": 34.17595287994555, "p_aa_pp": -59.589653722017594, "pro_close": 0.7616550100113841, "rama_prepro": 26.748175556904066, "ref": 78.28747000000007, "total_score": -446.51831372803525, "yhh_planarity": 0.21466454932361487}
3 | {"pdb_name": "/projectsp/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/crystal_structures/2yol_class_generated_structures/RAAVGRG", "decoy": "/projectsp/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/crystal_structures/2yol_class_generated_structures/RAAVGRG_0.pdb", "filename": "/projectsp/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/crystal_structures/2yol_class_generated_structures/RAAVGRG_0.pdb", "nstruct": 5, "angle_constraint": 1.7188536887926098, "atom_pair_constraint": 26.11225011795318, "coordinate_constraint": 3.6498371696450906, "dihedral_constraint": 43.881292204641206, "dslf_fa13": 0.0, "fa_atr": -1200.2814323816085, "fa_dun": 267.96129620017757, "fa_elec": -359.70300877059645, "fa_intra_rep": 2.2751948755380518, "fa_intra_sol_xover4": 38.17580997214633, "fa_rep": 180.12315240624406, "fa_sol": 679.5198732337798, "hbond_bb_sc": -56.99580894636125, "hbond_lr_bb": -78.1356658702019, "hbond_sc": -33.453014190918154, "hbond_sr_bb": -23.77954779304183, "lk_ball_wtd": -18.241547482541108, "omega": 34.18772081734078, "p_aa_pp": -59.59111293933735, "pro_close": 0.7516880755150057, "rama_prepro": 26.771567833890604, "ref": 78.28747000000007, "total_score": -446.5404916720142, "yhh_planarity": 0.224640106927815}
4 | {"pdb_name": "/projectsp/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/crystal_structures/2yol_class_generated_structures/RAAVGRG", "decoy": "/projectsp/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/crystal_structures/2yol_class_generated_structures/RAAVGRG_2.pdb", "filename": "/projectsp/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/crystal_structures/2yol_class_generated_structures/RAAVGRG_2.pdb", "nstruct": 5, "angle_constraint": 1.7005644160223428, "atom_pair_constraint": 26.28851282762922, "coordinate_constraint": 3.7912000090448896, "dihedral_constraint": 43.83768130020485, "dslf_fa13": 0.0, "fa_atr": -1199.8164258318577, "fa_dun": 267.2926721654119, "fa_elec": -359.2641771887585, "fa_intra_rep": 2.2754266717328373, "fa_intra_sol_xover4": 38.16627324154184, "fa_rep": 179.78007657794114, "fa_sol": 678.9609377666117, "hbond_bb_sc": -57.04690699040846, "hbond_lr_bb": -78.14498841336332, "hbond_sc": -32.92357507398168, "hbond_sr_bb": -23.77954779304183, "lk_ball_wtd": -18.368511777597316, "omega": 34.310327060005726, "p_aa_pp": -59.58881596162057, "pro_close": 0.7553744995982864, "rama_prepro": 26.802237753557165, "ref": 78.28747000000007, "total_score": -446.4661692154433, "yhh_planarity": 0.21802552588414464}
5 | {"pdb_name": "/projectsp/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/crystal_structures/2yol_class_generated_structures/RAAVGRG", "decoy": "/projectsp/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/crystal_structures/2yol_class_generated_structures/RAAVGRG_4.pdb", "filename": "/projectsp/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/crystal_structures/2yol_class_generated_structures/RAAVGRG_4.pdb", "nstruct": 5, "angle_constraint": 1.7005644160223428, "atom_pair_constraint": 26.28851282762922, "coordinate_constraint": 3.7912000090448896, "dihedral_constraint": 43.83768130020485, "dslf_fa13": 0.0, "fa_atr": -1199.8164258318577, "fa_dun": 267.2926721654119, "fa_elec": -359.2641771887585, "fa_intra_rep": 2.2754266717328373, "fa_intra_sol_xover4": 38.16627324154184, "fa_rep": 179.78007657794114, "fa_sol": 678.9609377666117, "hbond_bb_sc": -57.04690699040846, "hbond_lr_bb": -78.14498841336332, "hbond_sc": -32.92357507398168, "hbond_sr_bb": -23.77954779304183, "lk_ball_wtd": -18.368511777597316, "omega": 34.310327060005726, "p_aa_pp": -59.58881596162057, "pro_close": 0.7553744995982864, "rama_prepro": 26.802237753557165, "ref": 78.28747000000007, "total_score": -446.4661692154433, "yhh_planarity": 0.21802552588414464}
6 |
--------------------------------------------------------------------------------
/helper/.ipynb_checkpoints/Untitled-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 2,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import argparse"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 3,
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "def parse_args():\n",
19 | " parser = argparse.ArgumentParser()\n",
20 | " parser.add_argument('-s', '--score_folder_path', type=str,\n",
21 | " default='/projects/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/crystal_structures/2yol_class_generated_structures',\n",
22 | " help='Directory of generated structures.')\n",
23 | " parser.add_argument('-class', '--classification_file', type=str,\n",
24 | " default='/projects/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/classifications/3cProt_class/2yol-ER-summarized_label.txt',\n",
25 | " help='Directory of generated structures.')\n",
26 | " return parser.parse_args()"
27 | ]
28 | },
29 | {
30 | "cell_type": "code",
31 | "execution_count": null,
32 | "metadata": {},
33 | "outputs": [],
34 | "source": [
35 | "def main(args):\n",
36 | " score_path = Path(args.score_folder_path)\n",
37 | " class_file = Path(args.classification_file)\n",
38 | " "
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": 36,
44 | "metadata": {},
45 | "outputs": [],
46 | "source": [
47 | "from pathlib import Path\n",
48 | "import pandas as pd\n",
49 | "import numpy as np\n",
50 | "import json\n",
51 | "from collections import defaultdict"
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": 5,
57 | "metadata": {},
58 | "outputs": [],
59 | "source": [
60 | "score_path = Path('./')\n",
61 | "class_file = Path('2yol-ER-summarized_label.txt')"
62 | ]
63 | },
64 | {
65 | "cell_type": "code",
66 | "execution_count": 9,
67 | "metadata": {},
68 | "outputs": [],
69 | "source": [
70 | "df_class = pd.read_csv(class_file, delimiter='\\t')"
71 | ]
72 | },
73 | {
74 | "cell_type": "code",
75 | "execution_count": 59,
76 | "metadata": {},
77 | "outputs": [],
78 | "source": [
79 | "# edit based on Vidur's code\n",
80 | "new_sequences = []\n",
81 | "for seq in ['RAAVGRG']: #df_class['Sequence']\n",
82 | " fasc = score_path / (seq + '.fasc')\n",
83 | " with open(fasc, 'r') as fp:\n",
84 | " for i, line in enumerate(fp):\n",
85 | " js = json.loads(line)\n",
86 | " if i == 0:\n",
87 | " dic_scores = defaultdict(list, { k:[v] for k,v in js.items()})\n",
88 | " else:\n",
89 | " for k in js.keys():\n",
90 | " dic_scores[k].append(js[k])\n",
91 | " df = pd.DataFrame(dic_scores)\n",
92 | " pdb = df.loc[df['total_score'].idxmin(),['filename']].values[0].split('/')[-1]\n",
93 | " new_sequences.append(seq)\n",
94 | "df = pd.DataFrame({'Sequence': new_sequences, 'Result': ['CLEAVED']}) #df_class['Result']\n",
95 | "df.to_csv(class_file.parent / (class_file.stem.split('.')[0] + '_singlePDB.txt'), sep='\\t', index=None)"
96 | ]
97 | },
98 | {
99 | "cell_type": "code",
100 | "execution_count": null,
101 | "metadata": {},
102 | "outputs": [],
103 | "source": []
104 | }
105 | ],
106 | "metadata": {
107 | "kernelspec": {
108 | "display_name": "Python 3",
109 | "language": "python",
110 | "name": "python3"
111 | },
112 | "language_info": {
113 | "codemirror_mode": {
114 | "name": "ipython",
115 | "version": 3
116 | },
117 | "file_extension": ".py",
118 | "mimetype": "text/x-python",
119 | "name": "python",
120 | "nbconvert_exporter": "python",
121 | "pygments_lexer": "ipython3",
122 | "version": "3.7.1"
123 | }
124 | },
125 | "nbformat": 4,
126 | "nbformat_minor": 4
127 | }
128 |
--------------------------------------------------------------------------------
/helper/.ipynb_checkpoints/generate_class_singlePDB-checkpoint.py:
--------------------------------------------------------------------------------
1 | '''
2 | This script is to generate classification file if generated structures are in single PDB format.
3 | Usage: python generate_class_singlePDB.py -s /projects/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/crystal_structures/2yol_class_generated_structures -class /projects/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/classifications/3cProt_class/2yol-ER-summarized_label.txt
4 | Changpeng Lu 2023-04-16
5 | Vidur Sarma 2023-04-15
6 | '''
7 | import argparse
8 | from pathlib import Path
9 | import pandas as pd
10 | import numpy as np
11 | import json
12 | from collections import defaultdict
13 |
def parse_args():
    """Parse the command-line options of the single-PDB classification script.

    Returns:
        argparse.Namespace: holds ``score_folder_path`` (directory that
        contains the per-sequence ``.fasc`` score files) and
        ``classification_file`` (tab-separated table with ``Sequence`` and
        ``Result`` columns).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--score_folder_path', type=str,
                        default='/projects/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/crystal_structures/2yol_class_generated_structures',
                        help='Directory of generated structures.')
    # The help text used to be a copy of the -s help; this option is a file,
    # not the structure directory.
    parser.add_argument('-class', '--classification_file', type=str,
                        default='/projects/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/classifications/3cProt_class/2yol-ER-summarized_label.txt',
                        help='Tab-separated classification file with Sequence and Result columns.')
    return parser.parse_args()
23 |
def main(args):
    """Write a ``*_singlePDB.txt`` classification file next to the input one.

    For every sequence listed in the classification file, the matching
    ``<sequence>.fasc`` score file (one JSON record per line) is read from
    ``args.score_folder_path`` and the decoy with the lowest ``total_score``
    is located.  The output table carries the ``Sequence`` and ``Result``
    columns of the input and is saved tab-separated, without an index column.

    Args:
        args: namespace providing ``score_folder_path`` and
            ``classification_file``.

    Raises:
        FileNotFoundError: a listed sequence has no ``.fasc`` file.
        ValueError: a ``.fasc`` file contains no score records.
    """
    score_path = Path(args.score_folder_path)
    class_file = Path(args.classification_file)
    df_class = pd.read_csv(class_file, delimiter='\t')

    new_sequences = []
    for seq in df_class['Sequence']:
        fasc = score_path / (seq + '.fasc')
        with open(fasc, 'r') as fp:
            # Skip blank/trailing lines so json.loads only sees real records.
            records = [json.loads(line) for line in fp if line.strip()]
        if not records:
            # Previously an empty file silently reused the scores of the
            # preceding sequence (or raised NameError on the first one).
            raise ValueError(f'No score records found in {fasc}')
        df_scores = pd.DataFrame(records)
        # NOTE(review): the best-scoring decoy file name is computed but is not
        # written to the output; confirm whether the Sequence column should
        # hold this PDB name instead of the raw sequence.
        best_pdb = df_scores.loc[df_scores['total_score'].idxmin(), 'filename'].split('/')[-1]  # noqa: F841
        new_sequences.append(seq)

    df_out = pd.DataFrame({'Sequence': new_sequences, 'Result': df_class['Result']})
    out_file = class_file.parent / (class_file.stem.split('.')[0] + '_singlePDB.txt')
    df_out.to_csv(out_file, sep='\t', index=False)
45 |
# Script entry point: parse the CLI options and build the classification file.
if __name__ == '__main__':
    main(parse_args())
49 |
--------------------------------------------------------------------------------
/helper/.ipynb_checkpoints/make_modeling_commands-checkpoint.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import os
4 | from pathlib import Path
5 | import argparse
6 |
def parse_args():
    """Parse the command-line options used to build Rosetta modeling commands.

    Returns:
        argparse.Namespace: one attribute per option defined below.
        ``memory`` is ``None`` unless ``-mem`` is given.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--info_file', type=str,
                        default='/projects/f_sdk94_1/protease_3C/data_ngs_enrichment/2bof-ER-summarized.csv',
                        help='Directory of the information for all structures to be generated. '
                             'It should consist of three columns, (currently, the program cannot support multiple proteases) '
                             'protease_name or protease_mutations, substrate_sequence, and label.')
    # The help text used to be a copy of the -p1_ind help; this option carries
    # the two wild-type residue letters, not an index.
    parser.add_argument('-p1p11', '--p1p11_wt', type=str,
                        default='QS',
                        help="Wild-type residues at P1 and P1 prime, given as a two-letter string, e.g., QS.")
    parser.add_argument('-p1_ind', '--p1_index_substrate', type=int,
                        default=888,
                        help='index of p1, can be either negative or positive indices. '
                             'e.g., p1=0 means p1 is the first of the substrate; '
                             'p1=-2 means p1 is the last second of the substrate sequence. '
                             'If you use . as a delimiter between P1 and P1 prime position, ignore this flag.')
    parser.add_argument('-p1_pdb', '--p1_index_pdb', type=int,
                        default=7,
                        help='pdb index of p1.')
    parser.add_argument('-struct', '--starting_structures', type=str,
                        default='/projects/f_sdk94_1/protease_3C/final_3C_protease_peptide_structures/2b0f_wt_pep.pdb',
                        help='Directory of starting structure(s). It currently cannot handle multiple starting structures. '
                             'If multiple starting strctures, make sure names of starting structures match '
                             'protease_name in the info_file.')
    parser.add_argument('-script_path', '--script_path', type=str,
                        default='/projects/f_sdk94_1/EnzymeModelling/Protease-Substrate-Design',
                        help='Where to save output file for all commands')
    parser.add_argument('-o', '--output_name', type=str,
                        default='new.command.txt',
                        help='output command file name')
    parser.add_argument('-f', '--output_format', choices=['sequence', 'silent'],
                        default='sequence',
                        help='two options of output format, either sequence, or silent files. '
                             'Silent file mode will concatenate sequences which have same patterns into one same file.')
    parser.add_argument('-os', '--output_structure_directory', type=str,
                        default='/projects/f_sdk94_1/EnzymeModelling/Protease3C/2bof',
                        help='where to put generated Rosetta structures')
    parser.add_argument('-constraint', '--constraint_suffix', type=str,
                        default="-site 215 -cons tev.cst -cr 39 74 144 -dprot 0 -dpep 0",
                        help='Specify all flags for design_protease.py, e.g., -site 215 -cons tev.cst -cr 39 74 144 -dprot 0 -dpep 0 '
                             '-site specifies the starting pose index of threading, -cr specifies three catalytic residues.')
    parser.add_argument('-jn', '--job_name', type=str,
                        default=None,
                        help='job name for Rosetta modeling')
    parser.add_argument('-bs', '--batch_size', type=int,
                        default=5,
                        help='')
    parser.add_argument('-cd', '--command_directory', type=str,
                        default='/projects/f_sdk94_1/EnzymeModelling/Commands_OYDV')
    # The description was previously passed as ``default``; argparse runs
    # string defaults through ``type`` (int here), so every invocation without
    # -mem exited with "invalid int value".
    parser.add_argument('-mem', '--memory', type=int,
                        default=None,
                        help='Memory assigned to the processor')
    return parser.parse_args()
62 |
def createCrys(p_wt, p, ind, root):
    """Write a copy of a starting PDB with the P1/P1' residue names swapped.

    Every non-REMARK line of ``root`` that contains the wild-type motif
    (three-letter residue name, one space, residue number) has the residue
    name replaced by the one requested in ``p``. The result is written next
    to ``root`` as ``<root.stem>_<p>.pdb``.

    Args:
        p_wt: Two-letter string with the one-letter codes currently present
            at P1 and P1' in ``root`` (e.g. ``'QS'``).
        p: Two-letter string with the one-letter codes to put at P1 and P1'.
        ind: PDB residue number of P1; P1' is taken to be ``ind + 1``.
        root: ``pathlib.Path`` of the starting structure.

    Raises:
        KeyError: If a letter of ``p_wt`` or ``p`` is not a known one-letter
            residue code. Raised before any output file is created.
    """
    letter1 = 'ARNDBCEQZGHILKMFPSTWYV'
    letter3 = ['ALA', 'ARG', 'ASN', 'ASP', 'ASX', 'CYS', 'GLU', 'GLN', 'GLX', 'GLY', 'HIS', 'ILE', 'LEU', 'LYS',
               'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL']
    letterMap = dict(zip(letter1, letter3))

    # Resolve every residue name up front so that a bad letter cannot leave
    # an empty, half-written output file behind.
    swaps = (
        (letterMap[p_wt[0]] + ' ' + str(ind), letterMap[p[0]]),
        (letterMap[p_wt[1]] + ' ' + str(ind + 1), letterMap[p[1]]),
    )

    out_path = root.parent / (root.stem + '_' + p + '.pdb')
    with open(root, 'r') as fp, open(out_path, 'w') as gp:
        for line in fp:
            # The original guard was `line.find('REMARK') != 1`, which is
            # true for practically every line; REMARK records are meant to
            # pass through unchanged.
            if not line.startswith('REMARK'):
                for motif, new_name in swaps:
                    pos = line.find(motif)
                    if pos != -1:
                        # Only the first occurrence on a line is replaced.
                        line = line[:pos] + new_name + line[pos + 3:]
            gp.write(line)
85 |
def toCommands(args, info_set, constraint, mode = 'silent'):
    """Write one ``design_protease.py`` command line per substrate sequence.

    The commands go to ``<args.script_path>/<args.output_name>``.

    Args:
        args: Parsed CLI namespace; reads ``output_name``, ``script_path``,
            ``p1_index_substrate``, ``starting_structures`` and
            ``output_structure_directory``.
        info_set: ``(sequences, mutant_list)`` - two equally long iterables.
            An empty-string mutant means "no protease mutation prefix".
        constraint: Extra flags appended verbatim to every command.
        mode: Accepted for backward compatibility but currently ignored;
            only per-sequence commands are generated.
    """
    p1_ind = args.p1_index_substrate
    root = Path(args.starting_structures)
    outStructFolder = args.output_structure_directory
    sequences, mutant_list = info_set[0], info_set[1]

    with open(os.path.join(args.script_path, args.output_name), 'w') as fp:
        # zip() walks both containers by position. Indexing a label-indexed
        # pandas Series with an integer (`mutant_list[i]`) is deprecated and
        # no longer falls back to positional lookup in recent pandas.
        for seq, mutant in zip(sequences, mutant_list):
            p1p11, newSeq = locate_p1p11(seq, p1_ind)
            newStructPath = root.parent / (root.stem + '_' + p1p11 + '.pdb')
            name = newSeq if mutant == '' else mutant + '_' + newSeq
            fp.write('python design_protease.py -s ' + str(newStructPath) +
                     ' -od ' + outStructFolder + ' -seq ' + newSeq + ' -name ' + name +
                     " " + constraint + '\n')
117 |
def locate_p1p11(seq, p1_ind=None):
    """Return the P1/P1' residue pair and the delimiter-free sequence.

    If ``seq`` contains a ``'.'`` it marks the scissile bond: P1 is the
    residue before it and P1' the residue after it. Otherwise ``p1_ind`` is
    the (positive or negative) index of P1 in ``seq``.

    Args:
        seq: Substrate sequence, optionally with one ``'.'`` delimiter.
        p1_ind: Index of P1, used only when ``seq`` has no ``'.'``.

    Returns:
        Tuple ``(p1p11, sequence)`` where ``p1p11`` is a two-letter string
        and ``sequence`` is ``seq`` with the delimiter removed.

    Raises:
        ValueError: If the delimiter sits at either end of ``seq``, or if no
            delimiter is present and ``p1_ind`` is missing or does not leave
            room for a P1' residue.
    """
    dotInd = seq.find('.')
    if dotInd != -1:
        if dotInd == 0 or dotInd == len(seq) - 1:
            raise ValueError(
                "'.' must sit between P1 and P1' but is at the edge of {!r}".format(seq))
        return seq[dotInd - 1] + seq[dotInd + 1], seq[:dotInd] + seq[dotInd + 1:]

    if p1_ind is None:
        raise ValueError(
            "sequence {!r} has no '.' delimiter and no P1 index was given".format(seq))
    n = len(seq)
    # P1' is seq[p1_ind + 1], so the last position (n - 1 or -1) cannot be P1.
    if not (0 <= p1_ind <= n - 2 or -n <= p1_ind <= -2):
        raise ValueError(
            "P1 index {} is not valid for sequence {!r} (length {})".format(p1_ind, seq, n))
    return seq[p1_ind] + seq[p1_ind + 1], seq
128 |
def printToBatchCommand(args):
    """Run ``text_to_slurm.py`` to split the command file into batch scripts.

    Args:
        args: Parsed CLI namespace; reads ``info_file``, ``job_name``,
            ``memory``, ``command_directory``, ``batch_size``,
            ``script_path`` and ``output_name``.

    The job name defaults to the stem of ``args.info_file``. The memory
    request now follows ``args.memory`` (it used to be hard-coded to 12000
    regardless of the flag); 12000 is kept as the fallback when no value is
    available.
    """
    import subprocess  # local import: only this helper launches a process

    jobName = args.job_name if args.job_name is not None else Path(args.info_file).stem
    mem = args.memory if args.memory is not None else 12000
    scriptPath = args.script_path

    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters intact.
    splitCommand = [
        "python", os.path.join(scriptPath, "text_to_slurm.py"),
        "-txt", os.path.join(scriptPath, args.output_name),
        "-job_name", jobName,
        "-mem", str(mem),
        "-path_operation", scriptPath,
        "-path_sh", args.command_directory,
        "-batch", str(args.batch_size),
        "-time", "3-00:00:00",
    ]
    result = subprocess.run(splitCommand)
    if result.returncode != 0:
        print('text_to_slurm.py exited with status {}'.format(result.returncode))
146 |
def mkdir(path):
    """Create directory ``path`` (a ``pathlib.Path``) and any missing parents.

    Does nothing if the directory already exists. ``exist_ok=True`` replaces
    the former exists()-then-mkdir() sequence, which could fail if another
    process created the directory in between.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    path.mkdir(parents=True, exist_ok=True)
150 |
def main(args):
    """Prepare starting structures, write the command file and batch it.

    Steps:
      1. Read the info file (CSV whose first column holds the substrate
         sequences; a column whose name contains "mutant" is optional).
      2. For every P1/P1' pair that has no starting structure yet, derive one
         from ``args.starting_structures`` via ``createCrys``.
      3. Write the ``design_protease.py`` commands and split them into
         batch scripts.

    Args:
        args: Namespace produced by ``parse_args``.
    """
    mutSeqLabel = Path(args.info_file)
    p1_ind = args.p1_index_substrate
    p1_ind_pdb = args.p1_index_pdb
    starting_structure_path = Path(args.starting_structures)
    structure_save_path = starting_structure_path.parent
    p1p11_wt = args.p1p11_wt
    output_format = args.output_format  # renamed: `format` shadowed the builtin
    constraintSuffix = args.constraint_suffix
    mkdir(Path(args.command_directory))

    # Use intermediate output from CleavEX as the input. Need to update in the future
    df = pd.read_csv(mutSeqLabel, index_col=0)
    mutant_list = [''] * df.shape[0]
    for column_name in df.columns:
        if 'mutant' in column_name.lower():
            # A plain list keeps later positional access well defined; the
            # Series itself is indexed by sequence, not by position.
            mutant_list = df[column_name].tolist()
    sequences = df.index.values

    new_c = 0
    for seq in sequences:
        p1p11, _ = locate_p1p11(seq, p1_ind)
        variant_path = structure_save_path / (starting_structure_path.stem + '_' + p1p11 + '.pdb')
        if variant_path.is_file():
            # A structure for this P1/P1' pair already exists.
            continue
        createCrys(p1p11_wt, p1p11, p1_ind_pdb, starting_structure_path)
        new_c += 1
    print('Swapping {} number of P1P11 combinations'.format(new_c))

    toCommands(args, (sequences, mutant_list), constraintSuffix, mode=output_format)
    printToBatchCommand(args)


if __name__ == '__main__':
    args = parse_args()
    main(args)
190 |
--------------------------------------------------------------------------------
/helper/BenchmarkMLTrainAfterPGCN.py:
--------------------------------------------------------------------------------
1 | # This script is to train and test ml models
2 | # Author: Changpeng Lu
3 |
4 | from sklearn import linear_model
5 | from sklearn.ensemble import RandomForestClassifier
6 | from sklearn.tree import DecisionTreeClassifier
7 | from sklearn import svm
8 | import tensorflow as tf
9 | from tensorflow import keras
10 | import warnings
11 | warnings.filterwarnings('ignore')
12 | import pandas as pd
13 | import numpy as np
14 | import os
15 | import pickle as pkl
16 | from sklearn import preprocessing
17 | import argparse
18 |
19 | from utils import *
20 |
# Command-line interface. -data and -model are mandatory: the training code
# below calls string methods on them, so a missing value used to surface only
# as a crash after start-up. -feature now defaults to 'complete' (all columns)
# because a missing value broke the output-file naming.
parser = argparse.ArgumentParser()
parser.add_argument('-data', type=str, required=True, help='data name')
parser.add_argument('-model', type=str, required=True,
                    help='logistic_regression, random_forest, decision_tree, svm, ann, '
                         'or classic (runs logistic_regression, random_forest and decision_tree)')
parser.add_argument('-feature', type=str, choices=['complete','seq','energy'], default='complete')
parser.add_argument('-save', type=str, default='./experiment1')
args = parser.parse_args()

# `makedirs` comes from the star import of utils.
makedirs(args.save)
29 |
def _split_feature_columns(n_features, boundary):
    """Partition column positions into (energy, sequence) index lists.

    Columns at or beyond ``boundary`` are energy features. Below it the
    columns repeat in groups of 28: offsets 0-19 are sequence features and
    offsets 20-27 are energy features.
    """
    energy, seq = [], []
    for i in range(n_features):
        if i >= boundary or i % 28 >= 20:  # if having identifier, need to minus 10
            energy.append(i)
        else:
            seq.append(i)
    return energy, seq


def trainSeqOnly(dataset, save = '/scratch/cl1205/ml-cleavage/outputs/seqOnly_20220217', model = 'logistic_regression',
                 encoding = 'energy', split=2):
    """Train one benchmark model on ``dataset`` and save its predictions.

    Args:
        dataset: Data set name handed to ``load_data``. Names containing
            ``'TEV_all'`` or ``'HCV'`` select the column layout used to
            separate sequence from energy features.
        save: Directory that receives the ``logits_*`` text files.
        model: ``'logistic_regression'``, ``'random_forest'``,
            ``'decision_tree'``, ``'svm'`` or ``'ann'``.
        encoding: ``'energy'`` or ``'seq'`` keeps only those columns; any
            other value keeps all columns. Also used in output file names.
        split: 2 for train/test data, 3 for train/validation/test data.

    Raises:
        ValueError: For an unsupported ``split`` or ``model``.

    Notes:
        ``load_data``, ``scale``, ``train_test``, ``train_test_ann`` and
        ``train_test_ann_split`` come from the star import of ``utils``.
        For the ANN with ``split=2`` there is no validation set, so the test
        accuracy is used for model selection and only ``logits_test_*`` files
        are written; this path previously failed on undefined names.
    """
    known_models = ('logistic_regression', 'random_forest', 'decision_tree', 'svm', 'ann')
    if model not in known_models:
        raise ValueError('unknown model {!r}; expected one of {}'.format(model, known_models))

    classes = 2
    X_val = y_val = None
    if split == 2:
        X_train, y_train, X_test, y_test = load_data(dataset, classes)
    elif split == 3:
        X_train, y_train, X_val, y_val, X_test, y_test = load_data(dataset, classes, 3)
    else:
        raise ValueError('split must be 2 or 3, got {!r}'.format(split))

    # Column layout: index of the first pure-energy column per data family.
    energy_indices = []
    seq_indices = []
    for tag, boundary in (('TEV_all', 1316), ('HCV', 952)):
        if dataset.find(tag) != -1:
            energy_part, seq_part = _split_feature_columns(X_train.shape[1], boundary)
            energy_indices.extend(energy_part)
            seq_indices.extend(seq_part)

    selected = {'energy': energy_indices, 'seq': seq_indices}.get(encoding)
    if selected is not None:
        X_train = X_train.iloc[:, selected].copy()
        X_test = X_test.iloc[:, selected].copy()
        if split == 3:
            X_val = X_val.iloc[:, selected].copy()

    X_train = scale(X_train)
    X_test = scale(X_test)
    if split == 3:
        X_val = scale(X_val)

    if model == 'ann':
        import tensorflow as tf
        from tensorflow import keras
        os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
        accs = []
        test_accs = []
        combinations = []
        dropout_list = [0.5,0.4,0.3,0.2,0.1,0.05,0.01]
        learning_rate = [0.01,0.05,1e-3,5e-3,1e-4,5e-4]
        n_class = 2
        # Plain grid search over dropout x learning rate.
        for dropout in dropout_list:
            for lr in learning_rate:
                print('dropout: {}; learning rate: {}'.format(dropout, lr))
                combinations.append([dropout, lr])
                ann = keras.Sequential([keras.layers.Dense(1024, activation=tf.nn.relu),
                                        keras.layers.Dropout(dropout, input_shape = (1024,)),
                                        keras.layers.Dense(n_class, activation=tf.nn.softmax)])
                # NOTE(review): tf.train.AdamOptimizer is the TensorFlow 1.x
                # spelling - confirm the TensorFlow version this is run with.
                ann.compile(optimizer=tf.train.AdamOptimizer(learning_rate = lr),loss='sparse_categorical_crossentropy',metrics=['accuracy'])
                suffix = (model + '_' + str(dataset) + '_' + encoding +
                          '_dropout_' + str(dropout) + '_lr_' + str(lr) + '_epoch_100')
                if split == 2:
                    test_prob, test_acc = train_test_ann(ann, n_class, X_train, y_train, X_test, y_test)
                    acc = test_acc
                else:
                    prob, acc, test_prob, test_acc = train_test_ann_split(ann, n_class, X_train, y_train,
                                                                          X_test, y_test,
                                                                          X_val, y_val)
                    np.savetxt(os.path.join(save, 'logits_val_' + suffix), prob)
                np.savetxt(os.path.join(save, 'logits_test_' + suffix), test_prob)
                accs.append(acc)
                test_accs.append(test_acc)
        print('Validation Accuracy:{:.4f}'.format(max(accs)))
        i = np.argmax(np.array(accs))
        print('Test Accuracy:{:.4f}'.format(test_accs[i]))
        print('Dropout: {:f}; Learning Rate: {:f}'.format(combinations[i][0], combinations[i][1]))
        return

    out_file = os.path.join(save, 'logits_' + model + '_' + str(dataset) + '_' + encoding)
    if model == 'random_forest':
        from sklearn.ensemble import RandomForestClassifier
        # Random forests are stochastic: report the mean accuracy of 20 fits.
        # The saved probabilities are those of the last fit.
        n_runs = 20
        av_acc = 0
        for _ in range(n_runs):
            rf = RandomForestClassifier()
            prob, acc = train_test(rf, X_train, y_train, X_test, y_test)
            av_acc += acc
        acc = av_acc / n_runs
    else:
        if model == 'logistic_regression':
            from sklearn import linear_model
            estimator = linear_model.LogisticRegression(C = 1, max_iter = 500)
        elif model == 'decision_tree':
            from sklearn.tree import DecisionTreeClassifier
            estimator = DecisionTreeClassifier()
        else:  # 'svm'
            from sklearn import svm
            estimator = svm.SVC(C = 1, probability=True)
        prob, acc = train_test(estimator, X_train, y_train, X_test, y_test)
    print('Test Accuracy:{:.4f}'.format(acc))
    np.savetxt(out_file, prob)
137 |
# Script driver: train the requested model(s) on a train/validation/test split.
enco = args.feature
data = args.data
model = args.model

print(enco)
print(data)

# trisplit
# 'classic' is shorthand for the three classic scikit-learn baselines.
if model == 'classic':
    models_to_run = ['logistic_regression','random_forest','decision_tree']
else:
    models_to_run = [model]

for m in models_to_run:
    # Print the model actually being trained (the loop used to print
    # 'classic' on every pass).
    print(m)
    trainSeqOnly(data, model = m, encoding = enco, split=3,
                 save = args.save)
155 |
156 |
--------------------------------------------------------------------------------
/helper/generate_class_singlePDB.py:
--------------------------------------------------------------------------------
1 | '''
2 | This script is to generate classification file if generated structures are in single PDB format.
3 | python generate_class_singlePDB.py -s /projects/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/crystal_structures/2yol_class_generated_structures -class /projects/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/classifications/3cProt_class/2yol-ER-summarized_label.txt
4 | Changpeng Lu 2023-04-16
5 | Vidur Sarma 2023-04-15
6 | '''
7 | import argparse
8 | from pathlib import Path
9 | import pandas as pd
10 | import numpy as np
11 | import json
12 | from collections import defaultdict
13 |
def parse_args():
    """Parse the command-line options of generate_class_singlePDB.py.

    Returns:
        argparse.Namespace with ``score_folder_path`` and
        ``classification_file``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--score_folder_path', type=str,
                        default='/projects/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/crystal_structures/2yol_class_generated_structures',
                        help='Directory of generated structures; it must contain one '
                             '<sequence>.fasc score file per sequence.')
    parser.add_argument('-class', '--classification_file', type=str,
                        default='/projects/f_sdk94_1/PGCN/protease-gcnn-pytorch/graph/classifications/3cProt_class/2yol-ER-summarized_label.txt',
                        help='Tab-separated classification file with Sequence and Result columns.')
    return parser.parse_args()
23 |
def main(args):
    """Rewrite a classification file so it names the best-scoring PDB.

    For every entry of the ``Sequence`` column, ``<score folder>/<seq>.fasc``
    is read (one JSON object per line), the record with the lowest
    ``total_score`` is selected, and the base name of its ``filename``
    replaces the sequence. The result is written next to the classification
    file as ``<name>_singlePDB.txt`` (tab separated, no index column).

    Args:
        args: Namespace with ``score_folder_path`` and ``classification_file``.

    Raises:
        ValueError: If a ``.fasc`` file contains no score records.
    """
    score_path = Path(args.score_folder_path)
    class_file = Path(args.classification_file)
    df_class = pd.read_csv(class_file, delimiter='\t')

    # edit based on Vidur's code
    new_sequences = []
    for seq in df_class['Sequence']:
        fasc = score_path / (seq + '.fasc')
        with open(fasc, 'r') as fp:
            # Blank lines (e.g. a trailing newline) are skipped rather than
            # passed to json.loads.
            records = [json.loads(line) for line in fp if line.strip()]
        if not records:
            raise ValueError('no score records found in {}'.format(fasc))
        # Building the frame from records tolerates lines with differing
        # keys (missing values become NaN) instead of ragged columns.
        df = pd.DataFrame(records)
        best_file = df.loc[df['total_score'].idxmin(), 'filename']
        new_sequences.append(best_file.split('/')[-1])

    df_out = pd.DataFrame({'Sequence': new_sequences, 'Result': df_class['Result']})
    df_out.to_csv(class_file.parent / (class_file.stem.split('.')[0] + '_singlePDB.txt'), sep='\t', index=False)


if __name__ == '__main__':
    args = parse_args()
    main(args)
49 |
--------------------------------------------------------------------------------
/helper/make_modeling_commands.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import os
4 | from pathlib import Path
5 | import argparse
6 |
def parse_args():
    """Parse the command-line options of make_modeling_commands.py.

    Returns:
        argparse.Namespace holding every option below.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--info_file', type=str,
                        default='/projects/f_sdk94_1/protease_3C/data_ngs_enrichment/2bof-ER-summarized.csv',
                        help='Directory of the information for all structures to be generated. '
                             'It should consist of three columns, (currently, the program cannot support multiple proteases) '
                             'protease_name or protease_mutations, substrate_sequence, and label.')
    # The help text here used to be a copy of the -p1_ind help.
    parser.add_argument('-p1p11', '--p1p11_wt', type=str,
                        default='QS',
                        help="one-letter codes of the residues at P1 and P1' in the starting structure, "
                             'given as a two-letter string, e.g., QS.')
    parser.add_argument('-p1_ind', '--p1_index_substrate', type=int,
                        default=888,
                        help='index of p1, can be either negative or positive indices. '
                             'e.g., p1=0 means p1 is the first of the substrate; '
                             'p1=-2 means p1 is the last second of the substrate sequence. '
                             'If you use . as a delimiter between P1 and P1 prime position, ignore this flag.')
    parser.add_argument('-p1_pdb', '--p1_index_pdb', type=int,
                        default=7,
                        help='pdb index of p1.')
    parser.add_argument('-struct', '--starting_structures', type=str,
                        default='/projects/f_sdk94_1/protease_3C/final_3C_protease_peptide_structures/2b0f_wt_pep.pdb',
                        help='Directory of starting structure(s). It currently cannot handle multiple starting structures. '
                             'If multiple starting strctures, make sure names of starting structures match '
                             'protease_name in the info_file.')
    parser.add_argument('-script_path', '--script_path', type=str,
                        default = '/projects/f_sdk94_1/EnzymeModelling/Protease-Substrate-Design',
                        help='Where to save output file for all commands')
    parser.add_argument('-o', '--output_name', type=str,
                        default='new.command.txt',
                        help='output command file name')
    parser.add_argument('-f', '--output_format', choices=['sequence','silent'],
                        default='sequence',
                        help='two options of output format, either sequence, or silent files. '
                             'Silent file mode will concatenate sequences which have same patterns into one same file.')
    parser.add_argument('-os', '--output_structure_directory', type=str,
                        default='/projects/f_sdk94_1/EnzymeModelling/Protease3C/2bof',
                        help='where to put generated Rosetta structures')
    parser.add_argument('-constraint', '--constraint_suffix', type=str,
                        default="-site 215 -cons tev.cst -cr 39 74 144 -dprot 0 -dpep 0",
                        help='Specify all flags for design_protease.py, e.g., -site 215 -cons tev.cst -cr 39 74 144 -dprot 0 -dpep 0 '
                             '-site specifies the starting pose index of threading, -cr specifies three catalytic residues.')
    parser.add_argument('-jn', '--job_name', type=str,
                        default=None,
                        help='job name for Rosetta modeling')
    parser.add_argument('-bs', '--batch_size', type=int,
                        default=5,
                        help='number of commands passed to text_to_slurm.py as -batch')
    parser.add_argument('-cd', '--command_directory', type=str,
                        default='/projects/f_sdk94_1/EnzymeModelling/Commands_OYDV',
                        help='directory passed to text_to_slurm.py as -path_sh; created if missing')
    # The description string used to be passed as `default`; with type=int
    # argparse tried to convert it and aborted whenever -mem was omitted.
    # 12000 matches the value the batch step previously hard-coded.
    parser.add_argument('-mem', '--memory', type=int,
                        default=12000,
                        help='Memory assigned to the processor')
    return parser.parse_args()
62 |
def createCrys(p_wt, p, ind, root):
    """Write a copy of a starting PDB with the P1/P1' residue names swapped.

    Every non-REMARK line of ``root`` that contains the wild-type motif
    (three-letter residue name, one space, residue number) has the residue
    name replaced by the one requested in ``p``. The result is written next
    to ``root`` as ``<root.stem>_<p>.pdb``.

    Args:
        p_wt: Two-letter string with the one-letter codes currently present
            at P1 and P1' in ``root`` (e.g. ``'QS'``).
        p: Two-letter string with the one-letter codes to put at P1 and P1'.
        ind: PDB residue number of P1; P1' is taken to be ``ind + 1``.
        root: ``pathlib.Path`` of the starting structure.

    Raises:
        KeyError: If a letter of ``p_wt`` or ``p`` is not a known one-letter
            residue code. Raised before any output file is created.
    """
    letter1 = 'ARNDBCEQZGHILKMFPSTWYV'
    letter3 = ['ALA', 'ARG', 'ASN', 'ASP', 'ASX', 'CYS', 'GLU', 'GLN', 'GLX', 'GLY', 'HIS', 'ILE', 'LEU', 'LYS',
               'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL']
    letterMap = dict(zip(letter1, letter3))

    # Resolve every residue name up front so that a bad letter cannot leave
    # an empty, half-written output file behind.
    swaps = (
        (letterMap[p_wt[0]] + ' ' + str(ind), letterMap[p[0]]),
        (letterMap[p_wt[1]] + ' ' + str(ind + 1), letterMap[p[1]]),
    )

    out_path = root.parent / (root.stem + '_' + p + '.pdb')
    with open(root, 'r') as fp, open(out_path, 'w') as gp:
        for line in fp:
            # The original guard was `line.find('REMARK') != 1`, which is
            # true for practically every line; REMARK records are meant to
            # pass through unchanged.
            if not line.startswith('REMARK'):
                for motif, new_name in swaps:
                    pos = line.find(motif)
                    if pos != -1:
                        # Only the first occurrence on a line is replaced.
                        line = line[:pos] + new_name + line[pos + 3:]
            gp.write(line)
85 |
def toCommands(args, info_set, constraint, mode = 'silent'):
    """Write one ``design_protease.py`` command line per substrate sequence.

    The commands go to ``<args.script_path>/<args.output_name>``.

    Args:
        args: Parsed CLI namespace; reads ``output_name``, ``script_path``,
            ``p1_index_substrate``, ``starting_structures`` and
            ``output_structure_directory``.
        info_set: ``(sequences, mutant_list)`` - two equally long iterables.
            An empty-string mutant means "no protease mutation prefix".
        constraint: Extra flags appended verbatim to every command.
        mode: Accepted for backward compatibility but currently ignored;
            only per-sequence commands are generated.
    """
    p1_ind = args.p1_index_substrate
    root = Path(args.starting_structures)
    outStructFolder = args.output_structure_directory
    sequences, mutant_list = info_set[0], info_set[1]

    with open(os.path.join(args.script_path, args.output_name), 'w') as fp:
        # zip() walks both containers by position. Indexing a label-indexed
        # pandas Series with an integer (`mutant_list[i]`) is deprecated and
        # no longer falls back to positional lookup in recent pandas.
        for seq, mutant in zip(sequences, mutant_list):
            p1p11, newSeq = locate_p1p11(seq, p1_ind)
            newStructPath = root.parent / (root.stem + '_' + p1p11 + '.pdb')
            name = newSeq if mutant == '' else mutant + '_' + newSeq
            fp.write('python design_protease.py -s ' + str(newStructPath) +
                     ' -od ' + outStructFolder + ' -seq ' + newSeq + ' -name ' + name +
                     " " + constraint + '\n')
117 |
def locate_p1p11(seq, p1_ind=None):
    """Return the P1/P1' residue pair and the delimiter-free sequence.

    If ``seq`` contains a ``'.'`` it marks the scissile bond: P1 is the
    residue before it and P1' the residue after it. Otherwise ``p1_ind`` is
    the (positive or negative) index of P1 in ``seq``.

    Args:
        seq: Substrate sequence, optionally with one ``'.'`` delimiter.
        p1_ind: Index of P1, used only when ``seq`` has no ``'.'``.

    Returns:
        Tuple ``(p1p11, sequence)`` where ``p1p11`` is a two-letter string
        and ``sequence`` is ``seq`` with the delimiter removed.

    Raises:
        ValueError: If the delimiter sits at either end of ``seq``, or if no
            delimiter is present and ``p1_ind`` is missing or does not leave
            room for a P1' residue.
    """
    dotInd = seq.find('.')
    if dotInd != -1:
        if dotInd == 0 or dotInd == len(seq) - 1:
            raise ValueError(
                "'.' must sit between P1 and P1' but is at the edge of {!r}".format(seq))
        return seq[dotInd - 1] + seq[dotInd + 1], seq[:dotInd] + seq[dotInd + 1:]

    if p1_ind is None:
        raise ValueError(
            "sequence {!r} has no '.' delimiter and no P1 index was given".format(seq))
    n = len(seq)
    # P1' is seq[p1_ind + 1], so the last position (n - 1 or -1) cannot be P1.
    if not (0 <= p1_ind <= n - 2 or -n <= p1_ind <= -2):
        raise ValueError(
            "P1 index {} is not valid for sequence {!r} (length {})".format(p1_ind, seq, n))
    return seq[p1_ind] + seq[p1_ind + 1], seq
128 |
def printToBatchCommand(args):
    """Run ``text_to_slurm.py`` to split the command file into batch scripts.

    Args:
        args: Parsed CLI namespace; reads ``info_file``, ``job_name``,
            ``memory``, ``command_directory``, ``batch_size``,
            ``script_path`` and ``output_name``.

    The job name defaults to the stem of ``args.info_file``. The memory
    request now follows ``args.memory`` (it used to be hard-coded to 12000
    regardless of the flag); 12000 is kept as the fallback when no value is
    available.
    """
    import subprocess  # local import: only this helper launches a process

    jobName = args.job_name if args.job_name is not None else Path(args.info_file).stem
    mem = args.memory if args.memory is not None else 12000
    scriptPath = args.script_path

    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters intact.
    splitCommand = [
        "python", os.path.join(scriptPath, "text_to_slurm.py"),
        "-txt", os.path.join(scriptPath, args.output_name),
        "-job_name", jobName,
        "-mem", str(mem),
        "-path_operation", scriptPath,
        "-path_sh", args.command_directory,
        "-batch", str(args.batch_size),
        "-time", "3-00:00:00",
    ]
    result = subprocess.run(splitCommand)
    if result.returncode != 0:
        print('text_to_slurm.py exited with status {}'.format(result.returncode))
146 |
def mkdir(path):
    """Create directory ``path`` (a ``pathlib.Path``) and any missing parents.

    Does nothing if the directory already exists. ``exist_ok=True`` replaces
    the former exists()-then-mkdir() sequence, which could fail if another
    process created the directory in between.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    path.mkdir(parents=True, exist_ok=True)
150 |
def main(args):
    """Prepare starting structures, write the command file and batch it.

    Steps:
      1. Read the info file (CSV whose first column holds the substrate
         sequences; a column whose name contains "mutant" is optional).
      2. For every P1/P1' pair that has no starting structure yet, derive one
         from ``args.starting_structures`` via ``createCrys``.
      3. Write the ``design_protease.py`` commands and split them into
         batch scripts.

    Args:
        args: Namespace produced by ``parse_args``.
    """
    mutSeqLabel = Path(args.info_file)
    p1_ind = args.p1_index_substrate
    p1_ind_pdb = args.p1_index_pdb
    starting_structure_path = Path(args.starting_structures)
    structure_save_path = starting_structure_path.parent
    p1p11_wt = args.p1p11_wt
    output_format = args.output_format  # renamed: `format` shadowed the builtin
    constraintSuffix = args.constraint_suffix
    mkdir(Path(args.command_directory))

    # Use intermediate output from CleavEX as the input. Need to update in the future
    df = pd.read_csv(mutSeqLabel, index_col=0)
    mutant_list = [''] * df.shape[0]
    for column_name in df.columns:
        if 'mutant' in column_name.lower():
            # A plain list keeps later positional access well defined; the
            # Series itself is indexed by sequence, not by position.
            mutant_list = df[column_name].tolist()
    sequences = df.index.values

    new_c = 0
    for seq in sequences:
        p1p11, _ = locate_p1p11(seq, p1_ind)
        variant_path = structure_save_path / (starting_structure_path.stem + '_' + p1p11 + '.pdb')
        if variant_path.is_file():
            # A structure for this P1/P1' pair already exists.
            continue
        createCrys(p1p11_wt, p1p11, p1_ind_pdb, starting_structure_path)
        new_c += 1
    print('Swapping {} number of P1P11 combinations'.format(new_c))

    toCommands(args, (sequences, mutant_list), constraintSuffix, mode=output_format)
    printToBatchCommand(args)


if __name__ == '__main__':
    args = parse_args()
    main(args)
190 |
--------------------------------------------------------------------------------
/helper/text_to_slurm.py:
--------------------------------------------------------------------------------
1 | # This lets you read a list of commands from a text file given in a flag and does all the slurming for you.
2 | # By default they are run at /scratch/ss3410/GCNN. Additionally, you can specify where to put the .sh output file.
3 | # By default they go down on file directory ex) /scratch/ss3410/GCNN/
4 |
5 | """
6 | python text_to_slurm.py -txt /projects/f_sdk94_1/EnzymeModelling/Protease-Substrate-Design/HCV_D183A_commands.txt -job_name HCV_D183A -mem 12000 -path_operation /projects/f_sdk94_1/EnzymeModelling/Protease-Substrate-Design -path_sh /projects/f_sdk94_1/EnzymeModelling/Commands -batch 20 -time 2-00:00:00
7 | """
8 |
9 | import argparse
10 | import os
11 |
# Command-line interface.
parser = argparse.ArgumentParser()
parser.add_argument("-txt", type=str, help="text file with one command per line")
parser.add_argument("-job_name", type=str, help="prefix of the job names and of the generated .sh files")
parser.add_argument("-path_operation", type=str, help="directory the job changes into before running")
parser.add_argument("-path_sh", type=str, help="directory that receives the generated .sh files")
parser.add_argument("-mem", type=str, help="value for #SBATCH --mem")
parser.add_argument("-delay", type=int)
parser.add_argument("-batch", type=int, help="number of commands per generated .sh file")
parser.add_argument("-np",type=int, help="ratio in each batch that should be parallel")
parser.add_argument("-time", type=str, help="value for #SBATCH --time")

args = parser.parse_args()

filename = args.txt
job_name = args.job_name
path = args.path_operation
sh = args.path_sh
delay = args.delay  # parsed but not used below
mem = args.mem
batch = args.batch
np = args.np
time = args.time

# Defaults for omitted options.
if np is None:
    np = 1

if batch is None:
    # This used to be `batch == 1`, a comparison that left batch as None.
    batch = 1

if delay is None:
    delay = ""

if mem is None:
    mem = 2000

if path is None:
    # NOTE(review): this default differs in case and spelling from the
    # "/projects/f_sdk94_1/EnzymeModelling" paths in the usage example - confirm.
    path = "/projects/f_SDK94_1/EnymeModelling/Commands"

if job_name is None:
    raise ValueError("no name given")

if time is None:
    time = "3-00:00:00"

# A non-positive batch would never advance the loop below, and a
# non-positive np breaks the modulo test.
if batch < 1:
    raise ValueError("-batch must be at least 1")
if np < 1:
    raise ValueError("-np must be at least 1")

if filename is None or not os.path.exists(filename):
    raise ValueError("file specified not found")

with open(filename) as f:
    lineList = f.readlines()

header ="""#!/bin/bash
#SBATCH --export=ALL
#SBATCH --job-name {0}.{1}
#SBATCH --partition main
#SBATCH --ntasks {2}
#SBATCH --cpus-per-task 1
#SBATCH --mem {3}
#SBATCH --output {0}.{1}.log
#SBATCH --error {0}.err
#SBATCH --time {5}
#SBATCH --begin now

cd {4}

"""

# Blank lines are dropped so that no empty `srun` call is generated.
lineList = [x.strip() for x in lineList if x.strip()]

if sh is None:
    sh = "../Commands/"
else:
    sh += "/"

# One .sh file per group of `batch` commands, numbered from 1. The loop used
# to run while `i < len(lineList) + batch`, which always produced one extra
# script containing no command.
counter = 1
for start in range(0, len(lineList), batch):
    command = r"{}{}_{}.sh".format(sh, job_name, counter)
    header_specific = header.format(job_name, counter, np, mem, path, time)
    # Mode "w" truncates an existing file, so no prior removal is needed.
    with open(command, "w") as f:
        f.write(header_specific)
        for idx in range(start, min(start + batch, len(lineList))):
            line = lineList[idx]
            if idx % np == 0:
                # Every np-th command runs in the foreground ...
                f.write("\nsrun {}\n".format(line))
            else:
                # ... the others are sent to the background.
                f.write("\nsrun {} &\n".format(line))
        if np > 1:
            # Keep the job alive until the background steps have finished.
            f.write("\nwait\n")
        f.write("printf done\n")
    counter += 1
109 |
110 |
111 |
112 |
--------------------------------------------------------------------------------
/model/findBestAcc.py:
--------------------------------------------------------------------------------
1 | # Find best acc from logits calculation
2 | # Author: Changpeng Lu
3 |
4 | import os
5 | import time
6 | import logging
7 | import argparse
8 | import numpy as np
9 | import pandas as pd
10 |
11 | import torch
12 | import torch.nn as nn
13 | import torch.nn.functional as F
14 | import torch.optim as optim
15 | from torch.utils.data import DataLoader
16 | import torchvision.datasets as datasets
17 | import torchvision.transforms as transforms
18 | import math
19 | import scipy.sparse as sp
20 | from torch.nn.parameter import Parameter
21 | #os.chdir('/scratch/cl1205/protease-gcnn-pytorch/model')
22 | #print(os.getcwd())
23 | from utils import *
24 | from models import *
25 |
def findBestAcc(dataset = 'HCV_WT_binary_10_ang_aa_energy_7_energyedge_5_hbond',
                testset = 'HCV_WT_binary_10_ang_aa_energy_7_energyedge_5_hbond',
                is_energy_only = True, hidden = 20, valset = None,
                modelPath = '/scratch/cl1205/protease-gcnn-pytorch/model/outputs/tt_finalize_20220211/HCV_WT_binary_10_ang_energy_7_energyedge_5_hbond'):
    """Evaluate every saved checkpoint under ``modelPath`` and keep the best.

    Checkpoint files (``*.pth``) are expected to be named like
    ``model_for_test_seed_3_hidden_20_linear_0_lr_0.001_wd_0.005_bs_50_dt_0.3.pth``;
    seed, learning rate, weight decay, batch size and dropout are parsed from
    the file name.

    Args:
        dataset: Data set name handed to ``load_data``.
        testset: Passed to ``load_data`` as ``is_test``.
        is_energy_only: Passed to ``load_data`` as ``energy_only``.
        hidden: Hidden size used to rebuild the ``GCN`` model.
        valset: Optional; passed to ``load_data`` as ``is_val``.
        modelPath: Directory that is searched recursively for checkpoints.

    Returns:
        ``(logit_test_fin, max_acc, path_fin)``: the test logits of the most
        recent checkpoint that improved any seed's best accuracy (``None`` if
        none did), and per-seed lists (seeds 1-3) of the best accuracy and
        the checkpoint path that achieved it.
    """
    if valset is None:
        adj_ls, features, labels, sequences, proteases, labelorder, train_mask, test_mask = load_data(dataset, is_test=testset, norm_type=True, test_format = 'split', energy_only=is_energy_only, noenergy=False)
    else:
        adj_ls, features, labels, sequences, proteases, labelorder, train_mask, val_mask, test_mask = load_data(dataset, is_test=testset, is_val=valset, norm_type=True, test_format = 'split', energy_only=is_energy_only, noenergy=False)

    folder = modelPath # /projects/f_sdk94_1/PGCN/outputs/tt_finalize_20210413
    # NOTE(review): results are stored at index seed-1, so seeds are assumed
    # to be 1, 2 or 3 - confirm against the training runs.
    max_acc = [0,0,0]
    path_fin = ["","",""]
    # Defined up front so the return statement cannot hit an unbound name
    # when no checkpoint scores above 0.
    logit_test_fin = None
    for root, dirs, files in os.walk(folder):
        for name in files:
            if name.split('.')[-1] != 'pth':
                continue
            path = root + os.sep + name
            fields = name.split('_')
            try:
                bs = int(fields[-3])
                wd = float(fields[-5])
                lr = float(fields[-7])
                dt = float(fields[-1][0:-4])
                seed = int(fields[4])
            except (ValueError, IndexError):
                # A .pth file that does not follow the naming scheme.
                print('skipping checkpoint with unexpected name: ' + path)
                continue
            model = GCN(nnode=features.shape[1],
                        nfeat=features.shape[2],
                        mfeat=adj_ls.shape[3],
                        hidden1=hidden,
                        depth=2,
                        natt=0, # one layer
                        linear=0,
                        weight='pre',
                        is_des=False,
                        nclass=2, #labels.shape[1],
                        dropout=dt,
                        cheby=None)

            logit_test, acc_test = test(X=features, graph=adj_ls, y=labels, testmask=test_mask, model_for_test=model,
                                        hidden1=hidden, linear=0, learning_rate=lr, weight_decay=wd, batch_size=bs,
                                        dropout=dt,
                                        path_save=path,
                                        new=False)
            if acc_test > max_acc[seed-1]:
                logit_test_fin = logit_test
                max_acc[seed-1] = acc_test
                path_fin[seed-1] = path
    return logit_test_fin, max_acc, path_fin
74 |
def test(X, graph, y, testmask, model_for_test, hidden1, linear, learning_rate, weight_decay, batch_size, dropout, path_save,new=False):
    """Load a checkpoint into ``model_for_test`` and run it without gradients.

    Args:
        X: Node features, indexed by ``testmask`` when ``new`` is False.
        graph: Adjacency input, indexed the same way.
        y: Labels; ``torch.argmax(y, axis=1)`` is used as the class target.
        testmask: Index/mask selecting the test samples.
        model_for_test: Model whose ``state_dict`` is replaced by the one
            stored under the ``'state_dict'`` key of the checkpoint.
        hidden1, linear, learning_rate, weight_decay, batch_size, dropout:
            Not used here; kept so existing callers keep working.
        path_save: Path of the checkpoint file.
        new: If False, evaluate on the masked test samples and also return
            the accuracy; otherwise run on the inputs as given.

    Returns:
        ``(logits, accuracy)`` when ``new`` is False, only ``logits``
        otherwise. If the checkpoint cannot be loaded, its path and the
        error are printed and ``(None, 0)`` is returned in both modes.
    """
    try:
        checkpoint = torch.load(path_save)
    except Exception as err:  # narrowed from a bare except (which also caught Ctrl-C)
        print(path_save)
        print(err)
        return None,0

    model_for_test.load_state_dict(checkpoint['state_dict'])
    if new == False:
        X = X[testmask]
        graph = graph[testmask]
        y = y[testmask]
        with torch.no_grad():
            model_for_test.eval()
            logits_test = model_for_test(X, graph)
            # `accuracy` comes from the star import of utils.
            test_acc = accuracy(logits_test, torch.argmax(y,axis=1))
        return logits_test, test_acc

    with torch.no_grad():
        model_for_test.eval()
        logits_test = model_for_test(X, graph)
    return logits_test
105 |
# Report, for every HCV variant, the best accuracies and the checkpoint paths
# that produced them (as returned by findBestAcc).
HCV_OUTPUT_ROOT = '/scratch/cl1205/protease-gcnn-pytorch/model/outputs/hcv_20220708_trisplit_seqOnly/'
for i in ['WT', 'A171T', 'D183A', 'Triple', 'all']:
    dataset_name = 'HCV_' + i + '_binary_10_ang_aa'
    # The same dataset string is used for the train, test and validation
    # dataset arguments; checkpoints live in a folder of the same name.
    logit, acc, path = findBestAcc(dataset_name,
                                   dataset_name,
                                   False, 20,
                                   dataset_name,
                                   HCV_OUTPUT_ROOT + dataset_name)
    print(acc, path)
113 |
114 | #for i in ['WT','A171T','D183A','Triple', 'all']:
115 | #logit, acc, path = findBestAcc('TEV_' + i + '_binary_10_ang_aa_energy_7_energyedge_5_hbond',
116 | # 'TEV_' + i + '_binary_10_ang_aa_energy_7_energyedge_5_hbond',
117 | # False, '/projects/f_sdk94_1/PGCN/TEV/WT/outputs/tt_finalize_energy_only/')
118 | #print(acc, path)
119 |
120 | # logit, acc, path = findBestAcc('HCV_' + i + '_binary_10_ang_aa_energy_7_energyedge_5_hbond',
121 | # 'HCV_' + i + '_binary_10_ang_aa_energy_7_energyedge_5_hbond',
122 | # False,
123 | # '/scratch/cl1205/protease-gcnn-pytorch/model/outputs/tt_finalize_20220211/HCV_' \
124 | # + i + '_binary_10_ang_aa_energy_7_energyedge_5_hbond/')
125 | #'/projects/f_sdk94_1/PGCN/TEV/WT/outputs/tt_finalize_aa/')
126 | #for i in ['all']: #['WT','A171T','D183A','Triple', 'all']:
127 | # logit, acc, path = findBestAcc('TEV_' + 'WT' + '_binary_10_ang_aa_energy_7_energyedge_5_hbond',
128 | # 'TEV_' + 'WT' + '_binary_10_ang_aa_energy_7_energyedge_5_hbond',
129 | # False, 10,
130 | # '/scratch/cl1205/protease-gcnn-pytorch/model/outputs/tt_tev_20220403/TEV_' \
131 | # + 'WT' + '_binary_10_ang_aa_energy_7_energyedge_5_hbond_epoch_' + i + '/')
132 | #for model in ['all']:
133 | # logit, acc, path = findBestAcc('TEV_' + model + '_binary_10_ang_aa_energy_7_energyedge_5_hbond',
134 | # 'TEV_' + model + '_binary_10_ang_aa_energy_7_energyedge_5_hbond',
135 | # True, 20,
136 | # 'TEV_' + model + '_binary_10_ang_aa_energy_7_energyedge_5_hbond',
137 | # '/scratch/cl1205/protease-gcnn-pytorch/model/outputs/tt_tev_20220629_trisplit/TEV_' + model + '_binary_10_ang_energy_7_energyedge_5_hbond/')
138 | # print(acc, path)
139 |
140 | #for model in ['all']:
141 | # logit, acc, path = findBestAcc('TEV_' + model + '_binary_10_ang_aa',
142 | # 'TEV_' + model + '_binary_10_ang_aa',
143 | # False, 20,
144 | # 'TEV_' + model + '_binary_10_ang_aa',
145 | # '/scratch/cl1205/protease-gcnn-pytorch/model/outputs/tt_tev_20220629_trisplit/TEV_' + model + '_binary_10_ang_aa_/')
146 | # print(acc, path)
147 |
--------------------------------------------------------------------------------
/model/findBestAcc_from_log.py:
--------------------------------------------------------------------------------
1 | import os
2 | #import pandas as pd
3 | import numpy as np
4 |
5 | max_acc = 0
6 | for f in os.listdir('./'):
7 | if f.split('.')[-1] == 'out' and f.split('.')[0] == 'tt':
8 | with open(f, 'r') as fp:
9 | lines = fp.readlines()
10 | best_epoch = int(lines[-1].split(':')[-1].strip())
11 | for i, line in enumerate(lines):
12 | if line.split(':')[-1].strip() == str(best_epoch):
13 | ind = i
14 | break
15 | val_acc = float(lines[ind+4].split(':')[-1].strip())
16 | if val_acc > max_acc:
17 | max_acc = val_acc
18 | good_f = f
19 | print(good_f)
20 | print('accuracy: {}'.format(max_acc))
21 |
22 |
--------------------------------------------------------------------------------
/model/importance.py:
--------------------------------------------------------------------------------
1 | # This script is to calculate dropping accuracy for each node/edge to show each importance
2 | # Author: Changpeng Lu
3 | # Usage
4 | # python importance.py --importance --dataset HCV_ternary_10_ang_aa_energy_7_energyedge_5_hbond --test_dataset HCV_ternary_10_ang_aa_energy_7_energyedge_5_hbond --hidden1 20 --depth 2 --linear 0 --att 0 --batch_size 500 --lr 0.005 --dropout 0.05 --weight_decay 5e-4 --save 'outputs/tt/HCV_ternary_10_ang_aa_energy_7_energyedge_5_hbond/bs_500/'
5 |
6 | from __future__ import division
7 | from __future__ import print_function
8 |
9 | import os
10 | import time
11 | import logging
12 | import argparse
13 | import numpy as np
14 | import pandas as pd
15 |
16 | import torch
17 | import torch.nn as nn
18 | import torch.nn.functional as F
19 | import torch.optim as optim
20 | from torch.utils.data import DataLoader
21 | import torchvision.datasets as datasets
22 | import torchvision.transforms as transforms
23 | import math
24 | import scipy.sparse as sp
25 | from torch.nn.parameter import Parameter
26 |
27 | from utils import *
28 | from models import *
29 |
30 | # Training settings
# Command-line interface of the variable-importance script.
parser = argparse.ArgumentParser()
parser.add_argument('--gpu', type=int, default=0,
                    help='number of gpus.')
#parser.add_argument('--fastmode', action='store_true', default=False,
#                    help='Validate during training pass.')
parser.add_argument('--seed', type=int, default=42, help='Random seed.')
parser.add_argument('--epochs', type=int, default=200,
                    help='Number of epochs to train.')
parser.add_argument('--lr', type=float, default=0.01,
                    help='Initial learning rate.')
parser.add_argument('--weight_decay', type=float, default=5e-4,
                    help='Weight decay (L2 loss on parameters).')
parser.add_argument('--hidden1', type=int, default=10,
                    help='Number of hidden units for nodes.')
parser.add_argument('--depth', type=int, default=10, help='Number of gcnn layers')
parser.add_argument('--att', type=int, default=0, help='the dimension of weight matrices for key and query')
parser.add_argument('--linear', type=int, default=0)
parser.add_argument('--dropout', type=float, default=0.1,
                    help='Dropout rate (1 - keep probability).')
parser.add_argument('--no_energy', action='store_true', default=False)
parser.add_argument('--test_dataset', type=str)
parser.add_argument('--data_path', default=None, type=str)  # '/projects/f_sdk94_1/PGCN/Data/new_subs'
parser.add_argument('--test_logits_path', type=str)
parser.add_argument('--val_dataset', type=str, default=None)
parser.add_argument('--dataset', type=str, help='input dataset string')
parser.add_argument('--model', type=str, default='gcn', choices=['gcn', 'chebyshev'])
parser.add_argument('--scale_type', choices=['exp', 'minmax'], default='exp')
parser.add_argument('--max_degree', type=int, default=3, help='number of supports')
parser.add_argument('--batch_size', type=int, default=8)
parser.add_argument('--weight', type=str, default='pre', choices=['pre', 'post'])
parser.add_argument('--dim_des', action='store_true', default=False)
parser.add_argument('--new', action='store_true', default=False)
parser.add_argument('--energy_only', action='store_true', default=False)
parser.add_argument('--seq_only', action='store_true', default=False)
parser.add_argument('--feature', choices=['d', 's+d', 's', 'e', 's+e', 's+e+d'], default='s+e')
parser.add_argument('--save', type=str, default='./experiment1')
# The old help text used '' inside a single-quoted literal, which Python reads
# as two adjacent strings, so the apostrophe never reached the --help output.
parser.add_argument('--importance', action='store_true', default=False,
                    help="Whether to calculate each variable's importance.")
args = parser.parse_args()

makedirs(args.save)
# os.path.join with a single component is just that component.
logger = get_logger(logpath='logs', filepath=os.path.abspath(__file__))
logger.info(args)
73 |
74 | # test
def test(X, graph, y, testmask, model_for_test, hidden1, linear, learning_rate, weight_decay, batch_size, dropout, path_save,new=False):
    """Restore a checkpoint into ``model_for_test`` and evaluate it once.

    The checkpoint at ``path_save`` must provide ``'epoch'`` (logged) and
    ``'state_dict'`` (loaded into the model). ``hidden1``, ``linear``,
    ``learning_rate``, ``weight_decay``, ``batch_size`` and ``dropout`` are
    accepted but not read here.

    Returns ``(logits, accuracy)`` on the ``testmask`` subset when ``new`` is
    false, or only the logits of the unmasked inputs when ``new`` is true.
    """
    state = torch.load(path_save)
    logger.info("best epoch is:" + str(state['epoch']))
    model_for_test.load_state_dict(state['state_dict'])
    print('model loaded')

    if new == False:
        print('testmask is not none. good.')
        # Restrict every input to the held-out samples.
        X, graph, y = X[testmask], graph[testmask], y[testmask]
        model_for_test.eval()
        with torch.no_grad():
            logits_test = model_for_test(X, graph)
            targets = torch.argmax(y, axis=1)
            test_acc = accuracy(logits_test, targets)
        return logits_test, test_acc

    # Unlabelled data: nothing to mask and no accuracy to compute.
    model_for_test.eval()
    with torch.no_grad():
        logits_test = model_for_test(X, graph)
    return logits_test
103 |
104 |
105 | # variable importance
def importance(all_features, all_graph, ys, full_test_mask, trained_model, hidden1, linear, learning_rate, \
               weight_decay, batch_size, dropout, path_save):
    """Permutation importance of every node and every node pair (edge).

    For each variable, the corresponding slice of the test-set inputs is
    permuted across samples and the accuracy returned by ``test`` is recorded.
    Variables ``0 .. num_node-1`` are nodes (each node feature is permuted
    independently). The rest are the unordered node pairs ``(a, b)`` with
    ``a < b``; each edge feature is permuted independently and mirrored to
    ``(b, a)``.

    Args:
        all_features: feature tensor, indexed as ``[sample, node, feature]``.
        all_graph: adjacency tensor, indexed as
            ``[sample, node, node, edge_feature]``.
        ys: label tensor for all samples.
        full_test_mask: mask/index selecting the test samples.
        trained_model: model handed to ``test`` (which reloads ``path_save``).
        hidden1, linear, learning_rate, weight_decay, batch_size, dropout,
        path_save: forwarded unchanged to ``test``.

    Returns:
        1-D ``numpy`` array with one accuracy per variable (nodes first).
    """
    num_node = all_graph.shape[1]
    # Upper-triangle node pairs, in row-major order.
    edge_ind = [(a, b) for a in range(num_node) for b in range(a + 1, num_node)]
    var = num_node + len(edge_ind)  # the number of nodes and edges
    acc_arr = np.zeros(var)
    logger.info('number of candidate node/edges:{}'.format(var))
    logger.info('number of nodes:{}'.format(num_node))
    logger.info('number of edges:{}'.format(var - num_node))

    # Select the test subset once; every variable starts from a fresh clone.
    base_adj = all_graph[full_test_mask]
    base_features = all_features[full_test_mask]
    tmp_y = ys[full_test_mask]
    n_samples = base_features.shape[0]
    # Inputs are already restricted to the test set, so ``test`` keeps all rows.
    # (``np.bool`` was removed from NumPy; the builtin ``bool`` is equivalent.)
    OOB_mask = np.ones(n_samples, dtype=bool)

    def _sample_permutation(device):
        # Draw from NumPy's global RNG, as before, but apply the permutation by
        # indexing. np.random.shuffle on a torch tensor swaps *views* and ends
        # up duplicating values, and shuffling a ``.cpu().numpy()`` copy never
        # reaches a tensor that lives on another device.
        return torch.from_numpy(np.random.permutation(n_samples)).long().to(device)

    for i in range(var):  # for each variable
        tmp_adj_ls = base_adj.clone()
        tmp_features = base_features.clone()
        if i < num_node:
            for j in range(tmp_features.shape[2]):  # for each node feature
                perm = _sample_permutation(tmp_features.device)
                tmp_features[:, i, j] = tmp_features[perm, i, j]
            print("Shuffling Node Feature: {}".format(i + 1))
        else:
            edge_node = i - num_node
            a, b = edge_ind[edge_node]
            for j in range(tmp_adj_ls.shape[3]):  # for each edge feature
                perm = _sample_permutation(tmp_adj_ls.device)
                after_shuffle = tmp_adj_ls[perm, a, b, j]
                tmp_adj_ls[:, a, b, j] = after_shuffle
                tmp_adj_ls[:, b, a, j] = after_shuffle  # mirror to the (b, a) entry
            print("Shuffling Edge Feature: {}".format(edge_node + 1))
        logit_vi, acc_vi = test(X=tmp_features, graph=tmp_adj_ls, y=tmp_y, testmask=OOB_mask, model_for_test=trained_model, \
                                hidden1=hidden1, linear=linear, learning_rate=learning_rate, \
                                weight_decay=weight_decay, batch_size=batch_size, dropout=dropout, path_save=path_save)
        if i < num_node:
            logger.info("Node {:04d} | Test Accuracy: {:.4f}".format(i + 1, acc_vi))
        else:
            logger.info("Edge {:04d} | Test Accuracy: {:.4f}".format(i - num_node + 1, acc_vi))
        acc_arr[i] = acc_vi
    return acc_arr
148 |
# Variable importance needs labels and a test mask, which the --new branch
# below never loads; fail with a clear message instead of a late NameError.
if args.importance and args.new:
    parser.error('--importance cannot be combined with --new (no labels or test mask are loaded)')

is_energy_only = args.energy_only
no_energy = bool(args.no_energy)
if not args.new:
    if args.val_dataset is not None:
        logger.info('TripleSplit!')
        adj_ls, features, labels, sequences, labelorder, train_mask, val_mask, test_mask = load_data(args.dataset, is_test=args.test_dataset, is_val=args.val_dataset, norm_type=True, scale_type=args.scale_type, test_format = 'index', energy_only = args.energy_only, seq_only=args.seq_only, feature_type=args.feature)
        logger.info("|Training| {},|Validation| {}, |Testing| {}".format(np.sum(train_mask), np.sum(val_mask), np.sum(test_mask)))
        tmp_mask = train_mask
    else:
        adj_ls, features, labels, sequences, labelorder, train_mask, val_mask = load_data(args.dataset, is_test=args.test_dataset, is_val=args.val_dataset, norm_type=True, scale_type=args.scale_type, test_format = 'index', energy_only = args.energy_only, seq_only = args.seq_only, feature_type=args.feature) #scale_type determines node feature scale
        # ``np.bool`` was removed from NumPy; the builtin ``bool`` is equivalent.
        tmp_mask = np.array([(not idx) for idx in val_mask], dtype=bool)
        # Size of Different Sets
        logger.info("|Training| {},|Testing| {}".format(np.sum(tmp_mask), np.sum(val_mask)))
        # NOTE(review): with a two-way split the second mask is reported above
        # as the testing set, and ``test_mask`` was otherwise unbound here even
        # though it is used below. Confirm against ``load_data``.
        test_mask = val_mask
else:
    adj_ls, features, sequences, labelorder = load_data(args.dataset, norm_type=True, energy_only=is_energy_only, seq_only=args.seq_only, noenergy=args.no_energy, data_path=args.data_path, feature_type=args.feature)

cheby_params = args.max_degree if args.model == 'chebyshev' else None
weight_mode = args.weight
dim_des = args.dim_des

model = GCN(nnode=features.shape[1],
            nfeat=features.shape[2],
            mfeat=adj_ls.shape[3],
            # ngcn=args.ngcn,
            hidden1=args.hidden1,
            depth=args.depth,
            # hidden2=args.hidden2,
            natt=args.att, # one layer
            linear=args.linear,
            weight=weight_mode,
            is_des=dim_des,
            nclass=len(labelorder),
            dropout=args.dropout,
            cheby=cheby_params)
logger.info(model)
logger.info('Number of parameters: {}'.format(count_parameters(model)))

batch_size = args.batch_size

# load trained model and test first
if not args.new:
    logit_test, acc_test = test(X=features, graph=adj_ls, y=labels, testmask=test_mask, model_for_test=model, hidden1=args.hidden1, linear=args.linear, learning_rate=args.lr, weight_decay=args.weight_decay, batch_size=args.batch_size, dropout=args.dropout, path_save=args.save, new=False)
    print("Original Test Accuracy is:" + str(acc_test))
else:
    logger.info('testing begin')
    logit_test = test(X=features, graph=adj_ls, y=None, testmask=None, model_for_test=model, hidden1=args.hidden1, linear=args.linear, learning_rate=args.lr, weight_decay=args.weight_decay, batch_size=args.batch_size, dropout=args.dropout, path_save=args.save, new=True)

    logger.info('logits printing')
    logger.info(args.save.split('/')[-1][:-4])
    # Output folder: --test_logits_path joined with the checkpoint path minus
    # its first '/'-separated component, remaining components joined by '-'.
    dump_path = os.path.join(args.test_logits_path, '-'.join(args.save.split('/')[1:]))
    logger.info('dump path set up')
    logger.info(dump_path)
    makedirs(dump_path)
    #pkl.dump(logit_test,open('outputs/new_subs_energy_only_20220718' + suffix + '/logits_test_' + args.dataset + '_energy_only_' + str(args.energy_only),'wb'))
    # print('outputs/new_subs_energy_only_20220718' + suffix + '/logits_test_' + args.dataset + '_energy_only_' + str(args.energy_only))
    logger.info('make folder')
    # Context manager so the pickle file is flushed and closed deterministically.
    with open(dump_path + '/logits_test_' + args.dataset, 'wb') as logits_file:
        pkl.dump(logit_test, logits_file)
    logger.info('dump successful')
if args.importance:
    acc_vi_arr = importance(all_features=features, all_graph=adj_ls, ys=labels, \
                            full_test_mask=test_mask, trained_model=model, hidden1=args.hidden1, \
                            linear=args.linear, learning_rate=args.lr, \
                            weight_decay=args.weight_decay, batch_size=args.batch_size, \
                            dropout=args.dropout, path_save=args.save)
    df = pd.DataFrame(acc_vi_arr, index = range(acc_vi_arr.shape[0])) # node + edge
    df.to_csv(os.path.join('-'.join(args.save.split('/')[1:]) + "_variable_importance.csv"))
31 | self.bias.data.uniform_(-stdv, stdv)
32 |
33 | def forward(self, input, adj):
34 | support = torch.matmul(adj, input)
35 | output = torch.matmul(support, self.weight)
36 | if self.bias is not None:
37 | return output + self.bias
38 | else:
39 | return output
40 |
41 | def __repr__(self):
42 | return self.__class__.__name__ + ' (' \
43 | + str(self.in_features) + ' -> ' \
44 | + str(self.out_features) + ')'
45 |
class GraphConvolutionChebyshev(Module):
    """Graph convolution over the basis returned by ``chebyshev``.

    The basis built by ``chebyshev(input, adj, max_degree)`` is combined with
    the learnable ``K`` (``cheby`` x 1), reshaped to
    ``(basis.shape[0], basis.shape[1], -1)`` and projected by ``weight``.
    """

    def __init__(self, in_features, out_features, cheby, bias=True):
        super().__init__()
        self.in_features, self.out_features = in_features, out_features
        self.weight = Parameter(torch.FloatTensor(in_features, out_features))
        self.K = Parameter(torch.FloatTensor(cheby, 1))  # degree of chebyshev polynomial
        self.max_degree = cheby
        if not bias:
            self.register_parameter('bias', None)
        else:
            self.bias = Parameter(torch.FloatTensor(out_features))
        self.reset_parameters()

    def reset_parameters(self):
        """Uniform initialisation, drawn in the order weight, K, bias."""
        w_bound = 1. / math.sqrt(self.weight.size(1))
        k_bound = 1. / math.sqrt(self.K.size(1))
        with torch.no_grad():
            self.weight.uniform_(-w_bound, w_bound)
            self.K.uniform_(-k_bound, k_bound)
            if self.bias is not None:
                self.bias.uniform_(-w_bound, w_bound)

    def forward(self, input, adj):
        # build the tensor form of chebyshev polynomials
        basis = chebyshev(input, adj, self.max_degree)
        lead, nodes = basis.shape[0], basis.shape[1]
        mixed = torch.matmul(basis, self.K).view(lead, nodes, -1)
        projected = torch.matmul(mixed, self.weight)
        return projected if self.bias is None else projected + self.bias

    def __repr__(self):
        return '{} ({} -> {})'.format(
            self.__class__.__name__, self.in_features, self.out_features)
81 |
class Flatten(Module):
    """Collapse every dimension after the first into a single one.

    ``adj`` is accepted only so the layer matches the ``layer(x, adj)``
    calling convention used by the other layers; it is ignored.
    """

    def __init__(self):
        super(Flatten, self).__init__()

    def forward(self, x, adj):
        # Multiply the trailing sizes as plain ints. This avoids allocating a
        # tensor per call and stays an int for 1-D input, where the tensor
        # product of an empty shape was the float 1.0 and broke ``view``.
        width = 1
        for dim in x.shape[1:]:
            width *= dim
        return x.view(-1, width)
90 |
class ConcatLinear(Module):
    """``torch.nn.Linear`` wrapped in the ``layer(x, adj)`` call signature.

    The adjacency argument is ignored.
    """

    def __init__(self, in_dim, out_dim):
        super().__init__()
        self.linear = torch.nn.Linear(in_dim, out_dim)

    def forward(self, x, adj):
        return self.linear(x)
99 |
class ConcatReLU(Module):
    """ReLU activation wrapped in the ``layer(x, adj)`` call signature.

    The adjacency argument is ignored.
    """

    def __init__(self):
        super().__init__()
        self.relu = torch.nn.ReLU()

    def forward(self, x, adj):
        return self.relu(x)
107 |
class norm(Module):
    """Batch normalisation wrapped in the ``layer(x, adj)`` call signature.

    Args:
        in_features: number of channels handed to the batch-norm layer.
        mode: ``'pre'`` builds a ``BatchNorm1d``, ``'post'`` a ``BatchNorm2d``.

    Raises:
        ValueError: if ``mode`` is neither ``'pre'`` nor ``'post'``.
            Previously the layer was built without a ``norm`` attribute and
            only failed on the first forward pass.
    """

    def __init__(self, in_features, mode):
        super(norm, self).__init__()
        if mode == 'pre':
            self.norm = torch.nn.BatchNorm1d(in_features)
        elif mode == 'post':
            self.norm = torch.nn.BatchNorm2d(in_features)
        else:
            raise ValueError(
                "mode must be 'pre' or 'post', got {!r}".format(mode))

    def forward(self, x, adj):
        # ``adj`` is unused; it is part of the shared layer interface.
        return self.norm(x)
118 |
class SelfAttention(Module):
    """Single-head self-attention over the node axis.

    For ``x`` of shape ``(batch, nodes, in_features)`` the layer returns
    ``softmax(Q K^T, dim=-1) V`` with ``Q = x @ w_query``, ``K = x @ w_key``
    and ``V = x @ w_value``. No scaling is applied to the scores. ``adj`` is
    accepted only to match the shared ``layer(x, adj)`` interface.

    Args:
        in_features: size of the last axis of ``x`` (and of the output).
        w_features: width of the key and query projections.
    """

    def __init__(self, in_features, w_features):
        super(SelfAttention, self).__init__()
        self.w_key = Parameter(torch.FloatTensor(in_features, w_features))
        self.w_value = Parameter(torch.FloatTensor(in_features, in_features))
        self.w_query = Parameter(torch.FloatTensor(in_features, w_features))
        self.reset_parameters()

    def reset_parameters(self):
        """Initialise the projections like the GCN layers in this module.

        ``torch.FloatTensor(a, b)`` only allocates memory; without this step
        the parameters held arbitrary values (possibly inf/NaN).
        """
        for param in (self.w_key, self.w_value, self.w_query):
            stdv = 1. / math.sqrt(param.size(1))
            param.data.uniform_(-stdv, stdv)

    def forward(self, x, adj):
        keys = x @ self.w_key        # ? x N x W
        querys = x @ self.w_query    # ? x N x W
        values = x @ self.w_value    # ? x N x F
        # Batched form of the former per-sample loops. It also keeps the
        # scores on the same device as ``x`` (the loops wrote into a tensor
        # created on the default device).
        attn_scores = softmax(querys @ keys.transpose(-1, -2), dim=-1)  # ? x N x N
        return attn_scores @ values  # ? x N x F
137 |
138 |
139 |
140 |
141 |
--------------------------------------------------------------------------------
/model/models.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from torch.nn.parameter import Parameter
5 | from layers import *
6 |
7 |
class GCN(nn.Module):
    """Graph-convolution stack followed by a fully connected classifier.

    Args:
        nnode: number of nodes per graph.
        nfeat: number of input features per node.
        mfeat: number of edge features (last axis of the adjacency tensor).
        hidden1: width of every graph-convolution layer unless ``is_des``.
        linear: width of an optional hidden fully connected layer; ``0``
            connects the flattened output directly to ``nclass``.
        depth: number of graph-convolution layers.
        natt: key/query width of the optional self-attention layer; ``0``
            disables attention.
        nclass: number of output logits.
        dropout: dropout probability applied before flattening.
        weight: ``'pre'`` collapses the edge features with ``edgeweight``
            before the convolutions; ``'post'`` runs the convolutions once per
            edge feature and collapses afterwards.
        is_des: use the fixed decreasing width schedule instead of ``hidden1``.
        cheby: ``None`` for plain graph convolutions, otherwise the
            Chebyshev degree.

    Raises:
        ValueError: if ``weight`` is not ``'pre'``/``'post'`` or ``depth``
            exceeds the number of available layer widths.
    """

    def __init__(self, nnode, nfeat, mfeat, hidden1, linear, depth, natt, nclass, dropout, weight, is_des,cheby):
        super(GCN, self).__init__()
        if weight not in ('pre', 'post'):
            # Used to surface as a NameError on ``ch`` further down.
            raise ValueError("weight must be 'pre' or 'post', got {!r}".format(weight))
        nin = nfeat # in_features
        self.dropout = dropout
        self.mfeat = mfeat
        self.weight = weight
        # Channel count seen by the batch-norm layers.
        ch = nnode if weight == 'pre' else mfeat
        if is_des:
            self.hidden = [50,50,50,50,50,20,20,20,20,20]
        else:
            self.hidden = [hidden1] * depth
        gcn_layers = [] # build a list for gcnn layers
        if cheby is None:
            if depth > len(self.hidden):
                raise ValueError('depth={} exceeds the {} available layer widths'.format(depth, len(self.hidden)))
            for nhid in range(depth):
                gcn_layers.append(GraphConvolution(nin, self.hidden[nhid]))
                gcn_layers.append(norm(ch,weight))
                gcn_layers.append(ConcatReLU())
                nin = int(self.hidden[nhid])
            # Width actually produced by the last convolution. Using
            # ``self.hidden[-1]`` mis-sized the classifier whenever ``is_des``
            # was combined with a depth other than len(self.hidden).
            last_width = nin
        else:
            # NOTE(review): gcn1/gcn2 are registered but ``forward`` only
            # iterates ``self.gc``, which stays empty on this path.
            self.gcn1 = GraphConvolutionChebyshev(nfeat, hidden1, cheby)
            self.gcn2 = GraphConvolutionChebyshev(hidden1, hidden1, cheby)
            last_width = self.hidden[-1]
        if natt != 0:
            self.att = SelfAttention(last_width, natt) # dimension of value keeps the same with input size
            # single head for now
            self.is_att = True
        else:
            self.is_att = False
        self.gc = nn.Sequential(*gcn_layers)
        self.flatten = Flatten()
        full_layers = []
        nin_full = last_width * nnode # in_features for fully connected layers, which is gcn output * number of node
        if linear != 0:
            full_layers.append(ConcatLinear(nin_full, int(linear)))
            full_layers.append(ConcatLinear(int(linear), nclass))
        else:
            full_layers.append(ConcatLinear(nin_full, nclass))
        self.linear = nn.Sequential(*full_layers)
        # modification here: replace FloatTensor with rand to make sure values in adjacency matrix are more than zero.
        self.edgeweight = Parameter(torch.rand(mfeat,1))

    def forward(self, x, adj):
        """Return raw class logits for node features ``x`` and adjacency ``adj``."""
        if self.weight == 'pre':
            # Collapse the edge-feature axis into one weighted adjacency.
            adj = torch.matmul(adj, self.edgeweight).view(adj.shape[0],adj.shape[1],-1)
        elif self.weight == 'post':
            # Replicate x once per edge feature and move that axis next to
            # the batch axis for both x and adj.
            x = x.view(x.shape[0],x.shape[1],-1,1).expand(x.shape[0],x.shape[1],x.shape[2],self.mfeat)

            x = torch.transpose(x,-1,-3)
            x = torch.transpose(x,-1,-2)
            adj = torch.transpose(adj,-1,-3)
            adj = torch.transpose(adj,-1,-2)
        for func in self.gc:
            x = func(x,adj)
        if self.is_att:
            x = self.att(x,adj)
        if self.weight == 'post':
            # Move the edge-feature axis back to the end and collapse it.
            x = torch.transpose(x,-1,-3)
            x = torch.transpose(x,-3,-2)
            x = torch.matmul(x, self.edgeweight).view(x.shape[0],x.shape[1],-1)
        x = F.dropout(x, self.dropout, training=self.training)
        x = self.flatten(x,adj)
        # nn.Sequential cannot forward the extra ``adj`` argument, so the
        # fully connected layers are applied one by one.
        for func_full in self.linear:
            x = func_full(x,adj)
        return x #F.log_softmax(x, dim=1)
79 |
--------------------------------------------------------------------------------
/model/outputs/HCV_A171T_model.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nucleus2014/protease-gcnn-pytorch/c769a0eb53f3b3ec99d53da738ad380376be9bcf/model/outputs/HCV_A171T_model.pth
--------------------------------------------------------------------------------
/model/outputs/HCV_Combined_model.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nucleus2014/protease-gcnn-pytorch/c769a0eb53f3b3ec99d53da738ad380376be9bcf/model/outputs/HCV_Combined_model.pth
--------------------------------------------------------------------------------
/model/outputs/HCV_D183A_model.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nucleus2014/protease-gcnn-pytorch/c769a0eb53f3b3ec99d53da738ad380376be9bcf/model/outputs/HCV_D183A_model.pth
--------------------------------------------------------------------------------
/model/outputs/HCV_Triple_model.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nucleus2014/protease-gcnn-pytorch/c769a0eb53f3b3ec99d53da738ad380376be9bcf/model/outputs/HCV_Triple_model.pth
--------------------------------------------------------------------------------
/model/outputs/HCV_WT_model.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nucleus2014/protease-gcnn-pytorch/c769a0eb53f3b3ec99d53da738ad380376be9bcf/model/outputs/HCV_WT_model.pth
--------------------------------------------------------------------------------
/model/outputs/TEV_model.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nucleus2014/protease-gcnn-pytorch/c769a0eb53f3b3ec99d53da738ad380376be9bcf/model/outputs/TEV_model.pth
--------------------------------------------------------------------------------
/model/test.py:
--------------------------------------------------------------------------------
1 | # This script is to test GCNN with already-trained gcnn model
2 | # Author: Changpeng Lu
3 | # Usage:
4 | # python test.py --dataset HCV_binary_10_ang_aa_sinusoidal_encoding_4_energy_7_energyedge_5_hbond --test_dataset HCV_binary_10_ang_aa_sinusoidal_encoding_2_energy_7_energyedge_5_hbond --epochs 500 --hidden1 20 --depth 2 --linear 1024 --att 0 --model gcn --batch_size 500 --lr $tmp_lr --dropout $tmp_dt --weight_decay $tmp_wd --save 'outputs/tt/HCV_binary_10_ang_aa_sinusoidal_encoding_4_energy_7_energyedge_5_hbond/bs_500/'
5 | # Train and test each epoch for this new version, instead of testing only after all training are done. Changes below aren't applied to wider_deeper and more epoch trials with the base setting. Replace Adam with SGD, also add lr_scheduler. Also calculate average train accuracy and loss instead of the last batch. Also, we use earlystop to let the model train enough epochs. 1) if test accuracy always go smaller, then the model will stop; 2) if the test accuracy always the same as the former accuracy, then it means converges, then the model will stop as well.
6 |
7 | from __future__ import division
8 | from __future__ import print_function
9 |
10 | import os
11 | import time
12 | import logging
13 | import argparse
14 | import numpy as np
15 | import random
16 |
17 | from comet_ml import Experiment
18 | import torch
19 | import torch.nn as nn
20 | import torch.nn.functional as F
21 | import torch.optim as optim
22 | from torch.utils.data import DataLoader
23 | import torchvision.datasets as datasets
24 | import torchvision.transforms as transforms
25 | import math
26 | import scipy.sparse as sp
27 | from torch.nn.parameter import Parameter
28 |
29 | from utils import *
30 | from models import *
31 |
32 | # Training settings
# Command-line interface of the train/test script.
parser = argparse.ArgumentParser()
parser.add_argument('--gpu', type=int, default=0,
                    help='number of gpus.')
#parser.add_argument('--fastmode', action='store_true', default=False,
#                    help='Validate during training pass.')
parser.add_argument('--seed', type=int, default=1, help='Random seed.')
parser.add_argument('--epochs', type=int, default=200,
                    help='Number of epochs to train.')
parser.add_argument('--lr', type=float, default=0.01,
                    help='Initial learning rate.')
parser.add_argument('--weight_decay', type=float, default=5e-4,
                    help='Weight decay (L2 loss on parameters).')
parser.add_argument('--hidden1', type=int, default=10,
                    help='Number of hidden units for nodes.')
parser.add_argument('--depth', type=int, default=10, help='Number of gcnn layers')
parser.add_argument('--att', type=int, default=0, help='the dimension of weight matrices for key and query')
parser.add_argument('--linear', type=int, default=0)
parser.add_argument('--dropout', type=float, default=0.1,
                    help='Dropout rate (1 - keep probability).')
parser.add_argument('--no_energy', action='store_true', default=False)
parser.add_argument('--energy_only', action='store_true', default=False)
parser.add_argument('--scale_type', choices=['exp', 'minmax'], default='exp')
parser.add_argument('--test_dataset', type=str)
parser.add_argument('--dataset', type=str, help='input dataset string')
parser.add_argument('--model', type=str, default='gcn', choices=['gcn', 'chebyshev'])
parser.add_argument('--max_degree', type=int, default=3, help='number of supports')
parser.add_argument('--batch_size', type=int, default=8)
parser.add_argument('--weight', type=str, default='pre', choices=['pre', 'post'])
parser.add_argument('--dim_des', action='store_true', default=False)
parser.add_argument('--save', type=str, default='./experiment1')
args = parser.parse_args()

makedirs(args.save)
logger = get_logger(logpath=os.path.join(args.save, 'logs'), filepath=os.path.abspath(__file__))
logger.info(args)
# SECURITY: an API key is committed in source. Prefer supplying it through the
# COMET_API_KEY environment variable; the literal is kept only as a fallback so
# existing runs keep working, and should be rotated and removed.
comet_api_key = os.environ.get("COMET_API_KEY", "yOMD8snU8WrOgVJM6jTErziMh")
experiment = Experiment(project_name = args.dataset, api_key=comet_api_key, workspace="hcvpgcntrain")
hyper_params = {"seed": args.seed, "weight_decay": args.weight_decay, "learning_rate": args.lr, "dropout": args.dropout, "batch_size": args.batch_size}
experiment.log_parameters(hyper_params)
71 |
72 | # test
73 | #def test():
74 | # checkpoint = torch.load(os.path.join(args.save, 'model_for_test_seed_' + str(args.seed) + '_hidden_' + str(args.hidden1) + '_linear_' + str(args.linear) +'_lr_'+str(args.lr)+'_wd_'+str(args.weight_decay)+'_bs_'+str(args.batch_size)+ '_dt_' + str(args.dropout) + '.pth'))
75 | # print("best epoch is:" + str(checkpoint['epoch']))
76 | # model.load_state_dict(checkpoint['state_dict'])
77 | # max_acc = 0
78 | # with torch.no_grad():
79 | # model.eval()
80 | #for j in range(100):
81 | # logits_test = model(features[test_mask], adj_ls[test_mask])
82 | # test_acc = accuracy(logits_test, torch.argmax(labels[test_mask],axis=1))
83 | # if test_acc > max_acc:
84 | #logits_test_fin = logits_test
85 | # max_acc = test_acc
86 | # logger.info("Test accuracy is:" + str(test_acc))
87 | # pkl.dump(logits_test,open(os.path.join(args.save, 'logits_test_seed_' + str(args.seed) + '_hidden_' + str(args.hidden1) + '_linear_' + str(args.linear) +'_lr_'+str(args.lr)+'_wd_'+str(args.weight_decay)+'_bs_'+str(args.batch_size)+ '_dt_' + str(args.dropout)),'wb'))
88 |
# Seed every RNG the script draws from so runs are repeatable.
torch.manual_seed(args.seed)
np.random.seed(args.seed)
random.seed(args.seed)
# cheby is no longer useful
is_cheby = args.model == 'chebyshev'
no_energy = bool(args.no_energy)
adj_ls, features, labels, sequences, proteases, labelorder, train_mask, test_mask = load_data(args.dataset, is_test=args.test_dataset, norm_type=True, scale_type=args.scale_type, test_format = 'index', energy_only = args.energy_only) #scale_type determines node feature scale
cheby_params = args.max_degree if args.model == 'chebyshev' else None
weight_mode = args.weight
dim_des = args.dim_des
# Training samples are the complement of the test mask. ``np.bool`` was
# removed from NumPy; the builtin ``bool`` is the equivalent dtype.
tmp_mask = np.array([(not idx) for idx in test_mask], dtype=bool)

# Size of Different Sets
logger.info("|Training| {},|Testing| {}".format(np.sum(tmp_mask), np.sum(test_mask)))

model = GCN(nnode=features.shape[1],
            nfeat=features.shape[2],
            mfeat=adj_ls.shape[3],
            # ngcn=args.ngcn,
            hidden1=args.hidden1,
            depth=args.depth,
            # hidden2=args.hidden2,
            natt=args.att, # one layer
            linear=args.linear,
            weight=weight_mode,
            is_des=dim_des,
            nclass=labels.shape[1],
            dropout=args.dropout,
            cheby=cheby_params)
logger.info(model)
logger.info('Number of parameters: {}'.format(count_parameters(model)))

batch_size = args.batch_size

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(),lr=args.lr, weight_decay=args.weight_decay)
#optimizer = optim.SGD(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
nepoch = args.epochs #willbe useless if set earlystop
#scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, int(nepoch / 10))
#scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, 10000)

best_acc = 0
print("Total number of forward processes:" + str(args.epochs * args.batch_size))
132 | with experiment.train():
133 | for i in range(nepoch):
134 | #i = 0 # epoch
135 | #converge_detect = 0
136 | #former_acc = 0
137 | #while True:
138 | #n = 0
139 | model.train()
140 | tmp_accs = []
141 | tmp_losses = []
142 | for batch_mask in get_batch_iterator(tmp_mask, batch_size):
143 | optimizer.zero_grad()
144 | #n = n + 1
145 | x = features[batch_mask]
146 | y = labels[batch_mask]
147 | y = torch.argmax(y,axis=1)
148 | adj = adj_ls[batch_mask]
149 | logits = model(x, adj)
150 | loss = criterion(logits,y)
151 | train_acc = accuracy(logits,y)
152 | loss.backward()
153 | optimizer.step()
154 | tmp_losses.append(loss.item())
155 | tmp_accs.append(train_acc.item())
156 | # scheduler.step()
157 | #train_acc = accuracy(logits, y) # only record the last batch accuracy for each epoch
158 | #experiment.log_metric("epoch_loss", sum(tmp_losses) / len(tmp_losses), step=i+1)
159 | #experiment.log_metric("epoch_accuracy", sum(tmp_accs) / len(tmp_accs), step=i+1)
160 | #print("train accuracy for {0}th epoch is: {1}".format(i+1, train_acc))
161 | # print("train loss for {0}th epoch is : {1}".format(i+1, loss))
162 | print("epoch: " + str(i+1))
163 | print("train_loss: " + str(sum(tmp_losses) / len(tmp_losses))) #loss.item()))
164 | print("train_acc: " + str(sum(tmp_accs) / len(tmp_accs))) #train_acc.item()))
165 | with torch.no_grad():
166 | with experiment.validate():
167 | model.eval()
168 | #for j in range(100):
169 | logits_test = model(features[test_mask], adj_ls[test_mask])
170 | test_loss = criterion(logits_test, torch.argmax(labels[test_mask],axis=1))
171 | test_acc = accuracy(logits_test, torch.argmax(labels[test_mask],axis=1))
172 | print("test_loss: " + str(test_loss.item()))
173 | print("test_acc: " + str(test_acc.item()))
174 | #experiment.log_metric("test_accuracy", test_acc.item(), step=i+1)
175 | #experiment.log_metric("test_loss", test_loss.item(), step=i+1)
176 | # if test_acc > max_acc:
177 | #logits_test_fin = logits_test
178 | # max_acc = test_acc
179 | # logger.info("Test accuracy is:" + str(test_acc))
180 | if test_acc > best_acc:
181 | torch.save({'epoch': i+1,'state_dict': model.state_dict()}, os.path.join(args.save, 'model_for_test_seed_' + str(args.seed) + '_hidden_' + str(args.hidden1) + '_linear_' + str(args.linear) +'_lr_'+str(args.lr)+'_wd_'+str(args.weight_decay)+'_bs_'+str(args.batch_size)+ '_dt_' + str(args.dropout) + '.pth'))
182 | pkl.dump(logits_test,open(os.path.join(args.save, 'logits_test_seed_' + str(args.seed) + '_hidden_' + str(args.hidden1) + '_linear_' + str(args.linear) +'_lr_'+str(args.lr)+'_wd_'+str(args.weight_decay)+'_bs_'+str(args.batch_size)+ '_dt_' + str(args.dropout)),'wb'))
183 | #print('save successfully')
184 | best_acc = test_acc
185 | best_epo = i
186 | #if abs(test_acc - former_acc) < 0.0001:
187 | # converge_detect += 1
188 | # if converge_detect == 100:
189 | # break
190 | #elif test_acc < former_acc :
191 | # overfit_detect += 1
192 | # if overfit_detect >= 100:
193 | # break
194 | #i += 1
195 | print("best_test_acc: " + str(best_acc.item()))
196 | #logger.info(
197 | # "Epoch {:04d} | "
198 | # "Best Acc {:.4f}".format(
199 | # best_epo, best_acc
200 | # ))
201 | #makedirs(args.save)
202 | #logger = get_logger(logpath=os.path.join(args.save, 'logs'), filepath=os.path.abspath(__file__))
203 | #logger.info(args)
204 | #device = torch.device('cuda:' + str(args.gpu) if torch.cuda.is_available() else 'cpu')
205 | #
206 | ### Determine Number of Supports and Assign Model Function
207 | ##if args.model == 'gcn':
208 | ## num_supports = 1
209 | ## model_func = GCN
210 | ##elif args.model == 'gcn_cheby':
211 | ## num_supports = 1 + args.max_degree
212 | ## model_func = GCN
213 | ##else:
214 | ## raise ValueError('Invalid argument for model: ' + str(FLAGS.model))
215 | #
216 | ## Load data
217 | #adj_ls, features, labels, sequences, proteases, labelorder, train_mask, val_mask, test_mask = load_data(args.dataset, is_test=args.test_dataset)
218 | #
219 | ## Size of Different Sets
220 | #print("|Training| {}, |Validation| {}, |Testing| {}".format(np.sum(train_mask), np.sum(val_mask), np.sum(test_mask)))
221 | #
222 | ## Model and optimizer
223 | #model = GCN(nnode=features.shape[1],
224 | # nfeat=features.shape[2],
225 | # mfeat=adj_ls.shape[3],
226 | # nhid1=args.hidden1,
227 | # nhid2=args.hidden2,
228 | # nclass=labels.shape[1],
229 | # dropout=args.dropout).to(device)
230 | #logger.info(model)
231 | #logger.info('Number of parameters: {}'.format(count_parameters(model)))
232 | #
233 | #criterion = nn.NLLLoss().to(device)
234 | #
235 | #optimizer = optim.Adam(model.parameters(),lr=args.lr, weight_decay=args.weight_decay)
236 | #
237 | #best_acc = 0
238 | #batch_time_meter = RunningAverageMeter()
239 | #end = time.time()
240 | #print("Total number of forward processes:" + str(args.epochs * args.batch_size))
241 | #
242 | ##batches_per_epoch = int(sum(train_mask) / args.batch_size)
243 | ##print("Batches per epoch is:" + str(batches_per_epoch))
244 | #batch_size = args.batch_size
245 | #epochs_num = args.epochs
246 | #
247 | #if args.save_validation == True:
248 | # val_df = np.zeros([args.epochs*sum(val_mask),labels.shape[1]])
249 | #
250 | ##mask = np.array([x or y for (x,y) in zip(train_mask, val_mask)], dtype = np.bool)
251 | #for epoch in range(epochs_num):
252 | # n = 0
253 | # for batch_mask in get_batch_iterator(train_mask, batch_size):
254 | # optimizer.zero_grad()
255 | # n = n + 1
256 | # print('this is the {}th batch'.format(n))
257 | # x = features[batch_mask].to(device)
258 | # y = labels[batch_mask]
259 | # y = torch.argmax(y,axis=1).to(device)
260 | # adj = adj_ls[batch_mask].to(device)
261 | # model.train()
262 | # logits = model(x, adj)
263 | # loss = criterion(logits,y)
264 | # loss.backward()
265 | # optimizer.step()
266 | # train_acc = accuracy(logits, y)
267 | # print("train loss is {}".format(loss))
268 | # print("train accuracy is {}".format(train_acc))
269 | # batch_time_meter.update(time.time() - end)
270 | # end = time.time()
271 | # with torch.no_grad():
272 | # #train_acc = accuracy(model, logits, labels[train_mask])
273 | # model.eval()
274 | # logits_val = model(features[val_mask], adj_ls[val_mask])
275 | # loss_val = criterion(logits_val,torch.argmax(labels[val_mask],axis=1))
276 | # val_acc = accuracy(logits_val, torch.argmax(labels[val_mask],axis=1))
277 | # print("accuracy for {0}th epoch is: {1}".format(epoch,val_acc))
278 | # print("loss is {0}:".format(loss_val))
279 | # if val_acc > best_acc:
280 | # torch.save({'epoch': epoch,'state_dict': model.state_dict(), 'args': args}, os.path.join(args.save, 'model.pth'))
281 | # best_acc = val_acc
282 | # best_epo = epoch
283 | # logger.info(
284 | # "Epoch {:04d} | Time {:.3f} ({:.3f}) | "
285 | # "Val Acc {:.4f}".format(
286 | # epoch, batch_time_meter.val, batch_time_meter.avg, val_acc
287 | # )
288 | # )
289 | # f = open(args.save + "epoch_record.txt","a")
290 | # f.write("batch_size_{0}_lr_{1}_gc_{2}_decay_{3}_epoch_{4}\tacc:{5}".format(batch_size,args.lr,args.hidden1,args.weight_decay,epoch,val_acc))
291 | # f.close()
292 | # val_df[(epoch)*sum(val_mask):(epoch + 1) * sum(val_mask), :] = logits_val
293 | #pkl.dump(val_df, open(os.path.join(args.save, args.dataset + '_validation.csv'),'wb'))
294 | #test()
295 |
296 |
297 |
--------------------------------------------------------------------------------
/model/train.py:
--------------------------------------------------------------------------------
1 | # This script trains the GCNN and evaluates it on the validation split after every epoch; when --val_dataset is given, the best checkpoint is also evaluated on the test split at the end.
2 | # Author: Changpeng Lu
3 | # Usage:
4 | # python train.py --dataset HCV_binary_10_ang_aa_sinusoidal_encoding_4_energy_7_energyedge_5_hbond --test_dataset HCV_binary_10_ang_aa_sinusoidal_encoding_2_energy_7_energyedge_5_hbond --epochs 500 --hidden1 20 --depth 2 --linear 1024 --att 0 --model gcn --batch_size 500 --lr $tmp_lr --dropout $tmp_dt --weight_decay $tmp_wd --save 'outputs/tt/HCV_binary_10_ang_aa_sinusoidal_encoding_4_energy_7_energyedge_5_hbond/bs_500/'
5 | # In this version the model is trained and evaluated every epoch, instead of being evaluated only after all training is done. The changes below were not applied to the wider_deeper and more-epoch trials that use the base setting. Design notes: replace Adam with SGD and add an lr_scheduler; report the average training accuracy and loss over all batches of an epoch instead of the values of the last batch; use early stopping so the model trains for enough epochs: 1) if the test accuracy keeps decreasing, training stops; 2) if the test accuracy stays the same as the previous accuracy, the model is considered converged and training stops as well. NOTE: in the current code Adam is the active optimizer, and SGD, the lr_scheduler and early stopping are not enabled.
6 |
7 | from __future__ import division
8 | from __future__ import print_function
9 |
10 | import os
11 | import time
12 | import logging
13 | import argparse
14 | import numpy as np
15 | import random
16 |
17 | from comet_ml import Experiment
18 | import torch
19 | import torch.nn as nn
20 | import torch.nn.functional as F
21 | import torch.optim as optim
22 | from torch.utils.data import DataLoader
23 | import torchvision.datasets as datasets
24 | import torchvision.transforms as transforms
25 | import math
26 | import scipy.sparse as sp
27 | from torch.nn.parameter import Parameter
28 |
29 | from utils import *
30 | from models import *
31 |
def _build_arg_parser():
    """Create the command-line parser for the training script.

    Flags are registered in a fixed order so that ``--help`` output stays
    stable; every default and help string is part of the script's CLI.
    """
    cli = argparse.ArgumentParser()

    # --- run control -------------------------------------------------------
    cli.add_argument('--gpu', type=int, default=0,
                     help='number of gpus.')
    cli.add_argument('--seed', type=int, default=1, help='Random seed.')
    cli.add_argument('--epochs', type=int, default=200,
                     help='Number of epochs to train.')

    # --- optimisation ------------------------------------------------------
    cli.add_argument('--lr', type=float, default=0.01,
                     help='Initial learning rate.')
    cli.add_argument('--weight_decay', type=float, default=5e-4,
                     help='Weight decay (L2 loss on parameters).')

    # --- network shape -----------------------------------------------------
    cli.add_argument('--hidden1', type=int, default=10,
                     help='Number of hidden units for nodes.')
    cli.add_argument('--depth', type=int, default=10,
                     help='Number of gcnn layers')
    cli.add_argument('--att', type=int, default=0,
                     help='the dimension of weight matrices for key and query')
    cli.add_argument('--linear', type=int, default=0)
    cli.add_argument('--dropout', type=float, default=0.1,
                     help='Dropout rate (1 - keep probability).')

    # --- feature selection / scaling ---------------------------------------
    cli.add_argument('--no_energy', action='store_true', default=False)
    cli.add_argument('--energy_only', action='store_true', default=False)
    cli.add_argument('--seq_only', action='store_true', default=False)
    cli.add_argument('--feature',
                     choices=['d', 's+d', 's', 'e', 's+e', 's+e+d'],
                     default='s+e')
    cli.add_argument('--scale_type', choices=['exp', 'minmax'], default='exp')

    # --- datasets ----------------------------------------------------------
    cli.add_argument('--test_dataset', type=str, default=None)
    cli.add_argument('--val_dataset', type=str, default=None)
    cli.add_argument('--resampling', type=str, default=None)
    cli.add_argument('--dataset', type=str, help='input dataset string')

    # --- model variant / batching / output ---------------------------------
    cli.add_argument('--model', type=str, default='gcn',
                     choices=['gcn', 'chebyshev'])
    cli.add_argument('--max_degree', type=int, default=3,
                     help='number of supports')
    cli.add_argument('--batch_size', type=int, default=8)
    cli.add_argument('--weight', type=str, default='pre',
                     choices=['pre', 'post'])
    cli.add_argument('--dim_des', action='store_true', default=False)
    cli.add_argument('--save', type=str, default='./experiment1')
    return cli


# Training settings: parsed once at start-up and read by the rest of the script.
parser = _build_arg_parser()
args = parser.parse_args()
68 |
# ---------------------------------------------------------------------------
# Run setup: output directory, logging, experiment tracking, RNG seeding and
# data loading. Everything defined here is read by the rest of the script.
# ---------------------------------------------------------------------------
makedirs(args.save)
logger = get_logger(logpath=os.path.join(args.save, 'logs'), filepath=os.path.abspath(__file__))
logger.info(args)

# SECURITY: a Comet API key is committed in this file. Prefer supplying it via
# the COMET_API_KEY environment variable; the literal below is kept only so
# existing invocations keep working and should be rotated and removed.
comet_api_key = os.environ.get("COMET_API_KEY", "yOMD8snU8WrOgVJM6jTErziMh")
experiment = Experiment(project_name=args.dataset, api_key=comet_api_key, workspace="tevtrisplit")
hyper_params = {
    "seed": args.seed,
    "weight_decay": args.weight_decay,
    "learning_rate": args.lr,
    "dropout": args.dropout,
    "batch_size": args.batch_size,
}
experiment.log_parameters(hyper_params)

# Seed every RNG the script touches so runs with the same --seed are comparable.
torch.manual_seed(args.seed)
np.random.seed(args.seed)
random.seed(args.seed)

# cheby is no longer useful
is_cheby = args.model == 'chebyshev'
no_energy = bool(args.no_energy)

if args.val_dataset is not None:
    # Three-way split: load_data returns separate train / validation / test masks.
    logger.info('TripleSplit!')
    adj_ls, features, labels, sequences, labelorder, train_mask, val_mask, test_mask = load_data(
        args.dataset,
        is_test=args.test_dataset,
        is_val=args.val_dataset,
        norm_type=True,
        scale_type=args.scale_type,  # scale_type determines node feature scale
        test_format='index',
        energy_only=args.energy_only,
        seq_only=args.seq_only,
        feature_type=args.feature,
    )
    logger.info("|Training| {},|Validation| {}, |Testing| {}".format(
        np.sum(train_mask), np.sum(val_mask), np.sum(test_mask)))
    tmp_mask = train_mask
else:
    # Two-way split: no test mask is returned; every sample outside val_mask
    # is used for training.
    adj_ls, features, labels, sequences, labelorder, train_mask, val_mask = load_data(
        args.dataset,
        is_test=args.test_dataset,
        is_val=args.val_dataset,
        norm_type=True,
        scale_type=args.scale_type,  # scale_type determines node feature scale
        test_format='index',
        energy_only=args.energy_only,
        seq_only=args.seq_only,
        feature_type=args.feature,
    )
    # `np.bool` was removed in NumPy 1.24; the builtin `bool` is the same dtype.
    tmp_mask = np.array([not flag for flag in val_mask], dtype=bool)
    # Size of Different Sets
    logger.info("|Training| {},|Testing| {}".format(np.sum(tmp_mask), np.sum(val_mask)))
92 |
# --max_degree is only forwarded to the network for the 'chebyshev' model;
# for the plain 'gcn' model the `cheby` argument is None.
if args.model == 'chebyshev':
    cheby_params = args.max_degree
else:
    cheby_params = None
weight_mode = args.weight
dim_des = args.dim_des

# Collect the constructor arguments first so the network configuration can be
# read in one place; sizes are taken from the loaded tensors.
gcn_kwargs = {
    'nnode': features.shape[1],
    'nfeat': features.shape[2],
    'mfeat': adj_ls.shape[3],
    'hidden1': args.hidden1,
    'depth': args.depth,
    'natt': args.att,  # one layer
    'linear': args.linear,
    'weight': weight_mode,
    'is_des': dim_des,
    'nclass': labels.shape[1],
    'dropout': args.dropout,
    'cheby': cheby_params,
}
model = GCN(**gcn_kwargs)

logger.info(model)
n_params = count_parameters(model)
logger.info('Number of parameters: {}'.format(n_params))
113 |
# ---------------------------------------------------------------------------
# Training loop: one optimisation pass over the training mask per epoch,
# followed by an evaluation on the validation mask. The weights with the best
# validation accuracy seen so far are checkpointed to args.save.
# ---------------------------------------------------------------------------
batch_size = args.batch_size

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
nepoch = args.epochs
resampling = args.resampling

# Output file names encode the hyper-parameters of the run; build them once.
run_tag = 'seed_{}_hidden_{}_linear_{}_lr_{}_wd_{}_bs_{}_dt_{}'.format(
    args.seed, args.hidden1, args.linear, args.lr,
    args.weight_decay, args.batch_size, args.dropout)
checkpoint_path = os.path.join(args.save, 'model_for_test_' + run_tag + '.pth')
val_logits_path = os.path.join(args.save, 'logits_val_' + run_tag)

# Kept as a plain float so the final log line works even when no epoch ever
# improves on the initial value (previously `.item()` was called on the int 0).
best_acc = 0.0
best_epo = None
print("Total number of forward processes:" + str(args.epochs * args.batch_size))

with experiment.train():
    for epoch in range(nepoch):
        model.train()
        tmp_accs = []
        tmp_losses = []
        batch_mask_ls, count_mask_ls = get_batch_iterator(tmp_mask, batch_size, sampling=resampling)
        for batch_mask, count_mask in zip(batch_mask_ls, count_mask_ls):
            optimizer.zero_grad()
            x = features[batch_mask]
            y = labels[batch_mask]
            adj = adj_ls[batch_mask]
            # Samples whose count exceeds 1 are appended (count - 1) more times.
            # The index must not be named like the epoch counter: the original
            # reused `i` here, which corrupted the metric step and saved epoch.
            for sample_idx, count in enumerate(count_mask):
                while count - 1 > 0:
                    x = torch.concat([x, features[sample_idx, :, :].reshape(1, features.shape[1], -1)])
                    y = torch.concat([y, labels[sample_idx, :].reshape(1, -1)])
                    adj = torch.concat([adj, adj_ls[sample_idx, :, :, :].reshape(1, adj_ls.shape[1], adj_ls.shape[2], -1)])
                    count -= 1
            assert x.shape[0] == y.shape[0]
            # Labels are stored one row per sample; CrossEntropyLoss wants class indices.
            y = torch.argmax(y, axis=1)
            logits = model(x, adj)
            loss = criterion(logits, y)
            train_acc = accuracy(logits, y)
            loss.backward()
            optimizer.step()
            tmp_losses.append(loss.item())
            tmp_accs.append(train_acc.item())

        # Report the mean over all batches of the epoch, not the last batch only.
        epoch_loss = sum(tmp_losses) / len(tmp_losses)
        epoch_acc = sum(tmp_accs) / len(tmp_accs)
        experiment.log_metric("epoch_loss", epoch_loss, step=epoch + 1)
        experiment.log_metric("epoch_accuracy", epoch_acc, step=epoch + 1)
        print("epoch: " + str(epoch + 1))
        print("train_loss: " + str(epoch_loss))
        print("train_acc: " + str(epoch_acc))

        with torch.no_grad():
            with experiment.validate():
                model.eval()
                val_targets = torch.argmax(labels[val_mask], axis=1)
                logits_test = model(features[val_mask], adj_ls[val_mask])
                val_loss = criterion(logits_test, val_targets)
                val_acc = accuracy(logits_test, val_targets)
                print("val_loss: " + str(val_loss.item()))
                print("val_acc: " + str(val_acc.item()))
                experiment.log_metric("val_accuracy", val_acc.item(), step=epoch + 1)
                experiment.log_metric("val_loss", val_loss.item(), step=epoch + 1)

                if val_acc.item() > best_acc:
                    # New best validation accuracy: checkpoint the weights and
                    # keep the matching validation logits.
                    torch.save(
                        {
                            'epoch': epoch + 1,
                            'state_dict': model.state_dict(),
                            'optimizer_state_dict': optimizer.state_dict(),
                            'loss': loss,
                        },
                        checkpoint_path,
                    )
                    # Context manager so the file is flushed and closed promptly.
                    with open(val_logits_path, 'wb') as logits_file:
                        pkl.dump(logits_test, logits_file)
                    best_acc = val_acc.item()
                    best_epo = epoch

logger.info("best_val_acc: " + str(best_acc))
205 |
# test
# Only the three-way split provides a separate test mask, so the final test
# evaluation runs only when --val_dataset was given.
if args.val_dataset is not None:
    run_suffix = 'seed_{}_hidden_{}_linear_{}_lr_{}_wd_{}_bs_{}_dt_{}'.format(
        args.seed, args.hidden1, args.linear, args.lr,
        args.weight_decay, args.batch_size, args.dropout)
    # Reload the checkpoint written during training for the best validation epoch.
    checkpoint = torch.load(os.path.join(args.save, 'model_for_test_' + run_suffix + '.pth'))
    print("best epoch is:" + str(checkpoint['epoch']))
    model.load_state_dict(checkpoint['state_dict'])
    with torch.no_grad():
        model.eval()
        logits_test = model(features[test_mask], adj_ls[test_mask])
        test_acc = accuracy(logits_test, torch.argmax(labels[test_mask], axis=1))
        logger.info("Test accuracy is:" + str(test_acc))
        # Context manager so the pickle file is closed (the original leaked the handle).
        with open(os.path.join(args.save, 'logits_test_' + run_suffix), 'wb') as logits_file:
            pkl.dump(logits_test, logits_file)
221 |
222 |
223 |
--------------------------------------------------------------------------------
/pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Nucleus2014/protease-gcnn-pytorch/c769a0eb53f3b3ec99d53da738ad380376be9bcf/pipeline.png
--------------------------------------------------------------------------------