├── gnn_utils.py ├── gnn_model.py ├── Example2.csv ├── README.md ├── Example1.csv ├── upload.html └── app.py /gnn_utils.py: -------------------------------------------------------------------------------- 1 | 2 | import dgl 3 | import torch 4 | from rdkit import Chem 5 | from rdkit.Chem import Descriptors, AllChem, MACCSkeys, RDKFingerprint 6 | from torch.utils.data import Dataset 7 | def mol_to_graph(mol): 8 | if mol is None: 9 | return None 10 | num_atoms = mol.GetNumAtoms() 11 | g = dgl.graph(([], [])) 12 | g.add_nodes(num_atoms) 13 | 14 | bond_types = [] 15 | for bond in mol.GetBonds(): 16 | start, end = bond.GetBeginAtomIdx(), bond.GetEndAtomIdx() 17 | bond_type = bond.GetBondTypeAsDouble() 18 | bond_types.extend([bond_type, bond_type]) 19 | g.add_edges([start, end], [end, start]) 20 | 21 | h_feats = [atom.GetAtomicNum() for atom in mol.GetAtoms()] 22 | g.ndata['h'] = torch.tensor(h_feats).unsqueeze(1).float() 23 | g.edata['e'] = torch.tensor(bond_types).unsqueeze(1).float() 24 | 25 | return g 26 | 27 | 28 | class MoleculeDataset(Dataset): 29 | def __init__(self, smiles_list, labels): 30 | self.smiles_list = smiles_list 31 | self.labels = labels 32 | 33 | def __len__(self): 34 | return len(self.smiles_list) 35 | 36 | def __getitem__(self, idx): 37 | smiles = self.smiles_list[idx] 38 | mol = Chem.MolFromSmiles(smiles) 39 | graph = mol_to_graph(mol) 40 | 41 | if graph is None: 42 | return None, None, None 43 | 44 | # Extract Molecular Descriptors and Fingerprints 45 | descriptor_names = [ 46 | "MolWt", 47 | "TPSA", 48 | "NumHDonors", 49 | "NumHAcceptors", 50 | "MolLogP", 51 | "NumRotatableBonds", 52 | ] 53 | descriptors = [ 54 | Descriptors.MolWt(mol), 55 | Descriptors.TPSA(mol), 56 | Descriptors.NumHDonors(mol), 57 | Descriptors.NumHAcceptors(mol), 58 | Descriptors.MolLogP(mol), 59 | Descriptors.NumRotatableBonds(mol), 60 | ] 61 | fingerprints = [ 62 | int(bit) for fp in [ 63 | AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=512).ToBitString(), 64 | MACCSkeys.GenMACCSKeys(mol).ToBitString(), 65 | RDKFingerprint(mol, fpSize=512).ToBitString() 66 | ] for bit in fp 67 | ] 68 | fingerprint_names = [f"Fingerprint_{i}" for i in range(len(fingerprints))] 69 | 70 | feature_names = descriptor_names + fingerprint_names 71 | 72 | #print("Feature names:") 73 | #print(feature_names) 74 | 75 | features = torch.tensor(descriptors + fingerprints).float() 76 | label = self.labels[idx] 77 | 78 | return graph, features, label 79 | 80 | 81 | def collate(samples): 82 | valid_samples = [s for s in samples if s[0] is not None] 83 | graphs, features, labels = map(list, zip(*valid_samples)) 84 | batched_graph = dgl.batch(graphs) 85 | return batched_graph, torch.stack(features), torch.tensor(labels) -------------------------------------------------------------------------------- /gnn_model.py: -------------------------------------------------------------------------------- 1 | import dgl 2 | import torch 3 | import torch.nn as nn 4 | from rdkit import Chem 5 | from rdkit.Chem import Descriptors, AllChem, MACCSkeys, RDKFingerprint 6 | import pandas as pd 7 | from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score 8 | from torch.utils.data import Dataset, DataLoader 9 | from sklearn.model_selection import StratifiedShuffleSplit 10 | import numpy as np 11 | import matplotlib.pyplot as plt 12 | from sklearn.metrics import roc_auc_score, roc_curve 13 | 14 | # GNN model definition 15 | class GNNLayer(nn.Module): 16 | def __init__(self, in_feats, out_feats): 17 | super(GNNLayer, 
self).__init__() 18 | self.linear = nn.Linear(in_feats, out_feats) 19 | self.bn = nn.BatchNorm1d(out_feats) 20 | self.residual = (in_feats == out_feats) 21 | 22 | def forward(self, g, h, e): 23 | g.ndata['h'] = h 24 | g.edata['e'] = e 25 | g.update_all(dgl.function.u_mul_e('h', 'e', 'm'), dgl.function.mean('m', 'h')) 26 | h = self.linear(g.ndata['h']) 27 | h = self.bn(h) 28 | if self.residual: 29 | h += g.ndata['h'] 30 | return h 31 | 32 | 33 | class GNN(nn.Module): 34 | def __init__(self, in_feats, hidden_size, num_classes, dropout=0.5, feature_size=1197): 35 | super(GNN, self).__init__() 36 | self.gnn1 = GNNLayer(in_feats, hidden_size) 37 | self.gnn2 = GNNLayer(hidden_size, hidden_size) 38 | self.gnn3 = GNNLayer(hidden_size, hidden_size) 39 | self.dropout = nn.Dropout(dropout) 40 | self.fc_feature = nn.Linear(feature_size, hidden_size) 41 | self.fc_combine = nn.Linear(hidden_size * 2, hidden_size) 42 | self.fc = nn.Linear(hidden_size, num_classes) 43 | 44 | def forward(self, g, features): 45 | h = g.ndata['h'] 46 | e = g.edata['e'] 47 | h = self.gnn1(g, h, e) 48 | h = self.gnn2(g, h, e) 49 | h = self.gnn3(g, h, e) 50 | h = self.dropout(h) 51 | g.ndata['h'] = h 52 | h_agg = dgl.mean_nodes(g, 'h') 53 | features_out = torch.relu(self.fc_feature(features)) 54 | combined = torch.cat((h_agg, features_out), dim=1) 55 | combined = torch.relu(self.fc_combine(combined)) 56 | return self.fc(combined) 57 | def get_features(self, g, features): 58 | h = g.ndata['h'] 59 | e = g.edata['e'] 60 | h = self.gnn1(g, h, e) 61 | h = self.gnn2(g, h, e) 62 | h = self.gnn3(g, h, e) 63 | h = self.dropout(h) 64 | g.ndata['h'] = h 65 | h_agg = dgl.mean_nodes(g, 'h') 66 | features_out = torch.relu(self.fc_feature(features)) 67 | combined = torch.cat((h_agg, features_out), dim=1) 68 | combined = torch.relu(self.fc_combine(combined)) 69 | return combined 70 | 71 | class GNNLayer(nn.Module): 72 | def __init__(self, in_feats, out_feats): 73 | super(GNNLayer, self).__init__() 74 | self.linear1 = nn.Linear(in_feats, out_feats) 75 | self.linear2 = nn.Linear(out_feats, out_feats) 76 | self.bn = nn.BatchNorm1d(out_feats) 77 | self.residual = (in_feats == out_feats) 78 | 79 | def forward(self, g, h, e): 80 | g.ndata['h'] = h 81 | g.edata['e'] = e 82 | g.update_all(dgl.function.u_mul_e('h', 'e', 'm'), dgl.function.mean('m', 'h')) 83 | h = torch.relu(self.linear1(g.ndata['h'])) 84 | h = torch.relu(self.linear2(h)) 85 | h = self.bn(h) 86 | if self.residual: 87 | h += g.ndata['h'] 88 | return h 89 | -------------------------------------------------------------------------------- /Example2.csv: -------------------------------------------------------------------------------- 1 | SMILES 2 | s1ncc2c3c(n(OC)c12)cccc3 3 | s1nc2cc(ccc2n1)C(=O)NC(=S)Nc1ccc(cc1)C(C)C 4 | s1nc(C=2C[NH+](CCC=2)C)c(SCCCF)n1 5 | s1cc(nc1C(C#N)=CNc1ccc(F)cc1F)-c1ccc(OC)cc1 6 | s1c[n+](Cc2cnc(nc2N)C)c(C)c1CCO 7 | s1c2c(nc1CN1CCN(CC1)C(=O)[C@@H]1NC(=O)CC1)cccc2 8 | s1c2c(nc1CN1CCN(CC1)C(=O)[C@@H]1NC(=O)CC1)cccc2 9 | s1c2N(S(=O)(=O)C(=CNc3ncccn3)C(=O)c2c(C)c1C)C 10 | s1c(ccc1-c1sccc1)-c1sccc1 11 | s1c(C)c(cc1C)C(=O)\C=C\c1cc(O)ccc1 12 | s1c(C)c(cc1C(OCC(=O)Nc1c(C)c(ccc1[N+](=O)[O-])C)=O)CCC 13 | s1c(C(=O)\C=C\c2cc(O)ccc2)c(nc1C(=S)N)C 14 | s1c(-c2ccccc2)c(Cc2ccccc2)c(C(=O)[O-])c1N 15 | o1nc(OCc2ccccc2)cc1CO 16 | o1nc(CC)c(n1)N 17 | o1cccc1\C=C(/C)\C1=NC2CC3(C4OCC2C1C4O)c1c(N(OC)C3=O)cccc1 18 | o1cccc1CO 19 | o1cccc1C=O 20 | o1cccc1C(Oc1ccc(cc1OCC)C=C1C(=NN(C1=O)c1ccccc1)C)=O 21 | o1cccc1C(N(C(=O)Cn1nnc2c1cccc2)c1c2c(ccc1)cccc2)C(=O)NC(CC)(C)C 22 | 
o1cccc1-c1nc(cc2c1[nH]c1c2cccc1)C(O)=O 23 | o1ccc(CO)c1-c1nccc2c1[nH]c1c2cccc1 24 | o1cc(cc1)[C@H]1[NH+]2[C@@H](CC[C@@H](C2)C)[C@@H](CC1)C 25 | o1cc(cc1)[C@H]1[C@]2([C@@]3(O[C@@H]3C1=O)[C@]1([C@H](CC2)[C@@]2([C@@H](C[C@H]1OC(=O)C)C(C)(C)C(=O)C=C2)C)C)C 26 | o1cc(cc1)[C@H]1[C@]2(C(=CC1=O)[C@]1([C@H](CC2)[C@@]2([C@H](C[C@H]1OC(=O)C)C(C)(C)C(=O)C=C2)C)C)C 27 | o1cc(cc1)[C@H]1[C@]2(C(=CC1=O)[C@]1([C@H](CC2)[C@@]2([C@@H](C[C@H]1OC(=O)C)C(C)(C)C(=O)C=C2)C)C)C 28 | o1cc(cc1)[C@@H]1C[C@H]2O[C@@]23[C@@]1(C)[C@@H](OC(=O)C)[C@@H](OC(=O)C)[C@H]([C@]1(C=CC(=O)C(C)(C)[C@@H]1CC(OC)=O)C)C3=C 29 | o1cc(cc1)[C@@H]1CC=C2[C@]1(CC[C@H]1[C@@]2(C)[C@H](OC(=O)C)C[C@@H]2[C@@]1(C=CC(=O)C2(C)C)C)C 30 | o1cc(cc1)C=1CC[C@H](C)C=1C(=O)\C=C(\C)/C 31 | o1cc(cc1)C1C2(C(=CC1=O)C1(C(CC2)C2(C(CC1OC(=O)C)C(C)(C)C(=O)C=C2)C)C)C 32 | o1cc(cc1)C(=O)CCC(O)C 33 | o1cc(c2c1CC(C=C)(C)C(C(C)=C)C2=O)C 34 | o1cc(c2c1-c1c(c3c(cc1)c(ccc3)C)C(=O)C2=O)C 35 | o1cc(c2c1-c1c(c3c(cc1)c(ccc3)C)C(=O)C2=O)C 36 | o1cc(c2c1-c1c(c3c(cc1)c(ccc3)C)C(=O)C2=O)C 37 | o1cc(c2c1-c1c(c3c(cc1)C(CCC3)(C)C)C(=O)C2=O)C 38 | o1c2nc3c(cccc3OC)c(OC)c2cc1 39 | o1c2nc3c(cccc3O)c(OC)c2cc1 40 | o1c2nc3c(ccc(OC\C=C(\CCC(O)C(O)(C)C)/C)c3OC)c(OC)c2cc1 41 | o1c2nc3c(ccc(OC)c3OC)c(OC)c2cc1 42 | o1c2nc3c(ccc(OC)c3OC)c(OC)c2cc1 43 | o1c2nc3c(ccc(OC)c3)c(OC)c2cc1 44 | o1c2nc3c(cc(OC)cc3)c(OC)c2cc1 45 | o1c2nc3c(c(OC)cc(OC)c3OC)c(OC)c2cc1 46 | o1c2nc3c(CC[C@@H]([O-])[C@@]3(OC)C\C=C(\C)/C)c(OC)c2cc1 47 | o1c2nc3c(CCC(O)C3(OC)CC=C(C)C)c(OC)c2cc1.O=C([O-])c1ccc[n+](c1)C 48 | o1c2cc(O)ccc2cc1-c1ccc(OC)cc1O 49 | o1c2cc(O)cc3[C@@H]4c5c([C@H]6c7c(-c(c23)c1-c1ccc(O)cc1)cc(O)cc7O[C@@H]6c1ccc(O)cc1)cc(O)cc5O[C@H]4c1ccc(O)cc1 50 | o1c2cc(O)cc3[C@@H]4c5c([C@@H]6c7c(-c(c23)c1-c1ccc([O-])cc1)cc(O)cc7O[C@H]6c1ccc([O-])cc1)cc(O)cc5O[C@H]4c1ccc([O-])cc1 51 | o1c2cc(O)c3c(N(c4c(cccc4)C3=O)C)c2cc1 52 | o1c2cc(O)c3c(N(c4c(ccc(O)c4OC)C3=O)C)c2cc1 53 | o1c2c3c(O[C@H](CC3=O)c3ccc(O)cc3)cc(O)c2c(C(=O)c2ccc(O)cc2O)c1-c1ccc(O)cc1 54 | o1c2c(occ2)cc1 55 | o1c2c(cc1[C@H](O)C)C(=O)c1c(cccc1)C2=O 56 | o1c2c(cc1C)cccc2 57 | o1c2c(cc1C)C(=O)C=C(OC)C2=O 58 | o1c2c(cc1C(O)C)C(=O)c1c(cccc1)C2=O 59 | o1c2c(cc1-c1ccc(O)cc1O)c(OC)c(C\C=C(\C)/C)c(OC)c2 60 | o1c2c(cc1)c(OC)c1c(cccc1)c2OC 61 | o1c2c(cc1)c(OC)c(cc2)C(=O)\C=C\c1ccc(cc1)C 62 | o1c2c(cc1)c(OC)c(cc2)C(=O)C(C(=O)c1ccccc1)C(C=C)(C)C 63 | o1c2c(cc1)c(C(OC)=O)c(O)c1c2cccc1 64 | o1c2c(cc1)C(=O)c1c(ccc(O)c1)C2=O 65 | o1c2c(cc(cc2OC)\C=C\C)c(C)c1-c1cc(OC)c(O)c(O)c1 66 | o1c2c(cc(cc2OC)\C=C\C)c(C)c1-c1cc(OC)c(O)c(O)c1 67 | o1c2c(cc(cc2OC)\C=C\C)c(C)c1-c1cc(OC)c(O)c(O)c1 68 | o1c2c(cc(cc2OC)C=O)c(C)c1-c1cc(OC)c(O)c(O)c1 69 | o1c2c(cc(cc2OC)C=CC)c(C)c1-c1cc(OC)c(O)c(O)c1 70 | o1c2c(c3c1cccc3)c(OC)c(OC)c(O)c2OC 71 | o1c2c(c3c1cccc3)c(OC)c(O)c(OC)c2OC 72 | o1c2c(c3c1c1c(cccc1)c(O)c3OC)C(=O)c1c(cccc1)C2=O 73 | o1c2c(c3c1c(C(=O)CC(C)C)c(O)c(C)c3O)c(O)c(C)c(O)c2C(=O)C(CC)C 74 | o1c2c(c3[nH]c4cc(O)ccc4c3cc2C=O)cc1 75 | o1c2c(c3O[C@@H]([C@@H]([O-])Cc3c(O)c2)c2ccc([O-])cc2)c(C(=O)c2c(O)cc(O)cc2O)c1-c1ccc([O-])cc1 76 | o1c2c(c3O[C@@H]([C@@H](O)Cc3c(O)c2)c2ccc(O)cc2)c(C(=O)c2c(O)cc(O)cc2O)c1-c1ccc(O)cc1 77 | o1c2c(c3O[C@@H](CCc3c(O)c2)c2ccc([O-])cc2)c(C(=O)c2c(O)cc(O)cc2O)c1-c1ccc([O-])cc1 78 | o1c2c(c3O[C@@H](CCc3c(O)c2)c2ccc(O)cc2)c(C(=O)c2c(O)cc(O)cc2O)c1-c1ccc(O)cc1 79 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # VirtuDockDL: A Deep Learning-based Python Pipeline for Virtual Screening 2 | 3 | VirtuDockDL is a comprehensive solution for 
streamlining the process of drug discovery and molecular analysis. With VirtuDockDL, you can harness the power of deep learning to perform virtual screening, evaluate molecular activities, and predict binding affinities with unprecedented accuracy and speed.
4 |
5 | ## Features
6 |
7 | - **Graph Neural Network-Based Ligand Prioritization:** Streamline drug discovery with our GNN model, prioritizing ligands for speed and accuracy.
8 | - **Descriptor Analysis:** Analyze molecular descriptors to predict pharmacological profiles and drug-likeness.
9 | - **Re-screening:** Refine your ligand search iteratively, utilizing new data for targeted identification.
10 | - **Protein Refinement:** Refine protein structures by uploading PDB files.
11 | - **Molecular Docking:** Predict ligand interactions with state-of-the-art simulations, focusing on optimal compounds.
12 | - **Scalable Data Processing:** Efficiently process and analyze data across all scales, ensuring fast, reliable drug discovery results.
13 |
14 | ## Installation
15 |
16 | ### Prerequisites
17 | - Python 3.8 or higher
18 | - [PyTorch](https://pytorch.org/)
19 | - [RDKit](https://www.rdkit.org/)
20 | - [OpenMM](https://openmm.org/)
21 | - [Flask](https://flask.palletsprojects.com/)
22 |
23 | ### Installing Required Libraries
24 | ```sh
25 | pip install flask torch torchvision torchaudio
26 | pip install pandas numpy scikit-learn matplotlib rdkit biopython dgl
27 | pip install openmm
28 | ```
29 |
30 | # Usage
31 | ## Running the Application
32 | Clone the repository:
33 | ```sh
34 | git clone https://github.com/yourusername/VirtuDockDL.git
35 | cd VirtuDockDL
36 | ```
37 | Set up your environment and install the dependencies listed above.
38 |
39 | Run the Flask application:
40 | ```sh
41 | python app.py
42 | ```
43 | Open your web browser and navigate to http://127.0.0.1:5000 to access VirtuDockDL.
44 |
45 | # Uploading Files
46 | The form snippets in this section are illustrative sketches of the corresponding markup in `upload.html`; endpoint names not shown in `app.py` are placeholders.
47 | ## Upload CSV File for Ligand Prioritization
48 | Navigate to the Ligand Prioritization tab and upload a CSV file with `SMILES` and `Activity` columns describing your active and inactive molecules.
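For reference, the first few rows of `Example1.csv` show the expected layout:

```
SMILES,Activity
[C-]#[S+],1
C(C(=O)O)NC(=O)C(=O)O,1
CC(=O)N[C@@H](CS)C(=O)[O-],0
```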
49 | ```html
50 | <form action="/" method="post" enctype="multipart/form-data">
51 |   <input type="file" name="file" accept=".csv">
52 |   <input type="submit" value="Upload CSV">
53 | </form>
54 | ```
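If you prefer to drive the server from a script rather than the browser, the same `/` route in `app.py` accepts the `file` field the form posts. A minimal sketch, assuming the development server started by `python app.py` is listening on port 5000:

```python
# Upload a ligand CSV to a locally running VirtuDockDL instance.
import requests

with open("Example1.csv", "rb") as fh:
    response = requests.post("http://127.0.0.1:5000/", files={"file": fh})
print(response.status_code)
```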
55 | ## Upload PDB File for Protein Refinement
56 | Navigate to the Protein Refinement tab and upload your PDB file.
57 | ```html
58 | <form action="/refine_protein" method="post" enctype="multipart/form-data">
59 |   <input type="file" name="file" accept=".pdb">
60 |   <input type="submit" value="Upload PDB">
61 | </form>
62 | ```
63 | ## Upload ZIP File for Molecular Docking
64 | Navigate to the Molecular Docking tab and upload your ZIP file containing ligand structures.
65 | ```html
66 | <form action="/dock" method="post" enctype="multipart/form-data">
67 |   <input type="file" name="protein_file" accept=".pdb">
68 |   <input type="file" name="ligand_file" accept=".zip">
69 |   <!-- additional docking-parameter fields appear in upload.html -->
70 |   <input type="submit" value="Start Docking">
71 | </form>
72 | ```
73 |
74 | # Main Functionalities
75 | ## Ligand Prioritization
76 | ```python
77 | def predict():
78 |     smiles = request.form['smiles']  # assumes a valid SMILES string
79 |     model = GNN(1, 64, 2)
80 |     model.load_state_dict(torch.load('best_model.pth'))
81 |     model.eval()
82 |
83 |     # Build the DGL graph plus the descriptor/fingerprint vector the model expects
84 |     graph, features, _ = MoleculeDataset([smiles], [0])[0]  # dummy label, unused here
85 |     with torch.no_grad():
86 |         output = model(dgl.batch([graph]), features.unsqueeze(0))
87 |         activity = torch.softmax(output, dim=1)[0, 1]  # class-1 (active) probability
88 |
89 |     return jsonify({'activity': activity.item()})
90 | ```
91 | A fuller, scripted version of this pipeline is sketched in the appendix at the end of this README.
92 | ## Protein Refinement
93 | ```python
94 | def refine_protein():
95 |     file = request.files['file']
96 |     pdb = PDBFile(file)
97 |     forcefield = ForceField('amber99sb.xml')
98 |     modeller = Modeller(pdb.topology, pdb.positions)
99 |     modeller.addHydrogens(forcefield)
100 |     system = forcefield.createSystem(modeller.topology)
101 |     integrator = LangevinMiddleIntegrator(300*kelvin, 1/picosecond, 0.004*picosecond)
102 |     simulation = Simulation(modeller.topology, system, integrator)
103 |     simulation.context.setPositions(modeller.positions)
104 |     simulation.minimizeEnergy()
105 |
106 |     refined_file = 'refined_protein.pdb'
107 |     with open(refined_file, 'w') as f:
108 |         PDBFile.writeFile(simulation.topology, simulation.context.getState(getPositions=True).getPositions(), f)
109 |
110 |     return send_file(refined_file, as_attachment=True)
111 | ```
112 | ## Molecular Docking
113 | ```python
114 | def dock():
115 |     protein_file = request.files['protein_file']
116 |     ligand_file = request.files['ligand_file']
117 |     output_dir = 'docking_results'
118 |     os.makedirs(output_dir, exist_ok=True)  # ensure the output directory exists
119 |
120 |     protein_path = f"{output_dir}/protein.pdb"
121 |     ligand_path = f"{output_dir}/ligand.pdbqt"
122 |     protein_file.save(protein_path)
123 |     ligand_file.save(ligand_path)
124 |
125 |     subprocess.run(['vina', '--receptor', protein_path, '--ligand', ligand_path, '--out', f'{output_dir}/out.pdbqt'])
126 |
127 |     return send_file(f'{output_dir}/out.pdbqt', as_attachment=True)
128 | ```
129 | # Tips for Success
130 |
131 | - Ensure your input files are correctly formatted and contain all necessary information.
132 | - Utilize the "De Novo Molecule Generation" feature to explore new ligands based on specified criteria.
133 | - Take advantage of our re-screening feature to iteratively refine your search for the optimal ligand.
134 |
135 | # Contributing
136 | We welcome contributions! Please fork the repository and submit pull requests for any enhancements or bug fixes.
137 |
138 | # Contact Us
139 | If you have any inquiries or encounter any issues, we encourage you to open an issue in our GitHub repository. For direct assistance or detailed inquiries, please feel free to reach out to our team:
140 | - Ms. Fatima Noor: [fatima.noor@imbb.uol.edu.pk](mailto:fatima.noor@imbb.uol.edu.pk)
141 | - Dr. Muhammad Tahir ul Qamar: [tahirulqamar@gcuf.edu.pk](mailto:tahirulqamar@gcuf.edu.pk)
142 |
143 | We are dedicated to supporting our community and enhancing the project with your valuable feedback.
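# Appendix: Scripted Ligand Prioritization

The web route shown under "Ligand Prioritization" is a thin wrapper around the repository's own modules. The sketch below mirrors the training loop in `app.py` (same model size, optimizer, and collate function); the file name `Example1.csv` and the five-epoch loop are illustrative stand-ins for your own dataset and for app.py's 50-epoch loop with early stopping.

```python
# Minimal offline sketch of the ligand-prioritization pipeline,
# mirroring the training loop in app.py.
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader

from gnn_model import GNN
from gnn_utils import MoleculeDataset, collate

data = pd.read_csv("Example1.csv")
dataset = MoleculeDataset(data["SMILES"].tolist(), data["Activity"].astype(int).tolist())
loader = DataLoader(dataset, batch_size=32, shuffle=True, collate_fn=collate)

model = GNN(1, 64, 2)  # node feature size 1, hidden size 64, two classes (as in app.py)
optimizer = torch.optim.RMSprop(model.parameters(), lr=0.001, weight_decay=5e-4)
criterion = nn.CrossEntropyLoss()

model.train()
for epoch in range(5):  # a few epochs for illustration
    for graphs, feats, labels in loader:
        optimizer.zero_grad()
        loss = criterion(model(graphs, feats), labels)
        loss.backward()
        optimizer.step()

torch.save(model.state_dict(), "best_model.pth")
```

After training, `best_model.pth` is the checkpoint that both the `/` and `/rescreening` routes load.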
146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | -------------------------------------------------------------------------------- /Example1.csv: -------------------------------------------------------------------------------- 1 | SMILES,Activity 2 | [C-]#[S+],1 3 | [H]/N=C(\N)/SCC(=O)N(C)[C@@H]1CCS(=O)(=O)C1,0 4 | [Li+].[Cl-],1 5 | [Li+].[F-],1 6 | [Li+].[Li+].C(=O)([O-])[O-],1 7 | C(C(=O)O)NC(=O)C(=O)O,1 8 | C(C(C1C(=C(C(=O)O1)O)O)O)O,1 9 | C(C1C(C(C(O1)(CO)OP(=O)(O)O)O)O)OP(=O)(O)O,1 10 | C(CCNCCCN)CN,1 11 | C=CCNC1=NC=NC2=C1C=C(C=C2)Br,1 12 | C1=CC(=C(C=C1[N+](=O)[O-])[N+](=O)[O-])O,1 13 | C1=CC(=C(C=C1C=CC(=O)O)O)O,1 14 | C1=CC(=C(C=C1C2=C(C(=O)C3=C(C=C(C=C3O2)O)O)O)O)O,1 15 | C1=CC(=C(C=C1C2=C(C(=O)C3=C(O2)C=C(C=C3)O)O)O)O,1 16 | C1=CC(=C(C=C1C2=CC(=O)C3=C(C=C(C=C3O2)O)O)O)O,1 17 | C1=CC(=C(C=C1Cl)C2=NC3=NC=CN=C3C(=N2)NC4=CC=NC=C4)F,1 18 | C1=CC(=C(C=C1I)F)NC2=C(C=CC(=C2F)F)C(=O)NOCC(CO)O,1 19 | C1=CC(=CC=C1C(=O)NC2=CC3=C(C=C2)NC=C3)F,1 20 | C1=CC(=CC=C1C=CC2=CC(=CC(=C2)O)O)O,1 21 | C1=CC(=CC=C1C2=CC(=O)C3=C(C=C(C=C3O2)O)O)O,1 22 | C1=CC(=CC=C1C2=NC(=C(N2)C3=CC=NC=C3)C4=CC=C(C=C4)F)[N+](=O)[O-],1 23 | C1=CC(=CN=C1)C(=O)N,1 24 | C1=CC=C(C=C1)C(=CC(=O)O)CCC2=CC=C(C=C2)Cl,1 25 | C1=CC=C(C=C1)C(=O)NC2=CC3=C(C=C2)NC=C3,1 26 | C1=CC=C(C=C1)C(=O)NC2=NC=C3C(=C2)C=CN3,1 27 | C1=CC=C(C=C1)C2=CC(=O)C3=C(O2)C=C(C=C3)O,1 28 | C1=CC=C(C=C1)CNC(=O)C2=CSC(=N2)NC3=NC=NC=C3,1 29 | C1=CC=C(C=C1)N2C3=NC=NC(=C3C=N2)N,1 30 | C1=CC=C(C=C1)NC(=O)CCCCCCC(=O)NO,1 31 | C1=CC=C2C(=C1)C(=CC=N2)C3=C(NN=C3)C4=CC=CC=N4,1 32 | C1=CC=C2C(=C1)C(=CN2)C=CC(=O)NC3=CC=CC(=C3)C(=O)N,1 33 | C1=CC=C2C(=C1)C(=O)N(C2=O)C(CC3=CNC4=CC=CC=C43)C(=O)O,1 34 | C1=CC=C2C(=C1)C(=O)OC23C4=CC(=C(C(=C4OC5=C(C(=C(C=C35)Br)[O-])Br)Br)[O-])Br.[Na+].[Na+],1 35 | C1=CC=C2C(=C1)C=CC(=C2C=NNC(=O)C3=CC=NC=C3)O,1 36 | C1=CC=C2C(=C1)C3=NNC4=CC=CC(=C43)C2=O,1 37 | C1=CC2=C3C(=C1)C(=O)N(C(=O)C3=CC=C2)CCCCCC(=O)NO,1 38 | C1=CN(C(=O)N=C1)C2C(C(C(O2)CO)O)O,1 39 | C1=CN(C2=NC=NC(=C21)N)C3C(C(C(O3)CO)O)O,1 40 | C1=CN=C(C2=C1N(C=N2)C3C=C(C(C3O)O)CO)N,1 41 | C1=NC(=NC(=O)N1C2C(C(C(O2)CO)O)O)N,1 42 | C1=NC2=C(N=C(N=C2N1C3C(C(C(O3)CO)O)O)Cl)N,1 43 | C1C(C(C(CC1(C(=O)O)O)OC(=O)C=CC2=CC(=C(C=C2)O)O)O)O,1 44 | C1C(C(OC1N2C=C(C(=O)NC2=O)Br)CO)O,1 45 | C1C(C(OC1N2C=NC(=NC2=O)N)CO)O,1 46 | C1C(C(OC2=CC(=CC(=C21)O)O)C3=CC(=C(C(=C3)O)O)O)OC(=O)C4=CC(=C(C(=C4)O)O)O,1 47 | C1C(C1N)C2=CC=CC=C2,1 48 | C1C(C1N)C2=CC=CC=C2,1 49 | c1c(cc(cc1Sc2c3c([n-]cn3)ncn2)S(=O)(=O)N)N,0 50 | c1c(cn[nH]1)CNC(=O)CC(=O)O,0 51 | c1c(cn2c(c(nc2c1Cl)C3CC3)N)Cl,0 52 | c1c(n[nH]n1)c2nnnn2CC[NH3+],0 53 | c1c(nc(s1)N=C(N)N)CSCC/C(=N/S(=O)(=O)N)/[O-],0 54 | c1c(nc(s1)N2CC[C@H]3CCCC[C@@H]3C2)CN,0 55 | C1C2=C(C3=CC=CC=C3NC1=O)NC4=C2C=C(C=C4)[N+](=O)[O-],1 56 | C1C2=C(C3=CC=CC=C3NC1=O)NC4=C2C=C(C=C4)Br,1 57 | C1C2C(C(C(O2)N3C4=NC=NC(=C4N=C3Br)N)O)OP(=O)(O1)O,1 58 | C1CCC(CC1)N2C=NC3=C(N=C(N=C32)OC4=CC=CC5=CC=CC=C54)NC6=CC=C(C=C6)N7CCOCC7,1 59 | C1CCC(CC1)NC2=NC(=NC3=C2NC=N3)NC4=CC=C(C=C4)N5CCOCC5,1 60 | C1CCN(CC1)CCOC2=CC=C(C=C2)C3=CN4C(=C(C=N4)C5=CC=NC=C5)N=C3,1 61 | C1CN(CCN1)C2=CC=C(C=C2)C3=CN4C(=C(C=N4)C5=CC=NC6=CC=CC=C56)N=C3,1 62 | C1CN(CCN1)CC2=CSC3=NC(=CN23)C4=CC=CC=C4NC(=O)C5=NC6=CC=CC=C6N=C5.Cl,1 63 | C1CNCCN(C1)S(=O)(=O)C2=CC=CC3=C2C=CC=C3I.Cl,1 64 | C1COC2=C(O1)C=CC(=C2)C3=C(NC(=N3)C4=CC=C(C=C4)C(=O)N)C5=CC=CC=N5,1 65 | C1COCCC1NC(=O)C2=CC=C(C=C2)C3=NC=CC(=C3)C4=C(NN=C4)C5=CC=CC=N5,1 66 | C1COCCN1C2=CC(=O)C3=C(O2)C(=CC=C3)C4=CC=CC=C4,1 67 | C1OC2=C(O1)C=C(C=C2)C3=C(NC(=N3)C4=CC=C(C=C4)C(=O)N)C5=CC=CC=N5,1 68 | CC(=CC1=CC=C(C=C1)C(=O)O)C2=CC3=C(C=C2)C(CCC3(C)C)(C)C,1 69 | 
CC(=O)c1ccc(NC(=O)c2ccc(OC(C)C)cc2)cc1C,0 70 | CC(=O)c1ccc(NC(=O)c2cccc(OCc3ccccc3)c2)cc1,0 71 | CC(=O)c1ccc(NC(=O)CCc2ccc([N+](=O)[O-])cc2)cc1,0 72 | CC(=O)c1ccc(NC(=O)CCNC(=O)C2CC2)cc1,0 73 | CC(=O)c1ccc(NC(=O)CCNC(=O)c2ccccc2)cc1,0 74 | CC(=O)c1ccc(NC(=O)Nc2ccccc2C(=O)N(C)C)cc1,0 75 | CC(=O)c1ccc(OCC(=O)N(C)Cc2ccsc2)cc1,0 76 | CC(=O)c1ccc(S(=O)(=O)Nc2cccc(C(N)=O)c2)cc1,0 77 | CC(=O)c1cccc(NC(=O)CCc2ccc(O)cc2)c1,0 78 | CC(=O)N(C)C1CCCN(C(=O)C(C)Oc2ccccc2)C1,0 79 | CC(=O)N(C)Cc1ccc(C(=O)Nc2ccccc2C(C)C)s1,0 80 | CC(=O)N(Cc1ccc(C)o1)c1nc(-c2ccsc2)cs1,0 81 | CC(=O)N(O)Cc1cccc(NC(=O)c2cccc(-n3cncn3)c2)c1,0 82 | CC(=O)N[C@@H](CS)C(=O)[O-],0 83 | CC(=O)N1CCC(C(=O)Nc2ccc(S(=O)(=O)N(C)C)cc2)CC1,0 84 | CC(=O)N1CCC(C(=O)NCc2ccc(CC(C)C)cc2)CC1,0 85 | CC(=O)N1CCC(Oc2ccc(C#N)cc2)CC1C,0 86 | CC(=O)N1CCC(Oc2cccc(C(F)(F)F)c2)CC1,0 87 | CC(=O)N1CCc2cc(C(=O)N3CCC(Oc4ccccc4)CC3)ccc21,0 88 | CC(=O)N1CCc2cc(NC(=O)c3cnc(C)cc3C)ccc21,0 89 | CC(=O)N1CCc2cc(NS(=O)(=O)c3ccc(C)c(C)c3)ccc21,0 90 | CC(=O)N1CCc2cc(S(=O)(=O)N3CCCCCC3)ccc21,0 91 | CC(=O)N1CCN(C(=O)c2ccc(S(C)(=O)=O)cc2)CC1,0 92 | CC(=O)N1CCN(C(=O)c2cccc(OCc3cscn3)c2)CC1,0 93 | CC(=O)N1CCN(C(=O)CCc2nc(-c3cccnc3)no2)CC1,0 94 | CC(=O)N1CCN(C(=O)COc2ccc3ccccc3c2)CC1,0 95 | CC(=O)N1CCN(c2nnc(SCc3ccccc3)s2)CC1,0 96 | CC(=O)NC(C(=O)N1CCCC1C(=O)NCCN1CCCC1)c1ccccc1,0 97 | CC(=O)NC(C(=O)NC(C)c1ccc(O)cc1)c1ccccc1,0 98 | CC(=O)NC(CS)C(=O)O,1 99 | CC(=O)Nc1cc(C(=O)N(C)CC(F)(F)F)ccc1C,0 100 | CC(=O)Nc1cc(C(=O)N(C)Cc2ccccn2)ccc1C,0 101 | CC(=O)Nc1cc(C(=O)N2CCCC(N3CCCC3=O)C2)ccc1F,0 102 | CC(=O)Nc1cc(NC(=O)c2ccc(NC(=O)c3ccccc3)cc2)ccc1Cl,0 103 | CC(=O)Nc1cc(NC(=O)c2cccc(C(F)(F)F)c2)ccc1F,0 104 | CC(=O)Nc1cc(NC(=O)c2nc3ccccc3s2)ccc1Cl,0 105 | CC(=O)OC1C(C2C(CCC(C2(C3(C1(OC(CC3=O)(C)C=C)C)O)C)O)(C)C)O,1 106 | CC(C(=O)NC(C1=CC=CC=C1)C(=O)OC(C)(C)C)NC(=O)CC2=CC(=CC(=C2)F)F,1 107 | CC(C(=O)NC1C(=O)N(C2=CC=CC=C2C(=N1)C3=CC=CC=C3)C)NC(=O)CC4=CC(=CC(=C4)F)F,1 108 | CC(C)(C)C1=CC(=CC(=C1)C(=O)C=CC2=CC=C(C=C2)C(=O)O)C(C)(C)C,1 109 | CC(C)(C)C1=NC2=C(N1)C3=C(C=C(C=C3)F)C4=C2C=CNC4=O,1 110 | CC(C)C1(C)CCC2(C)CCC3(C)C(CCC4C5(C)CCC(OC(=O)CC(C)(C)C(=O)O)C(C)(C)C5CCC43C)C2C1,0 111 | CC(C)c1[nH]nc(n1)[C@@H]2C[C@@H]3CCCC[C@@H]3[NH2+]2,0 112 | CC(C)c1c(=O)[nH]c(nc1[O-])CNC,0 113 | CC(C)c1c(c(n(n1)C)N2CCC(C2)(C)C)N,0 114 | CC(C)c1c(-c2ccc(F)cc2)cnn1Cc1ccc(C(=O)Nc2ccccc2)cc1,0 115 | CC(C)c1c(ncnc1N)c2ccc(c(c2)F)F,0 116 | CC(C)c1cc(C(=O)N(C)Cc2ccccc2)on1,0 117 | CC(C)c1cc(C(=O)Nc2c(C(C)C)nn(C)c2C(N)=O)ccc1F,0 118 | CC(C)c1ccc(-c2nnc(NC(=O)Cc3ccc(F)cc3)o2)cc1,0 119 | CC(C)c1ccc(CC(=O)Nc2cccc(-c3nnn[nH]3)c2)cc1,0 120 | CC(C)c1ccc(CNC(=O)Nc2cccc(C(N)=O)c2)cc1,0 121 | CC(C)c1ccc(N2CC(C(=O)Nc3ccc(C(=O)N4CCCCC4)cc3)CC2=O)cc1,0 122 | CC(C)c1ccc(NC(=O)c2cc(C(C)C)nc3ccccc23)cc1,0 123 | CC(C)C1CCC2(C)CCC3(C)C(CCC4C5(C)CCC(C)(O)C(C)(C)C5CCC43C)C12,0 124 | CC(C)c1ccc2c(c1)C(=O)CC2c1ccccc1-c1ccccn1,0 125 | CC(C)c1ccccc1N(CC(=O)NC1CCCCC1)S(C)(=O)=O,0 126 | CC(C)C1COc2ccc(NC(=O)NC3CCCCC3)cc2N1,0 127 | CC(C)c1nc(nc(n1)[O-])N,0 128 | CC(C)c1nc(sn1)NC2[C@H]3[C@@H]2C[NH2+]C3,0 129 | CC(C)CCC(C(C)C1CCC2C1(CCC3C2CC=C4C3(CCC(C4)O)C)C)O,1 130 | CC(C)CCCC(C)C1CCC2C1(CCCC2=CC=C3CC(CCC3=C)O)C,1 131 | CC(C)CN1C2=C(C(=O)N(C1=O)C)NC=N2,1 132 | CC(C)N(CCCNC(=O)NC1=CC=C(C=C1)C(C)(C)C)CC2C(C(C(O2)N3C=C(C4=C(N=CN=C43)N)Br)O)O,1 133 | CC(C)N(CCCNC(=O)NC1=CC=C(C=C1)C(C)(C)C)CC2C(C(C(O2)N3C=CC4=C(N=CN=C43)N)O)O,1 134 | CC(C)N1C2=NC=NC(=C2C(=N1)C3=CC4=C(N3)C=CC(=C4)O)N,1 135 | CC(C)N1CCC(CC1)NC2=NC(=NC3=CC(=C(C=C32)OC)OCCCN4CCCC4)C5CCCCC5,1 136 | CC(C=C(C)C=CC(=O)NO)C(=O)C1=CC=C(C=C1)N(C)C,1 137 | 
CC(C1CCC(CC1)C(=O)NC2=CC=NC=C2)N,1 138 | CC1(CCC(C2=C1C=CC(=C2)C(=O)NC3=CC=C(C=C3)C(=O)O)(C)C)C,1 139 | CC1=C(C(=CC=C1)Cl)NC(=O)C2=CN=C(S2)NC3=CC(=NC(=N3)C)N4CCN(CC4)CCO,1 140 | CC1=C(C(=NO1)C)C2=C(C=C3C(=C2)N=CC4=C3N(C(=O)N4)C(C)C5=CC=CC=N5)OC,1 141 | CC1=C(C(C(=C(N1)C)[N+](=O)[O-])C2=CC=CC=C2C(F)(F)F)C(=O)OC,1 142 | CC1=C(C(CCC1)(C)C)C=CC(=CC=CC(=CC(=O)O)C)C,1 143 | CC1=C(C(CCC1)(C)C)C=CC(=CC=CC(=CCO)C)C,1 144 | CC1=C(C2=C(N1)C=CC(=C2)O)CCN,1 145 | CC1=CC=C(C=C1)C2=C(N3C=C(C=CC3=N2)C)CC(=O)N(C)C,1 146 | CC1=CC=C(C=C1)C2=CN3C4=C(CCCC4)SC3=N2.Br,1 147 | CC1=CC=C(C=C1)C2=NN(C3=NC=NC(=C23)N)C(C)(C)C,1 148 | CC1=CN=C(N1)C2=CN=C(N=C2C3=C(C=C(C=C3)Cl)Cl)NCCNC4=NC=C(C=C4)C#N,1 149 | CC1=NC(=CC=C1)C2=C(C=NN2)C3=NC4=C(C=C3)N=CC=C4,1 150 | CC1=NC(=CC=C1)C2=C(N=C(N2)C(C)(C)C)C3=CC4=C(C=C3)OCO4,1 151 | CC1=NC(=CC=C1)C2=NN(C=C2C3=CC=NC4=CC=CC=C34)C(=S)NC5=CC=CC=C5,1 152 | CC1=NC2=CC=CC=C2C(=C1)NC(=O)NC3=C(C=CC(=C3)Cl)OC,1 153 | CC1C(C(C(C(O1)OC2=C(OC3=C(C2=O)C(=CC(=C3CC=C(C)C)OC4C(C(C(C(O4)CO)O)O)O)O)C5=CC=C(C=C5)OC)O)O)O,1 154 | CC1CC2C3CCC4=CC(=O)C=CC4(C3(C(CC2(C1(C(=O)CO)O)C)O)F)C,1 155 | Cc1ccc(CCC(=O)Nc2nnc(C3CC3)s2)cc1,0 156 | Cc1ccc(CN(C)C(=O)c2ccc(NC(=O)NC(C)C)cc2)cc1,0 157 | Cc1ccc(CN2CCc3[nH]nc(-c4cccs4)c3C2)s1,0 158 | Cc1ccc(CN2CCCC3(CNC(=O)C3)C2)cc1,0 159 | Cc1ccc(CN2CCN(CC(=O)NC3CCCCCC3)CC2)o1,0 160 | Cc1ccc(CNC(=O)c2ccccc2S(=O)(=O)N2CCOCC2)s1,0 161 | Cc1ccc(CNC(=O)Nc2cccc(-c3csc(C)n3)c2)o1,0 162 | Cc1ccc(CNC(=O)Nc2ccccc2C(F)(F)F)s1,0 163 | Cc1ccc(CNC(=O)NCc2ccc(-n3cccn3)cc2)o1,0 164 | Cc1ccc(CNC(=O)S(=O)(=O)c2ccc(C)cc2)cc1,0 165 | Cc1ccc(F)c(C(=O)N2CCN(Cc3ccc(F)cc3)CC2)c1,0 166 | Cc1ccc(F)cc1S(=O)(=O)N(Cc1ccccc1)C1CC1,0 167 | CC1CCC(N(C)C(=O)c2cccc(-c3nc(C(C)C)co3)c2)CC1,0 168 | Cc1ccc(N(CC(=O)Nc2ccccc2)Cc2ccccc2)cc1,0 169 | Cc1ccc(-n2c(C(=O)C(N)=O)nc3ccccc32)cc1,0 170 | Cc1ccc(-n2c(C)cc(C(=O)C(C)NC(=O)C(C)NC(=O)c3ccc(C(=O)O)c(O)c3)c2N)cc1,0 171 | Cc1ccc(N2CC(C(=O)Nc3nc(C)oc3C)CC2=O)cc1,0 172 | Cc1ccc(-n2cc(C(=O)NCC(C)C)c(=O)[nH]c2=O)cc1,0 173 | Cc1ccc(-n2ccc(C(=O)Nc3cccc(C#N)c3)n2)cc1,0 174 | Cc1ccc(-n2cccc2CN(C)C(=O)c2c[nH]c3ccccc23)cc1,0 175 | Cc1ccc(-n2cccn2)c(C(=O)NCc2ccccn2)c1,0 176 | Cc1ccc(N2CCN(C(=O)c3ccnc(OCC(F)F)c3)CC2)cc1C,0 177 | Cc1ccc(N2CCN(C(=O)CC(c3ccccc3)c3ccccc3)CC2)cc1C,0 178 | Cc1ccc(-n2nc(C)c(C(=O)NCc3ccco3)c2C)cc1,0 179 | Cc1ccc(-n2nc(C)cc2NC(=O)Nc2cc(C)on2)cc1,0 180 | Cc1ccc(NC(=O)c2ccc(C#N)cc2)c(OC(C)C)c1,0 181 | Cc1ccc(NC(=O)c2cccc(OC(F)F)c2)c(F)c1,0 182 | Cc1ccc(NC(=O)c2cccc(OCc3cc(C)cc(C)c3)c2)nc1,0 183 | Cc1ccc(NC(=O)C2CCCN(S(=O)(=O)c3ccc4c(c3)CCC4)CC2)cc1,0 184 | Cc1ccc(NC(=O)C2CCCN2C(=O)C(C)n2cccn2)cc1,0 185 | Cc1ccc(NC(=O)Cc2csc(-c3nc4ccccc4s3)n2)cc1,0 186 | Cc1ccc(NC(=O)CN2C(=O)C3C4CCC(C4)C3C2=O)cc1,0 187 | CC1CCC(NC(=O)CNC(=O)c2cccc(C#N)c2)CC1,0 188 | Cc1ccc(NC(=O)COC(=O)c2ccc(C(N)=O)cc2)cc1,0 189 | Cc1ccc(NC(=O)CS(=O)(=O)c2cccc([N+](=O)[O-])c2)cc1,0 190 | Cc1ccc(NC(=O)CSc2n[nH]c(-c3ccccc3C)n2)cc1,0 191 | Cc1ccc(NC(=O)N(C)Cc2nnc3n2CCCCC3)cc1,0 192 | Cc1ccc(NC(=O)NC(C)c2ccccc2)c(O)c1,0 193 | Cc1ccc(NC(=O)NC2CCc3ccccc32)c(C)c1,0 194 | Cc1ccc(NC(S)Cc2ccc(Nc3ccnc(C)c3)cc2)cc1,0 195 | Cc1ccc(O)c(-c2csc(NC(=O)NC(C)C)n2)c1,0 196 | Cc1ccc(o1)[C@H](C)n2c(c(c(c2N)C#N)C)C,0 197 | Cc1ccc(OC(C)C(=O)NC(C)C)c(SC(C)C)c1,0 198 | Cc1ccc(OC(C)C(=O)OCC(=O)Nc2ccccc2)cc1,0 199 | Cc1ccc(OC2CCN(C(=O)c3ccc(F)c(C#N)c3)CC2)nc1,0 200 | Cc1ccc(OCC(=O)c2c(C)nc3sc(C)cn23)cc1,0 201 | Cc1ccc(OCC(=O)N(C)c2cccc3ccccc23)cc1C,0 202 | Cc1ccc(OCC(=O)N(Cc2ccc(F)cc2)C2CC2)cc1,0 203 | Cc1ccc(OCC(=O)N2CC(C)OC(C)C2)c(C)c1,0 204 | Cc1ccc(OCC(=O)NC2CC(C)(C)Cc3ccccc32)cc1C,0 205 | 
Cc1ccc(OCCNC(=O)C(=O)NCC(c2ccccc2)N2CCOCC2)cc1F,0 206 | Cc1ccc(S(=O)(=O)N(C)C(C)c2ccc(OC(F)(F)F)cc2)cc1,0 207 | Cc1ccc(S(=O)(=O)N(C)C)c(NC(=O)CSc2nnc(C)n2C)c1,0 208 | Cc1ccc(S(=O)(=O)N(C)CC(=O)Nc2ccccc2C(=O)O)cc1,0 209 | Cc1ccc(S(=O)(=O)N(C)CCC(=O)Nc2ccccc2)cc1,0 210 | Cc1ccc(S(=O)(=O)N2CCC(C(=O)NCC3CCCO3)C2)cc1,0 211 | Cc1ccc(S(=O)(=O)NC2(CC(=O)NCc3ccccc3C(F)(F)F)CCCC2)cc1,0 212 | Cc1ccc(S(=O)(=O)NC2CCN(C(=O)CC(C)C)C2)cc1,0 213 | Cc1ccc(S(=O)(=O)Nc2sc3c(c2C#N)CCC3)c(C)c1,0 214 | Cc1ccc(s1)c2cc(n[nH]2)[N-]S(=O)(=O)c3cccc(c3)C#N,0 215 | Cc1ccc(s1)c2nc([n-]n2)NC(=O)c3cc4c(s3)CCOC4,0 216 | Cc1ccc2c(c1)N[C@@H]([N-]S2(=O)=O)C(=O)Nc3ccc(cc3)C(=O)N,0 217 | Cc1ccc2c(c1)N[C@H]([N-]S2(=O)=O)C(=O)Nc3ccc(cc3)C(=O)N,0 218 | Cc1ccc2c(c1)nc(CC(=O)NCCc1ccccn1)n2C,0 219 | CC1CCC2CC(C(=CC=CC=CC(CC(C(=O)C(C(C(=CC(C(=O)CC(OC(=O)C3CCCCN3C(=O)C(=O)C1(O2)O)C(C)CC4CCC(C(C4)OC)O)C)C)O)OC)C)C)C)OC,1 220 | Cc1ccc2cccc(C(=O)NCCCN3CCCCCC3)c2c1,0 221 | Cc1ccc2cnc(Nc3ccc(S(N)(=O)=O)cc3)nc2c1,0 222 | Cc1ccc2oc(-c3c[nH]nc3C(F)(F)F)nc2c1,0 223 | Cc1cccc(C(=O)N2CCC3(CCCN3C(=O)c3ccc(F)cc3)C2)c1,0 224 | Cc1cccc(C)c1NC(=O)C(C)Nc1cccc(F)c1,0 225 | Cc1cccc(C)c1NC(=O)CCCC(=O)Nc1ccccc1[N+](=O)[O-],0 226 | Cc1cccc(C)c1OCC(=O)Nc1ccc(N2CCNC2=O)cc1,0 227 | Cc1cccc(C)c1OCC(=O)Nc1ccc2c(c1)oc(=O)n2C,0 228 | Cc1cccc(c1)/C=N\n2c(nnc2[S-])c3c4c(n[nH]3)CCC4,0 229 | Cc1cccc(c1N)Oc2c(cccn2)Cl,0 230 | Cc1cccc(c1O)[N-]S(=O)(=O)c2cnn(c2)c3ccccc3F,0 231 | Cc1cccc(CN(C)C(=O)CCc2ccc(C#N)cc2)n1,0 232 | Cc1cccc(CN2CCN(C(=O)CCc3nc(C)c(C)s3)CC2)n1,0 233 | Cc1cccc(CNC(=O)c2ccc(N3CCNC3=O)cc2)c1,0 234 | Cc1cccc(CNC(=O)CCc2c[nH]c3ccccc23)c1,0 235 | Cc1cccc(COC(=O)N2CCC(Cc3ccccc3)CC2)c1,0 236 | Cc1cccc(COC2CC(=O)N(c3cc(C)on3)C2)c1,0 237 | Cc1cccc(n1)c2c(c[nH]n2)c3ccc4c(n3)cccn4,0 238 | CCCC(=O)[O-].[Na+],1 239 | CCCC(CCC)C(=O)O,1 240 | CCCCCC(=O)C=CC1C(CC(=O)C1CCCCCCC(=O)O)O,1 241 | CNC(=O)C1=CN=C(C=C1)NN.[Tc],1 242 | COC1=C(C=C(C=C1)C=CC(=O)NC2=CC=CC=C2C(=O)O)OC,1 243 | COC1=C(C=C(C=C1)C2CC(=O)NC2)OC3CCCC3,1 244 | COC1=C(C=C2C(=C1)CC3=C2NN=C3NC4=CC(=CC=C4)F)OC,1 245 | COC1=C(C=CC(=C1)C=CC(=O)CC(=O)C=CC2=CC(=C(C=C2)O)OC)O,1 246 | COCCOC1=CN=CC(=C1)C2=NC=C3C=CC4=C(C3=C2)NC5=C4C(=O)N=CC5,1 247 | CS(=O)(=O)NC1=C(C=C(C=C1)[N+](=O)[O-])OC2=CC=CC=C2,1 248 | O=C(O)c1ccc(-c2ccc(NC(=O)c3ccco3)s2)cc1,0 249 | O=c1c2ccccc2nc(C=Nc2ccccc2C(F)(F)F)n1Cc1ccccc1Cl,0 250 | O=C1CC(C(=O)Nc2ccc(-c3ccccc3)cc2)C(=O)c2ccccc21,0 251 | O=C1CC(c2ccccc2)CN(CCCOCCOCCO)C1,0 252 | O=c1ccc2cc([N+](=O)[O-])ccc2[nH]1,0 253 | O=C1CCc2cc(OC(=O)c3ccc(N4CCOCC4)cc3)ccc2N1,0 254 | O=Cc1cc(-c2ccc(-c3ccc(F)cc3)cc2)ccc1OCCc1ccc(C(=O)O)cc1,0 255 | O=S(=O)(c1ccc(-n2ccnn2)cc1)N1CCCCC1,0 256 | O=S(=O)(NCC(O)c1ccccc1)c1cccc(Cl)c1,0 257 | OCC1c2c(cnn2-c2ccccc2)CC(O)N1C1CCCC1,0 258 | CCCCCCCCC=CCCCCCCCC(=O)OCC(COP(=O)(O)O)O,1 259 | CCCCCCCCCCCCCC=CC(C(COP(=O)(O)O)N)O,1 260 | CCN(CC)C1=CC=C(C=C1)C=NNC(=O)C2=CC=C(C=C2)O,1 261 | O=S(=O)(NCC1CN2CCCCC2CO1)c1ccccc1,0 262 | Oc1ccc(Cc2cc(C3CCCCC3)c(C3CC3)o2)cc1,0 263 | CCNC(=O)C1CN(C2CC3=CNC4=CC=CC(=C34)C2=C1)C,1 264 | CCOC(=O)C1=CC=C(C=C1)NC(=O)OC2C(OC(C(C2OC(=O)NC3=CC4=C(C=C3)OCO4)OC(=O)NC5=CC6=C(C=C5)OCO6)OC)CO,1 265 | CN(C)CCCN1C=C(C2=C1C=CC(=C2)OC)C3=C(C(=O)NC3=O)C4=CNC5=CC=CC=C54,1 266 | COCCOC1=CN=CC(=C1)C2=NC=C3C=CC4=C(C3=C2)NC5=C4C(=O)N=CC5,1 267 | CN1C=C(C2=CC=CC=C21)C=C3C4=C(C=CC(=C4)S(=O)(=O)N)NC3=O,1 268 | CN1C=CN=C1C(=O)NC2=CN(C(=C2)C(=O)NC3=CN(C(=C3)C(=O)NC4=CN(C(=C4)C(=O)NCCC(=O)NCCCN(C)C)C)C)C,1 269 | -------------------------------------------------------------------------------- /upload.html: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | VirtuDockDL: Automated Virtual Screening 7 | 8 | 9 | 185 | 186 | 187 |
188 |

VirtuDockDL: A Deep Learning-based Python Pipeline for Virtual Screening

189 | 190 | 191 | 208 |
209 | 210 |
211 |
212 |
213 | 214 |
215 |

Welcome to VirtuDockDL

216 |

VirtuDockDL is your comprehensive solution for streamlining the process of drug discovery and molecular analysis. With our platform, you can harness the power of deep learning to perform virtual screening, evaluate molecular activities, and predict binding affinities with unprecedented accuracy and speed.

217 |
218 | Learn How to Use 219 | Get Started 220 |
221 |
222 |

Core Features:

223 | 224 |
225 |
226 |
227 |

Graph Neural Network-Based Ligand Prioritization

228 |

Streamline drug discovery with our GNN model, prioritizing ligands for speed and accuracy.

229 |
230 |
231 |
232 |
233 |

Descriptor Analysis

234 |

Analyze molecular descriptors to predict pharmacological profiles and drug-likeness.

235 |
236 |
237 |
238 |
239 |

Re-screening

240 |

Refine your ligand search iteratively, utilizing new data for targeted identification.

241 |
242 |
243 |
244 |
245 |
246 |
247 |

Protein Refinement

248 |

Upload PDB files to refine and optimize protein structures, preparing them for accurate docking.

249 |
250 |
251 |
252 |
253 |

Molecular Docking

254 |

Predict ligand interactions with state-of-the-art simulations, focusing on optimal compounds.

255 |
256 |
257 |
258 |
259 |

Scalable Data Processing

260 |

Efficiently process and analyze data across all scales, ensuring fast, reliable drug discovery results.

261 |
262 |
263 |
264 |
265 |
266 |
267 |
268 |
269 | 270 |
271 |
272 |

Upload CSV File

273 |
274 | 275 | (Example) 276 | 277 |
278 | 279 |
280 |
281 | 282 | {% if generated_molecules %} 283 |

Generated Molecules

284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | {% for smiles, activity in generated_molecules %} 293 | 294 | 295 | 296 | 297 | {% endfor %} 298 | 299 |
SMILESActivity
{{ smiles }}{{ activity }}
300 | Download CSV 301 | {% endif %} 302 | 303 | 304 |
305 | {{ clusters_table|safe }} 306 |
307 | 308 | 309 | {% if plot_file_path %} 310 | Cluster Plot 311 | 312 | 313 | 314 | {% endif %} 315 | 316 | 317 |
318 | {% with messages = get_flashed_messages() %} 319 | {% if messages %} 320 |
    321 | {% for message in messages %} 322 |
  • {{ message }}
  • 323 | {% endfor %} 324 |
325 | {% endif %} 326 | 327 | {% endwith %} 328 |
329 |
330 | 331 | 332 |
333 |
334 |

Rescreening

335 |
336 |
337 | 338 | (Example) 339 | 340 |
341 | 342 |
343 |
344 | 345 | 346 | 347 | 348 | {% if success %} 349 |

Analysis successful!

350 |
351 | {{ cluster_table|safe }} 352 |
353 | 354 | 355 | {% if plot_file_p %} 356 |
357 | Cluster Plot 358 | 359 | 360 | 361 |
362 | {% endif %} 363 | 364 | {% if sdf_zip_file %} 365 |
366 |

Download the compounds in SDF format:

367 | 368 | 369 | 370 |
371 | {% endif %} 372 | 373 | {% endif %} 374 | 375 |
376 |
377 |
378 |

De Novo Molecule Generation

379 |
380 |
381 | 382 | 383 |
384 |
385 | 386 |
387 | 388 | 389 |
390 |
391 | 392 | 393 |
394 |
395 | 396 |
397 |
398 |
399 |
400 |
401 |
402 | 403 | 404 | 405 |
406 |
407 |

Upload Protein File

408 |
409 |
410 | 411 | 412 | 413 |
414 |
415 |
416 | 417 | {% if download_links %} 418 |
419 |

Results:

420 |
421 |

Generated Plots

422 | {% if download_links.ramachandran_plot %} 423 | Ramachandran Plot 424 | {% endif %} 425 | {% if download_links.sasa_per_residue_plot %} 426 | SASA Plot 427 | {% endif %} 428 |
429 |
430 |

Download Processed Files

431 | Download Stripped Protein 432 | Download Fixed Protein 433 | Download Minimized Protein 434 |
435 |
436 | {% endif %} 437 |
438 | 439 | 440 | 441 |
442 |
443 |

Upload Files for Docking

444 |
445 |
446 | 447 | 448 |
449 |
450 | 451 | 452 |
453 |

Docking Parameters

454 | 455 |
456 |
457 |
458 | 459 | 460 |
461 |
462 |
463 |
464 | 465 | 466 |
467 |
468 |
469 |
470 | 471 | 472 |
473 |
474 |
475 |
476 |
477 |
478 | 479 | 480 |
481 |
482 |
483 |
484 | 485 | 486 |
487 |
488 |
489 |
490 | 491 | 492 |
493 |
494 |
495 |
496 | 497 | 498 |
499 |
500 | 501 | 502 |
503 |
504 | 505 | 506 |
507 | 508 |
509 |
510 | 511 | 512 |
513 |

Docking Results

514 |

No results yet. Please upload files and start docking.

515 |
516 | 517 | 518 |
519 |

Docking Chart

520 | 521 |
522 |
523 |
524 | 525 | 526 |
527 |
528 |

Welcome to VirtuDockDL – Your Automated Virtual Screening Companion

529 |

VirtuDockDL leverages the power of deep learning to streamline the drug discovery process, making it faster, more accurate, and accessible. Whether you're refining protein structures, prioritizing ligands, or diving deep into molecular docking, VirtuDockDL is here to guide you every step of the way.

530 | 531 |

Getting Started:

532 |
    533 |
  • Upload Your Data: Begin by uploading your protein files and ligand datasets. VirtuDockDL accepts PDB files for proteins and CSV files for ligands. Ensure your ligand files are formatted correctly, with 'SMILES' and 'Activity' columns for virtual screening.
  • 534 |
  • Ligand Prioritization: Use our Graph Neural Network (GNN) model to efficiently prioritize ligands. This process helps in narrowing down potential candidates by predicting their pharmacological profiles.
  • 535 |
  • Protein Refinement: Upload your protein structures for refinement. Our platform will optimize your proteins to ensure accurate docking results, improving the prediction of ligand interactions.
  • 536 |
  • Molecular Docking: With your ligands prioritized and protein refined, proceed to the Molecular Docking tab. Here, VirtuDockDL simulates the interaction between your ligands and protein targets, helping identify the most promising compounds.
  • 537 |
  • Analysis and Download Results: Once docking is complete, analyze the results directly on VirtuDockDL. You can download the detailed reports and visualizations for further analysis.
  • 538 |
539 | 540 |

Tips for Success:

541 |

Ensure your input files are correctly formatted and contain all necessary information. Utilize the "De Novo Molecule Generation" feature to explore new ligands based on specified criteria, enhancing your drug discovery process. Take advantage of our re-screening feature to iteratively refine your search for the optimal ligand.

542 | 543 |

Technical Support:

544 |

Should you encounter any issues or have questions, please refer to our FAQ section or reach out to our support team. VirtuDockDL is continuously evolving, and your feedback is invaluable to us.

545 | 546 |

Disclaimer:

547 |

VirtuDockDL is designed for research purposes only. Users are responsible for the interpretation of the results, and it is recommended to corroborate the findings with experimental data.

548 | 549 |

Let's Revolutionize Drug Discovery Together

550 |

VirtuDockDL is more than a tool; it's your partner in the quest to discover new and effective therapeutics. Explore the possibilities, push the boundaries of what's achievable, and embark on a journey of innovation and discovery.

551 |
552 |
553 |
554 |
555 | 556 | 557 | 558 | 559 | 560 | 561 | 562 | 563 | 564 | 565 | 752 | 753 | 754 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | import random 2 | from flask import Flask, render_template, request, flash, redirect, url_for, send_from_directory, after_this_request 3 | from flask import send_from_directory, jsonify 4 | import os 5 | from flask import session 6 | import logging 7 | import json 8 | from openmm.app import PDBFile, Modeller, ForceField, Simulation, PME, HBonds 9 | from openmm import LangevinMiddleIntegrator 10 | from openmm.unit import kelvin, picosecond, picoseconds, nanometer 11 | from pathlib import Path 12 | from Bio.PDB import PDBIO 13 | from torch_geometric.data import Data 14 | from flask import Flask, request, jsonify 15 | from rdkit.Chem import PandasTools 16 | import zipfile 17 | import uuid 18 | import subprocess 19 | import shutil 20 | import time 21 | import requests 22 | from werkzeug.utils import secure_filename 23 | from flask import send_from_directory 24 | import torch.nn.functional as F 25 | from torch_geometric.nn import GCNConv, global_mean_pool 26 | from torch_geometric.data import Data 27 | from Bio.PDB import PDBParser 28 | import numpy as np 29 | from werkzeug.utils import secure_filename 30 | import csv 31 | import dgl 32 | import torch 33 | import torch.nn as nn 34 | from rdkit import Chem 35 | from rdkit.Chem import Descriptors, AllChem, MACCSkeys, RDKFingerprint 36 | import pandas as pd 37 | from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score 38 | from torch.utils.data import Dataset, DataLoader 39 | from sklearn.model_selection import StratifiedShuffleSplit 40 | import numpy as np 41 | from flask import Flask 42 | from gnn_model import GNN 43 | from flask import Flask, request, render_template, flash, send_file 44 | import matplotlib.pyplot as plt 45 | from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score 46 | from torch.utils.data import Dataset, DataLoader 47 | from sklearn.model_selection import StratifiedShuffleSplit 48 | import numpy as np 49 | import matplotlib.pyplot as plt 50 | from sklearn.metrics import roc_auc_score, roc_curve 51 | from torch.utils.data import DataLoader 52 | from gnn_model import GNN # Import GNN class from gnn_model.py 53 | from gnn_utils import mol_to_graph, MoleculeDataset, collate 54 | from sklearn.mixture import GaussianMixture 55 | from sklearn.metrics import silhouette_score, davies_bouldin_score 56 | from gnn_model import GNN 57 | from gnn_utils import collate, mol_to_graph, MoleculeDataset 58 | import torch 59 | from torch_geometric.data import Data 60 | from Bio.PDB import PDBParser, CaPPBuilder 61 | import mdtraj as md 62 | import openmm 63 | from openmm.app import * # This will import the necessary 'app' module classes and functions 64 | from openmm import * 65 | from openmm.unit import * 66 | from openmm.app import PDBFile, Modeller, ForceField 67 | from pdbfixer import PDBFixer 68 | from simtk.openmm.app import PDBFile 69 | import matplotlib.pyplot as plt 70 | from io import BytesIO 71 | from flask import Flask, send_from_directory, url_for, current_app, flash, redirect, render_template 72 | from datetime import datetime 73 | 74 | app = Flask(__name__) 75 | app.config['SECRET_KEY'] = 'your_secret_key' 76 | app.config['UPLOADED_FILES_DIR'] = 'uploaded_files' 77 | app.config['GENERATED_FILES_DIR'] = 'generated_files' 78 
| app.config['uploaded_files_dir'] = 'uploaded_files' 79 | app.config['generated_files_dir'] = 'generated_files' 80 | app.config['UPLOAD_FOLDER'] = 'uploads' 81 | app.config['DOCKING_RESULTS_DIR'] = 'docking_results' 82 | app.config['ALLOWED_EXTENSIONS'] = {'csv', 'zip', 'pdb', 'sdf'} 83 | 84 | # Ensure directories exist 85 | for directory in [app.config['GENERATED_FILES_DIR'], app.config['UPLOADED_FILES_DIR'], app.config['generated_files_dir'], app.config['uploaded_files_dir'], app.config['UPLOAD_FOLDER'], app.config['DOCKING_RESULTS_DIR']]: 86 | os.makedirs(directory, exist_ok=True) 87 | 88 | # Directory setup 89 | for directory in [app.config['GENERATED_FILES_DIR'], app.config['UPLOADED_FILES_DIR'], app.config['generated_files_dir'], app.config['uploaded_files_dir'],app.config['UPLOAD_FOLDER']]: 90 | os.makedirs(directory, exist_ok=True) 91 | 92 | def save_data_to_csv(data, filename): 93 | """Save data to CSV format.""" 94 | with open(filename, 'w', newline='') as csv_file: 95 | fieldnames = ['SMILES', 'Activity'] 96 | writer = csv.DictWriter(csv_file, fieldnames=fieldnames) 97 | writer.writeheader() 98 | for smiles, activity in data: 99 | writer.writerow({'SMILES': smiles, 'Activity': activity}) 100 | 101 | 102 | def preprocess_csv(file): 103 | """Preprocess the uploaded CSV file.""" 104 | try: 105 | # Read the CSV file into a DataFrame 106 | df = pd.read_csv(file) 107 | 108 | # Check if the CSV file contains the required columns 109 | if 'SMILES' not in df.columns or 'Activity' not in df.columns: 110 | flash('The CSV file must have "SMILES" and "Activity" columns.', 'error') 111 | return None 112 | 113 | # Canonicalize and validate SMILES strings 114 | df['SMILES'] = df['SMILES'].apply(lambda x: Chem.MolToSmiles(Chem.MolFromSmiles(x), canonical=True) if Chem.MolFromSmiles(x) is not None else None) 115 | 116 | # Drop rows with None (invalid SMILES) values 117 | df.dropna(subset=['SMILES'], inplace=True) 118 | 119 | # Save the preprocessed data in CSV format 120 | timestamp = int(time.time()) 121 | filename = f'uploaded_data_{timestamp}.csv' 122 | save_data_to_csv(df.values.tolist(), filename) 123 | #flash('CSV file uploaded and saved successfully.', 'success') 124 | 125 | return df 126 | except Exception as e: 127 | flash(f'Error processing the CSV file: {str(e)}', 'error') 128 | return None 129 | 130 | 131 | def train_and_evaluate_model(train_dataloader, test_dataloader, model, optimizer, criterion, scheduler): 132 | """Train and evaluate the GNN model.""" 133 | best_val_loss = float('inf') 134 | patience = 10 135 | stop_counter = 0 136 | checkpoint_path = 'best_model.pth' 137 | for epoch in range(50): 138 | model.train() 139 | train_loss = 0 140 | for batched_graph, batched_features, batched_labels in train_dataloader: 141 | if batched_graph is None: 142 | continue 143 | optimizer.zero_grad() 144 | outputs = model(batched_graph, batched_features) 145 | loss = criterion(outputs, batched_labels) 146 | train_loss += loss.item() 147 | loss.backward() 148 | optimizer.step() 149 | train_loss /= len(train_dataloader) 150 | print(f"Epoch {epoch + 1}, Train Loss: {train_loss:.4f}") 151 | model.eval() 152 | val_loss = 0 153 | for batched_graph, batched_features, batched_labels in test_dataloader: 154 | if batched_graph is None: 155 | continue 156 | with torch.no_grad(): 157 | outputs = model(batched_graph, batched_features) 158 | loss = criterion(outputs, batched_labels) 159 | val_loss += loss.item() 160 | val_loss /= len(test_dataloader) 161 | scheduler.step(val_loss) 162 | print(f"Epoch 
{epoch + 1}, Validation Loss: {val_loss:.4f}") 163 | if val_loss < best_val_loss: 164 | best_val_loss = val_loss 165 | stop_counter = 0 166 | torch.save(model.state_dict(), checkpoint_path) 167 | else: 168 | stop_counter += 1 169 | if stop_counter >= patience: 170 | print("Early stopping triggered.") 171 | break 172 | model.load_state_dict(torch.load(checkpoint_path)) 173 | return model 174 | def download_clusters(): 175 | return send_file(os.path.join(app.config['GENERATED_FILES_DIR'], 'final_clusters.csv'), as_attachment=True, 176 | attachment_filename='final_clusters.csv') 177 | 178 | @app.route('/download', methods=['GET']) 179 | def download(): 180 | return send_file(os.path.join(app.config['GENERATED_FILES_DIR'], 'generated_molecules.csv'), as_attachment=True) 181 | 182 | @app.route('/', methods=['GET', 'POST']) 183 | def index(): 184 | timestamp = datetime.now().strftime("%Y%m%d%H%M%S") 185 | final_compounds_filename = f'final_compounds_{timestamp}.csv' 186 | final_clusters_filename = f'final_clusters_{timestamp}.csv' 187 | cluster_plot_filename = f'cluster_plot_{timestamp}.png' 188 | model = GNN(1, 64, 2) 189 | criterion = nn.CrossEntropyLoss() 190 | optimizer = torch.optim.RMSprop(model.parameters(), lr=0.001, weight_decay=5e-4) 191 | scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor=0.5, patience=5) 192 | virtual_screening = False 193 | uploaded_file_path = None 194 | generated_file_path = None 195 | generated_molecules = None 196 | plot_file_path = None 197 | 198 | if request.method == 'POST': 199 | if 'file' in request.files: 200 | file = request.files['file'] 201 | if file.filename != '': 202 | filename = f'Molecules_{timestamp}.csv' 203 | uploaded_file_path = os.path.join(app.config['UPLOADED_FILES_DIR'], filename) 204 | file.save(uploaded_file_path) 205 | #flash('CSV file uploaded and saved successfully.', 'success') 206 | 207 | # Load data and preprocess 208 | data = pd.read_csv(uploaded_file_path) 209 | smiles = data["SMILES"].tolist() 210 | labels = data["Activity"].astype(int).tolist() 211 | full_dataset = MoleculeDataset(smiles, labels) 212 | 213 | splitter = StratifiedShuffleSplit(n_splits=1, train_size=0.8, random_state=42) 214 | train_indices, test_indices = next(splitter.split(smiles, labels)) 215 | train_dataset = [full_dataset[i] for i in train_indices] 216 | test_dataset = [full_dataset[i] for i in test_indices] 217 | 218 | train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=collate) 219 | test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False, collate_fn=collate) 220 | 221 | #flash('Dataset prepared and split into training and testing sets successfully.', 'success') 222 | 223 | # Train the model 224 | model = train_and_evaluate_model(train_dataloader, test_dataloader, model, optimizer, criterion, 225 | scheduler) 226 | 227 | all_predictions, all_targets = [], [] 228 | for batched_graph, batched_features, batched_labels in test_dataloader: 229 | if batched_graph is None: 230 | continue 231 | 232 | with torch.no_grad(): 233 | outputs = model(batched_graph, batched_features) 234 | _, predicted = torch.max(outputs, 1) 235 | all_predictions.extend(predicted.cpu().numpy()) 236 | all_targets.extend(batched_labels.cpu().numpy()) 237 | 238 | accuracy = accuracy_score(all_targets, all_predictions) 239 | precision = precision_score(all_targets, all_predictions) 240 | recall = recall_score(all_targets, all_predictions) 241 | f1 = f1_score(all_targets, all_predictions) 242 | 243 | print(f"Test 
Accuracy: {accuracy:.4f}") 244 | print(f"Precision: {precision:.4f}") 245 | print(f"Recall: {recall:.4f}") 246 | print(f"F1 Score: {f1:.4f}") 247 | 248 | # Evaluate the model on test data 249 | model.eval() 250 | all_probabilities = [] 251 | 252 | for batched_graph, batched_features, batched_labels in test_dataloader: 253 | if batched_graph is None: 254 | continue 255 | with torch.no_grad(): 256 | outputs = model(batched_graph, batched_features) 257 | probabilities = torch.softmax(outputs, dim=1) 258 | all_probabilities.extend(probabilities.cpu().numpy()) 259 | # Calculate AUC 260 | all_probabilities = np.array(all_probabilities) 261 | true_labels = np.array(all_targets) 262 | class_1_probs = all_probabilities[:, 1] 263 | 264 | auc = roc_auc_score(true_labels, class_1_probs) 265 | 266 | print(f"AUC: {auc:.4f}") 267 | final_compounds = [(smiles[idx], class_1_probs[i]) for i, idx in enumerate(test_indices) if 268 | class_1_probs[i] >= 0.7] 269 | sorted_compounds = sorted(final_compounds, key=lambda x: x[1], reverse=True) 270 | final_df = pd.DataFrame(sorted_compounds, columns=['Compound', 'Probability']) 271 | generated_file_path = os.path.join(app.config['GENERATED_FILES_DIR'], final_compounds_filename) 272 | final_df.to_csv(generated_file_path, index=False) 273 | 274 | print("File saved successfully!") 275 | final_df = pd.read_csv(generated_file_path) 276 | # Extract the probabilities for clustering 277 | X = final_df[['Probability']].values 278 | 279 | # Fit the Gaussian Mixture Model 280 | n_clusters = 3 # you can change this to the desired number of clusters 281 | gmm = GaussianMixture(n_components=n_clusters, random_state=42) 282 | final_df['Cluster'] = gmm.fit_predict(X) 283 | # Evaluate clustering performance 284 | silhouette_avg = silhouette_score(X, final_df['Cluster']) 285 | davies_bouldin = davies_bouldin_score(X, final_df['Cluster']) 286 | print(f"Silhouette Score: {silhouette_avg:.4f}") 287 | print(f"Davies-Bouldin Score: {davies_bouldin:.4f}") 288 | # Save final results in a file 289 | final_clusters_file_path = os.path.join(app.config['GENERATED_FILES_DIR'], final_clusters_filename) 290 | final_df.to_csv(final_clusters_file_path, index=False) 291 | print("Final clusters saved successfully!") 292 | 293 | # Extract the probabilities and clusters for plotting 294 | X = final_df[['Probability']].values 295 | clusters = final_df['Cluster'].values 296 | 297 | # Create a scatter plot 298 | plt.figure(figsize=(10, 6)) 299 | for cluster in range(n_clusters): 300 | cluster_points = X[clusters == cluster] 301 | plt.scatter(cluster_points[:, 0], cluster_points[:, 0], label=f"Cluster {cluster}") 302 | 303 | # Plot the centroids 304 | centroids = gmm.means_ 305 | plt.scatter(centroids[:, 0], centroids[:, 0], c='red', marker='X', label='Centroids') 306 | 307 | # Add labels and legend 308 | plt.xlabel('Probability') 309 | plt.ylabel('Probability') 310 | plt.title('Cluster Plot') 311 | plt.legend() 312 | 313 | # Save the plot as an image file 314 | plot_file_path = os.path.join('static', cluster_plot_filename) # Assuming your static folder is set up correctly 315 | plt.savefig(plot_file_path) 316 | plt.close() 317 | # Perform virtual screening 318 | file_path = os.path.join(app.config['GENERATED_FILES_DIR'], final_clusters_filename) 319 | if not os.path.exists(file_path): 320 | flash('File "final_clusters.csv" does not exist. 
Please generate the clusters first.', 'warning')
321 | else:
322 | # Read CSV files into pandas dataframes
323 | compounds_df = pd.read_csv(os.path.join(app.config['GENERATED_FILES_DIR'], final_compounds_filename))
324 | clusters_df = pd.read_csv(file_path)
325 | # Convert dataframes to HTML tables
326 | compounds_table = compounds_df.to_html(classes='table table-striped table-bordered', index=False)
327 | clusters_table = clusters_df.to_html(classes='table table-striped table-bordered', index=False)
328 |
329 | virtual_screening = True
330 | # Pass data to template
331 | return render_template('upload.html', virtual_screening=virtual_screening,
332 | final_clusters_filename=final_clusters_filename,
333 | final_compounds_filename=final_compounds_filename,
334 | compounds_table=compounds_table, clusters_table=clusters_table,
335 | plot_file_path=plot_file_path[len('static/'):],
336 | generated_file_path=generated_file_path,
337 | final_clusters_file_path=final_clusters_file_path)  # Added clusters_table
338 | # Add return statement for GET request
339 | return render_template('upload.html', virtual_screening=virtual_screening,
340 | uploaded_file_path=uploaded_file_path,
341 | generated_file_path=generated_file_path,
342 | generated_molecules=generated_molecules,
343 | plot_file_path=plot_file_path)
344 |
345 | def allow_files(filename):
346 | ALLOWED_EXTENSIONS = {'csv'}
347 | return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
348 | # Ensure the static file serving route can handle the new library cluster plot image
349 | @app.route('/images/<filename>')
350 | def uploaded_file(filename):
351 | return send_from_directory('path to/PycharmProjects/pythonProject3/generated_files', filename)
352 | @app.route('/download/sdf_zip')
353 | def download_sdf_zip():
354 | return send_from_directory(app.config['GENERATED_FILES_DIR'], 'compounds_sdf.zip', as_attachment=True)
355 | def get_compound_name_from_pubchem(smiles_string):
356 | # URL for the PubChem PUG-REST service
357 | url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/smiles/{smiles_string}/synonyms/JSON"
358 | try:
359 | response = requests.get(url)
360 | response.raise_for_status()
361 | data = response.json()
362 | name = data['InformationList']['Information'][0]['Synonym'][0]
363 | return name
364 | except requests.exceptions.HTTPError as http_err:
365 | print(f"HTTP error occurred: {http_err}")
366 | except Exception as err:
367 | print(f"An error occurred: {err}")
368 | return None
369 |
370 | @app.route('/rescreening', methods=['POST'])
371 | def rescreening():
372 | if request.method == 'POST':
373 | if 'file' in request.files:
374 | file = request.files['file']
375 | if file and allow_files(file.filename):
376 | filename = 'New_Library.csv'
377 | uploaded_file_path = os.path.join(app.config['UPLOADED_FILES_DIR'], filename)
378 | file.save(uploaded_file_path)
379 | #flash('CSV file uploaded and saved successfully.', 'success')
380 | # Load the trained model
381 | model = GNN(1, 64, 2)
382 | model.load_state_dict(torch.load("best_model.pth"))
383 | model.eval()
384 | # Create a dataset for the new library of compounds
385 | new_data = pd.read_csv(uploaded_file_path)
386 | new_smiles = new_data["SMILES"].tolist()
387 | new_dataset = MoleculeDataset(new_smiles, [0] * len(new_smiles))  # labels are not used in prediction
388 | # Use the model to predict the drug-like potential for each compound in the library
389 | new_dataloader = DataLoader(new_dataset, batch_size=32, shuffle=False, collate_fn=collate)
390 |
391 | all_probabilities = []
392 | for batched_graph, batched_features, _ in new_dataloader:
393 | if batched_graph is None:
394 | continue
395 |
396 | with torch.no_grad():
397 | outputs = model(batched_graph, batched_features)
398 | probabilities = torch.softmax(outputs, dim=1)
399 |
400 | all_probabilities.extend(probabilities.cpu().numpy())
401 | # Evaluate the results and perform clustering if needed
402 | all_probabilities = np.array(all_probabilities)
403 | class_1_probs = all_probabilities[:, 1]
404 | # Save final compounds with their respective predicted probabilities
405 | final_compounds = [(new_smiles[i], class_1_probs[i]) for i in range(len(new_smiles)) if class_1_probs[i] > 0.7]
406 | print(final_compounds)
407 | sorted_compounds = sorted(final_compounds, key=lambda x: x[1], reverse=True)
408 | final_df = pd.DataFrame(sorted_compounds, columns=['Compound', 'Probability'])
409 | generated_file_path = os.path.join(app.config['GENERATED_FILES_DIR'], 'new_library_predictions.csv')
410 | final_df.to_csv(generated_file_path, index=False)
411 |
412 | # Extract the probabilities for clustering
413 | X = final_df[['Probability']].values
414 |
415 | # Fit the Gaussian Mixture Model
416 | n_clusters = 3  # you can change this to the desired number of clusters
417 | gmm = GaussianMixture(n_components=n_clusters, random_state=42)
418 | final_df['Cluster'] = gmm.fit_predict(X)
419 |
420 | # Save final results in a file
421 | final_clusters_file_path = os.path.join(app.config['GENERATED_FILES_DIR'], 'new_library_clusters.csv')
422 | final_df.to_csv(final_clusters_file_path, index=False)
423 | # Convert SMILES to SDF
424 | compounds_sdf_dir = os.path.join(app.config['GENERATED_FILES_DIR'], 'compounds_sdf')
425 | if not os.path.exists(compounds_sdf_dir):
426 | os.makedirs(compounds_sdf_dir)
427 |
428 | # Save compounds to SDF with PubChem names
429 | for index, row in final_df.iterrows():
430 | mol = Chem.MolFromSmiles(row['Compound'])
431 | compound_name = get_compound_name_from_pubchem(
432 | row['Compound']) or f"Compound_{index}"  # Fetch compound name
433 | if mol:
434 | mol.SetProp("_Name", compound_name)
435 | mol.SetProp("Probability", str(row['Probability']))
436 | mol.SetProp("Cluster", str(row['Cluster']))
437 |
438 | # Create a filename from the compound name
439 | safe_filename = secure_filename(
440 | compound_name)  # Use secure_filename to ensure it's safe for file paths
441 | sdf_filename = f"{safe_filename}.sdf" if safe_filename else f"Compound_{index}.sdf"
442 | sdf_path = os.path.join(compounds_sdf_dir, sdf_filename)
443 |
444 | with Chem.SDWriter(sdf_path) as writer:
445 | writer.write(mol)
446 |
447 | else:
448 | # Skip entries whose SMILES string fails to parse
449 | print(f"Skipping compound with unparsable SMILES: {row['Compound']}")
450 |
451 |
452 | # Zip the SDF directory
453 | sdf_zipfile_path = os.path.join(app.config['GENERATED_FILES_DIR'], 'compounds_sdf.zip')
454 | with zipfile.ZipFile(sdf_zipfile_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
455 | for root, _, files in os.walk(compounds_sdf_dir):
456 | for file in files:
457 | file_path = os.path.join(root, file)
458 | zipf.write(file_path, arcname=os.path.relpath(file_path, compounds_sdf_dir))
459 |
460 | # Clean up the individual SDF files after zipping by removing the directory
461 | shutil.rmtree(compounds_sdf_dir)
462 |
463 | # Extract the probabilities and clusters for plotting
464 | clusters = final_df['Cluster'].values
465 |
466 | # Create a scatter plot
467 |
                plt.figure(figsize=(10, 6))
                for cluster in range(n_clusters):
                    cluster_points = X[clusters == cluster]
                    plt.scatter(cluster_points[:, 0], cluster_points[:, 0], label=f"Cluster {cluster}")

                # Plot the cluster centroids
                centroids = gmm.means_
                plt.scatter(centroids[:, 0], centroids[:, 0], c='red', marker='X', label='Centroids')

                # Add labels and a legend
                plt.xlabel('Probability')
                plt.ylabel('Probability')
                plt.title('Cluster Plot')
                plt.legend()

                # Save the plot as an image file
                new_plot_file_path = os.path.join(app.config['GENERATED_FILES_DIR'], 'new_library_cluster_plot.png')
                plt.savefig(new_plot_file_path)
                plt.close()

                # Convert the results to HTML tables: one with probabilities,
                # one restricted to the compound/cluster assignment
                compound_table = final_df.to_html(classes='table table-striped table-bordered', index=False)
                cluster_table = final_df[['Compound', 'Cluster']].to_html(classes='table table-striped table-bordered',
                                                                          index=False)

                return render_template('upload.html', success=True, compound_table=compound_table,
                                       cluster_table=cluster_table, plot_file_p='new_library_cluster_plot.png',
                                       sdf_zip_file='compounds_sdf.zip')
    # Fall through for requests without a valid CSV file
    return render_template('upload.html')


def generate_de_novo_molecules(num_molecules, apply_lipinski=True):
    generated_mol = []
    elements = ['C', 'H', 'O', 'N']  # this list can be expanded with more elements

    generated_files_dir = app.config['GENERATED_FILES_DIR']

    while len(generated_mol) < num_molecules:
        # Build a random atom string of variable length and try to parse it as
        # SMILES; most random strings are invalid and are simply skipped
        compound = ''.join(random.choice(elements) for _ in range(random.randint(5, 20)))
        mol = Chem.MolFromSmiles(compound)
        if mol is None:
            continue

        activity = 0

        if apply_lipinski:
            molecular_weight = Descriptors.MolWt(mol)
            logP = Descriptors.MolLogP(mol)
            num_h_donors = Descriptors.NumHDonors(mol)
            num_h_acceptors = Descriptors.NumHAcceptors(mol)
            # Keep the molecule only if it meets the rule-of-five style criteria
            if 150 <= molecular_weight <= 500 and -2 <= logP <= 5 and num_h_donors <= 5 and num_h_acceptors <= 10:
                generated_mol.append((Chem.MolToSmiles(mol, canonical=True), activity))
        else:
            generated_mol.append((Chem.MolToSmiles(mol, canonical=True), activity))

    filename = 'Molecules.csv'
    generated_file_p = os.path.join(generated_files_dir, filename)
    save_data_to_csv(generated_mol, generated_file_p)

    return generated_mol


@app.route('/generate', methods=['POST'])
def generate_molecules():
    num_molecules = int(request.form['num_molecules'])
    apply_lipinski = request.form.get('options') == 'lipinski'
    generated_molecules = generate_de_novo_molecules(num_molecules, apply_lipinski)

    # generate_de_novo_molecules already wrote the CSV; resolve its path for download
    filename = 'Molecules.csv'
    file_path = os.path.join(app.config['GENERATED_FILES_DIR'], filename)

    return send_file(file_path, as_attachment=True, download_name=filename)


@app.route('/downloads/<filename>')
def downloads(filename):
    directory = app.config['GENERATED_FILES_DIR']
    try:
        return send_from_directory(directory, filename, as_attachment=True)
    except FileNotFoundError:
        return "File not found.", 404
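
# Illustrative sketch (not called anywhere in the app): the rule-of-five style
# filter used in generate_de_novo_molecules() above, factored into a single
# predicate. The function name and the default SMILES are hypothetical; it
# relies on the RDKit imports this module already uses.
def _passes_lipinski_example(smiles: str = "CCO") -> bool:
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return False  # unparseable SMILES fails the filter
    return (150 <= Descriptors.MolWt(mol) <= 500
            and -2 <= Descriptors.MolLogP(mol) <= 5
            and Descriptors.NumHDonors(mol) <= 5
            and Descriptors.NumHAcceptors(mol) <= 10)
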
# Create a logger to capture output that would normally go to Flask's app.logger
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)


def perform_protein_refinement(protein_file_path):
    timestamp = int(time.time())
    # Output file names, tagged with a timestamp so repeated runs do not collide
    stripped_pdb_filename = f'protein_stripped_{timestamp}.pdb'
    fixed_pdb_filename = f'fixed_output_{timestamp}.pdb'
    minimized_pdb_filename = f'minimized_protein_{timestamp}.pdb'
    ramachandran_plot_filename = f'ramachandran_plot_{timestamp}.png'
    sasa_per_residue_plot_filename = f'sasa_per_residue_plot_{timestamp}.png'
    logger.debug(f"Starting protein refinement for: {protein_file_path}")
    # Strip everything except protein atoms (waters, ions, and ligands are removed)
    traj = md.load(protein_file_path)
    protein = traj.topology.select('protein')
    stripped_traj = traj.atom_slice(protein)
    stripped_traj.save(stripped_pdb_filename)

    # Repair the structure: add missing residues, atoms, and hydrogens at pH 7.4
    fixer = PDBFixer(stripped_pdb_filename)
    fixer.findMissingResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(7.4)
    with open(fixed_pdb_filename, 'w') as f:
        PDBFile.writeFile(fixer.topology, fixer.positions, f)
    logger.debug("Protein fixed with PDBFixer. Saved to " + fixed_pdb_filename)

    pdb = PDBFile(fixed_pdb_filename)
    modeller = Modeller(pdb.topology, pdb.positions)
    forcefield = ForceField('amber14-all.xml', 'amber14/tip3pfb.xml')
    try:
        modeller.addHydrogens(forcefield)
        logger.debug("Added hydrogens to the model.")
    except Exception as e:
        logger.error(f"An error occurred while adding hydrogens: {e}")
        raise

    # Energy-minimize the fixed structure with OpenMM
    system = forcefield.createSystem(modeller.topology, nonbondedMethod=PME,
                                     nonbondedCutoff=1 * nanometer, constraints=HBonds)
    integrator = LangevinMiddleIntegrator(300 * kelvin, 1 / picosecond, 0.004 * picoseconds)
    simulation = Simulation(modeller.topology, system, integrator)
    simulation.context.setPositions(modeller.positions)
    simulation.minimizeEnergy(maxIterations=500)
    with open(minimized_pdb_filename, 'w') as f:
        state = simulation.context.getState(getPositions=True)
        PDBFile.writeFile(modeller.topology, state.getPositions(), f)
    logger.debug("Minimized protein structure saved to " + minimized_pdb_filename)

    # Reload the minimized structure for analysis
    traj = md.load(minimized_pdb_filename)
    # Generate and save a Ramachandran plot
    phi, psi = md.compute_phi(traj), md.compute_psi(traj)
    phi_angles = np.rad2deg(md.compute_dihedrals(traj, phi[0]))
    psi_angles = np.rad2deg(md.compute_dihedrals(traj, psi[0]))

    plt.figure(figsize=(8, 6))
    plt.scatter(phi_angles, psi_angles, s=2, c='blue', alpha=0.5)
    # Shade approximate alpha-helix regions
    plt.fill_betweenx(np.arange(-180, 50, 1), -100, -45, color='orange', alpha=0.25)
    plt.fill_betweenx(np.arange(-100, 180, 1), 45, 100, color='orange', alpha=0.25)

    # Shade approximate beta-sheet regions
    plt.fill_between(np.arange(-180, 180, 1), 135, 180, color='green', alpha=0.25)
    plt.fill_between(np.arange(-180, 180, 1), -180, -135, color='green', alpha=0.25)

    plt.xlim(-180, 180)
    plt.ylim(-180, 180)
    plt.xlabel('Phi (φ) angles (degrees)')
    plt.ylabel('Psi (ψ) angles (degrees)')
    plt.title('Ramachandran Plot with Highlighted Secondary Structure Regions')
    plt.grid(True)
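    # Note: the shaded bands above are rough visual guides to the commonly
    # favoured alpha-helix and beta-sheet regions, not exact Ramachandran
    # contours; the cut-off angles are approximate.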
    # Annotate the secondary-structure regions
    plt.text(-75, 150, 'β-sheet', horizontalalignment='center', verticalalignment='center', color='green',
             alpha=0.75)
    plt.text(-60, -60, 'α-helix', horizontalalignment='center', verticalalignment='center', color='orange',
             alpha=0.75)
    plt.text(60, 60, 'α-helix', horizontalalignment='center', verticalalignment='center', color='orange',
             alpha=0.75)
    plt.text(100, -160, 'β-sheet', horizontalalignment='center', verticalalignment='center', color='green',
             alpha=0.75)

    plt.savefig(f'static/{ramachandran_plot_filename}')
    plt.close()
    # Compute SASA and plot the average SASA per residue
    sasa = md.shrake_rupley(traj, mode='residue')
    plt.plot(np.mean(sasa, axis=0))
    plt.title('Average Solvent Accessible Surface Area (SASA) per residue')
    plt.xlabel('Residue')
    plt.ylabel('SASA (nm²)')
    plt.savefig(f'static/{sasa_per_residue_plot_filename}')
    plt.close()

    return {
        'stripped_pdb': stripped_pdb_filename,
        'fixed_pdb': fixed_pdb_filename,
        'minimized_pdb': minimized_pdb_filename,
        'ramachandran_plot': f'static/{ramachandran_plot_filename}',
        'sasa_per_residue_plot': f'static/{sasa_per_residue_plot_filename}'
    }


def allowed_file(filename):
    ALLOWED_EXTENSIONS = {'pdb'}  # Add or remove file extensions as needed.
    return '.' in filename and \
        filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS


@app.route('/protein_refinement', methods=['GET', 'POST'])
def protein_refinement():
    try:
        if request.method == 'POST':
            # Check that the POST request has the file part
            if 'file' not in request.files:
                flash('No file part', 'error')
                return redirect(request.url)
            file = request.files['file']
            if file.filename == '':
                flash('No selected file', 'error')
                return redirect(request.url)
            if file and allowed_file(file.filename):
                filename = secure_filename(file.filename)
                protein_file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
                file.save(protein_file_path)
                # Run the refinement pipeline once and keep its output file names
                result_files = perform_protein_refinement(protein_file_path)
                # Build download links and visualisation data for the results
                download_links = {
                    'stripped_protein': url_for('uploa', filename=result_files['stripped_pdb']),
                    'fixed_protein': url_for('uploa', filename=result_files['fixed_pdb']),
                    'minimized_protein': url_for('uploa', filename=result_files['minimized_pdb']),
                    'ramachandran_plot': url_for('static', filename=os.path.basename(result_files['ramachandran_plot'])),
                    'sasa_per_residue_plot': url_for('static', filename=os.path.basename(result_files['sasa_per_residue_plot']))
                }

                return render_template('upload.html', download_links=download_links, random=int(time.time()),
                                       active_tab='protein_refinement')
    except Exception as e:
        app.logger.error(f"An error occurred during protein refinement: {str(e)}")
        flash('An error occurred during processing.', 'error')
        return redirect(request.url)
    return render_template('upload.html', active_tab='protein_refinement')


@app.route('/files/<filename>')
def uploa(filename):
    # Serve refinement outputs, which perform_protein_refinement saves with
    # relative filenames that resolve to the app's root directory
    directory = current_app.root_path
    return send_from_directory(directory, filename)
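
# Serving note: send_from_directory rejects paths that escape the given
# directory, so the user-supplied filename above cannot be used for path
# traversal; serving from a dedicated output directory instead of root_path
# would still be a tidier choice.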
def allowed_fil(filename):
    return '.' in filename and filename.rsplit('.', 1)[1].lower() in {'zip', 'pdb'}


def convert_sdf_to_pdbqt(output_directory):
    # Convert every SDF file found under output_directory to PDBQT format
    for root, dirs, files in os.walk(output_directory):
        for file in files:
            if file.endswith(".sdf"):
                sdf_path = os.path.join(root, file)
                pdbqt_filename = file.replace('.sdf', '.pdbqt')
                pdbqt_path = os.path.join(root, pdbqt_filename)
                # Prepare the obabel command; --gen3d generates 3D coordinates
                # and -h adds hydrogens
                obabel_command = [
                    'obabel', sdf_path, '-O', pdbqt_path,
                    '--gen3d', '-h'
                ]
                try:
                    subprocess.run(obabel_command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                    print(f"Conversion successful for {file}")
                except subprocess.CalledProcessError as e:
                    print(f"An error occurred while converting {file}: {e.stderr.decode()}")


def convert_protein(protein_pdb_path, protein_pdbqt_path):
    # Convert a protein PDB file to PDBQT; the -xr flag removes residues
    # not recognised by AutoDock
    obabel_command = [
        'obabel', protein_pdb_path, '-xr', '-O', protein_pdbqt_path
    ]
    try:
        subprocess.run(obabel_command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        print(f"Conversion successful for {protein_pdb_path}")
    except subprocess.CalledProcessError as e:
        error_message = e.stderr.decode() if e.stderr else 'An error occurred.'
        print(f"An error occurred while converting {protein_pdb_path}: {error_message}")


def clear_workspace(workspace_path):
    # Remove any previous contents and recreate the directory
    if os.path.exists(workspace_path):
        shutil.rmtree(workspace_path)
    os.makedirs(workspace_path)


@app.route('/upload', methods=['POST'])
def upload_files():
    # Generate a unique job ID for this particular user's session or job
    job_id = uuid.uuid4().hex
    job_workspace = os.path.join(app.config['UPLOAD_FOLDER'], job_id)
    job_results_dir = os.path.join(app.config['DOCKING_RESULTS_DIR'], job_id)

    # Create fresh directories for the job
    clear_workspace(job_workspace)
    clear_workspace(job_results_dir)

    # Save the uploaded protein and ligand files
    protein_file = request.files.get('protein_file')
    ligand_zip = request.files.get('ligand_zip')

    if protein_file and allowed_fil(protein_file.filename) and ligand_zip and allowed_fil(ligand_zip.filename):
        protein_filename = secure_filename(protein_file.filename)
        ligand_zip_filename = secure_filename(ligand_zip.filename)

        protein_file_path = os.path.join(job_workspace, protein_filename)
        ligand_zip_path = os.path.join(job_workspace, ligand_zip_filename)

        protein_file.save(protein_file_path)
        ligand_zip.save(ligand_zip_path)

        # Unzip the ligands into the job workspace
        output_directory_path = os.path.join(job_workspace, 'refined_ligands')
        Path(output_directory_path).mkdir(parents=True, exist_ok=True)

        with zipfile.ZipFile(ligand_zip_path, 'r') as zip_ref:
            zip_ref.extractall(output_directory_path)

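        # Pipeline order: (1) obabel converts each extracted SDF ligand to
        # PDBQT with 3D coordinates and hydrogens; (2) the receptor PDB is
        # converted to PDBQT; (3) AutoDock Vina docks every ligand against
        # the receptor, writing results into this job's results directory.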
        convert_sdf_to_pdbqt(output_directory=output_directory_path)
        protein_pdbqt_path = protein_file_path.replace('.pdb', '.pdbqt')
        convert_protein(protein_file_path, protein_pdbqt_path)
        run_docking(protein_pdbqt_path, output_directory_path, job_results_dir)

        return jsonify({'job_id': job_id, 'message': 'Files uploaded, conversion started, and docking initiated!'})
    else:
        return jsonify({'error': 'Invalid file type or missing files.'}), 400


def run_docking(protein_pdbqt, ligand_directory_path, results_directory_path):
    print("Starting the docking process...")  # Debug print
    # Extract the docking parameters once, before looping over the ligands;
    # request.values covers both form fields (POST) and query parameters (GET)
    center_x = request.values.get('center_x', type=float)
    center_y = request.values.get('center_y', type=float)
    center_z = request.values.get('center_z', type=float)
    size_x = request.values.get('size_x', type=float)
    size_y = request.values.get('size_y', type=float)
    size_z = request.values.get('size_z', type=float)
    exhaustiveness = request.values.get('exhaustiveness', type=int)
    num_modes = request.values.get('num_modes', type=int)
    energy_range = request.values.get('energy_range', type=int)

    for ligand_file in Path(ligand_directory_path).glob('*.pdbqt'):
        ligand_pdbqt = str(ligand_file)
        result_file_path = os.path.join(results_directory_path, ligand_file.stem + '_docked.pdbqt')

        # Write a Vina configuration file for this ligand
        config_text = f"""receptor = {protein_pdbqt}
ligand = {ligand_pdbqt}

center_x = {center_x}
center_y = {center_y}
center_z = {center_z}
size_x = {size_x}
size_y = {size_y}
size_z = {size_z}

out = {result_file_path}
exhaustiveness = {exhaustiveness}
num_modes = {num_modes}
energy_range = {energy_range}
"""
        config_file_path = os.path.join(results_directory_path, ligand_file.stem + '_config.txt')
        with open(config_file_path, 'w') as config_file:
            config_file.write(config_text)

        # Run Vina, capturing its output
        vina_command = ['vina', '--config', config_file_path]
        try:
            result = subprocess.run(vina_command, capture_output=True, text=True)
            if result.returncode != 0:
                print(f"Error in docking: {result.stderr}")  # Log any errors
            else:
                print(f"Docking completed for {ligand_file.stem}. Output:\n{result.stdout}")
        except Exception as e:
            print(f"An exception occurred: {e}")  # Log any exceptions
        finally:
            # Clean up the config file after docking
            os.remove(config_file_path)
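    # Each docked PDBQT file contains one "REMARK VINA RESULT:" line per pose, e.g.
    #   REMARK VINA RESULT:    -7.5    0.000    0.000
    # where the fields after the colon are the binding affinity (kcal/mol) and
    # the RMSD lower/upper bounds relative to the best pose.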
    # Collect docking data from every docked ligand
    docking_data = []
    for file_name in Path(results_directory_path).glob('*_docked.pdbqt'):
        with open(file_name, 'r') as file:
            lines = file.readlines()
        # Extract data for all poses
        for line in lines:
            if line.startswith("REMARK VINA RESULT:"):
                # Parse out the binding affinity and RMSD bounds
                parts = line.split()
                binding_affinity = float(parts[3])  # fourth whitespace-separated field
                rmsd_lb = float(parts[4])  # RMSD lower bound
                rmsd_ub = float(parts[5])  # RMSD upper bound
                docking_data.append({
                    'file_name': os.path.basename(file_name),
                    'binding_affinity': binding_affinity,
                    'rmsd_lb': rmsd_lb,
                    'rmsd_ub': rmsd_ub
                })

    # Summarise the results if any docking data was collected
    if docking_data:
        df = pd.DataFrame(docking_data)
        # The first pose is the RMSD reference (both bounds are zero), so the
        # second pose (index 1) is taken for each ligand to obtain nonzero RMSDs
        df_second_poses = df.groupby('file_name').nth(1)
        df_second_poses['final_rmsd'] = df_second_poses['rmsd_ub'] - df_second_poses['rmsd_lb']
        df_best_poses = df_second_poses
        csv_file_path = os.path.join(results_directory_path, 'docking_results.csv')
        df_best_poses.to_csv(csv_file_path, index=False)
    else:
        print("No docking data to process.")


def validate_docking_output(docked_file_path):
    # Sanity-check a docked file by printing its first ten lines
    if os.path.exists(docked_file_path) and os.path.getsize(docked_file_path) > 0:
        with open(docked_file_path, 'r') as file:
            for i in range(10):
                line = file.readline()
                if not line:
                    break
                print(line.strip())
    else:
        print(f"Docked file {docked_file_path} not found or is empty.")


@app.route('/docking', methods=['GET'])
def docking():
    protein_file_path = request.args.get('protein_file_path', type=str)
    protein_pdbqt_path = os.path.join(app.config['UPLOADED_FILES_DIR'], protein_file_path)
    ligand_directory_path = os.path.join(app.config['GENERATED_FILES_DIR'], 'refined_ligands')
    results_directory_path = os.path.join(app.config['DOCKING_RESULTS_DIR'])

    run_docking(protein_pdbqt_path, ligand_directory_path, results_directory_path)
    return jsonify({'message': 'Docking completed!'})


@app.route('/list_docking_results')
def list_docking_results():
    results_files = Path(app.config['DOCKING_RESULTS_DIR']).glob('*_docked.pdbqt')
    results_list = [str(result) for result in results_files if result.is_file() and result.stat().st_size > 0]
    return jsonify(results_list)


@app.route('/results/<filename>')
def download_results(filename):
    results_directory_path = os.path.join(app.config['DOCKING_RESULTS_DIR'])
    return send_from_directory(results_directory_path, filename, as_attachment=True)
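
# Example client flow for the docking endpoints (illustrative; host, port,
# file names, and the job id are hypothetical placeholders):
#   curl -F protein_file=@receptor.pdb -F ligand_zip=@ligands.zip \
#        -F center_x=12.5 -F center_y=3.0 -F center_z=-8.2 \
#        -F size_x=20 -F size_y=20 -F size_z=20 \
#        -F exhaustiveness=8 -F num_modes=9 -F energy_range=3 \
#        http://localhost:5000/upload
#   curl http://localhost:5000/analyze_results/<job_id>   # downloads docking_results.csv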
@app.route('/analyze_results/<job_id>', methods=['GET'])
def analyze_results(job_id):
    # Directory where the results for this job are stored
    results_directory = os.path.join(app.config['DOCKING_RESULTS_DIR'], job_id)
    filepath = os.path.join(results_directory, 'docking_results.csv')

    if os.path.isfile(filepath) and os.path.getsize(filepath) > 0:
        return send_file(filepath, as_attachment=True)  # Send the file for download
    else:
        return jsonify({'message': 'Results not ready'}), 202


@app.route('/chart_data/<job_id>')
def chart_data(job_id):
    # Construct the file path using the job_id provided in the URL
    job_results_dir = os.path.join(app.config['DOCKING_RESULTS_DIR'], job_id)
    filepath = os.path.join(job_results_dir, 'docking_results.csv')

    if os.path.isfile(filepath):
        df = pd.read_csv(filepath)
        # Build a bar-chart payload of binding affinities
        binding_affinities = df['binding_affinity'].tolist()
        file_names = df['file_name'].tolist()
        chart_data = {
            'labels': file_names,
            'datasets': [{
                'label': 'Binding Affinity',
                'data': binding_affinities,
                'backgroundColor': 'rgba(0, 123, 255, 0.5)',
                'borderColor': 'rgba(0, 123, 255, 1)',
                'borderWidth': 1
            }]
        }
        return jsonify(chart_data)
    else:
        return jsonify({'message': 'Results not ready for job ' + job_id}), 202


@app.route('/download_complexes/<job_id>')
def download_complexes(job_id):
    job_results_dir = os.path.join(app.config['DOCKING_RESULTS_DIR'], job_id)

    # Check that the job results directory exists
    if not os.path.isdir(job_results_dir):
        return abort(404, description="Job results not found.")

    # Write the zip archive into an in-memory buffer
    zip_in_memory = BytesIO()
    with zipfile.ZipFile(zip_in_memory, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, dirs, files in os.walk(job_results_dir):
            for file in files:
                file_path = os.path.join(root, file)
                zipf.write(file_path, os.path.relpath(file_path, job_results_dir))
    zip_in_memory.seek(0)
    zip_filename = f'{job_id}_results.zip'
    return send_file(zip_in_memory, download_name=zip_filename, as_attachment=True, mimetype='application/zip')


if __name__ == "__main__":
    # Make sure all working directories exist before starting the server
    for dir_key in ('UPLOADED_FILES_DIR', 'GENERATED_FILES_DIR', 'UPLOAD_FOLDER', 'DOCKING_RESULTS_DIR'):
        os.makedirs(app.config[dir_key], exist_ok=True)
    app.run(debug=True)
--------------------------------------------------------------------------------