├── src
    ├── __init__.py
    └── dockcadd.py
├── requirements.txt
├── .gitignore
├── bin
    └── install_dependencies.sh
├── LICENSE
├── scripts
    └── setup.sh
├── README.md
└── DockCADD
    └── docking.py


/src/__init__.py:
--------------------------------------------------------------------------------
1 | # src/__init__.py
2 | 
3 | from .dockcadd import perform_docking
4 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | biopandas
2 | pubchempy
3 | tqdm
4 | matplotlib
5 | scipy
6 | rdkit-pypi
7 | biopython
8 | pymol
9 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | 
 6 | # Virtual environment
 7 | venv/
 8 | .env
 9 | 
10 | # Data files
11 | data/*.pdb
12 | data/*.pdbqt
13 | 
14 | # Logs
15 | *.log
16 | 
17 | # p2rank output
18 | p2rank_2.4.2/test_output/
19 | 
20 | # AutoDock Vina output
21 | docking_results/*.pdbqt
22 | docking_results/*.txt
23 | 
24 | # Colab specific
25 | .ipynb_checkpoints/
26 | 


--------------------------------------------------------------------------------
/bin/install_dependencies.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Install AutoDock Vina
 3 | wget https://github.com/ccsb-scripps/AutoDock-Vina/releases/download/v1.2.5/vina_1.2.5_linux_x86_64
 4 | chmod +x vina_1.2.5_linux_x86_64
 5 | mv vina_1.2.5_linux_x86_64 /usr/local/bin/vina
 6 | 
 7 | # Install P2Rank
 8 | wget https://github.com/rdk/p2rank/releases/download/2.4.2/p2rank_2.4.2.tar.gz
 9 | tar -xzf p2rank_2.4.2.tar.gz
10 | 
11 | echo "Dependencies installed successfully."
12 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | of this software and associated documentation files (the "Software"), to deal
 5 | in the Software without restriction, including without limitation the rights
 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | copies of the Software, and to permit persons to whom the Software is
 8 | furnished to do so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 | SOFTWARE.
20 | 


--------------------------------------------------------------------------------
/scripts/setup.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | ############################################
 4 | # scripts/setup.sh
 5 | # Installs all system and Python dependencies needed for DockcaddV2.
 6 | ############################################
 7 | 
 8 | # 1) System Updates and Packages
 9 | sudo apt-get update -y
10 | sudo apt-get install -y pymol openbabel wget tar openjdk-11-jdk
11 | 
12 | # 2) Python Libraries
13 | echo "Upgrading pip and installing Python libraries..."
14 | pip install --upgrade pip
15 | 
16 | # Install dependencies from requirements.txt if available
17 | if [ -f "requirements.txt" ]; then
18 |     pip install -r requirements.txt
19 | fi
20 | 
21 | # Additional packages (if not listed in requirements.txt):
22 | pip install git+https://github.com/openmm/pdbfixer.git
23 | pip install openmm
24 | pip install rdkit-pypi
25 | 
26 | echo "Python dependencies installed successfully."
27 | 
28 | # 3) AutoDock Vina Installation
29 | VINA_URL="https://github.com/ccsb-scripps/AutoDock-Vina/releases/download/v1.2.5/vina_1.2.5_linux_x86_64"
30 | VINA_NAME="vina_1.2.5_linux_x86_64"
31 | 
32 | if [ ! -f "/usr/local/bin/vina" ] && [ ! -f "/usr/local/bin/${VINA_NAME}" ]; then
33 |     echo "Installing AutoDock Vina 1.2.5..."
34 |     wget -q "$VINA_URL"
35 |     chmod +x "$VINA_NAME"
36 |     sudo mv "$VINA_NAME" /usr/local/bin/vina
37 |     rm -f "$VINA_NAME"
38 |     echo "AutoDock Vina installed successfully."
39 | else
40 |     echo "AutoDock Vina is already installed."
41 | fi
42 | 
43 | # 4) p2rank Installation
44 | P2RANK_URL="https://github.com/rdk/p2rank/releases/download/2.4.2/p2rank_2.4.2.tar.gz"
45 | P2RANK_DIR="p2rank_2.4.2"
46 | 
47 | if [ ! -d "$P2RANK_DIR" ]; then
48 |     echo "Installing p2rank 2.4.2..."
49 |     wget -q "$P2RANK_URL"
50 |     tar -xzf "p2rank_2.4.2.tar.gz"
51 |     rm -f "p2rank_2.4.2.tar.gz"
52 |     echo "p2rank installed successfully."
53 | else
54 |     echo "p2rank is already installed."
55 | fi
56 | 
57 | echo "Setup is complete."
58 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # DockCADD v2
 2 | 
 3 | **DockCADD v2** is a streamlined and automated computational framework designed to facilitate molecular docking and drug discovery. It requires minimal input from users and utilizes advanced tools to provide accurate docking results. 
 4 | 
 5 | This new version of **DockCADD** is a lightweight, integrated workflow for structure-based drug design. It automatically prepares a receptor (extracting only chain A, with a fallback to the full structure if chain A isn’t found), repairs missing residues/atoms via [PDBFixer](https://github.com/openmm/pdbfixer), predicts the binding pocket with [p2rank](https://github.com/rdk/p2rank), and docks ligands using [AutoDock Vina](https://github.com/ccsb-scripps/AutoDock-Vina). Ligands can be provided as a list of SMILES strings or as an SDF file, and multiple conformers are generated for each molecule using [RDKit](https://www.rdkit.org/).
 6 | 
 7 | ## Features
 8 | 
 9 | - **Receptor Preparation**:  
10 |   - Downloads a protein structure from the PDB.
11 |   - Extracts only chain A (or falls back to the full structure if chain A isn’t found).
12 |   - Repairs missing residues, atoms, and adds hydrogens using PDBFixer.
13 | 
14 | - **Pocket Prediction**:  
15 |   - Uses p2rank to predict the binding pocket and extract its center.
16 | 
17 | - **Ligand Preparation**:  
18 |   - Accepts ligands as a list of SMILES strings and/or an SDF file.
19 |   - Generates multiple 3D conformers per ligand with RDKit.
20 |   - Writes each conformer to a separate PDB file.
21 | 
22 | - **Docking**:  
23 |   - Converts the receptor and ligand files to PDBQT format using OpenBabel.
24 |   - Docks each ligand conformer with AutoDock Vina.
25 |   - Parses the best docking pose and merges it with the receptor to generate a final complex.
26 | 
27 | - **Visualization**:  
28 |   - Includes an optional PyMOL visualization function to generate a static PNG snapshot of a final complex.
29 | 
30 | ## Installation
31 | 
32 | 1. **Clone the Repository:**
33 | 
34 |    ```bash
35 |    git clone https://github.com/mehdikariim/DockCADD-v2.git
36 |    cd DockCADD-v2
37 |    ```
38 | 2. **Run the Setup Script:**
39 | 
40 | This script installs all system packages (e.g., PyMOL, OpenBabel, Java), AutoDock Vina, p2rank, and the required Python libraries (including PDBFixer, OpenMM, and RDKit).
41 | 
42 |      ```bash
43 |      bash scripts/setup.sh
44 |      ```
45 | 
46 | 
47 | 3. **Usage:**
48 | You can use the provided Python package to perform docking. Below are two example usage scenarios:
49 | 
50 | Example 1: **Docking Using a List of SMILES**
51 | ```python
52 | from src.dockcadd import perform_docking
53 | 
54 | # Define your list of ligand SMILES and target receptor PDB ID
55 | smiles_list = ["CCOc1ccc(CC(=O)NC)cc1", "CCCC(=O)NCC1=CC=CC=C1"]
56 | pdb_id = "5ZMA"
57 | 
58 | # Run docking (generates 3 conformers per ligand by default)
59 | perform_docking(smiles_list=smiles_list, sdf_file=None, pdb_id=pdb_id, num_confs=3, docking_folder="docking_results")
60 | ```
61 | Example 2: **Docking Using an SDF File**
62 | ```python
63 | from src.dockcadd import perform_docking
64 | 
65 | # Provide the path to your SDF file containing ligands
66 | sdf_file = "path/to/your_ligands.sdf"
67 | pdb_id = "5ZMA"
68 | 
69 | # Run docking using the SDF file (3 conformers per ligand)
70 | perform_docking(smiles_list=None, sdf_file=sdf_file, pdb_id=pdb_id, num_confs=3, docking_folder="docking_results")
71 | ```
72 | 
73 | # License
74 | This project is licensed under the MIT License.
75 | 
76 | # Citations
77 | If you use DockCADD v2 in your work, please cite our article:
78 | 
79 | Karim, E.M. et al. (2025). DockCADD: A streamlined In Silico pipeline for the identification of potent Ribosomal S6 Kinase 2 (RSK2) inhibitors. Scientific African, e02581.
80 | https://doi.org/10.1016/j.sciaf.2025.e02581
81 | 
82 | # Acknowledgments
83 | - AutoDock Vina: AutoDock Vina 1.2.5
84 | - p2rank: p2rank 2.4.2
85 | - PDBFixer & OpenMM: PDBFixer
86 | - RDKit: RDKit
87 | 
88 | 
89 | 


--------------------------------------------------------------------------------
/DockCADD/docking.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import subprocess
  3 | import sys
  4 | from rdkit import Chem
  5 | from rdkit.Chem import AllChem
  6 | from Bio.PDB import PDBList
  7 | import pandas as pd
  8 | import numpy as np
  9 | 
 10 | def generate_minimized_pdb(smiles, pdb_filename):
 11 |     mol = Chem.MolFromSmiles(smiles)
 12 |     if mol is None:
 13 |         print(f"Invalid SMILES string: {smiles}")
 14 |         return False
 15 |     mol = Chem.AddHs(mol)
 16 |     try:
 17 |         AllChem.EmbedMolecule(mol, randomSeed=42)
 18 |     except:
 19 |         print(f"Failed to generate 3D coordinates for SMILES: {smiles}")
 20 |         return False
 21 |     try:
 22 |         AllChem.UFFOptimizeMolecule(mol, maxIters=200)
 23 |     except:
 24 |         print(f"Energy minimization failed for SMILES: {smiles}")
 25 |         return False
 26 |     try:
 27 |         Chem.SanitizeMol(mol)
 28 |     except:
 29 |         print(f"Sanitization failed for SMILES: {smiles}")
 30 |         return False
 31 |     Chem.MolToPDBFile(mol, pdb_filename)
 32 |     print(f"Minimized molecule saved as {pdb_filename}")
 33 |     return True
 34 | 
 35 | def download_pdb(pdb_id, download_dir):
 36 |     if not os.path.exists(download_dir):
 37 |         os.makedirs(download_dir)
 38 |     pdbl = PDBList()
 39 |     pdb_file_path = pdbl.retrieve_pdb_file(pdb_id, file_format='pdb', pdir=download_dir)
 40 |     return pdb_file_path
 41 | 
 42 | def remove_hetatm(input_pdb, output_pdb):
 43 |     with open(input_pdb, 'r') as infile, open(output_pdb, 'w') as outfile:
 44 |         for line in infile:
 45 |             if not line.startswith('HETATM'):
 46 |                 outfile.write(line)
 47 | 
 48 | def convert_pdb_to_pdbqt_receptor(input_pdb, output_pdbqt):
 49 |     subprocess.run(['obabel', '-i', 'pdb', input_pdb, '-o', 'pdbqt', '-O', output_pdbqt, '-xr', '-xn', '-xp'], check=True)
 50 | 
 51 | def convert_pdb_to_pdbqt_ligand(input_pdb, output_pdbqt):
 52 |     subprocess.run(['obabel', '-i', 'pdb', input_pdb, '-o', 'pdbqt', '-O', output_pdbqt, '-h'], check=True)
 53 | 
 54 | def run_command_with_output(command, log_file):
 55 |     process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
 56 |     with open(log_file, 'w') as log:
 57 |         for line in process.stdout:
 58 |             sys.stdout.write(line)
 59 |             log.write(line)
 60 |             sys.stdout.flush()
 61 |     return process.wait()
 62 | 
 63 | def perform_docking(smiles_list, PDB_ID):
 64 |     folder_name = 'docking_results'
 65 |     receptor_name = PDB_ID
 66 | 
 67 |     # Create results folder
 68 |     if not os.path.exists(folder_name):
 69 |         os.mkdir(folder_name)
 70 | 
 71 |     print(f"Receptor Name: {receptor_name}")
 72 |     print(f"Number of ligands: {len(smiles_list)}")
 73 | 
 74 |     # Generate and pre-process ligands
 75 |     valid_smiles = []
 76 |     for i, smiles in enumerate(smiles_list):
 77 |         pdb_filename = f'{folder_name}/ligand_{i+1}.pdb'
 78 |         if generate_minimized_pdb(smiles, pdb_filename):
 79 |             valid_smiles.append(smiles)
 80 | 
 81 |     print(f"Number of valid SMILES processed: {len(valid_smiles)}")
 82 | 
 83 |     # Download and pre-process receptor
 84 |     downloaded_pdb_path = download_pdb(PDB_ID, folder_name)
 85 |     os.rename(downloaded_pdb_path, f'{folder_name}/{receptor_name}_dirty.pdb')
 86 | 
 87 |     # Remove HETATM from PDB file
 88 |     remove_hetatm(f'{folder_name}/{receptor_name}_dirty.pdb', f'{folder_name}/{receptor_name}.pdb')
 89 | 
 90 |     # Define docking box using p2rank
 91 |     p2rank_jar_path = os.path.join(os.getcwd(), 'p2rank_2.4.2', 'bin', 'p2rank.jar')
 92 |     subprocess.run(['java', '-jar', p2rank_jar_path, 'predict', '-f', f'{folder_name}/{receptor_name}.pdb'], check=True)
 93 | 
 94 |     # Extract docking box center
 95 |     df = pd.read_csv(f'p2rank_2.4.2/test_output/predict_{receptor_name}/{receptor_name}.pdb_predictions.csv')
 96 |     center_x, center_y, center_z = float(df['   center_x'].iloc[0]), float(df['   center_y'].iloc[0]), float(df['   center_z'].iloc[0])
 97 | 
 98 |     # Convert receptor to PDBQT format
 99 |     receptor_pdb = f"{folder_name}/{receptor_name}.pdb"
100 |     receptor_pdbqt = f"{folder_name}/{receptor_name}.pdbqt"
101 |     convert_pdb_to_pdbqt_receptor(receptor_pdb, receptor_pdbqt)
102 | 
103 |     # Open results file and process ligands
104 |     results_file = f"{folder_name}/docking_results.txt"
105 |     with open(results_file, 'w') as f:
106 |         f.write("SMILES,Docking Score\n")  # Write header
107 | 
108 |         for i, smiles in enumerate(smiles_list):
109 |             print(f"\nProcessing ligand {i+1} of {len(smiles_list)}")
110 |             print(f"SMILES: {smiles}")
111 | 
112 |             ligand_pdb = f"{folder_name}/ligand_{i+1}.pdb"
113 |             ligand_pdbqt = f"{folder_name}/ligand_{i+1}.pdbqt"
114 | 
115 |             print("Converting ligand to PDBQT format...")
116 |             convert_pdb_to_pdbqt_ligand(ligand_pdb, ligand_pdbqt)
117 |             print("Ligand conversion complete.")
118 | 
119 |             output = f"{folder_name}/ligand_{i+1}_out.pdbqt"
120 |             log_file = f"{folder_name}/vina_log_{i+1}.txt"
121 |             vina_command = [
122 |                 'vina',
123 |                 '--receptor', receptor_pdbqt,
124 |                 '--ligand', ligand_pdbqt,
125 |                 '--out', output,
126 |                 '--center_x', str(center_x),
127 |                 '--center_y', str(center_y),
128 |                 '--center_z', str(center_z),
129 |                 '--size_x', '20',
130 |                 '--size_y', '20',
131 |                 '--size_z', '20'
132 |             ]
133 | 
134 |             print("Starting Vina docking...")
135 |             exit_code = run_command_with_output(vina_command, log_file)
136 | 
137 |             if exit_code == 0:
138 |                 print("Vina docking completed successfully.")
139 |                 with open(log_file, 'r') as log:
140 |                     score = "N/A"
141 |                     for line in log:
142 |                         if line.startswith('   1'):
143 |                             score = line.split()[1]
144 |                             break
145 |                 print(f"Best docking score: {score}")
146 |             else:
147 |                 print(f"Error running Vina for ligand {i+1}. Check the log file for details.")
148 |                 score = "Error"
149 | 
150 |             # Write result to file
151 |             f.write(f"{smiles},{score}\n")
152 | 


--------------------------------------------------------------------------------
/src/dockcadd.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import os
  3 | import re
  4 | import subprocess
  5 | import pandas as pd
  6 | 
  7 | from Bio.PDB import PDBList
  8 | from pdbfixer import PDBFixer
  9 | from openmm.app import PDBFile
 10 | 
 11 | from rdkit import Chem
 12 | from rdkit.Chem import AllChem
 13 | 
 14 | ########################################
 15 | # Helper Functions
 16 | ########################################
 17 | 
 18 | def run_command_with_live_output(command, log_file):
 19 |     """
 20 |     Runs a command in a subprocess, writing output to both console and a log file.
 21 |     """
 22 |     process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
 23 |     with open(log_file, 'w') as lf:
 24 |         for line in process.stdout:
 25 |             print(line, end="")  # Print to console
 26 |             lf.write(line)
 27 |     return process.wait()
 28 | 
 29 | 
 30 | def keep_only_chain_A_with_fallback(input_pdb, output_pdb):
 31 |     """
 32 |     Extracts lines for chain A from input_pdb.
 33 |     Writes only ATOM/HETATM lines (and TER if chain A) to output_pdb.
 34 |     If no chain A lines are found, copies the full file.
 35 |     """
 36 |     chain_a_count = 0
 37 |     with open(input_pdb, 'r') as infile, open(output_pdb, 'w') as outfile:
 38 |         for line in infile:
 39 |             chain_id = line[21] if len(line) >= 22 else None
 40 |             if line.startswith(("ATOM", "HETATM")):
 41 |                 if chain_id == 'A':
 42 |                     outfile.write(line)
 43 |                     chain_a_count += 1
 44 |             elif line.startswith("TER"):
 45 |                 if chain_id == 'A':
 46 |                     outfile.write(line)
 47 |             elif line.startswith("END"):
 48 |                 outfile.write(line)
 49 |     if chain_a_count == 0:
 50 |         print("[WARN] No chain A lines found; using full PDB instead.")
 51 |         with open(input_pdb, 'r') as inf, open(output_pdb, 'w') as outf:
 52 |             outf.write(inf.read())
 53 | 
 54 | 
 55 | def remove_hetatm(input_pdb, output_pdb):
 56 |     """
 57 |     Removes all HETATM lines from input_pdb.
 58 |     This effectively removes any co-ligand present in the receptor.
 59 |     """
 60 |     with open(input_pdb, 'r') as infile, open(output_pdb, 'w') as outfile:
 61 |         for line in infile:
 62 |             if not line.startswith("HETATM"):
 63 |                 outfile.write(line)
 64 | 
 65 | 
 66 | def fix_with_pdbfixer(pdb_in, pdb_out):
 67 |     """
 68 |     Uses PDBFixer to add missing residues, atoms, and hydrogens (pH 7).
 69 |     """
 70 |     fixer = PDBFixer(filename=pdb_in)
 71 |     fixer.findMissingResidues()
 72 |     fixer.findMissingAtoms()
 73 |     fixer.addMissingAtoms()
 74 |     fixer.addMissingHydrogens(7.0)
 75 |     with open(pdb_out, 'w') as f:
 76 |         PDBFile.writeFile(fixer.topology, fixer.positions, f)
 77 | 
 78 | 
 79 | def convert_pdb_to_pdbqt_receptor(input_pdb, output_pdbqt):
 80 |     """
 81 |     Converts receptor PDB to PDBQT using OpenBabel.
 82 |     """
 83 |     cmd = [
 84 |         "obabel", "-i", "pdb", input_pdb,
 85 |         "-o", "pdbqt", "-O", output_pdbqt,
 86 |         "-xr", "-xn", "-xp"
 87 |     ]
 88 |     subprocess.run(cmd, check=True)
 89 | 
 90 | 
 91 | def convert_pdb_to_pdbqt_ligand(input_pdb, output_pdbqt):
 92 |     """
 93 |     Converts ligand PDB to PDBQT using OpenBabel.
 94 |     """
 95 |     cmd = [
 96 |         "obabel", "-i", "pdb", input_pdb,
 97 |         "-o", "pdbqt", "-O", output_pdbqt,
 98 |         "-h"
 99 |     ]
100 |     subprocess.run(cmd, check=True)
101 | 
102 | 
103 | ########################################
104 | # p2rank Pocket Prediction
105 | ########################################
106 | 
def run_p2rank_and_get_center(receptor_pdb, pdb_id):
    """
    Runs p2rank on the receptor PDB and returns a pocket center (x, y, z).

    p2rank is expected in ``<cwd>/p2rank_2.4.2``; its ``prank`` launcher is
    made executable if necessary. Output is echoed to the console and to
    ``p2rank_<pdb_id>.log``. The center is read from the first row of the
    predictions CSV (presumably the top-ranked pocket - confirm against the
    p2rank output format).

    Raises:
        FileNotFoundError: if the launcher or the predictions CSV is missing.
        RuntimeError: if p2rank exits with a non-zero status or predicts
            no pocket at all.
    """
    p2rank_home = os.path.join(os.getcwd(), "p2rank_2.4.2")
    p2rank_exec = os.path.join(p2rank_home, "prank")
    if not os.path.isfile(p2rank_exec):
        raise FileNotFoundError(f"p2rank not found at {p2rank_exec}")
    if not os.access(p2rank_exec, os.X_OK):
        os.chmod(p2rank_exec, 0o755)

    cmd = [p2rank_exec, "predict", "-f", receptor_pdb]
    log_file = f"p2rank_{pdb_id}.log"
    ret = run_command_with_live_output(cmd, log_file)
    if ret != 0:
        raise RuntimeError("p2rank prediction failed.")

    # Build the CSV path from the same installation directory as the
    # executable so both always refer to one p2rank copy.
    base_name = os.path.splitext(os.path.basename(receptor_pdb))[0]
    predictions_csv = os.path.join(
        p2rank_home, "test_output", f"predict_{base_name}",
        f"{base_name}.pdb_predictions.csv",
    )
    if not os.path.isfile(predictions_csv):
        raise FileNotFoundError(f"p2rank predictions not found at {predictions_csv}")

    df = pd.read_csv(predictions_csv, skipinitialspace=True)
    df.columns = [c.strip().lower() for c in df.columns]
    # A header-only CSV means no pocket was predicted; fail with a clear
    # message instead of an IndexError from iloc[0].
    if df.empty:
        raise RuntimeError(
            f"p2rank predicted no pockets for {receptor_pdb}; cannot place the docking box."
        )

    first_pocket = df.iloc[0]
    cx = float(first_pocket["center_x"])
    cy = float(first_pocket["center_y"])
    cz = float(first_pocket["center_z"])
    print(f"[p2rank] Pocket center: ({cx}, {cy}, {cz})")
    return (cx, cy, cz)
130 | 
131 | 
132 | ########################################
133 | # Ligand Preparation (SMILES and/or SDF)
134 | ########################################
135 | 
def generate_multiple_conformers(mol, num_confs=3):
    """
    Adds hydrogens to ``mol``, embeds up to ``num_confs`` 3D conformers
    (fixed random seed 42) and minimizes each one for at most 200
    iterations - with MMFF when parameters exist for the whole molecule,
    otherwise with UFF. Returns the hydrogenated molecule carrying the
    conformers; the input molecule itself is not modified.
    """
    hydrogenated = Chem.AddHs(mol)
    conformer_ids = AllChem.EmbedMultipleConfs(
        hydrogenated, numConfs=num_confs, randomSeed=42
    )
    for conformer_id in conformer_ids:
        use_mmff = AllChem.MMFFHasAllMoleculeParams(hydrogenated)
        minimize = AllChem.MMFFOptimizeMolecule if use_mmff else AllChem.UFFOptimizeMolecule
        minimize(hydrogenated, confId=conformer_id, maxIters=200)
    return hydrogenated
148 | 
149 | 
def prepare_ligands(smiles_list=None, sdf_file=None, num_confs=3, out_dir="ligand_prep"):
    """
    Prepares ligand files from a list of SMILES and/or an SDF file.

    For each valid molecule, multiple conformers are generated and each one
    is written as a separate PDB file in ``out_dir`` (created if needed).
    SMILES ligands are named ``lig_<n>``; SDF ligands are named
    ``<name>_<n>`` where ``<name>`` is the record's ``_Name`` reduced to
    file-name-safe characters (or ``sdf_<n>`` when the name is empty).

    Invalid SMILES, unreadable SDF records, a missing SDF file and ligands
    for which no conformer could be embedded are reported with a warning
    and skipped.

    Returns a list of (pdb_filepath, label) tuples, one per conformer.
    """
    os.makedirs(out_dir, exist_ok=True)
    results = []

    def write_confs_to_pdb(mol, base_name):
        out_paths = []
        for i, conf in enumerate(mol.GetConformers(), start=1):
            # Copy the molecule keeping only this conformer.
            tmp_mol = Chem.Mol(mol, False, conf.GetId())
            label = f"{base_name}_conf{i}"
            pdb_path = os.path.join(out_dir, f"{label}.pdb")
            Chem.MolToPDBFile(tmp_mol, pdb_path)
            out_paths.append((pdb_path, label))
        if not out_paths:
            # Embedding can fail without raising; make the dropped ligand visible.
            print(f"[LigandPrep] Warning: no 3D conformer generated for {base_name}; skipped.")
        return out_paths

    # Process SMILES if provided
    if smiles_list:
        for idx, smi in enumerate(smiles_list, start=1):
            mol = Chem.MolFromSmiles(smi)
            if not mol:
                print(f"[LigandPrep] Warning: invalid SMILES skipped: {smi}")
                continue
            mol3d = generate_multiple_conformers(mol, num_confs=num_confs)
            results.extend(write_confs_to_pdb(mol3d, f"lig_{idx}"))

    # Process SDF if provided
    if sdf_file:
        if not os.path.isfile(sdf_file):
            print(f"[LigandPrep] Warning: SDF file not found: {sdf_file}")
        else:
            suppl = Chem.SDMolSupplier(sdf_file, removeHs=False)
            mol_count = 0
            for i, mol in enumerate(suppl):
                if mol is None:
                    print(f"[LigandPrep] Warning: skipping invalid SDF record {i}.")
                    continue
                mol_count += 1
                raw_name = mol.GetProp("_Name") if mol.HasProp("_Name") else ""
                # The name ends up in file names and as a CSV label, so
                # spaces, path separators, commas etc. are replaced.
                safe_name = re.sub(r"[^A-Za-z0-9_.-]+", "_", raw_name).strip("._")
                name = safe_name or f"sdf_{mol_count}"
                mol3d = generate_multiple_conformers(mol, num_confs=num_confs)
                results.extend(write_confs_to_pdb(mol3d, f"{name}_{mol_count}"))

    if not results:
        print("[LigandPrep] No valid ligands found.")
    else:
        print(f"[LigandPrep] Prepared {len(results)} ligand conformers.")
    return results
201 | 
202 | 
203 | ########################################
204 | # Main Docking Workflow
205 | ########################################
206 | 
def perform_docking(smiles_list=None, sdf_file=None, pdb_id="5ZMA", num_confs=3, docking_folder="docking_results"):
    """
    Main docking workflow:
      1) Prepare receptor: download PDB, extract chain A (fallback if needed), remove HETATM (co-ligand), and fix with PDBFixer.
      2) Run p2rank to get pocket center (for a 20x20x20 box).
      3) Convert receptor to PDBQT.
      4) Prepare ligands from SMILES and/or SDF (generate multiple conformers).
      5) For each ligand conformer, convert to PDBQT, dock with AutoDock Vina,
         convert the best pose (first model only) to PDB, and merge it with the
         receptor to form the final complex.
      6) Write docking scores to a CSV file (one row appended per conformer).

    A conformer whose Vina run fails is recorded with score "ERROR" and
    skipped; the remaining conformers are still docked.

    Returns None. Raises FileNotFoundError if the receptor cannot be
    downloaded in PDB format, and subprocess.CalledProcessError if an
    obabel conversion fails.
    """
    if not (smiles_list or (sdf_file and os.path.isfile(sdf_file))):
        print("[ERROR] No valid ligand input provided (neither SMILES nor SDF).")
        return

    os.makedirs(docking_folder, exist_ok=True)

    print(f"[Docking] Preparing receptor {pdb_id} ...")
    pdbl = PDBList()
    raw_file = pdbl.retrieve_pdb_file(pdb_id, file_format="pdb", pdir=docking_folder)
    # Fail with a clear message when the download produced no file.
    if not raw_file or not os.path.isfile(raw_file):
        raise FileNotFoundError(f"Could not download PDB entry {pdb_id} in PDB format.")
    raw_pdb = os.path.join(docking_folder, f"{pdb_id}_raw.pdb")
    # os.replace overwrites an existing file on every platform (re-runs).
    os.replace(raw_file, raw_pdb)
    chainA_file = os.path.join(docking_folder, f"{pdb_id}_chainA_tmp.pdb")
    keep_only_chain_A_with_fallback(raw_pdb, chainA_file)

    # Remove HETATM lines to eliminate co-ligands from the receptor
    receptor_no_het = os.path.join(docking_folder, f"{pdb_id}_chainA_nohet.pdb")
    remove_hetatm(chainA_file, receptor_no_het)

    receptor_pdb = os.path.join(docking_folder, f"{pdb_id}_prepared.pdb")
    fix_with_pdbfixer(receptor_no_het, receptor_pdb)

    # p2rank pocket prediction
    cx, cy, cz = run_p2rank_and_get_center(receptor_pdb, pdb_id)
    box_size = 20.0
    print(f"[Docking] Using docking box (20x20x20) centered at ({cx}, {cy}, {cz})")

    # Convert receptor to PDBQT
    receptor_pdbqt = os.path.join(docking_folder, f"{pdb_id}_prepared.pdbqt")
    convert_pdb_to_pdbqt_receptor(receptor_pdb, receptor_pdbqt)

    # Ligand preparation (from SMILES and/or SDF)
    lig_out_dir = os.path.join(docking_folder, "ligands")
    ligand_list = prepare_ligands(smiles_list=smiles_list, sdf_file=sdf_file, num_confs=num_confs, out_dir=lig_out_dir)
    if not ligand_list:
        print("[Docking] No valid ligands to dock. Exiting.")
        return

    results_csv = os.path.join(docking_folder, "docking_results.csv")
    with open(results_csv, "w") as rf:
        rf.write("LigandLabel,Score\n")

    for lig_pdb, label in ligand_list:
        print(f"\n[Docking] Processing ligand conformer {label} ...")
        ligand_pdbqt = os.path.join(docking_folder, f"{label}.pdbqt")
        convert_pdb_to_pdbqt_ligand(lig_pdb, ligand_pdbqt)

        out_pdbqt = os.path.join(docking_folder, f"{label}_out.pdbqt")
        log_file = os.path.join(docking_folder, f"{label}_vina.log")

        vina_cmd = [
            "vina",
            "--receptor", receptor_pdbqt,
            "--ligand", ligand_pdbqt,
            "--out", out_pdbqt,
            "--center_x", str(cx),
            "--center_y", str(cy),
            "--center_z", str(cz),
            "--size_x", str(box_size),
            "--size_y", str(box_size),
            "--size_z", str(box_size),
            "--num_modes", "10",
        ]
        ret_code = run_command_with_live_output(vina_cmd, log_file)
        best_score = _parse_best_vina_score(log_file) if ret_code == 0 else "ERROR"
        print(f"[Docking] Best score for {label}: {best_score}")
        # Appended per conformer so partial results survive an aborted run.
        with open(results_csv, "a") as rf:
            rf.write(f"{label},{best_score}\n")

        if ret_code != 0:
            # Vina wrote no pose file; converting it would raise and abort
            # the whole batch, so move on to the next conformer instead.
            print(f"[Docking] Vina failed for {label}; see {log_file}. Skipping complex generation.")
            continue

        # Convert best pose from PDBQT to PDB. Vina stores up to --num_modes
        # poses in one file; "-f 1 -l 1" limits obabel to the first (best) one.
        docked_pdb = os.path.join(docking_folder, f"{label}_docked.pdb")
        subprocess.run([
            "obabel", "-ipdbqt", out_pdbqt,
            "-opdb", "-O", docked_pdb, "-d",
            "-f", "1", "-l", "1"
        ], check=True)

        # Merge receptor and docked ligand to form final complex
        final_complex = os.path.join(docking_folder, f"{label}_complex.pdb")
        _write_complex(receptor_pdb, docked_pdb, final_complex)
        print(f"[Docking] Final complex saved as: {final_complex}")

    print(f"\n[DONE] Docking complete. Results saved in {results_csv}")


def _parse_best_vina_score(log_file):
    """Returns the score column of mode 1 from a Vina log, or "N/A" if absent."""
    with open(log_file, "r") as lg:
        for line in lg:
            if re.match(r"^\s*1\s+", line):
                parts = line.split()
                return parts[1] if len(parts) >= 2 else "N/A"
    return "N/A"


def _write_complex(receptor_pdb, ligand_pdb, complex_pdb):
    """Writes receptor records, a TER line, the ligand's atom records, then END."""
    with open(complex_pdb, "w") as fc:
        with open(receptor_pdb, "r") as recf:
            for line in recf:
                if line.startswith("END"):
                    continue
                fc.write(line)
        fc.write("TER\n")
        with open(ligand_pdb, "r") as ligf:
            # Only coordinate records are copied: the ligand's header, CONECT
            # and END lines would duplicate END and, because ligand serial
            # numbers restart at 1, point CONECT bonds at receptor atoms.
            fc.writelines(
                line for line in ligf if line.startswith(("ATOM", "HETATM"))
            )
        fc.write("END\n")
320 | 


--------------------------------------------------------------------------------