├── src ├── __init__.py └── dockcadd.py ├── requirements.txt ├── .gitignore ├── bin └── install_dependencies.sh ├── LICENSE ├── scripts └── setup.sh ├── README.md └── DockCADD └── docking.py /src/__init__.py: -------------------------------------------------------------------------------- 1 | # src/__init__.py 2 | 3 | from .dockcadd import perform_docking 4 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | biopandas 2 | pubchempy 3 | tqdm 4 | matplotlib 5 | scipy 6 | rdkit-pypi 7 | biopython 8 | pymol 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # Virtual environment 7 | venv/ 8 | .env 9 | 10 | # Data files 11 | data/*.pdb 12 | data/*.pdbqt 13 | 14 | # Logs 15 | *.log 16 | 17 | # p2rank output 18 | p2rank_2.4.2/test_output/ 19 | 20 | # AutoDock Vina output 21 | docking_results/*.pdbqt 22 | docking_results/*.txt 23 | 24 | # Colab specific 25 | .ipynb_checkpoints/ 26 | -------------------------------------------------------------------------------- /bin/install_dependencies.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Install AutoDock Vina 3 | wget https://github.com/ccsb-scripps/AutoDock-Vina/releases/download/v1.2.5/vina_1.2.5_linux_x86_64 4 | chmod +x vina_1.2.5_linux_x86_64 5 | mv vina_1.2.5_linux_x86_64 /usr/local/bin/vina 6 | 7 | # Install P2Rank 8 | wget https://github.com/rdk/p2rank/releases/download/2.4.2/p2rank_2.4.2.tar.gz 9 | tar -xzf p2rank_2.4.2.tar.gz 10 | 11 | echo "Dependencies installed successfully." 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /scripts/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ############################################ 4 | # scripts/setup.sh 5 | # Installs all system and Python dependencies needed for DockcaddV2. 6 | ############################################ 7 | 8 | # 1) System Updates and Packages 9 | sudo apt-get update -y 10 | sudo apt-get install -y pymol openbabel wget tar openjdk-11-jdk 11 | 12 | # 2) Python Libraries 13 | echo "Upgrading pip and installing Python libraries..." 14 | pip install --upgrade pip 15 | 16 | # Install dependencies from requirements.txt if available 17 | if [ -f "requirements.txt" ]; then 18 | pip install -r requirements.txt 19 | fi 20 | 21 | # Additional packages (if not listed in requirements.txt): 22 | pip install git+https://github.com/openmm/pdbfixer.git 23 | pip install openmm 24 | pip install rdkit-pypi 25 | 26 | echo "Python dependencies installed successfully." 27 | 28 | # 3) AutoDock Vina Installation 29 | VINA_URL="https://github.com/ccsb-scripps/AutoDock-Vina/releases/download/v1.2.5/vina_1.2.5_linux_x86_64" 30 | VINA_NAME="vina_1.2.5_linux_x86_64" 31 | 32 | if [ ! -f "/usr/local/bin/vina" ] && [ ! -f "/usr/local/bin/${VINA_NAME}" ]; then 33 | echo "Installing AutoDock Vina 1.2.5..." 34 | wget -q "$VINA_URL" 35 | chmod +x "$VINA_NAME" 36 | sudo mv "$VINA_NAME" /usr/local/bin/vina 37 | rm -f "$VINA_NAME" 38 | echo "AutoDock Vina installed successfully." 39 | else 40 | echo "AutoDock Vina is already installed." 41 | fi 42 | 43 | # 4) p2rank Installation 44 | P2RANK_URL="https://github.com/rdk/p2rank/releases/download/2.4.2/p2rank_2.4.2.tar.gz" 45 | P2RANK_DIR="p2rank_2.4.2" 46 | 47 | if [ ! -d "$P2RANK_DIR" ]; then 48 | echo "Installing p2rank 2.4.2..." 49 | wget -q "$P2RANK_URL" 50 | tar -xzf "p2rank_2.4.2.tar.gz" 51 | rm -f "p2rank_2.4.2.tar.gz" 52 | echo "p2rank installed successfully." 53 | else 54 | echo "p2rank is already installed." 55 | fi 56 | 57 | echo "Setup is complete." 58 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DockCADD v2 2 | 3 | **DockCADD v2** is a streamlined and automated computational framework designed to facilitate molecular docking and drug discovery. It requires minimal input from users and utilizes advanced tools to provide accurate docking results. 4 | 5 | This new version of **DockCADD** is a lightweight, integrated workflow for structure-based drug design. It automatically prepares a receptor (extracting only a specified chain, with a fallback if the chain isn’t found), repairs missing residues/atoms via [PDBFixer](https://github.com/openmm/pdbfixer), predicts the binding pocket with [p2rank](https://github.com/rdk/p2rank), and docks ligands using [AutoDock Vina](https://github.com/ccsb-scripps/AutoDock-Vina). Ligands can be provided as a list of SMILES strings or as an SDF file, and multiple conformers are generated for each molecule using [RDKit](https://www.rdkit.org/). 6 | 7 | ## Features 8 | 9 | - **Receptor Preparation**: 10 | - Downloads a protein structure from the PDB. 11 | - Extracts only chain A (or falls back to the full structure if chain A isn’t found). 12 | - Repairs missing residues, atoms, and adds hydrogens using PDBFixer. 13 | 14 | - **Pocket Prediction**: 15 | - Uses p2rank to predict the binding pocket and extract its center. 16 | 17 | - **Ligand Preparation**: 18 | - Accepts ligands as a list of SMILES strings and/or an SDF file. 19 | - Generates multiple 3D conformers per ligand with RDKit. 20 | - Writes each conformer to a separate PDB file. 21 | 22 | - **Docking**: 23 | - Converts the receptor and ligand files to PDBQT format using OpenBabel. 24 | - Docks each ligand conformer with AutoDock Vina. 25 | - Parses the best docking pose and merges it with the receptor to generate a final complex. 26 | 27 | - **Visualization**: 28 | - Includes an optional PyMOL visualization function to generate a static PNG snapshot of a final complex. 29 | 30 | ## Installation 31 | 32 | 1. **Clone the Repository:** 33 | 34 | ```bash 35 | git clone https://github.com/mehdikariim/DockCADD-v2.git 36 | cd DockCADD-v2 37 | 38 | 2. **Run the Setup Script:** 39 | 40 | This script installs all system packages (e.g., PyMOL, OpenBabel, Java), AutoDock Vina, p2rank, and the required Python libraries (including PDBFixer, OpenMM, and RDKit). 41 | 42 | ```bash 43 | bash scripts/setup.sh 44 | 45 | 2. **Run the Setup Script:** 46 | 47 | 3. **Usage:** 48 | You can use the provided Python package to perform docking. Below are two example usage scenarios: 49 | 50 | Example 1: **Docking Using a List of SMILES** 51 | ```bash 52 | from src.dockcadd import perform_docking 53 | 54 | # Define your list of ligand SMILES and target receptor PDB ID 55 | smiles_list = ["CCOc1ccc(CC(=O)NC)cc1", "CCCC(=O)NCC1=CC=CC=C1"] 56 | pdb_id = "5ZMA" 57 | 58 | # Run docking (generates 3 conformers per ligand by default) 59 | perform_docking(smiles_list=smiles_list, sdf_file=None, pdb_id=pdb_id, num_confs=3, docking_folder="docking_results") 60 | 61 | Example 2: **Docking Using an SDF File** 62 | ```bash 63 | from src.dockcadd import perform_docking 64 | 65 | # Provide the path to your SDF file containing ligands 66 | sdf_file = "path/to/your_ligands.sdf" 67 | pdb_id = "5ZMA" 68 | 69 | # Run docking using the SDF file (3 conformers per ligand) 70 | perform_docking(smiles_list=None, sdf_file=sdf_file, pdb_id=pdb_id, num_confs=3, docking_folder="docking_results") 71 | 72 | 73 | # License 74 | This project is licensed under the MIT License. 75 | 76 | # Citations 77 | If you use DockcaddV2 in your work, please cite our article: 78 | 79 | Karim, E.M.et al (2025). DockCADD: A streamlined In Silico pipeline for the identification of potent Ribosomal S6 Kinase 2 (RSK2) inhibitors. Scientific African, e02581. 80 | https://doi.org/10.1016/j.sciaf.2025.e02581 81 | 82 | # Acknowledgments 83 | AutoDock Vina: AutoDock Vina 1.2.5 84 | p2rank: p2rank 2.4.2 85 | PDBFixer & OpenMM: PDBFixer 86 | RDKit: RDKit 87 | 88 | 89 | -------------------------------------------------------------------------------- /DockCADD/docking.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import sys 4 | from rdkit import Chem 5 | from rdkit.Chem import AllChem 6 | from Bio.PDB import PDBList 7 | import pandas as pd 8 | import numpy as np 9 | 10 | def generate_minimized_pdb(smiles, pdb_filename): 11 | mol = Chem.MolFromSmiles(smiles) 12 | if mol is None: 13 | print(f"Invalid SMILES string: {smiles}") 14 | return False 15 | mol = Chem.AddHs(mol) 16 | try: 17 | AllChem.EmbedMolecule(mol, randomSeed=42) 18 | except: 19 | print(f"Failed to generate 3D coordinates for SMILES: {smiles}") 20 | return False 21 | try: 22 | AllChem.UFFOptimizeMolecule(mol, maxIters=200) 23 | except: 24 | print(f"Energy minimization failed for SMILES: {smiles}") 25 | return False 26 | try: 27 | Chem.SanitizeMol(mol) 28 | except: 29 | print(f"Sanitization failed for SMILES: {smiles}") 30 | return False 31 | Chem.MolToPDBFile(mol, pdb_filename) 32 | print(f"Minimized molecule saved as {pdb_filename}") 33 | return True 34 | 35 | def download_pdb(pdb_id, download_dir): 36 | if not os.path.exists(download_dir): 37 | os.makedirs(download_dir) 38 | pdbl = PDBList() 39 | pdb_file_path = pdbl.retrieve_pdb_file(pdb_id, file_format='pdb', pdir=download_dir) 40 | return pdb_file_path 41 | 42 | def remove_hetatm(input_pdb, output_pdb): 43 | with open(input_pdb, 'r') as infile, open(output_pdb, 'w') as outfile: 44 | for line in infile: 45 | if not line.startswith('HETATM'): 46 | outfile.write(line) 47 | 48 | def convert_pdb_to_pdbqt_receptor(input_pdb, output_pdbqt): 49 | subprocess.run(['obabel', '-i', 'pdb', input_pdb, '-o', 'pdbqt', '-O', output_pdbqt, '-xr', '-xn', '-xp'], check=True) 50 | 51 | def convert_pdb_to_pdbqt_ligand(input_pdb, output_pdbqt): 52 | subprocess.run(['obabel', '-i', 'pdb', input_pdb, '-o', 'pdbqt', '-O', output_pdbqt, '-h'], check=True) 53 | 54 | def run_command_with_output(command, log_file): 55 | process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True) 56 | with open(log_file, 'w') as log: 57 | for line in process.stdout: 58 | sys.stdout.write(line) 59 | log.write(line) 60 | sys.stdout.flush() 61 | return process.wait() 62 | 63 | def perform_docking(smiles_list, PDB_ID): 64 | folder_name = 'docking_results' 65 | receptor_name = PDB_ID 66 | 67 | # Create results folder 68 | if not os.path.exists(folder_name): 69 | os.mkdir(folder_name) 70 | 71 | print(f"Receptor Name: {receptor_name}") 72 | print(f"Number of ligands: {len(smiles_list)}") 73 | 74 | # Generate and pre-process ligands 75 | valid_smiles = [] 76 | for i, smiles in enumerate(smiles_list): 77 | pdb_filename = f'{folder_name}/ligand_{i+1}.pdb' 78 | if generate_minimized_pdb(smiles, pdb_filename): 79 | valid_smiles.append(smiles) 80 | 81 | print(f"Number of valid SMILES processed: {len(valid_smiles)}") 82 | 83 | # Download and pre-process receptor 84 | downloaded_pdb_path = download_pdb(PDB_ID, folder_name) 85 | os.rename(downloaded_pdb_path, f'{folder_name}/{receptor_name}_dirty.pdb') 86 | 87 | # Remove HETATM from PDB file 88 | remove_hetatm(f'{folder_name}/{receptor_name}_dirty.pdb', f'{folder_name}/{receptor_name}.pdb') 89 | 90 | # Define docking box using p2rank 91 | p2rank_jar_path = os.path.join(os.getcwd(), 'p2rank_2.4.2', 'bin', 'p2rank.jar') 92 | subprocess.run(['java', '-jar', p2rank_jar_path, 'predict', '-f', f'{folder_name}/{receptor_name}.pdb'], check=True) 93 | 94 | # Extract docking box center 95 | df = pd.read_csv(f'p2rank_2.4.2/test_output/predict_{receptor_name}/{receptor_name}.pdb_predictions.csv') 96 | center_x, center_y, center_z = float(df[' center_x'].iloc[0]), float(df[' center_y'].iloc[0]), float(df[' center_z'].iloc[0]) 97 | 98 | # Convert receptor to PDBQT format 99 | receptor_pdb = f"{folder_name}/{receptor_name}.pdb" 100 | receptor_pdbqt = f"{folder_name}/{receptor_name}.pdbqt" 101 | convert_pdb_to_pdbqt_receptor(receptor_pdb, receptor_pdbqt) 102 | 103 | # Open results file and process ligands 104 | results_file = f"{folder_name}/docking_results.txt" 105 | with open(results_file, 'w') as f: 106 | f.write("SMILES,Docking Score\n") # Write header 107 | 108 | for i, smiles in enumerate(smiles_list): 109 | print(f"\nProcessing ligand {i+1} of {len(smiles_list)}") 110 | print(f"SMILES: {smiles}") 111 | 112 | ligand_pdb = f"{folder_name}/ligand_{i+1}.pdb" 113 | ligand_pdbqt = f"{folder_name}/ligand_{i+1}.pdbqt" 114 | 115 | print("Converting ligand to PDBQT format...") 116 | convert_pdb_to_pdbqt_ligand(ligand_pdb, ligand_pdbqt) 117 | print("Ligand conversion complete.") 118 | 119 | output = f"{folder_name}/ligand_{i+1}_out.pdbqt" 120 | log_file = f"{folder_name}/vina_log_{i+1}.txt" 121 | vina_command = [ 122 | 'vina', 123 | '--receptor', receptor_pdbqt, 124 | '--ligand', ligand_pdbqt, 125 | '--out', output, 126 | '--center_x', str(center_x), 127 | '--center_y', str(center_y), 128 | '--center_z', str(center_z), 129 | '--size_x', '20', 130 | '--size_y', '20', 131 | '--size_z', '20' 132 | ] 133 | 134 | print("Starting Vina docking...") 135 | exit_code = run_command_with_output(vina_command, log_file) 136 | 137 | if exit_code == 0: 138 | print("Vina docking completed successfully.") 139 | with open(log_file, 'r') as log: 140 | score = "N/A" 141 | for line in log: 142 | if line.startswith(' 1'): 143 | score = line.split()[1] 144 | break 145 | print(f"Best docking score: {score}") 146 | else: 147 | print(f"Error running Vina for ligand {i+1}. Check the log file for details.") 148 | score = "Error" 149 | 150 | # Write result to file 151 | f.write(f"{smiles},{score}\n") 152 | -------------------------------------------------------------------------------- /src/dockcadd.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import re 4 | import subprocess 5 | import pandas as pd 6 | 7 | from Bio.PDB import PDBList 8 | from pdbfixer import PDBFixer 9 | from openmm.app import PDBFile 10 | 11 | from rdkit import Chem 12 | from rdkit.Chem import AllChem 13 | 14 | ######################################## 15 | # Helper Functions 16 | ######################################## 17 | 18 | def run_command_with_live_output(command, log_file): 19 | """ 20 | Runs a command in a subprocess, writing output to both console and a log file. 21 | """ 22 | process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True) 23 | with open(log_file, 'w') as lf: 24 | for line in process.stdout: 25 | print(line, end="") # Print to console 26 | lf.write(line) 27 | return process.wait() 28 | 29 | 30 | def keep_only_chain_A_with_fallback(input_pdb, output_pdb): 31 | """ 32 | Extracts lines for chain A from input_pdb. 33 | Writes only ATOM/HETATM lines (and TER if chain A) to output_pdb. 34 | If no chain A lines are found, copies the full file. 35 | """ 36 | chain_a_count = 0 37 | with open(input_pdb, 'r') as infile, open(output_pdb, 'w') as outfile: 38 | for line in infile: 39 | chain_id = line[21] if len(line) >= 22 else None 40 | if line.startswith(("ATOM", "HETATM")): 41 | if chain_id == 'A': 42 | outfile.write(line) 43 | chain_a_count += 1 44 | elif line.startswith("TER"): 45 | if chain_id == 'A': 46 | outfile.write(line) 47 | elif line.startswith("END"): 48 | outfile.write(line) 49 | if chain_a_count == 0: 50 | print("[WARN] No chain A lines found; using full PDB instead.") 51 | with open(input_pdb, 'r') as inf, open(output_pdb, 'w') as outf: 52 | outf.write(inf.read()) 53 | 54 | 55 | def remove_hetatm(input_pdb, output_pdb): 56 | """ 57 | Removes all HETATM lines from input_pdb. 58 | This effectively removes any co-ligand present in the receptor. 59 | """ 60 | with open(input_pdb, 'r') as infile, open(output_pdb, 'w') as outfile: 61 | for line in infile: 62 | if not line.startswith("HETATM"): 63 | outfile.write(line) 64 | 65 | 66 | def fix_with_pdbfixer(pdb_in, pdb_out): 67 | """ 68 | Uses PDBFixer to add missing residues, atoms, and hydrogens (pH 7). 69 | """ 70 | fixer = PDBFixer(filename=pdb_in) 71 | fixer.findMissingResidues() 72 | fixer.findMissingAtoms() 73 | fixer.addMissingAtoms() 74 | fixer.addMissingHydrogens(7.0) 75 | with open(pdb_out, 'w') as f: 76 | PDBFile.writeFile(fixer.topology, fixer.positions, f) 77 | 78 | 79 | def convert_pdb_to_pdbqt_receptor(input_pdb, output_pdbqt): 80 | """ 81 | Converts receptor PDB to PDBQT using OpenBabel. 82 | """ 83 | cmd = [ 84 | "obabel", "-i", "pdb", input_pdb, 85 | "-o", "pdbqt", "-O", output_pdbqt, 86 | "-xr", "-xn", "-xp" 87 | ] 88 | subprocess.run(cmd, check=True) 89 | 90 | 91 | def convert_pdb_to_pdbqt_ligand(input_pdb, output_pdbqt): 92 | """ 93 | Converts ligand PDB to PDBQT using OpenBabel. 94 | """ 95 | cmd = [ 96 | "obabel", "-i", "pdb", input_pdb, 97 | "-o", "pdbqt", "-O", output_pdbqt, 98 | "-h" 99 | ] 100 | subprocess.run(cmd, check=True) 101 | 102 | 103 | ######################################## 104 | # p2rank Pocket Prediction 105 | ######################################## 106 | 107 | def run_p2rank_and_get_center(receptor_pdb, pdb_id): 108 | """ 109 | Runs p2rank on the receptor PDB and returns the top pocket center (x,y,z). 110 | """ 111 | p2rank_exec = os.path.join(os.getcwd(), "p2rank_2.4.2", "prank") 112 | if not os.path.isfile(p2rank_exec): 113 | raise FileNotFoundError(f"p2rank not found at {p2rank_exec}") 114 | if not os.access(p2rank_exec, os.X_OK): 115 | os.chmod(p2rank_exec, 0o755) 116 | cmd = [p2rank_exec, "predict", "-f", receptor_pdb] 117 | log_file = f"p2rank_{pdb_id}.log" 118 | ret = run_command_with_live_output(cmd, log_file) 119 | if ret != 0: 120 | raise RuntimeError("p2rank prediction failed.") 121 | base_name = os.path.splitext(os.path.basename(receptor_pdb))[0] 122 | predictions_csv = f"p2rank_2.4.2/test_output/predict_{base_name}/{base_name}.pdb_predictions.csv" 123 | df = pd.read_csv(predictions_csv, skipinitialspace=True) 124 | df.columns = [c.strip().lower() for c in df.columns] 125 | cx = float(df["center_x"].iloc[0]) 126 | cy = float(df["center_y"].iloc[0]) 127 | cz = float(df["center_z"].iloc[0]) 128 | print(f"[p2rank] Pocket center: ({cx}, {cy}, {cz})") 129 | return (cx, cy, cz) 130 | 131 | 132 | ######################################## 133 | # Ligand Preparation (SMILES and/or SDF) 134 | ######################################## 135 | 136 | def generate_multiple_conformers(mol, num_confs=3): 137 | """ 138 | Generates multiple 3D conformers for a molecule using RDKit. 139 | """ 140 | mol = Chem.AddHs(mol) 141 | cids = AllChem.EmbedMultipleConfs(mol, numConfs=num_confs, randomSeed=42) 142 | for cid in cids: 143 | if AllChem.MMFFHasAllMoleculeParams(mol): 144 | AllChem.MMFFOptimizeMolecule(mol, confId=cid, maxIters=200) 145 | else: 146 | AllChem.UFFOptimizeMolecule(mol, confId=cid, maxIters=200) 147 | return mol 148 | 149 | 150 | def prepare_ligands(smiles_list=None, sdf_file=None, num_confs=3, out_dir="ligand_prep"): 151 | """ 152 | Prepares ligand files from a list of SMILES and/or an SDF file. 153 | For each valid molecule, generates multiple conformers and writes each as a separate PDB. 154 | Returns a list of (pdb_filepath, label). 155 | """ 156 | if not os.path.exists(out_dir): 157 | os.makedirs(out_dir, exist_ok=True) 158 | results = [] 159 | 160 | def write_confs_to_pdb(mol, base_name): 161 | conf_ids = [conf.GetId() for conf in mol.GetConformers()] 162 | out_paths = [] 163 | for i, cid in enumerate(conf_ids, start=1): 164 | tmp_mol = Chem.Mol(mol, False, cid) 165 | pdb_name = f"{base_name}_conf{i}.pdb" 166 | pdb_path = os.path.join(out_dir, pdb_name) 167 | Chem.MolToPDBFile(tmp_mol, pdb_path) 168 | out_paths.append((pdb_path, pdb_name.replace(".pdb", ""))) 169 | return out_paths 170 | 171 | # Process SMILES if provided 172 | if smiles_list: 173 | for idx, smi in enumerate(smiles_list, start=1): 174 | mol = Chem.MolFromSmiles(smi) 175 | if not mol: 176 | print(f"[LigandPrep] Warning: invalid SMILES skipped: {smi}") 177 | continue 178 | mol3d = generate_multiple_conformers(mol, num_confs=num_confs) 179 | base_name = f"lig_{idx}" 180 | results.extend(write_confs_to_pdb(mol3d, base_name)) 181 | 182 | # Process SDF if provided 183 | if sdf_file and os.path.isfile(sdf_file): 184 | suppl = Chem.SDMolSupplier(sdf_file, removeHs=False) 185 | mol_count = 0 186 | for i, mol in enumerate(suppl): 187 | if mol is None: 188 | print(f"[LigandPrep] Warning: skipping invalid SDF record {i}.") 189 | continue 190 | mol_count += 1 191 | name = mol.GetProp("_Name") if mol.HasProp("_Name") else f"sdf_{mol_count}" 192 | mol3d = generate_multiple_conformers(mol, num_confs=num_confs) 193 | base_name = f"{name}_{mol_count}" 194 | results.extend(write_confs_to_pdb(mol3d, base_name)) 195 | 196 | if not results: 197 | print("[LigandPrep] No valid ligands found.") 198 | else: 199 | print(f"[LigandPrep] Prepared {len(results)} ligand conformers.") 200 | return results 201 | 202 | 203 | ######################################## 204 | # Main Docking Workflow 205 | ######################################## 206 | 207 | def perform_docking(smiles_list=None, sdf_file=None, pdb_id="5ZMA", num_confs=3, docking_folder="docking_results"): 208 | """ 209 | Main docking workflow: 210 | 1) Prepare receptor: download PDB, extract chain A (fallback if needed), remove HETATM (co-ligand), and fix with PDBFixer. 211 | 2) Run p2rank to get pocket center (for a 20x20x20 box). 212 | 3) Convert receptor to PDBQT. 213 | 4) Prepare ligands from SMILES and/or SDF (generate multiple conformers). 214 | 5) For each ligand conformer, convert to PDBQT, dock with AutoDock Vina, 215 | convert best pose to PDB, and merge with receptor to form the final complex. 216 | 6) Write docking scores to a CSV file. 217 | """ 218 | if not (smiles_list or (sdf_file and os.path.isfile(sdf_file))): 219 | print("[ERROR] No valid ligand input provided (neither SMILES nor SDF).") 220 | return 221 | 222 | if not os.path.exists(docking_folder): 223 | os.makedirs(docking_folder, exist_ok=True) 224 | 225 | print(f"[Docking] Preparing receptor {pdb_id} ...") 226 | pdbl = PDBList() 227 | raw_file = pdbl.retrieve_pdb_file(pdb_id, file_format="pdb", pdir=docking_folder) 228 | raw_pdb = os.path.join(docking_folder, f"{pdb_id}_raw.pdb") 229 | os.rename(raw_file, raw_pdb) 230 | chainA_file = os.path.join(docking_folder, f"{pdb_id}_chainA_tmp.pdb") 231 | keep_only_chain_A_with_fallback(raw_pdb, chainA_file) 232 | 233 | # Remove HETATM lines to eliminate co-ligands from the receptor 234 | receptor_no_het = os.path.join(docking_folder, f"{pdb_id}_chainA_nohet.pdb") 235 | remove_hetatm(chainA_file, receptor_no_het) 236 | 237 | receptor_pdb = os.path.join(docking_folder, f"{pdb_id}_prepared.pdb") 238 | fix_with_pdbfixer(receptor_no_het, receptor_pdb) 239 | 240 | # p2rank pocket prediction 241 | cx, cy, cz = run_p2rank_and_get_center(receptor_pdb, pdb_id) 242 | box_size = 20.0 243 | print(f"[Docking] Using docking box (20x20x20) centered at ({cx}, {cy}, {cz})") 244 | 245 | # Convert receptor to PDBQT 246 | receptor_pdbqt = os.path.join(docking_folder, f"{pdb_id}_prepared.pdbqt") 247 | convert_pdb_to_pdbqt_receptor(receptor_pdb, receptor_pdbqt) 248 | 249 | # Ligand preparation (from SMILES and/or SDF) 250 | lig_out_dir = os.path.join(docking_folder, "ligands") 251 | ligand_list = prepare_ligands(smiles_list=smiles_list, sdf_file=sdf_file, num_confs=num_confs, out_dir=lig_out_dir) 252 | if not ligand_list: 253 | print("[Docking] No valid ligands to dock. Exiting.") 254 | return 255 | 256 | results_csv = os.path.join(docking_folder, "docking_results.csv") 257 | with open(results_csv, "w") as rf: 258 | rf.write("LigandLabel,Score\n") 259 | 260 | for i, (lig_pdb, label) in enumerate(ligand_list, start=1): 261 | print(f"\n[Docking] Processing ligand conformer {label} ...") 262 | ligand_pdbqt = os.path.join(docking_folder, f"{label}.pdbqt") 263 | convert_pdb_to_pdbqt_ligand(lig_pdb, ligand_pdbqt) 264 | 265 | out_pdbqt = os.path.join(docking_folder, f"{label}_out.pdbqt") 266 | log_file = os.path.join(docking_folder, f"{label}_vina.log") 267 | 268 | vina_cmd = [ 269 | "vina", 270 | "--receptor", receptor_pdbqt, 271 | "--ligand", ligand_pdbqt, 272 | "--out", out_pdbqt, 273 | "--center_x", str(cx), 274 | "--center_y", str(cy), 275 | "--center_z", str(cz), 276 | "--size_x", str(box_size), 277 | "--size_y", str(box_size), 278 | "--size_z", str(box_size), 279 | "--num_modes", "10" 280 | ] 281 | ret_code = run_command_with_live_output(vina_cmd, log_file) 282 | best_score = "N/A" 283 | if ret_code == 0: 284 | with open(log_file, "r") as lg: 285 | for line in lg: 286 | if re.match(r"^\s*1\s+", line): 287 | parts = line.split() 288 | if len(parts) >= 2: 289 | best_score = parts[1] 290 | break 291 | else: 292 | best_score = "ERROR" 293 | print(f"[Docking] Best score for {label}: {best_score}") 294 | with open(results_csv, "a") as rf: 295 | rf.write(f"{label},{best_score}\n") 296 | 297 | # Convert best pose from PDBQT to PDB 298 | docked_pdb = os.path.join(docking_folder, f"{label}_docked.pdb") 299 | subprocess.run([ 300 | "obabel", "-ipdbqt", out_pdbqt, 301 | "-opdb", "-O", docked_pdb, "-d" 302 | ], check=True) 303 | 304 | # Merge receptor and docked ligand to form final complex 305 | final_complex = os.path.join(docking_folder, f"{label}_complex.pdb") 306 | with open(final_complex, "w") as fc: 307 | with open(receptor_pdb, "r") as recf: 308 | for line in recf: 309 | if line.startswith("END"): 310 | continue 311 | fc.write(line) 312 | fc.write("TER\n") 313 | with open(docked_pdb, "r") as ligf: 314 | for line in ligf: 315 | fc.write(line) 316 | fc.write("END\n") 317 | print(f"[Docking] Final complex saved as: {final_complex}") 318 | 319 | print(f"\n[DONE] Docking complete. Results saved in {results_csv}") 320 | --------------------------------------------------------------------------------