├── dockbox ├── __init__.py ├── gnina.py ├── dsx.py ├── license.py ├── vina.py ├── dbxtools.py ├── configure.py ├── method.py ├── autodock.py ├── rundbx.py ├── dock.py ├── pyqcprot.pyx └── moe.py ├── logo.png ├── examples ├── autodock │ ├── docking │ │ ├── run.sh │ │ ├── config.ini │ │ └── 1a30_ligand.mol2 │ └── rescoring │ │ ├── run.sh │ │ ├── config.ini │ │ └── 1a30_ligand.mol2 ├── autodock_vina_dock │ └── rescoring │ │ ├── run.sh │ │ ├── config.ini │ │ └── 4de2_ligand.mol2 └── tlr7_chapter │ ├── vs │ ├── round1 │ │ ├── config.ini │ │ ├── README │ │ └── ligand.mol2 │ └── round2 │ │ ├── README │ │ ├── config.ini │ │ └── ligand.mol2 │ └── active_decoys │ ├── config.ini │ ├── README │ ├── analysis │ ├── README │ └── compute_hit_rates.py │ └── ligand.mol2 ├── bin ├── rundbx └── extract_top_poses ├── .gitignore ├── setup.py ├── LICENSE.txt └── README.rst /dockbox/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jp43/DockBox/HEAD/logo.png -------------------------------------------------------------------------------- /examples/autodock/docking/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | rundbx -f config.ini -l 1a30_ligand.mol2 -r 1a30_protein.pdb 3 | -------------------------------------------------------------------------------- /examples/autodock/rescoring/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | rundbx -f config.ini -l 1a30_ligand.mol2 -r 1a30_protein.pdb 3 | -------------------------------------------------------------------------------- /examples/autodock_vina_dock/rescoring/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 
rundbx -f config.ini -l 4de2_ligand.mol2 -r 4de2_protein.pdb 3 | -------------------------------------------------------------------------------- /bin/rundbx: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from dockbox import rundbx as dbx 3 | 4 | if __name__ == '__main__': 5 | dbx.Docking().run() 6 | -------------------------------------------------------------------------------- /examples/tlr7_chapter/vs/round1/config.ini: -------------------------------------------------------------------------------- 1 | [DOCKING] 2 | program = dock 3 | rescoring = no 4 | minimize = no 5 | cleanup = 1 6 | 7 | [DOCK] 8 | nposes = 1 9 | charge_method = gas 10 | -------------------------------------------------------------------------------- /examples/autodock/docking/config.ini: -------------------------------------------------------------------------------- 1 | [DOCKING] 2 | program = autodock 3 | rescoring = no 4 | minimize = no 5 | cleanup = no 6 | 7 | [AUTODOCK] 8 | ga_run = 2 9 | spacing = 0.3 10 | 11 | [SITE] 12 | center = 37.092, 37.149, 29.264 13 | boxsize = 30.0, 30.0, 30.0 14 | -------------------------------------------------------------------------------- /examples/autodock/rescoring/config.ini: -------------------------------------------------------------------------------- 1 | [DOCKING] 2 | program = autodock 3 | rescoring = yes 4 | minimize = yes 5 | cleanup = yes 6 | 7 | [RESCORING] 8 | program = autodock 9 | 10 | [AUTODOCK] 11 | ga_run = 2 12 | spacing = 0.3 13 | 14 | [SITE] 15 | center = 37.092, 37.149, 29.264 16 | boxsize = 30.0, 30.0, 30.0 17 | -------------------------------------------------------------------------------- /examples/tlr7_chapter/vs/round1/README: -------------------------------------------------------------------------------- 1 | The current directory includes the input files to run rundbx on target #12 and a compound tested in VS (round 1). 
The config.ini file is the same as the one originally used in our study. 2 | 3 | The rundbx command can be simply executed using: 4 | 5 | rundbx -f config.ini -l ligand.mol2 -r target12.pdb 6 | -------------------------------------------------------------------------------- /examples/tlr7_chapter/vs/round2/README: -------------------------------------------------------------------------------- 1 | The current directory includes the input files to run rundbx on target #12 and a compound tested in VS (round 2). The config.ini file is the same as the one originally used in our study. 2 | 3 | The rundbx command can be simply executed using: 4 | 5 | rundbx -f config.ini -l ligand.mol2 -r target12.pdb 6 | -------------------------------------------------------------------------------- /examples/tlr7_chapter/vs/round2/config.ini: -------------------------------------------------------------------------------- 1 | [DOCKING] 2 | program = autodock, vina, dock 3 | rescoring = yes 4 | minimize = yes 5 | cleanup = 2 6 | 7 | [RESCORING] 8 | program = autodock, vina, dock, dsx 9 | 10 | [AUTODOCK] 11 | ga_run = 10 12 | 13 | [VINA] 14 | num_modes = 10 15 | 16 | [MINIMIZATION] 17 | solvent = vacuo 18 | ncyc = 1000 19 | maxcyc = 2000 20 | cut = 999.0 21 | charge_method = gas 22 | 23 | [DOCK] 24 | nposes = 10 25 | charge_method = gas 26 | 27 | [SITE] 28 | center = 23.395, -12.165, 26.111 29 | boxsize = 35.0, 35.0, 35.0 30 | -------------------------------------------------------------------------------- /examples/tlr7_chapter/active_decoys/config.ini: -------------------------------------------------------------------------------- 1 | [DOCKING] 2 | program = autodock, vina, dock 3 | rescoring = yes 4 | minimize = yes 5 | cleanup = 2 6 | 7 | [RESCORING] 8 | program = autodock, vina, dock, dsx 9 | 10 | [AUTODOCK] 11 | ga_run = 10 12 | 13 | [VINA] 14 | num_modes = 10 15 | 16 | [MINIMIZATION] 17 | solvent = vacuo 18 | ncyc = 1000 19 | maxcyc = 2000 20 | cut = 999.0 21 | charge_method = 
gas 22 | 23 | [DOCK] 24 | nposes = 10 25 | charge_method = gas 26 | 27 | [SITE] 28 | center = 23.395, -12.165, 26.111 29 | boxsize = 35.0, 35.0, 35.0 30 | -------------------------------------------------------------------------------- /examples/autodock_vina_dock/rescoring/config.ini: -------------------------------------------------------------------------------- 1 | [DOCKING] 2 | program = autodock, vina, dock 3 | rescoring = yes 4 | minimize = yes 5 | cleanup = yes 6 | 7 | [RESCORING] 8 | program = autodock, vina, dock 9 | 10 | [MINIMIZATION] 11 | solvent = vacuo 12 | ncyc = 5000 13 | maxcyc = 10000 14 | cut = 999.0 15 | charge_method = gas 16 | 17 | [AUTODOCK] 18 | ga_run = 10 19 | spacing = 0.3 20 | 21 | [VINA] 22 | num_modes = 10 23 | 24 | [DOCK] 25 | nposes = 10 26 | charge_method = gas 27 | rmsd = 1.0 28 | 29 | [SITE] 30 | center = 32.408, 38.328, 27.571 31 | boxsize = 30.0, 30.0, 30.0 32 | -------------------------------------------------------------------------------- /examples/tlr7_chapter/active_decoys/README: -------------------------------------------------------------------------------- 1 | The current directory includes the input files to run rundbx on target #12 and one TLR7 active compound. The config.ini file is the same as the one originally used in our study. 2 | 3 | The rundbx command can be simply executed using: 4 | 5 | rundbx -f config.ini -l ligand.mol2 -r target12.pdb 6 | 7 | * The analysis folder contains results for all the active molecules (103) and decoys (5850). It shows the performance of different docking, rescoring and 8 | consensus strategies in terms of hit-rates and Enrichment Factors (EF). 
9 | 10 | * The mol2files folder contains MOL2 file including all the active molecules and decoys (note that rundbx does not work with MOL2 or PDB files containing 11 | multiple structures, thus, in order to be used with rundbx, MOL2 files inside that folder should be first split in multiple files containing one molecule 12 | each) 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # nfs 7 | *nfs* 8 | 9 | # C extensions 10 | *.c 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | env/ 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | 31 | bin/prepare_* 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
"""Build and installation script for the DockBox package.

Compiles the ``dockbox.pyqcprot`` Cython extension against the NumPy headers
and installs the ``dockbox`` package together with its command-line scripts.
"""
import os
import sys

# Check the Python version *before* importing the build dependencies, so that
# an unsupported interpreter gets the explicit message below instead of an
# unrelated ImportError coming from numpy/Cython.
if not (sys.version_info[0] == 2 and sys.version_info[1] >= 6):
    sys.exit("You need Python 2.6.x or Python 2.7.x to install the DockBox package!")

import numpy as np
from setuptools import setup, Extension

import Cython
from Cython.Distutils import build_ext
from Cython.Build import cythonize

# Older NumPy releases expose the header location under a different name.
try:
    numpy_include = np.get_include()
except AttributeError:
    numpy_include = np.get_numpy_include()

ext_modules = [Extension(
    name='dockbox.pyqcprot',
    sources=["dockbox/pyqcprot.pyx"],
    include_dirs=[numpy_include])]

# Read the long description next to this file (not relative to the current
# working directory) and close the handle once it has been read.
here = os.path.dirname(os.path.abspath(__file__))
with open(os.path.join(here, 'README.rst')) as readme_file:
    long_description = readme_file.read()

setup(name='dockbox',
    version='1.4',
    packages=['dockbox'],
    scripts=['bin/rundbx', 'bin/extract_top_poses'],
    install_requires=['mdkit', 'pandas<=0.24.2', 'nwalign', 'oldnumeric'],
    ext_modules = cythonize(ext_modules),
    license='LICENSE.txt',
    description='Platform package to simplify the use of docking programs and consensus methods',
    long_description=long_description,
)
directory contains folders generated with extract_dbx_best_poses for the best strategies identified in our study. To save space, we did not provide folders for all the strategies. 2 | 3 | The current directory also contains the script compute_hit_rates.py, which provides the hit-rate (top 100) and Enrichment Factor (EF) for every strategy. 4 | 5 | The compute_hit_rates.py script requires each folder to include a file called best_poses.csv (normally generated with extract_dbx_best_poses). 6 | 7 | In each best_poses.csv, the active compounds should be listed first, followed by the decoys. When running compute_hit_rates.py, simply specify the 8 | number of active molecules in the set. 9 | 10 | Here each */best_poses.csv file includes 103 actives followed by 5850 decoys. Therefore, compute_hit_rates.py can be directly executed with the command: 11 | 12 | python compute_hit_rates.py -n 103 13 | 14 | Running the above command will generate a directory called top_hits containing the top 100 hits obtained with each strategy. It will also contain a file called ranking.csv in which the strategies are ranked according to their hit rates (the EFs are also reported). 
import os
import sys
import method
import subprocess

from mdkit.utility import mol2

required_programs = ['gnina']
default_settings = {'type': 'CNNscore', 'cnn': None}


class Gnina(method.ScoringMethod):
    """Scoring method that rescores a pose by running ``gnina --score_only``."""

    def write_rescoring_script(self, filename, file_r, file_l):
        """Write to `filename` a bash script scoring `file_l` against `file_r`.

        The ``--cnn`` flag is only added when the 'cnn' option is set to
        something other than None / "none" / "no" (case-insensitive).
        The generated script redirects GNINA's output to ``gnina.out``.
        """
        cnn_model = self.options['cnn']
        cnn_disabled = cnn_model is None or cnn_model.lower() in ["none", "no"]
        cnn_flag = "" if cnn_disabled else " --cnn %s" % cnn_model

        gnina_command = "gnina -r %s -l %s%s --score_only > gnina.out" % (file_r, file_l, cnn_flag)
        script_lines = [
            "#!/bin/bash",
            "",
            "rm -rf gnina.out",
            "",
            "# execute GNINA",
            gnina_command,
            "",  # the script ends with a newline
        ]

        # write GNINA script
        with open(filename, 'w') as script_file:
            script_file.write("\n".join(script_lines))

    def extract_rescoring_results(self, filename):
        """Append the score found in ``gnina.out`` to `filename`.

        The score is the second whitespace-separated field of the first line
        starting with the 'type' option; 'NaN' is appended when ``gnina.out``
        is missing or contains no such line.
        """
        with open(filename, 'a') as score_file:
            score = None
            if os.path.isfile('gnina.out'):
                with open('gnina.out', 'r') as gnina_output:
                    for record in gnina_output:
                        if record.startswith(self.options['type']):
                            score = record.split()[1]
                            break
            score_file.write(('NaN' if score is None else score) + '\n')
"""Compute hit rates and enrichment factors for docking/consensus strategies.

Every sub-directory of the current directory that contains a file called
best_poses.csv is treated as one strategy.  For each strategy, the top
`nhits` compounds are written to top_hits/<method>.csv and a summary of all
the strategies is written to top_hits/ranking.csv.
"""
import os
import sys
import pandas as pd
from glob import glob
import shutil
import argparse

parser = argparse.ArgumentParser(description="Computes hit rates and enrichment factors from output of extract_dbx_best_poses (each folder generated with extract_dbx_best_poses should contain a file called best_poses.csv)")

parser.add_argument('-n',
    dest='nactives',
    type=int,
    required=True,
    help='Number of active compounds in the set (in best_poses.csv files, all the active compounds should be provided first, followed by decoys")')

nhits = 100 # number of hits that should be considered for hit rates


def score_columns(dirname):
    """Return the score columns to rank by for strategy folder `dirname`.

    The columns are deduced from the folder-name prefix (docking, rescoring,
    cd, sbcd).  An empty list is returned for an unrecognized prefix.
    """
    if dirname.startswith("docking"):
        return ["score"]
    if dirname.startswith("rescoring"):
        return [dirname.split("_")[-1]]
    programs = dirname.split("_")[1:]
    if dirname.startswith("cd"):
        return ["score_" + prgm for prgm in programs]
    if dirname.startswith("sbcd"):
        return list(programs)
    return []


def enrichment_factor(df, nactives):
    """Return the enrichment factor of the compounds flagged by 'consensus'.

    EF = (TP / (TP + FN)) * (N / (TP + FP)) where N is the total number of
    compounds.  NaN is returned when the ratio is undefined, i.e. when there
    is no active compound or when no compound passed the consensus.
    """
    selected = df['consensus'].astype(bool)
    is_active = df['status'] == "active"

    tp = int((selected & is_active).sum())   # True Positives
    fp = int((selected & ~is_active).sum())  # False Positives
    nselected = tp + fp

    if nactives == 0 or nselected == 0:
        return float('nan')
    # tp + fn equals nactives and tp + fn + fp + tn equals len(df)
    return (tp * 1. / nactives) * (len(df) * 1. / nselected)


def main(argv=None):
    """Run the analysis in the current directory.

    `argv` is the list of command-line arguments (defaults to sys.argv[1:]).
    """
    args = parser.parse_args(argv)
    if args.nactives < 0:
        parser.error("the number of active compounds should not be negative")

    # sorted to make the processing order reproducible
    dirs = sorted(name for name in glob("*")
                  if os.path.isfile(os.path.join(name, "best_poses.csv")))

    topdir = "top_hits"
    shutil.rmtree(topdir, ignore_errors=True)
    os.mkdir(topdir)

    info = {"method": [], "EF": [], "hit-rate": []}

    for dirname in dirs:
        columns = score_columns(dirname)
        if not columns:
            # do not silently reuse the columns of the previous folder
            sys.stderr.write("Skipping folder %s: unrecognized strategy prefix\n" % dirname)
            continue

        df = pd.read_csv(os.path.join(dirname, "best_poses.csv"))
        # the first nactives rows are the actives, the remaining ones are decoys
        df['status'] = "decoy"
        df.iloc[:args.nactives, df.columns.get_loc('status')] = "active"

        if 'consensus' in df.columns:
            ef = enrichment_factor(df, args.nactives)
            # only the compounds that passed the consensus are ranked
            df = df[df['consensus'].astype(bool)]
        else:
            ef = 1.

        for cc in columns:
            df_top_hits = df.sort_values(by=cc).head(nhits)
            if dirname.startswith(("cd", "sbcd")):
                method = dirname + "_scored_with_" + cc.split('_')[-1]
            else:
                method = dirname
            csvfile = os.path.join(topdir, method + ".csv")
            df_top_hits[['ligID', cc]].to_csv(csvfile, index=False, float_format="%.3f")
            info["method"].append(method)
            info["EF"].append(ef)
            info["hit-rate"].append(int((df_top_hits['status'] == "active").sum()))

    df_info = pd.DataFrame(info)
    df_info = df_info.sort_values('hit-rate', ascending=False)
    df_info[["method", "hit-rate", "EF"]].to_csv(os.path.join(topdir, "ranking.csv"), index=False, float_format="%.3f")


if __name__ == '__main__':
    main()
-------------------------------------------------------------------------------- /examples/tlr7_chapter/vs/round1/ligand.mol2: -------------------------------------------------------------------------------- 1 | @MOLECULE 2 | ZINC000001529646 3 | 37 36 0 0 0 4 | SMALL 5 | USER_CHARGES 6 | NO_NAME 7 | @ATOM 8 | 1 N1 -0.0111 0.9658 0.0074 N.pl3 1 LIG -0.7800 9 | 2 H2 0.0021 -0.0041 0.0020 H 1 LIG 0.4400 10 | 3 H3 0.8223 1.4622 0.0021 H 1 LIG 0.4200 11 | 4 C4 -1.1486 1.6021 0.0162 C.cat 1 LIG 0.7400 12 | 5 N5 -1.1673 2.9768 0.0238 N.pl3 1 LIG -0.7200 13 | 6 C6 0.0914 3.7265 0.0219 C.3 1 LIG 0.1100 14 | 7 C7 -0.2086 5.2268 0.0314 C.3 1 LIG -0.1400 15 | 8 C8 1.1060 6.0097 0.0295 C.3 1 LIG -0.1200 16 | 9 C9 0.8060 7.5100 0.0390 C.3 1 LIG -0.0500 17 | 10 H10 0.1608 7.7461 0.8852 H 1 LIG 0.1400 18 | 11 N11 0.1301 7.8822 -1.2110 N.4 1 LIG -0.6100 19 | 12 H12 -0.0677 8.8716 -1.2047 H 1 LIG 0.4300 20 | 13 H13 0.7274 7.6636 -1.9943 H 1 LIG 0.4300 21 | 14 H14 -0.7336 7.3670 -1.2926 H 1 LIG 0.4100 22 | 15 C15 2.0960 8.2794 0.1609 C.2 1 LIG 0.4900 23 | 16 O16 2.5136 8.9234 -0.7864 O.co2 1 LIG -0.6700 24 | 17 O17 2.7222 8.2579 1.2069 O.co2 1 LIG -0.6300 25 | 18 N18 -2.3298 0.8986 0.0180 N.pl3 1 LIG -0.6100 26 | 19 C19 -2.3099 -0.5662 0.0099 C.3 1 LIG 0.0800 27 | 20 H20 -1.4605 -0.9213 0.5934 H 1 LIG 0.0900 28 | 21 C21 -3.6073 -1.0973 0.6228 C.3 1 LIG -0.1800 29 | 22 C22 -3.6723 -0.7074 2.0771 C.2 1 LIG 0.4900 30 | 23 O23 -2.7650 -0.0644 2.5770 O.co2 1 LIG -0.7000 31 | 24 O24 -4.6321 -1.0350 2.7538 O.co2 1 LIG -0.7000 32 | 25 C25 -2.1860 -1.0591 -1.4088 C.2 1 LIG 0.4800 33 | 26 O26 -2.1204 -0.2608 -2.3280 O.co2 1 LIG -0.6900 34 | 27 O27 -2.1514 -2.2562 -1.6374 O.co2 1 LIG -0.6700 35 | 28 H28 -2.0138 3.4503 0.0304 H 1 LIG 0.4200 36 | 29 H29 0.6707 3.4661 0.9077 H 1 LIG 0.0900 37 | 30 H30 0.6623 3.4758 -0.8722 H 1 LIG 0.0800 38 | 31 H31 -0.7879 5.4871 -0.8544 H 1 LIG 0.0700 39 | 32 H32 -0.7794 5.4774 0.9255 H 1 LIG 0.0900 40 | 33 H33 1.6853 5.7493 0.9153 H 1 LIG 0.1200 41 | 34 H34 
"""Helpers to re-run licensed programs until a license becomes available.

`wrap_command` wraps a shell command in a bash retry loop which calls this
file as a script (see `run`) to decide whether the command failed because no
license was available.
"""
import sys
import os
import time
import subprocess

# message looked up in the log files to detect that no license was available
_LICENSE_ERROR = 'Licensed number of users already reached'


def _has_license_error(logfile):
    """Return True if `logfile` contains the license error message."""
    with open(logfile) as logf:
        for line in logf:
            if _LICENSE_ERROR in line:
                return True
    return False


def wrap_command(cmd, prgm):
    """Return a bash loop re-running `cmd` until no license error is detected.

    `prgm` should be 'moe', 'gold' or 'schrodinger'.  For 'schrodinger', the
    first word of `cmd` should be 'glide', 'prepwizard' or 'ifd'; the name of
    the log file is deduced from the input file given in `cmd`.

    Raises ValueError if the program or the Schrodinger command is not
    recognized.
    """
    exe = os.path.abspath(__file__)
    if exe[-1] == 'c':
        exe = exe[:-1] # get .py from .pyc

    if prgm == 'moe':
        logfile = 'moebatch.log'
        # all the output of the command is redirected to the log file
        run_lines = "%(cmd)s &> %(logfile)s"
        extra_args = ""

    elif prgm == 'gold':
        logfile = 'gold.err'
        run_lines = "%(cmd)s > /dev/null"
        extra_args = ""

    elif prgm == 'schrodinger':
        cmd_s = cmd.split()
        exe_cmd = cmd_s[0]
        if exe_cmd == 'glide':
            filename1 = cmd_s[-1]
        elif exe_cmd == 'prepwizard':
            filename1 = cmd_s[-2]
        elif exe_cmd == 'ifd':
            filename1 = cmd_s[-1]
        else:
            raise ValueError("Schrodinger's command %s not recognized!"%exe_cmd)
        splitext_0 = os.path.splitext(filename1)[0]
        suffix = os.path.basename(splitext_0)
        logfile = suffix + '.log'
        # the job ID printed by the command is passed on to the license check
        run_lines = """output=`%(cmd)s`
    jobid=`echo "$output" | sed -n -e 's/^.*JobId: //p'`"""
        extra_args = " $jobid"

    else:
        # previously failed with an UnboundLocalError on the return statement
        raise ValueError("Program %s not recognized!"%prgm)

    values = {'cmd': cmd, 'logfile': logfile, 'exe': exe, 'prgm': prgm}
    values['run_lines'] = run_lines % values
    values['extra_args'] = extra_args

    # re-run the command every 10 seconds until a license is found
    newcmd = """while true; do
    %(run_lines)s
    status=`python %(exe)s %(prgm)s %(logfile)s%(extra_args)s`
    if [ "$status" == "0" ]; then break; fi
    sleep 10s
done"""% values

    return newcmd


def check_schrodinger_license(logfile, jobid):
    """Follow Schrodinger job `jobid` and return 1 if it had license issues.

    The output of 'jobcontrol -list' is polled every 2 seconds while the job
    is listed.  If the license error message shows up in `logfile`, the job
    is killed (only once) with 'jobcontrol -killnooutput' and 1 is returned
    when the job is no longer listed.  Otherwise 0 is returned.
    """
    status = 0
    is_job_killed = False

    while True:
        # (A) check if the job is still running
        # universal_newlines=True returns text with Python 2 and Python 3
        output = subprocess.check_output('jobcontrol -list', shell=True,
            executable='/bin/bash', universal_newlines=True)
        is_job_done = jobid not in output
        if not is_job_done:
            time.sleep(2) # sleep for 2 sec

        # (B) check if the job has license issues
        if not is_job_killed and _has_license_error(logfile):
            subprocess.check_output('jobcontrol -killnooutput %s'%jobid,
                shell=True, executable='/bin/bash')
            status = 1
            is_job_killed = True

        if is_job_done:
            break
    return status


def check_moe_license(logfile):
    """Return 1 if `logfile` reports a license error, 0 otherwise."""
    return 1 if _has_license_error(logfile) else 0


def check_gold_license(logfile):
    """Return 1 if `logfile` exists and reports a license error, 0 otherwise."""
    if os.path.exists(logfile) and _has_license_error(logfile):
        return 1
    return 0


def run(args):
    """Check the license status for the program given in `args`.

    `args` has the layout of sys.argv: script name, program name, log file
    and, for 'schrodinger' only, the job ID.  Returns 1 when a license error
    was detected, 0 otherwise (including for an unknown program name).

    Raises ValueError when arguments are missing.
    """
    # args[0] is the script name, hence 3 items for two arguments
    if len(args) < 3:
        raise ValueError("check_licence.py should have at least two arguments")

    # first argument should be the program name
    prgm = args[1]
    status = 0

    # second argument should be the log file where to look for warning/error messages
    logfile = args[2]

    if prgm == 'moe':
        status = check_moe_license(logfile)
    elif prgm == 'gold':
        status = check_gold_license(logfile)
    elif prgm == 'schrodinger':
        # for Schrodinger, an extra argument is expected (job ID)
        if len(args) < 4:
            raise ValueError("check_licence.py requires the job ID as third argument for schrodinger")
        jobid = args[3]
        status = check_schrodinger_license(logfile, jobid)

    return status


if __name__ == '__main__':
    status = run(sys.argv)
    print(status)
import os
import sys
import shutil
import subprocess
from glob import glob

from mdkit.utility import mol2
import autodock

required_programs = ['prepare_ligand4.py', 'prepare_receptor4.py', 'vina', 'obabel']

default_settings = {'cpu': '1', 'num_modes': '9', 'energy_range': '3', 'seed': None}

# beginning of the bash script shared by the docking and rescoring scripts
_SCRIPT_HEADER = """#!/bin/bash
set -e

MGLPATH=`which prepare_ligand4.py`
MGLPATH=`python -c "print '/'.join('$MGLPATH'.split('/')[:-3])"`
export PYTHONPATH=$PYTHONPATH:$MGLPATH

# prepare ligand
prepare_ligand4.py -l %(file_l)s -o ligand.pdbqt
python check_ligand_pdbqt.py ligand.pdbqt

"""

_DOCKING_FOOTER = """# prepare receptor
prepare_receptor4.py -U nphs_lps_waters -r %(file_r)s -o target.pdbqt &> prepare_receptor4.log
python check_ions.py target.pdbqt prepare_receptor4.log

# run vina
vina --config vina.config 1> vina.out 2> vina.err"""

# for rescoring, the receptor is only prepared if target.pdbqt is not there yet
_RESCORING_FOOTER = """if [ ! -f target.pdbqt ]; then
  prepare_receptor4.py -U nphs_lps_waters -r %(file_r)s -o target.pdbqt > prepare_receptor4.log
  python check_ions.py target.pdbqt prepare_receptor4.log
fi

# run vina
vina --score_only --config vina.config > vina.out"""


class Vina(autodock.ADBased):
    """Docking and rescoring with AutoDock Vina."""

    def __init__(self, instance, site, options):
        """Initialize the base class and store the box definition as options.

        `site[1]` and `site[2]` are comma-separated strings whose first three
        fields are stored as the center_x/y/z and size_x/y/z options
        (presumably the center and the size of the docking box; the layout of
        `site` is defined by the caller).
        """
        super(Vina, self).__init__(instance, site, options)

        # list comprehensions (instead of map(str.strip, ...)) so that
        # unicode values are handled and the results can be indexed
        center = [value.strip() for value in site[1].split(',')]
        boxsize = [value.strip() for value in site[2].split(',')]

        for idx, xyz in enumerate(['x', 'y', 'z']):
            self.options['center_'+xyz] = center[idx]
            self.options['size_'+xyz] = boxsize[idx]

    def write_docking_script(self, filename, file_r, file_l, rescoring=False):
        """Write the Vina configuration file and the bash script `filename`.

        `file_r` and `file_l` are the receptor and ligand input files.  When
        `rescoring` is True, the script runs vina with --score_only instead
        of performing a docking run.  Every option whose value is not None is
        written to vina.config.
        """
        self.write_check_ligand_pdbqt_script('check_ligand_pdbqt.py')
        self.write_check_ions_script('check_ions.py')

        # write vina config file
        with open('vina.config', 'w') as cf:
            # write mandatory options
            cf.write('receptor = target.pdbqt\n')
            cf.write('ligand = ligand.pdbqt\n')
            # write other options
            for key, value in self.options.items():
                if value is not None:
                    cf.write('%s = %s\n' % (key, value))

        # write vina script
        footer = _RESCORING_FOOTER if rescoring else _DOCKING_FOOTER
        script = (_SCRIPT_HEADER + footer) % {'file_r': file_r, 'file_l': file_l}
        with open(filename, 'w') as ff:
            ff.write(script)

    def extract_docking_results(self, file_s, input_file_r, input_file_l):
        """Extract output structures in .mol2 formats

        The poses in ligand_out.pdbqt are converted to pose-*.mol2 files with
        obabel and the corresponding scores (one per line) are written to
        `file_s`.  If the poses cannot be extracted, the pose-*.mol2 files are
        removed and `file_s` is left empty.
        """
        poses_extracted = False
        if os.path.exists('ligand_out.pdbqt'):
            try:
                subprocess.check_output('obabel -ipdbqt ligand_out.pdbqt -omol2 -Opose-.mol2 -m &>/dev/null', shell=True, executable='/bin/bash')
                self.update_output_mol2files(sample=input_file_l)
                poses_extracted = True
            except Exception:
                # best effort: any failure means that no pose is provided
                # (a bare except would also swallow KeyboardInterrupt)
                for mol2file in glob('pose-*.mol2'):
                    os.remove(mol2file)
                poses_extracted = False

        if poses_extracted:
            with open('ligand_out.pdbqt', 'r') as dlgf:
                with open(file_s, 'w') as sf:
                    for line in dlgf:
                        if line.startswith('REMARK VINA RESULT:'):
                            score = line[19:].split()[0].strip()
                            sf.write(score+'\n')
        else:
            open(file_s, 'w').close()

    def write_rescoring_script(self, filename, file_r, file_l):
        """Write the bash script `filename` used to rescore `file_l` with Vina."""
        self.write_docking_script(filename, file_r, file_l, rescoring=True)

    def extract_rescoring_results(self, filename):
        """Append the score(s) found in vina.out to `filename`.

        'NaN' is appended when vina.out is missing or contains no line
        starting with 'Affinity:', so that a score is always written (same
        convention as the other rescoring methods).  The ligand.pdbqt and
        target.pdbqt files are removed afterwards.
        """
        scores = []
        if os.path.isfile('vina.out'):
            with open('vina.out', 'r') as outf:
                for line in outf:
                    if line.startswith('Affinity:'):
                        scores.append(line.split()[1].strip())

        with open(filename, 'a') as sf:
            if scores:
                for score in scores:
                    sf.write(score+'\n')
            else:
                sf.write('NaN\n')

        for tmpfile in ('ligand.pdbqt', 'target.pdbqt'):
            if os.path.isfile(tmpfile):
                os.remove(tmpfile)
-------------------------------------------------------------------------------- /examples/autodock_vina_dock/rescoring/4de2_ligand.mol2: -------------------------------------------------------------------------------- 1 | ### 2 | ### Created by X-TOOL on Mon Nov 18 15:37:28 2013 3 | ### 4 | 5 | @MOLECULE 6 | 4de2 ligand 7 | 43 45 1 0 0 8 | SMALL 9 | GAST_HUCK 10 | 11 | 12 | @ATOM 13 | 1 O14 -9.2570 55.8220 13.5960 O.2 1 DN3 -0.3730 14 | 2 C13 -8.8590 55.9810 12.4460 C.2 1 DN3 0.2307 15 | 3 C15 -9.6340 56.8840 11.5140 C.ar 1 DN3 0.0495 16 | 4 C16 -9.2810 56.9990 10.1710 C.ar 1 DN3 -0.0484 17 | 5 C17 -9.9640 57.9060 9.3610 C.ar 1 DN3 -0.0627 18 | 6 C18 -11.0130 58.6660 9.8650 C.ar 1 DN3 -0.0510 19 | 7 C19 -11.2930 58.6230 11.2310 C.ar 1 DN3 0.0068 20 | 8 C21 -12.4100 59.4660 11.8250 C.3 1 DN3 0.0024 21 | 9 N22 -13.6880 58.9840 11.2650 N.4 1 DN3 0.2429 22 | 10 C23 -13.9840 57.6630 11.8460 C.3 1 DN3 -0.0419 23 | 11 C24 -14.7930 59.9020 11.5810 C.3 1 DN3 -0.0419 24 | 12 C20 -10.6200 57.7150 12.0380 C.ar 1 DN3 -0.0351 25 | 13 N12 -7.7600 55.3740 11.9800 N.am 1 DN3 -0.2334 26 | 14 C10 -6.9040 54.5610 12.7580 C.ar 1 DN3 0.0663 27 | 15 C11 -5.5580 54.5190 12.4080 C.ar 1 DN3 -0.0321 28 | 16 C6 -4.6570 53.7620 13.1500 C.ar 1 DN3 -0.0043 29 | 17 C7 -5.0900 53.0350 14.2540 C.ar 1 DN3 -0.0672 30 | 18 C8 -6.4250 53.0920 14.6240 C.ar 1 DN3 -0.0745 31 | 19 C9 -7.3410 53.8320 13.8640 C.ar 1 DN3 -0.0410 32 | 20 C2 -3.2280 53.7180 12.7620 C.2 1 DN3 0.1452 33 | 21 N1 -2.7510 53.9830 11.5290 N.2 1 DN3 -0.1437 34 | 22 N5 -1.3720 53.8130 11.5380 N.pl3 1 DN3 -0.1036 35 | 23 N4 -1.0230 53.4450 12.8380 N.2 1 DN3 -0.0518 36 | 24 N3 -2.2040 53.3760 13.5800 N.2 1 DN3 -0.1756 37 | 25 H1 -8.4840 56.3900 9.7601 H 1 DN3 0.0611 38 | 26 H2 -9.6725 58.0199 8.3232 H 1 DN3 0.0611 39 | 27 H3 -11.6074 59.2857 9.2034 H 1 DN3 0.0594 40 | 28 H4 -12.2613 60.5234 11.5609 H 1 DN3 0.0980 41 | 29 H5 -12.4175 59.3590 12.9197 H 1 DN3 0.0980 42 | 30 H6 -13.6019 58.9034 10.2639 H 1 DN3 0.2042 43 | 31 H7 -14.9347 
57.2881 11.4390 H 1 DN3 0.0778 44 | 32 H8 -14.0633 57.7528 12.9394 H 1 DN3 0.0778 45 | 33 H9 -13.1751 56.9617 11.5933 H 1 DN3 0.0778 46 | 34 H10 -14.5814 60.8922 11.1511 H 1 DN3 0.0778 47 | 35 H11 -14.8954 59.9905 12.6726 H 1 DN3 0.0778 48 | 36 H12 -15.7285 59.5099 11.1555 H 1 DN3 0.0778 49 | 37 H13 -10.8663 57.6525 13.0918 H 1 DN3 0.0609 50 | 38 H14 -7.5290 55.5047 11.0159 H 1 DN3 0.2251 51 | 39 H15 -5.2083 55.0811 11.5496 H 1 DN3 0.0504 52 | 40 H16 -4.3906 52.4299 14.8195 H 1 DN3 0.0498 53 | 41 H17 -6.7624 52.5609 15.5067 H 1 DN3 0.0579 54 | 42 H18 -8.3903 53.8378 14.1360 H 1 DN3 0.0505 55 | 43 H19 -0.7319 53.9349 10.7403 H 1 DN3 0.2946 56 | @BOND 57 | 1 1 2 2 58 | 2 2 3 1 59 | 3 2 13 am 60 | 4 3 4 ar 61 | 5 3 12 ar 62 | 6 4 5 ar 63 | 7 5 6 ar 64 | 8 6 7 ar 65 | 9 7 8 1 66 | 10 7 12 ar 67 | 11 8 9 1 68 | 12 9 10 1 69 | 13 9 11 1 70 | 14 13 14 1 71 | 15 14 15 ar 72 | 16 14 19 ar 73 | 17 15 16 ar 74 | 18 16 17 ar 75 | 19 16 20 1 76 | 20 17 18 ar 77 | 21 18 19 ar 78 | 22 20 21 2 79 | 23 20 24 1 80 | 24 21 22 1 81 | 25 22 23 1 82 | 26 23 24 2 83 | 27 4 25 1 84 | 28 5 26 1 85 | 29 6 27 1 86 | 30 8 28 1 87 | 31 8 29 1 88 | 32 9 30 1 89 | 33 10 31 1 90 | 34 10 32 1 91 | 35 10 33 1 92 | 36 11 34 1 93 | 37 11 35 1 94 | 38 11 36 1 95 | 39 12 37 1 96 | 40 13 38 1 97 | 41 15 39 1 98 | 42 17 40 1 99 | 43 18 41 1 100 | 44 19 42 1 101 | 45 22 43 1 102 | @SUBSTRUCTURE 103 | 1 DN3 1 104 | 105 | -------------------------------------------------------------------------------- /examples/autodock/docking/1a30_ligand.mol2: -------------------------------------------------------------------------------- 1 | ### 2 | ### Created by X-TOOL on Mon Nov 18 12:13:00 2013 3 | ### 4 | 5 | @MOLECULE 6 | 1a30 ligand 7 | 49 48 1 0 0 8 | SMALL 9 | GAST_HUCK 10 | 11 | 12 | @ATOM 13 | 1 N 4.8410 27.5760 5.3100 N.4 1 MOL 0.2380 14 | 2 CA 5.7330 26.3940 5.1650 C.3 1 MOL 0.0665 15 | 3 C 7.1740 26.7720 5.5040 C.2 1 MOL 0.2269 16 | 4 O 7.6240 27.8870 5.2180 O.2 1 MOL -0.3907 17 | 5 CB 5.6380 25.8180 
3.7500 C.3 1 MOL 0.0194 18 | 6 CG 5.8270 26.8460 2.6520 C.3 1 MOL 0.0075 19 | 7 CD 5.6250 26.2750 1.2620 C.2 1 MOL 0.0351 20 | 8 OE1 4.6630 25.4970 1.0650 O.co2 1 MOL -0.5690 21 | 9 OE2 6.4250 26.6200 0.3630 O.co2 1 MOL -0.5690 22 | 10 N 7.8730 25.8560 6.1630 N.am 1 MOL -0.2595 23 | 11 CA 9.2580 26.0720 6.5600 C.3 1 MOL 0.1424 24 | 12 C 10.1010 24.9180 6.0470 C.2 1 MOL 0.2051 25 | 13 O 10.1820 23.8640 6.6820 O.2 1 MOL -0.3943 26 | 14 CB 9.3460 26.1960 8.0790 C.3 1 MOL 0.0406 27 | 15 CG 8.6490 27.4410 8.5880 C.2 1 MOL 0.0393 28 | 16 OD1 9.2700 28.5180 8.5210 O.co2 1 MOL -0.5688 29 | 17 OD2 7.4740 27.3570 9.0120 O.co2 1 MOL -0.5688 30 | 18 N 10.6860 25.1150 4.8690 N.am 1 MOL -0.2694 31 | 19 CA 11.4980 24.0930 4.2260 C.3 1 MOL 0.0946 32 | 20 C 12.9840 24.3990 4.2680 C.2 1 MOL 0.0601 33 | 21 O 13.7840 23.4420 4.1920 O.co2 1 MOL -0.5666 34 | 22 CB 11.0380 23.9220 2.7850 C.3 1 MOL -0.0156 35 | 23 CG 9.5830 23.4660 2.7700 C.3 1 MOL -0.0431 36 | 24 CD1 8.8190 24.1970 1.7080 C.3 1 MOL -0.0625 37 | 25 CD2 9.5210 21.9550 2.6020 C.3 1 MOL -0.0625 38 | 26 OXT 13.3340 25.5920 4.3780 O.co2 1 MOL -0.5666 39 | 27 H1 3.8947 27.3126 5.0836 H 1 MOL 0.2015 40 | 28 H2 5.1455 28.3055 4.6845 H 1 MOL 0.2015 41 | 29 H3 4.8784 27.9107 6.2601 H 1 MOL 0.2015 42 | 30 H4 5.4024 25.6212 5.8745 H 1 MOL 0.1099 43 | 31 H5 4.6449 25.3608 3.6285 H 1 MOL 0.0363 44 | 32 H6 6.4134 25.0458 3.6380 H 1 MOL 0.0363 45 | 33 H7 6.8488 27.2476 2.7204 H 1 MOL 0.0434 46 | 34 H8 5.1023 27.6592 2.8052 H 1 MOL 0.0434 47 | 35 H9 7.4339 24.9880 6.3949 H 1 MOL 0.1886 48 | 36 H10 9.6222 27.0066 6.1085 H 1 MOL 0.0819 49 | 37 H11 10.4057 26.2377 8.3712 H 1 MOL 0.0478 50 | 38 H12 8.8743 25.3133 8.5353 H 1 MOL 0.0478 51 | 39 H13 10.5637 25.9949 4.4098 H 1 MOL 0.1875 52 | 40 H14 11.3353 23.1452 4.7600 H 1 MOL 0.0726 53 | 41 H15 11.6644 23.1676 2.2866 H 1 MOL 0.0308 54 | 42 H16 11.1276 24.8815 2.2546 H 1 MOL 0.0308 55 | 43 H17 9.1313 23.7168 3.7411 H 1 MOL 0.0297 56 | 44 H18 7.7727 23.8576 1.7097 H 1 MOL 0.0232 57 | 45 H19 
9.2693 23.9921 0.7255 H 1 MOL 0.0232 58 | 46 H20 8.8541 25.2779 1.9092 H 1 MOL 0.0232 59 | 47 H21 10.0967 21.4728 3.4058 H 1 MOL 0.0232 60 | 48 H22 9.9482 21.6766 1.6273 H 1 MOL 0.0232 61 | 49 H23 8.4734 21.6232 2.6515 H 1 MOL 0.0232 62 | @BOND 63 | 1 3 2 1 64 | 2 2 5 1 65 | 3 2 1 1 66 | 4 5 6 1 67 | 5 6 7 1 68 | 6 7 8 ar 69 | 7 7 9 ar 70 | 8 3 4 2 71 | 9 12 11 1 72 | 10 11 14 1 73 | 11 11 10 1 74 | 12 14 15 1 75 | 13 15 16 ar 76 | 14 15 17 ar 77 | 15 12 13 2 78 | 16 19 20 1 79 | 17 19 22 1 80 | 18 18 19 1 81 | 19 22 23 1 82 | 20 23 25 1 83 | 21 23 24 1 84 | 22 20 26 ar 85 | 23 20 21 ar 86 | 24 10 3 am 87 | 25 12 18 am 88 | 26 1 27 1 89 | 27 1 28 1 90 | 28 1 29 1 91 | 29 2 30 1 92 | 30 5 31 1 93 | 31 5 32 1 94 | 32 6 33 1 95 | 33 6 34 1 96 | 34 10 35 1 97 | 35 11 36 1 98 | 36 14 37 1 99 | 37 14 38 1 100 | 38 18 39 1 101 | 39 19 40 1 102 | 40 22 41 1 103 | 41 22 42 1 104 | 42 23 43 1 105 | 43 24 44 1 106 | 44 24 45 1 107 | 45 24 46 1 108 | 46 25 47 1 109 | 47 25 48 1 110 | 48 25 49 1 111 | @SUBSTRUCTURE 112 | 1 MOL 1 113 | 114 | -------------------------------------------------------------------------------- /examples/autodock/rescoring/1a30_ligand.mol2: -------------------------------------------------------------------------------- 1 | ### 2 | ### Created by X-TOOL on Mon Nov 18 12:13:00 2013 3 | ### 4 | 5 | @MOLECULE 6 | 1a30 ligand 7 | 49 48 1 0 0 8 | SMALL 9 | GAST_HUCK 10 | 11 | 12 | @ATOM 13 | 1 N 4.8410 27.5760 5.3100 N.4 1 MOL 0.2380 14 | 2 CA 5.7330 26.3940 5.1650 C.3 1 MOL 0.0665 15 | 3 C 7.1740 26.7720 5.5040 C.2 1 MOL 0.2269 16 | 4 O 7.6240 27.8870 5.2180 O.2 1 MOL -0.3907 17 | 5 CB 5.6380 25.8180 3.7500 C.3 1 MOL 0.0194 18 | 6 CG 5.8270 26.8460 2.6520 C.3 1 MOL 0.0075 19 | 7 CD 5.6250 26.2750 1.2620 C.2 1 MOL 0.0351 20 | 8 OE1 4.6630 25.4970 1.0650 O.co2 1 MOL -0.5690 21 | 9 OE2 6.4250 26.6200 0.3630 O.co2 1 MOL -0.5690 22 | 10 N 7.8730 25.8560 6.1630 N.am 1 MOL -0.2595 23 | 11 CA 9.2580 26.0720 6.5600 C.3 1 MOL 0.1424 24 | 12 C 10.1010 24.9180 6.0470 
C.2 1 MOL 0.2051 25 | 13 O 10.1820 23.8640 6.6820 O.2 1 MOL -0.3943 26 | 14 CB 9.3460 26.1960 8.0790 C.3 1 MOL 0.0406 27 | 15 CG 8.6490 27.4410 8.5880 C.2 1 MOL 0.0393 28 | 16 OD1 9.2700 28.5180 8.5210 O.co2 1 MOL -0.5688 29 | 17 OD2 7.4740 27.3570 9.0120 O.co2 1 MOL -0.5688 30 | 18 N 10.6860 25.1150 4.8690 N.am 1 MOL -0.2694 31 | 19 CA 11.4980 24.0930 4.2260 C.3 1 MOL 0.0946 32 | 20 C 12.9840 24.3990 4.2680 C.2 1 MOL 0.0601 33 | 21 O 13.7840 23.4420 4.1920 O.co2 1 MOL -0.5666 34 | 22 CB 11.0380 23.9220 2.7850 C.3 1 MOL -0.0156 35 | 23 CG 9.5830 23.4660 2.7700 C.3 1 MOL -0.0431 36 | 24 CD1 8.8190 24.1970 1.7080 C.3 1 MOL -0.0625 37 | 25 CD2 9.5210 21.9550 2.6020 C.3 1 MOL -0.0625 38 | 26 OXT 13.3340 25.5920 4.3780 O.co2 1 MOL -0.5666 39 | 27 H1 3.8947 27.3126 5.0836 H 1 MOL 0.2015 40 | 28 H2 5.1455 28.3055 4.6845 H 1 MOL 0.2015 41 | 29 H3 4.8784 27.9107 6.2601 H 1 MOL 0.2015 42 | 30 H4 5.4024 25.6212 5.8745 H 1 MOL 0.1099 43 | 31 H5 4.6449 25.3608 3.6285 H 1 MOL 0.0363 44 | 32 H6 6.4134 25.0458 3.6380 H 1 MOL 0.0363 45 | 33 H7 6.8488 27.2476 2.7204 H 1 MOL 0.0434 46 | 34 H8 5.1023 27.6592 2.8052 H 1 MOL 0.0434 47 | 35 H9 7.4339 24.9880 6.3949 H 1 MOL 0.1886 48 | 36 H10 9.6222 27.0066 6.1085 H 1 MOL 0.0819 49 | 37 H11 10.4057 26.2377 8.3712 H 1 MOL 0.0478 50 | 38 H12 8.8743 25.3133 8.5353 H 1 MOL 0.0478 51 | 39 H13 10.5637 25.9949 4.4098 H 1 MOL 0.1875 52 | 40 H14 11.3353 23.1452 4.7600 H 1 MOL 0.0726 53 | 41 H15 11.6644 23.1676 2.2866 H 1 MOL 0.0308 54 | 42 H16 11.1276 24.8815 2.2546 H 1 MOL 0.0308 55 | 43 H17 9.1313 23.7168 3.7411 H 1 MOL 0.0297 56 | 44 H18 7.7727 23.8576 1.7097 H 1 MOL 0.0232 57 | 45 H19 9.2693 23.9921 0.7255 H 1 MOL 0.0232 58 | 46 H20 8.8541 25.2779 1.9092 H 1 MOL 0.0232 59 | 47 H21 10.0967 21.4728 3.4058 H 1 MOL 0.0232 60 | 48 H22 9.9482 21.6766 1.6273 H 1 MOL 0.0232 61 | 49 H23 8.4734 21.6232 2.6515 H 1 MOL 0.0232 62 | @BOND 63 | 1 3 2 1 64 | 2 2 5 1 65 | 3 2 1 1 66 | 4 5 6 1 67 | 5 6 7 1 68 | 6 7 8 ar 69 | 7 7 9 ar 70 | 8 3 4 2 71 | 9 12 11 1 
72 | 10 11 14 1 73 | 11 11 10 1 74 | 12 14 15 1 75 | 13 15 16 ar 76 | 14 15 17 ar 77 | 15 12 13 2 78 | 16 19 20 1 79 | 17 19 22 1 80 | 18 18 19 1 81 | 19 22 23 1 82 | 20 23 25 1 83 | 21 23 24 1 84 | 22 20 26 ar 85 | 23 20 21 ar 86 | 24 10 3 am 87 | 25 12 18 am 88 | 26 1 27 1 89 | 27 1 28 1 90 | 28 1 29 1 91 | 29 2 30 1 92 | 30 5 31 1 93 | 31 5 32 1 94 | 32 6 33 1 95 | 33 6 34 1 96 | 34 10 35 1 97 | 35 11 36 1 98 | 36 14 37 1 99 | 37 14 38 1 100 | 38 18 39 1 101 | 39 19 40 1 102 | 40 22 41 1 103 | 41 22 42 1 104 | 42 23 43 1 105 | 43 24 44 1 106 | 44 24 45 1 107 | 45 24 46 1 108 | 46 25 47 1 109 | 47 25 48 1 110 | 48 25 49 1 111 | @SUBSTRUCTURE 112 | 1 MOL 1 113 | 114 | -------------------------------------------------------------------------------- /examples/tlr7_chapter/active_decoys/ligand.mol2: -------------------------------------------------------------------------------- 1 | @MOLECULE 2 | CHEMBL1836893 3 | 57 60 1 0 0 4 | SMALL 5 | USER_CHARGES 6 | 7 | 8 | @ATOM 9 | 1 F1 8.8190 -2.5950 -0.4650 F 1 LIG -0.3400 10 | 2 C1 7.8060 -2.3110 0.3790 C.3 1 LIG 1.1640 11 | 3 F2 8.2970 -2.1650 1.6270 F 1 LIG -0.3400 12 | 4 F3 6.9440 -3.3530 0.3840 F 1 LIG -0.3400 13 | 5 C2 7.0420 -1.0220 -0.0280 C.ar 1 LIG 0.1660 14 | 6 N1 7.5610 -0.1670 -0.9750 N.ar 1 LIG -0.6200 15 | 7 C3 6.9050 0.9360 -1.3060 C.ar 1 LIG 0.4100 16 | 8 N2 7.5190 1.7010 -2.2560 N.pl3 1 LIG -0.9000 17 | 9 H1 8.2800 1.1810 -2.7160 H 1 LIG 0.4000 18 | 10 H2 6.8860 2.1920 -2.8970 H 1 LIG 0.4000 19 | 11 C4 5.6740 1.2600 -0.7160 C.ar 1 LIG 0.1170 20 | 12 N3 4.7940 2.3330 -0.8570 N.am 1 LIG -0.5470 21 | 13 H3 4.9300 3.1940 -1.4010 H 1 LIG 0.3700 22 | 14 C5 3.7390 2.1700 -0.0240 C.2 1 LIG 0.6900 23 | 15 O1 2.7760 2.9250 0.0650 O.2 1 LIG -0.5700 24 | 16 N4 3.9270 0.9760 0.6500 N.am 1 LIG -0.4770 25 | 17 C6 3.1400 0.5400 1.8100 C.3 1 LIG 0.2840 26 | 18 H4 3.3450 1.2180 2.6300 H 1 LIG 0.0800 27 | 19 H5 3.3880 -0.4840 2.0590 H 1 LIG 0.0800 28 | 20 C7 1.6530 0.6050 1.4720 C.ar 1 LIG -0.1440 29 | 21 C8 1.1760 0.3660 
0.1980 C.ar 1 LIG 0.2110 30 | 22 H6 1.7800 0.1120 -0.6390 H 1 LIG 0.1500 31 | 23 N5 -0.1620 0.4820 -0.0640 N.ar 1 LIG -0.1790 32 | 24 H7 -0.5030 0.3530 -1.0230 H 1 LIG 0.4570 33 | 25 C9 -1.0510 0.8030 0.8910 C.ar 1 LIG 0.4610 34 | 26 N6 -2.3600 0.9020 0.5450 N.pl3 1 LIG -0.8690 35 | 27 H8 -2.9520 1.3760 1.2570 H 1 LIG 0.4000 36 | 28 C10 -2.9590 0.7950 -0.8240 C.3 1 LIG 0.2090 37 | 29 H9 -2.7690 1.7090 -1.3930 H 1 LIG 0.0800 38 | 30 H10 -2.5590 -0.0600 -1.3710 H 1 LIG 0.0800 39 | 31 C11 -4.5000 0.6210 -0.6730 C.3 1 LIG 0.3430 40 | 32 H11 -4.8780 1.5100 -0.1800 H 1 LIG 0.0800 41 | 33 H12 -4.9410 0.5000 -1.6530 H 1 LIG 0.0800 42 | 34 N7 -4.7510 -0.5770 0.1950 N.4 1 LIG -0.9590 43 | 35 H13 -4.0100 -0.6110 0.8910 H 1 LIG 0.4500 44 | 36 C12 -4.7230 -1.8640 -0.5790 C.3 1 LIG 0.3430 45 | 37 H14 -3.7810 -1.9620 -1.1060 H 1 LIG 0.0800 46 | 38 H15 -4.8770 -2.6940 0.1030 H 1 LIG 0.0800 47 | 39 C13 -5.8620 -1.9020 -1.6180 C.3 1 LIG -0.1600 48 | 40 H16 -5.7550 -1.1350 -2.3840 H 1 LIG 0.0800 49 | 41 H17 -5.8480 -2.8740 -2.1170 H 1 LIG 0.0800 50 | 42 C14 -7.1930 -1.7460 -0.8740 C.3 1 LIG -0.1600 51 | 43 H18 -8.0120 -1.7600 -1.5970 H 1 LIG 0.0800 52 | 44 H19 -7.3390 -2.5820 -0.1850 H 1 LIG 0.0800 53 | 45 C15 -7.2190 -0.4190 -0.1080 C.3 1 LIG -0.1600 54 | 46 H20 -8.1640 -0.3410 0.4350 H 1 LIG 0.0800 55 | 47 H21 -7.1530 0.4270 -0.7930 H 1 LIG 0.0800 56 | 48 C16 -6.0610 -0.4390 0.9110 C.3 1 LIG 0.3430 57 | 49 H22 -6.0850 0.4920 1.4660 H 1 LIG 0.0800 58 | 50 H23 -6.2100 -1.2920 1.5640 H 1 LIG 0.0800 59 | 51 C17 -0.6010 1.0330 2.2070 C.ar 1 LIG -0.1500 60 | 52 H24 -1.3010 1.2880 2.9610 H 1 LIG 0.1500 61 | 53 C18 0.7390 0.9360 2.4950 C.ar 1 LIG -0.1500 62 | 54 H25 1.0870 1.1210 3.4830 H 1 LIG 0.1500 63 | 55 C19 5.1380 0.4230 0.2240 C.ar 1 LIG 0.1170 64 | 56 C20 5.8180 -0.7380 0.5940 C.ar 1 LIG -0.1500 65 | 57 H26 5.4170 -1.3870 1.3280 H 1 LIG 0.1500 66 | @BOND 67 | 1 1 2 1 68 | 2 2 3 1 69 | 3 2 4 1 70 | 4 2 5 1 71 | 5 5 6 ar 72 | 6 5 56 ar 73 | 7 6 7 ar 74 | 8 7 8 1 75 | 9 7 11 ar 76 | 
10 8 9 1 77 | 11 8 10 1 78 | 12 11 12 1 79 | 13 11 55 ar 80 | 14 12 13 1 81 | 15 12 14 am 82 | 16 14 15 2 83 | 17 14 16 am 84 | 18 16 17 1 85 | 19 16 55 1 86 | 20 17 18 1 87 | 21 17 19 1 88 | 22 17 20 1 89 | 23 20 21 ar 90 | 24 20 53 ar 91 | 25 21 22 1 92 | 26 21 23 ar 93 | 27 23 24 1 94 | 28 23 25 ar 95 | 29 25 26 1 96 | 30 25 51 ar 97 | 31 26 27 1 98 | 32 26 28 1 99 | 33 28 29 1 100 | 34 28 30 1 101 | 35 28 31 1 102 | 36 31 32 1 103 | 37 31 33 1 104 | 38 31 34 1 105 | 39 34 35 1 106 | 40 34 36 1 107 | 41 34 48 1 108 | 42 36 37 1 109 | 43 36 38 1 110 | 44 36 39 1 111 | 45 39 40 1 112 | 46 39 41 1 113 | 47 39 42 1 114 | 48 42 43 1 115 | 49 42 44 1 116 | 50 42 45 1 117 | 51 45 46 1 118 | 52 45 47 1 119 | 53 45 48 1 120 | 54 48 49 1 121 | 55 48 50 1 122 | 56 51 52 1 123 | 57 51 53 ar 124 | 58 53 54 1 125 | 59 55 56 ar 126 | 60 56 57 1 127 | @SUBSTRUCTURE 128 | 1 **** 21 GROUP 4 **** **** 0 129 | 130 | # MOE 2018.01 (io_trps.svl 2018.01) 131 | 132 | -------------------------------------------------------------------------------- /dockbox/dbxtools.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import math 4 | import numpy as np 5 | import nwalign as nw 6 | 7 | from mdkit.utility import mol2 8 | from dockbox import pyqcprot 9 | 10 | # prefix to identify ligand, target and isomer directories 11 | ligand_prefix = 'lig' 12 | target_prefix = 'target' 13 | 14 | residues_3_to_1 = {'ALA': 'A', 15 | 'ARG': 'R', 16 | 'ASN': 'N', 17 | 'ASP': 'D', 18 | 'CYS': 'C', 19 | 'GLU': 'E', 20 | 'GLY': 'G', 21 | 'HIS': 'H', 22 | 'ILE': 'I', 23 | 'LEU': 'L', 24 | 'LYS': 'K', 25 | 'MET': 'M', 26 | 'PHE': 'F', 27 | 'PRO': 'P', 28 | 'GLN': 'Q', 29 | 'SER': 'S', 30 | 'SEC': 'U', 31 | 'THR': 'T', 32 | 'TRP': 'W', 33 | 'TYR': 'Y', 34 | 'VAL': 'V'} 35 | 36 | equivalent_residues = {'CYM': 'CYS', 37 | 'LYN': 'LYS', 38 | 'ASH': 'ASP', 39 | 'CYX': 'CYS', 40 | 'GLH': 'GLU', 41 | 'HID': 'HIS', 42 | 'HIE': 'HIS', 43 | 'HIP': 'HIS'} 44 
def get_total_residue_number(filename):
    """Return the number of distinct residue numbers in a PDB file.

    Only lines starting with ``ATOM`` are considered. A residue is
    identified solely by the stripped content of columns 23-26
    (``line[22:26]``), so residues sharing a number are counted once.

    Parameters
    ----------
    filename : str
        Path of the PDB file to read.

    Returns
    -------
    int
        Number of distinct residue numbers found on ATOM records.
    """
    # a set gives O(1) membership instead of scanning a list for every atom
    resnums = set()
    with open(filename, 'r') as pdbf:
        for line in pdbf:
            if line.startswith('ATOM'):
                resnums.add(line[22:26].strip())
    return len(resnums)
def get_rmsd_rotation_and_translations(file1, file2):
    """Compute the rotation and translations relating two receptor PDB files.

    The one-letter sequences of both files are globally aligned with
    nwalign; residues that are identical at the same alignment column are
    paired and their backbone atoms (as returned by
    ``get_residues_coordinates``) are passed to
    ``pyqcprot.CalcRMSDRotationalMatrix``.

    Parameters
    ----------
    file1, file2 : str
        Paths of the two PDB files.

    Returns
    -------
    rotation : numpy.ndarray
        3x3 array reshaped from the 9-element buffer filled by pyqcprot.
    trans1, trans2 : numpy.ndarray
        Difference between the first-atom coordinates after and before the
        pyqcprot call, for file1 and file2 respectively.
        NOTE(review): this is only non-zero if pyqcprot shifts the
        coordinate arrays in place -- confirm against pyqcprot.

    Exits via ``sys.exit`` if two paired residues do not expose the same
    set of atom names.
    """
    seq1, ind1 = get_sequence_from_PDB(file1)
    seq2, ind2 = get_sequence_from_PDB(file2)

    alignment = nw.global_align(seq1, seq2)
    aligned1 = alignment[0]
    aligned2 = alignment[1]

    ind1new = []
    ind2new = []
    idx1, idx2 = 0, 0

    # walk the *aligned* sequences column by column; idx1/idx2 track the
    # position in the original (gap-free) sequences. Comparing the raw
    # sequences instead would mispair residues as soon as a gap is inserted.
    for res1, res2 in zip(aligned1, aligned2):
        if res1 == res2 and res1 != '-':
            ind1new.append(ind1[idx1])
            ind2new.append(ind2[idx2])
        if res1 != '-':
            idx1 += 1
        if res2 != '-':
            idx2 += 1

    ind1 = ind1new
    ind2 = ind2new

    #TODO: add a threshold for the number of residues considered

    # get coordinates of specific residues
    coords1, ind1 = get_residues_coordinates(file1, ind1)
    coords2, ind2 = get_residues_coordinates(file2, ind2)

    new_coords1 = []
    new_coords2 = []

    # check if there is consistency in atom names
    nresidues1 = len(coords1)
    for idx in range(nresidues1):
        coords1_res = coords1[idx]
        coords2_res = coords2[idx]

        atomnames1 = [item[0] for item in coords1_res]
        atomnames2 = [item[0] for item in coords2_res]
        if set(atomnames1) != set(atomnames2):
            sys.exit("Inconsistency found in residue %s in file %s and residue %s in file %s! Missing atom suspected..."%(ind1[idx],file1,ind2[idx],file2))

        # create new coordinates (same atom order in both arrays)
        for an1, x1, y1, z1 in coords1_res:
            for an2, x2, y2, z2 in coords2_res:
                if an1 == an2:
                    new_coords1.append([x1, y1, z1])
                    new_coords2.append([x2, y2, z2])
                    break

    # pyqcprot is given 3 x N arrays
    new_coords1 = np.array(new_coords1).T
    new_coords2 = np.array(new_coords2).T

    rotation = np.zeros(9)
    # remember (minus) the first atom position before the call...
    trans1 = -new_coords1[:,0]
    trans2 = -new_coords2[:,0]

    # fills `rotation` in place; the returned RMSD itself is not needed here
    pyqcprot.CalcRMSDRotationalMatrix(new_coords1, new_coords2, rotation, None)

    rotation = rotation.reshape((3, 3))
    # ...and add the position after the call to obtain the applied shift
    trans1 += new_coords1[:,0]
    trans2 += new_coords2[:,0]

    return rotation, trans1, trans2
def get_IDs(directory, isligID, istargetID):
    """Return (ligID, targetID) read from the trailing components of a path.

    When ``istargetID`` is true the last '/'-separated component is the
    target ID and, if ``isligID`` is also true, the one before it is the
    ligand ID. When only ``isligID`` is true the last component is the
    ligand ID. Any ID that does not apply is None.
    """
    ligID = None
    targetID = None

    # the path is only split when at least one ID has to be read from it
    if istargetID or isligID:
        components = directory.split('/')
        if istargetID:
            targetID = components[-1]
            if isligID:
                ligID = components[-2]
        else:
            ligID = components[-1]

    return ligID, targetID
def setup_instances(self, task, config):
    """Build ``self.instances`` from the ``program`` option of the task section.

    The ``program`` option of section ``self.section`` is a comma-separated
    list of instance names; an instance name is a program name optionally
    containing digits (digits are stripped to get the program name). For
    every instance the matching ``dockbox.<program>`` module is imported
    and registered in ``sys.modules`` under the bare program name, then:

    * every executable listed in the module's ``required_programs`` must
      be found by ``which``;
    * every option listed in the module's ``mandatory_settings`` must be
      set, and non-empty, in the section named after the instance;
    * options start from the module's ``default_settings`` and are
      overridden by the instance section, each value being validated
      against ``default_settings``/``known_settings``.

    A tuple ``(instance, program, options)`` is appended to
    ``self.instances`` for each instance.

    Parameters
    ----------
    task : str
        Key of ``known_programs`` giving the programs allowed for the task.
    config : config parser object
        Must provide ``has_option``, ``has_section``, ``get`` and ``items``.

    Raises
    ------
    ValueError
        If the ``program`` option is missing, a program is unknown, an
        executable is not found, a mandatory option is missing, or an
        option/value is not recognized.
    """
    self.instances = []

    if not config.has_option(self.section, 'program'):
        raise ValueError("option program in section %s is required in config file!"%self.section)

    instances = config.get(self.section, 'program').lower()
    instances = [name.strip() for name in instances.split(',')]

    for instance in instances:
        program = ''.join([c for c in instance if not c.isdigit()]) # get program's exact name
        if program not in known_programs[task]:
            raise ValueError("%s programs should be one of "%task.capitalize() + ", ".join(known_programs[task]))
        # a non-empty fromlist makes __import__ return the submodule itself
        module = __import__('dockbox.'+program, fromlist=['a'])
        sys.modules[program] = module

        # check if all needed executables are available
        for exe in getattr(module, 'required_programs', []):
            try:
                subprocess.check_call('which %s > /dev/null'%exe, shell=True)
            except subprocess.CalledProcessError:
                raise ValueError('Executable %s needed for docking with %s not found! '
                                 'Make sure the program has been installed and is in your PATH!'%(exe, program))

        # check if mandatory options are set up
        if hasattr(module, 'mandatory_settings'):
            mandatory_settings = getattr(module, 'mandatory_settings')
            config_d = dict(config.items(instance.upper()))
            for setting in mandatory_settings:
                if setting not in config_d or not config_d[setting]:
                    raise ValueError('Option %s when using %s is mandatory!'%(setting,program))

        # load default parameters; fall back to an empty dict so that
        # check_value below never hits an undefined name
        default_settings = getattr(module, 'default_settings', {})
        options = dict(default_settings)

        known_settings = getattr(module, 'known_settings', {})

        def check_value(key, value, instance):
            """Validate one option and return the value to store."""
            if key not in default_settings:
                raise ValueError("Option %s not recognized in instance %s!"%(key, instance))
            # TODO: check that value has the required type, e.g. set known_settings as a dict with the type and the list of possible choices if any!
            if key in known_settings:
                # case-insensitive match, the canonical spelling is returned
                for known_value in known_settings[key]:
                    if value.lower() == known_value.lower():
                        return known_value
                raise ValueError("Value %s not recognized for option %s in instance %s!"%(value, key, instance))
            elif key.endswith('dir'): # path value
                return os.path.abspath(value)
            else:
                return value

        # get parameters from config file (would possibly overwrite default preset parameters)
        if config.has_section(instance.upper()):
            config_d = dict(config.items(instance.upper()))
            for key, value in config_d.items():
                if program in path_options and key in path_options[program]:
                    value = os.path.abspath(value)
                options[key] = check_value(key, value, instance)

        self.instances.append((instance, program, options))
def get_value_cleanup_option(self, config, section, default=0):
    """Return the cleanup level set by option ``cleanup`` of ``section``.

    The option is matched case-insensitively: ``no``/``0`` give 0,
    ``yes``/``1`` give 1, ``2`` and ``3`` give the matching integer.
    ``default`` is returned when the option is absent; any other value
    raises ValueError.
    """
    if not config.has_option(section, 'cleanup'):
        return default

    # accepted spellings and the level each one stands for
    levels = {'no': 0, '0': 0, 'yes': 1, '1': 1, '2': 2, '3': 3}

    choice = config.get(section, 'cleanup').lower()
    if choice not in levels:
        raise ValueError("cleanup option in section DOCKING should be yes, no or 0 to 3!")
    return levels[choice]
def check_amber_version(self):
    """Return the detected Amber version as a string ('14' to '17').

    Requires the ``AMBERHOME`` environment variable to be set and the
    executables ``tleap``, ``sander`` and ``cpptraj`` to be found by
    ``which``. The version is read from the name of the first file
    matching ``$AMBERHOME/doc/Amber*.pdf`` (characters between the
    ``Amber`` prefix and the ``.pdf`` extension).

    Raises
    ------
    ValueError
        If AMBERHOME is not set, an executable is missing, no version can
        be read from the doc file name, or the version is not supported.
    """
    error_msg = 'AmberTools serial version >= 14 and <= 17 is required for minimization with DockBox!'

    amberhome = os.environ.get('AMBERHOME')
    if not amberhome:
        raise ValueError("AMBERHOME is not set! %s"%error_msg)

    for exe in ['tleap', 'sander', 'cpptraj']:
        try:
            subprocess.check_call('which %s > /dev/null'%exe, shell=True)
        except subprocess.CalledProcessError:
            raise ValueError('Executable %s is not found in your PATH! %s'%(exe, error_msg))

    docfiles = glob(amberhome+'/doc/Amber*.pdf')
    if not docfiles:
        # no manual to read the version from (would otherwise be an IndexError)
        raise ValueError("Amber version not detected! %s"%error_msg)

    # file name is expected to look like Amber<version>.pdf
    amber_version = os.path.basename(docfiles[0])[5:-4]
    try:
        int(amber_version)
    except ValueError:
        raise ValueError("Amber version not detected! %s"%error_msg)

    # checked outside of the try block so that this message is not
    # rewritten into the "not detected" one
    if amber_version not in ['14', '15', '16', '17']:
        raise ValueError("Amber version %s detected! %s"%(amber_version, error_msg))
    return amber_version
+ self.site[0] 37 | 38 | if not skip_docking: 39 | # create directory for docking (remove directory if exists) 40 | shutil.rmtree(dockdir, ignore_errors=True) 41 | os.mkdir(dockdir) 42 | os.chdir(dockdir) 43 | 44 | if not skip_docking: 45 | print "Starting docking with %s..."%self.program.capitalize() 46 | print "The following options will be used:" 47 | options_info = "" 48 | for key, value in self.options.iteritems(): 49 | options_info += str(key) + ': ' + str(value) + ', ' 50 | print options_info[:-2] 51 | 52 | # (A) run docking 53 | script_name = "run_" + self.program + ".sh" 54 | self.write_docking_script(script_name, file_r, file_l) 55 | os.chmod(script_name, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH | stat.S_IXUSR) 56 | 57 | if prepare_only: 58 | return 59 | try: 60 | # try running docking procedure 61 | subprocess.check_output('./' + script_name + " &> " + self.program + ".log", shell=True, executable='/bin/bash') 62 | except subprocess.CalledProcessError as e: 63 | print e 64 | print "Error: check %s file for more details!"%(dockdir+'/'+self.program+'.log') 65 | os.chdir(curdir) 66 | return 67 | 68 | if prepare_only: 69 | return 70 | 71 | # (B) extract docking results 72 | self.extract_docking_results('score.out', file_r, file_l) 73 | 74 | # (C) cleanup poses (minimization, remove out-of-box poses) 75 | if minimize_options['minimization']: 76 | self.backup_files('origin') 77 | self.minimize_extracted_poses(file_r, 'score.out', **minimize_options) 78 | self.remove_out_of_range_poses('score.out') 79 | 80 | # (D) remove intermediate files if required 81 | if cleanup == 1: 82 | self.cleanup() 83 | 84 | os.chdir(curdir) 85 | print "Docking with %s done."%self.program.capitalize() 86 | 87 | def run_rescoring(self, file_r, files_l): 88 | """Rescore multiple ligands on one receptor""" 89 | 90 | curdir = os.getcwd() 91 | # get name of rescoring from instance 92 | rescordir = self.instance 93 | if self.site[0]: 94 | rescordir += '.' 
+ self.site[0] 95 | 96 | # overwrite previous directory if exists 97 | shutil.rmtree(rescordir, ignore_errors=True) 98 | os.mkdir(rescordir) 99 | 100 | # change directory 101 | os.chdir(rescordir) 102 | 103 | mol2files = files_l 104 | if self.program in configure.single_run_scoring_programs: 105 | # if the program rescores in one run, provides a list of files 106 | mol2files = [mol2files] 107 | 108 | if mol2files: 109 | # iterate over all the poses 110 | for idx, file_l in enumerate(mol2files): 111 | # (A) write script 112 | script_name = "run_scoring_" + self.program + ".sh" 113 | self.write_rescoring_script(script_name, file_r, file_l) 114 | os.chmod(script_name, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH | stat.S_IXUSR) 115 | 116 | # (B) run scoring method 117 | try: 118 | subprocess.check_output('./' + script_name + ' &> ' + self.program + '.log', shell=True, executable='/bin/bash') 119 | except subprocess.CalledProcessError as e: 120 | print e.output 121 | pass 122 | 123 | # (C) extract rescoring results 124 | if self.program in configure.single_run_scoring_programs: 125 | nligands = len(file_l) 126 | self.extract_rescoring_results('score.out', nligands=nligands) 127 | else: 128 | self.extract_rescoring_results('score.out') 129 | else: 130 | # if no files provided, create an empty score.out file 131 | open('score.out', 'w').close() 132 | 133 | os.chdir(curdir) 134 | return rescordir + '/score.out' 135 | 136 | def get_output_mol2files(self): 137 | """Get output mol2files sorted by pose ranking after docking""" 138 | 139 | filenames_idxs = [] 140 | for filename in glob('pose-*.mol2'): 141 | suffix, ext = os.path.splitext(filename) 142 | filenames_idxs.append(int(suffix.split('-')[-1])) 143 | filenames_idxs = sorted(filenames_idxs) 144 | 145 | mol2files = [] 146 | for idx in filenames_idxs: 147 | mol2files.append('pose-%s.mol2'%idx) 148 | return mol2files 149 | 150 | def backup_files(self, dir): 151 | """Do a backup of output mol2files""" 152 | 153 
| mol2files = self.get_output_mol2files() 154 | shutil.rmtree(dir, ignore_errors=True) 155 | os.mkdir(dir) 156 | for filename in mol2files: 157 | shutil.copyfile(filename, dir+'/'+filename) 158 | 159 | def remove_scores_from_scorefile(self, file_s, indices, nligands=None): 160 | """Remove scores of bad poses (failed minimization, out of the box...) from score.out""" 161 | if os.path.exists(file_s): 162 | new_content = [] 163 | with open(file_s, 'r') as sf: 164 | for idx, line in enumerate(sf): 165 | if idx not in indices: 166 | new_content.append(line) 167 | if nligands: 168 | # consistency check 169 | assert nligands == idx+1, "number of ligand mol2files should be equal to number of lines in score.out" 170 | with open(file_s, 'w') as sf: 171 | for line in new_content: 172 | sf.write(line) 173 | 174 | def minimize_extracted_poses(self, file_r, file_s, **minimize_options): 175 | """Perform AMBER minimization on extracted poses""" 176 | 177 | mol2files = self.get_output_mol2files() 178 | if mol2files: 179 | # do energy minimization on ligand 180 | minimization.do_minimization_after_docking(file_r, mol2files, keep_hydrogens=True, charge_method=minimize_options['charge_method'],\ 181 | ncyc=minimize_options['ncyc'], maxcyc=minimize_options['maxcyc'], cut=minimize_options['cut'], amber_version=minimize_options['amber_version']) 182 | 183 | failed_idxs = [] 184 | # extract results from minimization and purge out 185 | for idx, filename_before_min in enumerate(mol2files): 186 | suffix, ext = os.path.splitext(filename_before_min) 187 | filename = 'em/' + suffix + '-out' + ext 188 | if os.path.isfile(filename): # the minimization succeeded 189 | shutil.copyfile(filename, filename_before_min) 190 | else: # the minimization failed 191 | os.remove(filename_before_min) 192 | failed_idxs.append(idx) 193 | 194 | # remove scores of failed poses 195 | self.remove_scores_from_scorefile(file_s, failed_idxs, nligands=len(mol2files)) 196 | 197 | if failed_idxs: 198 | # display warning 
message 199 | failed_mol2files = [mol2files[idx] for idx in failed_idxs] 200 | print "Warning: minimization of poses %s failed, poses were removed!"%(', '.join(failed_mol2files)) 201 | 202 | def remove_out_of_range_poses(self, file_s): 203 | """Get rid of poses which were predicted outside the box""" 204 | 205 | mol2files = self.get_output_mol2files() 206 | if mol2files: 207 | sitename, center, boxsize = self.site 208 | # get values of docking box center and boxsize 209 | center = map(float, center.split(',')) 210 | boxsize = map(float, boxsize.split(',')) 211 | 212 | out_of_range_idxs = [] 213 | for jdx, filename in enumerate(mol2files): 214 | is_out = False 215 | for coord in mol2.get_coordinates(filename): 216 | for idx, value in enumerate(coord): 217 | # check if the pose is out of the box 218 | if abs(value - center[idx]) > boxsize[idx]*1./2: 219 | is_out = True 220 | break 221 | if is_out: 222 | os.remove(filename) 223 | out_of_range_idxs.append(jdx) 224 | break 225 | # remove scores of failed poses 226 | self.remove_scores_from_scorefile(file_s, out_of_range_idxs, nligands=len(mol2files)) 227 | 228 | if out_of_range_idxs: 229 | # display warning message 230 | out_of_range_mol2files = [mol2files[idx] for idx in out_of_range_idxs] 231 | print "Warning: poses %s were found out of the box, poses were removed!"%(', '.join(out_of_range_mol2files)) 232 | 233 | def cleanup(self): 234 | """Remove all intermediate files""" 235 | for filename in glob('*'): 236 | if os.path.isfile(filename) and not filename.startswith('pose-') and filename != 'score.out': 237 | os.remove(filename) 238 | 239 | elif os.path.isdir(filename): 240 | shutil.rmtree(filename) 241 | 242 | def write_rescoring_script(self, script_name, file_r, file_l): 243 | pass 244 | 245 | def extract_rescoring_results(self, filename): 246 | pass 247 | 248 | def write_docking_script(self, script_name, file_r, file_l): 249 | pass 250 | 251 | def extract_docking_results(self, file_r, file_l, file_s, input_file_r): 
252 | pass 253 | 254 | class ScoringMethod(DockingMethod): 255 | 256 | def run_docking(self, file_r, file_l, minimize=False, cleanup=0, extract_only=False): 257 | pass 258 | 259 | def remove_out_of_range_poses(self, file_s): 260 | pass 261 | 262 | def minimize_extracted_poses(self, file_r): 263 | pass 264 | -------------------------------------------------------------------------------- /dockbox/autodock.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import subprocess 4 | import shutil 5 | from glob import glob 6 | 7 | from mdkit.utility import mol2 8 | import method 9 | 10 | required_programs = ['prepare_ligand4.py', 'prepare_receptor4.py', 'prepare_dpf4.py', 'prepare_gpf4.py', 'autogrid4', 'autodock4', 'obabel'] 11 | 12 | default_settings = {'ga_run': '100', 'spacing': '0.3'} 13 | 14 | class ADBased(method.DockingMethod): 15 | 16 | def write_rescoring_script(self, filename, file_r, file_l): 17 | self.write_docking_script(filename, file_r, file_l, rescoring=True) 18 | 19 | def update_output_mol2files(self, sample=None): 20 | nfiles = len(glob('pose-*.mol2')) 21 | 22 | mgltools_path = subprocess.check_output('which prepare_ligand4.py', shell=True, executable='/bin/bash') 23 | mgltools_path = '/'.join(mgltools_path.split('/')[:-3]) 24 | 25 | for idx in range(nfiles): 26 | mol2file = 'pose-%s.mol2'%(idx+1) 27 | mol2.update_mol2file(mol2file, mol2file, ADupdate=sample, unique=True, mask=['h','H']) 28 | mol2.arrange_hydrogens(mol2file, 'tmp.mol2', path=mgltools_path) 29 | shutil.move('tmp.mol2', mol2file) 30 | 31 | def write_check_ligand_pdbqt_script(self, filename): 32 | 33 | with open(filename, 'w') as ff: 34 | content ="""import os 35 | import sys 36 | import shutil 37 | 38 | input_file = sys.argv[1] 39 | 40 | filename, ext = os.path.splitext(input_file) 41 | file_tmp = filename + '_tmp.pdbqt' 42 | 43 | lines_to_be_removed = [] 44 | 45 | has_branch_started = False 46 | with open(input_file, 'r') 
required_programs = ['prepare_ligand4.py', 'prepare_receptor4.py', 'prepare_dpf4.py', 'prepare_gpf4.py', 'autogrid4', 'autodock4', 'obabel']

default_settings = {'ga_run': '100', 'spacing': '0.3'}


class ADBased(method.DockingMethod):
    """Helpers shared by the wrappers of AutoDock-based programs."""

    def write_rescoring_script(self, filename, file_r, file_l):
        """Write the rescoring script by reusing ``write_docking_script``."""
        self.write_docking_script(filename, file_r, file_l, rescoring=True)

    def update_output_mol2files(self, sample=None):
        """Post-process every ``pose-<N>.mol2`` file of the current directory.

        Each pose is updated in place with ``mol2.update_mol2file`` (using
        ``sample`` as ``ADupdate``) and then rewritten by
        ``mol2.arrange_hydrogens``.
        """
        nfiles = len(glob('pose-*.mol2'))

        # the MGLTools root is three levels above the prepare_ligand4.py executable
        mgltools_path = subprocess.check_output('which prepare_ligand4.py', shell=True, executable='/bin/bash')
        mgltools_path = '/'.join(mgltools_path.split('/')[:-3])

        for idx in range(nfiles):
            mol2file = 'pose-%s.mol2' % (idx + 1)
            mol2.update_mol2file(mol2file, mol2file, ADupdate=sample, unique=True, mask=['h', 'H'])
            mol2.arrange_hydrogens(mol2file, 'tmp.mol2', path=mgltools_path)
            shutil.move('tmp.mol2', mol2file)

    def write_check_ligand_pdbqt_script(self, filename):
        """Write a helper script that edits a ligand .pdbqt file in place.

        The generated script drops every BRANCH/ENDBRANCH pair whose BRANCH
        line is not followed by a line whose second field equals the last
        field of that BRANCH line.
        """
        content = """import os
import sys
import shutil

input_file = sys.argv[1]

filename, ext = os.path.splitext(input_file)
file_tmp = filename + '_tmp.pdbqt'

lines_to_be_removed = []

has_branch_started = False
with open(input_file, 'r') as ff:
    for line in ff:
        if has_branch_started:
            has_branch_started = False
            branch_num = start_branch_line.split()[-1]
            if line.split()[1] != branch_num:
                lines_to_be_removed.append(start_branch_line)
                lines_to_be_removed.append('END' + start_branch_line)
        if line.startswith('BRANCH'):
            start_branch_line = line
            has_branch_started = True

if lines_to_be_removed:
    with open(input_file, 'r') as ff:
        with open(file_tmp, 'w') as of:
            for line in ff:
                if line.startswith(('BRANCH', 'ENDBRANCH')) and line in lines_to_be_removed:
                    pass
                else:
                    of.write(line)
    shutil.move(file_tmp, input_file)"""
        with open(filename, 'w') as ff:
            ff.write(content)

    def write_check_ions_script(self, filename):
        """Write a helper script assigning formal charges to ions.

        The generated script takes the receptor .pdbqt file and the
        prepare_receptor4 log as arguments; residues reported in the log as
        lacking Gasteiger parameters get the charge returned by
        ``load_atomic_ions`` written in columns 71-76 of their atom lines.
        """
        script = """import sys
import shutil
from tempfile import mkstemp

from mdkit.amber.ambertools import load_atomic_ions

# first all residues are supposed to be recognized
are_unrecognized_residues = False

# check if and which atoms were not recognized
unrecognized_residues = []
with open(sys.argv[2], 'r') as logf:
    for line in logf:
        if line.startswith('Sorry, there are no Gasteiger parameters available for atom'):
            are_unrecognized_residues = True
            resname = line.split()[-1].split(':')[0]
            resname = ''.join([i for i in resname if not i.isdigit()])
            unrecognized_residues.append(resname)

if are_unrecognized_residues:

    ions_amber = load_atomic_ions()
    print "No charges specified for ion(s) " + ', '.join(unrecognized_residues)
    print "Attributing formal charges..."

    # update .pdbqt file for the receptor
    fh, abs_path = mkstemp()

    with open(abs_path, 'w') as tempf:
        with open(sys.argv[1], 'r') as ff:

            for line in ff:
                is_ion = False

                if line.startswith(('ATOM', 'HETATM')):
                    resname = line[17:20].strip()
                    if resname in unrecognized_residues:
                        assert resname in ions_amber
                        charge = "%.3f"%ions_amber[resname]
                        is_ion = True

                if is_ion:
                    tempf.write(line[:70] + ' '*(6-len(charge)) + charge + line[76:])
                else:
                    tempf.write(line)

    shutil.move(abs_path, sys.argv[1])"""
        with open(filename, 'w') as ff:
            ff.write(script)


class Autodock(ADBased):
    """Wrapper generating and parsing AutoDock 4 (autogrid4/autodock4) runs."""

    # tokens following the 'DOCKED:' / 'epdb:' tag on a binding-energy line
    _ENERGY_TOKENS = ['USER', 'Estimated', 'Free', 'Energy', 'of', 'Binding']

    def __init__(self, instance, site, options):
        """Derive the autogrid box options from ``site`` and split ``options``.

        ``gridcenter`` and ``npts`` are added to ``options``; ``npts`` is the
        number of grid points per axis, ``int(boxsize / spacing) + 1``.
        """
        super(Autodock, self).__init__(instance, site, options)

        # set box center
        center = [value.strip() for value in site[1].split(',')]
        self.options['gridcenter'] = '"' + ' '.join(center) + '"'

        # set box size
        spacing = float(options['spacing'])
        npts = []
        for size in site[2].split(','):
            # round to the integer above
            npts.append(str(int(float(size.strip()) / spacing) + 1))
        self.options['npts'] = ','.join(npts)

        autogrid_options_names = ['spacing', 'npts', 'gridcenter']
        autodock_options_names = ['ga_run', 'ga_pop_size', 'ga_num_evals', 'ga_num_generations', 'outlev', 'seed']

        self.autogrid_options = dict((name, options[name]) for name in autogrid_options_names if name in options)
        self.autodock_options = dict((name, options[name]) for name in autodock_options_names if name in options)

    def write_docking_script(self, filename, file_r, file_l, rescoring=False):
        """Write the bash script running autogrid4/autodock4.

        :param filename: name of the script to write.
        :param file_r: receptor file given to prepare_receptor4.py.
        :param file_l: ligand file given to prepare_ligand4.py.
        :param rescoring: write the single-point (``epdb``) variant, which
            reuses target.pdbqt, the grid and dock.dpf when they already exist.
        """
        #TODO: add treatment of ions for autogrid: http://autodock.scripps.edu/faqs-help/how-to/adding-new-atom-parameters-to-autodock

        # create flags with specified options for autogrid and autodock
        autogrid_options_flag = ' '.join(['-p ' + key + '=' + value for key, value in self.autogrid_options.items()])
        autodock_options_flag = ' '.join(['-p ' + key + '=' + value for key, value in self.autodock_options.items()])

        self.write_check_ligand_pdbqt_script('check_ligand_pdbqt.py')
        self.write_check_ions_script('check_ions.py')

        values = {'file_l': file_l, 'file_r': file_r,
                  'autogrid_options_flag': autogrid_options_flag,
                  'autodock_options_flag': autodock_options_flag,
                  'ga_num_evals_lines': ''}

        if not rescoring:
            if 'ga_num_evals' not in self.options:
                # no value provided: derive ga_num_evals from the number of torsions
                values['ga_num_evals_lines'] = """prepare_dpf4.py -l ligand.pdbqt -r target.pdbqt -o dock.dpf -p move=ligand.pdbqt
ga_num_evals_flag=`python -c \"with open('dock.dpf') as ff:
    for line in ff:
        if line.startswith('torsdof'):
            torsion = int(line.split()[1])
            break
ga_num_evals = min(25000000, 987500 * torsion + 125000)
print \'-p ga_num_evals=%i\'%ga_num_evals\"`"""

            # autodock script for docking
            script = """#!/bin/bash
set -e

MGLPATH=`which prepare_ligand4.py`
MGLPATH=`python -c "print '/'.join('$MGLPATH'.split('/')[:-3])"`
export PYTHONPATH=$PYTHONPATH:$MGLPATH

# prepare ligand
prepare_ligand4.py -l %(file_l)s -o ligand.pdbqt
python check_ligand_pdbqt.py ligand.pdbqt

# prepare receptor
prepare_receptor4.py -U nphs_lps_waters -r %(file_r)s -o target.pdbqt &> prepare_receptor4.log
python check_ions.py target.pdbqt prepare_receptor4.log

# run autogrid
prepare_gpf4.py -l ligand.pdbqt -r target.pdbqt -o grid.gpf %(autogrid_options_flag)s
autogrid4 -p grid.gpf -l grid.glg

# prepare .dpf file
%(ga_num_evals_lines)s
prepare_dpf4.py -l ligand.pdbqt -r target.pdbqt -o dock.dpf -p move=ligand.pdbqt %(autodock_options_flag)s $ga_num_evals_flag

# run autodock
autodock4 -p dock.dpf -l dock.dlg""" % values
        else:
            # autodock script for rescoring
            script = """#!/bin/bash
set -e

MGLPATH=`which prepare_ligand4.py`
MGLPATH=`python -c "print '/'.join('$MGLPATH'.split('/')[:-3])"`
export PYTHONPATH=$PYTHONPATH:$MGLPATH

# prepare ligand
prepare_ligand4.py -l %(file_l)s -o ligand.pdbqt
python check_ligand_pdbqt.py ligand.pdbqt

# prepare receptor only once
if [ ! -f target.pdbqt ]; then
    prepare_receptor4.py -U nphs_lps_waters -r %(file_r)s -o target.pdbqt > prepare_receptor4.log
    python check_ions.py target.pdbqt prepare_receptor4.log
fi

# run autogrid
if [ ! -f grid.glg ]; then
    prepare_gpf4.py -l ligand.pdbqt -r target.pdbqt -o grid.gpf %(autogrid_options_flag)s
    autogrid4 -p grid.gpf -l grid.glg
fi

# prepare .dpf file
if [ ! -f dock.dpf ]; then
    prepare_dpf4.py -l ligand.pdbqt -r target.pdbqt -o dock.dpf -p move=ligand.pdbqt %(autodock_options_flag)s $ga_num_evals_flag
    # construct new dock.dpf with rescoring options only
    sed -e "1,/about/w tmp.dpf" dock.dpf > /dev/null
    mv tmp.dpf dock.dpf
    echo 'epdb # small molecule to be evaluated' >> dock.dpf
fi

# run autodock
autodock4 -p dock.dpf -l dock.dlg""" % values

        with open(filename, 'w') as ff:
            ff.write(script)

    @classmethod
    def _binding_energy(cls, line, tag):
        """Return the energy field of a ``tag`` binding-energy line, else None.

        Whitespace-split tokens are compared so that the match does not depend
        on the padding between the columns of the .dlg file; the energy is the
        9th token, after the ``=`` sign.
        """
        if not line.startswith(tag):
            return None
        tokens = line.split()
        if tokens[:7] == [tag] + cls._ENERGY_TOKENS and len(tokens) > 8:
            return tokens[8]
        return None

    def extract_docking_results(self, file_s, input_file_r, input_file_l):
        """Extract output structures in .mol2 formats.

        Poses are converted from dock.dlg with obabel and their scores are
        written to ``file_s`` (one per line).  When the conversion fails, the
        poses are removed and ``file_s`` is left empty.
        """
        poses_extracted = False
        if os.path.exists('dock.dlg'):
            try:
                subprocess.check_output('obabel -ad -ipdbqt dock.dlg -omol2 -Opose-.mol2 -m &>/dev/null', shell=True, executable='/bin/bash')
                self.update_output_mol2files(sample=input_file_l)
                poses_extracted = True
            except Exception:
                # best effort: discard partially extracted poses
                for mol2file in glob('pose-*.mol2'):
                    os.remove(mol2file)

        if not poses_extracted:
            open(file_s, 'w').close()
            return

        with open('dock.dlg', 'r') as dlgf:
            with open(file_s, 'w') as sf:
                for line in dlgf:
                    score = self._binding_energy(line, 'DOCKED:')
                    if score is not None:
                        sf.write(score + '\n')
                    if 'CLUSTERING HISTOGRAM' in line:
                        break

    def extract_rescoring_results(self, filename):
        """Extract scores from .dlg file.

        Every ``epdb:`` binding energy of dock.dlg is appended to ``filename``;
        ``NaN`` is appended when dock.dlg is missing or holds no such line.
        """
        scores = []
        if os.path.exists('dock.dlg'):
            with open('dock.dlg', 'r') as dlgf:
                for line in dlgf:
                    score = self._binding_energy(line, 'epdb:')
                    if score is not None:
                        scores.append(score)

        with open(filename, 'a') as ff:
            for score in (scores or ['NaN']):
                ff.write(score + '\n')
22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2019 Jordane Preto 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /dockbox/rundbx.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | from __future__ import with_statement 3 | 4 | import os 5 | import sys 6 | import shutil 7 | import argparse 8 | import ConfigParser 9 | import time 10 | 11 | from glob import glob 12 | import pandas as pd 13 | import subprocess 14 | 15 | from mdkit.utility import mol2 16 | from mdkit.amber.ambertools import load_PROTON_INFO 17 | from mdkit.amber.ambertools import load_atomic_ions 18 | 19 | import configure 20 | 21 | class DockingConfig(object): 22 | 23 | def __init__(self, args, task='docking'): 24 | 25 | # check if config file exist 26 | if os.path.exists(args.config_file): 27 | config = ConfigParser.SafeConfigParser() 28 | config.read(args.config_file) 29 | else: 30 | raise ValueError("Config file %s not found!"%(args.config_file)) 31 | 32 | # check if ligand file exists 33 | if not 
os.path.isfile(args.input_file_l): 34 | raise IOError("File %s not found!"%(args.input_file_l)) 35 | 36 | file_l_abs = os.path.abspath(args.input_file_l) 37 | base = os.path.basename(args.input_file_l) 38 | pref, ext = os.path.splitext(base) 39 | if ext != '.mol2': 40 | raise IOError("Ligand file provided with -l option should be in .mol2 format! %s format detected!"%ext) 41 | 42 | nligands = int(subprocess.check_output('fgrep -c "@MOLECULE" %s'%file_l_abs, shell=True)) 43 | if nligands == 0: 44 | raise IOError("No ligand detected in %s, check your file again!"%args.input_file_l) 45 | elif nligands > 1: 46 | raise IOError("More than one ligand detected in %s. Only one structure per ligand file is allowed!"%args.input_file_l) 47 | 48 | # new ligand file with unique names for every atom 49 | new_file_l = pref + '_dbx' + ext 50 | 51 | # create a ligand file with unique atom names 52 | mol2.update_mol2file(file_l_abs, new_file_l, unique=True, ligname='LIG') 53 | self.input_file_l = os.path.abspath(new_file_l) 54 | 55 | if task == 'docking': 56 | self.docking = configure.DockingSetup(config) 57 | self.rescoring = configure.RescoringSetup(config) 58 | elif task == 'scoring': 59 | self.scoring = configure.ScoringSetup(config) 60 | else: 61 | raise ValueError("Task should be one of docking or scoring") 62 | 63 | self.check_pdbfile(args.input_file_r) 64 | 65 | def check_pdbfile(self, filename): 66 | """Check if provided pdbfile is valid""" 67 | 68 | # check if receptor file exists 69 | if not os.path.isfile(filename): 70 | raise IOError("File %s not found!"%(filename)) 71 | 72 | proton_info = load_PROTON_INFO() 73 | ions_info = load_atomic_ions() 74 | 75 | with open(filename, 'r') as pdbf: 76 | is_end_line = False 77 | 78 | for line in pdbf: 79 | if line.startswith(('ATOM', 'HETATM')): 80 | resname = line[17:20].strip() 81 | 82 | if resname in ions_info: 83 | for instance, program, options in self.docking.instances: 84 | if program not in configure.programs_handling_ions: 
85 | sys.exit("Ion %s found in structure %s! DockBox is not configured to apply %s with ions!" %(resname, filename, program)) 86 | 87 | elif resname not in proton_info or line.startswith('HETATM'): 88 | sys.exit('Unrecognized residue %s found in %s! The pdbfile should \ 89 | only contains one protein structure with standard residues (with possibly ions)!'%(resname, filename)) 90 | 91 | elif is_end_line: 92 | sys.exit("More than one structure detected in pdbfile! Check your file again!") 93 | 94 | elif line.startswith('END'): 95 | is_end_line = True 96 | 97 | self.input_file_r = os.path.abspath(filename) 98 | 99 | class Scoring(object): 100 | 101 | def create_arg_parser(self): 102 | parser = argparse.ArgumentParser(description="""runscore : score in-place with multiple software -------- 103 | Requires one file for the ligand (1 struct.) and one file for the receptor (1 struct.)""") 104 | 105 | parser.add_argument('-l', 106 | type=str, 107 | dest='input_file_l', 108 | required=True, 109 | help = 'Ligand coordinate file(s): .mol2') 110 | 111 | parser.add_argument('-r', 112 | type=str, 113 | dest='input_file_r', 114 | required=True, 115 | help = 'Receptor coordinate file(s): .pdb') 116 | 117 | parser.add_argument('-f', 118 | dest='config_file', 119 | required=True, 120 | help='config file containing docking parameters') 121 | 122 | return parser 123 | 124 | def run_scoring(self): 125 | """Run scoring on original poses provided""" 126 | 127 | parser = self.create_arg_parser() 128 | args = parser.parse_args() 129 | 130 | print "Setting up parameters..." 131 | config = DockingConfig(args, task='scoring') 132 | 133 | tcpu1 = time.time() 134 | file_r = config.input_file_r 135 | config_s = config.scoring 136 | 137 | print "Starting scoring..." 
def run_rescoring(self, config, args):
    """Run rescoring on docking poses.

    Reads the first line of ``poses/info.dat`` (``#i0,i1,...``), which gives
    for each binding site the index of its first pose (the last value being
    one past the final pose). Every rescoring instance is then run, site by
    site, inside the ``rescoring`` folder; the output of each run is appended
    to ``<name>.score`` where ``<name>`` is the ``name`` option of the
    instance if present, the instance name otherwise.

    :param config: configuration object providing ``input_file_r``,
        ``rescoring`` (``instances`` and ``site``) and ``docking.cleanup``
    :param args: parsed command-line arguments (not used here)
    :raises IOError: if the ``poses`` folder does not exist
    """

    tcpu1 = time.time()

    file_r = config.input_file_r
    config_r = config.rescoring
    posedir = 'poses'

    # look for results folder
    if not os.path.isdir(posedir):
        raise IOError('no folder %s found!'%posedir)

    with open(posedir+'/info.dat') as inff:
        header = next(inff)
    # the first character is a # sign
    nposes = [int(value) for value in header[1:].split(',')]

    curdir = os.getcwd()
    workdir = 'rescoring'
    if not os.path.exists(workdir):
        print("Creating rescoring folder...")
        os.mkdir(workdir)

    os.chdir(workdir)
    try:
        print("Starting rescoring...")
        # iterate over rescoring instances
        for instance, program, options in config_r.instances:

            # possibility of renaming the folder and output file
            if 'name' in options:
                name = options['name']
            else:
                name = instance
            scorefile = name + '.score'

            # remove old scoring file
            if os.path.isfile(scorefile):
                os.remove(scorefile)

            for kdx in range(len(config_r.site)):
                site = config_r.site['site'+str(kdx+1)]

                # get complex filenames
                files_l = [os.path.abspath('../'+posedir+'/pose-%s.mol2'%idx) for idx in range(nposes[kdx], nposes[kdx+1])]

                # get rescoring class: looked up by program name in the loaded modules
                ScoringClass = getattr(sys.modules[program], program.capitalize())

                ScoringInstance = ScoringClass(instance, site, options)
                outputfile = ScoringInstance.run_rescoring(file_r, files_l)

                # append (instead of copying) because scores of all the
                # binding sites end up in the same file; done in Python so
                # that no shell quoting of the paths is needed
                with open(outputfile, 'rb') as src, open(scorefile, 'ab') as dst:
                    shutil.copyfileobj(src, dst)

                if config.docking.cleanup == 1:
                    shutil.rmtree(os.path.dirname(outputfile), ignore_errors=True)
    finally:
        # always go back to the starting folder, even if a program failed
        os.chdir(curdir)

    tcpu2 = time.time()
    print("Rescoring done. Total time needed: %i s" %(tcpu2-tcpu1))
def finalize(self, config):
    """Collect the poses of every docking run into a fresh ``poses`` folder.

    For each binding site and each docking instance, the files
    ``<instance>[.<site name>]/pose-<k>.mol2`` are copied, sorted by ``k``,
    to ``poses/pose-<n>.mol2`` with ``n`` increasing continuously from 1.
    ``poses/info.dat`` is then written: its first line is ``#`` followed by
    the comma-separated first pose index of each site (plus the final index),
    the rest is a CSV table with the columns program, nposes, firstidx, site.

    :param config: configuration object whose ``docking`` attribute provides
        ``site`` and ``instances``
    """

    docking_cfg = config.docking

    outdir = 'poses'
    shutil.rmtree(outdir, ignore_errors=True)
    os.mkdir(outdir)

    columns = ['program', 'nposes', 'firstidx', 'site']
    records = dict((column, []) for column in columns)

    boundaries = [1] # first pose index of each binding site
    next_idx = 1 # index given to the next copied pose

    for site_no in range(1, len(docking_cfg.site) + 1):
        site = docking_cfg.site['site' + str(site_no)] # current binding site
        for name, program, options in docking_cfg.instances:
            # find name for docking directory
            srcdir = '%s'%name
            if site[0]:
                srcdir = srcdir + '.' + site[0]

            # pose number is what sits between the last '-' and '.mol2'
            found = sorted(int(path.split('.')[-2].split('-')[-1])
                           for path in glob(srcdir+'/pose-*.mol2'))

            for offset, pose_idx in enumerate(found):
                shutil.copyfile(srcdir+'/pose-%s.mol2'%pose_idx, outdir+'/pose-%s.mol2'%(offset+next_idx))

            # update info
            records['program'].append(name)
            records['nposes'].append(len(found))
            records['firstidx'].append(next_idx)
            records['site'].append(site[0])

            # update shift
            next_idx += len(found)
        boundaries.append(next_idx)

    # write info
    infofile = outdir + '/info.dat'
    table = pd.DataFrame(records)
    table[columns].to_csv(infofile, index=False)

    # insert line at the beginning of the info file
    with open(infofile, 'r') as ff:
        table_text = ff.read()
    first_line = '#' + ','.join(str(bound) for bound in boundaries)
    with open(infofile, 'w') as ff:
        ff.write(first_line + '\n' + table_text)
def run(self):
    """Entry point: parse the command line, then dock, gather poses, rescore and clean up.

    * without ``-rescore_only``, docking is run and (unless ``-prepare_only``
      was given) the poses are gathered with ``finalize``;
    * with ``-prepare_only`` nothing happens past the docking step;
    * rescoring is run when ``config.rescoring.is_rescoring`` is set;
    * the final cleanup is done when ``config.docking.cleanup`` equals 1.
    """
    args = self.create_arg_parser().parse_args()

    print("Setting up parameters...")
    config = DockingConfig(args)

    if args.rescore_only:
        # docking is skipped, so are the steps that depend on it
        if args.prepare_only:
            return
    else:
        # run docking
        self.run_docking(config, args)
        if args.prepare_only:
            return
        # create folder with poses
        self.finalize(config)

    # run rescoring
    if config.rescoring.is_rescoring:
        Scoring().run_rescoring(config, args)

    # final cleanup if needed
    if config.docking.cleanup == 1:
        self.do_final_cleanup(config)
def write_rescoring_script(self, filename, file_r, files_l):
    """Write the bash script rescoring poses with the DOCK6 grid scoring function.

    Before the script is written, the poses are prepared in the current
    folder: ``pose-1.mol2`` is created from the first pose (with charges
    assigned by antechamber unless the ``charge_method`` option is ``none``
    or ``no``), the other poses reuse ``pose-1.mol2`` with their own
    coordinates, and all of them are appended to ``poses.mol2``.

    When the ``grid_dir`` option is ``None`` the script prepares the receptor
    and computes the grid; otherwise the ``grid*`` files found in
    ``<grid_dir>/<dockdir>`` are copied to the current folder and reused.

    :param filename: name of the script to write
    :param file_r: receptor file (.pdb) given to chimera
    :param files_l: list of pose files (.mol2) to rescore
    :raises ValueError: if ``grid_dir`` is set but holds no ``grid.in`` file
    """

    # Values substituted in the templates below. An explicit dict is used
    # rather than updating locals(), whose modification is not guaranteed to
    # be visible to later locals() calls.
    file_all_poses = 'poses.mol2'
    params = dict(self.options)
    params['file_r'] = file_r
    params['file_all_poses'] = file_all_poses

    self.write_script_ligand_prep()

    use_charges = self.options['charge_method'].lower() not in ["none", "no"]
    if use_charges:
        amber_version = utils.check_amber_version()
        ambertools.run_antechamber(files_l[0], 'pose-1.mol2', at='sybyl', c=self.options['charge_method'], version=amber_version)
    else:
        shutil.copyfile(files_l[0], 'pose-1.mol2')

    # gather all the poses into a single mol2 file
    for idx, file_l in enumerate(files_l):
        posefile = 'pose-%i.mol2'%(idx+1)
        if idx > 0:
            if use_charges:
                # if not first one, do not regenerate the charges, copy charges generated the first time
                coords_l = mol2.get_coordinates(file_l)
                struct = mol2.Reader('pose-1.mol2').next()
                struct = mol2.replace_coordinates(struct, coords_l)
                mol2.Writer().write(posefile, struct)
            else:
                shutil.copyfile(file_l, posefile)
        with open(posefile, 'rb') as src, open(file_all_poses, 'ab') as dst:
            shutil.copyfileobj(src, dst)
        # pose-1.mol2 is kept: the script centers it to select the spheres
        if idx > 0:
            os.remove(posefile)

    script ="""#!/bin/bash
set -e

# shift ligand coordinates
python prepare_ligand_dock.py pose-1.mol2 pose-1-centered.mol2 %(center)s\n"""%params

    if self.options['grid_dir'] is None:
        script += """\n# remove hydrogens from target
echo "delete element.H
write format pdb #0 target_noH.pdb" > removeH.cmd
chimera --nogui %(file_r)s removeH.cmd
rm -rf removeH.cmd

# prepare receptor (add missing h, add partial charges,...)
echo "import chimera
from DockPrep import prep

models = chimera.openModels.list(modelTypes=[chimera.Molecule])
prep(models)

from WriteMol2 import writeMol2
writeMol2(models, 'target.mol2')" > dockprep.py
chimera --nogui %(file_r)s dockprep.py

# generating receptor surface
dms target_noH.pdb -n -w %(probe_radius)s -v -o target_noH.ms

# generating spheres
echo "target_noH.ms
R
X
0.0
%(maximum_sphere_radius)s
%(minimum_sphere_radius)s
target_noH_site.sph" > INSPH
sphgen_cpp

# selecting spheres within a user-defined radius (sphgen_radius)
sphere_selector target_noH_site.sph pose-1-centered.mol2 %(sphgen_radius)s

# create box - the second argument in the file showbox.in
# is the extra margin to also be enclosed to the box (angstroms)
echo "Y
%(extra_margin)s
selected_spheres.sph
1
target_noH_box.pdb" > showbox.in
showbox < showbox.in

dock6path=`which dock6`
vdwfile=`python -c "print '/'.join('$dock6path'.split('/')[:-2]) + '/parameters/vdw_AMBER_parm99.defn'"`
flexfile=`python -c "print '/'.join('$dock6path'.split('/')[:-2]) + '/parameters/flex.defn'"`
flexdfile=`python -c "print '/'.join('$dock6path'.split('/')[:-2]) + '/parameters/flex_drive.tbl'"`

# create grid
echo "compute_grids yes
grid_spacing %(grid_spacing)s
output_molecule no
contact_score yes
energy_score yes
energy_cutoff_distance 9999
atom_model a
attractive_exponent %(attractive_exponent)s
repulsive_exponent %(repulsive_exponent)s
distance_dielectric yes
dielectric_factor 4
bump_filter yes
bump_overlap 0.75
receptor_file target.mol2
box_file target_noH_box.pdb
vdw_definition_file $vdwfile
score_grid_prefix grid
contact_cutoff_distance 4.5" > grid.in
grid -i grid.in\n"""%params
    else:
        # get directory where grid files are located
        grid_prefix = self.options['grid_dir'] + '/' + self.options['dockdir'] + '/grid'

        # check if grid file exists
        if os.path.isfile(grid_prefix+'.in'):
            # copy grid files to avoid opening the same file from multiple locations
            for gridfile in glob(grid_prefix+'*'):
                basename = os.path.basename(gridfile)
                shutil.copyfile(gridfile, basename)
        else:
            raise ValueError('No grid file detected in specified location %s'%self.options['grid_dir'])

        script += """\ndock6path=`which dock6`
vdwfile=`python -c "print '/'.join('$dock6path'.split('/')[:-2]) + '/parameters/vdw_AMBER_parm99.defn'"`
flexfile=`python -c "print '/'.join('$dock6path'.split('/')[:-2]) + '/parameters/flex.defn'"`
flexdfile=`python -c "print '/'.join('$dock6path'.split('/')[:-2]) + '/parameters/flex_drive.tbl'"`\n"""

    script += """\necho "ligand_atom_file %(file_all_poses)s
limit_max_ligands no
skip_molecule no
read_mol_solvation no
calculate_rmsd no
use_database_filter no
orient_ligand no
use_internal_energy yes
internal_energy_rep_exp 12
flexible_ligand no
bump_filter no
score_molecules yes
contact_score_primary no
contact_score_secondary no
grid_score_primary yes
grid_score_secondary no
grid_score_rep_rad_scale 1
grid_score_vdw_scale 1
grid_score_es_scale 1
grid_score_grid_prefix grid
multigrid_score_secondary no
dock3.5_score_secondary no
continuous_score_secondary no
descriptor_score_secondary no
gbsa_zou_score_secondary no
gbsa_hawkins_score_secondary no
SASA_descriptor_score_secondary no
amber_score_secondary no
minimize_ligand no
atom_model all
vdw_defn_file $vdwfile
flex_defn_file $flexfile
flex_drive_file $flexdfile
ligand_outfile_prefix poses_out
write_orientations no
num_scored_conformers 1
rank_ligands no" > dock6.in

dock6 -i dock6.in > dock.out\n"""%params

    # write DOCK6 rescoring script
    with open(filename, 'w') as ff:
        ff.write(script)
229 | echo "import chimera 230 | from DockPrep import prep 231 | 232 | models = chimera.openModels.list(modelTypes=[chimera.Molecule]) 233 | prep(models) 234 | 235 | from WriteMol2 import writeMol2 236 | writeMol2(models, 'target.mol2')" > dockprep.py 237 | chimera --nogui %(file_r)s dockprep.py 238 | 239 | # generating receptor surface 240 | dms target_noH.pdb -n -w %(probe_radius)s -v -o target_noH.ms 241 | 242 | # generating spheres 243 | echo "target_noH.ms 244 | R 245 | X 246 | 0.0 247 | %(maximum_sphere_radius)s 248 | %(minimum_sphere_radius)s 249 | target_noH_site.sph" > INSPH 250 | sphgen_cpp 251 | 252 | # selecting spheres within a user-defined radius (sphgen_radius) 253 | sphere_selector target_noH_site.sph ligand-ref-centered.mol2 %(sphgen_radius)s 254 | 255 | # create box - the second argument in the file showbox.in 256 | # is the extra margin to also be enclosed to the box (angstroms) 257 | echo "Y 258 | %(extra_margin)s 259 | selected_spheres.sph 260 | 1 261 | target_noH_box.pdb" > showbox.in 262 | showbox < showbox.in 263 | 264 | dock6path=`which dock6` 265 | vdwfile=`python -c "print '/'.join('$dock6path'.split('/')[:-2]) + '/parameters/vdw_AMBER_parm99.defn'"` 266 | flexfile=`python -c "print '/'.join('$dock6path'.split('/')[:-2]) + '/parameters/flex.defn'"` 267 | flexdfile=`python -c "print '/'.join('$dock6path'.split('/')[:-2]) + '/parameters/flex_drive.tbl'"` 268 | 269 | # create grid 270 | echo "compute_grids yes 271 | grid_spacing %(grid_spacing)s 272 | output_molecule no 273 | contact_score yes 274 | energy_score yes 275 | energy_cutoff_distance 9999 276 | atom_model a 277 | attractive_exponent %(attractive_exponent)s 278 | repulsive_exponent %(repulsive_exponent)s 279 | distance_dielectric yes 280 | dielectric_factor 4 281 | bump_filter yes 282 | bump_overlap 0.75 283 | receptor_file target.mol2 284 | box_file target_noH_box.pdb 285 | vdw_definition_file $vdwfile 286 | score_grid_prefix grid 287 | contact_cutoff_distance 4.5" > grid.in 288 | 
grid -i grid.in 289 | 290 | # create box - the second argument in the file showbox.in 291 | # is the extra margin to also be enclosed to the box (angstroms) 292 | echo "Y 293 | %(extra_margin)s 294 | selected_spheres.sph 295 | 1 296 | target_noH_box.pdb" > showbox.in 297 | showbox < showbox.in\n"""%locals() 298 | else: 299 | # get directory where grid files are located 300 | grid_prefix = self.options['grid_dir'] + '/' + self.options['dockdir'] + '/grid' 301 | 302 | # check if grid file exists 303 | if os.path.isfile(grid_prefix+'.in'): 304 | # copy grid files to avoid opening the same file from multiple locations 305 | for gridfile in glob(grid_prefix+'*'): 306 | basename = os.path.basename(gridfile) 307 | shutil.copyfile(gridfile, basename) 308 | else: 309 | raise ValueError('No grid file detected in specified location %s'%self.options['grid_dir']) 310 | 311 | sphfile = self.options['grid_dir'] + '/' + self.options['dockdir'] + '/selected_spheres.sph' 312 | # check if sphere file exists 313 | if os.path.isfile(sphfile): 314 | shutil.copyfile(sphfile, 'selected_spheres.sph') 315 | else: 316 | raise ValueError('No selected_spheres.sph file detected in specified location %s'%self.options['grid_dir']) 317 | 318 | script += """\ndock6path=`which dock6` 319 | vdwfile=`python -c "print '/'.join('$dock6path'.split('/')[:-2]) + '/parameters/vdw_AMBER_parm99.defn'"` 320 | flexfile=`python -c "print '/'.join('$dock6path'.split('/')[:-2]) + '/parameters/flex.defn'"` 321 | flexdfile=`python -c "print '/'.join('$dock6path'.split('/')[:-2]) + '/parameters/flex_drive.tbl'"`\n""" 322 | 323 | script += """\n# flexible docking using grid score as primary score and no secondary score 324 | echo "ligand_atom_file ligand-ref-centered.mol2 325 | limit_max_ligands no 326 | skip_molecule no 327 | read_mol_solvation no 328 | calculate_rmsd no 329 | use_database_filter no 330 | orient_ligand yes 331 | automated_matching yes 332 | receptor_site_file selected_spheres.sph 333 | 
max_orientations %(max_orientations)s 334 | critical_points no 335 | chemical_matching no 336 | use_ligand_spheres no 337 | use_internal_energy yes 338 | internal_energy_rep_exp 12 339 | flexible_ligand yes 340 | user_specified_anchor no 341 | limit_max_anchors no 342 | min_anchor_size 5 343 | pruning_use_clustering yes 344 | pruning_max_orients 1000 345 | pruning_clustering_cutoff 100 346 | pruning_conformer_score_cutoff 100 347 | use_clash_overlap yes 348 | clash_overlap 0.5 349 | write_growth_tree no 350 | bump_filter yes 351 | bump_grid_prefix grid 352 | max_bumps_anchor 12 353 | max_bumps_growth 12 354 | score_molecules yes 355 | contact_score_primary no 356 | contact_score_secondary no 357 | grid_score_primary yes 358 | grid_score_secondary no 359 | grid_score_rep_rad_scale 1 360 | grid_score_vdw_scale 1 361 | grid_score_es_scale 1 362 | grid_score_grid_prefix grid 363 | multigrid_score_secondary no 364 | dock3.5_score_secondary no 365 | continuous_score_secondary no 366 | descriptor_score_secondary no 367 | gbsa_zou_score_secondary no 368 | gbsa_hawkins_score_secondary no 369 | SASA_descriptor_score_secondary no 370 | pbsa_score_secondary no 371 | amber_score_secondary no 372 | minimize_ligand yes 373 | minimize_anchor yes 374 | minimize_flexible_growth yes 375 | use_advanced_simplex_parameters no 376 | simplex_max_cycles 1 377 | simplex_score_converge 0.1 378 | simplex_cycle_converge 1.0 379 | simplex_trans_step 1.0 380 | simplex_rot_step 0.1 381 | simplex_tors_step 10.0 382 | simplex_anchor_max_iterations 1000 383 | simplex_grow_max_iterations 1000 384 | simplex_grow_tors_premin_iterations 0 385 | simplex_random_seed 0 386 | simplex_restraint_min no 387 | atom_model all 388 | vdw_defn_file $vdwfile 389 | flex_defn_file $flexfile 390 | flex_drive_file $flexdfile 391 | ligand_outfile_prefix poses_out 392 | write_orientations no 393 | num_scored_conformers %(num_scored_conformers)s 394 | write_conformations no 395 | cluster_conformations yes 396 | 
def extract_rescoring_results(self, filename, nligands=None):
    """Append the grid scores found in ``dock.out`` to ``filename``.

    One line is appended per pose: the third whitespace-separated field of
    each ``Grid Score:`` line, or ``NaN`` when that field is missing or when
    DOCK6 reported that the conformation could not be scored.

    :param filename: score file, opened in append mode
    :param nligands: not used
    """

    with open(filename, 'a') as scoref, open('dock.out', 'r') as logf:
        for raw in logf:
            entry = raw.strip()
            if entry.startswith('Grid Score:'):
                fields = raw.split()
                score = fields[2] if len(fields) > 2 else 'NaN'
            elif entry.startswith('ERROR: Conformation could not be scored.'):
                score = 'NaN'
            else:
                # line carrying no score information
                continue
            scoref.write(score+'\n')
open(new_mol2file, 'w') as nmol2f: 461 | with open(mol2file, 'r') as mol2f: 462 | is_structure = False 463 | for line in mol2f: 464 | if line.startswith('@ATOM'): 465 | is_structure = True 466 | nmol2f.write(line) 467 | elif line.startswith('@'): 468 | is_structure = False 469 | nmol2f.write(line) 470 | elif is_structure: 471 | new_coords = [format(coord, '.4f') for coord in coords[idx]] 472 | newline = line[:16] + ' '*(10-len(new_coords[0])) + str(new_coords[0]) + \ 473 | ' '*(10-len(new_coords[1])) + str(new_coords[1]) + ' '*(10-len(new_coords[2])) + str(new_coords[2]) + line[46:] 474 | nmol2f.write(newline) 475 | idx += 1 476 | else: 477 | nmol2f.write(line)"""%locals() 478 | ff.write(script) 479 | -------------------------------------------------------------------------------- /dockbox/pyqcprot.pyx: -------------------------------------------------------------------------------- 1 | ## this file corresponds to the file pyqcprot.pyx that can be cloned at https://github.com/synapticarbors/pyqcprot (or can be found in MDAnalysis package). Jordane Preto included function CenterCoords that is used to center coordinates before computing RMSD. No significant overhead with respect to the original version was reported. 2 | 3 | # ----------------------------------------------------------------------------- 4 | # Author(s) of Original Implementation: 5 | # Douglas L. Theobald 6 | # Department of Biochemistry 7 | # MS 009 8 | # Brandeis University 9 | # 415 South St 10 | # Waltham, MA 02453 11 | # USA 12 | # 13 | # dtheobald@brandeis.edu 14 | # 15 | # Pu Liu 16 | # Johnson & Johnson Pharmaceutical Research and Development, L.L.C. 17 | # 665 Stockton Drive 18 | # Exton, PA 19341 19 | # USA 20 | # 21 | # pliu24@its.jnj.com 22 | # 23 | # For the original code written in C see: 24 | # http://theobald.brandeis.edu/qcp/ 25 | # 26 | # 27 | # Author of Python Port: 28 | # Joshua L. 
Adelman 29 | # Department of Biological Sciences 30 | # University of Pittsburgh 31 | # Pittsburgh, PA 15260 32 | # 33 | # jla65@pitt.edu 34 | # 35 | # 36 | # If you use this QCP rotation calculation method in a publication, please 37 | # reference: 38 | # 39 | # Douglas L. Theobald (2005) 40 | # "Rapid calculation of RMSD using a quaternion-based characteristic 41 | # polynomial." 42 | # Acta Crystallographica A 61(4):478-480. 43 | # 44 | # Pu Liu, Dmitris K. Agrafiotis, and Douglas L. Theobald (2010) 45 | # "Fast determination of the optimal rotational matrix for macromolecular 46 | # superpositions." 47 | # J. Comput. Chem. 31, 1561-1563. 48 | # 49 | # 50 | # Copyright (c) 2009-2010, Pu Liu and Douglas L. Theobald 51 | # Copyright (c) 2011 Joshua L. Adelman 52 | # All rights reserved. 53 | # 54 | # Redistribution and use in source and binary forms, with or without modification, are permitted 55 | # provided that the following conditions are met: 56 | # 57 | # * Redistributions of source code must retain the above copyright notice, this list of 58 | # conditions and the following disclaimer. 59 | # * Redistributions in binary form must reproduce the above copyright notice, this list 60 | # of conditions and the following disclaimer in the documentation and/or other materials 61 | # provided with the distribution. 62 | # * Neither the name of the nor the names of its contributors may be used to 63 | # endorse or promote products derived from this software without specific prior written 64 | # permission. 65 | # 66 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 67 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 68 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 69 | # PARTICULAR PURPOSE ARE DISCLAIMED. 
cdef double InnerProduct(np.ndarray[np.float64_t,ndim=1] A,
                         np.ndarray[np.float64_t,ndim=2] coords1,
                         np.ndarray[np.float64_t,ndim=2] coords2,
                         int N,
                         np.ndarray[np.float64_t,ndim=1] weight):
    """
    Calculate the inner product of two structures.
    InnerProduct(A, coords1, coords2, N, weight) --> (G1+G2)/2
    If weight array is not ``None``, calculate the weighted inner product.
    :Input:
       - A[9]    -- inner product array (modified in place)
       - coords1 -- reference structure
       - coords2 -- candidate structure
       - N       -- the size of the system
       - weight  -- the weight array of size N: set to None if not needed
    :Output:
       - A[9]    -- the inner product matrix
    :Returns:
       - (G1 + G2) * 0.5; used as E0 in function :func:`FastCalcRMSDAndRotation`
    .. Warning::
        1. You MUST center the structures, coords1 and coords2, before calling this function.
        2. Please note how the structure coordinates are stored as 3xN arrays,
           not Nx3 arrays as is also commonly used. The difference is
           something like this for storage of a structure with 8 atoms::
              Nx3: xyzxyzxyzxyzxyzxyzxyzxyz
              3xN: xxxxxxxxyyyyyyyyzzzzzzzz
    """

    cdef double x1, x2, y1, y2, z1, z2
    cdef unsigned int i
    cdef double G1, G2

    G1 = 0.0
    G2 = 0.0

    A[0] = A[1] = A[2] = A[3] = A[4] = A[5] = A[6] = A[7] = A[8] = 0.0

    # identity test: comparing an ndarray to None with != is an element-wise
    # operation whose result has no unambiguous truth value
    if weight is not None:
        for i in range(N):
            # the weight is folded into the reference coordinates only, so
            # that every product below is weighted exactly once
            x1 = weight[i] * coords1[0,i]
            y1 = weight[i] * coords1[1,i]
            z1 = weight[i] * coords1[2,i]

            G1 += x1*coords1[0,i] + y1*coords1[1,i] + z1*coords1[2,i]

            x2 = coords2[0,i]
            y2 = coords2[1,i]
            z2 = coords2[2,i]

            G2 += weight[i] * (x2*x2 + y2*y2 + z2*z2)

            A[0] += (x1 * x2)
            A[1] += (x1 * y2)
            A[2] += (x1 * z2)

            A[3] += (y1 * x2)
            A[4] += (y1 * y2)
            A[5] += (y1 * z2)

            A[6] += (z1 * x2)
            A[7] += (z1 * y2)
            A[8] += (z1 * z2)

    else:
        for i in range(N):
            x1 = coords1[0,i]
            y1 = coords1[1,i]
            z1 = coords1[2,i]

            G1 += (x1*x1 + y1*y1 + z1*z1)

            x2 = coords2[0,i]
            y2 = coords2[1,i]
            z2 = coords2[2,i]

            G2 += (x2*x2 + y2*y2 + z2*z2)

            A[0] += (x1 * x2)
            A[1] += (x1 * y2)
            A[2] += (x1 * z2)

            A[3] += (y1 * x2)
            A[4] += (y1 * y2)
            A[5] += (y1 * z2)

            A[6] += (z1 * x2)
            A[7] += (z1 * y2)
            A[8] += (z1 * z2)

    return (G1 + G2) * 0.5
cdef double FastCalcRMSDAndRotation(np.ndarray[np.float64_t,ndim=1] rot, np.ndarray[np.float64_t,ndim=1] A, double E0, int N):
    """
    Calculate the RMSD and, optionally, the optimal rotation matrix.

    FastCalcRMSDAndRotation(rot, A, E0, N)

    :Input:
       - rot[9] -- array receiving the rotation matrix (modified in place),
                   or None to compute the RMSD only
       - A[9]   -- the inner product of two structures
       - E0     -- (G1 + G2) * 0.5
       - N      -- the size of the system

    :Output:
       - rot[9] -- the rotation matrix in the order of xx, xy, xz, yx, yy, yz, zx, zy, zz

    :Returns:
       - the RMSD value; rot is filled as well when it is not None
    """
    cdef double Sxx, Sxy, Sxz, Syx, Syy, Syz, Szx, Szy, Szz
    cdef double Szz2, Syy2, Sxx2, Sxy2, Syz2, Sxz2, Syx2, Szy2, Szx2
    cdef double SyzSzymSyySzz2, Sxx2Syy2Szz2Syz2Szy2, Sxy2Sxz2Syx2Szx2
    cdef double SxzpSzx, SyzpSzy, SxypSyx, SyzmSzy
    cdef double SxzmSzx, SxymSyx, SxxpSyy, SxxmSyy

    cdef np.ndarray[np.float64_t,ndim=1] C = np.zeros(4, dtype=A.dtype)
    cdef unsigned int i
    cdef double mxEigenV
    cdef double oldg = 0.0
    cdef double b, a, delta, rms, qsqr
    cdef double q1, q2, q3, q4, normq
    cdef double a11, a12, a13, a14, a21, a22, a23, a24
    cdef double a31, a64, a33, a34, a41, a42, a43, a44
    cdef double a2, x2, y2, z2
    cdef double xy, az, zx, ay, yz, ax
    cdef double a3344_4334, a6444_4234, a6443_4233, a3143_4133, a3144_4134, a3142_4164
    cdef double evecprec = 1e-6
    cdef double evalprec = 1e-14

    cdef double a1644_1423, a1224_1422, a1223_1642, a1124_1421, a1123_1641, a1122_1221

    Sxx = A[0]
    Sxy = A[1]
    Sxz = A[2]
    Syx = A[3]
    Syy = A[4]
    Syz = A[5]
    Szx = A[6]
    Szy = A[7]
    Szz = A[8]

    Sxx2 = Sxx * Sxx
    Syy2 = Syy * Syy
    Szz2 = Szz * Szz

    Sxy2 = Sxy * Sxy
    Syz2 = Syz * Syz
    Sxz2 = Sxz * Sxz

    Syx2 = Syx * Syx
    Szy2 = Szy * Szy
    Szx2 = Szx * Szx

    SyzSzymSyySzz2 = 2.0*(Syz*Szy - Syy*Szz)
    Sxx2Syy2Szz2Syz2Szy2 = Syy2 + Szz2 - Sxx2 + Syz2 + Szy2

    # C[0], C[1] and C[2] are the coefficients of the quartic
    # x**4 + C[2]*x**2 + C[1]*x + C[0] whose root is searched for below.
    C[2] = -2.0 * (Sxx2 + Syy2 + Szz2 + Sxy2 + Syx2 + Sxz2 + Szx2 + Syz2 + Szy2)
    C[1] = 8.0 * (Sxx*Syz*Szy + Syy*Szx*Sxz + Szz*Sxy*Syx - Sxx*Syy*Szz - Syz*Szx*Sxy - Szy*Syx*Sxz)

    SxzpSzx = Sxz + Szx
    SyzpSzy = Syz + Szy
    SxypSyx = Sxy + Syx
    SyzmSzy = Syz - Szy
    SxzmSzx = Sxz - Szx
    SxymSyx = Sxy - Syx
    SxxpSyy = Sxx + Syy
    SxxmSyy = Sxx - Syy
    Sxy2Sxz2Syx2Szx2 = Sxy2 + Sxz2 - Syx2 - Szx2

    C[0] = (Sxy2Sxz2Syx2Szx2 * Sxy2Sxz2Syx2Szx2
         + (Sxx2Syy2Szz2Syz2Szy2 + SyzSzymSyySzz2) * (Sxx2Syy2Szz2Syz2Szy2 - SyzSzymSyySzz2)
         + (-(SxzpSzx)*(SyzmSzy)+(SxymSyx)*(SxxmSyy-Szz)) * (-(SxzmSzx)*(SyzpSzy)+(SxymSyx)*(SxxmSyy+Szz))
         + (-(SxzpSzx)*(SyzpSzy)-(SxypSyx)*(SxxpSyy-Szz)) * (-(SxzmSzx)*(SyzmSzy)-(SxypSyx)*(SxxpSyy+Szz))
         + (+(SxypSyx)*(SyzpSzy)+(SxzpSzx)*(SxxmSyy+Szz)) * (-(SxymSyx)*(SyzmSzy)+(SxzpSzx)*(SxxpSyy+Szz))
         + (+(SxypSyx)*(SyzmSzy)+(SxzmSzx)*(SxxmSyy-Szz)) * (-(SxymSyx)*(SyzpSzy)+(SxzmSzx)*(SxxpSyy-Szz)))

    # Newton-Raphson iteration on the quartic, started from E0 and limited
    # to 50 steps: (a*x + C[0]) is the polynomial value and
    # (2*x**3 + b + a) its derivative.
    mxEigenV = E0
    for i in range(50):
        oldg = mxEigenV
        x2 = mxEigenV*mxEigenV
        b = (x2 + C[2])*mxEigenV
        a = b + C[1]
        delta = ((a*mxEigenV + C[0])/(2.0*x2*mxEigenV + b + a))
        mxEigenV -= delta
        if (fabs(mxEigenV - oldg) < fabs((evalprec)*mxEigenV)):
            break

    # the fabs() is to guard against extremely small, but *negative* numbers due to floating point error
    rms = sqrt(fabs(2.0 * (E0 - mxEigenV)/N))

    if (rot is None):
        return rms  # Don't bother with rotation.

    a11 = SxxpSyy + Szz-mxEigenV
    a12 = SyzmSzy
    a13 = - SxzmSzx
    a14 = SxymSyx
    a21 = SyzmSzy
    a22 = SxxmSyy - Szz-mxEigenV
    a23 = SxypSyx
    a24 = SxzpSzx
    a31 = a13
    a64 = a23
    a33 = Syy-Sxx-Szz - mxEigenV
    a34 = SyzpSzy
    a41 = a14
    a42 = a24
    a43 = a34
    a44 = Szz - SxxpSyy - mxEigenV
    a3344_4334 = a33 * a44 - a43 * a34
    a6444_4234 = a64 * a44-a42*a34
    a6443_4233 = a64 * a43 - a42 * a33
    a3143_4133 = a31 * a43-a41*a33
    a3144_4134 = a31 * a44 - a41 * a34
    a3142_4164 = a31 * a42-a41*a64
    q1 = a22*a3344_4334-a23*a6444_4234+a24*a6443_4233
    q2 = -a21*a3344_4334+a23*a3144_4134-a24*a3143_4133
    q3 = a21*a6444_4234-a22*a3144_4134+a24*a3142_4164
    q4 = -a21*a6443_4233+a22*a3143_4133-a23*a3142_4164

    qsqr = q1 * q1 + q2 * q2 + q3 * q3 + q4 * q4

    # When the norm of the current column of the adjoint matrix is too small,
    # try the other columns one after the other. These fallbacks are rarely
    # needed but keep the result well defined in degenerate cases.
    if (qsqr < evecprec):
        q1 = a12*a3344_4334 - a13*a6444_4234 + a14*a6443_4233
        q2 = -a11*a3344_4334 + a13*a3144_4134 - a14*a3143_4133
        q3 = a11*a6444_4234 - a12*a3144_4134 + a14*a3142_4164
        q4 = -a11*a6443_4233 + a12*a3143_4133 - a13*a3142_4164
        qsqr = q1*q1 + q2 *q2 + q3*q3+q4*q4

        if (qsqr < evecprec):
            a1644_1423 = a13 * a24 - a14 * a23
            a1224_1422 = a12 * a24 - a14 * a22
            a1223_1642 = a12 * a23 - a13 * a22
            a1124_1421 = a11 * a24 - a14 * a21
            a1123_1641 = a11 * a23 - a13 * a21
            a1122_1221 = a11 * a22 - a12 * a21

            q1 = a42 * a1644_1423 - a43 * a1224_1422 + a44 * a1223_1642
            q2 = -a41 * a1644_1423 + a43 * a1124_1421 - a44 * a1123_1641
            q3 = a41 * a1224_1422 - a42 * a1124_1421 + a44 * a1122_1221
            q4 = -a41 * a1223_1642 + a42 * a1123_1641 - a43 * a1122_1221
            qsqr = q1*q1 + q2 *q2 + q3*q3+q4*q4

            if (qsqr < evecprec):
                q1 = a64 * a1644_1423 - a33 * a1224_1422 + a34 * a1223_1642
                q2 = -a31 * a1644_1423 + a33 * a1124_1421 - a34 * a1123_1641
                q3 = a31 * a1224_1422 - a64 * a1124_1421 + a34 * a1122_1221
                q4 = -a31 * a1223_1642 + a64 * a1123_1641 - a33 * a1122_1221
                qsqr = q1*q1 + q2 *q2 + q3*q3 + q4*q4

                if (qsqr < evecprec):
                    # if qsqr is still too small, return the identity matrix.
                    rot[0] = rot[4] = rot[8] = 1.0
                    rot[1] = rot[2] = rot[3] = rot[5] = rot[6] = rot[7] = 0.0

                    return rms

    # normalise (q1, q2, q3, q4) and expand it into the 3x3 rotation matrix
    normq = sqrt(qsqr)
    q1 /= normq
    q2 /= normq
    q3 /= normq
    q4 /= normq

    a2 = q1 * q1
    x2 = q2 * q2
    y2 = q3 * q3
    z2 = q4 * q4

    xy = q2 * q3
    az = q1 * q4
    zx = q4 * q2
    ay = q1 * q3
    yz = q3 * q4
    ax = q1 * q2

    rot[0] = a2 + x2 - y2 - z2
    rot[1] = 2 * (xy + az)
    rot[2] = 2 * (zx - ay)
    rot[3] = 2 * (xy - az)
    rot[4] = a2 - x2 + y2 - z2
    rot[5] = 2 * (yz + ax)
    rot[6] = 2 * (zx + ay)
    rot[7] = 2 * (yz - ax)
    rot[8] = a2 - x2 - y2 + z2

    return rms

cdef void CenterCoords(np.ndarray[np.float64_t,ndim=2] coords, int N, np.ndarray[np.float64_t,ndim=1] weights):
    """
    Translate coords in place so that its (weighted) mean is at the origin.

    :Input:
       - coords  -- coordinates indexed as coords[0..2, i] (modified in place)
       - N       -- number of columns of coords to process
       - weights -- per-column weights, or None for a plain average
    """
    cdef double xsum, ysum, zsum, wsum
    cdef unsigned int i

    xsum = ysum = zsum = 0.0

    # "is not None" on purpose: "!= None" is an element-wise comparison on a
    # numpy array and its truth value is ambiguous for more than one element.
    if (weights is not None):
        wsum = 0.0
        for i in range(N):
            xsum += weights[i] * coords[0,i]
            ysum += weights[i] * coords[1,i]
            zsum += weights[i] * coords[2,i]

            wsum += weights[i]

        xsum /= wsum
        ysum /= wsum
        zsum /= wsum

    else:
        for i in range(N):
            xsum += coords[0,i]
            ysum += coords[1,i]
            zsum += coords[2,i]

        xsum /= N
        ysum /= N
        zsum /= N

    for i in range(N):
        coords[0,i] -= xsum
        coords[1,i] -= ysum
        coords[2,i] -= zsum

    return

@cython.boundscheck(False)
@cython.wraparound(False)
def CalcRMSDRotationalMatrix(np.ndarray[np.float64_t,ndim=2] ref,
                             np.ndarray[np.float64_t,ndim=2] conf,
                             np.ndarray[np.float64_t,ndim=1] rot,
                             np.ndarray[np.float64_t,ndim=1] weights):
    """
    Calculate the RMSD & rotational matrix.

    CalcRMSDRotationalMatrix(ref, conf, rot, weights)

    :Input:
       - ref     -- reference structure coordinates (*must* be `numpy.float64`)
       - conf    -- candidate structure coordinates (*must* be `numpy.float64`)
       - rot[9]  -- array to store rotation matrix; set to None if only calculating rmsd (modified in place)
       - weights -- the weight array, one value per atom; set to None if not needed

    :Output:
       - rot[9]  -- rotation matrix

    :Returns:
       - RMSD value

    :Raises:
       - ValueError if the arrays are empty or their sizes are inconsistent

    .. Note:: All arrays *must* be of type `numpy.float64`. Coordinates are
              read as array[0..2, i]; the number of atoms is conf.shape[1].
    .. Warning:: `ref` and `conf` are centered **in place**; pass copies if
                 the original coordinates are still needed.
    """
    cdef double rmsd
    cdef int N = conf.shape[1]
    cdef double E0
    cdef np.ndarray[np.float64_t,ndim=1] A = np.zeros(9, dtype=ref.dtype)

    # Bounds checking is disabled for this function, so inconsistent sizes
    # would silently read or write out of bounds: reject them up front.
    if N == 0:
        raise ValueError("conf does not contain any atom")
    if conf.shape[0] < 3 or ref.shape[0] < 3:
        raise ValueError("ref and conf must have (at least) 3 rows: x, y and z")
    if ref.shape[1] != N:
        raise ValueError("ref and conf must contain the same number of atoms")
    if weights is not None and weights.shape[0] < N:
        raise ValueError("weights must contain one value per atom")
    if rot is not None and rot.shape[0] < 9:
        raise ValueError("rot must have room for 9 values")

    CenterCoords(ref, N, weights)
    CenterCoords(conf, N, weights)

    E0 = InnerProduct(A, conf, ref, N, weights)
    rmsd = FastCalcRMSDAndRotation(rot, A, E0, N)

    return rmsd

# the following lines are here to show how to apply the rotational matrix in order to recover the right RMSD
#cdef double trmsd = 0.0
#cdef np.ndarray[np.float64_t,ndim=1] trot = np.zeros(3, dtype=ref.dtype)

#rmsd = CalcRMSDRotationalMatrix(ref, conf, rot, weights)

#for k in xrange(N):
#    for i in xrange(3):
#        trot[i] = 0.0
#        for j in xrange(3):
#            trot[i] += rot[3*i+j] * ref[j,k]
#    trmsd += (trot[0]-conf[0,k])**2 + (trot[1]-conf[1,k])**2 + (trot[2]-conf[2,k])**2
#trmsd = sqrt(trmsd/N)
#print rmsd, trmsd
import os
import sys
import shutil
import math
import argparse

from glob import glob
import pandas as pd
import numpy as np

from mdkit.utility import mol2
from dockbox.dbxtools import *

# Extract the best docking pose(s) found by rundbx, either according to one
# scoring function (-sf), by standard consensus docking (-cd) or by
# score-based consensus docking (-sbcd). Results are written to a folder named
# after the selected mode (sf_*, cd_* or sbcd_*).

# programs/scoring functions for which the best pose has the highest score
programs_with_positive_scores = ['gnina']


def has_positive_scores(name):
    """Return True if `name` starts with one of programs_with_positive_scores."""
    return any(name.startswith(prefix) for prefix in programs_with_positive_scores)


# command-line arguments and options
parser = argparse.ArgumentParser(description="Extract top docking poses after rundbx finished.")

parser.add_argument('--all-sites',
    dest='combine_sites',
    action='store_true',
    default=False,
    help='Select top poses over all sites. If not specified, extract top pose separately for each site')

parser.add_argument('--all-targets',
    dest='combine_targets',
    action='store_true',
    default=False,
    help='Select top poses over all targets. If not specified, extract top pose separately for each target. '
         'A "%s/%s" architecture of the folders is assumed'%(ligand_prefix, target_prefix))

parser.add_argument('-csvl',
    type=str,
    dest='csvfile_l',
    metavar='FILE',
    help='Filename containing info about compounds. Used to add names of compounds. Default: none')

parser.add_argument('-csvr',
    dest='csvfile_r',
    default='targets.csv',
    metavar='FILENAME',
    help='Filename containing info about targets (columns "targetID" and "pdbfile"). '
         'Only read when the directories are organized per target. Default: targets.csv')

parser.add_argument('-cutoff',
    dest='cutoff',
    type=float,
    metavar='RMSD_VALUE',
    default=2.0,
    help='RMSD cutoff used for consensus docking or score-based consensus docking. Default: 2.0 A')

parser.add_argument('-d',
    dest='docking_programs',
    nargs='+',
    metavar=('PRGM1', 'PRGM2'),
    help='Docking programs (instances) to be considered when extracting best poses')

parser.add_argument('-dirs',
    dest='dirs',
    nargs='+',
    default=['.'],
    metavar=('DIR1', 'DIR2'),
    help='Directories considered for analysis. Should contain a folder called "poses". Default: curr. dir')

parser.add_argument('-skip_errors',
    dest='skip_errors',
    action='store_true',
    default=False,
    help='Skip ligands that were not done')

parser.add_argument('-site',
    dest='sites',
    nargs='+',
    metavar=('SITE1', 'SITE2'),
    help='Sites to be considered when extracting best poses')

parser.add_argument('-save',
    default=False,
    action='store_true',
    help='Save structures of top poses (.mol2)')

group = parser.add_mutually_exclusive_group(required=False)

group.add_argument('-sf',
    dest='sf',
    metavar='FUNC',
    help='Scoring functions used to extract the best pose (combination of scores)')

group.add_argument('-cd',
    dest='cd',
    nargs='+',
    metavar='PRGM',
    help='Docking programs used for standard consensus docking')

group.add_argument('-sbcd',
    dest='sbcd',
    nargs='+',
    metavar='FUNC',
    help='Scoring functions used for score-based consensus docking')

# update parsers with arguments
args = parser.parse_args()

#pd.set_option('display.max_rows', 20000)

def add_names(csvfile, df):
    """Return df merged (on 'ligID') with the 'name' column read from csvfile."""
    df_ligands = pd.read_csv(csvfile)

    df = df.merge(df_ligands[['ligID', 'name']], on='ligID')
    return df

dirs = []
for dir in args.dirs:
    if os.path.isdir(dir+'/poses'):
        dirs.append(os.path.relpath(dir))

    elif not args.skip_errors:
        raise ValueError('Poses folder in %s does not exist!'%dir)

if not dirs:
    raise ValueError('No poses folders found in any of %s'%(', '.join(args.dirs)))

iscwd, isligID, istargetID = check_directories(dirs)

if not istargetID and args.combine_targets:
    args.combine_targets = False

if istargetID:
    # check if info related to targets is there!
    df_targets = pd.read_csv(args.csvfile_r)
    csvfile_r_dir = os.path.dirname(args.csvfile_r)

    # update relative paths
    if not csvfile_r_dir:
        csvfile_r_dir = '.'
    df_targets['pdbfile'] = df_targets['pdbfile'].apply(lambda x: os.path.relpath(csvfile_r_dir+'/'+x))

# check options relative to best poses extraction
found_scoring_functions = []
if args.sbcd:
    scoring_functions = args.sbcd
    programs_consensus = args.sbcd
    if len(args.sbcd) < 2:
        raise ValueError('Number of functions for score-based consensus docking should be at least 2!')
    resultsdir = 'sbcd_' + '_'.join(scoring_functions)

elif args.cd:
    scoring_functions = None
    programs_consensus = args.cd
    if len(args.cd) < 2:
        raise ValueError('Number of programs for consensus docking should be at least 2!')
    resultsdir = 'cd_' + '_'.join(programs_consensus)

elif args.sf:
    scoring_functions = args.sf
    programs_consensus = None
    resultsdir = 'sf_' + args.sf

else:
    # without a selection mode neither resultsdir nor best_poses can be
    # defined: stop now instead of failing with a NameError later on
    parser.error('one of the options -sf, -cd or -sbcd is required')

def save_top_poses(dir, row, suffix=''):
    """Copy the ligand file referenced by row['file_l'+suffix] to dir/ligand<suffix>.mol2."""
    file_l = row['file_l'+suffix]
    shutil.copyfile(file_l, dir+'/ligand%s.mol2'%suffix)

if args.csvfile_l is not None:
    if not os.path.isfile(args.csvfile_l):
        raise IOError("csvfile %s not found!"%args.csvfile_l)

# columns identifying a docking run
features_ids = []
if isligID:
    features_ids += ['ligID']
elif not istargetID:
    features_ids += ['dir']

if istargetID:
    features_ids += ['targetID']

files_r = {}
poses = []
for jdx, dir in enumerate(dirs):
    posedir = dir + '/poses'
    ligID, targetID = get_IDs(dir, isligID, istargetID)

    info_dir = {}
    for ft in features_ids:
        info_dir[ft] = []
    info_dir['file_l'] = []
    if istargetID:
        info_dir['file_r'] = []
    for ft in ['site', 'program', 'instance', 'index_pose', 'score']:
        info_dir[ft] = []

    # get location of poses and receptor files
    datfile = posedir + '/info.dat'
    with open(datfile, 'r') as inff:
        # skip the first two lines
        next(inff)
        next(inff)
        for line in inff:
            program, nposes, firstidx, site = line.strip().split(',')
            firstidx = int(firstidx)
            nposes = int(nposes)
            instance = program
            if site:
                instance += '.' + site
            poses_idxs = range(firstidx, firstidx+nposes)

            for index, idx in enumerate(poses_idxs):
                file_l = posedir + '/pose-%s.mol2'%idx
                if os.path.isfile(file_l):
                    info_dir['file_l'].append(os.path.relpath(file_l))
                else:
                    raise IOError("File %s does not exist!"%file_l)
                info_dir['site'].append(site)
                info_dir['program'].append(program)
                info_dir['instance'].append(instance)
                info_dir['index_pose'].append(index)

                if isligID:
                    info_dir['ligID'].append(ligID)
                elif not istargetID:
                    info_dir['dir'].append(dir)

                if istargetID:
                    info_dir['targetID'].append(targetID)

                    # get the filename of the corresponding receptor file
                    row = df_targets[df_targets['targetID']==targetID]
                    file_r = row['pdbfile'].values[0]
                    info_dir['file_r'].append(file_r)

                    # update the dictionary of targets
                    if targetID not in files_r:
                        files_r[targetID] = file_r

            nscores = 0
            # extract original scores
            with open(dir+'/'+instance+'/score.out', 'r') as sout:
                for line_s in sout:
                    nscores += 1
                    info_dir['score'].append(float(line_s.strip()))
            if nscores != nposes:
                raise ValueError("Number of poses different from number of scores (%s/%s)"%(dir,instance))

    # number of poses listed for this directory; every rescoring file is
    # expected to provide exactly one score per pose
    nposes_total = len(info_dir['file_l'])

    # extract all scores
    for scorefile in sorted(glob(dir+'/rescoring/*.score')):
        sf = os.path.basename(scorefile).split('.')[0]
        nscores = 0
        if jdx == 0:
            found_scoring_functions.append(sf)
        elif sf not in found_scoring_functions:
            raise ValueError("%s scores not computed in every directory!"%sf)
        info_dir[sf] = []
        with open(scorefile, 'r') as sout:
            for line_s in sout:
                info_dir[sf].append(float(line_s))
                nscores += 1
        # incomplete rescoring: discard the scores of this function
        if nscores != nposes_total:
            info_dir[sf] = [float('nan') for idx in range(nposes_total)]

    df_dir = pd.DataFrame(info_dir)
    if args.docking_programs:
        df_dir = df_dir[df_dir['program'].isin(args.docking_programs)]

    if args.sites:
        df_dir = df_dir[df_dir['site'].isin(["site%s"%idx_site for idx_site in args.sites])]
    poses.append(df_dir)

if poses:
    poses = pd.concat(poses, sort=False).reset_index(drop=True)
    if args.csvfile_l and isligID:
        poses = add_names(args.csvfile_l, poses)
else:
    sys.exit("No poses to extract!")

# define columns for groupby operation
groupby_columns = []

if isligID:
    groupby_columns += ['ligID']
    column_ligand = 'ligID'

if not args.combine_targets and istargetID:
    groupby_columns += ['targetID']

is_site = list(set(poses['site'])) != ['']
if is_site and not args.combine_sites:
    groupby_columns += ['site']

if not isligID:
    if not istargetID:
        groupby_columns += ['dir']
        column_ligand = 'dir'
    elif args.combine_targets:
        # constant column so that all the poses end up in a single group
        poses['dummy'] = 0
        groupby_columns += ['dummy']

if args.sbcd or args.cd:
    best_poses = []
    for prgm in programs_consensus:
        if args.sbcd:
            if groupby_columns:
                poses_groupby = poses.groupby(groupby_columns)

            if has_positive_scores(prgm):
                minidxs = poses_groupby[prgm].idxmax()
            else:
                minidxs = poses_groupby[prgm].idxmin()

            # handle cases where all the scores provided by a program are nans (to be changed when istargetID is True)
            # NOTE(review): for tuple group labels only the first element is
            # used below -- confirm this is what is wanted with several groupby columns
            lignans = minidxs[minidxs.apply(np.isnan)]
            for ligID, row in lignans.items():
                if isinstance(ligID, tuple):
                    ligID = ligID[0]
                row_poses = poses[poses[column_ligand]==ligID].iloc[0]
                minidxs[ligID] = float(row_poses.name)
            minidxs = minidxs.astype(int)

            # get best poses from indices
            best_poses_prgm = poses.loc[minidxs].copy()

            # set file_l to nan for those with no score
            for ligID, row in lignans.items():
                best_poses_prgm_row = best_poses_prgm[best_poses_prgm[column_ligand]==ligID]
                indices = best_poses_prgm_row.index.values
                # .loc and not .at: several row labels may be selected at once
                best_poses_prgm.loc[indices, 'file_l'] = np.nan

        elif args.cd:
            poses_prgm = poses[poses['program']==prgm]
            poses_groupby = poses_prgm.groupby(groupby_columns)
            if has_positive_scores(prgm):
                best_poses_prgm = poses_prgm.loc[poses_groupby['score'].idxmax()]
            else:
                best_poses_prgm = poses_prgm.loc[poses_groupby['score'].idxmin()]

        new_columns_names = [] # renaming columns according to the scoring function
        for col in best_poses_prgm.columns.values:
            if col in groupby_columns + ['name']:
                new_columns_names.append(col)
            elif not isligID and col == 'dir':
                new_columns_names.append(col)
            elif args.sbcd and col == prgm:
                new_columns_names.append(prgm)
            else:
                new_columns_names.append(col + '_' + prgm)
        best_poses_prgm.columns = new_columns_names
        best_poses.append(best_poses_prgm)

    # copy, so that adding 'name' does not modify groupby_columns as well
    columns_to_be_merged = list(groupby_columns)
    if 'name' in poses.columns.values:
        columns_to_be_merged += ['name']

    # merge best poses into single dataframe
    best_poses_merged = best_poses[0]
    for item in best_poses[1:]:
        best_poses_merged = best_poses_merged.merge(item, on=columns_to_be_merged, how='outer')

    if args.cd and istargetID:
        # handle cases when poses were generated with one program but not with the other one (only for CD)
        for prgm1 in programs_consensus:
            best_poses_merged_null = best_poses_merged[best_poses_merged['targetID_'+prgm1].isnull()]

            for idx, row in best_poses_merged_null.iterrows():
                for prgm2 in programs_consensus:
                    if isinstance(row['targetID_'+prgm2], str): # occurring when the target ID is not nan
                        best_poses_merged.loc[idx,'targetID_'+prgm1] = row['targetID_'+prgm2]
                        break

    # RMSD between the best pose of the first program and the one of every other program
    prgm_first = programs_consensus[0]
    if args.combine_targets:
        rmsd_rot_trans = get_rmsd_rotation_and_translations_all_targets(files_r)

        for prgm in programs_consensus[1:]:
            best_poses_merged['rmsd_'+prgm_first+'_'+prgm] = best_poses_merged.apply(
                lambda row: compute_rmsd(row['file_l_'+prgm_first], row['file_l_'+prgm],
                    rotmat=rmsd_rot_trans[row['targetID_'+prgm_first]][row['targetID_'+prgm]][0],
                    trans1=rmsd_rot_trans[row['targetID_'+prgm_first]][row['targetID_'+prgm]][1],
                    trans2=rmsd_rot_trans[row['targetID_'+prgm_first]][row['targetID_'+prgm]][2]),
                axis=1)
    else:
        for prgm in programs_consensus[1:]:
            best_poses_merged['rmsd_'+prgm_first+'_'+prgm] = best_poses_merged.apply(
                lambda row: compute_rmsd(row['file_l_'+prgm_first], row['file_l_'+prgm]), axis=1)

    # consensus is reached when all the RMSDs are within the cutoff
    rmsd_columns = [col for col in best_poses_merged.columns.values if col.startswith('rmsd')]
    best_poses = best_poses_merged.assign(consensus=(best_poses_merged[rmsd_columns]<=args.cutoff).all(axis=1))

elif args.sf:
    # remove nan values
    poses = poses.dropna(subset=[args.sf])
    poses_groupby = poses.groupby(groupby_columns)

    # idxmax/idxmin have to be *called*: .loc needs the row labels, not the method
    if has_positive_scores(args.sf):
        best_poses = poses.loc[poses_groupby[args.sf].idxmax()]
    else:
        best_poses = poses.loc[poses_groupby[args.sf].idxmin()]

features_ids_sorted = list(features_ids)
if istargetID and 'targetID' not in best_poses.columns.values:
    features_ids_sorted.remove('targetID')
if features_ids_sorted:
    best_poses = best_poses.sort_values(features_ids_sorted)

shutil.rmtree(resultsdir, ignore_errors=True)
os.mkdir(resultsdir)

# NOTE(review): 'instance' is listed twice, hence written twice to poses.csv;
# kept as is in case the column layout is relied upon -- confirm
features_csv = []
if args.csvfile_l:
    features_csv.append('name')
features_csv += features_ids
features_csv += ['instance']

if istargetID:
    features_csv += ['file_l', 'file_r', 'instance']
else:
    features_csv += ['file_l', 'instance']

if is_site:
    features_csv.append('site')
features_csv += found_scoring_functions + ['score']

# save all the poses to a file
csvfile = resultsdir + '/poses.csv'
poses[features_csv].to_csv(csvfile, index=False, float_format='%.3f')

if best_poses is not None:
    if args.sbcd or args.cd:
        features_csv_best_poses = []
        if args.csvfile_l:
            features_csv_best_poses.append('name')

        if isligID:
            features_csv_best_poses.append('ligID')

        elif not istargetID:
            features_csv_best_poses.append('dir')

        if istargetID and not args.combine_targets:
            features_csv_best_poses.append('targetID')

        for prgm in programs_consensus:
            if istargetID and args.combine_targets:
                features_csv_best_poses.append('targetID_'+prgm)
            features_csv_best_poses.append('file_l_'+prgm)
            features_csv_best_poses.append('instance_'+prgm)
            if args.sbcd:
                features_csv_best_poses.append(prgm)
            else:
                features_csv_best_poses.append('score_'+prgm)
            if is_site:
                if args.combine_sites:
                    features_csv_best_poses.append('site_'+prgm)
                else:
                    # NOTE(review): appended once per program, i.e. the
                    # 'site' column is repeated in best_poses.csv -- confirm
                    features_csv_best_poses.append('site')
        features_csv_best_poses.extend([col for col in best_poses.columns.values if col.startswith('rmsd')])
        features_csv_best_poses.append('consensus')

    elif args.sf:
        features_csv_best_poses = list(features_csv)

    filename = resultsdir + '/best_poses.csv'
    best_poses[features_csv_best_poses].to_csv(filename, index=False, float_format='%.3f')

if args.combine_targets and istargetID:
    features_ids.remove('targetID')

if args.save:
    for idx, row in best_poses.iterrows():
        # with consensus docking, only the poses for which a consensus was reached are saved
        if args.sf or ((args.sbcd or args.cd) and row['consensus']):
            if isligID:
                newdir = resultsdir + '/' + '/'.join(row[features_ids])
            elif row['dir'] != '.':
                newdir = resultsdir + '/' + '_'.join(row['dir'].split('/'))
            else:
                newdir = resultsdir
            if not os.path.isdir(newdir):
                os.makedirs(newdir)

            if args.sbcd or args.cd:
                for prgm in programs_consensus:
                    save_top_poses(newdir, row, suffix='_'+prgm)
            else:
                save_top_poses(newdir, row)
import os
import sys
import glob
import shutil
import subprocess
import method
import license

from mdkit.utility import reader
from mdkit.utility import mol2

required_programs = ['moebatch']

default_settings = {'placement': 'Triangle Matcher', 'placement_nsample': '10', 'placement_maxpose': '250', 'scoring': 'London dG',
'maxpose': '30', 'remaxpose': '5', 'gtest': '0.01', 'rescoring': 'GBVI/WSA dG'}

known_scorings = ['ASE', 'Affinity dG', 'Alpha HB', 'GBVI/WSA dG', 'London dG', 'None']
known_placements = ['Alpha PMI', 'Alpha Triangle', 'Proxy Triangle', 'Triangle Matcher']

known_settings = {'placement': known_placements, 'scoring': known_scorings, 'rescoring': known_scorings}


def _svl_vector(text):
    """Turn a comma-separated string such as '1, 2,3' into the SVL vector '[1, 2, 3]'."""
    return '[' + ', '.join([item.strip() for item in text.split(',')]) + ']'


class Moe(method.DockingMethod):

    def __init__(self, instance, site, options):
        """Set up the docking method and store the binding-site box in the options.

        site[1] and site[2] are comma-separated strings; they are stored as
        SVL vectors under the 'center_bs' and 'boxsize_bs' options.
        """
        super(Moe, self).__init__(instance, site, options)

        # set box center
        self.options['center_bs'] = _svl_vector(site[1])

        # set box size
        self.options['boxsize_bs'] = _svl_vector(site[2])

    def write_docking_script(self, filename, file_r, file_l):
        """Write the files needed to run a MOE docking.

        The SVL script is written to 'moe_dock.svl' and a bash script that
        converts the ligand to 'ligand.mdb' and starts the docking is written
        to `filename`.

        :param filename: name of the bash script to write
        :param file_r: receptor file passed to moebatch with -rec
        :param file_l: ligand .mol2 file imported into ligand.mdb
        """
        self.write_moe_docking_script('moe_dock.svl')

        # create mdb for ligand
        convertmol2_cmd = license.wrap_command(
            "moebatch -exec \"mdb_key = db_Open ['ligand.mdb','create']; db_Close mdb_key; "
            "db_ImportMOL2 ['%(file_l)s','ligand.mdb', 'molecule']\"" % {'file_l': file_l}, 'moe')

        # cmd for docking
        dock_cmd = license.wrap_command(
            "moebatch -run moe_dock.svl -rec %(file_r)s -lig ligand.mdb" % {'file_r': file_r}, 'moe')

        # write script
        with open(filename, 'w') as ff:
            script ="""#!/bin/bash
# convert .mol2 file to mdb
%(convertmol2_cmd)s

# run docking
%(dock_cmd)s\n""" % {'convertmol2_cmd': convertmol2_cmd, 'dock_cmd': dock_cmd}
            ff.write(script)

    def write_moe_docking_script(self, filename):
        """Write the SVL docking script to `filename`.

        The template is filled with the entries of self.options (center_bs,
        boxsize_bs, placement, placement_nsample, placement_maxpose, scoring,
        maxpose, gtest, rescoring and remaxpose).
        """
        # Explicit mapping for the template: updating locals() is not
        # guaranteed to be visible through a later locals() call.
        settings = dict(self.options)

        # write MOE (SVL) script
        with open(filename, 'w') as ff:
            script ="""#svl
function DockAtoms, DockFile;
function DockMDBwAtoms, DockMDBwFile;

global argv;
function ArgvPull;

local function main []

    // Set potential and setup parameters
    pot_Load '$MOE/lib/Amber10EHT.ff';

    pot_Setup [
        strEnable: 1,
        angEnable: 1,
        stbEnable: 1,
        oopEnable: 1,
        torEnable: 1,
        vdwEnable: 1,
        eleEnable: 1,
        solEnable: 0,
        resEnable: 1,
        strWeight: 1,
        angWeight: 1,
        stbWeight: 1,
        oopWeight: 1,
        torWeight: 1,
        vdwWeight: 1,
        eleWeight: 1,
        solWeight: 1,
        resWeight: 1,
        cutoffEnable: 1,
        cutoffOn: 8,
        cutoffOff: 10,
        eleDist: 2,
        vdwScale14: 0.5,
        vdwBuffer1: 0,
        vdwBuffer2: 0,
        eleScale14: 0.833333,
        eleDielectric: 1,
        eleBuffer: 0,
        solDielectric: 80,
        solDielectricOffset: 0,
        state0: 1,
        state1: 0,
        state2: 1,
        threadCount: 0
    ];

    ArgvReset ArgvExpand argv;
    local [recmdb, ligmdb, ph4file, outf] = ArgvPull [
        ['-rec','-lig','-ph4','-o'],
        1
    ];

    // If no receptor given as argument use default rec.moe
    if isnull recmdb then
        recmdb = 'rec.moe';
    endif

    local basename = fbase recmdb;
    local extension = fext recmdb;

    // output docking database file
    outf = 'dock.mdb';

    // Receptor file or database
    // Assume that the file is a moe or pdb file extract chains atoms

    local chains = ReadAuto [recmdb, []];
    local rec = cat cAtoms chains; // extract atom info from atom

    // get residues involved in the binding site
    local center_bs = %(center_bs)s; // center for the binding site
    local boxsize_bs = %(boxsize_bs)s; // size of the box for the binding site
    local residues_bs = []; // residues involved in binding site

    local idx, jdx;
    local com, dist;
    local isinbox;

    local rec_bs = cat cResidues chains; // extract residues info
    for idx = 1, length rec_bs loop
        com = oCenterOfMass rec_bs(idx);
        dist = sqrt add pow[sub[center_bs, com], 2];
        isinbox = 1;
        for jdx = 1, 3 loop
            if abs(center_bs(jdx)-com(jdx)) > 0.5*boxsize_bs(jdx) then
                isinbox = 0;
            endif
        endloop
        if isinbox == 1 then
            residues_bs = append [residues_bs, rec_bs(idx)];
        endif
    endloop

    rec_bs = cat rAtoms residues_bs;
    View (Atoms[]);

    local alpha_sites = run['sitefind.svl', [rec_bs, []], 'AlphaSites'];

    // Take first/highest scoring pocket alpha_sites(1)
    // Take fpos data alpha_sites(1)(1)
    // Take only coords of fpos data alpha_sites(1)(1)(2)
    local a_sites = apt cat alpha_sites(1)(1)(2); // x, y, z coords

    // Make dummy He atoms for alpha site
    // local dummy, x, y, z;
    // for x = 1, length a_sites(1) loop
    // dummy(x) = sm_Build ['[He]'];
    // aSetPos [dummy(x), [a_sites(1)(x), a_sites(2)(x), a_sites(3)(x)]];
    //endloop

    // Make dummy He atoms for alpha site
    local dummy, x, y, z;
    for x = 1, length a_sites loop
        dummy(x) = sm_Build ['[He]'];
        aSetPos [dummy(x), a_sites(x)];
    endloop

    // Make a collection of site atoms to send to docking
    // from the alpha site
    oSetCollection ['Site', dummy];
    local site = oGetCollection 'Site';

    // Ligand database
    local lmdb = _db_Open [ligmdb, 'read'];
    if lmdb == 0 then
        exit twrite ['Cannot read ligand mdb file {}', ligmdb];
    endif

    local ent = 0; // must have this set to zero
    while ent = db_NextEntry[lmdb, ent] loop; //loop through ligand database
        local ligdata = db_Read[lmdb, ent]; //read data for each entry
        local ligmoldata = ligdata.mol; // extract into moldata
        local ligchains = mol_Create ligmoldata; //create molecule in window
        local lig = cat cAtoms ligchains; // extract atom info from atom
    endloop

    // Set options for docking and refinement
    // maxpose is set to accept 50 poses, change as required
    local opt = [
        outrmsd: 1,
        sel_ent_only_rec: 0,
        sel_ent_only: 0,
        wall: [ '', 0, [ 0, 0, 0 ], [ 1000000, 1000000, 1000000 ], 0 ],
        csearch: 1,
        placement: '%(placement)s',
        placement_opt: [nsample : %(placement_nsample)s, maxpose : %(placement_maxpose)s ],
        scoring: '%(scoring)s',
        scoring_opt: [ train : 0 ],
        dup_placement: 1,
        maxpose: %(maxpose)s,
        refine: 'Forcefield',
        refine_opt: [ cutoff : 6, wholeres : 1, mmgbvi : 1, fixrec : 'Fix', tether : 10, gtest : %(gtest)s,
        maxit : 500, OverrideSetup : 1, k_potl : 100, offset : 0.4 ],
        rescoring: '%(rescoring)s',
        rescoring_opt: [ train : 0 ],
        dup_refine: 1,
        remaxpose: %(remaxpose)s,
        descexpr: '',
        receptor_mfield: '',
        ligand_mfield: '',
        tplate: [ ],
        tplateSel: [ ],
        //ph4: ph4file,
        ligmdbname: ligmdb,
        recmdbname: recmdb
    ];

    //Perform the docking
    DockFile [rec, site, ligmdb, outf, opt];

    oDestroy ligchains;
    db_Close lmdb;
    write ['Docking finished at {}.\\n', asctime []];

endfunction;""" % settings
            ff.write(script)
100, offset : 0.4 ], 215 | rescoring: '%(rescoring)s', 216 | rescoring_opt: [ train : 0 ], 217 | dup_refine: 1, 218 | remaxpose: %(remaxpose)s, 219 | descexpr: '', 220 | receptor_mfield: '', 221 | ligand_mfield: '', 222 | tplate: [ ], 223 | tplateSel: [ ], 224 | //ph4: ph4file, 225 | ligmdbname: ligmdb, 226 | recmdbname: recmdb 227 | ]; 228 | 229 | //Perform the docking 230 | DockFile [rec, site, ligmdb, outf, opt]; 231 | 232 | oDestroy ligchains; 233 | db_Close lmdb; 234 | write ['Docking finished at {}.\\n', asctime []]; 235 | 236 | endfunction;"""% locals() 237 | ff.write(script) 238 | 239 | def extract_docking_results(self, file_s, input_file_r, input_file_l): 240 | 241 | subprocess.check_output(license.wrap_command("moebatch -exec \"db_ExportTriposMOL2 ['dock.mdb', 'poses.mol2', 'mol', []]\"", 'moe'), shell=True, executable='/bin/bash') 242 | 243 | if os.path.exists('poses.mol2'): 244 | ligname = reader.open(input_file_l).ligname 245 | mol2.update_mol2file('poses.mol2', 'pose-.mol2', ligname=ligname, multi=True) 246 | os.remove('poses.mol2') 247 | 248 | # get SDF to extract scores 249 | sdffile = 'ligand.sdf' 250 | subprocess.check_output(license.wrap_command("moebatch -exec \"db_ExportSD ['dock.mdb', '%s', ['mol','S'], []]\""%sdffile, 'moe'), shell=True, executable='/bin/bash') 251 | with open(sdffile, 'r') as sdff: 252 | with open(file_s, 'w') as sf: 253 | for line in sdff: 254 | if line.startswith("> "): 255 | sf.write(sdff.next().strip()+'\n') 256 | os.remove(sdffile) 257 | else: 258 | open(file_s, 'w').close() 259 | 260 | def write_rescoring_script(self, filename, file_r, file_l): 261 | 262 | locals().update(self.options) 263 | 264 | if self.options['rescoring'] == 'prolig': 265 | rescoring_cmd = license.wrap_command("moebatch -run moe_rescoring.svl -rec %(file_r)s -lig %(file_l)s"%locals(), 'moe') # cmd for docking 266 | 267 | with open(filename, 'w') as file: 268 | script ="""#!/bin/bash 269 | echo "#svl 270 | function prolig_Calculate; 271 | 272 | 
global argv; 273 | function ArgvPull; 274 | 275 | local function main[] 276 | 277 | ArgvReset ArgvExpand argv; 278 | local [recmdb, ligmdb, outf] = ArgvPull [ 279 | ['-rec','-lig','-o'], 280 | 1 281 | ]; 282 | local lk = ReadTriposMOL2 [ligmdb, []]; 283 | 284 | // Load pdb 285 | local rk = ReadAuto [recmdb, []]; 286 | 287 | local itypes = ['hbond', 'metal', 'ionic', 'covalent', 'arene', 'distance']; 288 | local iract = prolig_Calculate [itypes, lk, rk, []]; 289 | //local iract_v = Formulate2DInteractions [lk, rk, []]; 290 | 291 | local idx; 292 | local interaction_energy = 0.; 293 | for idx = 1, length iract(1) loop 294 | if iract(1)(idx) == 'distance' then 295 | break; 296 | else 297 | interaction_energy = interaction_energy + iract(4)(idx); 298 | endif 299 | endloop 300 | 301 | write ['Interaction energy: {f.2} kCal/mol \\n', interaction_energy]; 302 | 303 | endfunction;" > moe_rescoring.svl 304 | 305 | %(rescoring_cmd)s 306 | """ %locals() 307 | file.write(script) 308 | 309 | else: 310 | convertmol2_cmd = license.wrap_command("moebatch -exec \"mdb_key = db_Open ['ligand.mdb','create']; db_Close mdb_key;\ 311 | db_ImportMOL2 ['%(file_l)s','ligand.mdb', 'molecule']\""%locals(), 'moe') # create mdb for ligand 312 | rescoring_cmd = license.wrap_command("moebatch -run moe_rescoring.svl -rec %(file_r)s -lig ligand.mdb"%locals(), 'moe') # cmd for docking 313 | 314 | # write vina script 315 | with open(filename, 'w') as file: 316 | script ="""#!/bin/bash 317 | 318 | %(convertmol2_cmd)s 319 | 320 | echo "#svl 321 | function DockAtoms, DockFile; 322 | function DockMDBwAtoms, DockMDBwFile; 323 | 324 | global argv; 325 | function ArgvPull; 326 | 327 | local function main [] 328 | 329 | // Set potential and setup parameters 330 | pot_Load '$MOE/lib/Amber10EHT.ff'; 331 | 332 | pot_Setup [ 333 | strEnable: 1, 334 | angEnable: 1, 335 | stbEnable: 1, 336 | oopEnable: 1, 337 | torEnable: 1, 338 | vdwEnable: 1, 339 | eleEnable: 1, 340 | solEnable: 0, 341 | resEnable: 1, 342 | 
strWeight: 1, 343 | angWeight: 1, 344 | stbWeight: 1, 345 | oopWeight: 1, 346 | torWeight: 1, 347 | vdwWeight: 1, 348 | eleWeight: 1, 349 | solWeight: 1, 350 | resWeight: 1, 351 | cutoffEnable: 1, 352 | cutoffOn: 8, 353 | cutoffOff: 10, 354 | eleDist: 2, 355 | vdwScale14: 0.5, 356 | vdwBuffer1: 0, 357 | vdwBuffer2: 0, 358 | eleScale14: 0.833333, 359 | eleDielectric: 1, 360 | eleBuffer: 0, 361 | solDielectric: 80, 362 | solDielectricOffset: 0, 363 | state0: 1, 364 | state1: 0, 365 | state2: 1, 366 | threadCount: 0 367 | ]; 368 | 369 | ArgvReset ArgvExpand argv; 370 | local [recmdb, ligmdb, ph4file, outf] = ArgvPull [ 371 | ['-rec','-lig','-ph4','-o'], 372 | 1 373 | ]; 374 | 375 | // If no receptor given as argument use default rec.moe 376 | if isnull recmdb then 377 | recmdb = 'rec.moe'; 378 | endif 379 | 380 | local basename = fbase recmdb; 381 | local extension = fext recmdb; 382 | 383 | // output docking database file 384 | outf = 'dock.mdb'; 385 | 386 | // Receptor file or database 387 | // Assume that the file is a moe or pdb file extract chains atoms 388 | 389 | local chains = ReadAuto [recmdb, []]; 390 | local rec = cat cAtoms chains; // extract atom info from atom 391 | 392 | local alpha_sites = run['sitefind.svl', [rec, []], 'AlphaSites']; 393 | 394 | // Take first/highest scoring pocket alpha_sites(1) 395 | // Take fpos data alpha_sites(1)(1) 396 | // Take only coords of fpos data alpha_sites(1)(1)(2) 397 | local a_sites = apt cat alpha_sites(1)(1)(2); // x, y, z coords 398 | 399 | // Make dummy He atoms for alpha site 400 | local dummy, x, y, z; 401 | for x = 1, length a_sites loop 402 | dummy(x) = sm_Build ['[He]']; 403 | aSetPos [dummy(x), a_sites(x)]; 404 | endloop 405 | 406 | // Make a collection of site atoms to send to docking 407 | // from the alpha site 408 | oSetCollection ['Site', dummy]; 409 | local site = oGetCollection 'Site'; 410 | 411 | // Ligand database 412 | local lmdb = _db_Open [ligmdb, 'read']; 413 | if lmdb == 0 then 414 | exit 
twrite ['Cannot read ligand mdb file {}', ligmdb]; 415 | endif 416 | 417 | local ent = 0; // must have this set to zero 418 | while ent = db_NextEntry[lmdb, ent] loop; //loop through ligand database 419 | local ligdata = db_Read[lmdb, ent]; //read data for each entry 420 | local ligmoldata = ligdata.mol; // extract into moldata 421 | local ligchains = mol_Create ligmoldata; //create molecule in window 422 | local lig = cat cAtoms ligchains; // extract atom info from atom 423 | endloop 424 | 425 | // Set options for docking and refinement 426 | // maxpose is set to accept 50 poses, change as required 427 | local opt = [ 428 | outrmsd: 1, 429 | sel_ent_only_rec: 0, 430 | sel_ent_only: 0, 431 | wall: [ '', 0, [ 0, 0, 0 ], [ 1000000, 1000000, 1000000 ], 0 ], 432 | csearch: 1, 433 | placement: 'None', 434 | scoring: 'None', 435 | dup_placement: 1, 436 | rescoring: '%(rescoring)s', 437 | rescoring_opt: [ train : 0 ], 438 | dup_refine: 1, 439 | remaxpose: 1, 440 | descexpr: '', 441 | receptor_mfield: '', 442 | ligand_mfield: '', 443 | tplate: [ ], 444 | tplateSel: [ ], 445 | ligmdbname: ligmdb, 446 | recmdbname: recmdb 447 | ]; 448 | 449 | //Perform the docking 450 | DockFile [rec, site, ligmdb, outf, opt]; 451 | 452 | oDestroy ligchains; 453 | db_Close lmdb; 454 | write ['Docking finished at {}.\\n', asctime []]; 455 | 456 | endfunction;" > moe_rescoring.svl 457 | 458 | %(rescoring_cmd)s"""% locals() 459 | file.write(script) 460 | 461 | def extract_rescoring_results(self, file_s): 462 | 463 | locals().update(self.options) 464 | 465 | if self.options['rescoring'] == 'prolig': 466 | with open(file_s, 'a') as sf: 467 | if os.path.exists('moebatch.log'): 468 | with open('moebatch.log', 'r') as logf: 469 | is_interaction_energy = False 470 | for line in logf: 471 | if line.startswith("Interaction energy:"): 472 | sf.write(line.split()[-2]+'\n') 473 | is_interaction_energy = True 474 | break 475 | if not is_interaction_energy: 476 | sf.write('NaN\n') 477 | else: 478 | 
sf.write('NaN\n') 479 | else: 480 | # get SDF to extract scores 481 | sdffile = 'ligand.sdf' 482 | subprocess.check_output(license.wrap_command("moebatch -exec \"db_ExportSD ['dock.mdb', '%s', ['mol','S'], []]\""%sdffile, 'moe'), shell=True, executable='/bin/bash') 483 | with open(file_s, 'a') as sf: 484 | if os.path.exists(sdffile): 485 | with open(sdffile, 'r') as sdff: 486 | for line in sdff: 487 | if line.startswith("> "): 488 | sf.write(sdff.next().strip()+'\n') 489 | break 490 | os.remove(sdffile) 491 | else: 492 | sf.write('NaN\n') 493 | 494 | def write_sitefinder_script(filename, file_r, args): 495 | 496 | write_moe_sitefinder_script('sitefinder.svl', file_r, args) 497 | sitefinder_cmd = license.wrap_command("moebatch -run sitefinder.svl", 'moe') # cmd for docking 498 | 499 | # write script 500 | with open(filename, 'w') as file: 501 | script ="""#!/bin/bash 502 | # run docking 503 | %(sitefinder_cmd)s 504 | """% locals() 505 | file.write(script) 506 | 507 | def write_moe_sitefinder_script(filename, file_r, args): 508 | 509 | if args.nsitesmax == 0: 510 | nsitesmax = 'length alpha_sites' 511 | else: 512 | nsitesmax = str(args.nsitesmax) 513 | minplb = args.minplb 514 | 515 | # write svl script 516 | with open(filename, 'w') as file: 517 | script ="""#svl 518 | 519 | local function main [] 520 | local chains = ReadAuto ['%(file_r)s', []]; 521 | local rec = cat cAtoms chains; // extract atom info from atom 522 | 523 | // locate alpha sites 524 | local alpha_sites = run['sitefind.svl', [rec, []], 'AlphaSites']; 525 | 526 | local dummy, x, dist; 527 | local a_sites, plb; 528 | local minplb = %(minplb)s, maxdist; 529 | local idx; 530 | local nsites; 531 | local cog; // center of geometry 532 | 533 | write ['#ID PLB x y z radius\\n']; 534 | 535 | for idx = 1, length alpha_sites loop 536 | plb = alpha_sites(idx)(4)(2); 537 | 538 | if (plb > minplb or idx == 1) and idx <= %(nsitesmax)s then 539 | a_sites = alpha_sites(idx)(1)(2); 540 | nsites = length a_sites(1); 
541 | 542 | // get center of geometry of the alpha sites 543 | cog = [0.0, 0.0, 0.0]; 544 | for x = 1, nsites loop 545 | cog = add[[a_sites(1)(x), a_sites(2)(x), a_sites(3)(x)], cog]; 546 | endloop 547 | cog = div[cog, nsites]; 548 | maxdist = 0; 549 | 550 | // get distance to the farthest atom 551 | for x = 1, nsites loop 552 | dist = sqrt add pow[sub[[a_sites(1)(x), a_sites(2)(x), a_sites(3)(x)], cog], 2]; 553 | if dist > maxdist then 554 | maxdist = dist; 555 | endif 556 | endloop 557 | write ['{f.0} {f.2} {f.3} {f.3}\\n', idx, plb, cog, maxdist]; 558 | endif 559 | endloop 560 | endfunction;""" %locals() 561 | file.write(script) 562 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | .. image:: https://github.com/jp43/DockBox/raw/master/logo.png 2 | :align: right 3 | 4 | ******* 5 | DockBox 6 | ******* 7 | 8 | DockBox is a python wrapper library designed to facilitate the use of standard docking 9 | programs either alone or in combination. In addition, DockBox enables to rescore the 10 | generated docking poses with different popular scoring algorithms as well as to analyze 11 | the results using different consensus docking/scoring strategies. 
12 | 13 | **List of programs supported by DockBox** 14 | 15 | * **Docking**: 16 | 17 | * AutoDock 4 (http://autodock.scripps.edu) 18 | * AutoDock Vina (http://vina.scripps.edu/index.html) 19 | * DOCK 6 (http://dock.compbio.ucsf.edu/DOCK_6/index.htm) 20 | * MOE (https://www.chemcomp.com/MOE-Molecular_Operating_Environment.htm) 21 | 22 | * **Rescoring**: 23 | 24 | * AutoDock 4 (http://autodock.scripps.edu) 25 | * AutoDock Vina (https://vina.scripps.edu) 26 | * DOCK 6 (http://dock.compbio.ucsf.edu/DOCK_6/index.htm) 27 | * DSX (https://agklebe.pharmazie.uni-marburg.de/?id=11&lang=en) 28 | * MOE (https://www.chemcomp.com/MOE-Molecular_Operating_Environment.htm) 29 | 30 | None of the above docking or scoring programs are included in the current repository. 31 | Hence the user eager to test a docking/scoring program with DockBox should first 32 | install the program separately on the same system where DockBox is installed. 33 | 34 | To make a docking or scoring program usable by DockBox, make sure all the executable files 35 | required for each program can be directly run on the command line from any location, 36 | i.e., are in your PATH (see section **Required executable files** 37 | to find out the executables needed to run each docking/scoring program). For MOE, 38 | an additional environment variable, i.e., MOE, must be set up. 39 | 40 | .. contents:: **Table of Contents** 41 | 42 | Prerequisites 43 | ************* 44 | 45 | The following are the minimal requirements to install the DockBox module 46 | 47 | * python version 2.6 or 2.7 48 | 49 | * virtualenv version 1.11 or later 50 | 51 | 52 | Installation 53 | ************ 54 | 55 | The easiest way to install DockBox is to create a virtual environment. In this way, DockBox 56 | and its dependencies can easily be installed in user-space without clashing with potentially 57 | incompatible system-wide packages. 
58 | 59 | Once virtualenv has been properly installed, simply type (and press the return key) 60 | 61 | :: 62 | 63 | virtualenv env 64 | 65 | on the command line followed by 66 | 67 | :: 68 | 69 | source env/bin/activate 70 | 71 | to activate the virtual environment (do not forget to activate your environment every time you log into a new shell environment). 72 | 73 | Finally, the DockBox package can be set up by going into the DockBox installation directory and typing: 74 | 75 | :: 76 | 77 | python setup.py install 78 | 79 | 80 | Installation is complete! 81 | 82 | 83 | Required executable files 84 | ************************* 85 | 86 | Any docking/scoring software intended to be used by DockBox should be installed separately and should work as a standalone program. In addition, make sure the following executables are in your PATH, depending on which docking/scoring software should be used: 87 | 88 | * **AutoDock 4** 89 | 90 | * autodock4 91 | * autogrid4 92 | * obabel 93 | * prepare_dpf4.py 94 | * prepare_gpf4.py 95 | * prepare_ligand4.py 96 | * prepare_receptor4.py 97 | 98 | The autodock4 and autogrid4 executables can be installed by installing `AutoDock4 `_. All the .py executable files are included in the `MGLTools `_ package. It is important not to move those executables to different locations outside of the MGLTools package as DockBox uses those locations to deduce additional MGLTools python modules (PyBabel, MolKit). The obabel routine can be set up by installing OpenBabel from its official `website `_. 99 | 100 | * **AutoDock Vina** 101 | 102 | * obabel 103 | * prepare_ligand4.py 104 | * prepare_receptor4.py 105 | * vina 106 | 107 | The vina executable is available within the `AutoDock Vina `_ package. See AutoDock 4 section for other executables. 108 | 109 | * **DOCK 6** 110 | 111 | * chimera 112 | * dms 113 | * dock6 114 | * grid 115 | * showbox 116 | * sphere_selector 117 | * sphgen_cpp 118 | 119 | The chimera command is accessible when installing `CHIMERA `_. 
The dock6, grid, showbox and sphere_selector commands are all part of the `DOCK 6 package `_. The dms program can be installed separately from `here `_ while sphgen_cpp can be downloaded from the following `url `_. 120 | 121 | * **DSX** 122 | 123 | * dsx 124 | 125 | The dsx-like routines can be downloaded from the official `link `_. Note that DockBox assumes DSX is run via an executable file called dsx. Therefore, once you have selected the appropriate routine to be run (e.g., dsx_linux_64.lnx for linux systems), create a symbolic link via ln. For example, assuming you are located in the same directory as dsx_linux_64.lnx, you can use the following command: 126 | 127 | :: 128 | 129 | ln -s dsx_linux_64.lnx dsx 130 | 131 | * **MOE** 132 | 133 | * moebatch 134 | 135 | Can be downloaded over `there `_. Make sure the MOE environment variable (pointing towards MOE's installation directory) has been correctly exported. Note that a valid license of MOE is required. 136 | 137 | **Note**: In case rescoring is enabled (*rescoring=yes* in the configuration file, see below), it is recommended to also enable minimization of the poses (*minimize=yes* in the configuration file). In this case, AmberTools (serial version) 14, 15, 16 or 17 is required for minimization. Make sure main AMBER executables are accessible from the PATH variable including sander, tleap, antechamber and parmchk which are used by DockBox. 138 | 139 | Commands 140 | ******** 141 | 142 | The DockBox package contains two main routines: *rundbx* and *extract_dbx_best_poses*. The former is intended to be used solely for docking and rescoring while the latter makes it possible to analyze the results and to select the best pose(s) from a combination of scores or among different consensus docking schemes. 143 | 144 | *rundbx* 145 | ######## 146 | 147 | rundbx is used to dock a ligand to a protein structure and possibly minimize and rescore the output poses. 
When typing "rundbx -h" on the command line, the following help message will pop up: 148 | 149 | :: 150 | 151 | usage: rundbx [-h] -l INPUT_FILE_L -r INPUT_FILE_R -f CONFIG_FILE 152 | [-prepare_only] [-rescore_only] 153 | 154 | rundbx : dock and rescore with multiple programs -------- Requires one file 155 | for the ligand (1 struct.) and one file for the receptor (1 struct.) 156 | 157 | optional arguments: 158 | -h, --help show this help message and exit 159 | -l INPUT_FILE_L Ligand coordinate file(s): .mol2 160 | -r INPUT_FILE_R Receptor coordinate file(s): .pdb 161 | -f CONFIG_FILE config file containing docking parameters 162 | -prepare_only Only prepare scripts for docking (does not run docking) 163 | -rescore_only Run rescoring only 164 | 165 | * Inputs 166 | 167 | * -l INPUT_FILE_L: **.mol2** file containing the coordinates of the ligand (only one structure allowed) 168 | 169 | * -r INPUT_FILE_R: **.pdb** file containing the receptor coordinates (only one structure allowed) 170 | 171 | * -f CONFIG_FILE: **.ini** configuration file containing the docking parameters (see the section **Preparing the INI configuration file**) 172 | 173 | * Options 174 | 175 | * -prepare_only: generate all docking folders and scripts needed to run each docking program separately. Does not actually run docking. 176 | 177 | * -rescore_only: option used to perform the rescoring step only. Using this option implies that you have already run *rundbx* and generated a **poses** folder in the current directory. If a **rescoring** folder already exists as an output of a previous *rundbx* run, all data generated previously by rescoring with the same scoring functions as the current ones will be overwritten while data generated with scoring functions different from the current ones will be kept. 
178 | 179 | 180 | *extract_dbx_best_poses* 181 | ######################### 182 | 183 | *extract_dbx_best_poses* is the routine used to analyze the results and select the best docking poses from the outputs of the *rundbx* command. When typing "extract_dbx_best_poses -h" on the command line, the following help message will pop up: 184 | 185 | :: 186 | 187 | usage: extract_dbx_best_poses [-h] [-all-targets] [-all-isomers] [-csv FILE] 188 | [-cutoff RMSD_VALUE] [-d PRGM1 [PRGM2 ...]] 189 | [-dirs DIR1 [DIR2 ...]] [-r DIRECTORY NAME] 190 | [-s FUNC [FUNC ...] | -cd PRGM [PRGM ...] | 191 | -sbcd FUNC [FUNC ...]] 192 | 193 | Extract best docking poses after rundbx finished. 194 | 195 | optional arguments: 196 | -h, --help show this help message and exit 197 | -all-targets Select best poses over all the targets. If not 198 | specified, extract best pose separately for each 199 | target. A "lig/target/isomer" architecture of the 200 | folders is assumed 201 | -all-isomers Select best poses over all the isomers. If not 202 | specified, extract best pose separately for every 203 | isomer. A "lig/target/isomer" architecture of the 204 | folders is assumed 205 | -csv FILE .csv filename with compounds. Used to add names of 206 | compounds. Default: none 207 | -cutoff RMSD_VALUE RMSD cutoff used for consensus docking or score-based 208 | consensus docking. Default: 2.0 A 209 | -d PRGM1 [PRGM2 ...] Docking programs (instances) to be considered when 210 | extracting best poses 211 | -dirs DIR1 [DIR2 ...] 212 | Directories considered for analysis. Should contain a 213 | folder called "poses". Default: curr. dir 214 | -r DIRECTORY NAME Name of results directory. Default: results 215 | -s FUNC [FUNC ...] Scoring functions used to extract the best pose 216 | (combination of scores) 217 | -cd PRGM [PRGM ...] Docking programs used for standard consensus docking 218 | -sbcd FUNC [FUNC ...] 
219 | Scoring functions used for score-based consensus 220 | docking 221 | 222 | 223 | Using *rundbx* 224 | ************** 225 | 226 | The *rundbx* routine allows the user to dock and rescore a ligand to a protein target using multiple docking 227 | programs and scoring functions. Running *rundbx* is fairly simple as it requires only three input files, namely a PDB file 228 | including the protein structure to dock on (-r flag), a file with Tripos Mol2 format containing a 3D structure of the 229 | ligand (1 structure per file, -l flag) and an INI configuration file (-f flag) which contains all the options 230 | related to docking and/or rescoring (see section **Preparing the INI configuration file**). 231 | 232 | When finished correctly, a *rundbx* job should have created a folder called **poses** containing all the poses 233 | generated by the different docking programs as specified in the INI configuration file. Each pose is provided 234 | in a file with .mol2 format named *lig-<idx>.mol2*, where <idx> is the index of the pose. Within the **poses** 235 | folder, a file called info.dat can also be found. The file contains information relative to each docking program/site 236 | combination specified in the INI file, including the number of poses generated and the index of the first 237 | pose generated for that combination. 238 | 239 | Below is an example of an *info.dat* file obtained when docking was performed with Autodock, Autodock Vina and DOCK 6 on the same binding site: 240 | 241 | :: 242 | 243 | #1,28 244 | program,nposes,firstidx,site 245 | autodock,10,1, 246 | vina,10,11, 247 | dock,7,21, 248 | 249 | showing that a total of 27 poses were generated (28 minus 1). 10 poses were generated with Autodock, namely, poses from index 1 to 10, 10 were generated with Autodock Vina, i.e., poses from index 11 to 20, and 7 were generated with DOCK 6, poses from index 21 to 27. No label for the binding site was specified as docking was performed on the same site. 
250 | 251 | Other outputs of the *rundbx* command are folders created for every docking program/site combination specified in the INI file, which contain the docking poses, the docking scores (obtained with docking) and intermediate files generated by the docking software. For example, if Autodock and Autodock Vina were used to dock on three different binding sites called site1, site2 and site3 (see section **Preparing the INI configuration file**), then a total of six folders named **autodock.site1**, **autodock.site2**, **autodock.site3**, **vina.site1**, **vina.site2** and **vina.site3**, should have been created. 252 | 253 | Finally, if the rescoring option was enabled in the INI file, a folder called **rescoring** should have been created as well, containing file(s) named <program>.score, where <program> is the name of each program used for rescoring. 254 | 255 | 256 | Preparing the INI configuration file 257 | #################################### 258 | 259 | Besides one MOL2 file containing the ligand structure (-l flag) and one PDB file containing the receptor structure (-r flag), running *rundbx* requires a configuration file (-f flag) that specifies all the parameters needed for the docking procedure. 260 | 261 | The *rundbx* configuration file should be an INI file (https://en.wikipedia.org/wiki/INI_file), i.e., the file should be split in sections, each section name appearing on a line by itself, in square brackets ("[" and "]"). Each section contains a certain number of keys which refer to specific options used; all keys after the section declaration are associated with that section. Finally, every key should have a name (option name) and a value (option value), delimited by an equals sign (=). 262 | 263 | Below is an example of a configuration file used to dock on two binding sites and rescore with DrugScoreX (dsx), Autodock and Autodock Vina. 
264 | 265 | :: 266 | 267 | [DOCKING] 268 | site = site1, site2 269 | program = autodock, vina, dock 270 | rescoring = yes 271 | minimize = yes 272 | cleanup = yes 273 | 274 | [RESCORING] 275 | program = dsx, autodock, vina 276 | 277 | [DSX] 278 | pot_dir = /pmshare/jordane/CSD_potentials/DSX_CSD_Potentials_v0511/csd_pot_0511/ 279 | other_flags = -T0 1.0 -T1 1.0 -T3 1.0 -j 280 | 281 | [AUTODOCK] 282 | ga_run = 20 283 | spacing = 0.4 284 | 285 | [VINA] 286 | num_modes = 20 287 | 288 | [DOCK] 289 | nposes = 20 290 | 291 | [SITE1] 292 | center = 75.5, 80.0, 31.0 293 | boxsize = 40.0, 40.0, 40.0 294 | 295 | [SITE2] 296 | center = 75.5, 40.0, 50.0 297 | boxsize = 40.0, 40.0, 40.0 298 | 299 | General options 300 | ############### 301 | 302 | * The **DOCKING** section includes the software that should be used for docking, and if minimization, rescoring and/or cleanup should be performed. The docking software should be specified with comma separation through the key **program**. The keys relative to the **DOCKING** section are: 303 | 304 | 305 | * **program**: specifies the software which are used for docking (autodock, dock, moe and/or vina). Options relative to each program (or instance) are specified within the section of the same name. For example, if autodock is in the list of programs, options associated with autodock should be specified in the **AUTODOCK** section. In case the same software needs to be used multiple times, numbering can be appended to the name of the program (e.g., in the first example below, multiple runs of MOE are performed using different scoring methods: moe, moe1, moe2). 306 | 307 | * **minimize**: performs minimization on the generated poses (yes or no). 308 | 309 | * **rescoring**: performs rescoring on the generated poses (yes or no). It is strongly recommended to enable minimization in case rescoring is done. This will avoid a lot of clashes, especially when the software used for rescoring are different from those used for docking. 
If the rescoring option is enabled, a section RESCORING should be created that contains all the options relative to that step (see below). 310 | 311 | * **cleanup**: specifies if big intermediate files should be removed (yes or no). 312 | 313 | * **site**: specifies the labels for the binding sites in case multiple binding sites are considered (site1, site2,...). See the example configuration to dock on multiple binding sites, minimize and rescore the poses with multiple software. 314 | 315 | 316 | Docking and rescoring options relative to each program are detailed in the section **Docking/scoring options**. 317 | 318 | * The **SITE** section includes the information about the box to spot the binding site. The keys are the following: 319 | 320 | * **center**: x, y, z coordinates of the center of the binding box (in Å). 321 | 322 | * **boxsize**: size of the box along each dimension x, y, z. The dimensions of the box should be no more than 50.0, 50.0, 50.0 (in Å). 323 | 324 | 325 | * The **RESCORING** section has only one key specifying the programs used to rescore: 326 | 327 | * **program**: specifies the software which are used for rescoring (autodock, dock, dsx, moe and/or vina). Options relative to each program (or instance) are specified within the section of the same name. For example, if autodock is in the list of programs, options associated with autodock should be specified in the **AUTODOCK** section. In case the same software needs to be used multiple times, numbering can be appended to the name of the program (e.g., in the example below, multiple runs of MOE are performed using different scoring methods: moe, moe1, moe2). 328 | 329 | 330 | Docking/scoring options 331 | ####################### 332 | 333 | Each section relative to a docking/scoring program should be named the way it was specified under **program** in the **DOCKING** and/or **RESCORING** section. 
Below is a list of all the options per software that can be specified in the configuration file. 334 | 335 | **Autodock** 336 | 337 | * ga_run (default: 100): number of autodock runs = targeted number of final poses 338 | * spacing (default: 0.3): grid spacing 339 | 340 | **Note 1**: the partial charges of the ligand are obtained from the Gasteiger method using the AutodockTools command *prepare_ligand4.py* 341 | 342 | **Note 2**: the number of energy evaluations *ga_num_evals* is automatically calculated from the number of torsion angles in the ligand structure via the formula: 343 | 344 | :: 345 | 346 | ga_num_evals = min(25000000, 987500 * n_torsion_angles + 125000) 347 | 348 | **Note 3**: As is usually the case for Autodock, nonpolar hydrogens in the ligand structure are removed prior to docking in order to properly use the Autodock force field. Once the docking has been performed, nonpolar hydrogens are reattributed in a way consistent with the input structure. Unless the *minimize* option in the configuration file is set to *yes*, no minimization is performed on those hydrogens. 349 | 350 | **Note 4**: Final poses are extracted from the .dlg file using Open Babel via the following command: 351 | 352 | :: 353 | 354 | obabel -ad -ipdbqt dock.dlg -omol2 lig-.mol2 -m 355 | 356 | **Autodock Vina** 357 | 358 | * cpu (default: 1) 359 | * energy_range (default: 3) 360 | * num_modes (default: 9): targeted number of final poses 361 | 362 | **Note 1**: the partial charges of the ligand are obtained from the Gasteiger method using the AutodockTools command *prepare_ligand4.py* 363 | 364 | **Note 2**: As is usually the case for Autodock Vina, nonpolar hydrogens in the ligand structure are removed prior to docking in order to properly use the Autodock force field. Once the docking has been performed, nonpolar hydrogens are reattributed in a way consistent with the input structure. 
Unless the *minimize* option in the configuration file is set to *yes*, no minimization is performed on those hydrogens. 365 | 366 | **DOCK 6** 367 | 368 | * attractive_exponent (default: 6) 369 | * extra_margin (default: 2.0) 370 | * grid_spacing (default: 0.3) 371 | * maximum_sphere_radius (default: 4.0) 372 | * max_orientations (default: 10000) 373 | * minimum_sphere_radius (default: 1.4) 374 | * nposes (default: 20): targeted number of final poses 375 | * num_scored_conformers (default: 5000) 376 | * probe_radius (default: 1.4) 377 | * repulsive_exponent (default: 12) 378 | 379 | **DSX** 380 | 381 | **MOE** (scoring) 382 | 383 | * gtest (default: 0.01) 384 | * maxpose (default: 5) 385 | * placement (default: Triangle Matcher) 386 | * placement_maxpose (default: 250) 387 | * placement_nsample (default: 10) 388 | * remaxpose (default: 1) 389 | * rescoring (default: GBVI/WSA dG) 390 | * scoring (default: London dG) 391 | 392 | 393 | Examples 394 | ######## 395 | 396 | **Multi-program docking on a single binding site** 397 | 398 | Below is an example of a configuration file that can be used as an input of *rundbx*. The docking procedure is carried out on a single binding site specified as a box with dimensions 30.0 x 30.0 x 30.0 centered at the position (x, y, z) = 8.446, 25.365, 4.394. 
399 | 400 | :: 401 | 402 | [DOCKING] 403 | program = autodock, vina, dock, moe, moe1, moe2 404 | rescoring = no 405 | minimize = yes 406 | cleanup = no 407 | 408 | [AUTODOCK] 409 | ga_run = 50 410 | spacing = 0.3 411 | 412 | [VINA] 413 | num_modes = 20 414 | 415 | [DOCK] 416 | nposes = 200 417 | 418 | [MOE] 419 | scoring = London dG 420 | maxpose = 100 421 | remaxpose = 50 422 | 423 | [MOE1] 424 | scoring = GBVI/WSA dG 425 | maxpose = 100 426 | remaxpose = 50 427 | 428 | [MOE2] 429 | scoring = Affinity dG 430 | maxpose = 100 431 | remaxpose = 50 432 | 433 | [SITE] 434 | center = 8.446, 25.365, 4.394 435 | boxsize = 30.0, 30.0, 30.0 436 | 437 | 438 | **Multi-program docking and rescoring on multiple binding sites** 439 | 440 | Below is another example of configuration file for *rundbx* used to dock on two binding sites and rescore with DrugScoreX (dsx), Autodock and Autodock Vina. 441 | 442 | :: 443 | 444 | [DOCKING] 445 | site = site1, site2 446 | program = autodock, vina, dock 447 | rescoring = yes 448 | minimize = yes 449 | cleanup = yes 450 | 451 | [RESCORING] 452 | program = dsx, autodock, vina 453 | 454 | [DSX] 455 | pot_dir = /pmshare/jordane/CSD_potentials/DSX_CSD_Potentials_v0511/csd_pot_0511/ 456 | other_flags = -T0 1.0 -T1 1.0 -T3 1.0 -j 457 | 458 | [AUTODOCK] 459 | ga_run = 20 460 | spacing = 0.4 461 | 462 | [VINA] 463 | num_modes = 20 464 | 465 | [DOCK] 466 | nposes = 20 467 | 468 | [SITE1] 469 | center = 75.5, 80.0, 31.0 470 | boxsize = 40.0, 40.0, 40.0 471 | 472 | [SITE2] 473 | center = 75.5, 40.0, 50.0 474 | boxsize = 40.0, 40.0, 40.0 475 | 476 | Note that the DOCKING section includes the label of the binding sites through the keyword *site*, here, site1 and site2. Each label refers to the section of the same name SITE1 and SITE2, respectively. 477 | 478 | --------------------------------------------------------------------------------