├── dockbox
    ├── __init__.py
    ├── gnina.py
    ├── dsx.py
    ├── license.py
    ├── vina.py
    ├── dbxtools.py
    ├── configure.py
    ├── method.py
    ├── autodock.py
    ├── rundbx.py
    ├── dock.py
    ├── pyqcprot.pyx
    └── moe.py
├── logo.png
├── examples
    ├── autodock
    │   ├── docking
    │   │   ├── run.sh
    │   │   ├── config.ini
    │   │   └── 1a30_ligand.mol2
    │   └── rescoring
    │   │   ├── run.sh
    │   │   ├── config.ini
    │   │   └── 1a30_ligand.mol2
    ├── autodock_vina_dock
    │   └── rescoring
    │   │   ├── run.sh
    │   │   ├── config.ini
    │   │   └── 4de2_ligand.mol2
    └── tlr7_chapter
    │   ├── vs
    │       ├── round1
    │       │   ├── config.ini
    │       │   ├── README
    │       │   └── ligand.mol2
    │       └── round2
    │       │   ├── README
    │       │   ├── config.ini
    │       │   └── ligand.mol2
    │   └── active_decoys
    │       ├── config.ini
    │       ├── README
    │       ├── analysis
    │           ├── README
    │           └── compute_hit_rates.py
    │       └── ligand.mol2
├── bin
    ├── rundbx
    └── extract_top_poses
├── .gitignore
├── setup.py
├── LICENSE.txt
└── README.rst


/dockbox/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jp43/DockBox/HEAD/logo.png


--------------------------------------------------------------------------------
/examples/autodock/docking/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | rundbx -f config.ini -l 1a30_ligand.mol2 -r 1a30_protein.pdb
3 | 


--------------------------------------------------------------------------------
/examples/autodock/rescoring/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | rundbx -f config.ini -l 1a30_ligand.mol2 -r 1a30_protein.pdb
3 | 


--------------------------------------------------------------------------------
/examples/autodock_vina_dock/rescoring/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | rundbx -f config.ini -l 4de2_ligand.mol2 -r 4de2_protein.pdb
3 | 


--------------------------------------------------------------------------------
/bin/rundbx:
--------------------------------------------------------------------------------
#!/usr/bin/env python
"""Command-line launcher: builds a dockbox ``Docking`` job and runs it."""
from dockbox import rundbx


def main():
    """Instantiate ``rundbx.Docking`` and call its ``run`` method."""
    job = rundbx.Docking()
    job.run()


if __name__ == '__main__':
    main()
6 | 


--------------------------------------------------------------------------------
/examples/tlr7_chapter/vs/round1/config.ini:
--------------------------------------------------------------------------------
 1 | [DOCKING]
 2 | program = dock
 3 | rescoring = no
 4 | minimize = no
 5 | cleanup = 1
 6 | 
 7 | [DOCK]
 8 | nposes = 1
 9 | charge_method = gas
10 | 


--------------------------------------------------------------------------------
/examples/autodock/docking/config.ini:
--------------------------------------------------------------------------------
 1 | [DOCKING]
 2 | program = autodock
 3 | rescoring = no
 4 | minimize = no
 5 | cleanup = no
 6 | 
 7 | [AUTODOCK]
 8 | ga_run = 2
 9 | spacing = 0.3
10 | 
11 | [SITE]
12 | center =  37.092, 37.149, 29.264
13 | boxsize = 30.0, 30.0, 30.0
14 | 


--------------------------------------------------------------------------------
/examples/autodock/rescoring/config.ini:
--------------------------------------------------------------------------------
 1 | [DOCKING]
 2 | program = autodock
 3 | rescoring = yes
 4 | minimize = yes
 5 | cleanup = yes
 6 | 
 7 | [RESCORING]
 8 | program = autodock
 9 | 
10 | [AUTODOCK]
11 | ga_run = 2
12 | spacing = 0.3
13 | 
14 | [SITE]
15 | center =  37.092, 37.149, 29.264
16 | boxsize = 30.0, 30.0, 30.0
17 | 


--------------------------------------------------------------------------------
/examples/tlr7_chapter/vs/round1/README:
--------------------------------------------------------------------------------
1 | The current directory includes the input files to run rundbx on target #12 and a compound tested in VS (round 1). The config.ini file is the same as the one originally used in our study.
2 | 
3 | The rundbx command can be simply executed using:
4 | 
5 |     rundbx -f config.ini -l ligand.mol2 -r target12.pdb
6 | 


--------------------------------------------------------------------------------
/examples/tlr7_chapter/vs/round2/README:
--------------------------------------------------------------------------------
1 | The current directory includes the input files to run rundbx on target #12 and a compound tested in VS (round 2). The config.ini file is the same as the one originally used in our study.
2 | 
3 | The rundbx command can be simply executed using:
4 | 
5 |     rundbx -f config.ini -l ligand.mol2 -r target12.pdb
6 | 


--------------------------------------------------------------------------------
/examples/tlr7_chapter/vs/round2/config.ini:
--------------------------------------------------------------------------------
 1 | [DOCKING]
 2 | program = autodock, vina, dock
 3 | rescoring = yes
 4 | minimize = yes
 5 | cleanup = 2
 6 | 
 7 | [RESCORING]
 8 | program = autodock, vina, dock, dsx
 9 | 
10 | [AUTODOCK]
11 | ga_run = 10
12 | 
13 | [VINA]
14 | num_modes = 10
15 | 
16 | [MINIMIZATION]
17 | solvent = vacuo
18 | ncyc = 1000
19 | maxcyc = 2000
20 | cut = 999.0
21 | charge_method = gas
22 | 
23 | [DOCK]
24 | nposes = 10
25 | charge_method = gas
26 | 
27 | [SITE]
28 | center = 23.395, -12.165, 26.111
29 | boxsize = 35.0, 35.0, 35.0
30 | 


--------------------------------------------------------------------------------
/examples/tlr7_chapter/active_decoys/config.ini:
--------------------------------------------------------------------------------
 1 | [DOCKING]
 2 | program = autodock, vina, dock
 3 | rescoring = yes
 4 | minimize = yes
 5 | cleanup = 2
 6 | 
 7 | [RESCORING]
 8 | program = autodock, vina, dock, dsx
 9 | 
10 | [AUTODOCK]
11 | ga_run = 10
12 | 
13 | [VINA]
14 | num_modes = 10
15 | 
16 | [MINIMIZATION]
17 | solvent = vacuo
18 | ncyc = 1000
19 | maxcyc = 2000
20 | cut = 999.0
21 | charge_method = gas
22 | 
23 | [DOCK]
24 | nposes = 10
25 | charge_method = gas
26 | 
27 | [SITE]
28 | center = 23.395, -12.165, 26.111
29 | boxsize = 35.0, 35.0, 35.0
30 | 


--------------------------------------------------------------------------------
/examples/autodock_vina_dock/rescoring/config.ini:
--------------------------------------------------------------------------------
 1 | [DOCKING]
 2 | program = autodock, vina, dock
 3 | rescoring = yes
 4 | minimize = yes
 5 | cleanup = yes
 6 | 
 7 | [RESCORING]
 8 | program = autodock, vina, dock
 9 | 
10 | [MINIMIZATION]
11 | solvent = vacuo
12 | ncyc = 5000
13 | maxcyc = 10000
14 | cut = 999.0
15 | charge_method = gas
16 | 
17 | [AUTODOCK]
18 | ga_run = 10
19 | spacing = 0.3
20 | 
21 | [VINA]
22 | num_modes = 10
23 | 
24 | [DOCK]
25 | nposes = 10
26 | charge_method = gas
27 | rmsd = 1.0
28 | 
29 | [SITE]
30 | center = 32.408, 38.328, 27.571
31 | boxsize = 30.0, 30.0, 30.0
32 | 


--------------------------------------------------------------------------------
/examples/tlr7_chapter/active_decoys/README:
--------------------------------------------------------------------------------
 1 | The current directory includes the input files to run rundbx on target #12 and one TLR7 active compound. The config.ini file is the same as the one originally used in our study.
 2 | 
 3 | The rundbx command can be simply executed using:
 4 | 
 5 |     rundbx -f config.ini -l ligand.mol2 -r target12.pdb
 6 | 
 7 | * The analysis folder contains results for all the active molecules (103) and decoys (5850). It shows the performance of different docking, rescoring and 
 8 | consensus strategies in terms of hit-rates and Enrichment Factors (EF).
 9 | 
10 | * The mol2files folder contains MOL2 files that include all the active molecules and decoys. Note that rundbx does not work with MOL2 or PDB files containing
11 | multiple structures; to be used with rundbx, the MOL2 files in that folder must first be split into separate files containing one molecule
12 | each.
13 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | 
 6 | # nfs
 7 | *nfs*
 8 | 
 9 | # C extensions
10 | *.c
11 | *.so
12 | 
13 | # Distribution / packaging
14 | .Python
15 | env/
16 | build/
17 | develop-eggs/
18 | dist/
19 | downloads/
20 | eggs/
21 | .eggs/
22 | lib/
23 | lib64/
24 | parts/
25 | sdist/
26 | var/
27 | *.egg-info/
28 | .installed.cfg
29 | *.egg
30 | 
31 | bin/prepare_*
32 | 
33 | # PyInstaller
34 | #  Usually these files are written by a python script from a template
35 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
36 | *.manifest
37 | *.spec
38 | 
39 | # Installer logs
40 | pip-log.txt
41 | pip-delete-this-directory.txt
42 | 
43 | # Unit test / coverage reports
44 | htmlcov/
45 | .tox/
46 | .coverage
47 | .coverage.*
48 | .cache
49 | nosetests.xml
50 | coverage.xml
51 | *,cover
52 | .hypothesis/
53 | 
54 | # Translations
55 | *.mo
56 | *.pot
57 | 
58 | # Django stuff:
59 | *.log
60 | 
61 | # Sphinx documentation
62 | docs/_build/
63 | 
64 | # PyBuilder
65 | target/
66 | 
67 | #Ipython Notebook
68 | .ipynb_checkpoints
69 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | 
 3 | import numpy as np
 4 | from setuptools import setup, Extension
 5 | 
 6 | import Cython
 7 | from Cython.Distutils import build_ext
 8 | from Cython.Build import cythonize
 9 | 
# Locate the NumPy headers that the extension module is compiled against.
try:
    numpy_include = np.get_include()
except AttributeError:
    # fall back to the alternative accessor when get_include is missing
    numpy_include = np.get_numpy_include()

# check Python version
if not (sys.version_info[0] == 2 and sys.version_info[1] >= 6):
    sys.exit("You need Python 2.6.x or Python 2.7.x to install the DockBox package!")

# Cython extension built from dockbox/pyqcprot.pyx
ext_modules = [Extension(
    name='dockbox.pyqcprot',
    sources=["dockbox/pyqcprot.pyx"],
    include_dirs=[numpy_include])]

# Read the long description through a context manager so the file handle is
# closed deterministically instead of being left open until garbage collection.
with open('README.rst') as readme:
    long_description = readme.read()

setup(name='dockbox',
    version='1.4',
    packages=['dockbox'],
    scripts=['bin/rundbx', 'bin/extract_top_poses'],
    install_requires=['mdkit', 'pandas<=0.24.2', 'nwalign', 'oldnumeric'],
    ext_modules = cythonize(ext_modules),
    license='LICENSE.txt',
    description='Platform package to simplify the use of docking programs and consensus methods',
    long_description=long_description,
)
34 | 


--------------------------------------------------------------------------------
/examples/tlr7_chapter/active_decoys/analysis/README:
--------------------------------------------------------------------------------
 1 | The current directory contains folders generated with extract_dbx_best_poses for the best strategies identified in our study. To save space, folders are not provided for every strategy.
 2 | 
 3 | The current directory also contains the script compute_hit_rates.py which provides the hit-rate (top 100) and Enrichment Factor (EF) for every strategy.
 4 | 
 5 | The compute_hit_rates.py script requires each folder to include a file called best_poses.csv (normally generated with extract_dbx_best_poses).
 6 | 
 7 | The first molecules listed in each best_poses.csv should correspond to active compounds followed by decoys. When running compute_hit_rates.py, simply specify the
 8 | number of active molecules in the set.
 9 | 
10 | Here each */best_poses.csv file includes 103 actives followed by 5850 decoys. Therefore, compute_hit_rates.py can be directly executed with the command:
11 | 
12 |     python compute_hit_rates.py -n 103
13 | 
14 | Running the above command will generate a directory called top_hits containing the top 100 hits obtained with each strategy. It will also contain a file called ranking.csv in which the strategies are ranked by hit rate (EFs are also provided).
15 | 


--------------------------------------------------------------------------------
/dockbox/gnina.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | import method
 4 | import subprocess
 5 | 
 6 | from mdkit.utility import mol2
 7 | 
 8 | required_programs = ['gnina']
 9 | default_settings = {'type': 'CNNscore', 'cnn': None}
10 | 
class Gnina(method.ScoringMethod):
    """Rescoring method that runs ``gnina`` in score-only mode.

    Reads two entries of ``self.options``: ``'cnn'`` (optional value passed to
    ``--cnn``) and ``'type'`` (label of the output line holding the score).
    """

    def write_rescoring_script(self, filename, file_r, file_l):
        """Write a bash script that scores one receptor/ligand pair with GNINA.

        :param filename: path of the script to create (overwritten if present)
        :param file_r: receptor file passed to ``gnina -r``
        :param file_l: ligand file passed to ``gnina -l``

        The script redirects GNINA's standard output to ``gnina.out``.
        """
        cnn = self.options['cnn']
        # "none"/"no" (any case) are treated like an unset option
        if cnn is None or cnn.lower() in ("none", "no"):
            cnn_flag = ""
        else:
            cnn_flag = " --cnn %s" % cnn

        # build the GNINA script from an explicit mapping rather than locals()
        script = """#!/bin/bash

rm -rf gnina.out

# execute GNINA
gnina -r %(file_r)s -l %(file_l)s%(cnn_flag)s --score_only > gnina.out
""" % {'file_r': file_r, 'file_l': file_l, 'cnn_flag': cnn_flag}

        with open(filename, 'w') as script_file:
            script_file.write(script)

    def extract_rescoring_results(self, filename):
        """Append the score found in ``gnina.out`` to ``filename``.

        The score is the second whitespace-separated field of the first line
        starting with ``self.options['type']``. ``NaN`` is appended when
        ``gnina.out`` is missing, when no such line exists, or when that line
        carries no value after the label.

        :param filename: path of the score file to append one line to
        """
        score = 'NaN'
        if os.path.isfile('gnina.out'):
            with open('gnina.out', 'r') as outf:
                for line in outf:
                    if line.startswith(self.options['type']):
                        fields = line.split()
                        # a label without a value must not crash the run
                        if len(fields) > 1:
                            score = fields[1]
                        break

        with open(filename, 'a') as sf:
            sf.write(score + '\n')
45 | 


--------------------------------------------------------------------------------
/dockbox/dsx.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | import method
 4 | from mdkit.utility import mol2
 5 | 
 6 | required_programs = ['dsx']
 7 | 
 8 | default_settings = {'pot_dir': None, 'other_flags': None}
 9 | 
class Dsx(method.ScoringMethod):
    """Rescoring method that runs the ``dsx`` executable.

    Reads two entries of ``self.options``: ``'pot_dir'`` (value passed to
    ``-D``) and ``'other_flags'`` (extra text appended to the command line).
    """

    def write_rescoring_script(self, filename, file_r, file_l):
        """Write a bash script that scores one receptor/ligand pair with DSX.

        :param filename: path of the script to create (overwritten if present)
        :param file_r: receptor file, copied to ``protein.pdb`` by the script
        :param file_l: ligand file, copied to ``ligand.mol2`` by the script

        The script asks DSX to write its results to ``dsx.out``.
        """
        pot_dir = self.options['pot_dir']
        pot_dir_str = ' -D ' + pot_dir if pot_dir else ''

        other_flags = self.options['other_flags']
        other_flags_str = ' ' + other_flags if other_flags else ''

        # build the DSX script from an explicit mapping; the template only
        # needs these four values
        script = """#!/bin/bash
set -e
# remove pre-existing result file
rm -rf dsx.out

cp %(file_r)s protein.pdb
cp %(file_l)s ligand.mol2

# execute DSX
dsx -P protein.pdb -L ligand.mol2 -F dsx.out%(pot_dir_str)s%(other_flags_str)s
""" % {'file_r': file_r, 'file_l': file_l,
       'pot_dir_str': pot_dir_str, 'other_flags_str': other_flags_str}

        with open(filename, 'w') as script_file:
            script_file.write(script)

    def extract_rescoring_results(self, filename):
        """Append the score found in ``dsx.out`` to ``filename``.

        The score is the fourth ``|``-separated field of the first line that
        starts with ``" 0"``. ``NaN`` is appended when ``dsx.out`` is missing,
        when no such line exists, or when that line has fewer than four fields.

        :param filename: path of the score file to append one line to
        """
        score = 'NaN'
        if os.path.isfile('dsx.out'):
            with open('dsx.out', 'r') as outf:
                for line in outf:
                    if line.startswith(" 0"):
                        fields = line.split('|')
                        # a truncated result line must not crash the run
                        if len(fields) > 3:
                            score = fields[3].strip()
                        break

        with open(filename, 'a') as sf:
            sf.write(score + '\n')
56 | 


--------------------------------------------------------------------------------
/examples/tlr7_chapter/active_decoys/analysis/compute_hit_rates.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | import pandas as pd
 4 | from glob import glob
 5 | import shutil
 6 | import argparse
 7 | 
 8 | parser = argparse.ArgumentParser(description="Computes hit rates and enrichment factors from output of extract_dbx_best_poses (each folder generated with extract_dbx_best_poses should contain a file called best_poses.csv)")
 9 | 
10 | parser.add_argument('-n',
11 |     dest='nactives',
12 |     type=int,
13 |     required=True,
14 |     help='Number of active compounds in the set (in best_poses.csv files, all the active compounds should be provided first, followed by decoys")')
15 | 
16 | nhits = 100 # number of hits that should be considered for hit rates
17 | 
18 | # update parsers with arguments
19 | args = parser.parse_args()
20 | 
21 | dirs = []
22 | for dir in glob("*"):
23 |     if os.path.isfile(dir+"/best_poses.csv"):
24 |         dirs.append(dir)
25 | 
26 | topdir = "top_hits"
27 | shutil.rmtree(topdir, ignore_errors=True)
28 | os.mkdir(topdir)
29 | 
30 | info = {"method": [], "EF": [], "hit-rate": []}
31 | 
32 | for dir in dirs:
33 |     df = pd.read_csv(dir+"/best_poses.csv")
34 |     df['status'] = "decoy"
35 |     df.iloc[:args.nactives, df.columns.get_loc('status')] = "active"
36 | 
37 |     if 'consensus' in df.columns:
38 |         df_groupby = df.groupby(['status'])[['consensus']].sum()
39 |         tp = int(df_groupby.ix[0]['consensus']) # True Positives
40 |         fn = args.nactives - tp # False Negatives
41 |         fp = int(df_groupby.ix[1]['consensus']) # False Negatives
42 |         tn = len(df)-args.nactives - fp # True Negatives
43 | 
44 |         nctot = tp + fn + fp + tn
45 |         nc = tp + fp
46 |         ratio = tp*1./fp
47 |         ef = tp*1./(tp+fn)*nctot*1./nc
48 | 
49 |         df = df[df['consensus']]
50 |     else:
51 |         ratio = 100.
52 |         ef = 1.
53 | 
54 |     if dir.startswith("docking"):
55 |         column = ["score"]
56 | 
57 |     elif dir.startswith("rescoring"):
58 |         column = [dir.split("_")[-1]]
59 | 
60 |     elif dir.startswith("cd"):
61 |         column = []
62 |         for prgm in dir.split("_")[1:]:
63 |             column.append("score_"+prgm)
64 | 
65 |     elif dir.startswith("sbcd"):
66 |         column =[]
67 |         for prgm in dir.split("_")[1:]:
68 |             column.append(prgm)
69 | 
70 |     for cc in column:
71 |         df_top_hits = df.sort_values(by=cc).head(nhits)
72 |         if dir.startswith(("cd", "sbcd")):
73 |             ccs = cc.split('_')
74 |             method = dir + "_scored_with_" + ccs[-1]
75 |         else:
76 |             method = dir
77 |         csvfile = topdir + "/" + method + ".csv"
78 |         df_top_hits[['ligID', cc]].to_csv(csvfile, index=False, float_format="%.3f")
79 |         info["method"].append(method)
80 |         info["EF"].append(ef)
81 |         info["hit-rate"].append(len(df_top_hits[df_top_hits['status']=='active']))
82 | 
83 | df_info = pd.DataFrame(info)
84 | df_info = df_info.sort_values('hit-rate', ascending=False)
85 | df_info[["method", "hit-rate", "EF"]].to_csv(topdir+"/ranking.csv",index=False, float_format="%.3f")
86 | 


--------------------------------------------------------------------------------
/examples/tlr7_chapter/vs/round1/ligand.mol2:
--------------------------------------------------------------------------------
 1 | @<TRIPOS>MOLECULE
 2 | ZINC000001529646
 3 |   37   36    0    0    0
 4 | SMALL
 5 | USER_CHARGES
 6 | NO_NAME
 7 | @<TRIPOS>ATOM
 8 |       1 N1         -0.0111    0.9658    0.0074 N.pl3     1 LIG      -0.7800
 9 |       2 H2          0.0021   -0.0041    0.0020 H         1 LIG       0.4400
10 |       3 H3          0.8223    1.4622    0.0021 H         1 LIG       0.4200
11 |       4 C4         -1.1486    1.6021    0.0162 C.cat     1 LIG       0.7400
12 |       5 N5         -1.1673    2.9768    0.0238 N.pl3     1 LIG      -0.7200
13 |       6 C6          0.0914    3.7265    0.0219 C.3       1 LIG       0.1100
14 |       7 C7         -0.2086    5.2268    0.0314 C.3       1 LIG      -0.1400
15 |       8 C8          1.1060    6.0097    0.0295 C.3       1 LIG      -0.1200
16 |       9 C9          0.8060    7.5100    0.0390 C.3       1 LIG      -0.0500
17 |      10 H10         0.1608    7.7461    0.8852 H         1 LIG       0.1400
18 |      11 N11         0.1301    7.8822   -1.2110 N.4       1 LIG      -0.6100
19 |      12 H12        -0.0677    8.8716   -1.2047 H         1 LIG       0.4300
20 |      13 H13         0.7274    7.6636   -1.9943 H         1 LIG       0.4300
21 |      14 H14        -0.7336    7.3670   -1.2926 H         1 LIG       0.4100
22 |      15 C15         2.0960    8.2794    0.1609 C.2       1 LIG       0.4900
23 |      16 O16         2.5136    8.9234   -0.7864 O.co2     1 LIG      -0.6700
24 |      17 O17         2.7222    8.2579    1.2069 O.co2     1 LIG      -0.6300
25 |      18 N18        -2.3298    0.8986    0.0180 N.pl3     1 LIG      -0.6100
26 |      19 C19        -2.3099   -0.5662    0.0099 C.3       1 LIG       0.0800
27 |      20 H20        -1.4605   -0.9213    0.5934 H         1 LIG       0.0900
28 |      21 C21        -3.6073   -1.0973    0.6228 C.3       1 LIG      -0.1800
29 |      22 C22        -3.6723   -0.7074    2.0771 C.2       1 LIG       0.4900
30 |      23 O23        -2.7650   -0.0644    2.5770 O.co2     1 LIG      -0.7000
31 |      24 O24        -4.6321   -1.0350    2.7538 O.co2     1 LIG      -0.7000
32 |      25 C25        -2.1860   -1.0591   -1.4088 C.2       1 LIG       0.4800
33 |      26 O26        -2.1204   -0.2608   -2.3280 O.co2     1 LIG      -0.6900
34 |      27 O27        -2.1514   -2.2562   -1.6374 O.co2     1 LIG      -0.6700
35 |      28 H28        -2.0138    3.4503    0.0304 H         1 LIG       0.4200
36 |      29 H29         0.6707    3.4661    0.9077 H         1 LIG       0.0900
37 |      30 H30         0.6623    3.4758   -0.8722 H         1 LIG       0.0800
38 |      31 H31        -0.7879    5.4871   -0.8544 H         1 LIG       0.0700
39 |      32 H32        -0.7794    5.4774    0.9255 H         1 LIG       0.0900
40 |      33 H33         1.6853    5.7493    0.9153 H         1 LIG       0.1200
41 |      34 H34         1.6768    5.7590   -0.8646 H         1 LIG       0.0900
42 |      35 H35        -3.1763    1.3721    0.0246 H         1 LIG       0.4200
43 |      36 H36        -4.4600   -0.6712    0.0940 H         1 LIG       0.0700
44 |      37 H37        -3.6320   -2.1835    0.5364 H         1 LIG       0.0900
45 | @<TRIPOS>BOND
46 |    1    1    2 1
47 |    2    1    3 1
48 |    3    1    4 ar
49 |    4    4    5 ar
50 |    5    4   18 ar
51 |    6    5    6 1
52 |    7    5   28 1
53 |    8    6    7 1
54 |    9    6   29 1
55 |   10    6   30 1
56 |   11    7    8 1
57 |   12    7   31 1
58 |   13    7   32 1
59 |   14    8    9 1
60 |   15    8   33 1
61 |   16    8   34 1
62 |   17    9   10 1
63 |   18    9   11 1
64 |   19    9   15 1
65 |   20   11   12 1
66 |   21   11   13 1
67 |   22   11   14 1
68 |   23   15   16 ar
69 |   24   15   17 ar
70 |   25   18   19 1
71 |   26   18   35 1
72 |   27   19   20 1
73 |   28   19   21 1
74 |   29   19   25 1
75 |   30   21   22 1
76 |   31   21   36 1
77 |   32   21   37 1
78 |   33   22   23 ar
79 |   34   22   24 ar
80 |   35   25   26 ar
81 |   36   25   27 ar
82 | 


--------------------------------------------------------------------------------
/dockbox/license.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import os
  3 | import time
  4 | import subprocess
  5 | 
  6 | def wrap_command(cmd, prgm):
  7 | 
  8 |     exe = os.path.abspath(__file__)
  9 |     if exe[-1] == 'c':
 10 |         exe = exe[:-1] # get .py from .pyc
 11 | 
 12 |     cmd_s = cmd.split()
 13 |     exe_cmd = cmd_s[0]
 14 | 
 15 |     if prgm == 'moe':
 16 |         logfile = 'moebatch.log'
 17 |         # write eval command until license is found
 18 |         newcmd = """while true; do
 19 |   %(cmd)s &> %(logfile)s
 20 |   status=`python %(exe)s %(prgm)s %(logfile)s`
 21 |   if [ "$status" == "0" ]; then break; fi
 22 |   sleep 10s
 23 | done"""% locals()
 24 | 
 25 |     elif prgm == 'gold':
 26 |         logfile = 'gold.err'
 27 |         newcmd = """while true; do
 28 |   %(cmd)s > /dev/null
 29 |   status=`python %(exe)s %(prgm)s %(logfile)s`
 30 |   if [ "$status" == "0" ]; then break; fi
 31 |   sleep 10s
 32 | done"""% locals()
 33 | 
 34 |     elif prgm == 'schrodinger':
 35 |         if exe_cmd == 'glide':
 36 |             filename1 = cmd_s[-1]
 37 |         elif exe_cmd == 'prepwizard':
 38 |             filename1 = cmd_s[-2]
 39 |         elif exe_cmd == 'ifd':
 40 |             filename1 = cmd_s[-1]
 41 |         else:
 42 |             raise ValueError("Schrodinger's command %s not recognized!"%exe_cmd)
 43 |         splitext_0 = os.path.splitext(filename1)[0]
 44 |         suffix = os.path.basename(splitext_0)
 45 |         logfile = suffix + '.log'
 46 |         newcmd = """while true; do
 47 |   output=`%(cmd)s`
 48 |   jobid=`echo "$output" | sed -n -e 's/^.*JobId: //p'`
 49 |   status=`python %(exe)s %(prgm)s %(logfile)s $jobid`
 50 |   if [ "$status" == "0" ]; then break; fi
 51 |   sleep 10s
 52 | done"""% locals()
 53 | 
 54 |     return newcmd
 55 | 
 56 | def check_schrodinger_license(logfile, jobid):
 57 |     """Check if schrodinger exe had license issues, design to avoid retry every 60 sec"""
 58 | 
 59 |     status = 0
 60 |     is_job_done = False
 61 |     is_job_killed = False
 62 | 
 63 |     while True:
 64 |         # (A) check if the job is still running
 65 |         output = subprocess.check_output('jobcontrol -list', shell=True, executable='/bin/bash')
 66 |         if jobid in output:
 67 |             time.sleep(2) # sleep for 2 sec
 68 |         else:
 69 |             is_job_done = True # the job is done
 70 |         # (B) check if the job has license issues
 71 |         if not is_job_killed:
 72 |             with open(logfile) as logf:
 73 |                 for line in logf:
 74 |                     if 'Licensed number of users already reached' in line:
 75 |                         output = subprocess.check_output('jobcontrol -killnooutput %s'%jobid, shell=True, executable='/bin/bash')
 76 |                         status = 1
 77 |                         is_job_killed = True
 78 |         if is_job_done:
 79 |             break
 80 |     return status
 81 | 
 82 | def check_moe_license(logfile):
 83 | 
 84 |     status = 0
 85 |     with open(logfile) as logf:
 86 |         for line in logf:
 87 |             if 'Licensed number of users already reached' in line:
 88 |                 status = 1
 89 |     return status
 90 | 
 91 | def check_gold_license(logfile):
 92 | 
 93 |     status = 0
 94 |     if os.path.exists(logfile):
 95 |         with open(logfile) as logf:
 96 |             for line in logf:
 97 |                 if 'Licensed number of users already reached' in line:
 98 |                     status = 1
 99 |     return status
100 | 
def run(args):
    """Dispatch a license check from command-line style arguments.

    :param args: list laid out like ``sys.argv``: script name, program name,
        log file, and for 'schrodinger' an additional job ID
    :returns: the status given by the program-specific check (0 when no
        license problem was found), or 0 for an unrecognized program name
    :raises ValueError: if the program name, the log file or (for
        'schrodinger') the job ID is missing
    """
    # args[0] is the script name, so two real arguments mean len(args) >= 3
    if len(args) < 3:
        raise ValueError("check_licence.py should have at least two arguments")

    # first argument should be the program name
    prgm = args[1]
    status = 0

    # second argument should be the log file where to look for warning/error messages
    logfile = args[2]

    if prgm == 'moe':
        status = check_moe_license(logfile)
    elif prgm == 'gold':
        status = check_gold_license(logfile)
    elif prgm == 'schrodinger':
        # for Schrodinger, an extra argument is expected (job ID)
        if len(args) < 4:
            raise ValueError("check_licence.py needs a job ID as third argument for schrodinger")
        status = check_schrodinger_license(logfile, args[3])

    return status
122 | 
if __name__ == '__main__':
    # print the status so that a calling shell can capture it from stdout
    print(run(sys.argv))
126 | 


--------------------------------------------------------------------------------
/examples/tlr7_chapter/vs/round2/ligand.mol2:
--------------------------------------------------------------------------------
 1 | @<TRIPOS>MOLECULE
 2 | ZINC000097231848
 3 | 41    43    1     0     0
 4 | SMALL
 5 | USER_CHARGES
 6 | @<TRIPOS>ATOM
 7 |       1 C1          0.0396    0.6774   -0.0050 C.3       1 LIG      -0.1400
 8 |       2 C2          0.1066   -0.7996    0.3883 C.3       1 LIG      -0.1800
 9 |       3 H3          0.5461   -1.3733   -0.4276 H         1 LIG       0.0700
10 |       4 C4          0.9684   -0.9547    1.6430 C.3       1 LIG       0.1100
11 |       5 N5          2.2663   -0.3107    1.4265 N.am      1 LIG      -0.5700
12 |       6 C6          2.4992    0.9200    1.9241 C.2       1 LIG       0.6000
13 |       7 O7          3.5875    1.4411    1.7729 O.2       1 LIG      -0.5500
14 |       8 C8          1.4347    1.6344    2.6517 C.2       1 LIG      -0.2700
15 |       9 C9          1.6011    2.7165    3.4935 C.2       1 LIG       0.1200
16 |      10 N10         0.3836    3.0628    3.9481 N.pl3     1 LIG      -0.3200
17 |      11 C11         0.1076    4.1169    4.8315 C.ar      1 LIG       0.1200
18 |      12 C12         1.1399    4.9116    5.3092 C.ar      1 LIG      -0.0900
19 |      13 C13         0.8664    5.9497    6.1788 C.ar      1 LIG      -0.1400
20 |      14 C14        -0.4369    6.1981    6.5750 C.ar      1 LIG       0.1200
21 |      15 F15        -0.7011    7.2137    7.4259 F         1 LIG      -0.1300
22 |      16 C16        -1.4700    5.4075    6.1002 C.ar      1 LIG      -0.1700
23 |      17 C17        -1.2018    4.3707    5.2255 C.ar      1 LIG       0.1400
24 |      18 F18        -2.2092    3.5991    4.7619 F         1 LIG      -0.1000
25 |      19 N19        -0.5613    2.1942    3.3905 N.2       1 LIG      -0.2800
26 |      20 C20         0.0523    1.3442    2.6134 C.2       1 LIG       0.0800
27 |      21 C21         3.3111   -1.0043    0.6692 C.3       1 LIG       0.0800
28 |      22 C22         4.4746   -0.1644    0.1384 C.3       1 LIG      -0.1900
29 |      23 C23         4.6937   -1.1183    1.3144 C.3       1 LIG      -0.1900
30 |      24 C24        -1.2840   -1.3085    0.6681 C.2       1 LIG       0.5000
31 |      25 O25        -2.1329   -1.2748   -0.2064 O.co2     1 LIG      -0.6900
32 |      26 O26        -1.5602   -1.7540    1.7689 O.co2     1 LIG      -0.6900
33 |      27 H27         1.0175    1.1367    0.1399 H         1 LIG       0.0500
34 |      28 H28        -0.2498    0.7617   -1.0525 H         1 LIG       0.0600
35 |      29 H29        -0.6965    1.1862    0.6174 H         1 LIG       0.0600
36 |      30 H30         0.4664   -0.4853    2.4889 H         1 LIG       0.1000
37 |      31 H31         1.1188   -2.0139    1.8518 H         1 LIG       0.0800
38 |      32 H32         2.5365    3.1961    3.7411 H         1 LIG       0.1900
39 |      33 H33         2.1570    4.7185    5.0015 H         1 LIG       0.1500
40 |      34 H34         1.6702    6.5680    6.5503 H         1 LIG       0.1500
41 |      35 H35        -2.4859    5.6031    6.4102 H         1 LIG       0.1600
42 |      36 H36        -0.4142    0.5526    2.0459 H         1 LIG       0.2400
43 |      37 H37         2.9715   -1.8390    0.0559 H         1 LIG       0.1300
44 |      38 H38         4.9006   -0.4464   -0.8244 H         1 LIG       0.0800
45 |      39 H39         4.4627    0.9042    0.3527 H         1 LIG       0.1400
46 |      40 H40         4.8260   -0.6772    2.3024 H         1 LIG       0.0900
47 |      41 H41         5.2639   -2.0278    1.1252 H         1 LIG       0.0900
48 | @<TRIPOS>BOND
49 | 1     1     2     1
50 | 2     1     27    1
51 | 3     1     28    1
52 | 4     1     29    1
53 | 5     2     3     1
54 | 6     2     4     1
55 | 7     2     24    1
56 | 8     4     5     1
57 | 9     4     30    1
58 | 10    4     31    1
59 | 11    5     6     am
60 | 12    5     21    1
61 | 13    6     7     2
62 | 14    6     8     1
63 | 15    8     20    1
64 | 16    8     9     2
65 | 17    9     10    1
66 | 18    9     32    1
67 | 19    10    11    1
68 | 20    10    19    1
69 | 21    11    17    ar
70 | 22    11    12    ar
71 | 23    12    13    ar
72 | 24    12    33    1
73 | 25    13    14    ar
74 | 26    13    34    1
75 | 27    14    15    1
76 | 28    14    16    ar
77 | 29    16    17    ar
78 | 30    16    35    1
79 | 31    17    18    1
80 | 32    19    20    2
81 | 33    20    36    1
82 | 34    21    23    1
83 | 35    21    22    1
84 | 36    21    37    1
85 | 37    22    23    1
86 | 38    22    38    1
87 | 39    22    39    1
88 | 40    23    40    1
89 | 41    23    41    1
90 | 42    24    25    ar
91 | 43    24    26    ar
92 | 


--------------------------------------------------------------------------------
/dockbox/vina.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import shutil
  4 | import subprocess
  5 | from glob import glob
  6 | 
  7 | from mdkit.utility import mol2
  8 | import autodock
  9 | 
 10 | required_programs = ['prepare_ligand4.py', 'prepare_receptor4.py', 'vina', 'obabel']
 11 | 
 12 | default_settings = {'cpu': '1', 'num_modes': '9', 'energy_range': '3', 'seed': None}
 13 | 
 14 | class Vina(autodock.ADBased):
 15 | 
 16 |     def __init__(self, instance, site, options):
 17 | 
 18 |         super(Vina, self).__init__(instance, site, options)
 19 | 
 20 |         center = map(str.strip, site[1].split(','))
 21 |         boxsize = map(str.strip, site[2].split(','))
 22 | 
 23 |         for idx, xyz in enumerate(['x', 'y', 'z']):
 24 |             self.options['center_'+xyz] = center[idx]
 25 |             self.options['size_'+xyz] = boxsize[idx]
 26 | 
 27 |     def write_docking_script(self, filename, file_r, file_l, rescoring=False):
 28 |         """write docking script for Vina"""
 29 | 
 30 |         locals().update(self.options)
 31 | 
 32 |         self.write_check_ligand_pdbqt_script('check_ligand_pdbqt.py')
 33 |         self.write_check_ions_script('check_ions.py')
 34 | 
 35 |         # write vina config file
 36 |         with open('vina.config', 'w') as cf:
 37 |             # write mandatory options
 38 |             cf.write('receptor = target.pdbqt\n')
 39 |             cf.write('ligand = ligand.pdbqt\n')
 40 |             # write other options
 41 |             for key, value in self.options.iteritems():
 42 |                 if value is not None:
 43 |                     cf.write(key+' = '+value+'\n')
 44 | 
 45 |         # write vina script
 46 |         if not rescoring:
 47 |             with open(filename, 'w') as ff:
 48 |                 script ="""#!/bin/bash
 49 | set -e
 50 | 
 51 | MGLPATH=`which prepare_ligand4.py`
 52 | MGLPATH=`python -c "print '/'.join('$MGLPATH'.split('/')[:-3])"`
 53 | export PYTHONPATH=$PYTHONPATH:$MGLPATH
 54 | 
 55 | # prepare ligand
 56 | prepare_ligand4.py -l %(file_l)s -o ligand.pdbqt
 57 | python check_ligand_pdbqt.py ligand.pdbqt
 58 | 
 59 | # prepare receptor
 60 | prepare_receptor4.py -U nphs_lps_waters -r %(file_r)s -o target.pdbqt &> prepare_receptor4.log
 61 | python check_ions.py target.pdbqt prepare_receptor4.log
 62 | 
 63 | # run vina
 64 | vina --config vina.config 1> vina.out 2> vina.err"""% locals()
 65 |                 ff.write(script)
 66 |         else:
 67 |             with open(filename, 'w') as ff:
 68 |                 script ="""#!/bin/bash
 69 | set -e
 70 | 
 71 | MGLPATH=`which prepare_ligand4.py`
 72 | MGLPATH=`python -c "print '/'.join('$MGLPATH'.split('/')[:-3])"`
 73 | export PYTHONPATH=$PYTHONPATH:$MGLPATH
 74 | 
 75 | # prepare ligand
 76 | prepare_ligand4.py -l %(file_l)s -o ligand.pdbqt
 77 | python check_ligand_pdbqt.py ligand.pdbqt
 78 | 
 79 | if [ ! -f target.pdbqt ]; then
 80 |   prepare_receptor4.py -U nphs_lps_waters -r %(file_r)s -o target.pdbqt > prepare_receptor4.log
 81 |   python check_ions.py target.pdbqt prepare_receptor4.log
 82 | fi
 83 | 
 84 | # run vina
 85 | vina --score_only --config vina.config > vina.out"""% locals()
 86 |                 ff.write(script)
 87 | 
 88 |     def extract_docking_results(self, file_s, input_file_r, input_file_l):
 89 |         """Extract output structures in .mol2 formats"""
 90 | 
 91 |         poses_extracted = False
 92 |         if os.path.exists('ligand_out.pdbqt'):
 93 |             try:
 94 |                 subprocess.check_output('obabel -ipdbqt ligand_out.pdbqt -omol2 -Opose-.mol2 -m &>/dev/null', shell=True, executable='/bin/bash')
 95 |                 self.update_output_mol2files(sample=input_file_l)
 96 |                 poses_extracted = True
 97 |             except:
 98 |                 for mol2file in glob('pose-*.mol2'):
 99 |                     os.remove(mol2file)
100 |                 poses_extracted = False
101 | 
102 |         if poses_extracted:
103 |             with open('ligand_out.pdbqt','r') as dlgf:
104 |                 with open(file_s, 'w') as sf:
105 |                     for line in dlgf:
106 |                         if line.startswith('REMARK VINA RESULT:'):
107 |                             score = line[19:].split()[0].strip()
108 |                             sf.write(score+'\n')
109 |         else:
110 |             open(file_s, 'w').close()
111 | 
112 |     def write_rescoring_script(self, filename, file_r, file_l):
113 |         self.write_docking_script(filename, file_r, file_l, rescoring=True)
114 |     
115 |     def extract_rescoring_results(self, filename):
116 |         with open(filename, 'a') as ff:
117 |             with open('vina.out', 'r') as outf:
118 |                 for line in outf:
119 |                     if line.startswith('Affinity:'):
120 |                         ff.write(line.split()[1].strip()+'\n')
121 |         filenames = ['ligand.pdbqt', 'target.pdbqt']
122 |         for ff in filenames:
123 |             if os.path.isfile(ff):
124 |                 os.remove(ff)
125 | 


--------------------------------------------------------------------------------
/examples/autodock_vina_dock/rescoring/4de2_ligand.mol2:
--------------------------------------------------------------------------------
  1 | ### 
  2 | ### Created by X-TOOL on Mon Nov 18 15:37:28 2013
  3 | ### 
  4 | 
  5 | @<TRIPOS>MOLECULE
  6 | 4de2 ligand
  7 |    43    45     1     0     0
  8 | SMALL
  9 | GAST_HUCK
 10 | 
 11 | 
 12 | @<TRIPOS>ATOM
 13 |       1 O14        -9.2570   55.8220   13.5960 O.2       1 DN3        -0.3730
 14 |       2 C13        -8.8590   55.9810   12.4460 C.2       1 DN3         0.2307
 15 |       3 C15        -9.6340   56.8840   11.5140 C.ar      1 DN3         0.0495
 16 |       4 C16        -9.2810   56.9990   10.1710 C.ar      1 DN3        -0.0484
 17 |       5 C17        -9.9640   57.9060    9.3610 C.ar      1 DN3        -0.0627
 18 |       6 C18       -11.0130   58.6660    9.8650 C.ar      1 DN3        -0.0510
 19 |       7 C19       -11.2930   58.6230   11.2310 C.ar      1 DN3         0.0068
 20 |       8 C21       -12.4100   59.4660   11.8250 C.3       1 DN3         0.0024
 21 |       9 N22       -13.6880   58.9840   11.2650 N.4       1 DN3         0.2429
 22 |      10 C23       -13.9840   57.6630   11.8460 C.3       1 DN3        -0.0419
 23 |      11 C24       -14.7930   59.9020   11.5810 C.3       1 DN3        -0.0419
 24 |      12 C20       -10.6200   57.7150   12.0380 C.ar      1 DN3        -0.0351
 25 |      13 N12        -7.7600   55.3740   11.9800 N.am      1 DN3        -0.2334
 26 |      14 C10        -6.9040   54.5610   12.7580 C.ar      1 DN3         0.0663
 27 |      15 C11        -5.5580   54.5190   12.4080 C.ar      1 DN3        -0.0321
 28 |      16 C6         -4.6570   53.7620   13.1500 C.ar      1 DN3        -0.0043
 29 |      17 C7         -5.0900   53.0350   14.2540 C.ar      1 DN3        -0.0672
 30 |      18 C8         -6.4250   53.0920   14.6240 C.ar      1 DN3        -0.0745
 31 |      19 C9         -7.3410   53.8320   13.8640 C.ar      1 DN3        -0.0410
 32 |      20 C2         -3.2280   53.7180   12.7620 C.2       1 DN3         0.1452
 33 |      21 N1         -2.7510   53.9830   11.5290 N.2       1 DN3        -0.1437
 34 |      22 N5         -1.3720   53.8130   11.5380 N.pl3     1 DN3        -0.1036
 35 |      23 N4         -1.0230   53.4450   12.8380 N.2       1 DN3        -0.0518
 36 |      24 N3         -2.2040   53.3760   13.5800 N.2       1 DN3        -0.1756
 37 |      25 H1         -8.4840   56.3900    9.7601 H         1 DN3         0.0611
 38 |      26 H2         -9.6725   58.0199    8.3232 H         1 DN3         0.0611
 39 |      27 H3        -11.6074   59.2857    9.2034 H         1 DN3         0.0594
 40 |      28 H4        -12.2613   60.5234   11.5609 H         1 DN3         0.0980
 41 |      29 H5        -12.4175   59.3590   12.9197 H         1 DN3         0.0980
 42 |      30 H6        -13.6019   58.9034   10.2639 H         1 DN3         0.2042
 43 |      31 H7        -14.9347   57.2881   11.4390 H         1 DN3         0.0778
 44 |      32 H8        -14.0633   57.7528   12.9394 H         1 DN3         0.0778
 45 |      33 H9        -13.1751   56.9617   11.5933 H         1 DN3         0.0778
 46 |      34 H10       -14.5814   60.8922   11.1511 H         1 DN3         0.0778
 47 |      35 H11       -14.8954   59.9905   12.6726 H         1 DN3         0.0778
 48 |      36 H12       -15.7285   59.5099   11.1555 H         1 DN3         0.0778
 49 |      37 H13       -10.8663   57.6525   13.0918 H         1 DN3         0.0609
 50 |      38 H14        -7.5290   55.5047   11.0159 H         1 DN3         0.2251
 51 |      39 H15        -5.2083   55.0811   11.5496 H         1 DN3         0.0504
 52 |      40 H16        -4.3906   52.4299   14.8195 H         1 DN3         0.0498
 53 |      41 H17        -6.7624   52.5609   15.5067 H         1 DN3         0.0579
 54 |      42 H18        -8.3903   53.8378   14.1360 H         1 DN3         0.0505
 55 |      43 H19        -0.7319   53.9349   10.7403 H         1 DN3         0.2946
 56 | @<TRIPOS>BOND
 57 |      1    1    2 2  
 58 |      2    2    3 1  
 59 |      3    2   13 am 
 60 |      4    3    4 ar 
 61 |      5    3   12 ar 
 62 |      6    4    5 ar 
 63 |      7    5    6 ar 
 64 |      8    6    7 ar 
 65 |      9    7    8 1  
 66 |     10    7   12 ar 
 67 |     11    8    9 1  
 68 |     12    9   10 1  
 69 |     13    9   11 1  
 70 |     14   13   14 1  
 71 |     15   14   15 ar 
 72 |     16   14   19 ar 
 73 |     17   15   16 ar 
 74 |     18   16   17 ar 
 75 |     19   16   20 1  
 76 |     20   17   18 ar 
 77 |     21   18   19 ar 
 78 |     22   20   21 2  
 79 |     23   20   24 1  
 80 |     24   21   22 1  
 81 |     25   22   23 1  
 82 |     26   23   24 2  
 83 |     27    4   25 1  
 84 |     28    5   26 1  
 85 |     29    6   27 1  
 86 |     30    8   28 1  
 87 |     31    8   29 1  
 88 |     32    9   30 1  
 89 |     33   10   31 1  
 90 |     34   10   32 1  
 91 |     35   10   33 1  
 92 |     36   11   34 1  
 93 |     37   11   35 1  
 94 |     38   11   36 1  
 95 |     39   12   37 1  
 96 |     40   13   38 1  
 97 |     41   15   39 1  
 98 |     42   17   40 1  
 99 |     43   18   41 1  
100 |     44   19   42 1  
101 |     45   22   43 1  
102 | @<TRIPOS>SUBSTRUCTURE
103 |      1 DN3         1
104 | 
105 | 


--------------------------------------------------------------------------------
/examples/autodock/docking/1a30_ligand.mol2:
--------------------------------------------------------------------------------
  1 | ### 
  2 | ### Created by X-TOOL on Mon Nov 18 12:13:00 2013
  3 | ### 
  4 | 
  5 | @<TRIPOS>MOLECULE
  6 | 1a30 ligand
  7 |    49    48     1     0     0
  8 | SMALL
  9 | GAST_HUCK
 10 | 
 11 | 
 12 | @<TRIPOS>ATOM
 13 |       1 N           4.8410   27.5760    5.3100 N.4       1 MOL         0.2380
 14 |       2 CA          5.7330   26.3940    5.1650 C.3       1 MOL         0.0665
 15 |       3 C           7.1740   26.7720    5.5040 C.2       1 MOL         0.2269
 16 |       4 O           7.6240   27.8870    5.2180 O.2       1 MOL        -0.3907
 17 |       5 CB          5.6380   25.8180    3.7500 C.3       1 MOL         0.0194
 18 |       6 CG          5.8270   26.8460    2.6520 C.3       1 MOL         0.0075
 19 |       7 CD          5.6250   26.2750    1.2620 C.2       1 MOL         0.0351
 20 |       8 OE1         4.6630   25.4970    1.0650 O.co2     1 MOL        -0.5690
 21 |       9 OE2         6.4250   26.6200    0.3630 O.co2     1 MOL        -0.5690
 22 |      10 N           7.8730   25.8560    6.1630 N.am      1 MOL        -0.2595
 23 |      11 CA          9.2580   26.0720    6.5600 C.3       1 MOL         0.1424
 24 |      12 C          10.1010   24.9180    6.0470 C.2       1 MOL         0.2051
 25 |      13 O          10.1820   23.8640    6.6820 O.2       1 MOL        -0.3943
 26 |      14 CB          9.3460   26.1960    8.0790 C.3       1 MOL         0.0406
 27 |      15 CG          8.6490   27.4410    8.5880 C.2       1 MOL         0.0393
 28 |      16 OD1         9.2700   28.5180    8.5210 O.co2     1 MOL        -0.5688
 29 |      17 OD2         7.4740   27.3570    9.0120 O.co2     1 MOL        -0.5688
 30 |      18 N          10.6860   25.1150    4.8690 N.am      1 MOL        -0.2694
 31 |      19 CA         11.4980   24.0930    4.2260 C.3       1 MOL         0.0946
 32 |      20 C          12.9840   24.3990    4.2680 C.2       1 MOL         0.0601
 33 |      21 O          13.7840   23.4420    4.1920 O.co2     1 MOL        -0.5666
 34 |      22 CB         11.0380   23.9220    2.7850 C.3       1 MOL        -0.0156
 35 |      23 CG          9.5830   23.4660    2.7700 C.3       1 MOL        -0.0431
 36 |      24 CD1         8.8190   24.1970    1.7080 C.3       1 MOL        -0.0625
 37 |      25 CD2         9.5210   21.9550    2.6020 C.3       1 MOL        -0.0625
 38 |      26 OXT        13.3340   25.5920    4.3780 O.co2     1 MOL        -0.5666
 39 |      27 H1          3.8947   27.3126    5.0836 H         1 MOL         0.2015
 40 |      28 H2          5.1455   28.3055    4.6845 H         1 MOL         0.2015
 41 |      29 H3          4.8784   27.9107    6.2601 H         1 MOL         0.2015
 42 |      30 H4          5.4024   25.6212    5.8745 H         1 MOL         0.1099
 43 |      31 H5          4.6449   25.3608    3.6285 H         1 MOL         0.0363
 44 |      32 H6          6.4134   25.0458    3.6380 H         1 MOL         0.0363
 45 |      33 H7          6.8488   27.2476    2.7204 H         1 MOL         0.0434
 46 |      34 H8          5.1023   27.6592    2.8052 H         1 MOL         0.0434
 47 |      35 H9          7.4339   24.9880    6.3949 H         1 MOL         0.1886
 48 |      36 H10         9.6222   27.0066    6.1085 H         1 MOL         0.0819
 49 |      37 H11        10.4057   26.2377    8.3712 H         1 MOL         0.0478
 50 |      38 H12         8.8743   25.3133    8.5353 H         1 MOL         0.0478
 51 |      39 H13        10.5637   25.9949    4.4098 H         1 MOL         0.1875
 52 |      40 H14        11.3353   23.1452    4.7600 H         1 MOL         0.0726
 53 |      41 H15        11.6644   23.1676    2.2866 H         1 MOL         0.0308
 54 |      42 H16        11.1276   24.8815    2.2546 H         1 MOL         0.0308
 55 |      43 H17         9.1313   23.7168    3.7411 H         1 MOL         0.0297
 56 |      44 H18         7.7727   23.8576    1.7097 H         1 MOL         0.0232
 57 |      45 H19         9.2693   23.9921    0.7255 H         1 MOL         0.0232
 58 |      46 H20         8.8541   25.2779    1.9092 H         1 MOL         0.0232
 59 |      47 H21        10.0967   21.4728    3.4058 H         1 MOL         0.0232
 60 |      48 H22         9.9482   21.6766    1.6273 H         1 MOL         0.0232
 61 |      49 H23         8.4734   21.6232    2.6515 H         1 MOL         0.0232
 62 | @<TRIPOS>BOND
 63 |      1    3    2 1  
 64 |      2    2    5 1  
 65 |      3    2    1 1  
 66 |      4    5    6 1  
 67 |      5    6    7 1  
 68 |      6    7    8 ar 
 69 |      7    7    9 ar 
 70 |      8    3    4 2  
 71 |      9   12   11 1  
 72 |     10   11   14 1  
 73 |     11   11   10 1  
 74 |     12   14   15 1  
 75 |     13   15   16 ar 
 76 |     14   15   17 ar 
 77 |     15   12   13 2  
 78 |     16   19   20 1  
 79 |     17   19   22 1  
 80 |     18   18   19 1  
 81 |     19   22   23 1  
 82 |     20   23   25 1  
 83 |     21   23   24 1  
 84 |     22   20   26 ar 
 85 |     23   20   21 ar 
 86 |     24   10    3 am 
 87 |     25   12   18 am 
 88 |     26    1   27 1  
 89 |     27    1   28 1  
 90 |     28    1   29 1  
 91 |     29    2   30 1  
 92 |     30    5   31 1  
 93 |     31    5   32 1  
 94 |     32    6   33 1  
 95 |     33    6   34 1  
 96 |     34   10   35 1  
 97 |     35   11   36 1  
 98 |     36   14   37 1  
 99 |     37   14   38 1  
100 |     38   18   39 1  
101 |     39   19   40 1  
102 |     40   22   41 1  
103 |     41   22   42 1  
104 |     42   23   43 1  
105 |     43   24   44 1  
106 |     44   24   45 1  
107 |     45   24   46 1  
108 |     46   25   47 1  
109 |     47   25   48 1  
110 |     48   25   49 1  
111 | @<TRIPOS>SUBSTRUCTURE
112 |      1 MOL         1
113 | 
114 | 


--------------------------------------------------------------------------------
/examples/autodock/rescoring/1a30_ligand.mol2:
--------------------------------------------------------------------------------
  1 | ### 
  2 | ### Created by X-TOOL on Mon Nov 18 12:13:00 2013
  3 | ### 
  4 | 
  5 | @<TRIPOS>MOLECULE
  6 | 1a30 ligand
  7 |    49    48     1     0     0
  8 | SMALL
  9 | GAST_HUCK
 10 | 
 11 | 
 12 | @<TRIPOS>ATOM
 13 |       1 N           4.8410   27.5760    5.3100 N.4       1 MOL         0.2380
 14 |       2 CA          5.7330   26.3940    5.1650 C.3       1 MOL         0.0665
 15 |       3 C           7.1740   26.7720    5.5040 C.2       1 MOL         0.2269
 16 |       4 O           7.6240   27.8870    5.2180 O.2       1 MOL        -0.3907
 17 |       5 CB          5.6380   25.8180    3.7500 C.3       1 MOL         0.0194
 18 |       6 CG          5.8270   26.8460    2.6520 C.3       1 MOL         0.0075
 19 |       7 CD          5.6250   26.2750    1.2620 C.2       1 MOL         0.0351
 20 |       8 OE1         4.6630   25.4970    1.0650 O.co2     1 MOL        -0.5690
 21 |       9 OE2         6.4250   26.6200    0.3630 O.co2     1 MOL        -0.5690
 22 |      10 N           7.8730   25.8560    6.1630 N.am      1 MOL        -0.2595
 23 |      11 CA          9.2580   26.0720    6.5600 C.3       1 MOL         0.1424
 24 |      12 C          10.1010   24.9180    6.0470 C.2       1 MOL         0.2051
 25 |      13 O          10.1820   23.8640    6.6820 O.2       1 MOL        -0.3943
 26 |      14 CB          9.3460   26.1960    8.0790 C.3       1 MOL         0.0406
 27 |      15 CG          8.6490   27.4410    8.5880 C.2       1 MOL         0.0393
 28 |      16 OD1         9.2700   28.5180    8.5210 O.co2     1 MOL        -0.5688
 29 |      17 OD2         7.4740   27.3570    9.0120 O.co2     1 MOL        -0.5688
 30 |      18 N          10.6860   25.1150    4.8690 N.am      1 MOL        -0.2694
 31 |      19 CA         11.4980   24.0930    4.2260 C.3       1 MOL         0.0946
 32 |      20 C          12.9840   24.3990    4.2680 C.2       1 MOL         0.0601
 33 |      21 O          13.7840   23.4420    4.1920 O.co2     1 MOL        -0.5666
 34 |      22 CB         11.0380   23.9220    2.7850 C.3       1 MOL        -0.0156
 35 |      23 CG          9.5830   23.4660    2.7700 C.3       1 MOL        -0.0431
 36 |      24 CD1         8.8190   24.1970    1.7080 C.3       1 MOL        -0.0625
 37 |      25 CD2         9.5210   21.9550    2.6020 C.3       1 MOL        -0.0625
 38 |      26 OXT        13.3340   25.5920    4.3780 O.co2     1 MOL        -0.5666
 39 |      27 H1          3.8947   27.3126    5.0836 H         1 MOL         0.2015
 40 |      28 H2          5.1455   28.3055    4.6845 H         1 MOL         0.2015
 41 |      29 H3          4.8784   27.9107    6.2601 H         1 MOL         0.2015
 42 |      30 H4          5.4024   25.6212    5.8745 H         1 MOL         0.1099
 43 |      31 H5          4.6449   25.3608    3.6285 H         1 MOL         0.0363
 44 |      32 H6          6.4134   25.0458    3.6380 H         1 MOL         0.0363
 45 |      33 H7          6.8488   27.2476    2.7204 H         1 MOL         0.0434
 46 |      34 H8          5.1023   27.6592    2.8052 H         1 MOL         0.0434
 47 |      35 H9          7.4339   24.9880    6.3949 H         1 MOL         0.1886
 48 |      36 H10         9.6222   27.0066    6.1085 H         1 MOL         0.0819
 49 |      37 H11        10.4057   26.2377    8.3712 H         1 MOL         0.0478
 50 |      38 H12         8.8743   25.3133    8.5353 H         1 MOL         0.0478
 51 |      39 H13        10.5637   25.9949    4.4098 H         1 MOL         0.1875
 52 |      40 H14        11.3353   23.1452    4.7600 H         1 MOL         0.0726
 53 |      41 H15        11.6644   23.1676    2.2866 H         1 MOL         0.0308
 54 |      42 H16        11.1276   24.8815    2.2546 H         1 MOL         0.0308
 55 |      43 H17         9.1313   23.7168    3.7411 H         1 MOL         0.0297
 56 |      44 H18         7.7727   23.8576    1.7097 H         1 MOL         0.0232
 57 |      45 H19         9.2693   23.9921    0.7255 H         1 MOL         0.0232
 58 |      46 H20         8.8541   25.2779    1.9092 H         1 MOL         0.0232
 59 |      47 H21        10.0967   21.4728    3.4058 H         1 MOL         0.0232
 60 |      48 H22         9.9482   21.6766    1.6273 H         1 MOL         0.0232
 61 |      49 H23         8.4734   21.6232    2.6515 H         1 MOL         0.0232
 62 | @<TRIPOS>BOND
 63 |      1    3    2 1  
 64 |      2    2    5 1  
 65 |      3    2    1 1  
 66 |      4    5    6 1  
 67 |      5    6    7 1  
 68 |      6    7    8 ar 
 69 |      7    7    9 ar 
 70 |      8    3    4 2  
 71 |      9   12   11 1  
 72 |     10   11   14 1  
 73 |     11   11   10 1  
 74 |     12   14   15 1  
 75 |     13   15   16 ar 
 76 |     14   15   17 ar 
 77 |     15   12   13 2  
 78 |     16   19   20 1  
 79 |     17   19   22 1  
 80 |     18   18   19 1  
 81 |     19   22   23 1  
 82 |     20   23   25 1  
 83 |     21   23   24 1  
 84 |     22   20   26 ar 
 85 |     23   20   21 ar 
 86 |     24   10    3 am 
 87 |     25   12   18 am 
 88 |     26    1   27 1  
 89 |     27    1   28 1  
 90 |     28    1   29 1  
 91 |     29    2   30 1  
 92 |     30    5   31 1  
 93 |     31    5   32 1  
 94 |     32    6   33 1  
 95 |     33    6   34 1  
 96 |     34   10   35 1  
 97 |     35   11   36 1  
 98 |     36   14   37 1  
 99 |     37   14   38 1  
100 |     38   18   39 1  
101 |     39   19   40 1  
102 |     40   22   41 1  
103 |     41   22   42 1  
104 |     42   23   43 1  
105 |     43   24   44 1  
106 |     44   24   45 1  
107 |     45   24   46 1  
108 |     46   25   47 1  
109 |     47   25   48 1  
110 |     48   25   49 1  
111 | @<TRIPOS>SUBSTRUCTURE
112 |      1 MOL         1
113 | 
114 | 


--------------------------------------------------------------------------------
/examples/tlr7_chapter/active_decoys/ligand.mol2:
--------------------------------------------------------------------------------
  1 | @<TRIPOS>MOLECULE
  2 | CHEMBL1836893
  3 | 57 60 1 0 0 
  4 | SMALL
  5 | USER_CHARGES
  6 | 
  7 | 
  8 | @<TRIPOS>ATOM
  9 |       1 F1          8.8190   -2.5950   -0.4650 F         1 LIG        -0.3400
 10 |       2 C1          7.8060   -2.3110    0.3790 C.3       1 LIG         1.1640
 11 |       3 F2          8.2970   -2.1650    1.6270 F         1 LIG        -0.3400
 12 |       4 F3          6.9440   -3.3530    0.3840 F         1 LIG        -0.3400
 13 |       5 C2          7.0420   -1.0220   -0.0280 C.ar      1 LIG         0.1660
 14 |       6 N1          7.5610   -0.1670   -0.9750 N.ar      1 LIG        -0.6200
 15 |       7 C3          6.9050    0.9360   -1.3060 C.ar      1 LIG         0.4100
 16 |       8 N2          7.5190    1.7010   -2.2560 N.pl3     1 LIG        -0.9000
 17 |       9 H1          8.2800    1.1810   -2.7160 H         1 LIG         0.4000
 18 |      10 H2          6.8860    2.1920   -2.8970 H         1 LIG         0.4000
 19 |      11 C4          5.6740    1.2600   -0.7160 C.ar      1 LIG         0.1170
 20 |      12 N3          4.7940    2.3330   -0.8570 N.am      1 LIG        -0.5470
 21 |      13 H3          4.9300    3.1940   -1.4010 H         1 LIG         0.3700
 22 |      14 C5          3.7390    2.1700   -0.0240 C.2       1 LIG         0.6900
 23 |      15 O1          2.7760    2.9250    0.0650 O.2       1 LIG        -0.5700
 24 |      16 N4          3.9270    0.9760    0.6500 N.am      1 LIG        -0.4770
 25 |      17 C6          3.1400    0.5400    1.8100 C.3       1 LIG         0.2840
 26 |      18 H4          3.3450    1.2180    2.6300 H         1 LIG         0.0800
 27 |      19 H5          3.3880   -0.4840    2.0590 H         1 LIG         0.0800
 28 |      20 C7          1.6530    0.6050    1.4720 C.ar      1 LIG        -0.1440
 29 |      21 C8          1.1760    0.3660    0.1980 C.ar      1 LIG         0.2110
 30 |      22 H6          1.7800    0.1120   -0.6390 H         1 LIG         0.1500
 31 |      23 N5         -0.1620    0.4820   -0.0640 N.ar      1 LIG        -0.1790
 32 |      24 H7         -0.5030    0.3530   -1.0230 H         1 LIG         0.4570
 33 |      25 C9         -1.0510    0.8030    0.8910 C.ar      1 LIG         0.4610
 34 |      26 N6         -2.3600    0.9020    0.5450 N.pl3     1 LIG        -0.8690
 35 |      27 H8         -2.9520    1.3760    1.2570 H         1 LIG         0.4000
 36 |      28 C10        -2.9590    0.7950   -0.8240 C.3       1 LIG         0.2090
 37 |      29 H9         -2.7690    1.7090   -1.3930 H         1 LIG         0.0800
 38 |      30 H10        -2.5590   -0.0600   -1.3710 H         1 LIG         0.0800
 39 |      31 C11        -4.5000    0.6210   -0.6730 C.3       1 LIG         0.3430
 40 |      32 H11        -4.8780    1.5100   -0.1800 H         1 LIG         0.0800
 41 |      33 H12        -4.9410    0.5000   -1.6530 H         1 LIG         0.0800
 42 |      34 N7         -4.7510   -0.5770    0.1950 N.4       1 LIG        -0.9590
 43 |      35 H13        -4.0100   -0.6110    0.8910 H         1 LIG         0.4500
 44 |      36 C12        -4.7230   -1.8640   -0.5790 C.3       1 LIG         0.3430
 45 |      37 H14        -3.7810   -1.9620   -1.1060 H         1 LIG         0.0800
 46 |      38 H15        -4.8770   -2.6940    0.1030 H         1 LIG         0.0800
 47 |      39 C13        -5.8620   -1.9020   -1.6180 C.3       1 LIG        -0.1600
 48 |      40 H16        -5.7550   -1.1350   -2.3840 H         1 LIG         0.0800
 49 |      41 H17        -5.8480   -2.8740   -2.1170 H         1 LIG         0.0800
 50 |      42 C14        -7.1930   -1.7460   -0.8740 C.3       1 LIG        -0.1600
 51 |      43 H18        -8.0120   -1.7600   -1.5970 H         1 LIG         0.0800
 52 |      44 H19        -7.3390   -2.5820   -0.1850 H         1 LIG         0.0800
 53 |      45 C15        -7.2190   -0.4190   -0.1080 C.3       1 LIG        -0.1600
 54 |      46 H20        -8.1640   -0.3410    0.4350 H         1 LIG         0.0800
 55 |      47 H21        -7.1530    0.4270   -0.7930 H         1 LIG         0.0800
 56 |      48 C16        -6.0610   -0.4390    0.9110 C.3       1 LIG         0.3430
 57 |      49 H22        -6.0850    0.4920    1.4660 H         1 LIG         0.0800
 58 |      50 H23        -6.2100   -1.2920    1.5640 H         1 LIG         0.0800
 59 |      51 C17        -0.6010    1.0330    2.2070 C.ar      1 LIG        -0.1500
 60 |      52 H24        -1.3010    1.2880    2.9610 H         1 LIG         0.1500
 61 |      53 C18         0.7390    0.9360    2.4950 C.ar      1 LIG        -0.1500
 62 |      54 H25         1.0870    1.1210    3.4830 H         1 LIG         0.1500
 63 |      55 C19         5.1380    0.4230    0.2240 C.ar      1 LIG         0.1170
 64 |      56 C20         5.8180   -0.7380    0.5940 C.ar      1 LIG        -0.1500
 65 |      57 H26         5.4170   -1.3870    1.3280 H         1 LIG         0.1500
 66 | @<TRIPOS>BOND
 67 |   1   1   2  1   
 68 |   2   2   3  1   
 69 |   3   2   4  1   
 70 |   4   2   5  1   
 71 |   5   5   6  ar  
 72 |   6   5  56  ar  
 73 |   7   6   7  ar  
 74 |   8   7   8  1   
 75 |   9   7  11  ar  
 76 |  10   8   9  1   
 77 |  11   8  10  1   
 78 |  12  11  12  1   
 79 |  13  11  55  ar  
 80 |  14  12  13  1   
 81 |  15  12  14  am  
 82 |  16  14  15  2   
 83 |  17  14  16  am  
 84 |  18  16  17  1   
 85 |  19  16  55  1   
 86 |  20  17  18  1   
 87 |  21  17  19  1   
 88 |  22  17  20  1   
 89 |  23  20  21  ar  
 90 |  24  20  53  ar  
 91 |  25  21  22  1   
 92 |  26  21  23  ar  
 93 |  27  23  24  1   
 94 |  28  23  25  ar  
 95 |  29  25  26  1   
 96 |  30  25  51  ar  
 97 |  31  26  27  1   
 98 |  32  26  28  1   
 99 |  33  28  29  1   
100 |  34  28  30  1   
101 |  35  28  31  1   
102 |  36  31  32  1   
103 |  37  31  33  1   
104 |  38  31  34  1   
105 |  39  34  35  1   
106 |  40  34  36  1   
107 |  41  34  48  1   
108 |  42  36  37  1   
109 |  43  36  38  1   
110 |  44  36  39  1   
111 |  45  39  40  1   
112 |  46  39  41  1   
113 |  47  39  42  1   
114 |  48  42  43  1   
115 |  49  42  44  1   
116 |  50  42  45  1   
117 |  51  45  46  1   
118 |  52  45  47  1   
119 |  53  45  48  1   
120 |  54  48  49  1   
121 |  55  48  50  1   
122 |  56  51  52  1   
123 |  57  51  53  ar  
124 |  58  53  54  1   
125 |  59  55  56  ar  
126 |  60  56  57  1   
127 | @<TRIPOS>SUBSTRUCTURE
128 |   1 ****  21 GROUP 4 **** **** 0
129 | 
130 | # MOE 2018.01 (io_trps.svl 2018.01)
131 | 
132 | 


--------------------------------------------------------------------------------
/dockbox/dbxtools.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import math
  4 | import numpy as np
  5 | import nwalign as nw
  6 | 
  7 | from mdkit.utility import mol2
  8 | from dockbox import pyqcprot
  9 | 
 10 | # prefix to identify ligand, target and isomer directories
 11 | ligand_prefix = 'lig'
 12 | target_prefix = 'target'
 13 | 
 14 | residues_3_to_1 = {'ALA': 'A',
 15 | 'ARG': 'R',
 16 | 'ASN': 'N',
 17 | 'ASP': 'D',
 18 | 'CYS': 'C',
 19 | 'GLU': 'E',
 20 | 'GLY': 'G',
 21 | 'HIS': 'H',
 22 | 'ILE': 'I',
 23 | 'LEU': 'L',
 24 | 'LYS': 'K',
 25 | 'MET': 'M',
 26 | 'PHE': 'F',
 27 | 'PRO': 'P',
 28 | 'GLN': 'Q',
 29 | 'SER': 'S',
 30 | 'SEC': 'U',
 31 | 'THR': 'T',
 32 | 'TRP': 'W',
 33 | 'TYR': 'Y',
 34 | 'VAL': 'V'}
 35 | 
 36 | equivalent_residues = {'CYM': 'CYS',
 37 | 'LYN': 'LYS',
 38 | 'ASH': 'ASP',
 39 | 'CYX': 'CYS',
 40 | 'GLH': 'GLU',
 41 | 'HID': 'HIS',
 42 | 'HIE': 'HIS',
 43 | 'HIP': 'HIS'}
 44 | 
 45 | def get_total_residue_number(filename):
 46 |     indices = []
 47 |     nresidues = 0
 48 |     with open(filename, 'r') as pdbf:
 49 |         for line in pdbf:
 50 |             if line.startswith('ATOM'):
 51 |                 resnum = line[22:26].strip()
 52 |                 if resnum not in indices:
 53 |                     indices.append(resnum)
 54 |                     nresidues += 1
 55 |     return nresidues
 56 | 
 57 | def get_sequence_from_PDB(filename):
 58 |     indices = []
 59 |     sequence = ''
 60 | 
 61 |     with open(filename, 'r') as pdbf:
 62 |         for line in pdbf:
 63 |             if line.startswith('ATOM'):
 64 |                 resnum = line[22:26].strip()
 65 |                 resname = line[17:20].strip()
 66 |                 if resname in equivalent_residues:
 67 |                     resname = equivalent_residues[resname]
 68 | 
 69 |                 if resnum not in indices and resname in residues_3_to_1:
 70 |                     sequence += residues_3_to_1[resname]
 71 |                     indices.append(resnum)
 72 |     return sequence, indices
 73 | 
 74 | def get_residues_coordinates(filename, indices):
 75 |     indices_new = []
 76 |     coords = []
 77 | 
 78 |     with open(filename, 'r') as pdbf:
 79 |         for line in pdbf:
 80 |             if line.startswith('ATOM'):
 81 |                 resnum = line[22:26].strip()
 82 |                 resname = line[17:20].strip()
 83 |                 atomname = line[12:16].strip()
 84 | 
 85 |                 if resnum not in indices_new and resnum in indices:
 86 |                     coords.append([])
 87 |                     indices_new.append(resnum)
 88 |                 #if resnum in indices and atomname[0] != 'H':
 89 |                 if resnum in indices and atomname in ['CA', 'C', 'N', 'O']:
 90 |                     x = float(line[30:38])
 91 |                     y = float(line[38:46])
 92 |                     z = float(line[46:54])
 93 |                     coords[-1].append([atomname, x, y, z])
 94 | 
 95 |     return coords, indices_new
 96 | 
 97 | def compute_rmsd(file1, file2, rotmat=np.eye(3), trans1=np.zeros(3), trans2=np.zeros(3)):
 98 |     """Compute RMSD between 2 poses"""
 99 | 
100 |     for file in [file1, file2]:
101 |         if isinstance(file, float) and math.isnan(file):
102 |             return float('nan')
103 | 
104 |     # load coordinates of first pose (non-hydrogen atoms)
105 |     coords1 = mol2.get_coordinates(file1, keep_h=False)
106 |     coords1 = np.array(coords1)
107 |     natoms = coords1.shape[0]
108 | 
109 |     coords1_rot = np.empty_like(coords1)
110 |     for idx in range(natoms):
111 |         coords1t = coords1[idx,:] + trans1
112 |         coords1t = coords1t[:,np.newaxis]
113 |         coords1_rot[idx,:] = np.dot(rotmat, coords1t).flatten() - trans2
114 | 
115 |     # load coordinates of second pose (non-hydrogen atoms)
116 |     coords2 = mol2.get_coordinates(file2, keep_h=False)
117 |     coords2 = np.array(coords2)
118 | 
119 |     rmsd = np.sqrt(np.sum((coords1_rot-coords2)**2)/natoms)
120 |     return rmsd
121 | 
def get_rmsd_rotation_and_translations(file1, file2):
    """Get the rotation and translations relating two receptor PDB files.

    The sequences of both files are globally aligned; residues that are
    identical at the same position of the alignment are paired and their
    backbone atoms (see get_residues_coordinates) are handed to
    pyqcprot.CalcRMSDRotationalMatrix.

    file1, file2: paths to the PDB files
    returns: (rotation, trans1, trans2) with rotation a 3x3 array and
        trans1, trans2 arrays of 3 values; they are meant to be passed to
        compute_rmsd as rotmat, trans1 and trans2
    exits (sys.exit) if no residue can be paired or if paired residues do not
        have the same set of backbone atom names
    """
    seq1, ind1 = get_sequence_from_PDB(file1)
    seq2, ind2 = get_sequence_from_PDB(file2)

    # global_align returns both sequences padded with '-' so that they have
    # the same length; the pairing has to walk through these aligned strings,
    # not through the raw sequences
    alignment = nw.global_align(seq1, seq2)
    aligned1, aligned2 = alignment[0], alignment[1]

    ind1new = []
    ind2new = []
    idx1, idx2 = 0, 0   # positions in the ungapped sequences

    for res1, res2 in zip(aligned1, aligned2):
        if res1 == res2 and res1 != '-':
            ind1new.append(ind1[idx1])
            ind2new.append(ind2[idx2])
        if res1 != '-':
            idx1 += 1
        if res2 != '-':
            idx2 += 1

    if not ind1new:
        sys.exit("No common residues found between file %s and file %s!"%(file1, file2))

    #TODO: add a threshold for the number of residues considered

    # get coordinates of specific residues
    coords1, ind1 = get_residues_coordinates(file1, ind1new)
    coords2, ind2 = get_residues_coordinates(file2, ind2new)

    new_coords1 = []
    new_coords2 = []

    for idx, (coords1_res, coords2_res) in enumerate(zip(coords1, coords2)):
        # check if there is consistency in atom names
        atomnames1 = [item[0] for item in coords1_res]
        atomnames2 = [item[0] for item in coords2_res]
        if set(atomnames1) != set(atomnames2):
            sys.exit("Inconsistency found in residue %s in file %s and residue %s in file %s! Missing atom suspected..."%(ind1[idx],file1,ind2[idx],file2))

        # first occurrence of every atom name of the second residue
        atoms2 = {}
        for an2, x2, y2, z2 in coords2_res:
            atoms2.setdefault(an2, [x2, y2, z2])

        # create new coordinates, with atoms in the order of the first file
        for an1, x1, y1, z1 in coords1_res:
            new_coords1.append([x1, y1, z1])
            new_coords2.append(atoms2[an1])

    # one column per atom
    new_coords1 = np.array(new_coords1).T
    new_coords2 = np.array(new_coords2).T

    rotation = np.zeros(9)
    # position of the first atom before the fit (negated)...
    trans1 = -new_coords1[:,0]
    trans2 = -new_coords2[:,0]

    # fills rotation in place
    # NOTE(review): the translations below are only non-zero if this call
    # also shifts new_coords1/new_coords2 in place (presumably centering
    # them) -- confirm against pyqcprot.pyx
    pyqcprot.CalcRMSDRotationalMatrix(new_coords1, new_coords2, rotation, None)

    rotation = rotation.reshape((3, 3))
    # ...plus its position after the fit: displacement applied to each set
    trans1 += new_coords1[:,0]
    trans2 += new_coords2[:,0]

    return rotation, trans1, trans2
195 | 
def get_rmsd_rotation_and_translations_all_targets(files_r):
    """Get rotation and translations for every ordered pair of targets.

    files_r: mapping from target key to receptor PDB file
    returns: nested dict such that result[key1][key2] is the list
        [rotation, trans1, trans2] returned by
        get_rmsd_rotation_and_translations(files_r[key1], files_r[key2]);
        for key1 == key2 it is the identity matrix and two zero vectors
    """
    all_pairs = {}

    for source in files_r:
        per_target = {}
        for destination in files_r:
            if source != destination:
                rotation, trans1, trans2 = get_rmsd_rotation_and_translations(files_r[source], files_r[destination])
            else:
                # a target compared with itself needs no transformation
                rotation, trans1, trans2 = np.eye(3), np.zeros(3), np.zeros(3)
            per_target[destination] = [rotation, trans1, trans2]
        all_pairs[source] = per_target

    return all_pairs
214 | 
def check_architecture(directory):
    """Check architecture <ligand_prefix>*/<target_prefix>* of specified directory

    directory: path of an existing directory, with '/' as separator
    returns: (isligID, istargetID) where istargetID tells if the last path
        component starts with target_prefix and isligID if the ligand level
        is present (the component before a target one, or the last component
        itself, starts with ligand_prefix)
    raises: ValueError if directory is not an existing directory
    """
    if not os.path.isdir(directory):
        # used to fall through to the return statement with unbound variables
        raise ValueError("%s is not a directory!"%directory)

    dir_split = directory.split('/')
    if dir_split[-1].startswith(target_prefix):
        istargetID = True
        # the ligand level, if any, is the parent of the target directory
        isligID = len(dir_split) > 1 and dir_split[-2].startswith(ligand_prefix)
    elif dir_split[-1].startswith(ligand_prefix):
        istargetID = False
        isligID = True
    else:
        istargetID = False
        isligID = False

    return isligID, istargetID
234 | 
def get_IDs(directory, isligID, istargetID):
    """Return (ligID, targetID) read from the last components of a path.

    directory: path with '/' as separator
    isligID, istargetID: flags telling which levels the path contains (as
        returned by check_architecture)
    returns: the target ID is the last component when istargetID is set, the
        ligand ID is then the component before it (if isligID is set); when
        only isligID is set, the ligand ID is the last component. IDs of
        absent levels are None.
    """
    ligID = None
    targetID = None

    # the path is only looked at when at least one level is present
    if istargetID or isligID:
        components = directory.split('/')
        if istargetID:
            targetID = components[-1]
            if isligID:
                ligID = components[-2]
        else:
            ligID = components[-1]

    return ligID, targetID
252 | 
def check_directories(dirs):
    """Check that all directories share the same ligand/target architecture.

    dirs: list of directories; ['.'] stands for the current directory and is
        not inspected
    returns: (iscwd, isligID, istargetID) where iscwd is True only for
        dirs == ['.'] and the two other flags are those returned by
        check_architecture for the directories (both False for ['.'])
    raises: ValueError if no directory is given or if the directories do not
        all have the same architecture
    """
    if dirs == ['.']:
        return True, False, False

    isligID_ref = None
    istargetID_ref = None
    for jdx, directory in enumerate(dirs):
        isligID, istargetID = check_architecture(directory)
        if jdx == 0:
            # the first directory serves as reference for the others
            isligID_ref = isligID
            istargetID_ref = istargetID
        elif isligID != isligID_ref or istargetID != istargetID_ref:
            raise ValueError("%s*/%s* architecture inconsistent between folders!"%(ligand_prefix,target_prefix))

    if isligID_ref is None:
        # empty input used to end up in an UnboundLocalError
        raise ValueError("No directory provided!")

    return False, isligID_ref, istargetID_ref
269 | 
270 | 


--------------------------------------------------------------------------------
/dockbox/configure.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | from glob import glob
  4 | import subprocess
  5 | 
# programs accepted for each task; ConfigSetup.setup_instances rejects any
# program name which is not listed for the requested task
known_programs = {'docking': ['autodock', 'vina', 'dock', 'moe'], 'rescoring': ['autodock', 'vina', 'dock', 'moe', 'dsx', 'gnina']}
# the 'scoring' task refers to the very same list object as 'rescoring'
known_programs['scoring'] = known_programs['rescoring']

# programs for which rescoring is launched once with the whole list of poses
# instead of once per pose (see DockingMethod.run_rescoring in method.py)
single_run_scoring_programs = ['dock']
# NOTE(review): not used in this module; presumably the programs that are
# able to deal with ions -- confirm against the callers
programs_handling_ions = ['autodock', 'vina', 'dock']

# values loaded by DockingSetup.set_minimization_options before the options of
# the MINIMIZATION section of the config file are applied on top of them
default_minimize_options = {'charge_method': 'gas', 'ncyc': 1000, 'maxcyc': 2000, 'cut': 999.0, 'solvent': 'vacuo'}
# per-program options whose value is converted to an absolute path by
# ConfigSetup.setup_instances
path_options = {'dock': ['grid_dir']}
 14 | 
 15 | class ConfigSetup(object):
 16 | 
 17 |     def __init__(self, task, config):
 18 | 
 19 |         self.task = task
 20 |         self.section = task.upper()
 21 | 
 22 |         self.setup_instances(task, config)
 23 |         self.set_site_options(config)
 24 | 
 25 |     def setup_instances(self, task, config):
 26 |         self.instances = []
 27 | 
 28 |         if config.has_option(self.section, 'program'):
 29 | 
 30 |             instances = config.get(self.section, 'program').lower()
 31 |             instances = map(str.strip, instances.split(','))
 32 | 
 33 |             for instance in instances:
 34 |                 program = ''.join([c for c in instance if not c.isdigit()]) # get program's exact name
 35 |                 if program not in known_programs[task]:
 36 |                     raise ValueError("%s programs should be one of "%task.capitalize() + ", ".join(known_programs[task]))
 37 |                 sys.modules[program] = __import__('dockbox.'+program, fromlist=['a'])
 38 | 
 39 |                 options = {}
 40 |                 # check if all needed executables are available
 41 |                 if hasattr(sys.modules[program], 'required_programs'):
 42 |                     required_programs = getattr(sys.modules[program], 'required_programs')
 43 |                     for exe in required_programs:
 44 |                         try:
 45 |                             subprocess.check_call('which %s > /dev/null'%exe, shell=True)
 46 |                         except subprocess.CalledProcessError:
 47 |                             raise ValueError('Executable %s needed for docking with %s not found! \
 48 | Make sure the program has been installed and is in your PATH!'%(exe, program))
 49 | 
 50 |                 # check if mandatory options are set up
 51 |                 if hasattr(sys.modules[program], 'mandatory_settings'):
 52 |                     madatory_settings = getattr(sys.modules[program], 'mandatory_settings')
 53 |                     config_d = dict(config.items(instance.upper()))
 54 |                     for setting in madatory_settings:
 55 |                         if setting not in config_d or not config_d[setting]:
 56 |                             raise ValueError('Option %s when using %s is mandatory!'%(setting,program))
 57 | 
 58 |                 # load default parameters
 59 |                 if hasattr(sys.modules[program], 'default_settings'):
 60 |                     default_settings = getattr(sys.modules[program], 'default_settings')
 61 |                     for key, value in default_settings.iteritems():
 62 |                         options[key] = value
 63 | 
 64 |                 known_settings = {}
 65 |                 if hasattr(sys.modules[program], 'known_settings'):
 66 |                     known_settings = getattr(sys.modules[program], 'known_settings')
 67 | 
 68 |                 def check_value(key, value, instance):
 69 |                    if not key in default_settings.keys():
 70 |                        raise ValueError("Option %s not recognized in instance %s!"%(key, instance))
 71 |                    # TODO: check that value has the required type, e.g. set known_settings as a dict with the type and the list of possible choices if any!
 72 |                    if key in known_settings:
 73 |                        for known_value in known_settings[key]:
 74 |                            if value.lower() == known_value.lower():
 75 |                                return known_value
 76 |                        raise ValueError("Value %s not recognized for option %s in instance %s!"%(value, key, instance))
 77 |                    elif key.endswith('dir'): # path value
 78 |                        return os.path.abspath(value)
 79 |                    else:
 80 |                        return value
 81 | 
 82 |                 # get parameters from config file (would possibly overwrite default preset parameters)
 83 |                 if config.has_section(instance.upper()):
 84 |                    config_d = dict(config.items(instance.upper()))
 85 |                    for key, value in config_d.iteritems(): 
 86 |                        if program in path_options and key in path_options[program]:
 87 |                            value = os.path.abspath(value) 
 88 |                        options[key] = check_value(key, value, instance)
 89 | 
 90 |                 self.instances.append((instance, program, options))
 91 |         else:
 92 |             raise ValueError("option program in section %s is required in config file!"%self.section)
 93 | 
 94 |     def set_site_options(self, config):
 95 |         """set options for the binding site"""
 96 | 
 97 |         site = {}
 98 |         required_options = ['center', 'boxsize']
 99 | 
100 |         if config.has_option('DOCKING', 'site'):
101 |             sitenames = config.get('DOCKING', 'site').lower()
102 |             sitenames = map(str.strip, sitenames.split(','))
103 |             for idx, name in enumerate(sitenames):
104 |                 site['site'+str(idx+1)] = [name]
105 |                 for option in required_options:
106 |                     section = name.upper()
107 |                     if config.has_option(section, option):
108 |                         value = config.get(section, option)
109 |                         site['site'+str(idx+1)].append(value)
110 |                     else:
111 |                         raise ValueError("Option %s in section %s is required in config file!"%(option,section))
112 |         else:
113 |             section = 'SITE'
114 |             site['site1'] = [None]
115 |             for option in required_options:
116 |                 if config.has_option(section, option):
117 |                     value = config.get(section, option)
118 |                     site['site1'].append(value)
119 |                 else:
120 |                     raise ValueError("Option %s in section %s is required in config file for local docking!"%(option,section))
121 |         self.site = site
122 |         self.nsites = len(site)
123 | 
124 | 
125 |     def get_value_yesno_option(self, config, section, option, default=False):
126 | 
127 |         if config.has_option(section, option):
128 |             yesno = config.get(section, option).lower()
129 |             if yesno == 'yes':
130 |                 return True
131 |             elif yesno == 'no':
132 |                 return False
133 |             else:
134 |                 raise ValueError("option %s should be yes or no!"%option)
135 |         else:
136 |             return default
137 | 
138 |     def get_value_cleanup_option(self, config, section, default=0):
139 | 
140 |         if config.has_option(section, 'cleanup'):
141 |             value = config.get(section, 'cleanup').lower()
142 |             if value == 'no' or value == '0':
143 |                 return 0
144 |             elif value == 'yes' or value == '1':
145 |                 return 1
146 |             elif value == '2' or value == '3':
147 |                 return int(value)
148 |             else:
149 |                 raise ValueError("cleanup option in section DOCKING should be yes, no or 0 to 3!")
150 |         else:
151 |             return default
152 | 
class DockingSetup(ConfigSetup):
    """Settings of the docking task.

    On top of the attributes set by ConfigSetup, the instance provides:
      cleanup: value of the option 'cleanup' of section DOCKING (0 to 3)
      minimize, minimize_options: dict of minimization options (same object)
    """

    def __init__(self, config):
        """Read the settings of the docking task from config."""
        super(DockingSetup, self).__init__('docking', config)

        self.cleanup = self.get_value_cleanup_option(config, 'DOCKING')
        self.minimize = self.set_minimization_options(config)

    def set_minimization_options(self, config):
        """set options for minimization

        The key 'minimization' tells if the option 'minimize' of section
        DOCKING is set to yes. Only in that case, the dict also contains
        'amber_version', the default options and the options of section
        MINIMIZATION (as strings), the latter overwriting the former.

        returns: self.minimize_options
        """
        self.minimize_options = {}
        self.minimize_options['minimization'] = self.get_value_yesno_option(config, 'DOCKING', 'minimize')

        if self.minimize_options['minimization']:
            # check AMBER version
            self.minimize_options['amber_version'] = self.check_amber_version()

            # load default parameters
            self.minimize_options.update(default_minimize_options)

            # get parameters from config file (would possibly overwrite default preset parameters)
            section = 'MINIMIZATION'
            if config.has_section(section):
                self.minimize_options.update(config.items(section))

        return self.minimize_options

    def check_amber_version(self):
        """Return the detected Amber version ('14' to '17') as a string.

        AMBERHOME has to be set, the executables tleap, sander and cpptraj
        have to be in the PATH and the version is read from the name of the
        files $AMBERHOME/doc/Amber<version>.pdf.

        raises: ValueError if one of the above conditions is not met or if
            the version is not supported
        """
        error_msg = 'AmberTools serial version >= 14 and <= 17 is required for minimization with DockBox!'

        amberhome = os.environ.get('AMBERHOME')
        if not amberhome:
            raise ValueError("AMBERHOME is not set! %s"%error_msg)

        for exe in ['tleap', 'sander', 'cpptraj']:
            try:
                subprocess.check_call('which %s > /dev/null'%exe, shell=True)
            except subprocess.CalledProcessError:
                raise ValueError('Executable %s is not found in your PATH! %s'%(exe, error_msg))

        # the version is what stands between 'Amber' and '.pdf'; take the
        # first documentation file for which it is an integer
        amber_version = None
        for docfile in glob(amberhome+'/doc/Amber*.pdf'):
            candidate = os.path.basename(docfile)[5:-4]
            try:
                int(candidate)
            except ValueError:
                continue
            amber_version = candidate
            break

        if amber_version is None:
            raise ValueError("Amber version not detected! %s"%error_msg)
        if amber_version not in ['14', '15', '16', '17']:
            raise ValueError("Amber version %s detected! %s"%(amber_version, error_msg))
        return amber_version
207 | 
class RescoringSetup(ConfigSetup):
    """Settings of the rescoring task, read only if rescoring is requested."""

    def __init__(self, config):
        """Set is_rescoring from the option 'rescoring' of section DOCKING.

        The settings of the rescoring task are read from config only when
        the option is set to yes; otherwise nothing else is initialized.
        """
        rescoring_requested = self.get_value_yesno_option(config, 'DOCKING', 'rescoring')
        self.is_rescoring = rescoring_requested

        if not rescoring_requested:
            return
        super(RescoringSetup, self).__init__('rescoring', config)
215 | 
class ScoringSetup(ConfigSetup):
    """Settings of a scoring task; nothing is added to or changed from ConfigSetup."""
218 | 


--------------------------------------------------------------------------------
/dockbox/method.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import stat
  4 | import shutil
  5 | import subprocess
  6 | 
  7 | from glob import glob
  8 | 
  9 | from mdkit.amber import minimization
 10 | from mdkit.utility import mol2
 11 | 
 12 | import configure
 13 | 
 14 | class DockingMethod(object):
 15 | 
 16 |     def __init__(self, instance, site, options):
 17 |         """Initialize docking instance"""
 18 | 
 19 |         self.instance = instance
 20 |         self.site = site
 21 |         self.options = options
 22 | 
 23 |         self.program = self.__class__.__name__.lower()
 24 | 
 25 |     def run_docking(self, file_r, file_l, minimize_options=None, cleanup=0, prepare_only=False, skip_docking=False):
 26 |         """Run docking one (file per ligand and receptor)"""
 27 | 
 28 |         curdir = os.getcwd()
 29 |         # find name for docking directory
 30 |         if 'name' in self.options:
 31 |             dockdir = self.options['name']
 32 |         else:
 33 |             dockdir = self.instance
 34 | 
 35 |         if self.site[0]:
 36 |             dockdir += '.' + self.site[0]
 37 | 
 38 |         if not skip_docking:
 39 |             # create directory for docking (remove directory if exists)
 40 |             shutil.rmtree(dockdir, ignore_errors=True)
 41 |             os.mkdir(dockdir)
 42 |         os.chdir(dockdir)
 43 | 
 44 |         if not skip_docking:
 45 |             print "Starting docking with %s..."%self.program.capitalize()
 46 |             print "The following options will be used:"
 47 |             options_info = ""
 48 |             for key, value in self.options.iteritems():
 49 |                 options_info += str(key) + ': ' + str(value) + ', '
 50 |             print options_info[:-2]
 51 | 
 52 |             # (A) run docking
 53 |             script_name = "run_" + self.program + ".sh"
 54 |             self.write_docking_script(script_name, file_r, file_l)
 55 |             os.chmod(script_name, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH | stat.S_IXUSR)
 56 | 
 57 |             if prepare_only:
 58 |                 return
 59 |             try:
 60 |                 # try running docking procedure
 61 |                 subprocess.check_output('./' + script_name + " &> " + self.program + ".log", shell=True, executable='/bin/bash')
 62 |             except subprocess.CalledProcessError as e:
 63 |                 print e
 64 |                 print "Error: check %s file for more details!"%(dockdir+'/'+self.program+'.log')
 65 |                 os.chdir(curdir)
 66 |                 return
 67 | 
 68 |         if prepare_only:
 69 |             return
 70 | 
 71 |         # (B) extract docking results
 72 |         self.extract_docking_results('score.out', file_r, file_l)
 73 | 
 74 |         # (C) cleanup poses (minimization, remove out-of-box poses)
 75 |         if minimize_options['minimization']:
 76 |             self.backup_files('origin')
 77 |             self.minimize_extracted_poses(file_r, 'score.out', **minimize_options)
 78 |         self.remove_out_of_range_poses('score.out')
 79 | 
 80 |         # (D) remove intermediate files if required
 81 |         if cleanup == 1:
 82 |             self.cleanup()
 83 | 
 84 |         os.chdir(curdir)
 85 |         print "Docking with %s done."%self.program.capitalize()
 86 | 
 87 |     def run_rescoring(self, file_r, files_l):
 88 |         """Rescore multiple ligands on one receptor"""
 89 | 
 90 |         curdir = os.getcwd()
 91 |         # get name of rescoring from instance
 92 |         rescordir = self.instance
 93 |         if self.site[0]:
 94 |             rescordir += '.' + self.site[0]
 95 | 
 96 |         # overwrite previous directory if exists
 97 |         shutil.rmtree(rescordir, ignore_errors=True)
 98 |         os.mkdir(rescordir)
 99 | 
100 |         # change directory
101 |         os.chdir(rescordir)
102 | 
103 |         mol2files = files_l
104 |         if self.program in configure.single_run_scoring_programs:
105 |             # if the program rescores in one run, provides a list of files
106 |             mol2files = [mol2files]
107 | 
108 |         if mol2files:
109 |             # iterate over all the poses
110 |             for idx, file_l in enumerate(mol2files):
111 |                 # (A) write script
112 |                 script_name = "run_scoring_" + self.program + ".sh"
113 |                 self.write_rescoring_script(script_name, file_r, file_l)
114 |                 os.chmod(script_name, stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH | stat.S_IXUSR)
115 | 
116 |                 # (B) run scoring method
117 |                 try:
118 |                     subprocess.check_output('./' + script_name + ' &> ' + self.program + '.log', shell=True, executable='/bin/bash')
119 |                 except subprocess.CalledProcessError as e:
120 |                     print e.output
121 |                     pass
122 | 
123 |                 # (C) extract rescoring results
124 |                 if self.program in configure.single_run_scoring_programs:
125 |                     nligands = len(file_l)
126 |                     self.extract_rescoring_results('score.out', nligands=nligands)
127 |                 else:
128 |                     self.extract_rescoring_results('score.out')
129 |         else:
130 |             # if no files provided, create an empty score.out file
131 |             open('score.out', 'w').close()
132 | 
133 |         os.chdir(curdir)
134 |         return rescordir + '/score.out'
135 | 
136 |     def get_output_mol2files(self):
137 |         """Get output mol2files sorted by pose ranking after docking"""
138 | 
139 |         filenames_idxs = []
140 |         for filename in glob('pose-*.mol2'):
141 |             suffix, ext = os.path.splitext(filename)
142 |             filenames_idxs.append(int(suffix.split('-')[-1]))
143 |         filenames_idxs = sorted(filenames_idxs)
144 | 
145 |         mol2files = []
146 |         for idx in filenames_idxs:
147 |             mol2files.append('pose-%s.mol2'%idx)
148 |         return mol2files
149 | 
150 |     def backup_files(self, dir):
151 |         """Do a backup of output mol2files""" 
152 | 
153 |         mol2files = self.get_output_mol2files()
154 |         shutil.rmtree(dir, ignore_errors=True)
155 |         os.mkdir(dir)
156 |         for filename in mol2files:
157 |             shutil.copyfile(filename, dir+'/'+filename) 
158 | 
159 |     def remove_scores_from_scorefile(self, file_s, indices, nligands=None):
160 |         """Remove scores of bad poses (failed minimization, out of the box...) from score.out"""
161 |         if os.path.exists(file_s):
162 |             new_content = []
163 |             with open(file_s, 'r') as sf:
164 |                 for idx, line in enumerate(sf):
165 |                     if idx not in indices:
166 |                         new_content.append(line)
167 |             if nligands:
168 |                 # consistency check
169 |                 assert nligands == idx+1, "number of ligand mol2files should be equal to number of lines in score.out"
170 |             with open(file_s, 'w') as sf:
171 |                 for line in new_content:
172 |                     sf.write(line)
173 | 
174 |     def minimize_extracted_poses(self, file_r, file_s, **minimize_options):
175 |         """Perform AMBER minimization on extracted poses"""
176 | 
177 |         mol2files = self.get_output_mol2files()
178 |         if mol2files:
179 |             # do energy minimization on ligand
180 |             minimization.do_minimization_after_docking(file_r, mol2files, keep_hydrogens=True, charge_method=minimize_options['charge_method'],\
181 | ncyc=minimize_options['ncyc'], maxcyc=minimize_options['maxcyc'], cut=minimize_options['cut'], amber_version=minimize_options['amber_version'])
182 | 
183 |             failed_idxs = []
184 |             # extract results from minimization and purge out
185 |             for idx, filename_before_min in enumerate(mol2files):
186 |                 suffix, ext = os.path.splitext(filename_before_min)
187 |                 filename = 'em/' + suffix + '-out' + ext
188 |                 if os.path.isfile(filename): # the minimization succeeded
189 |                     shutil.copyfile(filename, filename_before_min)
190 |                 else: # the minimization failed
191 |                     os.remove(filename_before_min)
192 |                     failed_idxs.append(idx)
193 | 
194 |             # remove scores of failed poses
195 |             self.remove_scores_from_scorefile(file_s, failed_idxs, nligands=len(mol2files))
196 | 
197 |             if failed_idxs:
198 |                 # display warning message
199 |                 failed_mol2files = [mol2files[idx] for idx in failed_idxs]
200 |                 print "Warning: minimization of poses %s failed, poses were removed!"%(', '.join(failed_mol2files))
201 | 
202 |     def remove_out_of_range_poses(self, file_s):
203 |         """Get rid of poses which were predicted outside the box"""
204 | 
205 |         mol2files = self.get_output_mol2files()
206 |         if mol2files:
207 |             sitename, center, boxsize = self.site
208 |             # get values of docking box center and boxsize
209 |             center = map(float, center.split(','))
210 |             boxsize = map(float, boxsize.split(','))
211 |  
212 |             out_of_range_idxs = []
213 |             for jdx, filename in enumerate(mol2files):
214 |                 is_out = False
215 |                 for coord in mol2.get_coordinates(filename):
216 |                     for idx, value in enumerate(coord):
217 |                         # check if the pose is out of the box
218 |                         if abs(value - center[idx]) > boxsize[idx]*1./2:
219 |                             is_out = True
220 |                             break
221 |                     if is_out:
222 |                         os.remove(filename)
223 |                         out_of_range_idxs.append(jdx)
224 |                         break
225 |             # remove scores of failed poses
226 |             self.remove_scores_from_scorefile(file_s, out_of_range_idxs, nligands=len(mol2files))
227 | 
228 |             if out_of_range_idxs:
229 |                 # display warning message
230 |                 out_of_range_mol2files = [mol2files[idx] for idx in out_of_range_idxs]
231 |                 print "Warning: poses %s were found out of the box, poses were removed!"%(', '.join(out_of_range_mol2files))
232 | 
233 |     def cleanup(self):
234 |         """Remove all intermediate files"""
235 |         for filename in glob('*'):
236 |             if os.path.isfile(filename) and not filename.startswith('pose-') and filename != 'score.out':
237 |                 os.remove(filename)
238 | 
239 |             elif os.path.isdir(filename):
240 |                 shutil.rmtree(filename)
241 |                 
242 |     def write_rescoring_script(self, script_name, file_r, file_l):
243 |         pass
244 | 
245 |     def extract_rescoring_results(self, filename):
246 |         pass
247 | 
248 |     def write_docking_script(self, script_name, file_r, file_l):
249 |         pass
250 | 
251 |     def extract_docking_results(self, file_r, file_l, file_s, input_file_r):
252 |         pass
253 | 
class ScoringMethod(DockingMethod):
    """Variant of DockingMethod whose docking-related steps are disabled."""

    def run_docking(self, file_r, file_l, minimize=False, cleanup=0, extract_only=False):
        """Do nothing and return None: no docking is run."""

    def remove_out_of_range_poses(self, file_s):
        """Do nothing and return None: no pose is removed."""

    def minimize_extracted_poses(self, file_r):
        """Do nothing and return None: no pose is minimized."""
264 | 


--------------------------------------------------------------------------------
/dockbox/autodock.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import subprocess
  4 | import shutil
  5 | from glob import glob
  6 | 
  7 | from mdkit.utility import mol2
  8 | import method
  9 | 
# executables that have to be found in the PATH when this program is used
# (checked by ConfigSetup.setup_instances in configure.py)
required_programs = ['prepare_ligand4.py', 'prepare_receptor4.py', 'prepare_dpf4.py', 'prepare_gpf4.py', 'autogrid4', 'autodock4', 'obabel']

# options of the program with their default values; ConfigSetup.setup_instances
# rejects any option of the config file which is not a key of this dict
default_settings = {'ga_run': '100', 'spacing': '0.3'}
 13 | 
 14 | class ADBased(method.DockingMethod):
 15 | 
    def write_rescoring_script(self, filename, file_r, file_l):
        """Write the rescoring script by calling write_docking_script with rescoring=True."""
        self.write_docking_script(filename, file_r, file_l, rescoring=True)
 18 | 
 19 |     def update_output_mol2files(self, sample=None):
 20 |         nfiles = len(glob('pose-*.mol2'))
 21 | 
 22 |         mgltools_path = subprocess.check_output('which prepare_ligand4.py', shell=True, executable='/bin/bash')
 23 |         mgltools_path = '/'.join(mgltools_path.split('/')[:-3]) 
 24 | 
 25 |         for idx in range(nfiles):
 26 |             mol2file = 'pose-%s.mol2'%(idx+1)
 27 |             mol2.update_mol2file(mol2file, mol2file, ADupdate=sample, unique=True, mask=['h','H'])
 28 |             mol2.arrange_hydrogens(mol2file, 'tmp.mol2', path=mgltools_path)
 29 |             shutil.move('tmp.mol2', mol2file)
 30 | 
 31 |     def write_check_ligand_pdbqt_script(self, filename):
 32 | 
 33 |         with open(filename, 'w') as ff:
 34 |             content ="""import os
 35 | import sys
 36 | import shutil
 37 | 
 38 | input_file = sys.argv[1]
 39 | 
 40 | filename, ext = os.path.splitext(input_file)
 41 | file_tmp = filename + '_tmp.pdbqt'
 42 | 
 43 | lines_to_be_removed = []
 44 | 
 45 | has_branch_started = False
 46 | with open(input_file, 'r') as ff:
 47 |     for line in ff:
 48 |         if has_branch_started:
 49 |             has_branch_started = False
 50 |             branch_num = start_branch_line.split()[-1]
 51 |             if line.split()[1] != branch_num:
 52 |                 lines_to_be_removed.append(start_branch_line)
 53 |                 lines_to_be_removed.append('END' + start_branch_line)
 54 |         if line.startswith('BRANCH'):
 55 |             start_branch_line = line
 56 |             has_branch_started = True
 57 | 
 58 | if lines_to_be_removed:
 59 |     with open(input_file, 'r') as ff:
 60 |         with open(file_tmp, 'w') as of:
 61 |             for line in ff:
 62 |                 if line.startswith(('BRANCH', 'ENDBRANCH')) and line in lines_to_be_removed:
 63 |                     pass
 64 |                 else:
 65 |                     of.write(line)
 66 |     shutil.move(file_tmp, input_file)"""
 67 |             ff.write(content)
 68 | 
 69 |     def write_check_ions_script(self, filename):
 70 | 
 71 |         with open(filename, 'w') as file:
 72 |             script = """import sys
 73 | import shutil
 74 | from tempfile import mkstemp
 75 | 
 76 | from mdkit.amber.ambertools import load_atomic_ions
 77 | 
 78 | # first all residues are supposed to be recognized
 79 | are_unrecognized_residues = False
 80 | 
 81 | # check if and which atoms were not recognized
 82 | unrecognized_residues = []
 83 | with open(sys.argv[2], 'r') as logf:
 84 |     for line in logf:
 85 |         if line.startswith('Sorry, there are no Gasteiger parameters available for atom'):
 86 |             are_unrecognized_residues = True
 87 |             resname = line.split()[-1].split(':')[0]
 88 |             resname = ''.join([i for i in resname if not i.isdigit()])
 89 |             unrecognized_residues.append(resname)
 90 | 
 91 | if are_unrecognized_residues:
 92 | 
 93 |     ions_amber = load_atomic_ions()
 94 |     print "No charges specified for ion(s) " + ', '.join(unrecognized_residues)
 95 |     print "Attributing formal charges..."
 96 | 
 97 |     # update .pdbqt file for the receptor
 98 |     fh, abs_path = mkstemp()
 99 | 
100 |     with open(abs_path, 'w') as tempf:
101 |         with open(sys.argv[1], 'r') as ff:
102 | 
103 |             for line in ff:
104 |                 is_ion = False
105 | 
106 |                 if line.startswith(('ATOM', 'HETATM')):
107 |                     resname = line[17:20].strip()
108 |                     if resname in unrecognized_residues:
109 |                         assert resname in ions_amber
110 |                         charge = "%.3f"%ions_amber[resname]
111 |                         is_ion = True
112 | 
113 |                 if is_ion:
114 |                     tempf.write(line[:70] + ' '*(6-len(charge)) + charge + line[76:])
115 |                 else:
116 |                     tempf.write(line)
117 | 
118 |     shutil.move(abs_path, sys.argv[1])"""
119 |             file.write(script)
120 | 
class Autodock(ADBased):
    """AutoDock 4 driver: writes the docking/rescoring shell scripts and
    parses the resulting dock.dlg file."""

    def __init__(self, instance, site, options):
        """Derive the grid options from the binding site.

        site[1] holds the comma-separated box center and site[2] the
        comma-separated box dimensions; options['spacing'] is the grid spacing.
        """
        super(Autodock, self).__init__(instance, site, options)

        # set box center (space-separated, wrapped in double quotes)
        center = [coord.strip() for coord in site[1].split(',')]
        self.options['gridcenter'] = '"' + ' '.join(center) + '"'

        # set box size: number of grid points per dimension is the box
        # length divided by the spacing, truncated, plus one
        boxsize = [float(length.strip()) for length in site[2].split(',')]
        spacing = float(options['spacing'])
        self.options['npts'] = ','.join([str(int(size / spacing) + 1) for size in boxsize])

        autogrid_options_names = ['spacing', 'npts', 'gridcenter']
        autodock_options_names = ['ga_run', 'ga_pop_size', 'ga_num_evals', 'ga_num_generations', 'outlev', 'seed']

        # keep only the options that were actually provided
        self.autogrid_options = dict((name, options[name]) for name in autogrid_options_names if name in options)
        self.autodock_options = dict((name, options[name]) for name in autodock_options_names if name in options)

    def write_docking_script(self, filename, file_r, file_l, rescoring=False):
        """Write the bash script that runs AutoDock on file_r / file_l.

        With rescoring=False the script prepares ligand and receptor, runs
        autogrid4 and performs a full docking. With rescoring=True receptor,
        grid and .dpf file are prepared only if they do not exist yet and the
        .dpf file is truncated so that autodock4 only evaluates the ligand
        (epdb). The two helper scripts check_ligand_pdbqt.py and
        check_ions.py are written to the current directory in both cases.
        """
        #TODO: add treatment of ions for autogrid: http://autodock.scripps.edu/faqs-help/how-to/adding-new-atom-parameters-to-autodock

        # create flags with specified options for autogrid and autodock
        autogrid_options_flag = ' '.join(['-p ' + key + '=' + value for key, value in self.autogrid_options.items()])
        autodock_options_flag = ' '.join(['-p ' + key + '=' + value for key, value in self.autodock_options.items()])

        self.write_check_ligand_pdbqt_script('check_ligand_pdbqt.py')
        self.write_check_ions_script('check_ions.py')

        if not rescoring:
            if 'ga_num_evals' not in self.options:
                # no value requested: derive ga_num_evals from the number of
                # torsions found in a first, throw-away dock.dpf
                ga_num_evals_lines="""prepare_dpf4.py -l ligand.pdbqt -r target.pdbqt -o dock.dpf -p move=ligand.pdbqt
ga_num_evals_flag=`python -c \"with open('dock.dpf') as ff:
    for line in ff:
        if line.startswith('torsdof'):
            torsion = int(line.split()[1])
            break
ga_num_evals = min(25000000, 987500 * torsion + 125000)
print \'-p ga_num_evals=%i\'%ga_num_evals\"`"""
            else:
                ga_num_evals_lines=""

            # autodock script for docking
            script ="""#!/bin/bash
set -e

MGLPATH=`which prepare_ligand4.py`
MGLPATH=`python -c "print '/'.join('$MGLPATH'.split('/')[:-3])"`
export PYTHONPATH=$PYTHONPATH:$MGLPATH

# prepare ligand
prepare_ligand4.py -l %(file_l)s -o ligand.pdbqt
python check_ligand_pdbqt.py ligand.pdbqt

# prepare receptor
prepare_receptor4.py -U nphs_lps_waters -r %(file_r)s -o target.pdbqt &> prepare_receptor4.log
python check_ions.py target.pdbqt prepare_receptor4.log

# run autogrid
prepare_gpf4.py -l ligand.pdbqt -r target.pdbqt -o grid.gpf %(autogrid_options_flag)s
autogrid4 -p grid.gpf -l grid.glg

# prepare .dpf file
%(ga_num_evals_lines)s
prepare_dpf4.py -l ligand.pdbqt -r target.pdbqt -o dock.dpf -p move=ligand.pdbqt %(autodock_options_flag)s $ga_num_evals_flag

# run autodock
autodock4 -p dock.dpf -l dock.dlg"""
        else:
            ga_num_evals_lines=""

            # autodock script for rescoring
            script ="""#!/bin/bash
set -e

MGLPATH=`which prepare_ligand4.py`
MGLPATH=`python -c "print '/'.join('$MGLPATH'.split('/')[:-3])"`
export PYTHONPATH=$PYTHONPATH:$MGLPATH

# prepare ligand
prepare_ligand4.py -l %(file_l)s -o ligand.pdbqt
python check_ligand_pdbqt.py ligand.pdbqt

# prepare receptor only once
if [ ! -f target.pdbqt ]; then
  prepare_receptor4.py -U nphs_lps_waters -r %(file_r)s -o target.pdbqt > prepare_receptor4.log
  python check_ions.py target.pdbqt prepare_receptor4.log
fi

# run autogrid
if [ ! -f grid.glg ]; then
  prepare_gpf4.py -l ligand.pdbqt -r target.pdbqt -o grid.gpf %(autogrid_options_flag)s
  autogrid4 -p grid.gpf -l grid.glg
fi

# prepare .dpf file
if [ ! -f dock.dpf ]; then
  prepare_dpf4.py -l ligand.pdbqt -r target.pdbqt -o dock.dpf -p move=ligand.pdbqt %(autodock_options_flag)s $ga_num_evals_flag
  # construct new dock.dpf with rescoring options only
  sed -e "1,/about/w tmp.dpf" dock.dpf > /dev/null
  mv tmp.dpf dock.dpf
  echo 'epdb                                 # small molecule to be evaluated' >> dock.dpf
fi

# run autodock
autodock4 -p dock.dpf -l dock.dlg"""

        # explicit substitution table instead of locals()
        substitutions = {
            'file_l': file_l,
            'file_r': file_r,
            'autogrid_options_flag': autogrid_options_flag,
            'autodock_options_flag': autodock_options_flag,
            'ga_num_evals_lines': ga_num_evals_lines,
        }
        with open(filename, 'w') as ff:
            ff.write(script % substitutions)

    def extract_docking_results(self, file_s, input_file_r, input_file_l):
        """Extract output structures in .mol2 formats

        Poses are converted from dock.dlg into pose-<i>.mol2 files with
        obabel and one score per line is written to `file_s`. If dock.dlg is
        missing or the extraction fails, all pose-*.mol2 files are removed
        and `file_s` is left empty. `input_file_r` is not used.
        """
        poses_extracted = False
        if os.path.exists('dock.dlg'):
            try:
                subprocess.check_output('obabel -ad -ipdbqt dock.dlg -omol2 -Opose-.mol2 -m &>/dev/null', shell=True, executable='/bin/bash')
                self.update_output_mol2files(sample=input_file_l)
                poses_extracted = True
            except Exception:
                # best effort: discard partial output, but let
                # KeyboardInterrupt/SystemExit propagate
                for mol2file in glob('pose-*.mol2'):
                    os.remove(mol2file)

        if not poses_extracted:
            open(file_s, 'w').close()
            return

        with open('dock.dlg', 'r') as dlgf:
            with open(file_s, 'w') as sf:
                for line in dlgf:
                    if line.startswith('DOCKED: USER    Estimated Free Energy of Binding'):
                        # the ninth whitespace-separated field is the energy value
                        sf.write(line.split()[8].strip()+'\n')
                    if 'CLUSTERING HISTOGRAM' in line:
                        break

    def extract_rescoring_results(self, filename):
        """extract scores from .dlg file

        Every score found in dock.dlg is appended to `filename`, one per
        line; 'NaN' is appended when dock.dlg is missing or has no score line.
        """
        with open(filename, 'a') as ff:
            has_score_line = False
            if os.path.exists('dock.dlg'):
                with open('dock.dlg', 'r') as dlgf:
                    for line in dlgf:
                        if line.startswith('epdb: USER    Estimated Free Energy of Binding'):
                            ff.write(line.split()[8].strip()+'\n')
                            has_score_line = True
            if not has_score_line:
                ff.write('NaN\n')
283 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
  1 |      Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright 2019 Jordane Preto
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/dockbox/rundbx.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python
  2 | from __future__ import with_statement
  3 | 
  4 | import os
  5 | import sys
  6 | import shutil
  7 | import argparse
  8 | import ConfigParser
  9 | import time
 10 | 
 11 | from glob import glob
 12 | import pandas as pd
 13 | import subprocess
 14 | 
 15 | from mdkit.utility import mol2
 16 | from mdkit.amber.ambertools import load_PROTON_INFO
 17 | from mdkit.amber.ambertools import load_atomic_ions
 18 | 
 19 | import configure
 20 | 
 21 | class DockingConfig(object):
 22 | 
 23 |     def __init__(self, args, task='docking'):
 24 | 
 25 |         # check if config file exist
 26 |         if os.path.exists(args.config_file):
 27 |             config = ConfigParser.SafeConfigParser()
 28 |             config.read(args.config_file)
 29 |         else:
 30 |             raise ValueError("Config file %s not found!"%(args.config_file))
 31 | 
 32 |         # check if ligand file exists
 33 |         if not os.path.isfile(args.input_file_l):
 34 |             raise IOError("File %s not found!"%(args.input_file_l))
 35 | 
 36 |         file_l_abs = os.path.abspath(args.input_file_l)
 37 |         base = os.path.basename(args.input_file_l)
 38 |         pref, ext = os.path.splitext(base)
 39 |         if ext != '.mol2':
 40 |             raise IOError("Ligand file provided with -l option should be in .mol2 format! %s format detected!"%ext)
 41 | 
 42 |         nligands = int(subprocess.check_output('fgrep -c "@<TRIPOS>MOLECULE" %s'%file_l_abs, shell=True))
 43 |         if nligands == 0:
 44 |             raise IOError("No ligand detected in %s, check your file again!"%args.input_file_l)
 45 |         elif nligands > 1:
 46 |             raise IOError("More than one ligand detected in %s. Only one structure per ligand file is allowed!"%args.input_file_l)
 47 | 
 48 |         # new ligand file with unique names for every atom
 49 |         new_file_l = pref + '_dbx' + ext
 50 | 
 51 |         # create a ligand file with unique atom names
 52 |         mol2.update_mol2file(file_l_abs, new_file_l, unique=True, ligname='LIG')
 53 |         self.input_file_l = os.path.abspath(new_file_l)
 54 | 
 55 |         if task == 'docking':
 56 |             self.docking = configure.DockingSetup(config)
 57 |             self.rescoring = configure.RescoringSetup(config)
 58 |         elif task == 'scoring':
 59 |             self.scoring = configure.ScoringSetup(config)
 60 |         else:
 61 |             raise ValueError("Task should be one of docking or scoring")
 62 | 
 63 |         self.check_pdbfile(args.input_file_r)
 64 | 
 65 |     def check_pdbfile(self, filename):
 66 |         """Check if provided pdbfile is valid"""
 67 | 
 68 |         # check if receptor file exists
 69 |         if not os.path.isfile(filename):
 70 |             raise IOError("File %s not found!"%(filename))
 71 | 
 72 |         proton_info = load_PROTON_INFO()
 73 |         ions_info = load_atomic_ions()
 74 | 
 75 |         with open(filename, 'r') as pdbf:
 76 |             is_end_line = False
 77 | 
 78 |             for line in pdbf:
 79 |                 if line.startswith(('ATOM', 'HETATM')):
 80 |                     resname = line[17:20].strip()
 81 | 
 82 |                     if resname in ions_info:
 83 |                         for instance, program, options in self.docking.instances:
 84 |                             if program not in configure.programs_handling_ions:
 85 |                                 sys.exit("Ion %s found in structure %s! DockBox is not configured to apply %s with ions!" %(resname, filename, program))
 86 | 
 87 |                     elif resname not in proton_info or line.startswith('HETATM'):
 88 |                         sys.exit('Unrecognized residue %s found in %s! The pdbfile should \
 89 | only contains one protein structure with standard residues (with possibly ions)!'%(resname, filename))
 90 | 
 91 |                     elif is_end_line:
 92 |                         sys.exit("More than one structure detected in pdbfile! Check your file again!")
 93 | 
 94 |                 elif line.startswith('END'):
 95 |                     is_end_line = True
 96 | 
 97 |         self.input_file_r = os.path.abspath(filename)
 98 | 
 99 | class Scoring(object):
100 | 
101 |     def create_arg_parser(self):
102 |         parser = argparse.ArgumentParser(description="""runscore : score in-place with multiple software --------
103 | Requires one file for the ligand (1 struct.) and one file for the receptor (1 struct.)""")
104 | 
105 |         parser.add_argument('-l',
106 |             type=str,
107 |             dest='input_file_l',
108 |             required=True,
109 |             help = 'Ligand coordinate file(s): .mol2')
110 | 
111 |         parser.add_argument('-r',
112 |             type=str,
113 |             dest='input_file_r',
114 |             required=True,
115 |             help = 'Receptor coordinate file(s): .pdb')
116 | 
117 |         parser.add_argument('-f',
118 |             dest='config_file',
119 |             required=True,
120 |             help='config file containing docking parameters')
121 | 
122 |         return parser
123 | 
124 |     def run_scoring(self):
125 |         """Run scoring on original poses provided"""
126 | 
127 |         parser = self.create_arg_parser()
128 |         args = parser.parse_args()
129 | 
130 |         print "Setting up parameters..."
131 |         config = DockingConfig(args, task='scoring')
132 | 
133 |         tcpu1 = time.time()
134 |         file_r = config.input_file_r
135 |         config_s = config.scoring
136 | 
137 |         print "Starting scoring..."
138 |         for kdx in range(len(config_s.site)):
139 |             site = config_s.site['site'+str(kdx+1)]
140 | 
141 |             # iterate over rescoring instances
142 |             for instance, program, options in config_s.instances:
143 | 
144 |                 # get docking class
145 |                 ScoringClass = getattr(sys.modules[program], program.capitalize())
146 |  
147 |                 ScoringInstance = ScoringClass(instance, site, options)
148 |                 outputfile = ScoringInstance.run_rescoring(config.input_file_r, [config.input_file_l])
149 |  
150 |         tcpu2 = time.time()
151 |         print "Scoring done. Total time needed: %i s" %(tcpu2-tcpu1)
152 | 
153 |     def run_rescoring(self, config, args):
154 |         """Run rescoring on docking poses"""
155 | 
156 |         tcpu1 = time.time()
157 | 
158 |         file_r = config.input_file_r
159 |         config_r = config.rescoring
160 |         posedir = 'poses'
161 | 
162 |         # look for results folder
163 |         if os.path.isdir(posedir):
164 |             with open(posedir+'/info.dat') as inff:
165 |                 nposes = inff.next()
166 |                 nposes = nposes[1:] # the first character is a # sign
167 |                 nposes = map(int, nposes.split(','))
168 |         else:
169 |             raise IOError('no folder %s found!'%posedir)
170 | 
171 |         curdir = os.getcwd()
172 |         workdir = 'rescoring'
173 |         if not os.path.exists(workdir):
174 |             print "Creating rescoring folder..."
175 |             os.mkdir(workdir)
176 | 
177 |         os.chdir(workdir)
178 |         print "Starting rescoring..."
179 |         # iterate over rescoring instances
180 |         for instance, program, options in config_r.instances:
181 | 
182 |             # possibility of renaming the folder and output file 
183 |             if 'name' in options:
184 |                 name = options['name']
185 |             else:
186 |                 name = instance
187 | 
188 |             # remove old scoring file
189 |             if os.path.isfile(name+'.score'):
190 |                 os.remove(name+'.score')
191 | 
192 |             for kdx in range(len(config_r.site)):
193 |                 site = config_r.site['site'+str(kdx+1)]
194 | 
195 |                 # get complex filenames
196 |                 files_l = [os.path.abspath('../'+posedir+'/pose-%s.mol2'%idx) for idx in range(nposes[kdx], nposes[kdx+1])]
197 |                 # get docking class
198 |                 ScoringClass = getattr(sys.modules[program], program.capitalize())
199 | 
200 |                 ScoringInstance = ScoringClass(instance, site, options)
201 |                 outputfile = ScoringInstance.run_rescoring(file_r, files_l)
202 | 
203 |                 # cat output in file (cat instead of copying because of the binding sites)
204 |                 subprocess.check_output('cat %s >> %s'%(outputfile,name+'.score'), shell=True, executable='/bin/bash')
205 | 
206 |                 if config.docking.cleanup == 1:
207 |                     shutil.rmtree(os.path.dirname(outputfile), ignore_errors=True)
208 | 
209 |         os.chdir(curdir)
210 |         tcpu2 = time.time()
211 |         print "Rescoring done. Total time needed: %i s" %(tcpu2-tcpu1)
212 | 
class Docking(object):
    """Command-line workflow: dock a ligand, gather the poses and rescore them.

    :meth:`run` is the entry point, the other methods implement its steps.
    Docking programs are looked up by name in ``sys.modules``: for a program
    ``foo`` the class ``foo.Foo`` is instantiated.
    """

    def create_arg_parser(self):
        """Return the ``argparse.ArgumentParser`` describing the rundbx options."""
        parser = argparse.ArgumentParser(description="""rundbx : dock and rescore with multiple programs --------
Requires one file for the ligand (1 struct.) and one file for the receptor (1 struct.)""")

        parser.add_argument('-l',
            type=str,
            dest='input_file_l',
            required=True,
            help = 'Ligand coordinate file(s): .mol2')

        parser.add_argument('-r',
            type=str,
            dest='input_file_r',
            required=True,
            help = 'Receptor coordinate file(s): .pdb')

        parser.add_argument('-f',
            dest='config_file',
            required=True,
            help='config file containing docking parameters')

        parser.add_argument('-prepare_only',
            dest='prepare_only',
            action='store_true',
            help='Only prepare scripts for docking (does not run docking)')

        parser.add_argument('-rescore_only',
            dest='rescore_only',
            action='store_true',
            default=False,
            help='Run rescoring only')

        # hidden option: it does not show up in the help message
        parser.add_argument('-skip_docking',
            dest='skip_docking',
            action='store_true',
            default=False,
            help=argparse.SUPPRESS)

        return parser

    @staticmethod
    def _instance_dir(name, site):
        """Return the working folder of instance `name` for binding site `site`.

        The folder is ``<name>``, or ``<name>.<site[0]>`` when the site has a
        non-empty label as first element.
        """
        instdir = '%s'%name
        if site[0]:
            instdir += '.' + site[0]
        return instdir

    def finalize(self, config):
        """Create the ``poses`` folder containing all the poses found.

        Any existing ``poses`` folder is deleted first.  The ``pose-<i>.mol2``
        files of every (binding site, instance) pair are copied, in increasing
        order of ``<i>``, to ``poses/pose-<j>.mol2`` where ``<j>`` is a running
        index starting at 1.  ``poses/info.dat`` is then written: its first line
        is ``#`` followed by the comma-separated first pose index of every
        binding site plus the final index; the rest is a CSV table with columns
        ``program, nposes, firstidx, site``.
        """
        config_d = config.docking

        posedir = 'poses'
        shutil.rmtree(posedir, ignore_errors=True)
        os.mkdir(posedir)

        nposes = [1] # first pose index of each binding site
        sh = 1 # index given to the next pose copied

        features = ['program', 'nposes', 'firstidx', 'site']
        info = dict((ft, []) for ft in features)

        for kdx in range(len(config_d.site)):
            bs = config_d.site['site'+str(kdx+1)] # current binding site
            for name, program, options in config_d.instances:
                instdir = self._instance_dir(name, bs)

                # pose numbers are taken from the file names: <instdir>/pose-<number>.mol2
                poses_idxs = sorted(int((filename.split('.')[-2]).split('-')[-1])
                                    for filename in glob(instdir+'/pose-*.mol2'))
                nposes_idxs = len(poses_idxs)

                for idx, pose_idx in enumerate(poses_idxs):
                    shutil.copyfile(instdir+'/pose-%s.mol2'%pose_idx, posedir+'/pose-%s.mol2'%(idx+sh))

                # update info
                info['program'].append(name)
                info['nposes'].append(nposes_idxs)
                info['firstidx'].append(sh)
                info['site'].append(bs[0])

                # update shift
                sh += nposes_idxs
            nposes.append(sh)

        # write the header line followed by the table
        with open(posedir+'/info.dat', 'w') as ff:
            ff.write('#' + ','.join(map(str, nposes)) + '\n')
            pd.DataFrame(info)[features].to_csv(ff, index=False)

    def do_final_cleanup(self, config):
        """Delete the ``pose-*`` files left in the docking folders when cleanup is enabled.

        Nothing is done unless ``config.docking.cleanup`` equals 1.  The folders
        visited are the ones :meth:`finalize` reads the poses from, so folders
        of named binding sites (``<instance>.<site>``) are cleaned as well.
        """
        if config.docking.cleanup != 1:
            return

        config_d = config.docking
        # iterate over all the binding sites
        for kdx in range(len(config_d.site)):
            bs = config_d.site['site'+str(kdx+1)]
            for instance, program, options in config_d.instances: # iterate over all the instances
                for filename in glob(self._instance_dir(instance, bs) + '/*'):
                    base = os.path.basename(filename)
                    if os.path.isfile(filename) and base.startswith('pose-'):
                        os.remove(filename)

    def run_docking(self, config, args):
        """Run (or only prepare) docking with each program, for each binding site.

        Parameters:
            config: configuration object; ``docking.site``, ``docking.instances``,
                ``docking.minimize``, ``docking.cleanup``, ``input_file_r`` and
                ``input_file_l`` are read.
            args: parsed command-line arguments; ``prepare_only`` and
                ``skip_docking`` are forwarded to the docking classes.
        """
        tcpu1 = time.time()

        config_d = config.docking
        # iterate over all the binding sites
        for kdx in range(len(config_d.site)):
            site = config_d.site['site'+str(kdx+1)]
            for instance, program, options in config_d.instances: # iterate over all the instances

                # the class is named after the module, e.g. module "vina" -> class "Vina"
                DockingClass = getattr(sys.modules[program], program.capitalize())

                # create docking instance and run docking
                DockingInstance = DockingClass(instance, site, options)
                DockingInstance.run_docking(
                    config.input_file_r,
                    config.input_file_l,
                    minimize_options=config_d.minimize,
                    cleanup=config_d.cleanup,
                    prepare_only=args.prepare_only,
                    skip_docking=args.skip_docking)

        # no timing report when the scripts were only prepared
        if args.prepare_only:
            return
        tcpu2 = time.time()
        print("Docking procedure done. Total time needed: %i s" %(tcpu2-tcpu1))

    def run(self):
        """Parse the command line and run docking, pose collection, rescoring and cleanup.

        Docking and pose collection are skipped with ``-rescore_only``; with
        ``-prepare_only`` the method returns right after the preparation step.
        """
        parser = self.create_arg_parser()
        args = parser.parse_args()

        print("Setting up parameters...")
        config = DockingConfig(args)

        # run docking
        if not args.rescore_only:
            self.run_docking(config, args)

        if args.prepare_only:
            return

        if not args.rescore_only:
            # create folder with poses
            self.finalize(config)

        # run rescoring
        if config.rescoring.is_rescoring:
            Scoring().run_rescoring(config, args)

        # final cleanup if needed
        if config.docking.cleanup == 1:
            self.do_final_cleanup(config)
371 | 


--------------------------------------------------------------------------------
/dockbox/dock.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import method
  4 | 
  5 | import shutil
  6 | import subprocess
  7 | from glob import glob
  8 | 
  9 | from mdkit.amber import ambertools
 10 | 
 11 | from mdkit.utility import reader
 12 | from mdkit.utility import mol2
 13 | from mdkit.utility import utils
 14 | 
 15 | required_programs = ['chimera', 'dms', 'sphgen_cpp', 'sphere_selector', 'showbox', 'grid', 'dock6']
 16 | 
 17 | default_settings = {'probe_radius': '1.4', 'minimum_sphere_radius': '1.4', 'maximum_sphere_radius': '4.0', \
 18 | 'grid_spacing': '0.3', 'extra_margin': '2.0', 'attractive_exponent': '6', 'repulsive_exponent': '12', \
 19 | 'max_orientations': '10000', 'num_scored_conformers': '5000', 'nposes': '20', 'charge_method': 'gas', 'rmsd': '2.0', 'grid_dir': None}
 20 | 
 21 | class Dock(method.DockingMethod):
 22 | 
 23 |     def __init__(self, instance, site, options):
 24 | 
 25 |         super(Dock, self).__init__(instance, site, options)
 26 |         self.options['center'] = '\"' + ' '.join(map(str.strip, site[1].split(','))) + '\"' # set box center
 27 |         self.options['site'] = site[0]
 28 | 
 29 |         # set box size
 30 |         self.options['boxsize'] = map(float, map(str.strip, site[2].split(',')))
 31 |         self.options['sphgen_radius'] = str(max(self.options['boxsize'])/2)
 32 | 
 33 |         if self.options['site'] is None:
 34 |             self.options['dockdir'] = 'dock'
 35 |         else:
 36 |             self.options['dockdir'] = 'dock.' + self.options['site']
 37 | 
 38 |     def write_rescoring_script(self, filename, file_r, files_l):
 39 |         """Rescore using DOCK6 grid scoring function"""
 40 | 
 41 |         locals().update(self.options)
 42 |         self.write_script_ligand_prep()
 43 | 
 44 |         # cat mol2 files into a single mol2
 45 |         file_all_poses = 'poses.mol2'
 46 | 
 47 |         if self.options['charge_method'].lower() not in ["none", "no"]:
 48 |             amber_version = utils.check_amber_version()
 49 |             ambertools.run_antechamber(files_l[0], 'pose-1.mol2', at='sybyl', c=self.options['charge_method'], version=amber_version)
 50 |         else:
 51 |             shutil.copyfile(files_l[0], 'pose-1.mol2')
 52 | 
 53 |         for idx, file_l in enumerate(files_l):
 54 |             if idx > 0:
 55 |                 if self.options['charge_method'].lower() not in ["none", "no"]:
 56 |                     # if not first one, do not regenerate the charges, copy charges generated the first time
 57 |                     coords_l = mol2.get_coordinates(file_l)
 58 |                     struct = mol2.Reader('pose-1.mol2').next()
 59 |                     struct = mol2.replace_coordinates(struct, coords_l)
 60 |                     mol2.Writer().write('pose-%i.mol2'%(idx+1), struct)
 61 |                 else:
 62 |                     shutil.copyfile(file_l, 'pose-%i.mol2'%(idx+1))
 63 |             subprocess.check_output("cat pose-%i.mol2 >> %s"%(idx+1, file_all_poses), shell=True)
 64 |             if idx > 0:
 65 |                 os.remove('pose-%i.mol2'%(idx+1))
 66 | 
 67 |         script ="""#!/bin/bash
 68 | set -e
 69 | 
 70 | # shift ligand coordinates
 71 | python prepare_ligand_dock.py pose-1.mol2 pose-1-centered.mol2 %(center)s\n"""%locals()
 72 | 
 73 |         if self.options['grid_dir'] is None:
 74 |             script += """\n# remove hydrogens from target
 75 | echo "delete element.H
 76 | write format pdb #0 target_noH.pdb" > removeH.cmd
 77 | chimera --nogui %(file_r)s removeH.cmd
 78 | rm -rf removeH.cmd
 79 | 
 80 | # prepare receptor (add missing h, add partial charges,...)
 81 | echo "import chimera
 82 | from DockPrep import prep
 83 | 
 84 | models = chimera.openModels.list(modelTypes=[chimera.Molecule])
 85 | prep(models)
 86 | 
 87 | from WriteMol2 import writeMol2
 88 | writeMol2(models, 'target.mol2')" > dockprep.py
 89 | chimera --nogui %(file_r)s dockprep.py
 90 | 
 91 | # generating receptor surface
 92 | dms target_noH.pdb -n -w %(probe_radius)s -v -o target_noH.ms
 93 | 
 94 | # generating spheres
 95 | echo "target_noH.ms
 96 | R
 97 | X
 98 | 0.0
 99 | %(maximum_sphere_radius)s
100 | %(minimum_sphere_radius)s
101 | target_noH_site.sph" > INSPH
102 | sphgen_cpp
103 | 
104 | # selecting spheres within a user-defined radius (sphgen_radius)
105 | sphere_selector target_noH_site.sph pose-1-centered.mol2 %(sphgen_radius)s
106 | 
107 | # create box - the second argument in the file showbox.in
108 | # is the extra margin to also be enclosed to the box (angstroms)
109 | echo "Y
110 | %(extra_margin)s
111 | selected_spheres.sph
112 | 1
113 | target_noH_box.pdb" > showbox.in
114 | showbox < showbox.in
115 | 
116 | dock6path=`which dock6`
117 | vdwfile=`python -c "print '/'.join('$dock6path'.split('/')[:-2]) + '/parameters/vdw_AMBER_parm99.defn'"`
118 | flexfile=`python -c "print '/'.join('$dock6path'.split('/')[:-2]) + '/parameters/flex.defn'"`
119 | flexdfile=`python -c "print '/'.join('$dock6path'.split('/')[:-2]) + '/parameters/flex_drive.tbl'"`
120 | 
121 | # create grid
122 | echo "compute_grids yes
123 | grid_spacing %(grid_spacing)s
124 | output_molecule no
125 | contact_score yes
126 | energy_score yes
127 | energy_cutoff_distance 9999
128 | atom_model a
129 | attractive_exponent %(attractive_exponent)s
130 | repulsive_exponent %(repulsive_exponent)s
131 | distance_dielectric yes
132 | dielectric_factor 4
133 | bump_filter yes
134 | bump_overlap 0.75
135 | receptor_file target.mol2
136 | box_file target_noH_box.pdb
137 | vdw_definition_file $vdwfile
138 | score_grid_prefix grid
139 | contact_cutoff_distance 4.5" > grid.in
140 | grid -i grid.in\n"""%locals()
141 |         else:
142 |             # get directory where grid files are located
143 |             grid_prefix = self.options['grid_dir'] + '/' + self.options['dockdir'] + '/grid'
144 | 
145 |             # check if grid file exists
146 |             if os.path.isfile(grid_prefix+'.in'):
147 |                 # copy grid files to avoid opening the same file from multiple locations
148 |                 for gridfile in glob(grid_prefix+'*'):
149 |                     basename = os.path.basename(gridfile)
150 |                     shutil.copyfile(gridfile, basename)
151 |             else:
152 |                 raise ValueError('No grid file detected in specified location %s'%self.options['grid_dir'])
153 | 
154 |             script += """\ndock6path=`which dock6`
155 | vdwfile=`python -c "print '/'.join('$dock6path'.split('/')[:-2]) + '/parameters/vdw_AMBER_parm99.defn'"`
156 | flexfile=`python -c "print '/'.join('$dock6path'.split('/')[:-2]) + '/parameters/flex.defn'"`
157 | flexdfile=`python -c "print '/'.join('$dock6path'.split('/')[:-2]) + '/parameters/flex_drive.tbl'"`\n"""
158 | 
159 |         script += """\necho "ligand_atom_file %(file_all_poses)s
160 | limit_max_ligands no
161 | skip_molecule no
162 | read_mol_solvation no
163 | calculate_rmsd no
164 | use_database_filter no
165 | orient_ligand no
166 | use_internal_energy yes
167 | internal_energy_rep_exp 12
168 | flexible_ligand no
169 | bump_filter no
170 | score_molecules yes
171 | contact_score_primary no
172 | contact_score_secondary no
173 | grid_score_primary yes
174 | grid_score_secondary no
175 | grid_score_rep_rad_scale 1
176 | grid_score_vdw_scale 1
177 | grid_score_es_scale 1
178 | grid_score_grid_prefix grid
179 | multigrid_score_secondary no
180 | dock3.5_score_secondary no
181 | continuous_score_secondary no
182 | descriptor_score_secondary no
183 | gbsa_zou_score_secondary no
184 | gbsa_hawkins_score_secondary no
185 | SASA_descriptor_score_secondary no
186 | amber_score_secondary no
187 | minimize_ligand no
188 | atom_model all
189 | vdw_defn_file $vdwfile
190 | flex_defn_file $flexfile
191 | flex_drive_file $flexdfile
192 | ligand_outfile_prefix poses_out
193 | write_orientations no
194 | num_scored_conformers 1
195 | rank_ligands no" > dock6.in
196 | 
197 | dock6 -i dock6.in > dock.out\n"""%locals()
198 | 
199 |         # write DOCK6 rescoring script
200 |         with open(filename, 'w') as ff:
201 |             ff.write(script)
202 | 
203 |     def write_docking_script(self, filename, file_r, file_l):
204 |         """Dock using DOCK6 flexible docking with grid scoring as primary score"""
205 | 
206 |         locals().update(self.options)
207 |         self.write_script_ligand_prep()
208 | 
209 |         if self.options['charge_method'].lower() not in ["none", "no"]:
210 |             amber_version = utils.check_amber_version()
211 |             ambertools.run_antechamber(file_l, 'ligand-ref.mol2', at='sybyl', c=self.options['charge_method'], version=amber_version)
212 |         else:
213 |             shutil.copyfile(file_l, 'ligand-ref.mol2')
214 | 
215 |         script ="""#!/bin/bash
216 | set -e
217 | 
218 | # shift ligand coordinates
219 | python prepare_ligand_dock.py ligand-ref.mol2 ligand-ref-centered.mol2 %(center)s\n"""%locals()
220 | 
221 |         if self.options['grid_dir'] is None:
222 |             script += """\n# remove hydrogens from target
223 | echo "delete element.H
224 | write format pdb #0 target_noH.pdb" > removeH.cmd
225 | chimera --nogui %(file_r)s removeH.cmd
226 | rm -rf removeH.cmd
227 | 
228 | # prepare receptor (add missing h, add partial charges,...)
229 | echo "import chimera
230 | from DockPrep import prep
231 | 
232 | models = chimera.openModels.list(modelTypes=[chimera.Molecule])
233 | prep(models)
234 | 
235 | from WriteMol2 import writeMol2
236 | writeMol2(models, 'target.mol2')" > dockprep.py
237 | chimera --nogui %(file_r)s dockprep.py
238 | 
239 | # generating receptor surface
240 | dms target_noH.pdb -n -w %(probe_radius)s -v -o target_noH.ms
241 | 
242 | # generating spheres
243 | echo "target_noH.ms
244 | R
245 | X
246 | 0.0
247 | %(maximum_sphere_radius)s
248 | %(minimum_sphere_radius)s
249 | target_noH_site.sph" > INSPH
250 | sphgen_cpp
251 | 
252 | # selecting spheres within a user-defined radius (sphgen_radius)
253 | sphere_selector target_noH_site.sph ligand-ref-centered.mol2 %(sphgen_radius)s
254 | 
255 | # create box - the second argument in the file showbox.in
256 | # is the extra margin to also be enclosed to the box (angstroms)
257 | echo "Y
258 | %(extra_margin)s
259 | selected_spheres.sph
260 | 1
261 | target_noH_box.pdb" > showbox.in
262 | showbox < showbox.in
263 | 
264 | dock6path=`which dock6`
265 | vdwfile=`python -c "print '/'.join('$dock6path'.split('/')[:-2]) + '/parameters/vdw_AMBER_parm99.defn'"`
266 | flexfile=`python -c "print '/'.join('$dock6path'.split('/')[:-2]) + '/parameters/flex.defn'"`
267 | flexdfile=`python -c "print '/'.join('$dock6path'.split('/')[:-2]) + '/parameters/flex_drive.tbl'"`
268 | 
269 | # create grid
270 | echo "compute_grids yes
271 | grid_spacing %(grid_spacing)s
272 | output_molecule no
273 | contact_score yes
274 | energy_score yes
275 | energy_cutoff_distance 9999
276 | atom_model a
277 | attractive_exponent %(attractive_exponent)s
278 | repulsive_exponent %(repulsive_exponent)s
279 | distance_dielectric yes
280 | dielectric_factor 4
281 | bump_filter yes
282 | bump_overlap 0.75
283 | receptor_file target.mol2
284 | box_file target_noH_box.pdb
285 | vdw_definition_file $vdwfile
286 | score_grid_prefix grid
287 | contact_cutoff_distance 4.5" > grid.in
288 | grid -i grid.in
289 | 
290 | # create box - the second argument in the file showbox.in
291 | # is the extra margin to also be enclosed to the box (angstroms)
292 | echo "Y
293 | %(extra_margin)s
294 | selected_spheres.sph
295 | 1
296 | target_noH_box.pdb" > showbox.in
297 | showbox < showbox.in\n"""%locals()
298 |         else:
299 |             # get directory where grid files are located
300 |             grid_prefix = self.options['grid_dir'] + '/' + self.options['dockdir'] + '/grid'
301 | 
302 |             # check if grid file exists
303 |             if os.path.isfile(grid_prefix+'.in'):
304 |                 # copy grid files to avoid opening the same file from multiple locations
305 |                 for gridfile in glob(grid_prefix+'*'):
306 |                     basename = os.path.basename(gridfile)
307 |                     shutil.copyfile(gridfile, basename)
308 |             else:
309 |                 raise ValueError('No grid file detected in specified location %s'%self.options['grid_dir'])
310 | 
311 |             sphfile = self.options['grid_dir'] + '/' + self.options['dockdir'] + '/selected_spheres.sph'
312 |             # check if sphere file exists
313 |             if os.path.isfile(sphfile):
314 |                 shutil.copyfile(sphfile, 'selected_spheres.sph')
315 |             else:
316 |                 raise ValueError('No selected_spheres.sph file detected in specified location %s'%self.options['grid_dir'])
317 | 
318 |             script += """\ndock6path=`which dock6`
319 | vdwfile=`python -c "print '/'.join('$dock6path'.split('/')[:-2]) + '/parameters/vdw_AMBER_parm99.defn'"`
320 | flexfile=`python -c "print '/'.join('$dock6path'.split('/')[:-2]) + '/parameters/flex.defn'"`
321 | flexdfile=`python -c "print '/'.join('$dock6path'.split('/')[:-2]) + '/parameters/flex_drive.tbl'"`\n"""
322 | 
323 |         script += """\n# flexible docking using grid score as primary score and no secondary score
324 | echo "ligand_atom_file ligand-ref-centered.mol2
325 | limit_max_ligands no
326 | skip_molecule no
327 | read_mol_solvation no
328 | calculate_rmsd no
329 | use_database_filter no
330 | orient_ligand yes
331 | automated_matching yes
332 | receptor_site_file selected_spheres.sph
333 | max_orientations %(max_orientations)s
334 | critical_points no
335 | chemical_matching no
336 | use_ligand_spheres no
337 | use_internal_energy yes
338 | internal_energy_rep_exp 12
339 | flexible_ligand yes
340 | user_specified_anchor no
341 | limit_max_anchors no
342 | min_anchor_size 5
343 | pruning_use_clustering yes
344 | pruning_max_orients 1000
345 | pruning_clustering_cutoff 100
346 | pruning_conformer_score_cutoff 100
347 | use_clash_overlap yes
348 | clash_overlap 0.5
349 | write_growth_tree no
350 | bump_filter yes
351 | bump_grid_prefix grid
352 | max_bumps_anchor 12
353 | max_bumps_growth 12
354 | score_molecules yes
355 | contact_score_primary no
356 | contact_score_secondary no
357 | grid_score_primary yes
358 | grid_score_secondary no
359 | grid_score_rep_rad_scale 1
360 | grid_score_vdw_scale 1
361 | grid_score_es_scale 1
362 | grid_score_grid_prefix grid
363 | multigrid_score_secondary no
364 | dock3.5_score_secondary no
365 | continuous_score_secondary no
366 | descriptor_score_secondary no
367 | gbsa_zou_score_secondary no
368 | gbsa_hawkins_score_secondary no
369 | SASA_descriptor_score_secondary no
370 | pbsa_score_secondary no
371 | amber_score_secondary no
372 | minimize_ligand yes
373 | minimize_anchor yes
374 | minimize_flexible_growth yes
375 | use_advanced_simplex_parameters no
376 | simplex_max_cycles 1
377 | simplex_score_converge 0.1
378 | simplex_cycle_converge 1.0
379 | simplex_trans_step 1.0
380 | simplex_rot_step 0.1
381 | simplex_tors_step 10.0
382 | simplex_anchor_max_iterations 1000
383 | simplex_grow_max_iterations 1000
384 | simplex_grow_tors_premin_iterations 0
385 | simplex_random_seed 0
386 | simplex_restraint_min no
387 | atom_model all
388 | vdw_defn_file $vdwfile
389 | flex_defn_file $flexfile
390 | flex_drive_file $flexdfile
391 | ligand_outfile_prefix poses_out
392 | write_orientations no
393 | num_scored_conformers %(num_scored_conformers)s
394 | write_conformations no
395 | cluster_conformations yes
396 | cluster_rmsd_threshold %(rmsd)s
397 | rank_ligands no" > dock6.in
398 | 
399 | dock6 -i dock6.in\n"""%locals()
400 | 
401 |         # write DOCK6 script
402 |         with open(filename, 'w') as ff:
403 |             ff.write(script)
404 | 
405 |     def extract_docking_results(self, file_s, input_file_r, input_file_l):
406 |     
407 |         # save scores
408 |         if os.path.isfile('poses_out_scored.mol2'):
409 |             with open('poses_out_scored.mol2', 'r') as ffin:
410 |                 with open(file_s, 'w') as ffout:
411 |                     idx = 0
412 |                     for line in ffin:
413 |                         if line.startswith('##########    Grid Score:'):
414 |                             ffout.write(line.split()[3]+'\n')
415 |                             idx += 1
416 |                         if idx == int(self.options['nposes']):
417 |                             break 
418 | 
419 |             # create multiple mol2 files
420 |             ligname = reader.open('poses_out_scored.mol2').ligname
421 |             mol2.update_mol2file('poses_out_scored.mol2', 'pose-.mol2', ligname=ligname, multi=True, last=int(self.options['nposes']))
422 |         else:
423 |             open(file_s, 'w').close()
424 | 
425 |     def extract_rescoring_results(self, filename, nligands=None):
426 | 
427 |         with open(filename, 'a') as ff:
428 |             with open('dock.out', 'r') as outf:
429 |                 for line in outf:
430 |                     if line.strip().startswith('Grid Score:'):
431 |                         line_s = line.split()
432 |                         if len(line_s) > 2:
433 |                             ff.write(line.split()[2]+'\n')
434 |                         else:
435 |                             ff.write('NaN\n')
436 |                     elif line.strip().startswith('ERROR:  Conformation could not be scored.'):
437 |                         ff.write('NaN\n')
438 | 
439 |     def write_script_ligand_prep(self):
440 | 
441 |         with open('prepare_ligand_dock.py', 'w') as ff:
442 |             script ="""import os
443 | import sys
444 | import numpy as np
445 | import shutil
446 | 
447 | from mdkit.utility import utils
448 | from mdkit.utility import mol2
449 | 
450 | # read mol2 file
451 | mol2file = sys.argv[1]
452 | new_mol2file = sys.argv[2]
453 | center = map(float,(sys.argv[3]).split())
454 | 
455 | coords = np.array(mol2.get_coordinates(mol2file))
456 | cog = utils.center_of_geometry(coords)
457 | coords = coords - (cog - center)
458 | 
459 | idx = 0
460 | with open(new_mol2file, 'w') as nmol2f:
461 |     with open(mol2file, 'r') as mol2f:
462 |         is_structure = False
463 |         for line in mol2f:
464 |             if line.startswith('@<TRIPOS>ATOM'):
465 |                 is_structure = True
466 |                 nmol2f.write(line)
467 |             elif line.startswith('@<TRIPOS>'):
468 |                 is_structure = False
469 |                 nmol2f.write(line)
470 |             elif is_structure:
471 |                 new_coords = [format(coord, '.4f') for coord in coords[idx]]
472 |                 newline = line[:16] + ' '*(10-len(new_coords[0])) + str(new_coords[0]) + \
473 | ' '*(10-len(new_coords[1])) + str(new_coords[1]) + ' '*(10-len(new_coords[2])) + str(new_coords[2]) + line[46:]
474 |                 nmol2f.write(newline)
475 |                 idx += 1
476 |             else:
477 |                 nmol2f.write(line)"""%locals()
478 |             ff.write(script)
479 | 


--------------------------------------------------------------------------------
/dockbox/pyqcprot.pyx:
--------------------------------------------------------------------------------
  1 | ## this file corresponds to the file pyqcprot.pyx that can be cloned at https://github.com/synapticarbors/pyqcprot (or can be found in MDAnalysis package). Jordane Preto included function CenterCoords that is used to center coordinates before computing RMSD. No significant overhead with respect to the original version was reported.
  2 | 
  3 | # -----------------------------------------------------------------------------
  4 | #    Author(s) of Original Implementation:     
  5 | #                  Douglas L. Theobald
  6 | #                  Department of Biochemistry
  7 | #                  MS 009
  8 | #                  Brandeis University
  9 | #                  415 South St
 10 | #                  Waltham, MA  02453
 11 | #                  USA
 12 | #
 13 | #                  dtheobald@brandeis.edu
 14 | #                  
 15 | #                  Pu Liu
 16 | #                  Johnson & Johnson Pharmaceutical Research and Development, L.L.C.
 17 | #                  665 Stockton Drive
 18 | #                  Exton, PA  19341
 19 | #                  USA
 20 | #
 21 | #                  pliu24@its.jnj.com
 22 | #
 23 | #                  For the original code written in C see:
 24 | #                  http://theobald.brandeis.edu/qcp/ 
 25 | #
 26 | #
 27 | #    Author of Python Port:
 28 | #                  Joshua L. Adelman
 29 | #                  Department of Biological Sciences
 30 | #                  University of Pittsburgh
 31 | #                  Pittsburgh, PA 15260
 32 | #                   
 33 | #                  jla65@pitt.edu
 34 | #
 35 | # 
 36 | #    If you use this QCP rotation calculation method in a publication, please
 37 | #    reference:
 38 | #
 39 | #      Douglas L. Theobald (2005)
 40 | #      "Rapid calculation of RMSD using a quaternion-based characteristic
 41 | #      polynomial."
 42 | #      Acta Crystallographica A 61(4):478-480.
 43 | #
 44 | #      Pu Liu, Dmitris K. Agrafiotis, and Douglas L. Theobald (2010)
 45 | #      "Fast determination of the optimal rotational matrix for macromolecular 
 46 | #      superpositions."
 47 | #      J. Comput. Chem. 31, 1561-1563. 
 48 | #
 49 | #
 50 | #    Copyright (c) 2009-2010, Pu Liu and Douglas L. Theobald 
 51 | #    Copyright (c) 2011       Joshua L. Adelman
 52 | #    All rights reserved.
 53 | #
 54 | #    Redistribution and use in source and binary forms, with or without modification, are permitted 
 55 | #    provided that the following conditions are met:
 56 | #
 57 | #    * Redistributions of source code must retain the above copyright notice, this list of 
 58 | #      conditions and the following disclaimer.
 59 | #    * Redistributions in binary form must reproduce the above copyright notice, this list 
 60 | #      of conditions and the following disclaimer in the documentation and/or other materials 
 61 | #      provided with the distribution.
 62 | #    * Neither the name of the <ORGANIZATION> nor the names of its contributors may be used to 
 63 | #      endorse or promote products derived from this software without specific prior written 
 64 | #      permission.
 65 | #
 66 | #    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 67 | #    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 68 | #    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 69 | #    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 70 | #    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 71 | #    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 72 | #    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 73 | #    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 74 | #    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 75 | #    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 76 | #    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 77 | # -----------------------------------------------------------------------------
 78 | 
 79 | import cython
 80 | import numpy as np
 81 | cimport numpy as np
 82 | 
 83 | cdef extern from "math.h":
 84 |     double sqrt(double x)
 85 |     double fabs(double x)
 86 | 
 87 | cdef double InnerProduct(np.ndarray[np.float64_t,ndim=1] A,
 88 |                  np.ndarray[np.float64_t,ndim=2] coords1,
 89 |                  np.ndarray[np.float64_t,ndim=2] coords2,
 90 |                  int N,
 91 |                  np.ndarray[np.float64_t,ndim=1] weight):
 92 |     """
 93 |     Calculate the inner product of two structures.
 94 |     InnerProduct(A, coords1, coords2, N, weight) --> (G1+G2)/2
 95 |     If weight array is not ``None``, calculate the weighted inner product.
 96 |             :Input:
 97 |                    - A[9]    -- inner product array (modified in place)
 98 |                    - coords1 -- reference structure
 99 |                    - coords2 -- candidate structure
100 |                    - N       -- the size of the system
101 |                    - weight  -- the weight array of size N: set to None if not needed
102 |             :Output:
103 |                    - A[9]    -- the inner product matrix
104 |             :Returns:
105 |                    - (G1 + G2) * 0.5; used as E0 in function :func:`FastCalcRMSDAndRotation`
106 |             .. Warning::
107 |                 1. You MUST center the structures, coords1 and coords2, before calling this function.
108 |                 2. Please note how the structure coordinates are stored as 3xN arrays,
109 |                    not Nx3 arrays as is also commonly used. The difference is
110 |                    something like this for storage of a structure with 8 atoms::
111 |                       Nx3: xyzxyzxyzxyzxyzxyzxyzxyz
112 |                       3xN: xxxxxxxxyyyyyyyyzzzzzzzz
113 |     """
114 | 
115 |     cdef double          x1, x2, y1, y2, z1, z2
116 |     cdef unsigned int    i
117 |     cdef double          G1, G2
118 | 
119 |     G1 = 0.0
120 |     G2 = 0.0
121 | 
122 |     A[0] = A[1] = A[2] = A[3] = A[4] = A[5] = A[6] = A[7] = A[8] = 0.0
123 | 
124 |     if (weight != None):
125 |         for i in xrange(N):
126 |             x1 = weight[i] * coords1[0,i]
127 |             y1 = weight[i] * coords1[1,i]
128 |             z1 = weight[i] * coords1[2,i]
129 | 
130 |             G1 += x1*coords1[0,i] + y1*coords1[1,i] + z1*coords1[2,i]
131 | 
132 |             x2 = coords2[0,i]
133 |             y2 = coords2[1,i]
134 |             z2 = coords2[2,i]
135 | 
136 |             G2 += weight[i] * (x2*x2 + y2*y2 + z2*z2)
137 | 
138 |             A[0] +=  (x1 * x2)
139 |             A[1] +=  (x1 * y2)
140 |             A[2] +=  (x1 * z2)
141 | 
142 |             A[3] +=  (y1 * x2)
143 |             A[4] +=  (y1 * y2)
144 |             A[5] +=  (y1 * z2)
145 | 
146 |             A[6] +=  (z1 * x2)
147 |             A[7] +=  (z1 * y2)
148 |             A[8] +=  (z1 * z2)
149 | 
150 |     else:
151 |         for i in xrange(N):
152 |             x1 = coords1[0,i]
153 |             y1 = coords1[1,i]
154 |             z1 = coords1[2,i]
155 | 
156 |             G1 += (x1*x1 + y1*y1 + z1*z1)
157 | 
158 |             x2 = coords2[0,i]
159 |             y2 = coords2[1,i]
160 |             z2 = coords2[2,i]
161 | 
162 |             G2 += (x2*x2 + y2*y2 + z2*z2)
163 | 
164 |             A[0] +=  (x1 * x2)
165 |             A[1] +=  (x1 * y2)
166 |             A[2] +=  (x1 * z2)
167 | 
168 |             A[3] +=  (y1 * x2)
169 |             A[4] +=  (y1 * y2)
170 |             A[5] +=  (y1 * z2)
171 | 
172 |             A[6] +=  (z1 * x2)
173 |             A[7] +=  (z1 * y2)
174 |             A[8] +=  (z1 * z2)
175 | 
176 |     return (G1 + G2) * 0.5
177 | 
cdef double FastCalcRMSDAndRotation(np.ndarray[np.float64_t,ndim=1] rot, np.ndarray[np.float64_t,ndim=1] A, double E0, int N):
    """
    Calculate the RMSD and, optionally, the optimal rotation matrix.
    FastCalcRMSDAndRotation(rot, A, E0, N) --> rmsd
            :Input:
                    - rot[9]  -- array receiving the rotation matrix (modified in place);
                                 pass None to compute the RMSD only
                    - A[9]    -- the inner product of two structures, flattened as
                                 xx, xy, xz, yx, yy, yz, zx, zy, zz
                    - E0      -- (G1 + G2) * 0.5
                    - N       -- the size of the system
            :Output:
                    - rot[9]   -- the rotation matrix in the order of xx, xy, xz, yx, yy, yz, zx, zy, zz;
                                  set to the identity matrix when no quaternion of
                                  sufficient norm could be built
            :Returns:
                    - the RMSD value, sqrt(|2 * (E0 - mxEigenV) / N|); it is the only
                      result computed when rot is None
    """
    # NOTE(review): rmsd is declared but never used below (the result is held in rms).
    cdef double rmsd
    cdef double Sxx, Sxy, Sxz, Syx, Syy, Syz, Szx, Szy, Szz
    cdef double Szz2, Syy2, Sxx2, Sxy2, Syz2, Sxz2, Syx2, Szy2, Szx2,
    cdef double SyzSzymSyySzz2, Sxx2Syy2Szz2Syz2Szy2, Sxy2Sxz2Syx2Szx2,
    cdef double SxzpSzx, SyzpSzy, SxypSyx, SyzmSzy,
    cdef double SxzmSzx, SxymSyx, SxxpSyy, SxxmSyy

    cdef np.ndarray[np.float64_t,ndim=1] C = np.zeros(4, dtype=A.dtype)
    cdef unsigned int i
    cdef double mxEigenV
    cdef double oldg = 0.0
    cdef double b, a, delta, rms, qsqr
    cdef double q1, q2, q3, q4, normq
    # NOTE(review): a64 is assigned from a23 below, i.e. it plays the role of the
    # (3,2) entry of the symmetric 4x4 matrix; names containing "64" (a64,
    # a6444_4234, a1644_1423, ...) presumably stood for "32" originally. The naming
    # is used consistently, so the arithmetic is unaffected -- confirm against upstream.
    cdef double a11, a12, a13, a14, a21, a22, a23, a24
    cdef double a31, a64, a33, a34, a41, a42, a43, a44
    cdef double a2, x2, y2, z2
    cdef double xy, az, zx, ay, yz, ax
    cdef double a3344_4334, a6444_4234, a6443_4233, a3143_4133,a3144_4134, a3142_4164
    # thresholds: minimum squared norm accepted for the quaternion, and relative
    # convergence tolerance of the eigenvalue iteration
    cdef double evecprec = 1e-6
    cdef double evalprec = 1e-14

    cdef double a1644_1423, a1224_1422, a1223_1642, a1124_1421, a1123_1641, a1122_1221
    # unpack the flattened 3x3 inner product matrix
    Sxx = A[0]
    Sxy = A[1]
    Sxz = A[2]
    Syx = A[3]
    Syy = A[4]
    Syz = A[5]
    Szx = A[6]
    Szy = A[7]
    Szz = A[8]

    Sxx2 = Sxx * Sxx
    Syy2 = Syy * Syy
    Szz2 = Szz * Szz

    Sxy2 = Sxy * Sxy
    Syz2 = Syz * Syz
    Sxz2 = Sxz * Sxz

    Syx2 = Syx * Syx
    Szy2 = Szy * Szy
    Szx2 = Szx * Szx

    SyzSzymSyySzz2 = 2.0*(Syz*Szy - Syy*Szz)
    Sxx2Syy2Szz2Syz2Szy2 = Syy2 + Szz2 - Sxx2 + Syz2 + Szy2

    # C[0], C[1], C[2] are the coefficients of the quartic
    #   P(x) = x^4 + C[2]*x^2 + C[1]*x + C[0]
    # whose root is searched for below (C[3] is allocated but not used).
    C[2] = -2.0 * (Sxx2 + Syy2 + Szz2 + Sxy2 + Syx2 + Sxz2 + Szx2 + Syz2 + Szy2)
    C[1] = 8.0 * (Sxx*Syz*Szy + Syy*Szx*Sxz + Szz*Sxy*Syx - Sxx*Syy*Szz - Syz*Szx*Sxy - Szy*Syx*Sxz)

    SxzpSzx = Sxz + Szx
    SyzpSzy = Syz + Szy
    SxypSyx = Sxy + Syx
    SyzmSzy = Syz - Szy
    SxzmSzx = Sxz - Szx
    SxymSyx = Sxy - Syx
    SxxpSyy = Sxx + Syy
    SxxmSyy = Sxx - Syy
    Sxy2Sxz2Syx2Szx2 = Sxy2 + Sxz2 - Syx2 - Szx2

    C[0] = (Sxy2Sxz2Syx2Szx2 * Sxy2Sxz2Syx2Szx2
         + (Sxx2Syy2Szz2Syz2Szy2 + SyzSzymSyySzz2) * (Sxx2Syy2Szz2Syz2Szy2 - SyzSzymSyySzz2)
         + (-(SxzpSzx)*(SyzmSzy)+(SxymSyx)*(SxxmSyy-Szz)) * (-(SxzmSzx)*(SyzpSzy)+(SxymSyx)*(SxxmSyy+Szz))
         + (-(SxzpSzx)*(SyzpSzy)-(SxypSyx)*(SxxpSyy-Szz)) * (-(SxzmSzx)*(SyzmSzy)-(SxypSyx)*(SxxpSyy+Szz))
         + (+(SxypSyx)*(SyzpSzy)+(SxzpSzx)*(SxxmSyy+Szz)) * (-(SxymSyx)*(SyzmSzy)+(SxzpSzx)*(SxxpSyy+Szz))
         + (+(SxypSyx)*(SyzmSzy)+(SxzmSzx)*(SxxmSyy-Szz)) * (-(SxymSyx)*(SyzpSzy)+(SxzmSzx)*(SxxpSyy-Szz)))

    # Newton-Raphson iteration on P(x), started from E0:
    #   a*x + C[0]        == P(x)
    #   2*x^3 + b + a     == P'(x) = 4*x^3 + 2*C[2]*x + C[1]
    # At most 50 steps; stops once the relative change drops below evalprec.
    mxEigenV = E0
    for i in xrange(50):
        oldg = mxEigenV
        x2 = mxEigenV*mxEigenV
        b = (x2 + C[2])*mxEigenV
        a = b + C[1]
        delta = ((a*mxEigenV + C[0])/(2.0*x2*mxEigenV + b + a))
        mxEigenV -= delta
        if (fabs(mxEigenV - oldg) < fabs((evalprec)*mxEigenV)):
            break

    #if (i == 50):
    #   print "\nMore than %d iterations needed!\n" % (i)

    # the fabs() is to guard against extremely small, but *negative* numbers due to floating point error
    rms = sqrt(fabs(2.0 * (E0 - mxEigenV)/N))

    if (rot is None):
        return rms # Don't bother with rotation.

    # entries of the symmetric 4x4 matrix with mxEigenV subtracted on the diagonal
    a11 = SxxpSyy + Szz-mxEigenV; a12 = SyzmSzy; a13 = - SxzmSzx; a14 = SxymSyx
    a21 = SyzmSzy; a22 = SxxmSyy - Szz-mxEigenV; a23 = SxypSyx; a24= SxzpSzx
    a31 = a13; a64 = a23; a33 = Syy-Sxx-Szz - mxEigenV; a34 = SyzpSzy
    a41 = a14; a42 = a24; a43 = a34; a44 = Szz - SxxpSyy - mxEigenV
    # 2x2 minors built from rows 3 and 4, reused by the cofactor expansions below
    a3344_4334 = a33 * a44 - a43 * a34; a6444_4234 = a64 * a44-a42*a34
    a6443_4233 = a64 * a43 - a42 * a33; a3143_4133 = a31 * a43-a41*a33
    a3144_4134 = a31 * a44 - a41 * a34; a3142_4164 = a31 * a42-a41*a64
    q1 =  a22*a3344_4334-a23*a6444_4234+a24*a6443_4233
    q2 = -a21*a3344_4334+a23*a3144_4134-a24*a3143_4133
    q3 =  a21*a6444_4234-a22*a3144_4134+a24*a3142_4164
    q4 = -a21*a6443_4233+a22*a3143_4133-a23*a3142_4164

    qsqr = q1 * q1 + q2 * q2 + q3 * q3 + q4 * q4

    # The following code tries to calculate another column in the adjoint matrix
    # when the norm of the current column is too small (qsqr < evecprec).
    # It is usually not triggered, but it is kept active to be absolutely safe.

    if (qsqr < evecprec):
        q1 =  a12*a3344_4334 - a13*a6444_4234 + a14*a6443_4233
        q2 = -a11*a3344_4334 + a13*a3144_4134 - a14*a3143_4133
        q3 =  a11*a6444_4234 - a12*a3144_4134 + a14*a3142_4164
        q4 = -a11*a6443_4233 + a12*a3143_4133 - a13*a3142_4164
        qsqr = q1*q1 + q2 *q2 + q3*q3+q4*q4

        if (qsqr < evecprec):
            # 2x2 minors built from rows 1 and 2, for the two remaining columns
            a1644_1423 = a13 * a24 - a14 * a23
            a1224_1422 = a12 * a24 - a14 * a22
            a1223_1642 = a12 * a23 - a13 * a22
            a1124_1421 = a11 * a24 - a14 * a21
            a1123_1641 = a11 * a23 - a13 * a21
            a1122_1221 = a11 * a22 - a12 * a21

            q1 =  a42 * a1644_1423 - a43 * a1224_1422 + a44 * a1223_1642
            q2 = -a41 * a1644_1423 + a43 * a1124_1421 - a44 * a1123_1641
            q3 =  a41 * a1224_1422 - a42 * a1124_1421 + a44 * a1122_1221
            q4 = -a41 * a1223_1642 + a42 * a1123_1641 - a43 * a1122_1221
            qsqr = q1*q1 + q2 *q2 + q3*q3+q4*q4

            if (qsqr < evecprec):
                q1 =  a64 * a1644_1423 - a33 * a1224_1422 + a34 * a1223_1642
                q2 = -a31 * a1644_1423 + a33 * a1124_1421 - a34 * a1123_1641
                q3 =  a31 * a1224_1422 - a64 * a1124_1421 + a34 * a1122_1221
                q4 = -a31 * a1223_1642 + a64 * a1123_1641 - a33 * a1122_1221
                qsqr = q1*q1 + q2 *q2 + q3*q3 + q4*q4

                if (qsqr < evecprec):
                    # if qsqr is still too small, return the identity matrix. #
                    rot[0] = rot[4] = rot[8] = 1.0
                    rot[1] = rot[2] = rot[3] = rot[5] = rot[6] = rot[7] = 0.0

                    return rms


    # normalize (q1, q2, q3, q4) to unit length
    normq = sqrt(qsqr)
    q1 /= normq
    q2 /= normq
    q3 /= normq
    q4 /= normq

    # pairwise products of the normalized components, reused to fill rot
    a2 = q1 * q1
    x2 = q2 * q2
    y2 = q3 * q3
    z2 = q4 * q4

    xy = q2 * q3
    az = q1 * q4
    zx = q4 * q2
    ay = q1 * q3
    yz = q3 * q4
    ax = q1 * q2

    # rotation matrix, stored row by row (xx, xy, xz, yx, yy, yz, zx, zy, zz)
    rot[0] = a2 + x2 - y2 - z2
    rot[1] = 2 * (xy + az)
    rot[2] = 2 * (zx - ay)
    rot[3] = 2 * (xy - az)
    rot[4] = a2 - x2 + y2 - z2
    rot[5] = 2 * (yz + ax)
    rot[6] = 2 * (zx + ay)
    rot[7] = 2 * (yz - ax)
    rot[8] = a2 - x2 - y2 + z2

    return rms
365 | 
cdef void CenterCoords(np.ndarray[np.float64_t,ndim=2] coords, int N, np.ndarray[np.float64_t,ndim=1] weights):
    """
    Translate a structure so that its (weighted) centroid sits at the origin.
    CenterCoords(coords, N, weights)
            :Input:
                   - coords  -- 3xN coordinate array (modified in place)
                   - N       -- the size of the system
                   - weights -- the weight array of size N: set to None for a plain average
            :Output:
                   - coords  -- the centered coordinates
    """

    cdef double          xsum, ysum, zsum, wsum
    cdef unsigned int    i

    xsum = ysum = zsum = 0.0

    # Identity test on purpose: `weights != None` is an elementwise comparison
    # for NumPy arrays, whose truth value is ambiguous for more than one element.
    if weights is not None:
        wsum = 0.0
        for i in range(N):
            xsum += weights[i] * coords[0,i]
            ysum += weights[i] * coords[1,i]
            zsum += weights[i] * coords[2,i]

            wsum += weights[i]

        xsum /= wsum
        ysum /= wsum
        zsum /= wsum

    else:
        for i in range(N):
            xsum += coords[0,i]
            ysum += coords[1,i]
            zsum += coords[2,i]

        xsum /= N
        ysum /= N
        zsum /= N

    # xsum, ysum, zsum now hold the centroid; shift every atom by it
    for i in range(N):
        coords[0,i] -= xsum
        coords[1,i] -= ysum
        coords[2,i] -= zsum
402 | 
@cython.boundscheck(False)
@cython.wraparound(False)
def CalcRMSDRotationalMatrix(np.ndarray[np.float64_t,ndim=2] ref,
                             np.ndarray[np.float64_t,ndim=2] conf,
                             np.ndarray[np.float64_t,ndim=1] rot,
                             np.ndarray[np.float64_t,ndim=1] weights):
    """
    Superimpose two structures and return their RMSD.
    CalcRMSDRotationalMatrix(ref, conf, rot, weights) --> rmsd
            :Input:
                   - ref     -- reference structure coordinates, 3xN (*must* be `numpy.float64`)
                   - conf    -- candidate structure coordinates, 3xN (*must* be `numpy.float64`)
                   - rot[9]  -- array to store rotation matrix; set to None if only calculating rmsd (modified in place)
                   - weights -- the weight array of size N; set to None if not needed
            :Output:
                   - rot[9]  -- rotation matrix
                   - ref, conf -- both arrays are centered in place
            :Returns:
                   - RMSD value
    .. Note:: All arrays *must* be of type `numpy.float64`. The number of
              atoms N is taken from ``conf.shape[1]``.
    """
    cdef int natoms = conf.shape[1]
    cdef np.ndarray[np.float64_t,ndim=1] inner = np.zeros(9, dtype=ref.dtype)
    cdef double e0

    # both structures must be centered before the inner product is formed
    CenterCoords(ref, natoms, weights)
    CenterCoords(conf, natoms, weights)

    e0 = InnerProduct(inner, conf, ref, natoms, weights)
    return FastCalcRMSDAndRotation(rot, inner, e0, natoms)
435 | 
436 |     # the following lines are here to show how to apply the rotational matrix in order to recover the right RMSD
437 |     #cdef double trmsd = 0.0
438 |     #cdef np.ndarray[np.float64_t,ndim=1] trot = np.zeros(3, dtype=ref.dtype)
439 | 
440 |     #rmsd = CalcRMSDRotationalMatrix(ref, conf, rot, weights)
441 | 
442 |     #for k in xrange(N):
443 |     #    for i in xrange(3):
444 |     #        trot[i] = 0.0
445 |     #        for j in xrange(3):
446 |     #            trot[i] += rot[3*i+j] * ref[j,k]
447 |     #    trmsd += (trot[0]-conf[0,k])**2 + (trot[1]-conf[1,k])**2 + (trot[2]-conf[2,k])**2
448 |     #trmsd = sqrt(trmsd/N)
449 |     #print rmsd, trmsd
450 | 


--------------------------------------------------------------------------------
/bin/extract_top_poses:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | import os
  3 | import sys
  4 | import shutil
  5 | import math
  6 | import argparse
  7 | 
  8 | from glob import glob
  9 | import pandas as pd
 10 | import numpy as np
 11 | 
 12 | from mdkit.utility import mol2
 13 | from dockbox.dbxtools import *
 14 | 
 15 | programs_with_positive_scores = ['gnina']
 16 | 
 17 | # command-line arguments and options
 18 | parser = argparse.ArgumentParser(description="Extract top docking poses after rundbx finished.")
 19 | 
 20 | parser.add_argument('--all-sites',
 21 |     dest='combine_sites',
 22 |     action='store_true',
 23 |     default=False,
 24 |     help='Select top poses over all sites. If not specified, extract top pose separately for each site')
 25 | 
 26 | parser.add_argument('--all-targets',
 27 |     dest='combine_targets',
 28 |     action='store_true',
 29 |     default=False,
 30 |     help='Select top poses over all targets. If not specified, extract top pose separately for each target. A "%s/%s" architecture \
 31 | of the folders is assumed'%(ligand_prefix, target_prefix))
 32 | 
 33 | parser.add_argument('-csvl',
 34 |     type=str,
 35 |     dest='csvfile_l',
 36 |     metavar='FILE',
 37 |     help='Filename containing info about compounds. Used to add names of compounds. Default: none')
 38 | 
 39 | parser.add_argument('-csvr',
 40 |     dest='csvfile_r',
 41 |     default='targets.csv',
 42 |     metavar='FILENAME',
 43 |     help='Filename containing info about targets. If none, will look for a receptor file in the "poses" folders.  Default: none')
 44 | 
 45 | parser.add_argument('-cutoff',
 46 |     dest='cutoff',
 47 |     type=float,
 48 |     metavar='RMSD_VALUE',
 49 |     default=2.0,
 50 |     help='RMSD cutoff used for consensus docking or score-based consensus docking. Default: 2.0 A')
 51 | 
 52 | parser.add_argument('-d',
 53 |     dest='docking_programs',
 54 |     nargs='+',
 55 |     metavar=('PRGM1', 'PRGM2'),
 56 |     help='Docking programs (instances) to be considered when extracting best poses')
 57 | 
 58 | parser.add_argument('-dirs',
 59 |     dest='dirs',
 60 |     nargs='+',
 61 |     default=['.'],
 62 |     metavar=('DIR1', 'DIR2'),
 63 |     help='Directories considered for analysis. Should contain a folder called "poses". Default: curr. dir')
 64 | 
 65 | parser.add_argument('-skip_errors',
 66 |     dest='skip_errors',
 67 |     action='store_true',
 68 |     default=False,
 69 |     help='Skip ligands that were not done')
 70 | 
 71 | parser.add_argument('-site',
 72 |     dest='sites',
 73 |     nargs='+',
 74 |     metavar=('SITE1', 'SITE2'),
 75 |     help='Sites to be considered when extracting best poses')
 76 | 
 77 | parser.add_argument('-save',
 78 |     default=False,
 79 |     action='store_true',
 80 |     help='Save structures of top poses (.mol2)')
 81 | 
 82 | group = parser.add_mutually_exclusive_group(required=False)
 83 | 
 84 | group.add_argument('-sf',
 85 |     dest='sf',
 86 |     metavar='FUNC',
 87 |     help='Scoring functions used to extract the best pose (combination of scores)')
 88 | 
 89 | group.add_argument('-cd',
 90 |     dest='cd',
 91 |     nargs='+',
 92 |     metavar='PRGM',
 93 |     help='Docking programs used for standard consensus docking')
 94 | 
 95 | group.add_argument('-sbcd',
 96 |     dest='sbcd',
 97 |     nargs='+',
 98 |     metavar='FUNC',
 99 |     help='Scoring functions used for score-based consensus docking')
100 | 
101 | # update parsers with arguments
102 | args = parser.parse_args()
103 | 
104 | #pd.set_option('display.max_rows', 20000)
105 | 
def add_names(csvfile, df):
    """Return df with a 'name' column looked up in csvfile.

    csvfile is read with pandas and must provide the columns 'ligID' and
    'name'. The two tables are joined on 'ligID' with the default (inner)
    merge, so rows of df whose ligID is absent from csvfile are dropped.
    """
    names = pd.read_csv(csvfile)[['ligID', 'name']]
    return pd.merge(df, names, on='ligID')
112 | 
# keep (as relative paths) the requested directories that contain a "poses"
# folder; a missing folder is fatal unless -skip_errors was given
dirs = []
for dir in args.dirs:
    if not os.path.isdir(dir+'/poses'):
        if args.skip_errors:
            continue
        raise ValueError('Poses folder in %s does not exist!'%dir)
    dirs.append(os.path.relpath(dir))

if len(dirs) == 0:
    raise ValueError('No poses folders found in any of %s'%(', '.join(args.dirs)))
123 | 
# find out how the directories are laid out (flags returned by dbxtools)
iscwd, isligID, istargetID = check_directories(dirs)

# --all-targets is silently dropped when the directories carry no target ID
if args.combine_targets and not istargetID:
    args.combine_targets = False

if istargetID:
    # info related to targets is required: load it
    df_targets = pd.read_csv(args.csvfile_r)

    # paths in the 'pdbfile' column are taken relative to the folder of the
    # csv file; re-express them relative to the current directory
    csvfile_r_dir = os.path.dirname(args.csvfile_r) or '.'
    df_targets['pdbfile'] = df_targets['pdbfile'].apply(
        lambda pdbfile: os.path.relpath(csvfile_r_dir + '/' + pdbfile))
138 | 
# check options relative to best poses extraction
# Exactly one extraction mode must be selected; each mode defines
#   scoring_functions  -- score column(s) used to rank poses (None for -cd)
#   programs_consensus -- entries compared for consensus (None for -sf)
#   resultsdir         -- name of the output folder
found_scoring_functions = []
if args.sbcd:
    scoring_functions = args.sbcd
    programs_consensus = args.sbcd
    if len(args.sbcd) < 2:
        raise ValueError('Number of functions for score-based consensus docking should be at least 2!')
    resultsdir = 'sbcd_' + '_'.join(scoring_functions) 

elif args.cd:
    scoring_functions = None
    programs_consensus = args.cd
    if len(args.cd) < 2:
        raise ValueError('Number of programs for consensus docking should be at least 2!')
    resultsdir = 'cd_' + '_'.join(programs_consensus)

elif args.sf:
    scoring_functions = args.sf
    programs_consensus = None
    resultsdir = 'sf_' + args.sf

else:
    # without a mode, resultsdir and best_poses would never be defined and the
    # script would die later with a NameError: fail early with a clear message
    parser.error('one of the options -sf, -cd or -sbcd is required')
159 | 
def save_top_poses(dir, row, suffix=''):
    """Copy the pose file referenced by row into dir.

    The source path is read from row['file_l' + suffix] and copied to
    '<dir>/ligand<suffix>.mol2'.
    """
    source = row['file_l'+suffix]
    target = dir + '/ligand%s.mol2'%suffix
    shutil.copyfile(source, target)
163 | 
# the csv file with compound names is optional, but must exist when given
if args.csvfile_l is not None and not os.path.isfile(args.csvfile_l):
    raise IOError("csvfile %s not found!"%args.csvfile_l)

# columns identifying a set of poses: the ligand ID when available, the
# directory when there is neither ligand nor target ID, plus the target ID
features_ids = []
if isligID:
    features_ids.append('ligID')
elif not istargetID:
    features_ids.append('dir')

if istargetID:
    features_ids.append('targetID')
176 | 
# files_r maps each target ID to its receptor file; poses collects one
# dataframe per directory
files_r = {}
poses = []
for jdx, dir in enumerate(dirs):
    posedir = dir + '/poses'
    ligID, targetID = get_IDs(dir, isligID, istargetID)

    # one list per column of the dataframe built for this directory
    info_dir = {}
    for ft in features_ids:
        info_dir[ft] = []
    info_dir['file_l'] = []
    if istargetID:
        info_dir['file_r'] = []
    for ft in ['site', 'program', 'instance', 'index_pose', 'score']:
        info_dir[ft] = []

    # get location of poses and receptor files
    datfile = posedir + '/info.dat'
    with open(datfile, 'r') as inff:
        # skip the first two lines (next() works on Python 2 and 3, unlike
        # the Python-2-only file.next() method)
        next(inff)
        next(inff)
        for line in inff:
            # each line reads: program,number of poses,index of first pose,site
            program, nposes, firstidx, site = line.strip().split(',')
            firstidx = int(firstidx)
            nposes = int(nposes)
            instance = program
            if site:
                instance += '.' + site
            poses_idxs = range(firstidx, firstidx+nposes)

            for index, idx in enumerate(poses_idxs):
                file_l = posedir + '/pose-%s.mol2'%idx
                if os.path.isfile(file_l):
                    info_dir['file_l'].append(os.path.relpath(file_l))
                else:
                    raise IOError("File %s does not exist!"%file_l)
                info_dir['site'].append(site)
                info_dir['program'].append(program)
                info_dir['instance'].append(instance)
                info_dir['index_pose'].append(index)

                if isligID:    
                    info_dir['ligID'].append(ligID)
                elif not istargetID:
                    info_dir['dir'].append(dir)
                if istargetID:
                    info_dir['targetID'].append(targetID)

                # get the filename of the corresponding receptor file
                if istargetID:
                    row = df_targets[df_targets['targetID']==targetID]
                    file_r = row['pdbfile'].values[0]
                    info_dir['file_r'].append(file_r)

                # update the dictionary of targets
                if istargetID and targetID not in files_r:
                    files_r[targetID] = file_r

            nscores = 0
            # extract original scores
            with open(dir+'/'+instance+'/score.out', 'r') as sout:
                for line_s in sout:
                    nscores += 1
                    info_dir['score'].append(float(line_s.strip()))
                if nscores != nposes:
                    raise ValueError("Number of poses different from number of scores (%s/%s)"%(dir,instance))

    # derive the expected number of scores per rescoring file from the last
    # line of info.dat (index of its first pose + its number of poses - 1)
    nposes += firstidx - 1
    # extract all scores
    for scorefile in sorted(glob(dir+'/rescoring/*.score')):
        sf = os.path.basename(scorefile).split('.')[0]
        nscores = 0
        if jdx == 0:
            found_scoring_functions.append(sf)
        elif sf not in found_scoring_functions:
            # the format argument was missing, leaving a literal "%s" in the message
            raise ValueError("%s scores not computed in every directory!"%sf)
        info_dir[sf] = []
        with open(scorefile, 'r') as sout:
            for line_s in sout:
                info_dir[sf].append(float(line_s))
                nscores += 1
        #print nscores, nposes
        # a score file of unexpected length is discarded: all its scores become nan
        if nscores != nposes:
            info_dir[sf] = [float('nan') for idx in range(nposes)]

    df_dir = pd.DataFrame(info_dir)
    # optional filters on docking programs (-d) and binding sites (-site)
    if args.docking_programs: 
        df_dir = df_dir[df_dir['program'].isin(args.docking_programs)]

    if args.sites:
        df_dir = df_dir[df_dir['site'].isin(["site%s"%idx_site for idx_site in args.sites])]
    poses.append(df_dir)
269 | 
if not poses:
    sys.exit("No poses to extract!")

# stack the per-directory tables into a single one with a fresh index
poses = pd.concat(poses, sort=False).reset_index(drop=True)
if args.csvfile_l and isligID:
    # compound names can only be attached through the ligand ID
    poses = add_names(args.csvfile_l, poses)
276 | 
# define columns for groupby operation: one best pose is extracted for every
# distinct combination of the values found in these columns
groupby_columns = []

if isligID:
    column_ligand = 'ligID'
    groupby_columns.append(column_ligand)

if istargetID and not args.combine_targets:
    groupby_columns.append('targetID')

# sites are in use as soon as one pose carries a non-empty site label
is_site = set(poses['site']) != {''}
if is_site and not args.combine_sites:
    groupby_columns.append('site')

if not isligID and not istargetID:
    column_ligand = 'dir'
    groupby_columns.append(column_ligand)
elif not isligID and args.combine_targets:
    # no ligand ID and all targets combined: group everything together
    # through a constant column
    poses['dummy'] = 0
    groupby_columns.append('dummy')
298 | 
if args.sbcd or args.cd:
    # consensus modes: collect one table of best poses per program (-cd) or
    # per scoring function (-sbcd), then merge them side by side
    best_poses = []
    for prgm in programs_consensus:
        if args.sbcd:
            if groupby_columns:
                poses_groupby = poses.groupby(groupby_columns)

            # the best pose has the highest score for the functions listed in
            # programs_with_positive_scores, the lowest score otherwise
            if any([prgm.startswith(prgm_p) for prgm_p in programs_with_positive_scores]):
                minidxs = poses_groupby[prgm].idxmax()
            else:
                minidxs = poses_groupby[prgm].idxmin()

            # handle cases where all the scores provided by a program are nans (to be changed when istargetID is True)
            # such groups get the first pose of the ligand as a placeholder
            lignans = minidxs[minidxs.apply(np.isnan)]
            for ligID, row in lignans.items():
                if isinstance(ligID, tuple):
                    ligID = ligID[0]
                row_poses = poses[poses[column_ligand]==ligID].iloc[0]
                minidxs[ligID] = float(row_poses.name)
            minidxs = minidxs.astype(int)

            # get best poses from indices
            best_poses_prgm = poses.loc[minidxs].copy()

            # set file_l to nan for those with no score
            # NOTE(review): unlike the loop above, tuple keys (several groupby
            # columns) are not reduced to the ligand ID here -- confirm intent
            for ligID, row in lignans.items():
                best_poses_prgm_row = best_poses_prgm[best_poses_prgm[column_ligand]==ligID]
                indices = best_poses_prgm_row.index.values
                # .loc (not the scalar accessor .at) since indices is an array of labels
                best_poses_prgm.loc[indices, 'file_l'] = np.nan

        elif args.cd:
            # only the poses generated by this program compete, ranked by
            # their original score
            poses_prgm = poses[poses['program']==prgm]
            poses_groupby = poses_prgm.groupby(groupby_columns)
            if any([prgm.startswith(prgm_p) for prgm_p in programs_with_positive_scores]):
                best_poses_prgm = poses_prgm.loc[poses_groupby['score'].idxmax()]
            else:
                best_poses_prgm = poses_prgm.loc[poses_groupby['score'].idxmin()]

        new_columns_names = [] # renaming columns according to the scoring function
        for col in best_poses_prgm.columns.values:
            if col in groupby_columns + ['name']:
                new_columns_names.append(col)
            elif not isligID and col == 'dir':
                new_columns_names.append(col)
            elif args.sbcd and col == prgm:
                new_columns_names.append(prgm)
            else:
                new_columns_names.append(col + '_' + prgm)
        best_poses_prgm.columns = new_columns_names
        best_poses.append(best_poses_prgm)

    # NOTE: columns_to_be_merged is the same list object as groupby_columns,
    # so 'name' is appended to both
    columns_to_be_merged = groupby_columns
    if 'name' in poses.columns.values:
       columns_to_be_merged += ['name']

    # merge best poses into single dataframe
    best_poses_merged = best_poses[0]    
    for item in best_poses[1:]:
        best_poses_merged = best_poses_merged.merge(item, on=columns_to_be_merged, how='outer')

    if args.cd and istargetID:
        # handle cases when poses were generated with one program but not with the other one (only for CD)
        for prgm1 in programs_consensus:
            best_poses_merged_null = best_poses_merged[best_poses_merged['targetID_'+prgm1].isnull()]

            for idx, row in best_poses_merged_null.iterrows():
                for prgm2 in programs_consensus:
                    if isinstance(row['targetID_'+prgm2], str): # occurring when the target ID is not nan
                        best_poses_merged.loc[idx,'targetID_'+prgm1] = row['targetID_'+prgm2]
                        break

    # RMSD between the best pose of the first program and the best pose of
    # each of the others
    prgm_first = programs_consensus[0]
    if args.combine_targets:
        # poses may belong to different targets: pass the rotation and
        # translations computed between targets to compute_rmsd
        rmsd_rot_trans = get_rmsd_rotation_and_translations_all_targets(files_r)

        for prgm in programs_consensus[1:]:
            best_poses_merged['rmsd_'+prgm_first+'_'+prgm] = best_poses_merged.apply(lambda row: compute_rmsd(row['file_l_'+prgm_first], row['file_l_'+prgm],
            rotmat=rmsd_rot_trans[row['targetID_'+prgm_first]][row['targetID_'+prgm]][0], \
            trans1=rmsd_rot_trans[row['targetID_'+prgm_first]][row['targetID_'+prgm]][1], \
            trans2=rmsd_rot_trans[row['targetID_'+prgm_first]][row['targetID_'+prgm]][2]), axis=1)
    else:
        for prgm in programs_consensus[1:]:
            best_poses_merged['rmsd_'+prgm_first+'_'+prgm] = best_poses_merged.apply(lambda row: compute_rmsd(row['file_l_'+prgm_first], row['file_l_'+prgm]), axis=1)

    # consensus is reached when every RMSD is within the cutoff
    rmsd_columns = [col for col in best_poses_merged.columns.values if col.startswith('rmsd')]
    best_poses = best_poses_merged.assign(consensus=(best_poses_merged[rmsd_columns]<=args.cutoff).all(axis=1))

elif args.sf:
    # remove nan values
    poses = poses.dropna(subset=[args.sf])
    poses_groupby = poses.groupby(groupby_columns)

    # idxmax/idxmin must be called: handing the bound method itself to .loc
    # makes pandas treat it as a callable indexer
    if any([args.sf.startswith(prgm_p) for prgm_p in programs_with_positive_scores]):
        best_poses = poses.loc[poses_groupby[args.sf].idxmax()]
    else:
        best_poses = poses.loc[poses_groupby[args.sf].idxmin()]
395 | 
# sort the best poses by their identifier columns; 'targetID' is left out
# when that column is absent from best_poses
features_ids_sorted = list(features_ids)
if istargetID and 'targetID' not in best_poses.columns.values:
    features_ids_sorted.remove('targetID')
if features_ids_sorted:
    best_poses = best_poses.sort_values(features_ids_sorted)

# start from an empty results folder
shutil.rmtree(resultsdir, ignore_errors=True)
os.mkdir(resultsdir)

# columns written to the csv files
features_csv = []
# the 'name' column only exists when names were merged in, which requires
# both -csvl and ligand IDs; requesting it otherwise raises a KeyError
if args.csvfile_l and isligID:
    features_csv.append('name')
features_csv += features_ids
features_csv += ['instance']

# NOTE(review): 'instance' is listed a second time here, so the column shows
# up twice in the csv output -- kept as is to preserve the file layout
if istargetID:
    features_csv += ['file_l', 'file_r', 'instance']
else:
    features_csv += ['file_l', 'instance']

if is_site:
    features_csv.append('site')
features_csv += found_scoring_functions + ['score']

# save all the poses to a file
csvfile = resultsdir + '/poses.csv'
poses[features_csv].to_csv(csvfile, index=False, float_format='%.3f')
423 | 
424 | if best_poses is not None:
425 |     if args.sbcd or args.cd:
426 |         features_csv_best_poses = []
427 |         if args.csvfile_l:
428 |             features_csv_best_poses.append('name')
429 | 
430 |         if isligID:
431 |             features_csv_best_poses.append('ligID')
432 | 
433 |         elif not istargetID:
434 |             features_csv_best_poses.append('dir')
435 | 
436 |         if istargetID and not args.combine_targets:
437 |             features_csv_best_poses.append('targetID')
438 | 
439 |         for prgm in programs_consensus:
440 |             if istargetID and args.combine_targets:
441 |                 features_csv_best_poses.append('targetID_'+prgm)
442 |             features_csv_best_poses.append('file_l_'+prgm)
443 |             features_csv_best_poses.append('instance_'+prgm)
444 |             if args.sbcd:
445 |                 features_csv_best_poses.append(prgm)
446 |             else:
447 |                 features_csv_best_poses.append('score_'+prgm)
448 |             if is_site:
449 |                 if args.combine_sites:
450 |                     features_csv_best_poses.append('site_'+prgm)
451 |                 else:
452 |                     features_csv_best_poses.append('site')
453 |         features_csv_best_poses.extend([col for col in best_poses.columns.values if col.startswith('rmsd')])
454 |         features_csv_best_poses.append('consensus')
455 | 
456 |     elif args.sf:
457 |         features_csv_best_poses = list(features_csv)
458 | 
459 |     filename = resultsdir + '/best_poses.csv'
460 |     best_poses[features_csv_best_poses].to_csv(filename, index=False, float_format='%.3f')
461 | 
462 |     if args.combine_targets and istargetID:
463 |         features_ids.remove('targetID')
464 |     
465 |     if args.save:
466 |         for idx, row in best_poses.iterrows():
467 |             if args.sf or ((args.sbcd or args.cd) and row['consensus']):
468 |                 if isligID:
469 |                     newdir = resultsdir + '/' + '/'.join(row[features_ids])
470 |                 elif row['dir'] != '.':
471 |                     newdir = resultsdir + '/' + '_'.join(row['dir'].split('/'))
472 |                 else:
473 |                     newdir = resultsdir
474 |                 if not os.path.isdir(newdir):
475 |                     os.makedirs(newdir)
476 | 
477 |                 if args.sbcd or args.cd:
478 |                     for idx, prgm in enumerate(programs_consensus):
479 |                         save_top_poses(newdir, row, suffix='_'+prgm)
480 |                 else:
481 |                     save_top_poses(newdir, row)
482 | 


--------------------------------------------------------------------------------
/dockbox/moe.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import glob
  4 | import shutil
  5 | import subprocess
  6 | import method
  7 | import license
  8 | 
  9 | from mdkit.utility import reader
 10 | from mdkit.utility import mol2
 11 | 
 12 | required_programs = ['moebatch']
 13 | 
 14 | default_settings = {'placement': 'Triangle Matcher', 'placement_nsample': '10', 'placement_maxpose': '250',  'scoring': 'London dG',
 15 | 'maxpose': '30', 'remaxpose': '5', 'gtest': '0.01', 'rescoring': 'GBVI/WSA dG'}
 16 | 
 17 | known_scorings = ['ASE', 'Affinity dG', 'Alpha HB', 'GBVI/WSA dG', 'London dG', 'None']
 18 | known_placements = ['Alpha PMI', 'Alpha Triangle', 'Proxy Triangle', 'Triangle Matcher'] 
 19 | 
 20 | known_settings = {'placement': known_placements, 'scoring': known_scorings, 'rescoring': known_scorings}
 21 | 
 22 | class Moe(method.DockingMethod):
 23 | 
 24 |     def __init__(self, instance, site, options):
 25 | 
 26 |         super(Moe, self).__init__(instance, site, options)
 27 | 
 28 |         # set box center
 29 |         self.options['center_bs'] = '[' + ', '.join(map(str.strip, site[1].split(','))) + ']'
 30 | 
 31 |         # set box size
 32 |         self.options['boxsize_bs'] = '[' + ', '.join(map(str.strip, site[2].split(','))) + ']'
 33 | 
 34 |     def write_docking_script(self, filename, file_r, file_l):
 35 |    
 36 |         self.write_moe_docking_script('moe_dock.svl')
 37 | 
 38 |         convertmol2_cmd = license.wrap_command("moebatch -exec \"mdb_key = db_Open ['ligand.mdb','create']; db_Close mdb_key;\
 39 | db_ImportMOL2 ['%(file_l)s','ligand.mdb', 'molecule']\""%locals(), 'moe') # create mdb for ligand
 40 | 
 41 |         dock_cmd = license.wrap_command("moebatch -run moe_dock.svl -rec %(file_r)s -lig ligand.mdb"%locals(), 'moe') # cmd for docking
 42 | 
 43 |         # write script
 44 |         with open(filename, 'w') as ff:
 45 |             script ="""#!/bin/bash
 46 | # convert .mol2 file to mdb
 47 | %(convertmol2_cmd)s
 48 | 
 49 | # run docking
 50 | %(dock_cmd)s\n"""% locals()
 51 |             ff.write(script)
 52 |     
 53 |     def write_moe_docking_script(self, filename):
 54 |     
 55 |         locals().update(self.options)
 56 |     
 57 |         # write vina script
 58 |         with open(filename, 'w') as ff:
 59 |             script ="""#svl
 60 | function DockAtoms, DockFile;
 61 | function DockMDBwAtoms, DockMDBwFile;
 62 | 
 63 | global argv;
 64 | function ArgvPull;
 65 | 
 66 | local function main []
 67 | 
 68 |     // Set potential and setup parameters
 69 |     pot_Load '$MOE/lib/Amber10EHT.ff';
 70 | 
 71 |     pot_Setup [
 72 |         strEnable: 1,
 73 |         angEnable: 1,
 74 |         stbEnable: 1,
 75 |         oopEnable: 1,
 76 |         torEnable: 1,
 77 |         vdwEnable: 1,
 78 |         eleEnable: 1,
 79 |         solEnable: 0,
 80 |         resEnable: 1,
 81 |         strWeight: 1,
 82 |         angWeight: 1,
 83 |         stbWeight: 1,
 84 |         oopWeight: 1,
 85 |         torWeight: 1,
 86 |         vdwWeight: 1,
 87 |         eleWeight: 1,
 88 |         solWeight: 1,
 89 |         resWeight: 1,
 90 |         cutoffEnable: 1,
 91 |         cutoffOn: 8,
 92 |         cutoffOff: 10,
 93 |         eleDist: 2,
 94 |         vdwScale14: 0.5,
 95 |         vdwBuffer1: 0,
 96 |         vdwBuffer2: 0,
 97 |         eleScale14: 0.833333,
 98 |         eleDielectric: 1,
 99 |         eleBuffer: 0,
100 |         solDielectric: 80,
101 |         solDielectricOffset: 0,
102 |         state0: 1,
103 |         state1: 0,
104 |         state2: 1,
105 |         threadCount: 0
106 |     ];
107 | 
108 | ArgvReset ArgvExpand argv;
109 |     local [recmdb, ligmdb, ph4file, outf] = ArgvPull [
110 |         ['-rec','-lig','-ph4','-o'],
111 |         1
112 |     ];
113 | 
114 |     // If no receptor given as argument use default rec.moe
115 |     if isnull recmdb then
116 |         recmdb = 'rec.moe';
117 |     endif
118 | 
119 |     local basename = fbase recmdb;
120 |     local extension = fext recmdb;
121 | 
122 |     // output docking database file
123 |     outf = 'dock.mdb';
124 | 
125 |     // Receptor file or database
126 |     // Assume that the file is a moe or pdb file extract chains atoms
127 | 
128 |     local chains = ReadAuto [recmdb, []];
129 |     local rec = cat cAtoms chains; // extract atom info from atom
130 | 
131 |     // get residues involved in the binding site
132 |     local center_bs = %(center_bs)s; // center for the binding site
133 |     local boxsize_bs = %(boxsize_bs)s; // size of the box for the binding site
134 |     local residues_bs = []; // residues involved in binding site
135 | 
136 |     local idx, jdx;
137 |     local com, dist;
138 |     local isinbox;
139 | 
140 |     local rec_bs = cat cResidues chains; // extract residues info
141 |     for idx = 1, length rec_bs loop
142 |         com = oCenterOfMass rec_bs(idx);
143 |         dist = sqrt add pow[sub[center_bs, com], 2];
144 |         isinbox = 1;
145 |         for jdx = 1, 3 loop
146 |             if abs(center_bs(jdx)-com(jdx)) > 0.5*boxsize_bs(jdx) then
147 |                 isinbox = 0;
148 |             endif
149 |         endloop
150 |         if isinbox == 1 then
151 |             residues_bs = append [residues_bs, rec_bs(idx)];
152 |         endif
153 |     endloop
154 | 
155 |     rec_bs = cat rAtoms residues_bs;
156 |     View (Atoms[]);
157 | 
158 |     local alpha_sites = run['sitefind.svl', [rec_bs, []], 'AlphaSites'];
159 | 
160 |     // Take first/highest scoring pocket alpha_sites(1)
161 |     // Take fpos data alpha_sites(1)(1)
162 |     // Take only coords of fpos data alpha_sites(1)(1)(2)
163 |     local a_sites = apt cat alpha_sites(1)(1)(2); // x, y, z coords
164 | 
165 |     // Make dummy He atoms for alpha site
166 |     // local dummy, x, y, z;
167 |     // for x = 1, length a_sites(1) loop
168 |     //    dummy(x) = sm_Build ['[He]'];
169 |     //    aSetPos [dummy(x), [a_sites(1)(x), a_sites(2)(x), a_sites(3)(x)]];
170 |     //endloop
171 | 
172 |     // Make dummy He atoms for alpha site
173 |     local dummy, x, y, z;
174 |     for x = 1, length a_sites loop
175 |         dummy(x) = sm_Build ['[He]'];
176 |         aSetPos [dummy(x), a_sites(x)];
177 |     endloop
178 | 
179 |     // Make a collection of site atoms to send to docking
180 |     // from the alpha site
181 |     oSetCollection ['Site', dummy];
182 |     local site = oGetCollection 'Site';
183 | 
184 |     // Ligand database
185 |     local lmdb = _db_Open [ligmdb, 'read'];
186 |     if lmdb == 0 then
187 |         exit twrite ['Cannot read ligand mdb file {}', ligmdb];
188 |     endif
189 | 
190 |     local ent = 0; // must have this set to zero
191 |     while ent = db_NextEntry[lmdb, ent] loop; //loop through ligand database
192 |         local ligdata = db_Read[lmdb, ent]; //read data for each entry
193 |         local ligmoldata = ligdata.mol; // extract into moldata
194 |         local ligchains = mol_Create ligmoldata; //create molecule in window
195 |         local lig = cat cAtoms ligchains; // extract atom info from atom
196 |     endloop
197 | 
198 |     // Set options for docking and refinement
199 |     // maxpose is set to accept 50 poses, change as required
200 |     local opt = [
201 |                 outrmsd: 1,
202 |                 sel_ent_only_rec: 0,
203 |                 sel_ent_only: 0,
204 |                 wall: [ '', 0, [ 0, 0, 0 ], [ 1000000, 1000000, 1000000 ], 0 ],
205 |                 csearch: 1,
206 |                 placement: '%(placement)s',
207 |                 placement_opt: [nsample : %(placement_nsample)s, maxpose : %(placement_maxpose)s ],
208 |                 scoring: '%(scoring)s',
209 |                 scoring_opt: [ train : 0 ],
210 |                 dup_placement: 1,
211 |                 maxpose: %(maxpose)s,
212 |                 refine: 'Forcefield',
213 |                 refine_opt: [ cutoff : 6, wholeres : 1, mmgbvi : 1, fixrec : 'Fix', tether : 10, gtest : %(gtest)s,
214 |                 maxit : 500, OverrideSetup : 1, k_potl : 100, offset : 0.4 ],
215 |                 rescoring: '%(rescoring)s',
216 |                 rescoring_opt: [ train : 0 ],
217 |                 dup_refine: 1,
218 |                 remaxpose: %(remaxpose)s,
219 |                 descexpr: '',
220 |                 receptor_mfield: '',
221 |                 ligand_mfield: '',
222 |                 tplate: [  ],
223 |                 tplateSel: [  ],
224 |                 //ph4: ph4file,
225 |                 ligmdbname: ligmdb,
226 |                 recmdbname: recmdb
227 |     ];
228 | 
229 |     //Perform the docking
230 |     DockFile [rec, site, ligmdb, outf, opt];
231 | 
232 |     oDestroy ligchains;
233 |     db_Close lmdb;
234 |     write ['Docking finished at {}.\\n', asctime []];
235 | 
236 | endfunction;"""% locals()
237 |             ff.write(script)
238 |     
239 |     def extract_docking_results(self, file_s, input_file_r, input_file_l):
240 | 
241 |         subprocess.check_output(license.wrap_command("moebatch -exec \"db_ExportTriposMOL2 ['dock.mdb', 'poses.mol2', 'mol', []]\"", 'moe'), shell=True, executable='/bin/bash')
242 | 
243 |         if os.path.exists('poses.mol2'):
244 |             ligname = reader.open(input_file_l).ligname
245 |             mol2.update_mol2file('poses.mol2', 'pose-.mol2', ligname=ligname, multi=True)
246 |             os.remove('poses.mol2')
247 | 
248 |             # get SDF to extract scores
249 |             sdffile = 'ligand.sdf'
250 |             subprocess.check_output(license.wrap_command("moebatch -exec \"db_ExportSD ['dock.mdb', '%s', ['mol','S'], []]\""%sdffile, 'moe'), shell=True, executable='/bin/bash')
251 |             with open(sdffile, 'r') as sdff:
252 |                 with open(file_s, 'w') as sf:
253 |                     for line in sdff:
254 |                         if line.startswith("> <S>"):
255 |                             sf.write(sdff.next().strip()+'\n')
256 |             os.remove(sdffile)
257 |         else:
258 |             open(file_s, 'w').close()
259 |     
260 |     def write_rescoring_script(self, filename, file_r, file_l):
261 | 
262 |         locals().update(self.options)
263 | 
264 |         if self.options['rescoring'] == 'prolig':
265 |             rescoring_cmd = license.wrap_command("moebatch -run moe_rescoring.svl -rec %(file_r)s -lig %(file_l)s"%locals(), 'moe') # cmd for docking
266 | 
267 |             with open(filename, 'w') as file:
268 |                 script ="""#!/bin/bash
269 | echo "#svl
270 | function prolig_Calculate;
271 | 
272 | global argv;
273 | function ArgvPull;
274 | 
275 | local function main[]
276 | 
277 |     ArgvReset ArgvExpand argv;
278 |     local [recmdb, ligmdb, outf] = ArgvPull [
279 |         ['-rec','-lig','-o'],
280 |         1
281 |     ];
282 |     local lk = ReadTriposMOL2 [ligmdb, []];
283 | 
284 |     // Load pdb
285 |     local rk = ReadAuto [recmdb, []];
286 | 
287 |     local itypes = ['hbond', 'metal', 'ionic', 'covalent', 'arene', 'distance'];
288 |     local iract = prolig_Calculate [itypes, lk, rk, []];
289 |     //local iract_v = Formulate2DInteractions [lk, rk, []];
290 | 
291 |     local idx;
292 |     local interaction_energy = 0.;
293 |     for idx = 1, length iract(1) loop
294 |         if iract(1)(idx) == 'distance' then
295 |             break;
296 |         else
297 |             interaction_energy = interaction_energy + iract(4)(idx);
298 |         endif
299 |     endloop
300 | 
301 |     write ['Interaction energy: {f.2} kCal/mol \\n', interaction_energy];
302 | 
303 | endfunction;" > moe_rescoring.svl
304 | 
305 | %(rescoring_cmd)s
306 | """ %locals()
307 |                 file.write(script)
308 | 
309 |         else:
310 |             convertmol2_cmd = license.wrap_command("moebatch -exec \"mdb_key = db_Open ['ligand.mdb','create']; db_Close mdb_key;\
311 | db_ImportMOL2 ['%(file_l)s','ligand.mdb', 'molecule']\""%locals(), 'moe') # create mdb for ligand
312 |             rescoring_cmd = license.wrap_command("moebatch -run moe_rescoring.svl -rec %(file_r)s -lig ligand.mdb"%locals(), 'moe') # cmd for docking
313 | 
314 |             # write vina script
315 |             with open(filename, 'w') as file:
316 |                 script ="""#!/bin/bash
317 | 
318 | %(convertmol2_cmd)s
319 | 
320 | echo "#svl
321 | function DockAtoms, DockFile;
322 | function DockMDBwAtoms, DockMDBwFile;
323 | 
324 | global argv;
325 | function ArgvPull;
326 | 
327 | local function main []
328 | 
329 |     // Set potential and setup parameters
330 |     pot_Load '$MOE/lib/Amber10EHT.ff';
331 | 
332 |     pot_Setup [
333 |         strEnable: 1,
334 |         angEnable: 1,
335 |         stbEnable: 1,
336 |         oopEnable: 1,
337 |         torEnable: 1,
338 |         vdwEnable: 1,
339 |         eleEnable: 1,
340 |         solEnable: 0,
341 |         resEnable: 1,
342 |         strWeight: 1,
343 |         angWeight: 1,
344 |         stbWeight: 1,
345 |         oopWeight: 1,
346 |         torWeight: 1,
347 |         vdwWeight: 1,
348 |         eleWeight: 1,
349 |         solWeight: 1,
350 |         resWeight: 1,
351 |         cutoffEnable: 1,
352 |         cutoffOn: 8,
353 |         cutoffOff: 10,
354 |         eleDist: 2,
355 |         vdwScale14: 0.5,
356 |         vdwBuffer1: 0,
357 |         vdwBuffer2: 0,
358 |         eleScale14: 0.833333,
359 |         eleDielectric: 1,
360 |         eleBuffer: 0,
361 |         solDielectric: 80,
362 |         solDielectricOffset: 0,
363 |         state0: 1,
364 |         state1: 0,
365 |         state2: 1,
366 |         threadCount: 0
367 |     ];
368 | 
369 | ArgvReset ArgvExpand argv;
370 |     local [recmdb, ligmdb, ph4file, outf] = ArgvPull [
371 |         ['-rec','-lig','-ph4','-o'],
372 |         1
373 |     ];
374 | 
375 |     // If no receptor given as argument use default rec.moe
376 |     if isnull recmdb then
377 |         recmdb = 'rec.moe';
378 |     endif
379 | 
380 |     local basename = fbase recmdb;
381 |     local extension = fext recmdb;
382 | 
383 |     // output docking database file
384 |     outf = 'dock.mdb';
385 | 
386 |     // Receptor file or database
387 |     // Assume that the file is a moe or pdb file extract chains atoms
388 | 
389 |     local chains = ReadAuto [recmdb, []];
390 |     local rec = cat cAtoms chains; // extract atom info from atom
391 | 
392 |     local alpha_sites = run['sitefind.svl', [rec, []], 'AlphaSites'];
393 | 
394 |     // Take first/highest scoring pocket alpha_sites(1)
395 |     // Take fpos data alpha_sites(1)(1)
396 |     // Take only coords of fpos data alpha_sites(1)(1)(2)
397 |     local a_sites = apt cat alpha_sites(1)(1)(2); // x, y, z coords
398 | 
399 |     // Make dummy He atoms for alpha site
400 |     local dummy, x, y, z;
401 |     for x = 1, length a_sites loop
402 |         dummy(x) = sm_Build ['[He]'];
403 |         aSetPos [dummy(x), a_sites(x)];
404 |     endloop
405 | 
406 |     // Make a collection of site atoms to send to docking
407 |     // from the alpha site
408 |     oSetCollection ['Site', dummy];
409 |     local site = oGetCollection 'Site';
410 | 
411 |     // Ligand database
412 |     local lmdb = _db_Open [ligmdb, 'read'];
413 |     if lmdb == 0 then
414 |         exit twrite ['Cannot read ligand mdb file {}', ligmdb];
415 |     endif
416 | 
417 |     local ent = 0; // must have this set to zero
418 |     while ent = db_NextEntry[lmdb, ent] loop; //loop through ligand database
419 |         local ligdata = db_Read[lmdb, ent]; //read data for each entry
420 |         local ligmoldata = ligdata.mol; // extract into moldata
421 |         local ligchains = mol_Create ligmoldata; //create molecule in window
422 |         local lig = cat cAtoms ligchains; // extract atom info from atom
423 |     endloop
424 | 
425 |     // Set options for docking and refinement
426 |     // maxpose is set to accept 50 poses, change as required
427 |     local opt = [
428 |                 outrmsd: 1,
429 |                 sel_ent_only_rec: 0,
430 |                 sel_ent_only: 0,
431 |                 wall: [ '', 0, [ 0, 0, 0 ], [ 1000000, 1000000, 1000000 ], 0 ],
432 |                 csearch: 1,
433 |                 placement: 'None',
434 |                 scoring: 'None',
435 |                 dup_placement: 1,
436 |                 rescoring: '%(rescoring)s',
437 |                 rescoring_opt: [ train : 0 ],
438 |                 dup_refine: 1,
439 |                 remaxpose: 1,
440 |                 descexpr: '',
441 |                 receptor_mfield: '',
442 |                 ligand_mfield: '',
443 |                 tplate: [  ],
444 |                 tplateSel: [  ],
445 |                 ligmdbname: ligmdb,
446 |                 recmdbname: recmdb
447 |     ];
448 | 
449 |     //Perform the docking
450 |     DockFile [rec, site, ligmdb, outf, opt];
451 | 
452 |     oDestroy ligchains;
453 |     db_Close lmdb;
454 |     write ['Docking finished at {}.\\n', asctime []];
455 | 
456 | endfunction;" > moe_rescoring.svl
457 | 
458 | %(rescoring_cmd)s"""% locals()
459 |                 file.write(script)
460 | 
461 |     def extract_rescoring_results(self, file_s):
462 | 
463 |         locals().update(self.options)
464 | 
465 |         if self.options['rescoring'] == 'prolig': 
466 |             with open(file_s, 'a') as sf:
467 |                 if os.path.exists('moebatch.log'):
468 |                     with open('moebatch.log', 'r') as logf:
469 |                         is_interaction_energy = False
470 |                         for line in logf:
471 |                             if line.startswith("Interaction energy:"):
472 |                                 sf.write(line.split()[-2]+'\n')
473 |                                 is_interaction_energy = True
474 |                                 break
475 |                         if not is_interaction_energy:
476 |                             sf.write('NaN\n')
477 |                 else:
478 |                      sf.write('NaN\n')
479 |         else:
480 |             # get SDF to extract scores
481 |             sdffile = 'ligand.sdf'
482 |             subprocess.check_output(license.wrap_command("moebatch -exec \"db_ExportSD ['dock.mdb', '%s', ['mol','S'], []]\""%sdffile, 'moe'), shell=True, executable='/bin/bash')
483 |             with open(file_s, 'a') as sf:
484 |                 if os.path.exists(sdffile):
485 |                     with open(sdffile, 'r') as sdff:
486 |                         for line in sdff:
487 |                             if line.startswith("> <S>"):
488 |                                 sf.write(sdff.next().strip()+'\n')
489 |                                 break
490 |                         os.remove(sdffile)
491 |                 else:
492 |                     sf.write('NaN\n')
493 | 
def write_sitefinder_script(filename, file_r, args):
    """Write the SVL site-finder program and the bash script that runs it.

    'sitefinder.svl' is generated in the current directory from `file_r`
    and `args`; `filename` receives a bash script whose only command is
    the moebatch call on that SVL file, passed through
    license.wrap_command(..., 'moe').
    """
    write_moe_sitefinder_script('sitefinder.svl', file_r, args)

    # cmd running the site finder
    sitefinder_cmd = license.wrap_command("moebatch -run sitefinder.svl", 'moe')

    # write script
    with open(filename, 'w') as ff:
        ff.write("""#!/bin/bash
# run docking
%(sitefinder_cmd)s
""" % {'sitefinder_cmd': sitefinder_cmd})
506 | 
def write_moe_sitefinder_script(filename, file_r, args):
    """Write to `filename` an SVL program that lists the alpha sites of `file_r`.

    The generated program runs 'sitefind.svl' on the receptor and prints,
    for each retained site, its index, PLB value, center of geometry and
    the distance from that center to the farthest site point.

    A site is retained when its index does not exceed the limit derived
    from args.nsitesmax and either its PLB is above args.minplb or it is
    the first site. args.nsitesmax == 0 turns the limit into the SVL
    expression 'length alpha_sites'.
    """
    if args.nsitesmax == 0:
        site_limit = 'length alpha_sites'
    else:
        site_limit = str(args.nsitesmax)

    fields = {'file_r': file_r, 'minplb': args.minplb, 'nsitesmax': site_limit}

    # write svl script
    with open(filename, 'w') as ff:
        ff.write("""#svl

local function main []
    local chains = ReadAuto ['%(file_r)s', []];
    local rec = cat cAtoms chains; // extract atom info from atom

    // locate alpha sites
    local alpha_sites = run['sitefind.svl', [rec, []], 'AlphaSites'];

    local dummy, x, dist;
    local a_sites, plb;
    local minplb = %(minplb)s, maxdist;
    local idx;
    local nsites;
    local cog; // center of geometry

    write ['#ID PLB  x  y  z  radius\\n'];

    for idx = 1, length alpha_sites loop
        plb = alpha_sites(idx)(4)(2);

        if (plb > minplb or idx == 1) and idx <= %(nsitesmax)s then
            a_sites = alpha_sites(idx)(1)(2);
            nsites = length a_sites(1);

            // get center of geometry of the alpha sites
            cog = [0.0, 0.0, 0.0];
            for x = 1, nsites loop
                cog = add[[a_sites(1)(x), a_sites(2)(x), a_sites(3)(x)], cog];
            endloop
            cog = div[cog, nsites];
            maxdist = 0;

            // get distance to the farthest atom
            for x = 1, nsites loop
                dist = sqrt add pow[sub[[a_sites(1)(x), a_sites(2)(x), a_sites(3)(x)], cog], 2];
                if dist > maxdist then
                    maxdist = dist;
                endif
            endloop
            write ['{f.0} {f.2} {f.3} {f.3}\\n', idx, plb, cog, maxdist];
        endif
    endloop
endfunction;""" % fields)
562 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
  1 | .. image:: https://github.com/jp43/DockBox/raw/master/logo.png
  2 |    :align: right
  3 | 
  4 | *******
  5 | DockBox
  6 | *******
  7 | 
  8 | DockBox is a python wrapper library designed to facilitate the use of standard docking
  9 | programs either alone or in combination. In addition, DockBox makes it possible to rescore the
 10 | generated docking poses with different popular scoring algorithms as well as to analyze
 11 | the results using different consensus docking/scoring strategies.
 12 | 
 13 | **List of programs supported by DockBox**
 14 | 
 15 | * **Docking**:
 16 | 
 17 |   * AutoDock 4 (http://autodock.scripps.edu)
 18 |   * AutoDock Vina (http://vina.scripps.edu/index.html)
 19 |   * DOCK 6 (http://dock.compbio.ucsf.edu/DOCK_6/index.htm)
 20 |   * MOE (https://www.chemcomp.com/MOE-Molecular_Operating_Environment.htm)
 21 | 
 22 | * **Rescoring**:
 23 | 
 24 |   * AutoDock 4 (http://autodock.scripps.edu)
 25 |   * AutoDock Vina (https://vina.scripps.edu)
 26 |   * DOCK 6 (http://dock.compbio.ucsf.edu/DOCK_6/index.htm)
 27 |   * DSX (https://agklebe.pharmazie.uni-marburg.de/?id=11&lang=en)
 28 |   * MOE (https://www.chemcomp.com/MOE-Molecular_Operating_Environment.htm)
 29 | 
 30 | None of the above docking or scoring programs are included in the current repository.
 31 | Hence, a user who wants to run a docking/scoring program through DockBox must first
 32 | install that program separately on the same system where DockBox is installed.
 33 | 
 34 | To make a docking or scoring program usable by DockBox, make sure all the executable files
 35 | required for each program can be directly run on the command line from any location,
 36 | i.e., are in your PATH (see section **Required executable files**
 37 | to find out the executables needed to run each docking/scoring program). For MOE,
 38 | an additional environment variable, i.e., MOE, must be set up.
 39 | 
 40 | .. contents:: **Table of Contents**
 41 | 
 42 | Prerequisites
 43 | *************
 44 | 
 45 | The following are the minimal requirements to install the DockBox module
 46 | 
 47 | * python version 2.6 or 2.7
 48 | 
 49 | * virtualenv version 1.11 or later
 50 | 
 51 | 
 52 | Installation
 53 | ************
 54 | 
 55 | The easiest way to install DockBox is to create a virtual environment. In this way, DockBox 
 56 | and its dependencies can easily be installed in user-space without clashing with potentially 
 57 | incompatible system-wide packages.
 58 | 
 59 | Once virtualenv has been properly installed, simply type (and press the return key)
 60 | 
 61 | ::
 62 | 
 63 |  virtualenv env
 64 |   
 65 | on the command line followed by
 66 | 
 67 | ::
 68 | 
 69 |  source env/bin/activate
 70 |  
 71 | to activate the virtual environment (do not forget to activate your environment every time you log into a new shell environment).
 72 | 
 73 | Finally, the DockBox package can be set up by going to the DockBox installation directory and typing:
 74 | 
 75 | ::
 76 | 
 77 |  python setup.py install
 78 |  
 79 |  
 80 | Installation is complete!
 81 | 
 82 | 
 83 | Required executable files
 84 | *************************
 85 | 
 86 | Any docking/scoring software intended to be used by DockBox should be installed separately and should work as a standalone program. In addition, make sure the following executables are in your PATH, depending on which docking/scoring software should be used:
 87 | 
 88 | * **AutoDock 4** 
 89 | 
 90 |   * autodock4
 91 |   * autogrid4
 92 |   * obabel
 93 |   * prepare_dpf4.py
 94 |   * prepare_gpf4.py
 95 |   * prepare_ligand4.py
 96 |   * prepare_receptor4.py
 97 | 
 98 |   The autodock4 and autogrid4 executables can be installed by installing `AutoDock4   <http://autodock.scripps.edu/downloads/autodock-registration/autodock-4-2-download-page/>`_. All the .py executable files are included in the `MGLTools <http://mgltools.scripps.edu/downloads>`_ package. It is important not to move those executables to different locations outside of the MGLTools package, as DockBox uses those locations to deduce additional MGLTools python modules (PyBabel, MolKit). The obabel executable can be obtained by installing OpenBabel from its official `website <http://openbabel.org/wiki/Main_Page>`_.
 99 | 
100 | * **AutoDock Vina**
101 | 
102 |   * obabel
103 |   * prepare_ligand4.py
104 |   * prepare_receptor4.py
105 |   * vina
106 | 
107 |   The vina executable is available within the `AutoDock Vina <https://vina.scripps.edu>`_ package. See AutoDock 4 section for other executables.
108 | 
109 | * **DOCK 6**
110 | 
111 |   * chimera
112 |   * dms
113 |   * dock6
114 |   * grid
115 |   * showbox
116 |   * sphere_selector
117 |   * sphgen_cpp
118 | 
119 |   The chimera command is accessible when installing `CHIMERA <http://www.cgl.ucsf.edu/chimera>`_. The dock6, grid, showbox and sphere_selector commands are all part of the `DOCK 6 package <http://dock.compbio.ucsf.edu/Online_Licensing/index.htm>`_. The dms program can be installed separately from `here <http://www.cgl.ucsf.edu/chimera/docs/UsersGuide/midas/dms1.html>`_ while sphgen_cpp can be downloaded from the following `url <http://dock.compbio.ucsf.edu/Contributed_Code/sphgen_cpp.htm>`_.
120 | 
121 | * **DSX**
122 | 
123 |   * dsx
124 | 
125 |   The dsx-like routines can be downloaded from the official `link <https://agklebe.pharmazie.uni-marburg.de/?id=11&lang=en>`_. Note that DockBox assumes DSX is run via an executable file called dsx. Therefore, once you selected the appropriate routine to be run (e.g., dsx_linux_64.lnx for linux systems), create a symbolic link via ln. For example, assuming you are located in the same directory as dsx_linux_64.lnx, you can use the following command:
126 |   
127 | ::
128 | 
129 |   ln -s dsx_linux_64.lnx dsx
130 | 
131 | * **MOE**
132 | 
133 |   * moebatch
134 |   
135 |   MOE can be obtained from `here <https://www.chemcomp.com/MOE-Molecular_Operating_Environment.htm>`_. Make sure the MOE environment variable (pointing to MOE's installation directory) has been correctly exported. Note that a valid MOE license is required.
136 | 
137 | **Note**: In case rescoring is enabled (*rescoring=yes* in the configuration file, see below), it is recommended to also enable minimization of the poses (*minimize=yes* in the configuration file). In this case, AmberTools (serial version) 14, 15, 16 or 17 is required for minimization. Make sure main AMBER executables are accessible from the PATH variable including sander, tleap, antechamber and parmchk which are used by DockBox.
138 | 
139 | Commands
140 | ********
141 | 
142 | The DockBox package contains two main routines: *rundbx* and *extract_dbx_best_poses*. The former is intended to be used solely for docking and rescoring, while the latter is used to analyze the results and to select the best pose(s) from a combination of scores or among different consensus docking schemes.
143 | 
144 | *rundbx*
145 | ########
146 | 
147 | rundbx is used to dock a ligand to a protein structure and possibly minimize and rescore the output poses. When typing "rundbx -h" on the command line, the following help message will pop up:
148 | 
149 | :: 
150 | 
151 |     usage: rundbx [-h] -l INPUT_FILE_L -r INPUT_FILE_R -f CONFIG_FILE
152 |                   [-prepare_only] [-rescore_only]
153 |     
154 |     rundbx : dock and rescore with multiple programs -------- Requires one file
155 |     for the ligand (1 struct.) and one file for the receptor (1 struct.)
156 |     
157 |     optional arguments:
158 |       -h, --help       show this help message and exit
159 |       -l INPUT_FILE_L  Ligand coordinate file(s): .mol2
160 |       -r INPUT_FILE_R  Receptor coordinate file(s): .pdb
161 |       -f CONFIG_FILE   config file containing docking parameters
162 |       -prepare_only    Only prepare scripts for docking (does not run docking)
163 |       -rescore_only    Run rescoring only
164 | 
165 | * Inputs
166 | 
167 |   * -l INPUT_FILE_L: **.mol2** file containing the coordinates of the ligand (only one structure allowed)
168 |   
169 |   * -r INPUT_FILE_R: **.pdb** file containing the receptor coordinates (only one structure allowed)
170 |   
171 |   * -f CONFIG_FILE: **.ini** configuration file containing the docking parameters (see the section **Preparing the INI configuration file**)
172 |   
173 | * Options
174 | 
175 |   * -prepare_only: generate all docking folders and scripts needed to run each docking program separately. Does not actually run docking.
176 |   
177 |   * -rescore_only: option used to perform the rescoring step only. Using this option implies that you have already run *rundbx* and generated a **poses** folder in the current directory. If a **rescoring** folder already exists as an output of a previous *rundbx* run, all data generated previously by rescoring with the same scoring functions as the current ones will be overwritten while data generated with scoring functions different from the current ones will be kept.
178 | 
179 | 
180 | *extract_dbx_best_poses*
181 | #########################
182 | 
183 | *extract_dbx_best_poses* is the routine used to analyze the results and select the best docking poses from the outputs of the *rundbx* command. When typing "extract_dbx_best_poses -h" on the command line, the following help message will pop up:
184 | 
185 | ::
186 | 
187 |   usage: extract_dbx_best_poses [-h] [-all-targets] [-all-isomers] [-csv FILE]
188 |                               [-cutoff RMSD_VALUE] [-d PRGM1 [PRGM2 ...]]
189 |                               [-dirs DIR1 [DIR2 ...]] [-r DIRECTORY NAME]
190 |                               [-s FUNC [FUNC ...] | -cd PRGM [PRGM ...] |
191 |                               -sbcd FUNC [FUNC ...]]
192 | 
193 |   Extract best docking poses after rundbx finished.
194 | 
195 |   optional arguments:
196 |   -h, --help            show this help message and exit
197 |   -all-targets          Select best poses over all the targets. If not
198 |                         specified, extract best pose separately for each
199 |                         target. A "lig/target/isomer" architecture of the
200 |                         folders is assumed
201 |   -all-isomers          Select best poses over all the isomers. If not
202 |                         specified, extract best pose separately for every
203 |                         isomer. A "lig/target/isomer" architecture of the
204 |                         folders is assumed
205 |   -csv FILE             .csv filename with compounds. Used to add names of
206 |                         compounds. Default: none
207 |   -cutoff RMSD_VALUE    RMSD cutoff used for consensus docking or score-based
208 |                         consensus docking. Default: 2.0 A
209 |   -d PRGM1 [PRGM2 ...]  Docking programs (instances) to be considered when
210 |                         extracting best poses
211 |   -dirs DIR1 [DIR2 ...]
212 |                         Directories considered for analysis. Should contain a
213 |                         folder called "poses". Default: curr. dir
214 |   -r DIRECTORY NAME     Name of results directory. Default: results
215 |   -s FUNC [FUNC ...]    Scoring functions used to extract the best pose
216 |                         (combination of scores)
217 |   -cd PRGM [PRGM ...]   Docking programs used for standard consensus docking
218 |   -sbcd FUNC [FUNC ...]
219 |                         Scoring functions used for score-based consensus
220 |                         docking
221 | 
222 | 
223 | Using *rundbx*
224 | **************
225 | 
226 | The *rundbx* routine allows the user to dock and rescore a ligand to a protein target using multiple docking 
227 | and scoring functions. Running *rundbx* is fairly simple as it requires only three input files, namely a PDB file 
228 | including the protein structure to dock on (-r flag), a file with Tripos Mol2 format containing a 3D structure of the 
229 | ligand (1 structure per file, -l flag) and an INI configuration file (-f flag) which contains all the options 
230 | related to docking and/or rescoring (see section **Preparing the INI configuration file**). 
231 | 
232 | When finished correctly, a *rundbx* job should have created a folder called **poses** containing all the poses 
233 | generated by the different docking programs as specified in the INI configuration file. Each pose is provided 
234 | in a file with .mol2 format named *lig-<index>.mol2*, where <index> is the index of the pose. Within the **poses**
235 | folder, a file called info.dat can also be found. The file contains information relative to each docking program/site
236 | combination specified in the INI file, including the number of poses generated and the index of the first 
237 | pose generated for that combination. 
238 | 
239 | Below is an example of an *info.dat* file obtained when docking was performed with Autodock, Autodock Vina and DOCK 6 on the same binding site:
240 | 
241 | ::
242 | 
243 |   #1,28
244 |   program,nposes,firstidx,site
245 |   autodock,10,1,
246 |   vina,10,11,
247 |   dock,7,21,
248 |  
249 | showing that a total of 27 poses were generated (28 minus 1). 10 poses were generated with Autodock, namely, poses from index 1 to 10, 10 were generated with Autodock Vina, i.e., poses from index 11 to 20, and 7 were generated with DOCK 6, poses from index 21 to 27. No label for the binding site was specified as docking was performed on the same site.
250 |   
251 | Other outputs of the *rundbx* command are folders created for every docking program/site combination specified in the INI file, which contain the docking poses, the docking scores (obtained with docking) and intermediate files generated by the docking software. For example, if Autodock and Autodock Vina were used to dock on three different binding sites called site1, site2 and site3 (see section **Preparing the INI configuration file**), then a total of six folders named **autodock.site1**, **autodock.site2**, **autodock.site3**, **vina.site1**, **vina.site2** and **vina.site3**, should have been created.
252 |   
253 | Finally, if the rescoring option was enabled in the INI file, a folder called **rescoring** should have been created as well, containing file(s) named <program>.score, where <program> is the name of each program used for rescoring.
254 | 
255 | 
256 | Preparing the INI configuration file
257 | ####################################
258 | 
259 | Besides one MOL2 file containing the ligand structure (-l flag) and one PDB file containing the receptor structure (-r flag), running *rundbx* requires a configuration file (-f flag) that specifies all the parameters needed for the docking procedure.
260 | 
261 | The *rundbx* configuration file should be an INI file (https://en.wikipedia.org/wiki/INI_file), i.e., the file should be split in sections, each section name appearing on a line by itself, in square brackets ("[" and "]"). Each section contains a certain number of keys which refer to specific options used; all keys after the section declaration are associated with that section. Finally, every key should have a name (option name) and a value (option value), delimited by an equals sign (=).
262 | 
263 | Below is an example of configuration file used to dock on two binding sites and rescore with DrugScoreX (dsx), Autodock and Autodock Vina.
264 | 
265 | ::
266 | 
267 |     [DOCKING]
268 |     site = site1, site2
269 |     program = autodock, vina, dock
270 |     rescoring = yes
271 |     minimize = yes
272 |     cleanup = yes
273 |     
274 |     [RESCORING]
275 |     program = dsx, autodock, vina
276 |     
277 |     [DSX]
278 |     pot_dir = /pmshare/jordane/CSD_potentials/DSX_CSD_Potentials_v0511/csd_pot_0511/
279 |     other_flags = -T0 1.0 -T1 1.0 -T3 1.0 -j
280 |     
281 |     [AUTODOCK]
282 |     ga_run = 20
283 |     spacing = 0.4
284 |     
285 |     [VINA]
286 |     num_modes = 20
287 |     
288 |     [DOCK]
289 |     nposes = 20
290 |     
291 |     [SITE1]
292 |     center = 75.5, 80.0, 31.0
293 |     boxsize = 40.0, 40.0, 40.0
294 |     
295 |     [SITE2]
296 |     center = 75.5, 40.0, 50.0
297 |     boxsize = 40.0, 40.0, 40.0
298 | 
299 | General options
300 | ###############
301 | 
302 | * The **DOCKING** section includes the software that should be used for docking, and whether minimization, rescoring and/or cleanup should be performed. The docking software should be specified with comma separation through the key **program**. The keys relative to the **DOCKING** section are:
303 | 
304 | 
305 |   * **program**: specifies the software which are used for docking (autodock, dock, moe and/or vina). Options relative to each program (or instance) are specified within the section of the same name. For example, if autodock is in the list of programs, options associated with autodock should be specified in the **AUTODOCK** section. In case the same software needs to be used multiple times, numbering can be appended to the name of the program (e.g., in the first example below, multiple runs of MOE are performed using different scoring methods: moe, moe1, moe2).
306 | 
307 |   * **minimize**: performs minimization on the generated poses (yes or no).
308 | 
309 |   * **rescoring**: performs rescoring on the generated poses (yes or no). It is strongly recommended to enable minimization in case rescoring is done. This will avoid a lot of clashes, especially when the software used for rescoring are different from those used for docking. If the rescoring option is enabled, a section RESCORING should be created that contains all the options relative to that step (see below).
310 | 
311 |   * **cleanup**: specifies if big intermediate files should be removed (yes or no).
312 | 
313 |   * **site**: specifies the labels for the binding sites in case multiple binding sites are considered (site1, site2,...). See the example configuration to dock on multiple binding sites, minimize and rescore the poses with multiple software.
314 | 
315 | 
316 | Docking and rescoring options relative to each program are detailed in the section **Docking/scoring options**.
317 | 
318 | * The **SITE** section includes the information about the box to spot the binding site. The keys are the following:
319 | 
320 |   * **center**: x, y, z coordinates of the center of the binding box (in Å).
321 | 
322 |   * **boxsize**: size of the box along each dimension x, y, z. The dimensions of the box should be no more than 50.0, 50.0, 50.0 (in Å).
323 | 
324 | 
325 | * The **RESCORING** section has only one key specifying the programs used to rescore:
326 | 
327 |   * **program**: specifies the software which are used for rescoring (autodock, dock, moe and/or vina). Options relative to each program (or instance) are specified within the section of the same name. For example, if autodock is in the list of programs, options associated with autodock should be specified in the **AUTODOCK** section. In case the same software needs to be used multiple times, numbering can be appended to the name of the program (e.g., in the example below, multiple runs of MOE are performed using different scoring methods: moe, moe1, moe2).
328 | 
329 | 
330 | Docking/scoring options
331 | #######################
332 | 
333 | Each section relative to a docking/scoring program should be named the way it was specified under **program** in the **DOCKING** and/or **RESCORING** section. Below is a list of all the options per software that can be specified in the configuration file.
334 | 
335 | **Autodock**
336 | 
337 | * ga_run (default: 100): number of autodock runs = targeted number of final poses
338 | * spacing (default: 0.3): grid spacing
339 | 
340 | **Note 1**: the partial charges of the ligand are obtained from the Gasteiger method using the AutodockTools command *prepare_ligand4.py*
341 | 
342 | **Note 2**: the number of energy evaluations *ga_num_evals* is automatically calculated from the number of torsion angles in the ligand structure via the formula:
343 | 
344 | ::
345 | 
346 |         ga_num_evals = min(25000000, 987500 * n_torsion_angles + 125000)
347 | 
348 | **Note 3**: As is usually the case for Autodock, non polar hydrogens in the ligand structure are removed prior to docking in order to properly use the Autodock force field. Once the docking has been performed, nonpolar hydrogens are reattributed in a way consistent with the input structure. Unless the *minimize* option in the configuration file is set to *yes*, no minimization is performed on those hydrogens.
349 | 
350 | **Note 4**: Final poses are extracted from the .dlg file using Open Babel via the following command:
351 | 
352 | ::
353 | 
354 |         obabel -ad -ipdbqt dock.dlg -omol2 lig-.mol2 -m
355 | 
356 | **Autodock Vina**
357 | 
358 | * cpu (default: 1)
359 | * energy_range (default: 3)
360 | * num_modes (default: 9): targeted number of final poses
361 | 
362 | **Note 1**: the partial charges of the ligand are obtained from the Gasteiger method using the AutodockTools command *prepare_ligand4.py*
363 | 
364 | **Note 2**: As is usually the case for Autodock Vina, non polar hydrogens in the ligand structure are removed prior to docking in order to properly use the Autodock force field. Once the docking has been performed, nonpolar hydrogens are reattributed in a way consistent with the input structure. Unless the *minimize* option in the configuration file is set to *yes*, no minimization is performed on those hydrogens.
365 | 
366 | **DOCK 6**
367 | 
368 | * attractive_exponent (default: 6)
369 | * extra_margin (default: 2.0)
370 | * grid_spacing (default: 0.3)
371 | * maximum_sphere_radius (default: 4.0)
372 | * max_orientations (default: 10000)
373 | * minimum_sphere_radius (default: 1.4)
374 | * nposes (default: 20): targeted number of final poses
375 | * num_scored_conformers (default: 5000)
376 | * probe_radius (default: 1.4)
377 | * repulsive_exponent (default: 12)
378 | 
379 | **DSX**
380 | 
381 | **MOE** (scoring)
382 | 
383 | * gtest (default: 0.01)
384 | * maxpose (default: 5)
385 | * placement (default: Triangle Matcher)
386 | * placement_maxpose (default: 250)
387 | * placement_nsample (default: 10)
388 | * remaxpose (default: 1)
389 | * rescoring (default: GBVI/WSA dG)
390 | * scoring (default: London dG)
391 | 
392 | 
393 | Examples
394 | ########
395 | 
396 | **Multi-program docking on a single binding site**
397 | 
398 | Below is an example of a configuration file that can be used as an input of *rundbx*. The docking procedure is carried out on a single binding site specified as a box with dimensions 30.0 x 30.0 x 30.0 centered at the position (x, y, z) = 8.446, 25.365, 4.394.
399 | 
400 | ::
401 | 
402 |     [DOCKING]
403 |     program = autodock, vina, dock, moe, moe1, moe2
404 |     rescoring = no
405 |     minimize = yes
406 |     cleanup = no
407 |     
408 |     [AUTODOCK]
409 |     ga_run = 50
410 |     spacing = 0.3
411 |     
412 |     [VINA]
413 |     num_modes = 20
414 |     
415 |     [DOCK]
416 |     nposes = 200
417 |     
418 |     [MOE]
419 |     scoring = London dG
420 |     maxpose = 100
421 |     remaxpose = 50
422 |     
423 |     [MOE1]
424 |     scoring = GBVI/WSA dG
425 |     maxpose = 100
426 |     remaxpose = 50
427 |     
428 |     [MOE2]
429 |     scoring = Affinity dG
430 |     maxpose = 100
431 |     remaxpose = 50
432 |     
433 |     [SITE]
434 |     center = 8.446, 25.365, 4.394
435 |     boxsize = 30.0, 30.0, 30.0
436 | 
437 | 
438 | **Multi-program docking and rescoring on multiple binding sites**
439 | 
440 | Below is another example of configuration file for *rundbx* used to dock on two binding sites and rescore with DrugScoreX (dsx), Autodock and Autodock Vina.
441 | 
442 | ::
443 | 
444 |     [DOCKING]
445 |     site = site1, site2
446 |     program = autodock, vina, dock
447 |     rescoring = yes
448 |     minimize = yes
449 |     cleanup = yes
450 |     
451 |     [RESCORING]
452 |     program = dsx, autodock, vina
453 |     
454 |     [DSX]
455 |     pot_dir = /pmshare/jordane/CSD_potentials/DSX_CSD_Potentials_v0511/csd_pot_0511/
456 |     other_flags = -T0 1.0 -T1 1.0 -T3 1.0 -j
457 |     
458 |     [AUTODOCK]
459 |     ga_run = 20
460 |     spacing = 0.4
461 |     
462 |     [VINA]
463 |     num_modes = 20
464 |     
465 |     [DOCK]
466 |     nposes = 20
467 |     
468 |     [SITE1]
469 |     center = 75.5, 80.0, 31.0
470 |     boxsize = 40.0, 40.0, 40.0
471 |     
472 |     [SITE2]
473 |     center = 75.5, 40.0, 50.0
474 |     boxsize = 40.0, 40.0, 40.0
475 | 
476 | Note that the DOCKING section includes the label of the binding sites through the keyword *site*, here, site1 and site2. Each label refers to the section of the same name SITE1 and SITE2, respectively. 
477 | 
478 | 


--------------------------------------------------------------------------------