├── .gitignore ├── README.md ├── prepare_ligands.py ├── prepare_receptor.py ├── run_autodock_vina.py ├── test_files └── ZINC67842136.mol2 └── tests.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | bin/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # Installer logs 26 | pip-log.txt 27 | pip-delete-this-directory.txt 28 | 29 | # Unit test / coverage reports 30 | htmlcov/ 31 | .tox/ 32 | .coverage 33 | .cache 34 | nosetests.xml 35 | coverage.xml 36 | 37 | # Translations 38 | *.mo 39 | 40 | # Mr Developer 41 | .mr.developer.cfg 42 | .project 43 | .pydevproject 44 | 45 | # Rope 46 | .ropeproject 47 | 48 | # Django stuff: 49 | *.log 50 | *.pot 51 | 52 | # Sphinx documentation 53 | docs/_build/ 54 | 55 | # DockingWatch 56 | .DockingWatch.conf 57 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | docking-scripts 2 | =============== 3 | 4 | Scripts used for virtual screening of small molecule inhibitors from ZINC database using Autodock Vina 5 | -------------------------------------------------------------------------------- /prepare_ligands.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ########################################################################## 4 | # Author: joemartaganna (joemar.ct@gmail.com) 5 | # 6 | # Description: 7 | # 8 | # Script for converting mol2 files downloaded from the ZINC database into 9 | # pdbqt files with charges and polar hydrogens added in preparation for 10 | # docking with AutoDock Vina. 
# The conversion is done in parallel through
# multithreading.
#
# Requires: babel, joblib
##########################################################################


from subprocess import Popen, PIPE
import gzip
import os


def convert_to_pdbqt(mol2, output_dir, remove_input=True):
    """Write one mol2 record to disk and convert it to pdbqt with babel.

    The molecule id is read from the second line of ``mol2`` and is used as
    the base name of both the intermediate ``.mol2`` file and the ``.pdbqt``
    file, which are placed in ``output_dir``.

    Args:
        mol2: Text of a single mol2 record.
        output_dir: Directory that receives the files.
        remove_input: When true, delete the intermediate ``.mol2`` file once
            babel has been run (or has failed to start).

    Returns:
        dict with the keys ``'mol2'`` and ``'pdbqt'`` holding the two paths.
        The exit status of babel is not inspected, so the pdbqt file is not
        guaranteed to exist.

    Raises:
        ValueError: if ``mol2`` has no molecule id on its second line.
        OSError: if the ``babel`` executable cannot be started.
    """
    lines = mol2.split('\n')
    if len(lines) < 2 or not lines[1].strip():
        raise ValueError('mol2 record has no molecule id on its second line')
    mol_id = lines[1].strip()
    # Build both paths from the id instead of str.replace() on the full
    # path, which would also rewrite a '.mol2' inside a directory name.
    mol2_outf_path = os.path.join(output_dir, mol_id + '.mol2')
    pdbqt_outf_path = os.path.join(output_dir, mol_id + '.pdbqt')
    # Write the mol2
    with open(mol2_outf_path, 'w') as mol2_outf:
        mol2_outf.write(mol2)
    # Convert to pdbqt; an argument list (no shell) keeps ids and paths with
    # spaces or shell metacharacters from being interpreted by a shell.
    cmd = ['babel', '-imol2', mol2_outf_path, '-opdbqt', pdbqt_outf_path,
           '--partialcharge', 'gasteiger', '--AddPolarH']
    try:
        p = Popen(cmd, stdout=PIPE, stderr=PIPE)
        p.communicate()
    finally:
        if remove_input:
            os.remove(mol2_outf_path)
    return {'mol2': mol2_outf_path, 'pdbqt': pdbqt_outf_path}


def split_gzipped_mol2(gzipped_mol2, output_dir=None):
    """Split a gzipped multi-molecule mol2 file into one string per molecule.

    Every returned record starts with an ``@MOLECULE`` line followed by the
    record's non-blank lines with surrounding whitespace stripped.  Anything
    that precedes the first ``@MOLECULE`` marker is ignored.

    As a side effect the output directory is created when it does not exist.

    Args:
        gzipped_mol2: Path of the gzipped mol2 file.
        output_dir: Directory to create; defaults to ``gzipped_mol2`` with
            the ``.mol2.gz`` suffix removed.

    Returns:
        list of mol2 record strings.
    """
    marker = '@MOLECULE'
    with gzip.open(gzipped_mol2, 'rb') as fh:
        data = fh.read()
    if not isinstance(data, str):
        # Python 3 yields bytes; the splitting below works on text.
        data = data.decode('utf-8')
    if not output_dir:
        output_dir = gzipped_mol2.split('.mol2.gz')[0]
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    mols = []
    # Element 0 of the split is whatever precedes the first marker, which is
    # never a molecule.
    for chunk in data.split(marker)[1:]:
        lines = [line.strip() for line in chunk.split('\n')]
        lines = [line for line in lines if line]
        if lines:
            mols.append('\n'.join([marker] + lines))
    return mols


def generate_pdbqt_files(gzipped_mol2, mols, n_jobs=50):
    """Convert every mol2 record in ``mols`` to pdbqt using a thread pool.

    Args:
        gzipped_mol2: Path of the source archive; the output directory is
            this path with the ``.mol2.gz`` suffix removed.
        mols: Iterable of mol2 record strings.
        n_jobs: Number of joblib worker threads.
    """
    # Imported lazily so the helpers above stay importable without joblib.
    from joblib import Parallel, delayed
    output_dir = gzipped_mol2.split('.mol2.gz')[0]
    Parallel(n_jobs=n_jobs, backend='threading', verbose=55)(
        delayed(convert_to_pdbqt)(mol, output_dir) for mol in mols)


if __name__ == '__main__':
    import sys
    infiles = sys.argv[1:]
    for i, f in enumerate(infiles):
        print('\n\n%s/%s - dealing with %s' % (i + 1, len(infiles), f))
        mols = split_gzipped_mol2(f)
        generate_pdbqt_files(f, mols)

# --------------------------------------------------------------------------------
# /prepare_receptor.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python

import os
import sys
import glob
from subprocess import Popen, PIPE


def check_mgl_library():
    """Locate the MGLTools interpreter and its prepare_receptor4.py script.

    The first entry found under ``/Library/MGLTools`` is used as the
    installation root.

    Returns:
        tuple ``(pythonsh_path, prepare_receptor4_path)``.

    Raises:
        Exception: if ``/Library/MGLTools`` is missing or empty.
    """
    installs = glob.glob('/Library/MGLTools/*')
    if not installs:
        raise Exception('The prepare_receptor4.py script cannot be found because MGLTools is not installed.')
    mgl_root = installs[0]
    pythonpath = os.path.join(mgl_root, 'bin', 'pythonsh')
    utilities24 = os.path.join(mgl_root, 'MGLToolsPckgs', 'AutoDockTools', 'Utilities24')
    prep_receptor_script = os.path.join(utilities24, 'prepare_receptor4.py')
    return (pythonpath, prep_receptor_script)


def execute_receptor_prep(receptor_pdb, output_pdbqt):
    """Run prepare_receptor4.py on ``receptor_pdb`` to produce ``output_pdbqt``.

    Args:
        receptor_pdb: Path of the receptor PDB file.
        output_pdbqt: Path of the pdbqt file to write.

    Returns:
        True when the script exited with status 0, False otherwise.

    Raises:
        Exception: propagated from ``check_mgl_library``.
        OSError: if the MGLTools interpreter cannot be started.
    """
    pythonpath, prep_script = check_mgl_library()
    cmd = [pythonpath, prep_script, '-r', receptor_pdb, '-o', output_pdbqt,
           '-A', 'checkhydrogens']
    # stderr is deliberately not captured so the tool's diagnostics remain
    # visible on the terminal.
    p = Popen(cmd, stdout=PIPE)
    p.communicate()
    if p.returncode != 0:
        print('prepare_receptor4.py failed with exit status %s' % p.returncode)
        return False
    print('Done!')
    return True


if __name__ == '__main__':
    try:
        receptor_pdb, output_pdbqt = sys.argv[1:]
    except ValueError:
        print('\nUsage: prepare_receptor.py <receptor_pdb> <output_pdbqt>\n')
    else:
        execute_receptor_prep(receptor_pdb, output_pdbqt)

# --------------------------------------------------------------------------------
# /run_autodock_vina.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python

##########################################################################
# Author: joemartaganna (joemar.ct@gmail.com)
#
# Description:
#
# Script for running AutoDock Vina directly against gzipped mol2 files
# to be more memory- and disk space-efficient.
# Individual mol2 files are
# extracted from a gzipped file downloaded from the ZINC database. Each
# file is then converted to pdbqt, then docked against a receptor protein.
# An affinity cut-off is set so that the intermediate files produced
# during docking with results below this cut-off are discarded. Only the
# output files from high affinity conformers (very few) are stored.
#
# The ZINC id of the ligands docked are saved into a Redis database. Each
# docking run checks this database to avoid redocking the same ligands
# if the script is rerun because of errors or accidental shutdown.
#
# There is an optional support for delivery of binders into Dropbox. This
# is useful for easy monitoring of results in cases when the virtual
# screening is run remotely on a server.
#
# Requires: vina, babel, joblib, redis, redispy, prepare_ligands
# Optional: dropbox
##########################################################################

import errno
import os
import sys
import json
import redis
import shutil
from joblib import Parallel, delayed
from subprocess import Popen, PIPE
from prepare_ligands import split_gzipped_mol2, convert_to_pdbqt


try:
    import dropbox
    dropbox_installed = True
except ImportError:
    dropbox_installed = False

try:
    _read_input = raw_input  # Python 2
except NameError:
    _read_input = input  # Python 3

# Defaults for the Dropbox globals read by dock_ligand(); the __main__ block
# overwrites them only when an access token is available.
client = None
dropbox_folder = None


def _ensure_dir(path):
    """Create ``path`` if it is missing, tolerating a concurrent creator."""
    try:
        os.makedirs(path)
    except OSError as exc:
        if exc.errno != errno.EEXIST or not os.path.isdir(path):
            raise


def _parse_best_affinity(vina_stdout):
    """Return the sign-flipped affinity from the row after the table rule.

    The row following the last line containing ``-+-`` is used; its second
    column is negated.  0 is returned when no such row can be read.
    """
    lines = vina_stdout.splitlines()
    best = None
    for i, line in enumerate(lines):
        if '-+-' in line and i + 1 < len(lines):
            best = lines[i + 1]
    if not best:
        return 0
    fields = best.split()
    if len(fields) < 2:
        return 0
    return float(fields[1]) * -1


def setup_dropbox_integration():
    """Return the Dropbox access token and target folder.

    The values are read from ``.DockingWatch.conf`` next to this script when
    that file exists.  Otherwise the user is asked to authorize the app and
    the resulting token and folder are stored in that file.

    Returns:
        tuple ``(access_token, folder)``; ``(None, None)`` when the user
        presses enter without an authorization code.
    """
    script_path = os.path.realpath(__file__)
    conf_path = os.path.join(os.path.split(script_path)[0], '.DockingWatch.conf')
    # NOTE(review): application credentials are committed in source; consider
    # loading them from the environment or from the conf file instead.
    app_key = '0raajjtnfqaq6ag'
    app_secret = 'ruc4zlu5gyk0rn3'
    flow = dropbox.client.DropboxOAuth2FlowNoRedirect(app_key, app_secret)
    if os.path.exists(conf_path):
        with open(conf_path, 'r') as conf_file:
            dbox_args = json.load(conf_file)
        return dbox_args['access_token'], dbox_args['folder']
    authorize_url = flow.start()
    # Authorize the Dropbox integration
    msg = '\nFor Dropbox integration, visit the following URL in your browser, '
    msg += 'grant access, and copy the resulting authorization code.'
    msg += ' Just hit enter without the code to skip.'
    print(msg + ' \n')
    print(authorize_url)
    code = _read_input("\nEnter the authorization code here: ").strip()
    if not code:
        # Honour the "hit enter to skip" promise made in the prompt instead
        # of handing an empty code to flow.finish().
        return None, None
    folder = _read_input("Enter custom folder under DockingWatch app: ").strip()
    access_token, user_id = flow.finish(code)
    with open(conf_path, 'w') as conf_file:
        json.dump({'access_token': access_token, 'folder': folder}, conf_file)
    return access_token, folder


def upload_to_dropbox(f, client, folder):
    """Upload the file at path ``f`` to ``<folder>/binders/<file name>``.

    Args:
        f: Path of the local file.
        client: Object exposing ``put_file(path, file_object)``.
        folder: Destination folder prefix.
    """
    fname = os.path.split(f)[-1]
    with open(f, 'rb') as fh:
        client.put_file(folder + '/binders/' + fname, fh)


def dock_ligand(vina_conf, mol2_string, output_dir, affinity_cutoff):
    """Dock one ligand with vina unless its id is already recorded in ``db``.

    The ligand is converted to pdbqt inside ``<output_dir>/temp`` and docked.
    The sign-flipped affinity is stored in the module-level ``db`` under the
    molecule id.  Results below ``affinity_cutoff`` are deleted; the others
    are moved to ``<output_dir>/binders`` and, when a Dropbox ``client`` is
    configured, uploaded.

    Args:
        vina_conf: Path passed to ``vina --config``.
        mol2_string: Text of one mol2 record; its second line is the id.
        output_dir: Directory that holds the ``temp`` and ``binders`` dirs.
        affinity_cutoff: Minimum (sign-flipped) affinity to keep a result.
    """
    mol_id = mol2_string.split('\n')[1]
    try:
        # Check if this has been docked
        db[mol_id]
        return
    except KeyError:
        pass
    # Workers are started in parallel by execute_virtual_screening(), so the
    # directory may be created by another worker at any moment.
    temp_dir = os.path.join(output_dir, 'temp')
    _ensure_dir(temp_dir)
    ligand = convert_to_pdbqt(mol2_string, temp_dir)['pdbqt']
    docking_out = os.path.splitext(ligand)[0] + '_out.pdbqt'
    # universal_newlines gives text output on both Python 2 and 3.
    p = Popen(['vina', '--config', vina_conf, '--ligand', ligand],
              stdout=PIPE, stderr=PIPE, universal_newlines=True)
    stdout, stderr = p.communicate()
    if 'Refining results ... done' not in stdout:
        print('Error on %s: The conversion from mol2 to pdbqt may not have succeeded.' % mol_id)
        return
    affinity = _parse_best_affinity(stdout)
    # Put this into the record
    db[mol_id] = affinity
    # Remove the input and output files if affinity is below cut-off
    if affinity < affinity_cutoff:
        for path in (ligand, docking_out):
            if os.path.exists(path):
                os.remove(path)
        return
    binders_dir = os.path.join(output_dir, 'binders')
    _ensure_dir(binders_dir)
    shutil.move(ligand, binders_dir)
    shutil.move(docking_out, binders_dir)
    # client stays None when dropbox is importable but no token was obtained.
    if dropbox_installed and client is not None:
        for path in (ligand, docking_out):
            uploaded = os.path.join(binders_dir, os.path.basename(path))
            upload_to_dropbox(uploaded, client, dropbox_folder)


def execute_virtual_screening(vina_conf, gzipped_mol2, output_dir=None, affinity_cutoff=0):
    """Dock every ligand of a gzipped mol2 file in parallel worker processes.

    Args:
        vina_conf: Path passed to ``vina --config``.
        gzipped_mol2: Path of the gzipped multi-molecule mol2 file.
        output_dir: Result directory; defaults to ``gzipped_mol2`` with the
            ``.mol2.gz`` suffix removed.
        affinity_cutoff: Forwarded to ``dock_ligand``.
    """
    if not output_dir:
        # dock_ligand() joins paths onto output_dir, so None cannot be
        # forwarded; this matches the default split_gzipped_mol2 is given.
        output_dir = gzipped_mol2.split('.mol2.gz')[0]
    mols = split_gzipped_mol2(gzipped_mol2, output_dir=output_dir)
    print('There are %s ligands in this file.' % len(mols))
    Parallel(n_jobs=-1, backend='multiprocessing', verbose=55)(
        delayed(dock_ligand)(vina_conf, mol, output_dir, affinity_cutoff) for mol in mols)


def connect_to_redisdb():
    """Return a Redis client for database 7 on localhost:6379."""
    return redis.Redis(host='localhost', port=6379, db=7)


if __name__ == '__main__':

    if len(sys.argv) < 2:
        sys.exit('Usage: run_autodock_vina.py <vina_conf> [ligands.mol2.gz ...]')
    conf = sys.argv[1]
    infiles = sys.argv[2:]
    # Set the output directory
    output_dir = os.path.join(os.getcwd(), 'docking_results')
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    # Start the redis db
    redis_conf = os.path.join(os.getcwd(), 'docking_results', 'redis.conf')
    # Setup Dropbox integration
    if dropbox_installed:
        access_token, dropbox_folder = setup_dropbox_integration()
        if access_token:
            client = dropbox.client.DropboxClient(access_token)
    # Execute virtual screening
    if os.path.exists(redis_conf):
        db = connect_to_redisdb()
        for i, f in enumerate(infiles):
            print('\n\n%s/%s - dealing with %s' % (i + 1, len(infiles), f))
            execute_virtual_screening(conf, f, output_dir=output_dir, affinity_cutoff=8.0)
    else:
        with open(redis_conf, 'w') as rc:
            rc.write('dbfilename records.rdb\ndir %s\nsave 900 1' % output_dir)
        print('\nRedis configuration file has been created.')
162 | print 'Run redis-server using the redis.conf file and rerun this script to start the virtual screening.\n' 163 | 164 | 165 | -------------------------------------------------------------------------------- /test_files/ZINC67842136.mol2: -------------------------------------------------------------------------------- 1 | @MOLECULE 2 | ZINC67842136 3 | 49 52 0 0 0 4 | SMALL 5 | USER_CHARGES 6 | @ATOM 7 | 1 C1 0.2282 2.4095 -4.5870 C.3 1 <0> -0.1014 8 | 2 C2 -0.6279 2.2038 -3.3639 C.ar 1 <0> -0.0179 9 | 3 C3 -1.9904 2.4551 -3.4298 C.ar 1 <0> 0.2711 10 | 4 C4 -2.7691 2.2606 -2.2932 C.ar 1 <0> -0.1151 11 | 5 C5 -2.1467 1.8212 -1.1361 C.ar 1 <0> -0.0399 12 | 6 C6 -0.7833 1.5922 -1.1482 C.ar 1 <0> -0.0060 13 | 7 N1 -0.0750 1.7816 -2.2440 N.ar 1 <0> -0.3881 14 | 8 C7 -2.9464 1.5960 0.1212 C.3 1 <0> 0.1212 15 | 9 O1 -2.0632 1.2639 1.1944 O.3 1 <0> -0.5758 16 | 10 C8 -4.2530 2.5219 -2.3200 C.3 1 <0> 0.1049 17 | 11 N2 -4.5223 3.8696 -1.8013 N.3 1 <0> -0.5303 18 | 12 C9 -4.0885 4.8974 -2.7562 C.3 1 <0> 0.0579 19 | 13 C10 -4.2493 6.2823 -2.1251 C.3 1 <0> -0.1147 20 | 14 C11 -5.7172 6.4979 -1.7464 C.3 1 <0> -0.1115 21 | 15 C12 -6.1577 5.3868 -0.7894 C.3 1 <0> 0.0099 22 | 16 H1 -5.5677 5.4395 0.1256 H 1 <0> 0.1093 23 | 17 C13 -5.9416 4.0279 -1.4592 C.3 1 <0> 0.0655 24 | 18 C14 -7.6176 5.5584 -0.4573 C.2 1 <0> 0.2569 25 | 19 N3 -8.5839 5.5383 -1.3253 N.2 1 <0> -0.4717 26 | 20 C15 -9.7634 5.7233 -0.6728 C.ar 1 <0> 0.0214 27 | 21 C16 -11.0952 5.7916 -1.0851 C.ar 1 <0> -0.0613 28 | 22 C17 -12.0887 5.9942 -0.1536 C.ar 1 <0> -0.1193 29 | 23 C18 -11.7741 6.1311 1.1904 C.ar 1 <0> -0.0930 30 | 24 C19 -10.4626 6.0663 1.6120 C.ar 1 <0> -0.0931 31 | 25 C20 -9.4493 5.8619 0.6876 C.ar 1 <0> 0.0061 32 | 26 O2 -8.1061 5.7482 0.7786 O.3 1 <0> -0.2127 33 | 27 O3 -2.5568 2.8857 -4.5878 O.3 1 <0> -0.6252 34 | 28 H2 0.6009 3.4337 -4.6015 H 1 <0> 0.0820 35 | 29 H3 1.0697 1.7170 -4.5621 H 1 <0> 0.0661 36 | 30 H4 -0.3666 2.2265 -5.4818 H 1 <0> 0.1018 37 | 31 H5 -0.2926 1.2499 -0.2491 H 1 <0> 
0.1851 38 | 32 H6 -3.4953 2.5041 0.3703 H 1 <0> 0.0832 39 | 33 H7 -3.6495 0.7783 -0.0369 H 1 <0> 0.0670 40 | 34 H8 -2.5103 1.1050 2.0370 H 1 <0> 0.3903 41 | 35 H9 -4.6165 2.4477 -3.3450 H 1 <0> 0.0673 42 | 36 H10 -4.7630 1.7851 -1.6994 H 1 <0> 0.0789 43 | 37 H11 -3.0419 4.7351 -3.0140 H 1 <0> 0.0845 44 | 38 H12 -4.6981 4.8361 -3.6577 H 1 <0> 0.0435 45 | 39 H13 -3.6292 6.3491 -1.2311 H 1 <0> 0.0717 46 | 40 H14 -3.9419 7.0458 -2.8397 H 1 <0> 0.0753 47 | 41 H15 -5.8288 7.4656 -1.2572 H 1 <0> 0.0696 48 | 42 H16 -6.3331 6.4700 -2.6453 H 1 <0> 0.0773 49 | 43 H17 -6.5425 3.9707 -2.3668 H 1 <0> 0.0541 50 | 44 H18 -6.2410 3.2340 -0.7749 H 1 <0> 0.0778 51 | 45 H19 -11.3453 5.6863 -2.1304 H 1 <0> 0.1364 52 | 46 H20 -13.1195 6.0471 -0.4715 H 1 <0> 0.1385 53 | 47 H21 -12.5619 6.2900 1.9120 H 1 <0> 0.1415 54 | 48 H22 -10.2252 6.1745 2.6600 H 1 <0> 0.1356 55 | 49 H23 0.8794 1.6091 -2.2247 H 1 <0> 0.4251 56 | @BOND 57 | 1 1 28 1 58 | 2 1 29 1 59 | 3 1 30 1 60 | 4 1 2 1 61 | 5 2 3 ar 62 | 6 2 7 ar 63 | 7 3 27 1 64 | 8 3 4 ar 65 | 9 4 5 ar 66 | 10 4 10 1 67 | 11 5 6 ar 68 | 12 5 8 1 69 | 13 6 7 ar 70 | 14 6 31 1 71 | 15 7 49 1 72 | 16 8 9 1 73 | 17 8 32 1 74 | 18 8 33 1 75 | 19 9 34 1 76 | 20 10 11 1 77 | 21 10 35 1 78 | 22 10 36 1 79 | 23 11 17 1 80 | 24 11 12 1 81 | 25 12 13 1 82 | 26 12 37 1 83 | 27 12 38 1 84 | 28 13 14 1 85 | 29 13 39 1 86 | 30 13 40 1 87 | 31 14 15 1 88 | 32 14 41 1 89 | 33 14 42 1 90 | 34 15 16 1 91 | 35 15 17 1 92 | 36 15 18 1 93 | 37 17 43 1 94 | 38 17 44 1 95 | 39 18 26 1 96 | 40 18 19 2 97 | 41 19 20 1 98 | 42 20 25 ar 99 | 43 20 21 ar 100 | 44 21 22 ar 101 | 45 21 45 1 102 | 46 22 23 ar 103 | 47 22 46 1 104 | 48 23 24 ar 105 | 49 23 47 1 106 | 50 24 25 ar 107 | 51 24 48 1 108 | 52 25 26 1 -------------------------------------------------------------------------------- /tests.py: -------------------------------------------------------------------------------- 1 | import os 2 | import unittest 3 | from prepare_ligands import convert_to_pdbqt 4 | 5 | 6 | 
class LigandPrepTests(unittest.TestCase):
    """Smoke test for the mol2 to pdbqt conversion in prepare_ligands."""

    def test_babel(self):
        """Converting the bundled ligand must yield a non-empty pdbqt file."""
        # Resolve the fixture next to this file so the test does not depend
        # on the directory it is launched from.
        fixtures = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'test_files')
        with open(os.path.join(fixtures, 'ZINC67842136.mol2'), 'r') as src:
            mol2 = src.read()
        ligand_paths = convert_to_pdbqt(mol2, fixtures, remove_input=False)
        pdbqt = ligand_paths['pdbqt']
        try:
            with open(pdbqt, 'r') as out:
                pdbqt_contents = out.read()
        finally:
            # Always remove the generated file, even when reading it failed.
            if os.path.exists(pdbqt):
                os.remove(pdbqt)
        self.assertTrue(len(pdbqt_contents) > 0)


if __name__ == '__main__':
    unittest.main()

# --------------------------------------------------------------------------------