├── .gitignore ├── LICENSE.txt ├── README.md ├── docs ├── argparse_to_md.py ├── ipynb2markdown.py ├── mkdocs.yml ├── sources │ ├── changelog.md │ ├── cite.md │ ├── contact.md │ ├── extra.css │ ├── images │ │ ├── automated-pipeline-flowchart.jpg │ │ ├── automated-pipeline-flowchart.pdf │ │ ├── logo-transparent-bg.png │ │ ├── logo.png │ │ ├── obtaining-screenlamp-2.png │ │ ├── obtaining-screenlamp.png │ │ └── toolkit-tutorial │ │ │ └── dataset-overview.png │ ├── index.md │ ├── installation.md │ ├── license.md │ └── user_guide │ │ ├── doc-overview.md │ │ ├── images │ │ └── tools-tutorial-1 │ │ │ ├── 3kpzs-keto-sulfur.png │ │ │ ├── 5-mol2ids.png │ │ │ ├── atomtype-match-ex-1.png │ │ │ ├── atomtype-match-ex-2.png │ │ │ ├── charge-match-ex-1.png │ │ │ ├── fgroup-match-overlays-pymol.png │ │ │ ├── open-fgroup-match-overlays.png │ │ │ ├── pipe-step-1.jpg │ │ │ ├── pipe-step-2.jpg │ │ │ ├── pipe-step-3.jpg │ │ │ ├── pipe-step-4.jpg │ │ │ ├── pipe-step-5.jpg │ │ │ ├── pipe-step-6.jpg │ │ │ ├── pipe-step-7.jpg │ │ │ ├── pipe-step-8.jpg │ │ │ ├── pipeline-overview.jpg │ │ │ ├── pymol-overlay-ex-1.png │ │ │ ├── pymol-overlay-ex-2.png │ │ │ └── zincdata-spreadsheat.png │ │ ├── pipeline-tutorial-1.md │ │ ├── tools │ │ ├── tools-tutorial-1.ipynb │ │ ├── tools-tutorial-1.md │ │ └── tools.md ├── united │ ├── _LICENSE │ ├── __init__.py │ ├── base.html │ ├── content.html │ ├── css │ │ ├── base.css │ │ ├── bootstrap-custom.min.css │ │ ├── font-awesome-4.0.3.css │ │ └── highlight.css │ ├── fonts │ │ ├── fontawesome-webfont.eot │ │ ├── fontawesome-webfont.svg │ │ ├── fontawesome-webfont.ttf │ │ └── fontawesome-webfont.woff │ ├── img │ │ └── favicon.ico │ ├── js │ │ ├── base.js │ │ ├── bootstrap-3.0.3.min.js │ │ ├── highlight.pack.js │ │ └── jquery-1.10.2.min.js │ ├── main.html │ ├── nav-sub.html │ ├── nav.html │ └── toc.html └── update_docs.py ├── requirements.txt └── tools ├── count_mol2.py ├── datatable_to_id.py ├── enumerate_conformers.py ├── funcgroup_distance_to_id.py ├── 
funcgroup_matching.py ├── funcgroup_matching_selection.py ├── funcgroup_presence_to_id.py ├── generate_conformers_obabel.py ├── generate_conformers_omega.py ├── id_to_mol2.py ├── merge_id_files.py ├── mol2_to_id.py ├── overlay_molecules_rocs.py ├── overlay_molecules_shapeit.py ├── pipelines ├── experimental │ ├── pipeline-example-1-config_obabel.yaml │ └── pipeline-example-1_obabel.py ├── pipeline-example-1-config.yaml └── pipeline-example-1.py └── sort_rocs_mol2.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Datafiles 2 | *.graffle 3 | example-files/ 4 | docs/workflow/ 5 | docs/html/ 6 | docs/sources/workflow/80698/ 7 | docs/sources/workflow/3keto-24sulfate/ 8 | docs/sources/workflow/example_1/ 9 | docs/sources/user_guide/tutorial-results/ 10 | docs/sources/user_guide/tk-tutorial_data 11 | 12 | # MacOS 13 | .DS_Store 14 | 15 | # Jupyter Notebook 16 | .ipynb_checkpoints 17 | 18 | 19 | # Byte-compiled / optimized / DLL files 20 | __pycache__/ 21 | *.py[cod] 22 | *$py.class 23 | 24 | # C extensions 25 | *.so 26 | 27 | # Distribution / packaging 28 | .Python 29 | env/ 30 | build/ 31 | develop-eggs/ 32 | dist/ 33 | downloads/ 34 | eggs/ 35 | .eggs/ 36 | lib/ 37 | lib64/ 38 | parts/ 39 | sdist/ 40 | var/ 41 | *.egg-info/ 42 | .installed.cfg 43 | *.egg 44 | 45 | # PyInstaller 46 | # Usually these files are written by a python script from a template 47 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
48 | *.manifest 49 | *.spec 50 | 51 | # Installer logs 52 | pip-log.txt 53 | pip-delete-this-directory.txt 54 | 55 | # Unit test / coverage reports 56 | htmlcov/ 57 | .tox/ 58 | .coverage 59 | .coverage.* 60 | .cache 61 | nosetests.xml 62 | coverage.xml 63 | *,cover 64 | .hypothesis/ 65 | 66 | # Translations 67 | *.mo 68 | *.pot 69 | 70 | # Django stuff: 71 | *.log 72 | local_settings.py 73 | 74 | # Flask stuff: 75 | instance/ 76 | .webassets-cache 77 | 78 | # Scrapy stuff: 79 | .scrapy 80 | 81 | # Sphinx documentation 82 | docs/_build/ 83 | 84 | # PyBuilder 85 | target/ 86 | 87 | # IPython Notebook 88 | .ipynb_checkpoints 89 | 90 | # pyenv 91 | .python-version 92 | 93 | # celery beat schedule file 94 | celerybeat-schedule 95 | 96 | # dotenv 97 | .env 98 | 99 | # virtualenv 100 | venv/ 101 | ENV/ 102 | 103 | # Spyder project settings 104 | .spyderproject 105 | 106 | # Rope project settings 107 | .ropeproject 108 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. 
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | 180 | Copyright 2017 Michigan State University 181 | 182 | The screenlamp software was developed by Sebastian Raschka 183 | and Leslie A. Kuhn in the Protein Structure Lab 184 | (http://www.kuhnlab.bmb.msu.edu) at Michigan State University. 185 | 186 | Licensed under the Apache License, Version 2.0 (the "License"); 187 | you may not use this file except in compliance with the License. 
188 | You may obtain a copy of the License at 189 | 190 | http://www.apache.org/licenses/LICENSE-2.0 191 | 192 | Unless required by applicable law or agreed to in writing, software 193 | distributed under the License is distributed on an "AS IS" BASIS, 194 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 195 | See the License for the specific language governing permissions and 196 | limitations under the License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 |  8 | [](license) 9 | [](license) 10 | 11 | ### A toolkit for ligand-based virtual screening 12 | 13 | 14 | Screenlamp is a Python package for facilitating ligand-based virtual screening workflows and toolkits with hypothesis-driven filtering steps. 15 | 16 | **The official documentation is available at https://psa-lab.github.io/screenlamp.** 17 | 18 | 19 | ## About 20 | 21 | The screenlamp toolkit was developed in the [Protein Structure Analysis & Design Laboratory](http://www.kuhnlab.bmb.msu.edu) at Michigan State University. For additional information about screenlamp, please refer to the accompanying research publication, which is currently under revision: 22 | 23 | - Raschka, Sebastian, Anne M. Scott, Nan Liu, Santosh Gunturu, Mar Huertas, Weiming Li, and Leslie A. Kuhn (2018). "Enabling the hypothesis-driven prioritization of ligand candidates in big databases: 24 | Screenlamp and its application to GPCR inhibitor discovery for invasive species control". *Journal of Computer-Aided Molecular Design* 32: 415. 25 | [[biorxiv preprint](https://www.biorxiv.org/content/early/2018/01/17/249151)] 26 | [[Journal article](https://link.springer.com/article/10.1007/s10822-018-0100-7)] 27 | 28 | 29 | Screenlamp is research software and has been made available to other researchers under a permissive [Apache v2 open source license](license). 
If you use screenlamp in your scientific projects or any derivative work, the authors of the screenlamp software must be acknowledged and the publication listed above should be cited. 30 | 31 | 32 | # Contact 33 | 34 | If you encounter bugs or other technical issues with the screenlamp software package, please send an email to [kuhnlab@msu.edu](mailto:kuhnlab@msu.edu) or use the [Issue Tracker](https://github.com/psa-lab/screenlamp/issues). For questions about the [screenlamp research article](cite/index.html), please contact the publisher or [corresponding author](mailto:kuhnl@msu.edu) directly instead. 35 | -------------------------------------------------------------------------------- /docs/argparse_to_md.py: -------------------------------------------------------------------------------- 1 | # Tool to generate a markdown documentation 2 | # from argparse scripts. 3 | # 4 | # usage: python argparse_to_md.py scipts/ > doc.md 5 | # 6 | # Copyright (C) 2017 Sebastian Raschka 7 | # License: MIT 8 | # 9 | # Author: Sebastian Raschka 10 | # Author email: mail@sebastianraschka.com 11 | 12 | 13 | import subprocess 14 | import os 15 | import sys 16 | 17 | 18 | def get_pyfiles(path): 19 | files = [] 20 | for file in os.listdir(path): 21 | if file.endswith('.py') and not file.startswith('_'): 22 | files.append(os.path.join(os.path.abspath(path), file)) 23 | return files 24 | 25 | 26 | def get_help_messages(path): 27 | s = subprocess.check_output('python %s --help' % path, shell=True) 28 | return s.decode() 29 | 30 | 31 | def help_to_md(s): 32 | out_lines = [] 33 | 34 | 35 | example_section = False 36 | for line in s.split('\n'): 37 | 38 | lstripped = line.lstrip() 39 | stripped = lstripped.rstrip() 40 | 41 | if not stripped: 42 | continue 43 | 44 | if stripped == "-v, --version show program's version number and exit": 45 | out_lines.append('- `-v, --version` ') 46 | out_lines.append("Show program's version number and exit") 47 | 48 | elif stripped == "-h, --help show this 
help message and exit": 49 | out_lines.append('- `-h, --help` ') 50 | out_lines.append("Show this help message and exit") 51 | 52 | elif stripped.startswith('Example:'): 53 | example_section = True 54 | out_lines.append('\n**Example:**\n\n```') 55 | 56 | elif example_section: 57 | if stripped.startswith('#'): 58 | out_lines.append('```\n') 59 | example_section = False 60 | out_lines.append(stripped) 61 | 62 | elif stripped.startswith('[-'): 63 | out_lines.append('`%s` ' % stripped) 64 | 65 | elif stripped.startswith('usage:'): 66 | usage = stripped.split('usage:')[-1] 67 | out_lines.append('\n**Usage:**\n\n %s\n\n' % usage) 68 | 69 | elif stripped.startswith('optional arguments:'): 70 | out_lines.append('\n**Arguments:**\n\n') 71 | 72 | elif stripped.startswith(' --') or stripped.startswith('python'): 73 | out_lines.append('`%s` ' % stripped) 74 | 75 | elif line.startswith(' -'): 76 | out_lines.append('- `%s` ' % stripped) 77 | 78 | #usage = line.split('Example')[-1].strip().strip(':') 79 | 80 | else: 81 | if stripped.startswith(' '): 82 | stripped = ' ' + stripped.strip() 83 | out_lines.append(stripped) 84 | 85 | if example_section: 86 | out_lines.append('```\n') 87 | 88 | return out_lines 89 | 90 | 91 | def main(dir_path): 92 | s = ("This page serves as a quick lookup reference for the different" 93 | " modules within screenlamp. 
Please see the [Toolkit Tutorial](tools-tutorial-1)" 94 | " for a" 95 | " more detailed explanation of the different modules and how" 96 | " they can be combined in a typical virtual screening pipeline.") 97 | 98 | contents = ["# Tools", "\n", s] 99 | paths = get_pyfiles(dir_path) 100 | for f in paths: 101 | contents.append('\n\n## %s\n\n' % os.path.basename(f)) 102 | s = get_help_messages(f) 103 | lines = help_to_md(s) 104 | contents.extend(lines) 105 | for line in contents: 106 | print(line) 107 | 108 | 109 | if __name__ == '__main__': 110 | main(sys.argv[1]) 111 | -------------------------------------------------------------------------------- /docs/ipynb2markdown.py: -------------------------------------------------------------------------------- 1 | # IPython Notebook to Markdown conversion script 2 | # 3 | # Sebastian Raschka 2014-2016 4 | # mlxtend Machine Learning Library Extensions 5 | # 6 | # Author: Sebastian Raschka 7 | # 8 | # License: BSD 3 clause 9 | 10 | import subprocess 11 | import glob 12 | import shutil 13 | import os 14 | import markdown 15 | from markdown.treeprocessors import Treeprocessor 16 | from markdown.extensions import Extension 17 | from nbconvert.exporters import MarkdownExporter 18 | 19 | 20 | class ImgExtractor(Treeprocessor): 21 | def run(self, doc): 22 | self.markdown.images = [] 23 | for image in doc.findall('.//img'): 24 | self.markdown.images.append(image.get('src')) 25 | 26 | 27 | class ImgExtExtension(Extension): 28 | def extendMarkdown(self, md, md_globals): 29 | img_ext = ImgExtractor(md) 30 | md.treeprocessors.add('imgext', img_ext, '>inline') 31 | 32 | 33 | def ipynb_to_md(ipynb_path): 34 | orig_path = os.getcwd() 35 | os.chdir(os.path.dirname(ipynb_path)) 36 | file_name = os.path.basename(ipynb_path) 37 | subprocess.call(['python', '-m', 'nbconvert', 38 | '--to', 'markdown', file_name]) 39 | 40 | new_s = [] 41 | md_name = file_name.replace('.ipynb', '.md') 42 | with open(md_name, 'r') as f: 43 | for line in f: 44 | if 
line.startswith('#'): 45 | new_s.append(line) 46 | break 47 | for line in f: 48 | if line.startswith('## API'): 49 | new_s.append(line) 50 | new_s.append('\n') 51 | break 52 | new_s.append(line) 53 | for line in f: 54 | if line.lstrip().startswith('#'): 55 | break 56 | for line in f: 57 | if line.lstrip().startswith('```'): 58 | continue 59 | else: 60 | new_s.append(line[4:]) 61 | 62 | with open(md_name, 'w') as f: 63 | f.write(''.join(new_s)) 64 | os.chdir(orig_path) 65 | 66 | 67 | # md = markdown.Markdown(extensions=[ImgExtExtension()]) 68 | # html = md.convert(data) 69 | # print(md.images) 70 | 71 | 72 | if __name__ == "__main__": 73 | 74 | import argparse 75 | parser = argparse.ArgumentParser( 76 | description='Convert docstring into a markdown API documentation.', 77 | formatter_class=argparse.RawTextHelpFormatter) 78 | 79 | parser.add_argument('-i', '--ipynb', 80 | help='Path to the IPython file') 81 | 82 | parser.add_argument('-a', '--all', 83 | help='Path to parse all ipynb recursively') 84 | 85 | parser.add_argument('-v', '--version', 86 | action='version', 87 | version='v. 
0.1') 88 | 89 | args = parser.parse_args() 90 | 91 | if args.all and args.ipynb: 92 | raise AttributeError('Conflicting flags --ipynb and --all; choose one') 93 | 94 | if args.ipynb: 95 | ipynb_to_md(ipynb_path=args.ipynb) 96 | else: 97 | tree = os.walk(args.all) 98 | for d in tree: 99 | filenames = glob.glob(os.path.join(d[0], '*')) 100 | for f in filenames: 101 | if f.endswith('.ipynb'): 102 | print(f) 103 | 104 | ipynb_to_md(ipynb_path=f) 105 | -------------------------------------------------------------------------------- /docs/mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: screenlamp 2 | site_url: http://rasbt.github.io/screenlamp 3 | site_author: Sebastian Raschka 4 | site_description: A toolkit for ligand-based screening 5 | 6 | repo_url: https://github.com/psa-lab/screenlamp 7 | 8 | #include_search: true # not necessary for this theme 9 | docs_dir: sources 10 | site_dir: html 11 | theme_dir: united 12 | 13 | use_directory_urls: false 14 | site_favicon: favicon.ico 15 | 16 | markdown_extensions: 17 | - mathjax 18 | - extra 19 | - tables 20 | - fenced_code 21 | extra_javascript: 22 | - https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML 23 | - mathjaxhelper.js 24 | extra_css: 25 | - extra.css 26 | 27 | copyright: Copyright © 2017 Michigan State University. Developed in the PSA Lab.Sebastian Raschka et al. 2017. 
28 | 29 | pages: 30 | - Home: index.md 31 | - Installation: installation.md 32 | - User Documentation: 33 | - Overview: user_guide/doc-overview.md 34 | - Tools: user_guide/tools.md 35 | - Toolkit Tutorial: user_guide/tools-tutorial-1.md 36 | - Pipeline Tutorial: user_guide/pipeline-tutorial-1.md 37 | - About: 38 | - Contact: contact.md 39 | - Release Notes: changelog.md 40 | - License: license.md 41 | - Citing screenlamp: cite.md 42 | -------------------------------------------------------------------------------- /docs/sources/changelog.md: -------------------------------------------------------------------------------- 1 | # Release Notes 2 | 3 | ### Version 1.0.0 (2017-10-31) 4 | 5 | - First release 6 | 7 | ##### Downloads 8 | 9 | - [Source code (zip)](https://github.com/psa-lab/screenlamp/archive/v1.0.0.zip) 10 | - [Source code (tar.gz)](https://github.com/psa-lab/screenlamp/archive/v1.0.0.tar.gz) 11 | -------------------------------------------------------------------------------- /docs/sources/cite.md: -------------------------------------------------------------------------------- 1 | # Citing screenlamp 2 | 3 | Screenlamp is research software and has been made available to other researchers under a permissive [Apache v2 open source license](license). If you use screenlamp in your scientific projects or any derivative work, the authors of the screenlamp software must be acknowledged and the following publication should be cited: 4 | 5 | - Raschka, Sebastian, Anne M. Scott, Nan Liu, Santosh Gunturu, Mar Huertas, Weiming Li, and Leslie A. Kuhn (2018). "Enabling the hypothesis-driven prioritization of ligand candidates in big databases: 6 | Screenlamp and its application to GPCR inhibitor discovery for invasive species control". *Journal of Computer-Aided Molecular Design* 32: 415. 
7 | [[biorxiv preprint](https://www.biorxiv.org/content/early/2018/01/17/249151)] 8 | [[Journal article](https://link.springer.com/article/10.1007/s10822-018-0100-7)] 9 | 10 | 11 | 12 | ### BibTeX Entry 13 | 14 | ```tex 15 | @article{raschka2018, 16 | title={Enabling the hypothesis-driven prioritization of ligand 17 | candidates in big databases: Screenlamp and its application to GPCR 18 | inhibitor discovery for invasive species control}, 19 | volume={32}, 20 | DOI={10.1007/s10822-018-0100-7}, 21 | number={3}, 22 | journal={Journal of Computer-Aided Molecular Design}, 23 | author={Raschka, Sebastian and Scott, Anne M. 24 | and Liu, Nan and Gunturu, Santosh and Huertas, 25 | Mar and Li, Weiming and Kuhn, Leslie A.}, 26 | year={2018}, 27 | month={Mar}, 28 | pages={415–433} 29 | } 30 | 31 | ``` 32 | 33 | -------------------------------------------------------------------------------- /docs/sources/contact.md: -------------------------------------------------------------------------------- 1 | # Contact 2 | 3 | If you encounter bugs or other technical issues with the screenlamp software package, please send an email to [kuhnlab@msu.edu](mailto:kuhnlab@msu.edu) or use the [Issue Tracker](https://github.com/psa-lab/screenlamp/issues). For questions about the [screenlamp research article](cite/index.html), please contact the publisher or [corresponding author](mailto:kuhnl@msu.edu) directly instead. 
4 | -------------------------------------------------------------------------------- /docs/sources/extra.css: -------------------------------------------------------------------------------- 1 | h1, h2, h3, h4 { 2 | padding-top: 2em; 3 | padding-bottom: 0.5em; 4 | } 5 | 6 | h5, h6 { 7 | padding-top: 1em; 8 | padding-bottom: 0.2em; 9 | } 10 | -------------------------------------------------------------------------------- /docs/sources/images/automated-pipeline-flowchart.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/images/automated-pipeline-flowchart.jpg -------------------------------------------------------------------------------- /docs/sources/images/automated-pipeline-flowchart.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/images/automated-pipeline-flowchart.pdf -------------------------------------------------------------------------------- /docs/sources/images/logo-transparent-bg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/images/logo-transparent-bg.png -------------------------------------------------------------------------------- /docs/sources/images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/images/logo.png -------------------------------------------------------------------------------- /docs/sources/images/obtaining-screenlamp-2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/images/obtaining-screenlamp-2.png -------------------------------------------------------------------------------- /docs/sources/images/obtaining-screenlamp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/images/obtaining-screenlamp.png -------------------------------------------------------------------------------- /docs/sources/images/toolkit-tutorial/dataset-overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/images/toolkit-tutorial/dataset-overview.png -------------------------------------------------------------------------------- /docs/sources/index.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 |  11 | [](license) 12 | [](license) 13 | 14 | ### A toolkit for ligand-based virtual screening 15 | 16 | 17 | Screenlamp is a Python package for facilitating ligand-based virtual screening workflows and toolkits with hypothesis-driven filtering steps. 18 | 19 | 20 | ## About 21 | 22 | The screenlamp toolkit was developed in the [Protein Structure Analysis & Design Laboratory](http://www.kuhnlab.bmb.msu.edu) at Michigan State University. For additional information about screenlamp, please refer to the accompanying research publication, which is currently under revision: 23 | 24 | - Raschka, Sebastian, Anne M. Scott, Nan Liu, Santosh Gunturu, Mar Huertas, Weiming Li, and Leslie A. Kuhn (2018). "Enabling the hypothesis-driven prioritization of ligand candidates in big databases: 25 | Screenlamp and its application to GPCR inhibitor discovery for invasive species control". 
*Journal of Computer-Aided Molecular Design* 32: 415. 26 | [[biorxiv preprint](https://www.biorxiv.org/content/early/2018/01/17/249151)] 27 | [[Journal article](https://link.springer.com/article/10.1007/s10822-018-0100-7)] 28 | 29 | 30 | 31 | Screenlamp is research software and has been made available to other researchers under a permissive [Apache v2 open source license](license). If you use screenlamp in your scientific projects or any derivative work, the authors of the screenlamp software must be acknowledged and the publication listed above should be cited. 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /docs/sources/installation.md: -------------------------------------------------------------------------------- 1 | # Installing screenlamp 2 | 3 | --- 4 | 5 | ## Obtaining screenlamp 6 | 7 | You can download the latest version of screenlamp from the [GitHub repository](https://github.com/psa-lab/screenlamp) by clicking on "Download ZIP" or download the latest stable release from the ["release" list](https://github.com/psa-lab/screenlamp/releases): 8 | 9 | [](https://github.com/psa-lab/screenlamp) 10 | 11 | ## Setting up your Python environment for screenlamp 12 | 13 | Python package dependencies of screenlamp are listed in the `requirements.txt` file located in the screenlamp directory. To install all of these dependencies most conveniently, you can execute the following command: 14 | 15 | pip install -r requirements.txt 16 | 17 | The main modules of screenlamp are located in the `tools/` subdirectory, and after satisfying the Python package requirements (see [`requirements.txt`](https://github.com/psa-lab/screenlamp/blob/master/requirements.txt)), they are ready to use. If you haven't used screenlamp before, it is recommended to read the screenlamp [tutorial](user_guide/doc-overview.md).
18 | 19 | ## Other software requirements 20 | 21 | Certain submodules within screenlamp require external software to sample low-energy conformations of molecules and to generate pair-wise overlays. The tools that are currently being used in the [pre-built, automated screening pipeline](user_guide/pipeline-tutorial-1/) are [OpenEye OMEGA](https://www.eyesopen.com/omega) and [OpenEye ROCS](https://www.eyesopen.com/rocs) to accomplish those tasks. However, screenlamp does not strictly require OMEGA and ROCS, and you are free to use any open source alternative, provided that the output files are compatible with the screenlamp tools, which use the MOL2 file format. 22 | 23 | If you don't have access to OpenEye toolkits yet, you can visit the [OpenEye website](https://www.eyesopen.com/licensing-philosophy) for more details on their licensing terms (for example, OpenEye offers a free licensing model for academics engaged in public domain research or teaching). 24 | 25 | ## Obtaining older versions of screenlamp 26 | 27 | To obtain one of the previous versions of screenlamp, please see the [Release Notes](changelog), which contains download links for all release versions of screenlamp.
28 | 29 | ### Development version 30 | 31 | You can download the latest development version of screenlamp as [ZIP](https://github.com/psa-lab/screenlamp/archive/master.zip) file directly from GitHub: 32 | 33 | [](https://github.com/psa-lab/screenlamp) 34 | 35 | 36 | 37 | Alternatively, you can clone the screenlamp development version to your local machine by executing the following command: 38 | 39 | git clone https://github.com/psa-lab/screenlamp.git 40 | -------------------------------------------------------------------------------- /docs/sources/license.md: -------------------------------------------------------------------------------- 1 | 2 | ########################################################################## 3 | 4 | Apache License 5 | Version 2.0, January 2004 6 | http://www.apache.org/licenses/ 7 | 8 | ########################################################################## 9 | 10 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 11 | 12 | 1. Definitions. 13 | 14 | "License" shall mean the terms and conditions for use, reproduction, 15 | and distribution as defined by Sections 1 through 9 of this document. 16 | 17 | "Licensor" shall mean the copyright owner or entity authorized by 18 | the copyright owner that is granting the License. 19 | 20 | "Legal Entity" shall mean the union of the acting entity and all 21 | other entities that control, are controlled by, or are under common 22 | control with that entity. For the purposes of this definition, 23 | "control" means (i) the power, direct or indirect, to cause the 24 | direction or management of such entity, whether by contract or 25 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 26 | outstanding shares, or (iii) beneficial ownership of such entity. 27 | 28 | "You" (or "Your") shall mean an individual or Legal Entity 29 | exercising permissions granted by this License. 
30 | 31 | "Source" form shall mean the preferred form for making modifications, 32 | including but not limited to software source code, documentation 33 | source, and configuration files. 34 | 35 | "Object" form shall mean any form resulting from mechanical 36 | transformation or translation of a Source form, including but 37 | not limited to compiled object code, generated documentation, 38 | and conversions to other media types. 39 | 40 | "Work" shall mean the work of authorship, whether in Source or 41 | Object form, made available under the License, as indicated by a 42 | copyright notice that is included in or attached to the work 43 | (an example is provided in the Appendix below). 44 | 45 | "Derivative Works" shall mean any work, whether in Source or Object 46 | form, that is based on (or derived from) the Work and for which the 47 | editorial revisions, annotations, elaborations, or other modifications 48 | represent, as a whole, an original work of authorship. For the purposes 49 | of this License, Derivative Works shall not include works that remain 50 | separable from, or merely link (or bind by name) to the interfaces of, 51 | the Work and Derivative Works thereof. 52 | 53 | "Contribution" shall mean any work of authorship, including 54 | the original version of the Work and any modifications or additions 55 | to that Work or Derivative Works thereof, that is intentionally 56 | submitted to Licensor for inclusion in the Work by the copyright owner 57 | or by an individual or Legal Entity authorized to submit on behalf of 58 | the copyright owner. 
For the purposes of this definition, "submitted" 59 | means any form of electronic, verbal, or written communication sent 60 | to the Licensor or its representatives, including but not limited to 61 | communication on electronic mailing lists, source code control systems, 62 | and issue tracking systems that are managed by, or on behalf of, the 63 | Licensor for the purpose of discussing and improving the Work, but 64 | excluding communication that is conspicuously marked or otherwise 65 | designated in writing by the copyright owner as "Not a Contribution." 66 | 67 | "Contributor" shall mean Licensor and any individual or Legal Entity 68 | on behalf of whom a Contribution has been received by Licensor and 69 | subsequently incorporated within the Work. 70 | 71 | 2. Grant of Copyright License. Subject to the terms and conditions of 72 | this License, each Contributor hereby grants to You a perpetual, 73 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 74 | copyright license to reproduce, prepare Derivative Works of, 75 | publicly display, publicly perform, sublicense, and distribute the 76 | Work and such Derivative Works in Source or Object form. 77 | 78 | 3. Grant of Patent License. Subject to the terms and conditions of 79 | this License, each Contributor hereby grants to You a perpetual, 80 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 81 | (except as stated in this section) patent license to make, have made, 82 | use, offer to sell, sell, import, and otherwise transfer the Work, 83 | where such license applies only to those patent claims licensable 84 | by such Contributor that are necessarily infringed by their 85 | Contribution(s) alone or by combination of their Contribution(s) 86 | with the Work to which such Contribution(s) was submitted. 
If You 87 | institute patent litigation against any entity (including a 88 | cross-claim or counterclaim in a lawsuit) alleging that the Work 89 | or a Contribution incorporated within the Work constitutes direct 90 | or contributory patent infringement, then any patent licenses 91 | granted to You under this License for that Work shall terminate 92 | as of the date such litigation is filed. 93 | 94 | 4. Redistribution. You may reproduce and distribute copies of the 95 | Work or Derivative Works thereof in any medium, with or without 96 | modifications, and in Source or Object form, provided that You 97 | meet the following conditions: 98 | 99 | (a) You must give any other recipients of the Work or 100 | Derivative Works a copy of this License; and 101 | 102 | (b) You must cause any modified files to carry prominent notices 103 | stating that You changed the files; and 104 | 105 | (c) You must retain, in the Source form of any Derivative Works 106 | that You distribute, all copyright, patent, trademark, and 107 | attribution notices from the Source form of the Work, 108 | excluding those notices that do not pertain to any part of 109 | the Derivative Works; and 110 | 111 | (d) If the Work includes a "NOTICE" text file as part of its 112 | distribution, then any Derivative Works that You distribute must 113 | include a readable copy of the attribution notices contained 114 | within such NOTICE file, excluding those notices that do not 115 | pertain to any part of the Derivative Works, in at least one 116 | of the following places: within a NOTICE text file distributed 117 | as part of the Derivative Works; within the Source form or 118 | documentation, if provided along with the Derivative Works; or, 119 | within a display generated by the Derivative Works, if and 120 | wherever such third-party notices normally appear. The contents 121 | of the NOTICE file are for informational purposes only and 122 | do not modify the License. 
You may add Your own attribution 123 | notices within Derivative Works that You distribute, alongside 124 | or as an addendum to the NOTICE text from the Work, provided 125 | that such additional attribution notices cannot be construed 126 | as modifying the License. 127 | 128 | You may add Your own copyright statement to Your modifications and 129 | may provide additional or different license terms and conditions 130 | for use, reproduction, or distribution of Your modifications, or 131 | for any such Derivative Works as a whole, provided Your use, 132 | reproduction, and distribution of the Work otherwise complies with 133 | the conditions stated in this License. 134 | 135 | 5. Submission of Contributions. Unless You explicitly state otherwise, 136 | any Contribution intentionally submitted for inclusion in the Work 137 | by You to the Licensor shall be under the terms and conditions of 138 | this License, without any additional terms or conditions. 139 | Notwithstanding the above, nothing herein shall supersede or modify 140 | the terms of any separate license agreement you may have executed 141 | with Licensor regarding such Contributions. 142 | 143 | 6. Trademarks. This License does not grant permission to use the trade 144 | names, trademarks, service marks, or product names of the Licensor, 145 | except as required for reasonable and customary use in describing the 146 | origin of the Work and reproducing the content of the NOTICE file. 147 | 148 | 7. Disclaimer of Warranty. Unless required by applicable law or 149 | agreed to in writing, Licensor provides the Work (and each 150 | Contributor provides its Contributions) on an "AS IS" BASIS, 151 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 152 | implied, including, without limitation, any warranties or conditions 153 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 154 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 155 | appropriateness of using or redistributing the Work and assume any 156 | risks associated with Your exercise of permissions under this License. 157 | 158 | 8. Limitation of Liability. In no event and under no legal theory, 159 | whether in tort (including negligence), contract, or otherwise, 160 | unless required by applicable law (such as deliberate and grossly 161 | negligent acts) or agreed to in writing, shall any Contributor be 162 | liable to You for damages, including any direct, indirect, special, 163 | incidental, or consequential damages of any character arising as a 164 | result of this License or out of the use or inability to use the 165 | Work (including but not limited to damages for loss of goodwill, 166 | work stoppage, computer failure or malfunction, or any and all 167 | other commercial damages or losses), even if such Contributor 168 | has been advised of the possibility of such damages. 169 | 170 | 9. Accepting Warranty or Additional Liability. While redistributing 171 | the Work or Derivative Works thereof, You may choose to offer, 172 | and charge a fee for, acceptance of support, warranty, indemnity, 173 | or other liability obligations and/or rights consistent with this 174 | License. However, in accepting such obligations, You may act only 175 | on Your own behalf and on Your sole responsibility, not on behalf 176 | of any other Contributor, and only if You agree to indemnify, 177 | defend, and hold each Contributor harmless for any liability 178 | incurred by, or claims asserted against, such Contributor by reason 179 | of your accepting any such warranty or additional liability. 180 | 181 | 182 | 183 | 184 | END OF TERMS AND CONDITIONS 185 | 186 | 187 | Copyright 2017 Michigan State University 188 | 189 | The screenlamp software was developed by Sebastian Raschka 190 | and Leslie A. Kuhn in the Protein Structure Lab 191 | (http://www.kuhnlab.bmb.msu.edu) at Michigan State University. 
192 | 193 | Licensed under the Apache License, Version 2.0 (the "License"); 194 | you may not use this file except in compliance with the License. 195 | You may obtain a copy of the License at 196 | 197 | http://www.apache.org/licenses/LICENSE-2.0 198 | 199 | Unless required by applicable law or agreed to in writing, software 200 | distributed under the License is distributed on an "AS IS" BASIS, 201 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 202 | See the License for the specific language governing permissions and 203 | limitations under the License. -------------------------------------------------------------------------------- /docs/sources/user_guide/doc-overview.md: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | The screenlamp toolkit is designed in a modular way to provide the building blocks for constructing efficient and flexible virtual screening pipelines. The user documentation consists of three parts: 4 | 5 | 1. [Tools](tools): An overview of the different tools within screenlamp and a summary of their usage commands. 6 | 2. [Toolkit Tutorial](tools-tutorial-1): An example showing how to combine the different tools summarized in the Toolkit API to perform a typical virtual screening run. 7 | 3. [Pipeline Tutorial](pipeline-tutorial-1): A preconstructed, automated virtual pipeline based on the tools listed in the Toolkit API and the virtual screening run explained in the Toolkit Tutorial. 8 | 9 | If you are new to screenlamp and would like to get a top-down perspective of what you can do with this toolkit, I recommend starting with the [Pipeline Tutorial](pipeline-tutorial-1), which presents you with an automated virtual screening run on a small example dataset. To construct your own virtual screening pipelines and see how the different modules within screenlamp can be used in tandem, please read the [Toolkit Tutorial](tools-tutorial-1). 
While reading through the tutorials, the [Tools](tools) page can be used as a reference for more detailed descriptions of the tools that are available within screenlamp. -------------------------------------------------------------------------------- /docs/sources/user_guide/images/tools-tutorial-1/3kpzs-keto-sulfur.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/user_guide/images/tools-tutorial-1/3kpzs-keto-sulfur.png -------------------------------------------------------------------------------- /docs/sources/user_guide/images/tools-tutorial-1/5-mol2ids.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/user_guide/images/tools-tutorial-1/5-mol2ids.png -------------------------------------------------------------------------------- /docs/sources/user_guide/images/tools-tutorial-1/atomtype-match-ex-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/user_guide/images/tools-tutorial-1/atomtype-match-ex-1.png -------------------------------------------------------------------------------- /docs/sources/user_guide/images/tools-tutorial-1/atomtype-match-ex-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/user_guide/images/tools-tutorial-1/atomtype-match-ex-2.png -------------------------------------------------------------------------------- /docs/sources/user_guide/images/tools-tutorial-1/charge-match-ex-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/user_guide/images/tools-tutorial-1/charge-match-ex-1.png -------------------------------------------------------------------------------- /docs/sources/user_guide/images/tools-tutorial-1/fgroup-match-overlays-pymol.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/user_guide/images/tools-tutorial-1/fgroup-match-overlays-pymol.png -------------------------------------------------------------------------------- /docs/sources/user_guide/images/tools-tutorial-1/open-fgroup-match-overlays.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/user_guide/images/tools-tutorial-1/open-fgroup-match-overlays.png -------------------------------------------------------------------------------- /docs/sources/user_guide/images/tools-tutorial-1/pipe-step-1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/user_guide/images/tools-tutorial-1/pipe-step-1.jpg -------------------------------------------------------------------------------- /docs/sources/user_guide/images/tools-tutorial-1/pipe-step-2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/user_guide/images/tools-tutorial-1/pipe-step-2.jpg -------------------------------------------------------------------------------- /docs/sources/user_guide/images/tools-tutorial-1/pipe-step-3.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/user_guide/images/tools-tutorial-1/pipe-step-3.jpg -------------------------------------------------------------------------------- /docs/sources/user_guide/images/tools-tutorial-1/pipe-step-4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/user_guide/images/tools-tutorial-1/pipe-step-4.jpg -------------------------------------------------------------------------------- /docs/sources/user_guide/images/tools-tutorial-1/pipe-step-5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/user_guide/images/tools-tutorial-1/pipe-step-5.jpg -------------------------------------------------------------------------------- /docs/sources/user_guide/images/tools-tutorial-1/pipe-step-6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/user_guide/images/tools-tutorial-1/pipe-step-6.jpg -------------------------------------------------------------------------------- /docs/sources/user_guide/images/tools-tutorial-1/pipe-step-7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/user_guide/images/tools-tutorial-1/pipe-step-7.jpg -------------------------------------------------------------------------------- /docs/sources/user_guide/images/tools-tutorial-1/pipe-step-8.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/user_guide/images/tools-tutorial-1/pipe-step-8.jpg -------------------------------------------------------------------------------- /docs/sources/user_guide/images/tools-tutorial-1/pipeline-overview.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/user_guide/images/tools-tutorial-1/pipeline-overview.jpg -------------------------------------------------------------------------------- /docs/sources/user_guide/images/tools-tutorial-1/pymol-overlay-ex-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/user_guide/images/tools-tutorial-1/pymol-overlay-ex-1.png -------------------------------------------------------------------------------- /docs/sources/user_guide/images/tools-tutorial-1/pymol-overlay-ex-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/user_guide/images/tools-tutorial-1/pymol-overlay-ex-2.png -------------------------------------------------------------------------------- /docs/sources/user_guide/images/tools-tutorial-1/zincdata-spreadsheat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/user_guide/images/tools-tutorial-1/zincdata-spreadsheat.png -------------------------------------------------------------------------------- /docs/sources/user_guide/pipeline-tutorial-1.md: -------------------------------------------------------------------------------- 1 | # Tutorial on Using a 
Pre-constructed Screenlamp Pipeline 2 | 3 | ## Overview 4 | 5 | This tutorial explains how to use a pre-built screenlamp pipeline to perform an automated virtual screening on a small example dataset. 6 | 7 | In this particular screening pipeline, we are searching for mimics of a query molecule that contain a keto-group and sulfur atom in a specified distance to each other (13-20 angstroms) and have a high overall chemical and volumetric similarity towards the query. Then, we are selecting a subset of database molecules where the keto-group of the query molecule overlays with a keto-group in the database molecules, and where the sulfur atom in the query overlays with a sulfur atom in the database molecules. The overall virtual screening pipeline is summarized in the flowchart below. For more details on the individual screening steps, please see the [Toolkit Tutorial](./tools-tutorial-1), which walks you through these steps using the same database and approach. 8 | 9 |  10 | 11 | (A higher-resolution PDF version of this flowchart is available [here](https://github.com/rasbt/screenlamp/blob/master/docs/sources/images/automated-pipeline-flowchart.pdf).) 12 | 13 | ### Requirements 14 | 15 | Before you continue with the tutorial, please see the [setup instructions](../installation/index.html#other-software-requirements) for screenlamp if this is your first screening run. 16 | 17 | 18 | ## Obtaining and Preparing the Dataset 19 | 20 | 21 | ### MOL2 Input Files 22 | 23 | The automated screenlamp pipeline that is being used in this tutorial is compatible with Tripos MOL2 files of arbitrary database origin and size. A typical use case for this pipeline would be the screening of all ~18,000,000 *Drug-Like* molecules from [ZINC](http://zinc.docking.org), which is available in MOL2 format on ZINC [here](http://zinc.docking.org/subsets/drug-like). Please note that screenlamp supports both Tripos MOL2 (`*.mol2`) files and gzipped Tripos MOL2 files (`*.mol2.gz`) out of the box. 
Thus, if your input dataset is in gzipped format, you can use it right away without having to make any adjustments or decompress it. However, please note that the decompressing and compressing operations that are performed when working with gzipped files take an additional toll on computational performance. 24 | 25 | **Please keep in mind that this screening pipeline with 18,000,000 input molecules and the preconfigured settings takes about a day to complete on a multi-core desktop computer**. Thus, it is recommended to work through this tutorial using a smaller dataset. With kind permission from John Irwin and the ZINC team, we recommend using a random subset of 70,000 small molecules that we prepared for this tutorial. It takes approximately 10 minutes for a multi-core desktop computer to execute all steps in the automated, virtual screening pipeline described earlier. This subset from ZINC is split into 7 multi-MOL2 files with 10,000 molecules each: `partition_mol2_1.mol2` to `partition_mol2_7.mol2`. 26 | 27 | For this tutorial, please download the dataset by clicking the following link and unzip it on the machine that you are using for the virtual screening run: [https://sebastianraschka.com/datasets/screenlamp/pipeline-tutorial_1/partition_1-7.zip](https://sebastianraschka.com/datasets/screenlamp/pipeline-tutorial_1/partition_1-7.zip) 28 | 29 | ### Data Table for Prefiltering 30 | 31 | For this particular tutorial, you'll also need a data table containing general information about these molecules. Although the partitions you downloaded above are only a small, modified subset of [ZINC](http://zinc.docking.org) molecules, we are going to use the full ~18,000,000 molecule Drug-like table available for download at [http://zinc.docking.org/subsets/drug-like](http://zinc.docking.org/subsets/drug-like).
To download the tab-separated table, click on the [Properties](http://zinc.docking.org/db/bysubset/3/3_prop.xls) link on the [ZINC Drug-like](http://zinc.docking.org/subsets/drug-like) page. Please note that the size of the data table is about 1.8 GB, and thus, the download may take a while depending on your internet connection. Alternatively, to obtain a smaller data table containing only ~170,000 molecules, please use the following link: [https://sebastianraschka.com/datasets/screenlamp/pipeline-tutorial_1/small_table_p1-7.txt](https://sebastianraschka.com/datasets/screenlamp/pipeline-tutorial_1/small_table_p1-7.txt) 32 | 33 | 34 | ### Query Molecule 35 | 36 | The third data file you'll need for ligand-based virtual screening is the query molecule. For this tutorial, please download the following multi-conformer MOL2 file: [https://sebastianraschka.com/datasets/screenlamp/pipeline-tutorial_1/3kpzs_query.mol2](https://sebastianraschka.com/datasets/screenlamp/pipeline-tutorial_1/3kpzs_query.mol2) 37 | 38 | ## Editing the Configuration File 39 | 40 | Once you have obtained the database molecules (mol2 partitions), the data table of molecular properties, and the query molecule, you can prepare the configuration file that stores the information about your local file paths and screening settings. 41 | 42 | As your configuration file template, you can use the following YAML file: [`screenlamp/tools/pipelines/pipeline-example-1-config.yaml`](https://github.com/rasbt/screenlamp/blob/master/tools/pipelines/pipeline-example-1-config.yaml). Create a local copy of it and modify the file paths according to your system's configuration.
43 | 44 | ## Running the Automated Screening Pipeline 45 | 46 | After you have customized your configuration file, you can start the screening pipeline as shown in the example command snippet below: 47 | 48 | ```bash 49 | python path/to/screenlamp/tools/pipelines/pipeline-example-1.py --config_file /path/to/your/config/pipeline-example-1-config.yaml --incremental true 50 | ``` 51 | 52 | By setting `--incremental true`, you will be prompted to confirm each step by pressing enter, which is recommended for the first time use. 53 | 54 | For your reference, a zip archive of all files being generated via the execution of the screenlamp pipeline can be obtained via the following download link: [https://sebastianraschka.com/datasets/screenlamp/pipeline-tutorial_1/pipeline-tutorial_1_outputs.zip](https://sebastianraschka.com/datasets/screenlamp/pipeline-tutorial_1/pipeline-tutorial_1_outputs.zip). 55 | 56 | ## Canceling and Resuming a Screening Run 57 | 58 | Note that throughout the screening pipeline execution, you will see a short description of the commands being executed. Also, the current pipeline step being executed will be shown in the terminal window. In case you cancel or abort a screening run, you can resume it at the last step being executed using the `--start_at_step` flag.
For example, if you quit the screening run at Step 2 by pressing CTRL+C 59 | 60 | ``` 61 | ################################################ 62 | Step 02: PREFILTER BY FUNCTIONAL GROUP PRESENCE 63 | ################################################ 64 | 65 | Running command: 66 | python /Users/sebastian/code/screenlamp/tools/funcgroup_presence_to_id.py --input /Users/sebastian/Desktop/screenlamp_pipe/01_selected-mol2s --output /Users/sebastian/Desktop/screenlamp_pipe/02_3keto-and-sulfur-mol2ids.txt --selection ((atom_type == 'S.3') | (atom_type == 'S.o2')) --> (atom_type == 'O.2') --processes 0 67 | 68 | Press Enter to proceed or CTRL+C to quit 69 | ``` 70 | 71 | you can resume the run by using `--start_at_step 2` as shown in the example below: 72 | 73 | ```bash 74 | python path/to/screenlamp/tools/pipelines/pipeline-example-1.py --config_file /path/to/your/config/pipeline-example-1-config.yaml --incremental true --start_at_step 2 75 | ``` 76 | 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /docs/sources/user_guide/tools: -------------------------------------------------------------------------------- 1 | ../../../tools/ -------------------------------------------------------------------------------- /docs/united/_LICENSE: -------------------------------------------------------------------------------- 1 | Copyright © 2015, Dougal Matthews. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or 4 | without modification, are permitted provided that the following 5 | conditions are met: 6 | 7 | Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | Redistributions in binary form must reproduce the above copyright 10 | notice, this list of conditions and the following disclaimer in 11 | the documentation and/or other materials provided with the 12 | distribution. 
13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND 15 | CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 16 | INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 17 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 19 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 21 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 22 | USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 23 | AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 25 | ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 | POSSIBILITY OF SUCH DAMAGE. 27 | -------------------------------------------------------------------------------- /docs/united/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/united/__init__.py -------------------------------------------------------------------------------- /docs/united/base.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | {%- block site_meta %} 5 | 6 | 7 | 8 | {% if config.site_description %}{% endif %} 9 | {% if config.site_author %}{% endif %} 10 | {% if page and page.canonical_url %}{% endif %} 11 | 12 | {%- endblock %} 13 | 14 | {%- block htmltitle %} 15 | {% if page and page.title %}{{ page.title }} - {% endif %}{{ config.site_name }} 16 | {%- endblock %} 17 | 18 | {%- block styles %} 19 | 20 | 21 | 22 | 23 | {%- for path in extra_css %} 24 | 25 | {%- endfor %} 26 | {%- endblock %} 27 | 28 | {%- block libs %} 29 | 30 | 34 | 35 | 36 | 37 | 38 | {%- endblock %} 39 | 40 | {%- block analytics %} 41 | {% if config.google_analytics %} 42 | 51 | {% endif %} 52 | {%- 
endblock %} 53 | 54 | {%- block extrahead %} {% endblock %} 55 | 56 | 57 | 58 | 59 | {% include "nav.html" %} 60 | 61 | 62 | {%- block content %} 63 | {% include "toc.html" %} 64 | {% include "content.html" %} 65 | {%- endblock %} 66 | 67 | 68 | 77 | 78 | {%- block scripts %} 79 | 80 | 81 | 82 | {%- for path in extra_javascript %} 83 | 84 | {%- endfor %} 85 | {%- endblock %} 86 | 87 | 88 | 89 | 90 | 91 | ×Close 92 | Search 93 | 94 | 95 | 96 | From here you can search these documents. Enter 97 | your search terms below. 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 108 | 109 | 110 | 111 | 112 | 113 | -------------------------------------------------------------------------------- /docs/united/content.html: -------------------------------------------------------------------------------- 1 | {% if page and page.meta and page.meta.source %} 2 | 3 | {% for filename in page.meta.source %} 4 | {{ filename }} 5 | {% endfor %} 6 | 7 | {% endif %} 8 | 9 | {% if page and page.content %}{{ page.content }}{% endif %} 10 | -------------------------------------------------------------------------------- /docs/united/css/base.css: -------------------------------------------------------------------------------- 1 | body { 2 | padding-top: 70px; 3 | } 4 | 5 | /* 6 | * The code below adds some padding to the top of the current anchor target so 7 | * that, when navigating to it, the header isn't hidden by the navbar at the 8 | * top. This is especially complicated because we want to *remove* the padding 9 | * after navigation so that hovering over the header shows the permalink icon 10 | * correctly. Thus, we create a CSS animation to remove the extra padding after 11 | * a second. We have two animations so that navigating to an anchor within the 12 | * page always restarts the animation. 13 | * 14 | * See for more details. 
15 | */ 16 | :target::before { 17 | content: ""; 18 | display: block; 19 | margin-top: -75px; 20 | height: 75px; 21 | pointer-events: none; 22 | animation: 0s 1s forwards collapse-anchor-padding-1; 23 | } 24 | 25 | body.clicky :target::before { 26 | animation-name: collapse-anchor-padding-2; 27 | } 28 | 29 | @keyframes collapse-anchor-padding-1 { 30 | to { 31 | margin-top: 0; 32 | height: 0; 33 | } 34 | } 35 | 36 | @keyframes collapse-anchor-padding-2 { 37 | to { 38 | margin-top: 0; 39 | height: 0; 40 | } 41 | } 42 | 43 | ul.nav li.main { 44 | font-weight: bold; 45 | } 46 | 47 | div.col-md-3 { 48 | padding-left: 0; 49 | } 50 | 51 | div.col-md-9 { 52 | padding-bottom: 100px; 53 | } 54 | 55 | div.source-links { 56 | float: right; 57 | } 58 | 59 | div.col-md-9 img { 60 | max-width: 100%; 61 | } 62 | 63 | code { 64 | padding: 1px 3px; 65 | background: #f5f5f5; 66 | border: solid 1px #ccc; 67 | color: #333; 68 | } 69 | 70 | pre code { 71 | background: transparent; 72 | border: none; 73 | } 74 | 75 | a > code { 76 | color: #dd4814; 77 | } 78 | 79 | a > code:hover, a > code:focus { 80 | color: #97310e; 81 | } 82 | 83 | /* 84 | * Side navigation 85 | * 86 | * Scrollspy and affixed enhanced navigation to highlight sections and secondary 87 | * sections of docs content. 
88 | */ 89 | 90 | /* By default it's not affixed in mobile views, so undo that */ 91 | .bs-sidebar.affix { 92 | position: static; 93 | } 94 | 95 | .bs-sidebar.well { 96 | padding: 0; 97 | } 98 | 99 | /* First level of nav */ 100 | .bs-sidenav { 101 | margin-top: 30px; 102 | margin-bottom: 30px; 103 | padding-top: 10px; 104 | padding-bottom: 10px; 105 | border-radius: 5px; 106 | } 107 | 108 | /* All levels of nav */ 109 | .bs-sidebar .nav > li > a { 110 | display: block; 111 | padding: 5px 20px; 112 | z-index: 1; 113 | } 114 | .bs-sidebar .nav > li > a:hover, 115 | .bs-sidebar .nav > li > a:focus { 116 | text-decoration: none; 117 | border-right: 1px solid; 118 | } 119 | .bs-sidebar .nav > .active > a, 120 | .bs-sidebar .nav > .active:hover > a, 121 | .bs-sidebar .nav > .active:focus > a { 122 | font-weight: bold; 123 | background-color: transparent; 124 | border-right: 1px solid; 125 | } 126 | 127 | /* Nav: second level (shown on .active) */ 128 | .bs-sidebar .nav .nav { 129 | display: none; /* Hide by default, but at >768px, show it */ 130 | margin-bottom: 8px; 131 | } 132 | .bs-sidebar .nav .nav > li > a { 133 | padding-top: 3px; 134 | padding-bottom: 3px; 135 | padding-left: 30px; 136 | font-size: 90%; 137 | } 138 | 139 | /* Show and affix the side nav when space allows it */ 140 | @media (min-width: 992px) { 141 | .bs-sidebar .nav > .active > ul { 142 | display: block; 143 | } 144 | /* Widen the fixed sidebar */ 145 | .bs-sidebar.affix, 146 | .bs-sidebar.affix-bottom { 147 | width: 213px; 148 | } 149 | .bs-sidebar.affix { 150 | position: fixed; /* Undo the static from mobile first approach */ 151 | top: 80px; 152 | } 153 | .bs-sidebar.affix-bottom { 154 | position: absolute; /* Undo the static from mobile first approach */ 155 | } 156 | .bs-sidebar.affix-bottom .bs-sidenav, 157 | .bs-sidebar.affix .bs-sidenav { 158 | margin-top: 0; 159 | margin-bottom: 0; 160 | } 161 | } 162 | @media (min-width: 1200px) { 163 | /* Widen the fixed sidebar again */ 164 | 
.bs-sidebar.affix-bottom, 165 | .bs-sidebar.affix { 166 | width: 263px; 167 | } 168 | } 169 | 170 | .headerlink { 171 | display: none; 172 | padding-left: .5em; 173 | } 174 | 175 | h1:hover .headerlink, h2:hover .headerlink, h3:hover .headerlink, h4:hover .headerlink, h5:hover .headerlink, h6:hover .headerlink{ 176 | display:inline-block; 177 | } 178 | 179 | /* display submenu relative to parent*/ 180 | .dropdown-submenu { 181 | position: relative; 182 | } 183 | 184 | /* sub menu stlye */ 185 | .dropdown-submenu>.dropdown-menu { 186 | top: 0; 187 | left: 100%; 188 | margin-top: 0px; 189 | margin-left: -1px; 190 | -webkit-border-radius: 0 4px 4px 4px; 191 | -moz-border-radius: 0 4px 4px; 192 | border-radius: 0 4px 4px 4px; 193 | } 194 | 195 | /* display sub menu on hover*/ 196 | .dropdown-submenu:hover>.dropdown-menu { 197 | display: block; 198 | } 199 | 200 | /* little arrow */ 201 | .dropdown-submenu>a:after { 202 | display: block; 203 | content: " "; 204 | float: right; 205 | width: 0; 206 | height: 0; 207 | border-color: transparent; 208 | border-style: solid; 209 | border-width: 5px 0 5px 5px; 210 | border-left-color: #ccc; 211 | margin-top: 5px; 212 | margin-right: -10px; 213 | } 214 | 215 | /* little arrow of parent menu */ 216 | .dropdown-submenu:hover>a:after { 217 | border-left-color: #404040; 218 | } 219 | -------------------------------------------------------------------------------- /docs/united/css/highlight.css: -------------------------------------------------------------------------------- 1 | /* 2 | This is the GitHub theme for highlight.js 3 | 4 | github.com style (c) Vasily Polovnyov 5 | 6 | */ 7 | 8 | .hljs { 9 | display: block; 10 | overflow-x: auto; 11 | color: #333; 12 | -webkit-text-size-adjust: none; 13 | } 14 | 15 | .hljs-comment, 16 | .diff .hljs-header, 17 | .hljs-javadoc { 18 | color: #998; 19 | font-style: italic; 20 | } 21 | 22 | .hljs-keyword, 23 | .css .rule .hljs-keyword, 24 | .hljs-winutils, 25 | .nginx .hljs-title, 26 | 
.hljs-subst, 27 | .hljs-request, 28 | .hljs-status { 29 | color: #333; 30 | font-weight: bold; 31 | } 32 | 33 | .hljs-number, 34 | .hljs-hexcolor, 35 | .ruby .hljs-constant { 36 | color: #008080; 37 | } 38 | 39 | .hljs-string, 40 | .hljs-tag .hljs-value, 41 | .hljs-phpdoc, 42 | .hljs-dartdoc, 43 | .tex .hljs-formula { 44 | color: #d14; 45 | } 46 | 47 | .hljs-title, 48 | .hljs-id, 49 | .scss .hljs-preprocessor { 50 | color: #900; 51 | font-weight: bold; 52 | } 53 | 54 | .hljs-list .hljs-keyword, 55 | .hljs-subst { 56 | font-weight: normal; 57 | } 58 | 59 | .hljs-class .hljs-title, 60 | .hljs-type, 61 | .vhdl .hljs-literal, 62 | .tex .hljs-command { 63 | color: #458; 64 | font-weight: bold; 65 | } 66 | 67 | .hljs-tag, 68 | .hljs-tag .hljs-title, 69 | .hljs-rule .hljs-property, 70 | .django .hljs-tag .hljs-keyword { 71 | color: #000080; 72 | font-weight: normal; 73 | } 74 | 75 | .hljs-attribute, 76 | .hljs-variable, 77 | .lisp .hljs-body, 78 | .hljs-name { 79 | color: #008080; 80 | } 81 | 82 | .hljs-regexp { 83 | color: #009926; 84 | } 85 | 86 | .hljs-symbol, 87 | .ruby .hljs-symbol .hljs-string, 88 | .lisp .hljs-keyword, 89 | .clojure .hljs-keyword, 90 | .scheme .hljs-keyword, 91 | .tex .hljs-special, 92 | .hljs-prompt { 93 | color: #990073; 94 | } 95 | 96 | .hljs-built_in { 97 | color: #0086b3; 98 | } 99 | 100 | .hljs-preprocessor, 101 | .hljs-pragma, 102 | .hljs-pi, 103 | .hljs-doctype, 104 | .hljs-shebang, 105 | .hljs-cdata { 106 | color: #999; 107 | font-weight: bold; 108 | } 109 | 110 | .hljs-deletion { 111 | background: #fdd; 112 | } 113 | 114 | .hljs-addition { 115 | background: #dfd; 116 | } 117 | 118 | .diff .hljs-change { 119 | background: #0086b3; 120 | } 121 | 122 | .hljs-chunk { 123 | color: #aaa; 124 | } 125 | -------------------------------------------------------------------------------- /docs/united/fonts/fontawesome-webfont.eot: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/united/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /docs/united/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/united/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /docs/united/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/united/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- /docs/united/img/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/united/img/favicon.ico -------------------------------------------------------------------------------- /docs/united/js/base.js: -------------------------------------------------------------------------------- 1 | /* Search */ 2 | 3 | function getSearchTerm() 4 | { 5 | var sPageURL = window.location.search.substring(1); 6 | var sURLVariables = sPageURL.split('&'); 7 | for (var i = 0; i < sURLVariables.length; i++) 8 | { 9 | var sParameterName = sURLVariables[i].split('='); 10 | if (sParameterName[0] == 'q') { 11 | return sParameterName[1]; 12 | } 13 | } 14 | } 15 | 16 | $(document).ready(function() { 17 | var search_term = getSearchTerm(), 18 | $search_modal = $('#mkdocs_search_modal'); 19 | 20 | if(search_term) { 21 | $search_modal.modal(); 22 | } 23 | 24 | $search_modal.on('shown.bs.modal', function () { 25 | $search_modal.find('#mkdocs-search-query').focus(); 26 | }); 27 | }); 
28 | 29 | 30 | /* Highlight */ 31 | $( document ).ready(function() { 32 | hljs.initHighlightingOnLoad(); 33 | $('table').addClass('table table-striped table-hover'); 34 | }); 35 | 36 | 37 | $('body').scrollspy({ 38 | target: '.bs-sidebar', 39 | }); 40 | 41 | /* Toggle the `clicky` class on the body when clicking links to let us 42 | retrigger CSS animations. See ../css/base.css for more details. */ 43 | $('a').click(function(e) { 44 | $('body').toggleClass('clicky'); 45 | }); 46 | 47 | /* Prevent disabled links from causing a page reload */ 48 | $("li.disabled a").click(function() { 49 | event.preventDefault(); 50 | }); 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /docs/united/main.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | -------------------------------------------------------------------------------- /docs/united/nav-sub.html: -------------------------------------------------------------------------------- 1 | {% if not nav_item.children %} 2 | 3 | {{ nav_item.title }} 4 | 5 | {% else %} 6 | 7 | {{ nav_item.title }} 8 | 9 | {% for nav_item in nav_item.children %} 10 | {% include "nav-sub.html" %} 11 | {% endfor %} 12 | 13 | 14 | {% endif %} 15 | -------------------------------------------------------------------------------- /docs/united/nav.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Toggle navigation 9 | 10 | 11 | 12 | 13 | 14 | {%- block site_name %} 15 | {{ config.site_name }} 16 | {%- endblock %} 17 | 18 | 19 | 20 | 21 | {%- block site_nav %} 22 | 23 | 24 | {% for nav_item in nav %} 25 | {% if nav_item.children %} 26 | 27 | {{ nav_item.title }} 28 | 29 | {% for nav_item in nav_item.children %} 30 | {% include "nav-sub.html" %} 31 | {% endfor %} 32 | 33 | 34 | {% else %} 35 | 36 | {{ nav_item.title }} 37 | 38 | {% endif %} 39 | {% endfor %} 40 | 41 | 
{%- endblock %} 42 | 43 | 44 | 45 | {%- block search_button %} 46 | 47 | 48 | Search 49 | 50 | 51 | {%- endblock %} 52 | 53 | {%- block next_prev %} 54 | {%- if page and (page.next_page or page.previous_page) %} 55 | 56 | 57 | Previous 58 | 59 | 60 | 61 | 62 | Next 63 | 64 | 65 | {%- endif %} 66 | {%- endblock %} 67 | 68 | {%- block repo %} 69 | {% if config.repo_url %} 70 | 71 | 72 | {% if config.repo_name == 'GitHub' %} 73 | 74 | {% elif config.repo_name == 'Bitbucket' %} 75 | 76 | {% endif %} 77 | {{ config.repo_name }} 78 | 79 | 80 | {% endif %} 81 | {%- endblock %} 82 | 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /docs/united/toc.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | {% if page %} 4 | {% for toc_item in page.toc %} 5 | {{ toc_item.title }} 6 | {% for toc_item in toc_item.children %} 7 | {{ toc_item.title }} 8 | {% endfor %} 9 | {% endfor %} 10 | {% endif %} 11 | 12 | 13 | -------------------------------------------------------------------------------- /docs/update_docs.py: -------------------------------------------------------------------------------- 1 | # Sebastian Raschka 2014-2016 2 | # mlxtend Machine Learning Library Extensions 3 | # 4 | # Author: Sebastian Raschka 5 | # 6 | # License: BSD 3 clause 7 | 8 | import subprocess 9 | 10 | 11 | with open('sources/user_guide/tools.md', 'w') as f: 12 | subprocess.call(['python', 'argparse_to_md.py', '../tools'], stdout=f) 13 | 14 | subprocess.call(['python', 'ipynb2markdown.py', '--ipynb', 15 | 'sources/user_guide/tools-tutorial-1.ipynb']) 16 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | mputil==0.1.0 2 | numpy>=1.12.1 3 | scipy>=0.19.0 4 | pandas>=0.19.2 5 | biopandas>=0.2.1 6 | pyaml>=3.0.0 7 | 
-------------------------------------------------------------------------------- /tools/count_mol2.py: -------------------------------------------------------------------------------- 1 | # Sebastian Raschka 2017 2 | # 3 | # screenlamp is a Python toolkit 4 | # for hypothesis-driven virtual screening. 5 | # 6 | # Copyright (C) 2017 Michigan State University 7 | # License: Apache v2 8 | # 9 | # Software author: Sebastian Raschka 10 | # Software author email: mail@sebastianraschka.com 11 | # 12 | # Software source repository: https://github.com/rasbt/screenlamp 13 | # Documentation: https://psa-lab.github.io/screenlamp 14 | # 15 | # screenlamp was developed in the 16 | # Protein Structural Analysis & Design Laboratory 17 | # (http://www.kuhnlab.bmb.msu.edu) 18 | # 19 | # If you are using screenlamp in your research, please cite 20 | # the following journal article: 21 | # 22 | # Raschka, Sebastian, Anne M. Scott, Nan Liu, 23 | # Santosh Gunturu, Mar Huertas, Weiming Li, 24 | # and Leslie A. Kuhn. 2017 25 | # 26 | # Enabling the hypothesis-driven prioritization of 27 | # ligand candidates in big databases: 28 | # Screenlamp and its application to GPCR inhibitor 29 | # discovery for invasive species control. 
#



import subprocess
import argparse
import sys
import os
import gzip
import platform


def mol_count_python(input_file, zipped):
    """Count molecule records in a MOL2 file without external tools.

    Every line that starts with the `@ATOM` marker counts as one record.

    Parameters
    ----------
    input_file : str
        Path to the file to scan.
    zipped : bool
        If true, the file is opened with `gzip.open` and compared
        against a bytes marker; otherwise it is opened as plain text.

    Returns
    -------
    int
        Number of lines starting with the marker.
    """
    if zipped:
        open_cmd = gzip.open
        look_up = b'@ATOM'
    else:
        open_cmd = open
        look_up = '@ATOM'

    with open_cmd(input_file, 'r') as f:
        return sum(1 for line in f if line.startswith(look_up))


def mol_count_shell(input_file, zipped):
    """Count molecule records by piping `grep`/`zgrep` into `wc -l`.

    Note that `grep` matches the marker anywhere in a line, whereas
    `mol_count_python` only matches it at the start of a line.

    Parameters
    ----------
    input_file : str
        Path to the file to scan.
    zipped : bool
        If true, `zgrep` is used instead of `grep`.

    Returns
    -------
    int
        The line count reported by `wc -l`.
    """
    grep = 'zgrep' if zipped else 'grep'

    ps = subprocess.Popen([grep, "@ATOM", input_file],
                          stdout=subprocess.PIPE)
    try:
        raw = subprocess.check_output(['wc', '-l'], stdin=ps.stdout)
    finally:
        # Release our end of the pipe and reap the child even if `wc` fails.
        ps.stdout.close()
        ps.wait()

    return int(raw.decode().rstrip())


def count_in_dir(path, windows):
    """Count molecule records in every MOL2 file of a directory.

    Prints one `name : count` line per file to stdout.

    Parameters
    ----------
    path : str
        Directory to scan (not recursive).
    windows : bool
        If true, use the pure-Python counter; otherwise the shell one.

    Returns
    -------
    int
        Sum of the per-file counts.
    """
    total = 0
    for f in os.listdir(path):
        # Both suffixes carry the leading dot so that the file filter
        # agrees with the `.mol2.gz` check below.
        if f.endswith(('.mol2', '.mol2.gz')):
            file_path = os.path.join(path, f)
            zipped = f.endswith('.mol2.gz')

            if windows:
                cnt = mol_count_python(file_path, zipped)
            else:
                cnt = mol_count_shell(file_path, zipped)

            sys.stdout.write('%s : %d\n' % (f, cnt))
            sys.stdout.flush()
            total += cnt
    return total


def main(input_name):
    """Count molecules in a file or directory and print the total.

    Parameters
    ----------
    input_name : str
        Path to a MOL2 file (optionally gzipped) or to a directory
        containing such files.
    """
    is_dir = os.path.isdir(input_name)
    # `os.system` is a function, so comparing it with a string was always
    # False; `platform.system()` returns the OS name.
    is_windows = platform.system() == 'Windows'

    if not is_dir:
        zipped = input_name.endswith('.gz')
        if is_windows:
            total = mol_count_python(input_name, zipped)
        else:
            total = mol_count_shell(input_name, zipped)

    else:
        total = count_in_dir(input_name, is_windows)

    sys.stdout.write('Total : %d\n' % total)
    sys.stdout.flush()


if __name__ == '__main__':

    parser = argparse.ArgumentParser(
        description=('A command line tool for counting the number'
                     ' of molecules in MOL2 files.'),
        epilog="""Example:
python count_mol2.py -i mol2_dir/
python count_mol2.py -i partition_1.mol2""",
        formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('-i', '--input',
                        required=True,
                        type=str,
                        help='(Required.) Path to a `.mol2` or `.mol2.gz`file,'
                             '\nor a directory containing `.mol2`/`.mol2.gz`'
                             ' files.')

    parser.add_argument('-v', '--version', action='version', version='v. 1.0')

    args = parser.parse_args()

    if not args.input:
        parser.print_help()

    else:
        main(args.input)

# --------------------------------------------------------------------------------
# /tools/datatable_to_id.py:
# --------------------------------------------------------------------------------
# Sebastian Raschka 2017
#
# screenlamp is a Python toolkit
# for hypothesis-driven virtual screening.
#
# Copyright (C) 2017 Michigan State University
# License: Apache v2
#
# Software author: Sebastian Raschka
# Software author email: mail@sebastianraschka.com
#
# Software source repository: https://github.com/rasbt/screenlamp
# Documentation: https://psa-lab.github.io/screenlamp
#
# screenlamp was developed in the
# Protein Structural Analysis & Design Laboratory
# (http://www.kuhnlab.bmb.msu.edu)
#
# If you are using screenlamp in your research, please cite
# the following journal article:
#
# Raschka, Sebastian, Anne M. Scott, Nan Liu,
# Santosh Gunturu, Mar Huertas, Weiming Li,
# and Leslie A. Kuhn. 2017
#
# Enabling the hypothesis-driven prioritization of
# ligand candidates in big databases:
# Screenlamp and its application to GPCR inhibitor
# discovery for invasive species control.
#


import argparse
import sys
import os
import pandas as pd
import time


def read_and_write(source, target, selection,
                   columns, id_column, sep, verbose):
    """Stream a data table and write the IDs of the selected rows.

    The table is read in chunks of 100000 rows; for each chunk the rows
    matching `selection` are written to `target`, one ID per line.

    Parameters
    ----------
    source : str
        Path to the input table.
    target : str
        Path of the ID file to write.
    selection : str or None
        Expression evaluated with `pd.eval` for every chunk. It must
        refer to the data frame by the local name `chunk` (see
        `parse_selection_string`). If None, all rows are selected.
    columns : list of str
        Columns to load from the table.
    id_column : str
        Name of the column holding the molecule IDs.
    sep : str
        Column separator of the input table.
    verbose : int or bool
        If truthy, progress information is printed to stdout.
    """
    if verbose:
        counter = 0
        sys.stdout.write('Using columns: %s\n' % columns)
        sys.stdout.write('Using selection: %s\n' % selection)
        sys.stdout.flush()

    reader = pd.read_table(source, chunksize=100000, usecols=columns, sep=sep)

    with open(target, 'w') as f:
        if verbose:
            start = time.time()
        for chunk in reader:

            if selection is not None:
                # The expression references the local variable `chunk`,
                # which `pd.eval` resolves from this function's scope.
                mask = pd.eval(selection)
            else:
                mask = chunk.index
            chunk.loc[mask, [id_column]].to_csv(f,
                                                header=False,
                                                index=False)
            if verbose:
                counter += chunk.shape[0]

                elapsed = time.time() - start
                # Guard against a zero interval on coarse clocks.
                rate = counter / elapsed if elapsed > 0 else 0
                sys.stdout.write('\rProcessed %d rows | %d rows/sec' %
                                 (counter, rate))
                # Flush the stream that was actually written to.
                sys.stdout.flush()

    if verbose:
        with open(target, 'r') as f:
            n_lines = sum(1 for _ in f)
        sys.stdout.write('\nSelected: %d\n' % n_lines)
        sys.stdout.flush()


def parse_selection_string(s, df_name='chunk'):
    """Prefix each opening parenthesis in `s` with `<df_name>.`.

    For example, `"(MWT > 500)"` becomes `"(chunk.MWT > 500)"`.
    """
    return s.replace('(', '(%s.' % df_name)


def columns_from_selection(s):
    """Return the column names used in a selection string.

    A column name is every whitespace-separated token that contains an
    opening parenthesis, with the parentheses removed.
    """
    return [c.replace('(', '') for c in s.split() if '(' in c]


def main(input_dir, output_file, verbose, selection, id_column,
         separator='\t'):
    """Write the IDs of all table rows matching `selection` to a file.

    Parameters
    ----------
    input_dir : str
        Path to the input data table.
    output_file : str
        Path of the ID file to create. Missing parent directories are
        created.
    verbose : int or bool
        If truthy, progress information is printed to stdout.
    selection : str or None
        Selection string such as `"(NRB <= 7) & (MWT > 200)"`, or None
        to select every row.
    id_column : str
        Name of the column holding the molecule IDs.
    separator : str, optional
        Column separator of the input table (default: tab).
    """
    columns = [id_column]
    if selection is None:
        parsed_sele = None
    else:
        parsed_sele = parse_selection_string(selection, df_name='chunk')
        columns += columns_from_selection(selection)

    dirpath = os.path.dirname(output_file)
    # `dirname` is empty for a bare file name; nothing to create then.
    if dirpath and not os.path.exists(dirpath):
        os.makedirs(dirpath)

    # Use the function's own parameters rather than the global `args`,
    # so that `main` also works when imported from another module.
    read_and_write(source=input_dir,
                   target=output_file,
                   selection=parsed_sele,
                   columns=columns,
                   id_column=id_column,
                   sep=separator,
                   verbose=verbose)


if __name__ == '__main__':

    parser = argparse.ArgumentParser(
        description='Create a text file with molecule IDs from MOL2 files.',
        epilog="""Example:
python datatable_to_id.py\\
  --input table.txt\\
  --output ids.txt\\
  --id_column ZINC_ID\\
  --selection "(NRB <= 7) & (MWT > 200)" """,
        formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('-i', '--input',
                        type=str,
                        required=True,
                        help='(Required.) Path to a datatable file where each'
                             '\nrow represents a molecule and each columns'
                             '\nstore the molecular features.')
    parser.add_argument('-o', '--output',
                        type=str,
                        required=True,
                        help='(Required.) Output path for the ID file'
                             ' (for example, `ids.txt`).')
    parser.add_argument('--id_column',
                        type=str,
                        required=True,
                        help='(Required.) Name of the Molecule ID column.')
    parser.add_argument('--separator',
                        type=str,
                        default='\t',
                        help=('(Optional, default: `"\t"`.) Column separator used\nin the input table.\n'
                              'Assumes tab-separated values by default.'))
    parser.add_argument('-s', '--selection',
                        type=str,
                        default=None,
                        help='(Optional, default: `None`.) A conditional selection string:\n'
                             ' Single column selection example: `"(MWT > 500)"`. '
                             ' Logical OR example: `"(MWT > 500) | (MWT < 200)"`.'
                             ' Logical AND example: `"(NRB <= 7) & (MWT > 200)"`.')
    parser.add_argument('-v', '--verbose',
                        type=int,
                        default=1,
                        help='(Optional, default: `1`.) Verbosity level. If 0, does not print any'
                             '\noutput.'
                             '\nIf 1 (default), prints the file currently'
                             '\nprocessing.')

    parser.add_argument('--version', action='version', version='v. 1.0')

    args = parser.parse_args()

    main(args.input, args.output, args.verbose, args.selection,
         args.id_column, separator=args.separator)

# --------------------------------------------------------------------------------
# /tools/enumerate_conformers.py:
# --------------------------------------------------------------------------------
# Sebastian Raschka 2017
#
# screenlamp is a Python toolkit
# for hypothesis-driven virtual screening.
#
# Copyright (C) 2017 Michigan State University
# License: Apache v2
#
# Software author: Sebastian Raschka
# Software author email: mail@sebastianraschka.com
#
# Software source repository: https://github.com/rasbt/screenlamp
# Documentation: https://psa-lab.github.io/screenlamp
#
# screenlamp was developed in the
# Protein Structural Analysis & Design Laboratory
# (http://www.kuhnlab.bmb.msu.edu)
#
# If you are using screenlamp in your research, please cite
# the following journal article:
#
# Raschka, Sebastian, Anne M. Scott, Nan Liu,
# Santosh Gunturu, Mar Huertas, Weiming Li,
# and Leslie A. Kuhn.
# 2017
#
# Enabling the hypothesis-driven prioritization of
# ligand candidates in big databases:
# Screenlamp and its application to GPCR inhibitor
# discovery for invasive species control.
#

import os
import argparse
import sys
import time
import gzip
from biopandas.mol2 import split_multimol2


def get_mol2_files(dir_path):
    """Collect the MOL2 files found at `dir_path`.

    Parameters
    ----------
    dir_path : str
        Either a directory (scanned non-recursively) or a single file.

    Returns
    -------
    list of str
        Paths ending in `.mol2` or `.mol2.gz`; empty if nothing matches.
    """
    suffixes = ('.mol2', '.mol2.gz')
    files = []

    if os.path.isdir(dir_path):
        for f in os.listdir(dir_path):
            if f.endswith(suffixes):
                files.append(os.path.join(dir_path, f))

    elif os.path.isfile(dir_path) and dir_path.endswith(suffixes):
        files.append(dir_path)

    return files


def read_and_write(inp_mol2_path, out_mol2_path, verbose):
    """Copy a multi-MOL2 file, appending a running index to each name.

    Consecutive records that share the same molecule ID are numbered
    `<id>_0`, `<id>_1`, ...; the counter restarts whenever the ID
    changes. The new name replaces the second line of each record.

    Parameters
    ----------
    inp_mol2_path : str
        Input file; a path ending in `.gz` is handled as gzipped bytes.
    out_mol2_path : str
        Output file, written gzipped if the input path ends in `.gz`.
    verbose : int or bool
        If truthy, progress information is printed to stdout.
    """
    if verbose:
        sys.stdout.write('Processing %s' % os.path.basename(inp_mol2_path))
        sys.stdout.flush()
        start = time.time()

    # The gzipped branch works on bytes, the plain branch on str; only
    # the literals differ, so both share one loop below.
    if inp_mol2_path.endswith('.gz'):
        write_mode = 'wb'
        open_file = gzip.open
        template, newline, joiner = b'%s_%d', b'\n', b''
    else:
        write_mode = 'w'
        open_file = open
        template, newline, joiner = '%s_%d', '\n', ''

    n_molecules = 0
    # None (rather than '') can never equal a real ID, so the counter is
    # always initialised on the first record.
    prev_molecule = None
    cnt = 0

    with open_file(out_mol2_path, write_mode) as outfile:
        for id_, cont in split_multimol2(inp_mol2_path):
            cnt = cnt + 1 if id_ == prev_molecule else 0

            cont[1] = template % (id_, cnt) + newline
            outfile.write(joiner.join(cont))
            prev_molecule = id_
            n_molecules += 1

    if verbose:
        elapsed = time.time() - start
        # Guard against a zero interval on coarse clocks.
        rate = n_molecules / elapsed if elapsed > 0 else 0
        sys.stdout.write(' | scanned %d molecules | %d mol/sec\n' %
                         (n_molecules, rate))
        sys.stdout.flush()


def main(input_dir, output_dir, verbose):
    """Number the molecules of every MOL2 file found at `input_dir`.

    Parameters
    ----------
    input_dir : str
        A MOL2 file or a directory containing MOL2 files.
    output_dir : str
        Directory for the renumbered files (created if missing); each
        output file keeps the base name of its input file.
    verbose : int or bool
        If truthy, progress information is printed to stdout.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    inp_mol2_paths = get_mol2_files(input_dir)

    for mol2_path in inp_mol2_paths:
        base = os.path.basename(mol2_path)
        out_mol2_path = os.path.join(output_dir, base)
        read_and_write(mol2_path, out_mol2_path, verbose)


if __name__ == '__main__':

    parser = argparse.ArgumentParser(
        description='Numbers molecules in MOL2 files by'
                    ' adding a suffix as index.'
                    ' For example, if there are three'
                    ' molecules in a MOL2 file,'
                    ' moleculeabc, moleculeabc, and moleculedef,'
                    '\n those molecules will be relabeled to'
                    ' moleculeabc_0, moleculeabc_1, and moleculedef_0.',
        epilog="""Example:
python enumerate_conformers.py -i conformer_mol2s/\\
  --output numbered_conformers/""",
        formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('-i', '--input',
                        required=True,
                        type=str,
                        help='(Required.) Path to a `.mol2` or `.mol2.gz`file,'
                             '\nor a directory containing `.mol2`/`.mol2.gz`'
                             ' files.')
    parser.add_argument('-o', '--output',
                        type=str,
                        required=True,
                        help='(Required.) Directory path for writing the'
                             ' numbered MOL2s')
    parser.add_argument('-v', '--verbose',
                        type=int,
                        default=1,
                        help='Verbosity level. If 0, does not print any'
                             ' output.'
                             '\nIf 1 (default), prints the file currently'
                             '\nprocessing.')

    parser.add_argument('--version', action='version', version='v. 1.0')

    args = parser.parse_args()

    main(input_dir=args.input,
         output_dir=args.output,
         verbose=args.verbose)

# --------------------------------------------------------------------------------
# /tools/funcgroup_distance_to_id.py:
# --------------------------------------------------------------------------------
# Sebastian Raschka 2017
#
# screenlamp is a Python toolkit
# for hypothesis-driven virtual screening.
#
# Copyright (C) 2017 Michigan State University
# License: Apache v2
#
# Software author: Sebastian Raschka
# Software author email: mail@sebastianraschka.com
#
# Software source repository: https://github.com/rasbt/screenlamp
# Documentation: https://psa-lab.github.io/screenlamp
#
# screenlamp was developed in the
# Protein Structural Analysis & Design Laboratory
# (http://www.kuhnlab.bmb.msu.edu)
#
# If you are using screenlamp in your research, please cite
# the following journal article:
#
# Raschka, Sebastian, Anne M. Scott, Nan Liu,
# Santosh Gunturu, Mar Huertas, Weiming Li,
# and Leslie A. Kuhn. 2017
#
# Enabling the hypothesis-driven prioritization of
# ligand candidates in big databases:
# Screenlamp and its application to GPCR inhibitor
# discovery for invasive species control.
#


import os
import argparse
import sys
import pandas as pd
import time
from mputil import lazy_imap
from multiprocessing import cpu_count
from biopandas.mol2 import split_multimol2
from biopandas.mol2 import PandasMol2


def parse_distance_string(s):
    """Parse a "lowerbound-upperbound" string into a list of numbers.

    Both whole numbers ("13-20") and decimals ("1.5-3.5") are accepted.
    The caller is responsible for checking that exactly two bounds
    were provided.
    """
    return [float(p.strip()) for p in s.split('-')]


def get_mol2_files(dir_path):
    """Return the `.mol2`/`.mol2.gz` file paths found at `dir_path`.

    `dir_path` may be a directory (its matching entries are returned in
    sorted order so that runs are reproducible) or a single MOL2 file.
    An empty list is returned if nothing matches.
    """
    suffixes = ('.mol2', '.mol2.gz')

    if os.path.isdir(dir_path):
        return [os.path.join(dir_path, f)
                for f in sorted(os.listdir(dir_path))
                if f.endswith(suffixes)]

    if os.path.isfile(dir_path) and dir_path.endswith(suffixes):
        return [dir_path]

    return []


def parse_selection_string(s, df_name='pdmol.df'):
    """Split a selection on '-->' and qualify the MOL2 column names.

    Every occurrence of "(<column>" is rewritten to
    "(<df_name>.<column>" so that the resulting sub-expressions can be
    evaluated with `pandas.eval`.
    """
    columns = ['(atom_id', '(atom_name', '(atom_type',
               '(subst_id', '(subst_name', '(charge']
    parsed = []

    for subs in s.split('-->'):
        subs = subs.strip()
        for c in columns:
            subs = subs.replace(c, '(%s.%s' % (df_name, c[1:]))
        parsed.append(subs)
    return parsed


def _has_atom_pair_in_range(mol2):
    """Return True if a selected atom pair lies within DISTANCE.

    `mol2` is a (mol2_code, mol2_lines) pair as yielded by
    `split_multimol2`.
    """
    # NOTE: the local variable must be called `pdmol`: the SELECTION
    # expressions reference `pdmol.df`, and `pandas.eval` resolves names
    # in the frame that calls it. For the same reason, `pd.eval` must not
    # be moved into a comprehension or generator expression.
    pdmol = PandasMol2().read_mol2_from_list(mol2_lines=mol2[1],
                                             mol2_code=mol2[0])

    coordinates = pdmol.df.loc[pd.eval(SELECTION[0]), ['x', 'y', 'z']].values

    # Restrict the molecule to the atoms of the second selection so that
    # `pdmol.distance` only measures against those atoms.
    pdmol._df = pdmol._df[pd.eval(SELECTION[1])]

    lower, upper = DISTANCE[0], DISTANCE[1]

    for xyz in coordinates:
        distances = pdmol.distance(xyz).values
        # Both bounds must hold for the SAME atom pair.
        if ((distances >= lower) & (distances <= upper)).any():
            return True

    return False


def data_processor(mol2):
    """Return the molecule ID if `mol2` matches, else an empty string."""
    if _has_atom_pair_in_range(mol2):
        return mol2[0]
    return ''


def data_processor_gz(mol2_gz):
    """Like `data_processor`, for entries read from gzipped files.

    The molecule ID of such entries is a byte string and is decoded
    before it is returned.
    """
    if _has_atom_pair_in_range(mol2_gz):
        return mol2_gz[0].decode('utf-8')
    return ''


def read_and_write(mol2_files, id_file_path, verbose, n_cpus):
    """Write the IDs of all matching molecules to `id_file_path`.

    Each file in `mol2_files` is processed in parallel chunks via
    `lazy_imap`; one ID is written per line.
    """
    if verbose:
        sys.stdout.write('Using selection: %s\n' % SELECTION)
        sys.stdout.flush()

    with open(id_file_path, 'w') as f:

        for mol2_file in mol2_files:
            if verbose:
                start = time.time()
                sys.stdout.write('Processing %s' % os.path.basename(mol2_file))
                sys.stdout.flush()

            cnt = 0

            if mol2_file.endswith('.gz'):
                data_processor_fn = data_processor_gz
            else:
                data_processor_fn = data_processor

            for chunk in lazy_imap(data_processor=data_processor_fn,
                                   data_generator=split_multimol2(mol2_file),
                                   n_cpus=n_cpus):
                # Non-matching molecules are reported as empty strings.
                for mol2_id in chunk:
                    if mol2_id:
                        f.write('%s\n' % mol2_id)
                cnt += len(chunk)

            if verbose:
                elapsed = time.time() - start
                sys.stdout.write(' | %d mol/sec\n' % (cnt / elapsed))
                sys.stdout.flush()


def get_num_cpus(n_cpus):
    """Translate the --processes argument into a process count.

    0 (or None) selects all available CPUs, a negative value selects all
    CPUs minus its absolute value (but never fewer than 1), and a
    positive value is returned unchanged.
    """
    if not n_cpus:
        return cpu_count()
    if n_cpus < 0:
        # n_cpus is negative, so adding it subtracts from the CPU count.
        return max(1, cpu_count() + n_cpus)
    return n_cpus


def main(input_dir, output_file, verbose, n_cpus):
    """Filter the MOL2 files in `input_dir` and write the matching IDs."""
    n_cpus = get_num_cpus(n_cpus)
    dirpath = os.path.dirname(output_file)
    # `dirpath` is empty when the output file goes to the current
    # working directory; there is nothing to create in that case.
    if dirpath and not os.path.exists(dirpath):
        os.makedirs(dirpath)
    mol2_files = get_mol2_files(dir_path=input_dir)
    read_and_write(mol2_files=mol2_files,
                   id_file_path=output_file,
                   verbose=verbose,
                   n_cpus=n_cpus)

    if verbose:
        print('Finished')


if __name__ == '__main__':

    parser = argparse.ArgumentParser(
            description='A command line tool for filtering mol2 files'
                        '\nby the distance of two atoms or functional groups.',
            epilog="""Example:

python funcgroup_distance_to_id.py\\
  --input mol2_dir/\\
  --output ids.txt\\
  --selection "((atom_type == 'S.3') | (atom_type == 'S.o2')) --> (atom_type == 'O.2')"\\
  --distance 13-20\\
  --processes 0


\\# The example above selects those molecules
\\# that contain S.3 or S.o2 atom that is within
\\# a 13-20 angstroms distance to an 'O.2' (sp2/keto oxygen) atom

""",
            formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('-i', '--input',
                        type=str,
                        required=True,
                        help='(Required.) Path to a `.mol2` or `.mol2.gz` file,'
                             '\nor a directory containing `.mol2`/`.mol2.gz`'
                             ' files.')
    parser.add_argument('-o', '--output',
                        type=str,
                        required=True,
                        help='(Required.) Path to the output file for writing'
                             ' the molecule IDs.')
    parser.add_argument('-s', '--selection',
                        type=str,
                        required=True,
                        help='(Required.) Selection condition for the atom distance'
                             ' checks.'
                             '\n1) Selection example to compare 2 atom types:'
                             '\n `"(atom_type == \'S.o2\') -->'
                             ' (atom_type == \'O.2\')"`.'
                             '\n2) Selection example to consider either'
                             ' an S.o2 or S.3 atom to an O.2 atom:'
                             '\n `"((atom_type == \'S.3\') |'
                             ' (atom_type == \'S.o2\')) -->'
                             ' (atom_type == \'O.2\')"`.'
                             '\n3) Selection example using logical ORs on '
                             'both sides:\n'
                             ' `"((atom_type == \'S.3\') | (atom_type == '
                             '\'S.o2\'))'
                             ' --> ((atom_type == \'O.2\') |'
                             ' (atom_type == \'O.3\'))"`.')
    parser.add_argument('-d', '--distance',
                        type=str,
                        required=True,
                        help='(Required.) A distance range formatted'
                             '\n as "lowerbound-upperbound".'
                             '\nFor example, if 13-20 is provided as an'
                             '\nargument, two atoms are considered a match'
                             '\nif they are not closer than 13 angstroms and'
                             '\n not farther than 20 angstroms.')
    parser.add_argument('--processes',
                        type=int,
                        default=1,
                        help='(Optional, default: `1`.) Number of processes to run in parallel.'
                             '\nIf processes > 0, the specified number of CPUs'
                             '\nwill be used.'
                             '\nIf processes = 0, all available CPUs will'
                             '\nbe used.'
                             '\nIf processes < 0, all available CPUs'
                             '\nminus |processes| will be used (at least 1).')
    parser.add_argument('-v', '--verbose',
                        type=int,
                        default=1,
                        help='(Optional, default: `1`.) Verbosity level. If 0, does not print any'
                             ' output.'
                             '\nIf 1 (default), prints the file currently'
                             ' processing.')

    parser.add_argument('--version', action='version', version='v. 1.0')

    args = parser.parse_args()
    DISTANCE = parse_distance_string(args.distance)
    if len(DISTANCE) != 2:
        raise ValueError("Make sure you only have a lower and upper bound"
                         " for --distance"
                         "\nFor example 13-20")

    SELECTION = parse_selection_string(args.selection)
    if len(SELECTION) != 2:
        raise ValueError("Make sure you have 2 --selection criteria"
                         " separated via '-->', for example,"
                         "\n\"((atom_type == 'S.3') |"
                         " (atom_type == 'S.o2'))\"")

    main(input_dir=args.input,
         output_file=args.output,
         verbose=args.verbose,
         n_cpus=args.processes)

# --------------------------------------------------------------------------------
# /tools/funcgroup_matching.py:
# --------------------------------------------------------------------------------
# Sebastian Raschka 2017
#
# screenlamp is a Python toolkit
# for hypothesis-driven virtual screening.
#
# Copyright (C) 2017 Michigan State University
# License: Apache v2
#
# Software author: Sebastian Raschka
# Software author email: mail@sebastianraschka.com
#
# Software source repository: https://github.com/rasbt/screenlamp
# Documentation: https://psa-lab.github.io/screenlamp
#
# screenlamp was developed in the
# Protein Structural Analysis & Design Laboratory
# (http://www.kuhnlab.bmb.msu.edu)
#
# If you are using screenlamp in your research, please cite
# the following journal article:
#
# Raschka, Sebastian, Anne M. Scott, Nan Liu,
# Santosh Gunturu, Mar Huertas, Weiming Li,
# and Leslie A. Kuhn. 2017
#
# Enabling the hypothesis-driven prioritization of
# ligand candidates in big databases:
# Screenlamp and its application to GPCR inhibitor
# discovery for invasive species control.
#

import os
import argparse
import sys
import time
from multiprocessing import cpu_count
from numpy import nan as np_nan
from mputil import lazy_imap
from biopandas.mol2 import PandasMol2
from biopandas.mol2 import split_multimol2


def get_mol2_files(dir_path):
    """Return the `.mol2`/`.mol2.gz` file paths found at `dir_path`.

    `dir_path` may be a directory (its matching entries are returned in
    sorted order) or a single MOL2 file. An empty list is returned if
    nothing matches.
    """
    suffixes = ('.mol2', '.mol2.gz')

    if os.path.isdir(dir_path):
        return [os.path.join(dir_path, f)
                for f in sorted(os.listdir(dir_path))
                if f.endswith(suffixes)]

    if os.path.isfile(dir_path) and dir_path.endswith(suffixes):
        return [dir_path]

    return []


def _overlay_stem(path, kind):
    """Strip the `_<kind>.mol2[.gz]` suffix from `path`."""
    for suffix in ('_%s.mol2.gz' % kind, '_%s.mol2' % kind):
        if path.endswith(suffix):
            return path[:-len(suffix)]
    return path


def get_dbase_query_pairs(all_mol2s):
    """Split `all_mol2s` into aligned query and database file lists.

    Returns `(q_list, d_list)` where `q_list[i]` and `d_list[i]` share
    the same file name stem.

    Raises a ValueError if the number of `*_query*` and `*_dbase*` files
    differs or if a file has no partner with the same stem.
    """
    q_list, d_list = [], []
    for m in all_mol2s:
        if m.endswith(('_query.mol2.gz', '_query.mol2')):
            q_list.append(m)
        elif m.endswith(('_dbase.mol2.gz', '_dbase.mol2')):
            d_list.append(m)
    if len(q_list) != len(d_list):
        raise ValueError('The input directory contains an unequal number of'
                         ' *_dbase* and *_query* files.')

    # Sort both lists by their common stem; the order of the directory
    # listing is arbitrary and must not decide which files are paired.
    q_list.sort(key=lambda p: _overlay_stem(p, 'query'))
    d_list.sort(key=lambda p: _overlay_stem(p, 'dbase'))
    for q, d in zip(q_list, d_list):
        if _overlay_stem(q, 'query') != _overlay_stem(d, 'dbase'):
            raise ValueError('No matching *_dbase*/*_query* partner for'
                             ' %s and %s.' % (q, d))
    return q_list, d_list


def get_atom_matches(q_pdmol, d_pdmol):
    """Find the database atom overlaying each query atom.

    For every atom in `q_pdmol`, the nearest atom in `d_pdmol` is looked
    up. Its atom type and charge are recorded if it is at most THRESHOLD
    away; otherwise an empty string and NaN are recorded.

    Returns a pair of lists `(atoms, charges)` with one entry per query
    atom.
    """
    columns = ['atom_type', 'charge']
    atoms, charges = [], []
    for xyz in q_pdmol.df[['x', 'y', 'z']].values:
        distances = d_pdmol.distance(xyz=xyz)
        # Positional argmin, independent of the index labels.
        nearest_idx = distances.values.argmin()
        if distances.iloc[nearest_idx] > THRESHOLD:
            atom, charge = '', np_nan
        else:
            atom, charge = d_pdmol.df[columns].iloc[nearest_idx].values
        atoms.append(atom)
        charges.append(charge)
    return atoms, charges


def _as_text(mol2_code):
    """Decode a molecule ID read from a gzipped file; pass text through."""
    if isinstance(mol2_code, bytes):
        return mol2_code.decode('utf-8')
    return mol2_code


def _match_overlay_pair(mol2s):
    """Match the atoms of one (database, query) pair of MOL2 entries.

    `mol2s[0]` is the database entry and `mol2s[1]` the query entry,
    each a (mol2_code, mol2_lines) pair as yielded by `split_multimol2`.
    """
    q_pdmol = PandasMol2()
    d_pdmol = PandasMol2()

    d_pdmol.read_mol2_from_list(mol2_code=mol2s[0][0],
                                mol2_lines=mol2s[0][1])

    q_pdmol.read_mol2_from_list(mol2_code=mol2s[1][0],
                                mol2_lines=mol2s[1][1])

    atoms, charges = get_atom_matches(q_pdmol, d_pdmol)
    return _as_text(mol2s[0][0]), _as_text(mol2s[1][0]), atoms, charges


def data_processor(mol2s):
    """Return (dbase_id, query_id, atoms, charges) for one overlay pair."""
    return _match_overlay_pair(mol2s)


def data_processor_gz(mol2s_gz):
    """Like `data_processor`, for entries read from gzipped files."""
    return _match_overlay_pair(mol2s_gz)


def read_and_write(q_path, d_path, verbose,
                   cache, output_file, n_cpus):
    """Write the atom type and charge tables for one overlay file pair.

    The entries of `d_path` and `q_path` are processed pairwise, and the
    results are written to `<output_file>_charge.tsv` and
    `<output_file>_atomtype.tsv`. `cache` is accepted for backward
    compatibility and is not used.
    """
    dct_results = {'dbase': [], 'query': [], 'atoms': [], 'charges': []}

    d_base = os.path.basename(d_path)
    q_base = os.path.basename(q_path)

    if verbose:
        start = time.time()
        sys.stdout.write('Processing %s/%s' % (d_base, q_base))
        sys.stdout.flush()

    cnt = 0

    if q_path.endswith('.gz'):
        data_processor_fn = data_processor_gz
    else:
        data_processor_fn = data_processor

    for chunk in lazy_imap(data_processor=data_processor_fn,
                           data_generator=zip(split_multimol2(d_path),
                                              split_multimol2(q_path)),
                           n_cpus=n_cpus):

        for dbase_id, query_id, atoms, charges in chunk:
            dct_results['dbase'].append(dbase_id)
            dct_results['query'].append(query_id)
            dct_results['atoms'].append(atoms)
            dct_results['charges'].append(charges)

        cnt += len(chunk)

    with open(output_file + '_charge.tsv', 'w') as f1,\
            open(output_file + '_atomtype.tsv', 'w') as f2:

        # The atom names of the query molecule become the column headers.
        columns = PandasMol2().read_mol2(q_path).df['atom_name'].values
        f1.write('dbase\tquery\t%s\n' % '\t'.join(columns))
        f2.write('dbase\tquery\t%s\n' % '\t'.join(columns))
        for dbase_id, query_id, atoms, charges in zip(
                dct_results['dbase'], dct_results['query'],
                dct_results['atoms'], dct_results['charges']):
            f1.write('%s\t%s\t%s\n' % (dbase_id,
                                       query_id,
                                       '\t'.join(format(x, "1.2f")
                                                 for x in charges)))
            f2.write('%s\t%s\t%s\n' % (dbase_id,
                                       query_id,
                                       '\t'.join(atoms)))

    if verbose:
        elapsed = time.time() - start
        n_molecules = cnt
        sys.stdout.write(' | scanned %d molecules | %d mol/sec\n' %
                         (n_molecules, n_molecules / elapsed))
        sys.stdout.flush()


def get_num_cpus(n_cpus):
    """Translate the --processes argument into a process count.

    0 (or None) selects all available CPUs, a negative value selects all
    CPUs minus its absolute value (but never fewer than 1), and a
    positive value is returned unchanged.
    """
    if not n_cpus:
        return cpu_count()
    if n_cpus < 0:
        # n_cpus is negative, so adding it subtracts from the CPU count.
        return max(1, cpu_count() + n_cpus)
    return n_cpus


def main(input_dir, output_dir, verbose, n_cpus):
    """Create the matching tables for all overlay pairs in `input_dir`."""
    n_cpus = get_num_cpus(n_cpus)

    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    mol2_in_files = get_mol2_files(input_dir)

    q_list, d_list = get_dbase_query_pairs(mol2_in_files)

    csv_out_bases = [os.path.join(output_dir,
                                  os.path.basename(mol2).replace(
                                    '_dbase.mol2.gz', '').replace(
                                    '_dbase.mol2', ''))
                     for mol2 in d_list]

    cache = {}
    for q, d, c in zip(q_list, d_list, csv_out_bases):
        read_and_write(q_path=q,
                       d_path=d,
                       verbose=verbose,
                       cache=cache,
                       output_file=c,
                       n_cpus=n_cpus)


if __name__ == '__main__':

    parser = argparse.ArgumentParser(
            description='Generates tab-separated tables containing atom'
                        '\n type and charge information from matching'
                        '\n atoms in pair-wise overlays.\n',
            epilog="""Example:
python funcgroup_matching.py\\
  --input rocs_overlays_sorted/\\
  --output matching_tables/\\
  --max_distance 1.3\\
  --processes 0""",
            formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('-i', '--input',
                        type=str,
                        required=True,
                        help='(Required.) Path to a directory containing pairs '
                             '\nof `*_query.mol2`/`.mol2.gz` '
                             '\nand `*_dbase.mol2`/`.mol2.gz` files')
    parser.add_argument('-o', '--output',
                        type=str,
                        required=True,
                        help='(Required.) Path to a directory for writing'
                             '\nthe output files')
    parser.add_argument('-d', '--max_distance',
                        type=float,
                        default=1.3,
                        help='(Optional, default: `1.3`.) The maximum distance,'
                             '\nin angstroms, the'
                             '\noverlayed atoms can be apart from each'
                             '\nother for being considered a match.'
                             '\nFor instance, a --max_distance 1.3 (default)'
                             '\nwould count atoms as a match if they'
                             '\nare within 0 and 1.3 angstroms'
                             '\nto the target atom.')
    parser.add_argument('--processes',
                        type=int,
                        default=1,
                        help='(Optional, default: `1`.) Number of processes to'
                             ' run in parallel.'
                             '\nIf processes > 0, the specified number of CPUs'
                             '\nwill be used.'
                             '\nIf processes = 0, all available CPUs will'
                             '\nbe used.'
                             '\nIf processes < 0, all available CPUs'
                             '\nminus |processes| will be used (at least 1).')
    parser.add_argument('-v', '--verbose',
                        type=int,
                        default=1,
                        help='(Optional, default: `1`.) Verbosity level. If 0,'
                             ' does not print any output.'
                             '\nIf 1 (default), prints the file currently'
                             ' processing.')

    parser.add_argument('--version', action='version', version='v. 1.0')

    args = parser.parse_args()
    THRESHOLD = args.max_distance

    main(input_dir=args.input,
         output_dir=args.output,
         verbose=args.verbose,
         n_cpus=args.processes)

# --------------------------------------------------------------------------------
# /tools/funcgroup_matching_selection.py:
# --------------------------------------------------------------------------------
# Sebastian Raschka 2017
#
# screenlamp is a Python toolkit
# for hypothesis-driven virtual screening.
#
# Copyright (C) 2017 Michigan State University
# License: Apache v2
#
# Software author: Sebastian Raschka
# Software author email: mail@sebastianraschka.com
#
# Software source repository: https://github.com/rasbt/screenlamp
# Documentation: https://psa-lab.github.io/screenlamp
#
# screenlamp was developed in the
# Protein Structural Analysis & Design Laboratory
# (http://www.kuhnlab.bmb.msu.edu)
#
# If you are using screenlamp in your research, please cite
# the following journal article:
#
# Raschka, Sebastian, Anne M. Scott, Nan Liu,
# Santosh Gunturu, Mar Huertas, Weiming Li,
# and Leslie A. Kuhn. 2017
#
# Enabling the hypothesis-driven prioritization of
# ligand candidates in big databases:
# Screenlamp and its application to GPCR inhibitor
# discovery for invasive species control.
#

import argparse
import os
import re
import sys
import pandas as pd
import gzip
import time
from biopandas.mol2 import split_multimol2


def get_tsv_pairs(all_tsv):
    """Split `all_tsv` into aligned atom type and charge table lists.

    Returns `(a_list, c_list)` where `a_list[i]` and `c_list[i]` share
    the same file name stem.

    Raises a ValueError if the number of `*_atomtype.tsv` and
    `*_charge.tsv` files differs or if a table has no partner with the
    same stem.
    """
    a_suffix, c_suffix = '_atomtype.tsv', '_charge.tsv'
    a_list, c_list = [], []
    for a in all_tsv:
        if a.endswith(a_suffix):
            a_list.append(a)
        elif a.endswith(c_suffix):
            c_list.append(a)
    if len(a_list) != len(c_list):
        raise ValueError('The input directory contains an unequal number of'
                         ' *_atomtype.tsv* and *_charge.tsv* files.')

    # Sort both lists by their common stem; the order of the directory
    # listing is arbitrary and must not decide which tables are paired.
    a_list.sort(key=lambda p: p[:-len(a_suffix)])
    c_list.sort(key=lambda p: p[:-len(c_suffix)])
    for a, c in zip(a_list, c_list):
        if a[:-len(a_suffix)] != c[:-len(c_suffix)]:
            raise ValueError('No matching *_atomtype.tsv*/*_charge.tsv*'
                             ' partner for %s and %s.' % (a, c))
    return a_list, c_list


def parse_selection_string(s, columns, df_name='df'):
    """Turn a selection string into `pandas.eval`-ready expressions.

    Column names in `s` are qualified with `df_name`, the string is
    split on '-->', and each part is wrapped as `<df_name>[<part>]`.

    Only stand-alone column names are qualified: names that are part of
    a longer identifier (`C1` inside `C10`), that are already qualified,
    or that appear inside a quoted value (`'S.3'`) are left untouched.
    """
    # Longest names first so that e.g. `C10` wins over `C1`.
    names = sorted((str(c) for c in columns), key=len, reverse=True)
    if names:
        pattern = re.compile(r'(?<![\w.\'"])(%s)(?![\w.\'"])'
                             % '|'.join(re.escape(n) for n in names))
        s = pattern.sub(lambda m: '%s.%s' % (df_name, m.group(1)), s)
    parts = s.replace(' --> ', '-->').split('-->')
    return ['%s[%s]' % (df_name, sub) for sub in parts]


def _collect_mol2_entries(mol2_path, positions):
    """Return `{position: mol2_lines}` for the requested entries.

    The multi-MOL2 file is read once, and reading stops as soon as all
    requested positions were found.
    """
    remaining = set(positions)
    found = {}
    if not remaining:
        return found
    for idx, mol2 in enumerate(split_multimol2(mol2_path)):
        if idx in remaining:
            found[idx] = mol2[1]
            remaining.discard(idx)
            if not remaining:
                break
    return found


def _write_mol2_subset(in_path, out_path, positions, mol2_ids):
    """Copy the entries at `positions` from `in_path` to `out_path`.

    The output is gzip-compressed if the input is. If an entry cannot be
    found, a `DID NOT FIND <id>` line is written in its place, where the
    ID is looked up in `mol2_ids` (a Series indexed by position).
    """
    is_gz = in_path.endswith('.gz')
    entries = _collect_mol2_entries(in_path, positions)

    if is_gz:
        out = gzip.open(out_path, 'wb')
    else:
        out = open(out_path, 'w')

    with out:
        for i in positions:
            lines = entries.get(i)
            if lines is None:
                msg = 'DID NOT FIND %s\n' % (mol2_ids.loc[i])
                out.write(msg.encode('utf-8') if is_gz else msg)
            elif is_gz:
                # Entries of gzipped files are lists of byte strings.
                out.write(b''.join(lines))
            else:
                out.write(''.join(lines))


def main(input_dir, output_dir, atomtype_selection, charge_selection,
         input_mol2, verbose):
    """Filter the matching tables in `input_dir` by the given selections.

    The filtered atom type and charge tables are written to
    `output_dir`. If `input_mol2` is given, the corresponding query and
    database MOL2 structures are written to `output_dir` as well.
    """
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    all_tsv_base = [f for f in os.listdir(input_dir) if f.endswith('.tsv')]
    all_tsv_full = [os.path.join(input_dir, f) for f in all_tsv_base]
    a_inlist, c_inlist = get_tsv_pairs(all_tsv_full)
    a_outlist, c_outlist = get_tsv_pairs(all_tsv_base)
    a_outlist = [os.path.join(output_dir, f) for f in a_outlist]
    c_outlist = [os.path.join(output_dir, f) for f in c_outlist]

    for a_in, a_out, c_in, c_out in zip(a_inlist, a_outlist,
                                        c_inlist, c_outlist):

        if verbose:
            start = time.time()
            sys.stdout.write('Processing %s/%s' % (os.path.basename(a_in),
                                                   os.path.basename(c_in)))
            sys.stdout.flush()

        # NOTE: the names `df_atom` and `df_charge` are referenced by the
        # parsed selection strings; `pandas.eval` resolves them in this
        # frame, so they must stay local variables of this function and
        # `pd.eval` must not be moved into a comprehension.
        df_charge = pd.read_table(c_in, sep='\t')
        for c in df_charge.columns[2:]:
            df_charge[c] = pd.to_numeric(df_charge[c])
        df_atom = pd.read_table(a_in, sep='\t')
        mol2_cnt = df_atom.shape[0]

        if atomtype_selection:
            atom_sele = parse_selection_string(s=atomtype_selection,
                                               columns=df_atom.columns,
                                               df_name='df_atom')

            # Each sub-selection narrows down the previous result.
            for sele in atom_sele:
                df_atom = pd.eval(sele)

        if charge_selection:
            charge_sele = parse_selection_string(s=charge_selection,
                                                 columns=df_charge.columns,
                                                 df_name='df_charge')

            for sele in charge_sele:
                df_charge = pd.eval(sele)

        # Keep the rows that passed both the atom type and charge filter.
        selection_indices = sorted(
            set(df_charge.index).intersection(df_atom.index))

        df_atom.loc[selection_indices].to_csv(a_out, sep='\t')
        df_charge.loc[selection_indices].to_csv(c_out, sep='\t')

        if input_mol2:
            input_mol2_path_query = os.path.join(input_mol2, os.path.basename(
                c_out).replace('_charge.tsv',
                               '_query.mol2'))
            input_mol2_path_dbase = input_mol2_path_query.replace(
                '_query.mol2', '_dbase.mol2')

            # Fall back to the gzipped file if the plain one is missing.
            if not os.path.exists(input_mol2_path_query)\
                    and os.path.exists(input_mol2_path_query + '.gz'):
                input_mol2_path_query += '.gz'
            if not os.path.exists(input_mol2_path_dbase)\
                    and os.path.exists(input_mol2_path_dbase + '.gz'):
                input_mol2_path_dbase += '.gz'

            output_mol2_path_query = os.path.join(output_dir,
                                                  os.path.basename(
                                                    c_out).replace(
                                                        '_charge.tsv',
                                                        '_query.mol2'))
            output_mol2_path_dbase = output_mol2_path_query.replace(
                '_query.mol2', '_dbase.mol2')

            if input_mol2_path_query.endswith('.gz'):
                output_mol2_path_query += '.gz'
            if input_mol2_path_dbase.endswith('.gz'):
                output_mol2_path_dbase += '.gz'

            # The table row labels are the positions of the molecules in
            # the multi-MOL2 files.
            _write_mol2_subset(in_path=input_mol2_path_query,
                               out_path=output_mol2_path_query,
                               positions=selection_indices,
                               mol2_ids=df_atom['query'])
            _write_mol2_subset(in_path=input_mol2_path_dbase,
                               out_path=output_mol2_path_dbase,
                               positions=selection_indices,
                               mol2_ids=df_atom['dbase'])

        if verbose:
            elapsed = time.time() - start
            n_molecules = mol2_cnt
            sys.stdout.write(' | scanned %d molecules | %d mol/sec\n' %
                             (n_molecules, n_molecules / elapsed))
            sys.stdout.flush()


if __name__ == '__main__':

    parser = argparse.ArgumentParser(
            description='Selects molecules with certain functional group matching patterns after functional group matching.',
            epilog="""Example:
python funcgroup_matching_selection.py\\
   --input 07_fgroup_matching_tables\\
   --input_mol2 06_rocs_overlays_sorted\\
   --output 08_funcgroup_selection\\
   --atomtype_selection "((S1 == 'S.3') | (S1 == 'S.o2')) --> (O2 == 'O.2')"\\
   --charge_selection "((S1 >= 1.0)) --> (O2 <= -0.5)"

# 07_fgroup_matching_tables was generated via funcgroup_matching.py
# 06_rocs_overlays_sorted was generated via sort_rocs_mol2.py""",
            formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('-i', '--input',
                        type=str,
                        required=True,
                        help=('(Required.) Input directory with input `.tsv` tables (functional group files'
                              ' generated via `funcgroup_matching.py`).'))
    parser.add_argument('--input_mol2',
                        type=str,
                        help=('(Optional.) Input directory with input `.mol2` structures (ROCS overlays'
                              '\ngenerated via `sort_rocs_mol2.py`). If provided, the MOL2 structures'
                              '\ncorresponding to the selected matches will be extracted from the'
                              '\ninput_mol2 directory and written to the output directory for visual inspection,'
                              '\nfor example, using PyMOL.'))
    parser.add_argument('-o', '--output',
                        type=str,
                        required=True,
                        help='(Required.) Directory for writing the output files.')
    parser.add_argument('--atomtype_selection',
                        type=str,
                        default="",
                        help="""(Optional, default="") Selection condition for the atom types.
For example, the following selection query will make a selection based on
matching 2 atoms in the reference molecule, S1 and O2:
"((S1 == 'S.3') | (S1 == 'S.o2')) --> (O2 == 'O.2')".
Here, S1 can either match an S.3 or an S.o2 atom in the database molecule.
The second atom, O2, must match an atom of type O.2.""")
    parser.add_argument('--charge_selection',
                        type=str,
                        default="",
                        help="""(Optional, default="") Selection condition for the atom charges.
For example, the following selection query will make a selection based on
matching the charges in 2 atoms in the reference molecule, S1 and O2:
"((S1 >= 1.0)) --> (O2 <= -0.5)".
Here, the atom that matches S1 has to have a positive charge, 1 or greater. The charge
matching the second atom, O2, must be (partially) negative (-0.5 or smaller).""")
    parser.add_argument('-v', '--verbose',
                        type=int,
                        default=1,
                        help='(Optional, default: `1`.) Verbosity level. If 0, does not print any'
                             '\noutput.'
                             '\nIf 1 (default), prints the file currently'
                             '\nprocessing.')

    parser.add_argument('--version', action='version', version='v. 1.0')

    args = parser.parse_args()

    main(input_dir=args.input,
         output_dir=args.output,
         atomtype_selection=args.atomtype_selection,
         charge_selection=args.charge_selection,
         input_mol2=args.input_mol2,
         verbose=args.verbose)

# --------------------------------------------------------------------------------
# /tools/funcgroup_presence_to_id.py:
# --------------------------------------------------------------------------------
# Sebastian Raschka 2017
#
# screenlamp is a Python toolkit
# for hypothesis-driven virtual screening.
#
# Copyright (C) 2017 Michigan State University
# License: Apache v2
#
# Software author: Sebastian Raschka
# Software author email: mail@sebastianraschka.com
#
# Software source repository: https://github.com/rasbt/screenlamp
# Documentation: https://psa-lab.github.io/screenlamp
#
# screenlamp was developed in the
# Protein Structural Analysis & Design Laboratory
# (http://www.kuhnlab.bmb.msu.edu)
#
# If you are using screenlamp in your research, please cite
# the following journal article:
#
# Raschka, Sebastian, Anne M. Scott, Nan Liu,
# Santosh Gunturu, Mar Huertas, Weiming Li,
# and Leslie A. Kuhn. 2017
#
# Enabling the hypothesis-driven prioritization of
# ligand candidates in big databases:
# Screenlamp and its application to GPCR inhibitor
# discovery for invasive species control.
#


import os
import argparse
import sys
import pandas as pd
import time
from mputil import lazy_imap
from multiprocessing import cpu_count
from biopandas.mol2 import split_multimol2
from biopandas.mol2 import PandasMol2


def get_mol2_files(dir_path):
    """Return the `.mol2`/`.mol2.gz` file paths found at `dir_path`.

    `dir_path` may be a directory (its matching entries are returned in
    sorted order so that runs are reproducible) or a single MOL2 file.
    An empty list is returned if nothing matches.
    """
    suffixes = ('.mol2', '.mol2.gz')

    if os.path.isdir(dir_path):
        return [os.path.join(dir_path, f)
                for f in sorted(os.listdir(dir_path))
                if f.endswith(suffixes)]

    if os.path.isfile(dir_path) and dir_path.endswith(suffixes):
        return [dir_path]

    return []


def parse_selection_string(s, df_name='pdmol.df'):
    """Split a selection on '-->' and qualify the MOL2 column names.

    Every occurrence of "(<column>" is rewritten to
    "(<df_name>.<column>" so that the resulting sub-expressions can be
    evaluated with `pandas.eval`.
    """
    columns = ['(atom_id', '(atom_name', '(atom_type',
               '(subst_id', '(subst_name', '(charge']
    parsed = []

    for subs in s.split('-->'):
        subs = subs.strip()
        for c in columns:
            subs = subs.replace(c, '(%s.%s' % (df_name, c[1:]))
        parsed.append(subs)
    return parsed


def _contains_all_selections(mol2):
    """Return True if the molecule matches every SELECTION expression.

    `mol2` is a (mol2_code, mol2_lines) pair as yielded by
    `split_multimol2`.
    """
    # NOTE: the local variable must be called `pdmol`: the SELECTION
    # expressions reference `pdmol.df`, and `pandas.eval` resolves names
    # in the frame that calls it. For the same reason, the loop below
    # must not be turned into a comprehension or generator expression.
    pdmol = PandasMol2().read_mol2_from_list(mol2_lines=mol2[1],
                                             mol2_code=mol2[0])

    for sub_sele in SELECTION:
        if not pd.eval(sub_sele).any():
            return False
    return True


def data_processor(mol2):
    """Return the molecule ID if `mol2` matches, else an empty string."""
    if _contains_all_selections(mol2):
        return mol2[0]
    return ''


def data_processor_gz(mol2_gz):
    """Like `data_processor`, for entries read from gzipped files.

    The molecule ID of such entries is a byte string and is decoded
    before it is returned.
    """
    if _contains_all_selections(mol2_gz):
        return mol2_gz[0].decode('utf-8')
    return ''


def read_and_write(mol2_files, id_file_path, verbose, n_cpus):
    """Write the IDs of all matching molecules to `id_file_path`.

    Each file in `mol2_files` is processed in parallel chunks via
    `lazy_imap`; one ID is written per line.
    """
    if verbose:
        sys.stdout.write('Using selection: %s\n' % SELECTION)
        sys.stdout.flush()

    with open(id_file_path, 'w') as f:

        for mol2_file in mol2_files:
            if verbose:
                start = time.time()
                sys.stdout.write('Processing %s' % os.path.basename(mol2_file))
                sys.stdout.flush()

            cnt = 0

            if mol2_file.endswith('.gz'):
                data_processor_fn = data_processor_gz
            else:
                data_processor_fn = data_processor

            for chunk in lazy_imap(data_processor=data_processor_fn,
                                   data_generator=split_multimol2(
                                       mol2_file),
                                   n_cpus=n_cpus):

                # Non-matching molecules are reported as empty strings.
                for mol2_id in chunk:
                    if mol2_id:
                        f.write('%s\n' % mol2_id)
                cnt += len(chunk)

            if verbose:
                elapsed = time.time() - start
                sys.stdout.write(' | %d mol/sec\n' % (cnt / elapsed))
                sys.stdout.flush()


def get_num_cpus(n_cpus):
    """Translate the --processes argument into a process count.

    0 (or None) selects all available CPUs, a negative value selects all
    CPUs minus its absolute value (but never fewer than 1), and a
    positive value is returned unchanged.
    """
    if not n_cpus:
        return cpu_count()
    if n_cpus < 0:
        # n_cpus is negative, so adding it subtracts from the CPU count.
        return max(1, cpu_count() + n_cpus)
    return n_cpus


def main(input_dir, output_file, verbose, n_cpus):
    """Filter the MOL2 files in `input_dir` and write the matching IDs."""
    n_cpus = get_num_cpus(n_cpus)
    dirpath = os.path.dirname(output_file)
    # `dirpath` is empty when the output file goes to the current
    # working directory; there is nothing to create in that case.
    if dirpath and not os.path.exists(dirpath):
        os.makedirs(dirpath)
    mol2_files = get_mol2_files(dir_path=input_dir)
    read_and_write(mol2_files=mol2_files,
                   id_file_path=output_file,
                   verbose=verbose,
                   n_cpus=n_cpus)
    if verbose:
        print('Finished')


if __name__ == '__main__':

    parser = argparse.ArgumentParser(
            description="""Checking molecules based on the presence
of certain atoms or functional groups and writing the results to a text file.""",
            epilog="""Example:
python funcgroup_presence_to_id.py --input mol2s/\\
  --output mol2ids.txt\\
  --selection "((atom_type == 'S.3') | (atom_type == 'S.o2')) --> (atom_type == 'O.2')"\\
  --processes 0""",
            formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('-i', '--input',
                        type=str,
                        required=True,
                        help='(Required.) Input directory with `.mol2` and `.mol2.gz` files.')
    parser.add_argument('-o', '--output',
                        type=str,
                        required=True,
                        help='(Required.) Path to the output file for writing'
                             ' the molecule IDs.')
    parser.add_argument('-s', '--selection',
                        type=str,
                        required=True,
                        help='Selection condition for the atom presence'
                             ' checks.'
                             '\n1) Require 2 atom types to be present:'
                             '\n "(atom_type == \'S.o2\') -->'
                             ' (atom_type == \'O.2\')"'
                             '\n2) Selection example to consider either'
                             ' an S.o2 or S.3 atom and a O.2 atom to be present:'
                             '\n "((atom_type == \'S.3\') |'
                             ' (atom_type == \'S.o2\')) -->'
                             ' (atom_type == \'O.2\')"'
                             '\n3) Selection example using logical ORs on '
                             'both sides:\n'
                             ' "((atom_type == \'S.3\') | (atom_type == '
                             '\'S.o2\'))'
                             ' --> ((atom_type == \'O.2\') |'
                             ' (atom_type == \'O.3\'))"')
    parser.add_argument('--processes',
                        type=int,
                        default=1,
                        help='(Optional, default: `1`.) Number of processes to run in parallel.'
                             '\nIf processes > 0, the specified number of CPUs'
                             '\nwill be used.'
                             '\nIf processes = 0, all available CPUs will'
                             '\nbe used.'
                             '\nIf processes < 0, all available CPUs'
                             '\nminus |processes| will be used (at least 1).')
    parser.add_argument('-v', '--verbose',
                        type=int,
                        default=1,
                        help='(Optional, default: `1`.) Verbosity level. If 0, does not print any'
                             '\noutput.'
                             '\nIf 1 (default), prints the file currently'
                             '\nprocessing.')

    parser.add_argument('--version', action='version', version='v. 1.0')

    args = parser.parse_args()
    SELECTION = parse_selection_string(args.selection)

    main(input_dir=args.input,
         output_file=args.output,
         verbose=args.verbose,
         n_cpus=args.processes)

# --------------------------------------------------------------------------------
# /tools/generate_conformers_obabel.py:
# --------------------------------------------------------------------------------
# Sebastian Raschka 2017
#
# screenlamp is a Python toolkit
# for hypothesis-driven virtual screening.
#
# Copyright (C) 2017 Michigan State University
# License: Apache v2
#
# Software author: Sebastian Raschka
# Software author email: mail@sebastianraschka.com
#
# Software source repository: https://github.com/rasbt/screenlamp
# Documentation: https://psa-lab.github.io/screenlamp
#
# screenlamp was developed in the
# Protein Structural Analysis & Design Laboratory
# (http://www.kuhnlab.bmb.msu.edu)
#
# If you are using screenlamp in your research, please cite
# the following journal article:
#
# Raschka, Sebastian, Anne M. Scott, Nan Liu,
# Santosh Gunturu, Mar Huertas, Weiming Li,
# and Leslie A. Kuhn. 2017
#
# Enabling the hypothesis-driven prioritization of
# ligand candidates in big databases:
# Screenlamp and its application to GPCR inhibitor
# discovery for invasive species control.
if __name__ == '__main__':

    # Command-line entry point: parse the arguments and run Confab on
    # every MOL2 partition found under --input.
    parser = argparse.ArgumentParser(
            description='Wrapper running OpenBabel Confab on one'
                        '\nor more database partitions.'
                        ' Please see'
                        '\nhttp://open-babel.readthedocs.io/en/latest/'
                        '3DStructureGen/multipleconformers.html'
                        '\nif you want to learn more about OpenBabel Confab.',
            epilog="""Example:
python generate_conformers_obabel.py\\
   --input dbase_mol2/\\
   --output dbase_conformers/\\
   --executable /.../obabel""",
            formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('-i', '--input',
                        type=str,
                        required=True,
                        help='Input directory with `.mol2`'
                             ' and `.mol2.gz` files.')
    parser.add_argument('-o', '--output',
                        type=str,
                        required=True,
                        help='Directory for writing the output files.')
    parser.add_argument('--executable',
                        type=str,
                        required=True,
                        help="""(Required.) The path or command for running
OpenBabel Confab on your system.""")
    parser.add_argument('--settings',
                        type=str,
                        # Keep this a single literal: adjacent string
                        # literals are joined without a separator, which
                        # previously fused '50' and '--rcutoff' into one
                        # invalid token after settings.split().
                        default='--conf 200 --ecutoff 50 --rcutoff 0.5',
                        help='(Optional.) OpenBabel settings to use.')

    parser.add_argument('-v', '--version', action='version', version='v. 1.0')

    args = parser.parse_args()

    # run_obabel builds its command line from this module-level name.
    EXECUTABLE = args.executable

    main(input_dir=args.input,
         output_dir=args.output,
         settings=args.settings)
def get_num_cpus(n_cpus):
    """Translate the `--processes` value into a concrete process count.

    Parameters
    ----------
    n_cpus : int or None
        > 0: use exactly this many processes.
        0 (or None): use all CPUs reported by `cpu_count()`.
        < 0: use all CPUs except `abs(n_cpus)` of them.

    Returns
    -------
    int
        Number of processes to use; never less than 1 for negative input.
    """
    if not n_cpus:
        return cpu_count()
    if n_cpus < 0:
        # n_cpus is negative here, so it has to be *added* to reserve
        # CPUs; subtracting it would request more CPUs than exist.
        # Clamp so that an over-large reservation still leaves one worker.
        return max(1, cpu_count() + n_cpus)
    return n_cpus
def main(input_dir, output_dir, n_processes, settings):
    """Run OMEGA once for every MOL2 file collected from `input_dir`.

    The output directory is created when it does not exist yet, and
    each result file keeps the base name of the input it came from.
    `n_processes` is resolved through `get_num_cpus` before the first
    OMEGA call; `settings` is forwarded unchanged.
    """
    output_missing = not os.path.exists(output_dir)
    if output_missing:
        os.mkdir(output_dir)

    sources = get_mol2_files(input_dir)
    worker_count = get_num_cpus(n_processes)

    for source in sources:
        # Mirror the input file name inside the output directory.
        target = os.path.join(output_dir, os.path.basename(source))
        run_omega(source_file=source,
                  target_file=target,
                  n_processes=worker_count,
                  settings=settings)
139 | '\nIf processes = 0, all available CPUs will' 140 | '\nbe used.' 141 | '\nIf processes = -1, all available CPUs' 142 | '\nminus `processes` will be used.') 143 | 144 | parser.add_argument('-v', '--version', action='version', version='v. 1.0') 145 | 146 | args = parser.parse_args() 147 | 148 | EXECUTABLE = args.executable 149 | 150 | main(input_dir=args.input, 151 | output_dir=args.output, 152 | n_processes=args.processes, 153 | settings=args.settings) 154 | -------------------------------------------------------------------------------- /tools/id_to_mol2.py: -------------------------------------------------------------------------------- 1 | # Sebastian Raschka 2017 2 | # 3 | # screenlamp is a Python toolkit 4 | # for hypothesis-driven virtual screening. 5 | # 6 | # Copyright (C) 2017 Michigan State University 7 | # License: Apache v2 8 | # 9 | # Software author: Sebastian Raschka 10 | # Software author email: mail@sebastianraschka.com 11 | # 12 | # Software source repository: https://github.com/rasbt/screenlamp 13 | # Documentation: https://psa-lab.github.io/screenlamp 14 | # 15 | # screenlamp was developed in the 16 | # Protein Structural Analysis & Design Laboratory 17 | # (http://www.kuhnlab.bmb.msu.edu) 18 | # 19 | # If you are using screenlamp in your research, please cite 20 | # the following journal article: 21 | # 22 | # Raschka, Sebastian, Anne M. Scott, Nan Liu, 23 | # Santosh Gunturu, Mar Huertas, Weiming Li, 24 | # and Leslie A. Kuhn. 2017 25 | # 26 | # Enabling the hypothesis-driven prioritization of 27 | # ligand candidates in big databases: 28 | # Screenlamp and its application to GPCR inhibitor 29 | # discovery for invasive species control. 
def get_mol2_files(dir_path):
    """Collect the MOL2 files referenced by `dir_path`.

    Parameters
    ----------
    dir_path : str
        Either a directory that is scanned (non-recursively) for
        `.mol2` / `.mol2.gz` entries, or the path of a single such file.

    Returns
    -------
    list of str
        Matching paths. Directory entries are returned in sorted order
        so that repeated runs process files in the same sequence.
        Empty if nothing matches or the path does not exist.
    """
    # Both suffixes carry the leading dot; a bare 'mol2.gz' would also
    # accept unrelated names such as 'xmol2.gz'.
    suffixes = ('.mol2', '.mol2.gz')

    if os.path.isdir(dir_path):
        return [os.path.join(dir_path, name)
                for name in sorted(os.listdir(dir_path))
                if name.endswith(suffixes)]

    if os.path.isfile(dir_path) and dir_path.endswith(suffixes):
        return [dir_path]

    return []
def main(input_dir, id_file_path, output_dir, includelist_filter, verbose):
    """Filter the MOL2 files under `input_dir` by the IDs in `id_file_path`.

    The MOL2 paths are collected first, then the ID file is read, and
    both are handed to `filter_and_write` together with the output
    directory and the include/exclude switch. Prints 'Finished' at the
    end when `verbose` is truthy.
    """
    filter_and_write(mol2_files=get_mol2_files(dir_path=input_dir),
                     ids=read_idfile(id_file_path),
                     output_dir=output_dir,
                     includelist_filter=includelist_filter,
                     verbose=verbose)

    if not verbose:
        return
    print('Finished')
169 | '\nUses ID file as excludelist if False.') 170 | parser.add_argument('-v', '--verbose', 171 | type=int, 172 | default=1, 173 | help='(Optional, default: `1`.) Verbosity level. If 0, does not print any' 174 | '\noutput.' 175 | '\nIf 1 (default), prints the file currently' 176 | '\nprocessing.') 177 | 178 | parser.add_argument('--version', action='version', version='v. 1.0') 179 | 180 | args = parser.parse_args() 181 | 182 | main(input_dir=args.input, 183 | id_file_path=args.id_file, 184 | output_dir=args.output, 185 | includelist_filter=args.includelist, 186 | verbose=args.verbose) 187 | -------------------------------------------------------------------------------- /tools/merge_id_files.py: -------------------------------------------------------------------------------- 1 | # Sebastian Raschka 2017 2 | # 3 | # screenlamp is a Python toolkit 4 | # for hypothesis-driven virtual screening. 5 | # 6 | # Copyright (C) 2017 Michigan State University 7 | # License: Apache v2 8 | # 9 | # Software author: Sebastian Raschka 10 | # Software author email: mail@sebastianraschka.com 11 | # 12 | # Software source repository: https://github.com/rasbt/screenlamp 13 | # Documentation: https://psa-lab.github.io/screenlamp 14 | # 15 | # screenlamp was developed in the 16 | # Protein Structural Analysis & Design Laboratory 17 | # (http://www.kuhnlab.bmb.msu.edu) 18 | # 19 | # If you are using screenlamp in your research, please cite 20 | # the following journal article: 21 | # 22 | # Raschka, Sebastian, Anne M. Scott, Nan Liu, 23 | # Santosh Gunturu, Mar Huertas, Weiming Li, 24 | # and Leslie A. Kuhn. 2017 25 | # 26 | # Enabling the hypothesis-driven prioritization of 27 | # ligand candidates in big databases: 28 | # Screenlamp and its application to GPCR inhibitor 29 | # discovery for invasive species control. 
def main(id_file_path_1, id_file_path_2, output_path):
    """Merge two molecule ID files into one file without duplicates.

    IDs are written in order of first appearance: all new IDs from the
    first file, then those from the second file that were not seen yet.
    Surrounding whitespace is stripped; comment lines (starting with
    '#') and blank lines are skipped.

    Parameters
    ----------
    id_file_path_1, id_file_path_2 : str
        Input ID files with one molecule ID per line.
    output_path : str
        Path of the merged ID file (overwritten if it exists).
    """
    seen = set()
    with open(output_path, 'w') as ofile:
        for id_file_path in (id_file_path_1, id_file_path_2):
            with open(id_file_path, 'r') as id_file:
                for line in id_file:
                    mol_id = line.strip()
                    # An empty string is not a molecule ID; writing it
                    # would put a stray blank entry into the output.
                    if not mol_id or mol_id.startswith('#'):
                        continue
                    if mol_id in seen:
                        continue
                    ofile.write('%s\n' % mol_id)
                    seen.add(mol_id)
1.0') 90 | 91 | args = parser.parse_args() 92 | 93 | main(id_file_path_1=args.input1, 94 | id_file_path_2=args.input2, 95 | output_path=args.output) 96 | -------------------------------------------------------------------------------- /tools/mol2_to_id.py: -------------------------------------------------------------------------------- 1 | # Sebastian Raschka 2017 2 | # 3 | # screenlamp is a Python toolkit 4 | # for hypothesis-driven virtual screening. 5 | # 6 | # Copyright (C) 2017 Michigan State University 7 | # License: Apache v2 8 | # 9 | # Software author: Sebastian Raschka 10 | # Software author email: mail@sebastianraschka.com 11 | # 12 | # Software source repository: https://github.com/rasbt/screenlamp 13 | # Documentation: https://psa-lab.github.io/screenlamp 14 | # 15 | # screenlamp was developed in the 16 | # Protein Structural Analysis & Design Laboratory 17 | # (http://www.kuhnlab.bmb.msu.edu) 18 | # 19 | # If you are using screenlamp in your research, please cite 20 | # the following journal article: 21 | # 22 | # Raschka, Sebastian, Anne M. Scott, Nan Liu, 23 | # Santosh Gunturu, Mar Huertas, Weiming Li, 24 | # and Leslie A. Kuhn. 2017 25 | # 26 | # Enabling the hypothesis-driven prioritization of 27 | # ligand candidates in big databases: 28 | # Screenlamp and its application to GPCR inhibitor 29 | # discovery for invasive species control. 
def mol2_to_idfile(mol2_files, id_file_path, verbose=0):
    """Write the molecule ID of every structure in `mol2_files` to a file.

    Parameters
    ----------
    mol2_files : list of str
        Paths of (multi-)MOL2 files, read via `split_multimol2`.
    id_file_path : str
        Output text file; one molecule ID per line (overwritten).
    verbose : int, default 0
        If truthy, prints the file name, the number of scanned
        molecules and the throughput for every input file.
    """
    with open(id_file_path, 'w') as f:
        for mol2_file in mol2_files:

            if verbose:
                sys.stdout.write('Processing %s' % os.path.basename(mol2_file))
                sys.stdout.flush()
                start = time.time()

            # Count explicitly so that an input without any molecule
            # reports 0 instead of hitting an unbound loop variable.
            n_molecules = 0
            for mol2 in split_multimol2(mol2_file):
                mol2_id = mol2[0]
                # The sibling tools decode IDs coming from `.gz` inputs,
                # i.e. those arrive as bytes; decode before writing text.
                if isinstance(mol2_id, bytes):
                    mol2_id = mol2_id.decode('utf-8')
                f.write(mol2_id + '\n')
                n_molecules += 1

            if verbose:
                elapsed = time.time() - start
                # Timer resolution can yield 0.0 for tiny files.
                rate = n_molecules / elapsed if elapsed else 0
                sys.stdout.write(' | scanned %d molecules | %d mol/sec\n' %
                                 (n_molecules, rate))
                sys.stdout.flush()
Output path for the ID file.' 106 | ' For example, `ids.txt`.') 107 | parser.add_argument('-v', '--verbose', 108 | type=int, 109 | default=1, 110 | help='(Optional, default: `1`.)' 111 | ' Verbosity level. If 0, does not print any' 112 | ' output.' 113 | ' If 1 (default), prints the file currently' 114 | ' processing.') 115 | 116 | parser.add_argument('--version', action='version', version='v. 1.0') 117 | 118 | args = parser.parse_args() 119 | 120 | main(args.input, args.output, args.verbose) 121 | -------------------------------------------------------------------------------- /tools/overlay_molecules_rocs.py: -------------------------------------------------------------------------------- 1 | # Sebastian Raschka 2017 2 | # 3 | # screenlamp is a Python toolkit 4 | # for hypothesis-driven virtual screening. 5 | # 6 | # Copyright (C) 2017 Michigan State University 7 | # License: Apache v2 8 | # 9 | # Software author: Sebastian Raschka 10 | # Software author email: mail@sebastianraschka.com 11 | # 12 | # Software source repository: https://github.com/rasbt/screenlamp 13 | # Documentation: https://psa-lab.github.io/screenlamp 14 | # 15 | # screenlamp was developed in the 16 | # Protein Structural Analysis & Design Laboratory 17 | # (http://www.kuhnlab.bmb.msu.edu) 18 | # 19 | # If you are using screenlamp in your research, please cite 20 | # the following journal article: 21 | # 22 | # Raschka, Sebastian, Anne M. Scott, Nan Liu, 23 | # Santosh Gunturu, Mar Huertas, Weiming Li, 24 | # and Leslie A. Kuhn. 2017 25 | # 26 | # Enabling the hypothesis-driven prioritization of 27 | # ligand candidates in big databases: 28 | # Screenlamp and its application to GPCR inhibitor 29 | # discovery for invasive species control. 
def run_rocs(source_file, target_file, n_processes, settings):
    """Overlay one database partition onto the query with OpenEye ROCS.

    Parameters
    ----------
    source_file : str
        MOL2 database file passed to ROCS via `-dbase`.
    target_file : str
        Path whose part before '.mol2' is used as the ROCS `-prefix`.
    n_processes : int
        Value passed to ROCS via `-mpi_np`.
    settings : str
        Extra, whitespace-separated ROCS options appended to the command.

    Notes
    -----
    Reads the module-level names `EXECUTABLE` and `QUERY_FILE`.
    """
    prefix = ''.join(target_file.split('.mol2')[:-1])

    sys.stdout.write('Processing %s\n' % os.path.basename(source_file))
    sys.stdout.flush()

    # The query counts as multi-conformer as soon as a second entry is
    # seen. Starting from 'false' keeps the value defined even if the
    # query file yields no molecule at all.
    mcquery = 'false'
    for idx, _ in enumerate(split_multimol2(QUERY_FILE)):
        if idx >= 1:
            mcquery = 'true'
            break

    cmd = [EXECUTABLE,
           '-ref', QUERY_FILE,
           '-dbase', source_file,
           '-outputquery', 'false',
           '-prefix', prefix,
           '-mcquery', mcquery,
           '-mpi_np', str(n_processes),
           '-oformat', 'mol2']

    if settings:
        # str.split() without arguments already drops empty tokens.
        cmd.extend(settings.split())

    # stdout was never read; routing it into a PIPE with call() can block
    # the child once the OS pipe buffer is full, so discard it instead.
    subprocess.call(cmd, stdout=subprocess.DEVNULL)
If a multi-conformer file is' 165 | '\nsubmitted, please make sure that all' 166 | '\nconformers in the mol2 file have the same' 167 | '\nmolecule ID/Name.') 168 | parser.add_argument('--executable', 169 | type=str, 170 | required=True, 171 | help="""(Required.) The path or command for running 172 | OpenEye ROCS on your system.""") 173 | parser.add_argument('--settings', 174 | type=str, 175 | default='-rankby TanimotoCombo -maxhits 0' 176 | ' -besthits 0 -progress percent', 177 | help='(Optional, default:" -rankby TanimotoCombo -maxhits 0' 178 | ' -besthits 0 -progress percent")\n ROCS settings to use.') 179 | parser.add_argument('--processes', 180 | type=int, 181 | default=1, 182 | help='(Optional, default: `1`.) Number of processes to' 183 | ' run in parallel.' 184 | '\nIf processes > 0, the specified number of CPUs' 185 | '\nwill be used.' 186 | '\nIf processes = 0, all available CPUs will' 187 | '\nbe used.' 188 | '\nIf processes = -1, all available CPUs' 189 | '\nminus `processes` will be used.') 190 | 191 | parser.add_argument('-v', '--version', action='version', version='v. 1.0') 192 | 193 | args = parser.parse_args() 194 | 195 | QUERY_FILE = args.query 196 | EXECUTABLE = args.executable 197 | 198 | main(input_dir=args.input, 199 | output_dir=args.output, 200 | n_processes=args.processes, 201 | settings=args.settings) 202 | -------------------------------------------------------------------------------- /tools/overlay_molecules_shapeit.py: -------------------------------------------------------------------------------- 1 | # Sebastian Raschka 2017 2 | # 3 | # screenlamp is a Python toolkit 4 | # for hypothesis-driven virtual screening. 
5 | # 6 | # Copyright (C) 2017 Michigan State University 7 | # License: Apache v2 8 | # 9 | # Software author: Sebastian Raschka 10 | # Software author email: mail@sebastianraschka.com 11 | # 12 | # Software source repository: https://github.com/rasbt/screenlamp 13 | # Documentation: https://psa-lab.github.io/screenlamp 14 | # 15 | # screenlamp was developed in the 16 | # Protein Structural Analysis & Design Laboratory 17 | # (http://www.kuhnlab.bmb.msu.edu) 18 | # 19 | # If you are using screenlamp in your research, please cite 20 | # the following journal article: 21 | # 22 | # Raschka, Sebastian, Anne M. Scott, Nan Liu, 23 | # Santosh Gunturu, Mar Huertas, Weiming Li, 24 | # and Leslie A. Kuhn. 2017 25 | # 26 | # Enabling the hypothesis-driven prioritization of 27 | # ligand candidates in big databases: 28 | # Screenlamp and its application to GPCR inhibitor 29 | # discovery for invasive species control. 30 | # 31 | 32 | import os 33 | import subprocess 34 | import sys 35 | import argparse 36 | from biopandas.mol2.mol2_io import split_multimol2 37 | 38 | 39 | def check_query(query_path): 40 | ids = [mol2[0] for mol2 in split_multimol2(query_path)] 41 | n_ids = len(ids) 42 | if n_ids > 1: 43 | n_unique_ids = len(set(ids)) 44 | if n_unique_ids > 1: 45 | raise ValueError('Please Make sure that you only submit one' 46 | ' molecule or, if you submit a multi-conformer' 47 | ' query, that conformers of the molecule' 48 | ' have all the same molecule ID labels.' 
def run_shapeit(source_file, target_file, settings):
    """Overlay one database partition onto the query with Shape-it.

    Parameters
    ----------
    source_file : str
        MOL2 database file passed to Shape-it via `--dbase`.
    target_file : str
        Output MOL2 path (`--out`); the part before '.mol2' plus '.rpt'
        is used for the score report (`--scores`).
    settings : str
        Extra, whitespace-separated Shape-it options appended to the
        command.

    Notes
    -----
    Reads the module-level names `EXECUTABLE` and `QUERY_FILE`. For a
    `.gz` input a warning is printed, but the command is still run.
    """
    prefix = ''.join(target_file.split('.mol2')[:-1])

    sys.stdout.write('Processing %s\n' % os.path.basename(source_file))
    sys.stdout.flush()

    if source_file.endswith('.gz'):
        # Terminate the warning with a newline so the command echoed
        # below does not end up on the same output line.
        sys.stdout.write('Shape-it does not support compressed files'
                         ' please decompress %s\n' %
                         os.path.basename(source_file))
        sys.stdout.flush()

    cmd = [EXECUTABLE,
           '--reference', QUERY_FILE,
           '--dbase', source_file,
           '--out', target_file,
           '--scores', prefix + '.rpt',
           '--noRef']

    if settings:
        # str.split() without arguments already drops empty tokens.
        cmd.extend(settings.split())

    print(' '.join(cmd))
    # stdout was never read; routing it into a PIPE with call() can block
    # the child once the OS pipe buffer is full, so discard it instead.
    subprocess.call(cmd, stdout=subprocess.DEVNULL)
120 | '\nFor more information about Shape-it, please see' 121 | ' http://silicos-it.be.s3-website-eu-west-1.' 122 | 'amazonaws.com/software/shape-it/1.0.1/shape-it.html', 123 | epilog="""Example: 124 | python overlay_molecules_shapeit.py\\ 125 | --input database_conformers/\\ 126 | --output shapeit_overlays/\\ 127 | --executable 'shape-it'\\ 128 | --query query.mol2\\ 129 | --settings "--rankby Tanimoto""", 130 | formatter_class=argparse.RawTextHelpFormatter) 131 | 132 | parser.add_argument('-i', '--input', 133 | type=str, 134 | required=True, 135 | help='Path to input directory containing the database' 136 | '\nmolecules in `.mol2` and/or `.mol2.gz` format.' 137 | ) 138 | parser.add_argument('-o', '--output', 139 | type=str, 140 | required=True, 141 | help='(Required.) Directory path for writing' 142 | ' the `.mol2`' 143 | '\noverlays and Shape-it score/report (`.rpt`)' 144 | ' files.') 145 | parser.add_argument('--query', 146 | type=str, 147 | required=True, 148 | help='(Required.) Path to the query molecule' 149 | '\nin `.mol2` and/or `.mol2.gz` format.' 150 | '\nThe query molecule file could be a single' 151 | '\nstructure of multiple-conformers of the same' 152 | '\nstructure. If a multi-conformer file is' 153 | '\nsubmitted, please make sure that all' 154 | '\nconformers in the mol2 file have the same' 155 | '\nmolecule ID/Name.') 156 | parser.add_argument('--executable', 157 | type=str, 158 | required=True, 159 | help="""(Required.) The path or command for running 160 | Slicos-it Shape-it on your system.""") 161 | parser.add_argument('--settings', 162 | type=str, 163 | default='--rankBy Tanimoto', 164 | help='(Optional, default:" --rankBy Tanimoto")' 165 | '\nshape-it settings to use.') 166 | 167 | parser.add_argument('-v', '--version', action='version', version='v. 
1.0') 168 | 169 | args = parser.parse_args() 170 | 171 | QUERY_FILE = args.query 172 | EXECUTABLE = args.executable 173 | 174 | main(input_dir=args.input, 175 | output_dir=args.output, 176 | settings=args.settings) 177 | -------------------------------------------------------------------------------- /tools/pipelines/experimental/pipeline-example-1-config_obabel.yaml: -------------------------------------------------------------------------------- 1 | # Sebastian Raschka 2017 2 | # 3 | # screenlamp is a Python toolkit 4 | # for hypothesis-driven virtual screening. 5 | # 6 | # Copyright (C) 2017 Michigan State University 7 | # License: Apache v2 8 | # 9 | # Software author: Sebastian Raschka 10 | # Software author email: mail@sebastianraschka.com 11 | # 12 | # Software source repository: https://github.com/rasbt/screenlamp 13 | # Documentation: https://psa-lab.github.io/screenlamp 14 | # 15 | # screenlamp was developed in the 16 | # Protein Structural Analysis & Design Laboratory 17 | # (http://www.kuhnlab.bmb.msu.edu) 18 | # 19 | # If you are using screenlamp in your research, please cite 20 | # the following journal article: 21 | # 22 | # Raschka, Sebastian, Anne M. Scott, Nan Liu, 23 | # Santosh Gunturu, Mar Huertas, Weiming Li, 24 | # and Leslie A. Kuhn. 2017 25 | # 26 | # Enabling the hypothesis-driven prioritization of 27 | # ligand candidates in big databases: 28 | # Screenlamp and its application to GPCR inhibitor 29 | # discovery for invasive species control. 
30 | # 31 | 32 | general settings: 33 | screenlamp tools directory: /Users/sebastian/code/screenlamp/tools 34 | project output directory: /Users/sebastian/Desktop/screening-results 35 | input mol2 directory: /Users/sebastian/code/screenlamp/example-files/example_1/dataset/mol2 36 | number of cpus: 0 # 0 means all available CPUs (recommended) 37 | 38 | ################################################ 39 | ### Step 01: SELECT MOLECULES FROM DATA TABLE 40 | ################################################ 41 | molecule property filter settings: 42 | datatable path: /Users/sebastian/code/screenlamp/example-files/example_1/dataset/tables/3_prop.xls 43 | # the following filter key selects all molecules with 44 | # fewer than 8 rotatable bonds and a molecular weight 45 | # greater than 200 g/mol 46 | column filter: (NRB <= 7) & (MWT > 200) 47 | 48 | ################################################### 49 | ### Step 02: PREFILTER BY FUNCTIONAL GROUP PRESENCE 50 | ################################################### 51 | functional group presence filter settings: 52 | # the following selection key selects all molecules that 53 | # have an sp3-hybridized or sulfone sulfur atom (MOL2 atom type S.3 or S.o2) 54 | # and a keto group (MOL2 atom type O.2) 55 | selection key: ((atom_type == 'S.3') | (atom_type == 'S.o2')) --> (atom_type == 'O.2') 56 | 57 | ################################################### 58 | ### Step 03: PREFILTER BY FUNCTIONAL GROUP DISTANCE 59 | ################################################### 60 | functional group distance filter settings: 61 | # the following selection criteria select all molecules that 62 | # have an sp3-hybridized or sulfone sulfur atom (MOL2 atom type S.3 or S.o2) 63 | # and a keto group (MOL2 atom type O.2), and where the distance between 64 | # the sulfur and oxygen atoms is between 13 and 20 angstrom 65 | selection key: ((atom_type == 'S.3') | (atom_type == 'S.o2')) --> (atom_type == 'O.2') 66 | distance: 13-20 67 | 68 | 
################################################ 69 | ### Step 04: OpenBabel conformers 70 | ################################################ 71 | OpenBabel Confab settings: 72 | OpenBabel executable: obabel 73 | 74 | ######################################################### 75 | ### Step 05: ROCS OVERLAYS & Step 06: SORT ROCS OVERLAYS 76 | ######################################################### 77 | ROCS settings: 78 | ROCS executable: /Applications/ROCS 3.2.1.4.app/Contents/MacOS/rocs-3.2.1.4 79 | ROCS run rankby: TanimotoCombo 80 | ROCS results sort by: TanimotoCombo,ColorTanimoto 81 | ROCS score threshold: (TanimotoCombo >= 0.75) & (ColorTanimoto >= 0.1) 82 | query molecule path: /Users/sebastian/code/screenlamp/example-files/example_1/dataset/query/3kpzs_conf_subset_nowarts.mol2 83 | # The query molecule above could be a single- or multi-conformer .mol2/.mol2.gz file. 84 | # However, for ROCS, please make sure that the query file does not have 85 | # "multi-conformer warts." I.e., all molecules in the multi-conformer .mol2/.mol2.gz 86 | # file must have exactly the same molecule ID without any enumerating prefixes 87 | # or suffixes. 
88 | 89 | ################################################ 90 | ### Step 07: MATCHING FUNCTIONAL GROUPS 91 | ################################################ 92 | 93 | functional group matching selection settings: 94 | maximum pairwise atom distance: 1.3 # in angstrom 95 | 96 | ################################################ 97 | ### Step 08: SELECTING FUNCTIONAL GROUP MATCHES 98 | ################################################ 99 | 100 | functional group match selection settings: 101 | # the following selection key selects all database molecules 102 | # that meet the two following criteria 103 | # a) an overlay with the S1 atom in the query molecules 104 | # b) an overlay with the O2 atom in the query molecule 105 | # To satisfy a), the matching atom must be of atom type S.3 or S.o2 106 | # and its charge must be at least +1 or more positive. 107 | # To satisfy b), the matching atom must be of atom type O.2 (sp2 oxygen) 108 | # # and its partial charge must be -0.5 or more negative 109 | atomtype selection keys: ((S1 == 'S.3') | (S1 == 'S.o2')) --> (O2 == 'O.2') 110 | charge selection keys: ((S1 >= 1.0)) --> (O2 <= -0.5) 111 | write mol2 files: true 112 | 113 | 114 | -------------------------------------------------------------------------------- /tools/pipelines/pipeline-example-1-config.yaml: -------------------------------------------------------------------------------- 1 | # Sebastian Raschka 2017 2 | # 3 | # screenlamp is a Python toolkit 4 | # for hypothesis-driven virtual screening. 
5 | # 6 | # Copyright (C) 2017 Michigan State University 7 | # License: Apache v2 8 | # 9 | # Software author: Sebastian Raschka 10 | # Software author email: mail@sebastianraschka.com 11 | # 12 | # Software source repository: https://github.com/rasbt/screenlamp 13 | # Documentation: https://psa-lab.github.io/screenlamp 14 | # 15 | # screenlamp was developed in the 16 | # Protein Structural Analysis & Design Laboratory 17 | # (http://www.kuhnlab.bmb.msu.edu) 18 | # 19 | # If you are using screenlamp in your research, please cite 20 | # the following journal article: 21 | # 22 | # Raschka, Sebastian, Anne M. Scott, Nan Liu, 23 | # Santosh Gunturu, Mar Huertas, Weiming Li, 24 | # and Leslie A. Kuhn. 2017 25 | # 26 | # Enabling the hypothesis-driven prioritization of 27 | # ligand candidates in big databases: 28 | # Screenlamp and its application to GPCR inhibitor 29 | # discovery for invasive species control. 30 | # 31 | 32 | general settings: 33 | screenlamp tools directory: /Users/sebastian/code/screenlamp/tools 34 | project output directory: /Users/sebastian/code/screenlamp/example-files/example_1/screening-results 35 | input mol2 directory: /Users/sebastian/code/screenlamp/example-files/example_1/dataset/mol2 36 | number of cpus: 0 # 0 means all available CPUs (recommended) 37 | 38 | ################################################ 39 | ### Step 01: SELECT MOLECULES FROM DATA TABLE 40 | ################################################ 41 | molecule property filter settings: 42 | datatable path: /Users/sebastian/code/screenlamp/example-files/example_1/dataset/tables/3_prop.xls 43 | # the following filter key selects all molecules with 44 | # fewer than 8 rotatable bonds and a molecular weight 45 | # greater than 200 g/mol 46 | column filter: (NRB <= 7) & (MWT > 200) 47 | 48 | ################################################### 49 | ### Step 02: PREFILTER BY FUNCTIONAL GROUP PRESENCE 50 | ################################################### 51 | functional group 
presence filter settings: 52 | # the following selection key selects all molecules that 53 | # have an sp3-hybridized or sulfone sulfur atom (MOL2 atom type S.3 or S.o2) 54 | # and a keto group (MOL2 atom type O.2) 55 | selection key: ((atom_type == 'S.3') | (atom_type == 'S.o2')) --> (atom_type == 'O.2') 56 | 57 | ################################################### 58 | ### Step 03: PREFILTER BY FUNCTIONAL GROUP DISTANCE 59 | ################################################### 60 | functional group distance filter settings: 61 | # the following selection criteria select all molecules that 62 | # have an sp3-hybridized or sulfone sulfur atom (MOL2 atom type S.3 or S.o2) 63 | # and a keto group (MOL2 atom type O.2), and where the distance between 64 | # the sulfur and oxygen atoms is between 13 and 20 angstrom 65 | selection key: ((atom_type == 'S.3') | (atom_type == 'S.o2')) --> (atom_type == 'O.2') 66 | distance: 13-20 67 | 68 | ################################################ 69 | ### Step 04: OMEGA conformers 70 | ################################################ 71 | OMEGA settings: 72 | OMEGA executable: /Applications/OMEGA 2.5.1.4.app/Contents/MacOS/omega2-2.5.1.4 73 | 74 | ######################################################### 75 | ### Step 05: ROCS OVERLAYS & Step 06: SORT ROCS OVERLAYS 76 | ######################################################### 77 | ROCS settings: 78 | ROCS executable: /Applications/ROCS 3.2.1.4.app/Contents/MacOS/rocs-3.2.1.4 79 | ROCS run rankby: TanimotoCombo 80 | ROCS results sort by: TanimotoCombo,ColorTanimoto 81 | ROCS score threshold: (TanimotoCombo >= 0.75) & (ColorTanimoto >= 0.1) 82 | query molecule path: /Users/sebastian/code/screenlamp/example-files/example_1/dataset/query/3kpzs_conf_subset_nowarts.mol2 83 | # The query molecule above could be a single- or multi-conformer .mol2/.mol2.gz file. 84 | # However, for ROCS, please make sure that the query file does not have 85 | # "multi-conformer warts." 
I.e., all molecules in the multi-conformer .mol2/.mol2.gz 86 | # file must have exactly the same molecule ID without any enumerating prefixes 87 | # or suffixes. 88 | 89 | ################################################ 90 | ### Step 07: MATCHING FUNCTIONAL GROUPS 91 | ################################################ 92 | 93 | functional group matching selection settings: 94 | maximum pairwise atom distance: 1.3 # in angstrom 95 | 96 | ################################################ 97 | ### Step 08: SELECTING FUNCTIONAL GROUP MATCHES 98 | ################################################ 99 | 100 | functional group match selection settings: 101 | # the following selection key selects all database molecules 102 | # that meet the two following criteria 103 | # a) an overlay with the S1 atom in the query molecules 104 | # b) an overlay with the O2 atom in the query molecule 105 | # To satisfy a), the matching atom must be of atom type S.3 or S.o2 106 | # and its charge must be at least +1 or more positive. 107 | # To satisfy b), the matching atom must be of atom type O.2 (sp2 oxygen) 108 | # # and its partial charge must be -0.5 or more negative 109 | atomtype selection keys: ((S1 == 'S.3') | (S1 == 'S.o2')) --> (O2 == 'O.2') 110 | charge selection keys: ((S1 >= 1.0)) --> (O2 <= -0.5) 111 | write mol2 files: true 112 | 113 | 114 | -------------------------------------------------------------------------------- /tools/pipelines/pipeline-example-1.py: -------------------------------------------------------------------------------- 1 | # Sebastian Raschka 2017 2 | # 3 | # screenlamp is a Python toolkit 4 | # for hypothesis-driven virtual screening. 
#
# Copyright (C) 2017 Michigan State University
# License: Apache v2
#
# Software author: Sebastian Raschka
# Software author email: mail@sebastianraschka.com
#
# Software source repository: https://github.com/rasbt/screenlamp
# Documentation: https://psa-lab.github.io/screenlamp
#
# screenlamp was developed in the
# Protein Structural Analysis & Design Laboratory
# (http://www.kuhnlab.bmb.msu.edu)
#
# If you are using screenlamp in your research, please cite
# the following journal article:
#
# Raschka, Sebastian, Anne M. Scott, Nan Liu,
# Santosh Gunturu, Mar Huertas, Weiming Li,
# and Leslie A. Kuhn. 2017
#
# Enabling the hypothesis-driven prioritization of
# ligand candidates in big databases:
# Screenlamp and its application to GPCR inhibitor
# discovery for invasive species control.
#

import subprocess
import os
import argparse
import yaml


###############################################################################
# Helpers shared by all pipeline steps
###############################################################################

BANNER_RULE = '################################################'


def print_banner(title):
    """Print the framed heading that announces a pipeline step."""
    print('\n\n%s\n%s\n%s\n' % (BANNER_RULE, title, BANNER_RULE))


def tool_path(script_name):
    """Return the full path of a script in the screenlamp tools directory."""
    return os.path.join(SCREENLAMP_TOOLS_DIR, script_name)


def run_command(cmd):
    """Echo `cmd`, wait for confirmation in incremental mode, then run it.

    Parameters
    ----------
    cmd : list of str
        Program and arguments, passed to `subprocess.call` without a shell.

    Returns
    -------
    int
        The exit status of the command. A non-zero status is reported but
        does not stop the pipeline.

    """
    print('Running command:\n%s\n' % ' '.join(cmd))
    if incremental:
        input('Press Enter to proceed or CTRL+C to quit')
    status = subprocess.call(cmd)
    if status != 0:
        print('WARNING: command exited with status %d' % status)
    return status


###############################################################################
# Command line arguments
###############################################################################

parser = argparse.ArgumentParser(
    description='An example screenlamp pipeline ... [placeholder].',
    formatter_class=argparse.RawTextHelpFormatter)

parser.add_argument('-c', '--config_file',
                    type=str,
                    required=True,
                    help='Path to the pipeline configuration file')

parser.add_argument('-s', '--start_at_step',
                    type=int,
                    required=False,
                    default=0,
                    help='Start the pipeline at a particular step')

parser.add_argument('-i', '--incremental',
                    type=str,
                    required=False,
                    default='false',
                    help='incremental mode. If enabled, stops before each step'
                         ' to ask the user to continue')

args = parser.parse_args()
start_at = args.start_at_step
config_path = args.config_file

# Validate and interpret the flag with the same (case-insensitive) rule;
# comparing the raw value would silently treat e.g. "True" as false.
incremental_flag = args.incremental.lower()
if incremental_flag not in {'true', 'false'}:
    parser.error('incremental must be true or false')
incremental = incremental_flag == 'true'

###############################################################################
# Configuration file
###############################################################################

with open(config_path, 'r') as stream:
    # safe_load builds plain Python objects only; yaml.load without an
    # explicit Loader can construct arbitrary objects and is rejected by
    # current PyYAML releases.
    ymldct = yaml.safe_load(stream)

PROJECT_PATH = ymldct['general settings']['project output directory']
SCREENLAMP_TOOLS_DIR = ymldct['general settings']['screenlamp tools directory']
INPUT_MOL2_PATH = ymldct['general settings']['input mol2 directory']
N_CPUS = str(ymldct['general settings']['number of cpus'])
DATATABLE_PATH = ymldct['molecule property filter settings']['datatable path']
DATATABLE_FILTER = ymldct['molecule property filter settings']['column filter']
FUNCTIONAL_GROUP_PRESENCE = ymldct[
    'functional group presence filter settings']['selection key']
FUNCTIONAL_GROUP_DISTANCE_SELECTION = ymldct[
    'functional group distance filter settings']['selection key']
# str(): YAML parses a bare number as int/float, but every element of a
# command line must be a string.
FUNCTIONAL_GROUP_DISTANCE = str(ymldct[
    'functional group distance filter settings']['distance'])
OMEGA_EXECUTABLE = ymldct['OMEGA settings']['OMEGA executable']
ROCS_EXECUTABLE = ymldct['ROCS settings']['ROCS executable']
ROCS_RANKBY = ymldct['ROCS settings']['ROCS run rankby']
ROCS_SORTBY = ymldct['ROCS settings']['ROCS results sort by']
ROCS_THRESHOLD = ymldct['ROCS settings']['ROCS score threshold']
QUERY_PATH = ymldct['ROCS settings']['query molecule path']

FGROUP_MATCH_DISTANCE = str(ymldct['functional group matching '
                                   'selection settings'][
                                   'maximum pairwise atom distance'])

# Accept both a YAML boolean and the literal string 'true'.
WRITE_MATCH_OVERLAYS = ymldct['functional group match selection settings'][
    'write mol2 files'] in ('true', True)
FGROUP_ATOMTYPE = ymldct['functional group match selection settings'][
    'atomtype selection keys']
FGROUP_CHARGE = ymldct['functional group match selection settings'][
    'charge selection keys']

if not os.path.exists(PROJECT_PATH):
    os.makedirs(PROJECT_PATH)

###############################################################################

if start_at <= 0:
    print_banner('COUNT MOLECULES IN DATATABLE_PATH')

    run_command(['python', tool_path('count_mol2.py'),
                 '--input', INPUT_MOL2_PATH])

###############################################################################

if start_at <= 1:
    print_banner('Step 01: SELECT MOLECULES FROM DATA TABLE')

    run_command(['python', tool_path('datatable_to_id.py'),
                 '--input', DATATABLE_PATH,
                 '--output', os.path.join(PROJECT_PATH,
                                          '01_ids_from_database.txt'),
                 '--id_column', 'ZINC_ID',
                 '--selection', DATATABLE_FILTER])
    print('\n\n')

    run_command(['python', tool_path('id_to_mol2.py'),
                 '--input', INPUT_MOL2_PATH,
                 '--id_file', os.path.join(PROJECT_PATH,
                                           '01_ids_from_database.txt'),
                 '--output', os.path.join(PROJECT_PATH, '01_selected-mol2s'),
                 '--includelist', 'True'])
    print('\n\nSELECTED MOL2s:')

    run_command(['python', tool_path('count_mol2.py'),
                 '--input', os.path.join(PROJECT_PATH, '01_selected-mol2s')])

###############################################################################

if start_at <= 2:
    print_banner('Step 02: PREFILTER BY FUNCTIONAL GROUP PRESENCE')

    run_command(['python', tool_path('funcgroup_presence_to_id.py'),
                 '--input', os.path.join(PROJECT_PATH, '01_selected-mol2s'),
                 '--output', os.path.join(PROJECT_PATH,
                                          '02_fgroup-presence_mol2ids.txt'),
                 '--selection', FUNCTIONAL_GROUP_PRESENCE,
                 '--processes', N_CPUS])
    print('\n\n')

    run_command(['python', tool_path('id_to_mol2.py'),
                 '--input', os.path.join(PROJECT_PATH, '01_selected-mol2s'),
                 '--id_file', os.path.join(PROJECT_PATH,
                                           '02_fgroup-presence_mol2ids.txt'),
                 '--output', os.path.join(PROJECT_PATH,
                                          '02_fgroup-presence_mol2s'),
                 '--includelist', 'True'])
    print('\n\nSELECTED MOL2s:')

    run_command(['python', tool_path('count_mol2.py'),
                 '--input', os.path.join(PROJECT_PATH,
                                         '02_fgroup-presence_mol2s')])

###############################################################################

if start_at <= 3:
    print_banner('Step 03: PREFILTER BY FUNCTIONAL GROUP DISTANCE')

    run_command(['python', tool_path('funcgroup_distance_to_id.py'),
                 '--input', os.path.join(PROJECT_PATH,
                                         '02_fgroup-presence_mol2s'),
                 '--output', os.path.join(PROJECT_PATH,
                                          '03_fgroup_distance_mol2ids.txt'),
                 '--selection', FUNCTIONAL_GROUP_DISTANCE_SELECTION,
                 '--distance', FUNCTIONAL_GROUP_DISTANCE,
                 '--processes', N_CPUS])
    print('\n\n')

    run_command(['python', tool_path('id_to_mol2.py'),
                 '--input', os.path.join(PROJECT_PATH,
                                         '02_fgroup-presence_mol2s'),
                 '--id_file', os.path.join(PROJECT_PATH,
                                           '03_fgroup_distance_mol2ids.txt'),
                 '--output', os.path.join(PROJECT_PATH,
                                          '03_fgroup_distance_mol2s'),
                 '--includelist', 'True'])
    print('\n\nSELECTED MOL2s:')

    run_command(['python', tool_path('count_mol2.py'),
                 '--input', os.path.join(PROJECT_PATH,
                                         '03_fgroup_distance_mol2s')])

###############################################################################

if start_at <= 4:
    print_banner('Step 04: OMEGA conformers')

    run_command(['python', tool_path('generate_conformers_omega.py'),
                 '--input', os.path.join(PROJECT_PATH,
                                         '03_fgroup_distance_mol2s'),
                 '--output', os.path.join(PROJECT_PATH, '04_omega_conformers'),
                 '--executable', OMEGA_EXECUTABLE,
                 '--processes', N_CPUS])
    print('\n\nSELECTED MOL2s:')

    run_command(['python', tool_path('count_mol2.py'),
                 '--input', os.path.join(PROJECT_PATH, '04_omega_conformers')])

###############################################################################

if start_at <= 5:
    print_banner('Step 05: ROCS OVERLAYS')

    run_command(['python', tool_path('overlay_molecules_rocs.py'),
                 '--input', os.path.join(PROJECT_PATH, '04_omega_conformers'),
                 '--output', os.path.join(PROJECT_PATH, '05_rocs_overlays'),
                 '--executable', ROCS_EXECUTABLE,
                 '--query', QUERY_PATH,
                 '--settings', ('-rankby %s -maxhits 0'
                                ' -besthits 0 -progress percent' %
                                ROCS_RANKBY),
                 '--processes', N_CPUS])

    run_command(['python', tool_path('count_mol2.py'),
                 '--input', os.path.join(PROJECT_PATH, '05_rocs_overlays')])

###############################################################################

if start_at <= 6:
    print_banner('Step 06: SORT ROCS OVERLAYS')

    run_command(['python', tool_path('sort_rocs_mol2.py'),
                 '--input', os.path.join(PROJECT_PATH, '05_rocs_overlays'),
                 '--output', os.path.join(PROJECT_PATH,
                                          '06_rocs_overlays_sorted'),
                 '--query', QUERY_PATH,
                 '--sortby', ROCS_SORTBY,
                 '--selection', ROCS_THRESHOLD])

###############################################################################

if start_at <= 7:
    print_banner('Step 07: MATCHING FUNCTIONAL GROUPS')

    run_command(['python', tool_path('funcgroup_matching.py'),
                 '--input', os.path.join(PROJECT_PATH,
                                         '06_rocs_overlays_sorted'),
                 '--output', os.path.join(PROJECT_PATH,
                                          '07_funcgroup_matching'),
                 '--max_distance', FGROUP_MATCH_DISTANCE,
                 '--processes', N_CPUS])

############################################################################### 389 | 390 | if start_at <= 8: 391 | 392 | s = """ 393 | 394 | ################################################ 395 | Step 08: SELECTING FUNCTIONAL GROUP MATCHES 396 | ################################################ 397 | """ 398 | print(s) 399 | 400 | if WRITE_MATCH_OVERLAYS: 401 | in_path = os.path.join(PROJECT_PATH, '06_rocs_overlays_sorted') 402 | else: 403 | in_path = '' 404 | 405 | cmd = ['python', os.path.join(SCREENLAMP_TOOLS_DIR, 406 | 'funcgroup_matching_selection.py'), 407 | '--input', os.path.join(PROJECT_PATH, '07_funcgroup_matching'), 408 | '--output', os.path.join(PROJECT_PATH, '08_funcgroup_selection'), 409 | '--atomtype_selection', FGROUP_ATOMTYPE, 410 | '--charge_selection', FGROUP_CHARGE, 411 | '--input_mol2', in_path] 412 | 413 | print('Running command:\n%s\n' % ' '.join(cmd)) 414 | if incremental: 415 | input('Press Enter to proceed or CTRL+C to quit') 416 | subprocess.call(cmd) -------------------------------------------------------------------------------- /tools/sort_rocs_mol2.py: -------------------------------------------------------------------------------- 1 | # Sebastian Raschka 2017 2 | # 3 | # screenlamp is a Python toolkit 4 | # for hypothesis-driven virtual screening. 5 | # 6 | # Copyright (C) 2017 Michigan State University 7 | # License: Apache v2 8 | # 9 | # Software author: Sebastian Raschka 10 | # Software author email: mail@sebastianraschka.com 11 | # 12 | # Software source repository: https://github.com/rasbt/screenlamp 13 | # Documentation: https://psa-lab.github.io/screenlamp 14 | # 15 | # screenlamp was developed in the 16 | # Protein Structural Analysis & Design Laboratory 17 | # (http://www.kuhnlab.bmb.msu.edu) 18 | # 19 | # If you are using screenlamp in your research, please cite 20 | # the following journal article: 21 | # 22 | # Raschka, Sebastian, Anne M. 
Scott, Nan Liu, 23 | # Santosh Gunturu, Mar Huertas, Weiming Li, 24 | # and Leslie A. Kuhn. 2017 25 | # 26 | # Enabling the hypothesis-driven prioritization of 27 | # ligand candidates in big databases: 28 | # Screenlamp and its application to GPCR inhibitor 29 | # discovery for invasive species control. 30 | # 31 | 32 | import os 33 | import argparse 34 | import sys 35 | import time 36 | import pandas as pd 37 | from biopandas.mol2 import split_multimol2 38 | import tempfile 39 | import pickle 40 | 41 | 42 | def get_mol2_files(dir_path): 43 | 44 | files = [] 45 | if os.path.isdir(dir_path): 46 | for f in os.listdir(dir_path): 47 | if f.endswith(('.mol2', 'mol2.gz')): 48 | file_path = os.path.join(dir_path, f) 49 | files.append(file_path) 50 | 51 | elif (os.path.isfile(dir_path) and 52 | dir_path.endswith(('.mol2', 'mol2.gz'))): 53 | files.append(dir_path) 54 | 55 | return files 56 | 57 | 58 | def parse_selection_string(s, df_name='df'): 59 | return s.replace('(', '(%s.' % df_name) 60 | 61 | 62 | def read_and_write(inp_mol2_path, report_path, output_dir, query_path, 63 | sortby, separator, verbose, id_suffix, selection): 64 | 65 | if verbose: 66 | sys.stdout.write('Processing %s' % os.path.basename(inp_mol2_path)) 67 | sys.stdout.flush() 68 | 69 | df = pd.read_table(report_path, usecols=['Name', 'ShapeQuery'] + sortby, 70 | sep=separator) 71 | 72 | if sortby: 73 | df.sort_values(sortby, inplace=True, ascending=False) 74 | 75 | if selection: 76 | selection_str = parse_selection_string(selection, df_name='df') 77 | mask = pd.eval(selection_str) 78 | df = df[mask] 79 | 80 | dbase_query_pairs = [(d, q) for d, q in 81 | zip(df['Name'].values, df['ShapeQuery'].values)] 82 | query_names = {q for q in df['ShapeQuery'].values} 83 | 84 | query_mol2s = {} 85 | 86 | multiconf_query = False 87 | for idx, cont in enumerate(split_multimol2(query_path)): 88 | if idx >= 1: 89 | multiconf_query = True 90 | break 91 | 92 | cnt = -1 93 | 94 | if query_path.endswith('.gz'): 95 | for 
def main(input_dir, output_dir, query_path,
         sortby, separator, verbose, id_suffix, selection):
    """Process every ROCS result file found in `input_dir`.

    For each mol2 path returned by `get_mol2_files(input_dir)`, the path of
    the matching report file is derived and both are handed to
    `read_and_write`, which does the actual sorting and writing.

    Parameters
    ----------
    input_dir : str
        Directory with the results of a ROCS run.
    output_dir : str
        Directory the results are written to. It is created (including
        missing parent directories) if it does not exist yet.
    query_path : str
        Path to the query molecule file.
    sortby : list of str
        Report columns the structures are sorted by.
    separator : str
        Column separator of the report tables.
    verbose : int
        Verbosity level; forwarded unchanged to `read_and_write`.
    id_suffix : bool
        Whether query molecule IDs receive a numeric suffix; forwarded
        unchanged to `read_and_write`.
    selection : str
        Selection string used to filter molecules by score; forwarded
        unchanged to `read_and_write`.

    """
    # makedirs(exist_ok=True) also creates missing parent directories and
    # does not fail if the directory appears between a check and the call,
    # which a separate exists()/mkdir() pair would.
    os.makedirs(output_dir, exist_ok=True)

    for mol2_path in get_mol2_files(input_dir):
        # The report file is expected next to the mol2 file; its name is
        # the mol2 file name with '.mol2' -> '.rpt' and '_hits_' -> '_'.
        report_name = (os.path.basename(mol2_path)
                       .replace('.mol2', '.rpt')
                       .replace('_hits_', '_'))
        report_path = os.path.join(os.path.dirname(mol2_path), report_name)
        read_and_write(mol2_path, report_path, output_dir, query_path,
                       sortby, separator, verbose, id_suffix, selection)


if __name__ == '__main__':

    parser = argparse.ArgumentParser(
        description='Sorts ROCS results by score and creates'
                    '\nseparate .mol2 files for the database'
                    ' and query molecules.',
        epilog="""Example:
python sort_rocs_mol2.py -i rocs_results/\\
    --output rocs_sorted/ --query mol.mol2\\
    --sortby TanimotoCombo,ColorTanimoto\\
    --selection "(TanimotoCombo >= 0.75) & (ColorTanimoto >= 0.1)" """,
        formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('-i', '--input',
                        type=str,
                        required=True,
                        help='(Required.) Input directory with results from a ROCS run.')
    parser.add_argument('-o', '--output',
                        type=str,
                        required=True,
                        help='(Required.) Directory path for writing the `.mol2` overlay'
                             '\nROCS status and ROCS report (`.rpt`) files')
    parser.add_argument('--query',
                        type=str,
                        required=True,
                        help='(Required.) Path to the query molecule'
                             '\nin `.mol2` and/or `.mol2.gz` format.'
                             '\nThe query molecule file could be a single'
                             '\nstructure of multiple-conformers of the same'
                             '\nstructure. If a multi-conformer file is'
                             '\nsubmitted, please make sure that all'
                             '\nconformers in the mol2 file have the same'
                             '\nmolecule ID/Name.')
    parser.add_argument('-s', '--sortby',
                        type=str,
                        default='TanimotoCombo,ColorTanimoto',
                        help='(Optional, default: `"TanimotoCombo,ColorTanimoto"`)'
                             '\nScore column(s) in ROCS report files that'
                             '\nthe structures should be sorted by.')
    # The default shown in the help text must match `default` exactly
    # (the original help carried a stray closing parenthesis).
    parser.add_argument('--selection',
                        type=str,
                        default='(TanimotoCombo >= 1.0)'
                                ' & (ColorTanimoto >= 0.25)',
                        help='(Optional, default: `"(TanimotoCombo >= 1.0) & (ColorTanimoto >= 0.25)"`)'
                             '\nSelection string to exclude molecules above'
                             '\nor below a certain score threshold. By default'
                             '\nall molecules with a ColorTanimoto score smaller than 0.25'
                             '\n and a TanimotoCombo score smaller than 1.0 will be disregarded.')
    parser.add_argument('--separator',
                        type=str,
                        default='\t',
                        help=('(Optional, default: `"\\t"`.) Column separator used\nin the input table.\n'
                              'Assumes tab-separated values by default.'))
    parser.add_argument('--id_suffix',
                        type=str,
                        default='False',
                        help='(Optional, default: `"False"`.)'
                             '\nIf `--id_suffix "True"`, a molecule ID suffix'
                             '\nwill be added to the query'
                             '\nmolecules in the order the ROCS query molecules'
                             '\nappear in a multi-conformer query file.'
                             '\nFor instance, if all query molecules are labeled "3kPZS",'
                             '\nthen the same structures in the output file are labeled'
                             '\n3kPZS_1, 3kPZS_2, ... Note that those modified conformer'
                             '\nwill correspond to the conformer names in the ROCS report'
                             '\ntables. However, they may appear in an unsorted order in'
                             '\nthe _query files, which are sorted by the overlay score'
                             '\nof the database molecules. For example, if the'
                             '\ndatabase molecule is called ZINC123_112, first'
                             '\nentry in the _query file that corresponds to *_dbase'
                             '\nfile may by labeled 3kPZS_11 if the 11th 3kPZS conformer'
                             '\nis the best match according to ROCS.')
    parser.add_argument('-v', '--verbose',
                        type=int,
                        default=1,
                        help='Verbosity level. If 0, does not print any'
                             ' output.'
                             '\nIf 1 (default), prints the file currently'
                             '\nprocessing.')

    parser.add_argument('--version', action='version', version='v. 1.0')

    args = parser.parse_args()

    # --id_suffix is passed as a string; map the accepted spellings to bool.
    id_suffix_arg = args.id_suffix.lower()
    if id_suffix_arg in {'false', 'f', 'no', 'n'}:
        id_suffix = False
    elif id_suffix_arg in {'true', 't', 'yes', 'y'}:
        id_suffix = True
    else:
        raise ValueError('--id_suffix must be true or false. Got %s' %
                         args.id_suffix)

    sortby = [s.strip() for s in args.sortby.split(',')]

    # Every column used in the selection string must also be a sort column.
    # Column names are taken from the whitespace-separated tokens that open
    # a parenthesised condition, e.g. "(TanimotoCombo" -> "TanimotoCombo".
    for token in args.selection.split(' '):
        if not token.startswith('('):
            continue
        column = token[1:]
        # Test membership in the parsed column list rather than a substring
        # match on the raw --sortby string, which would wrongly accept
        # fragments such as "Tanimoto" or "Combo,Color".
        if column and column not in sortby:
            raise ValueError('Selection columns are a subset of'
                             ' the --sortby columns. The column %s'
                             ' is currently not contained in the'
                             ' --sortby argument. Please add it '
                             'there to use this column as a '
                             'selection criterion.' % column)

    main(input_dir=args.input, output_dir=args.output, query_path=args.query,
         sortby=sortby,
         verbose=args.verbose,
         separator=args.separator,
         id_suffix=id_suffix,
         selection=args.selection)
96 | From here you can search these documents. Enter 97 | your search terms below. 98 |