├── .gitignore
├── LICENSE.txt
├── README.md
├── docs
    ├── argparse_to_md.py
    ├── ipynb2markdown.py
    ├── mkdocs.yml
    ├── sources
    │   ├── changelog.md
    │   ├── cite.md
    │   ├── contact.md
    │   ├── extra.css
    │   ├── images
    │   │   ├── automated-pipeline-flowchart.jpg
    │   │   ├── automated-pipeline-flowchart.pdf
    │   │   ├── logo-transparent-bg.png
    │   │   ├── logo.png
    │   │   ├── obtaining-screenlamp-2.png
    │   │   ├── obtaining-screenlamp.png
    │   │   └── toolkit-tutorial
    │   │   │   └── dataset-overview.png
    │   ├── index.md
    │   ├── installation.md
    │   ├── license.md
    │   └── user_guide
    │   │   ├── doc-overview.md
    │   │   ├── images
    │   │       └── tools-tutorial-1
    │   │       │   ├── 3kpzs-keto-sulfur.png
    │   │       │   ├── 5-mol2ids.png
    │   │       │   ├── atomtype-match-ex-1.png
    │   │       │   ├── atomtype-match-ex-2.png
    │   │       │   ├── charge-match-ex-1.png
    │   │       │   ├── fgroup-match-overlays-pymol.png
    │   │       │   ├── open-fgroup-match-overlays.png
    │   │       │   ├── pipe-step-1.jpg
    │   │       │   ├── pipe-step-2.jpg
    │   │       │   ├── pipe-step-3.jpg
    │   │       │   ├── pipe-step-4.jpg
    │   │       │   ├── pipe-step-5.jpg
    │   │       │   ├── pipe-step-6.jpg
    │   │       │   ├── pipe-step-7.jpg
    │   │       │   ├── pipe-step-8.jpg
    │   │       │   ├── pipeline-overview.jpg
    │   │       │   ├── pymol-overlay-ex-1.png
    │   │       │   ├── pymol-overlay-ex-2.png
    │   │       │   └── zincdata-spreadsheat.png
    │   │   ├── pipeline-tutorial-1.md
    │   │   ├── tools
    │   │   ├── tools-tutorial-1.ipynb
    │   │   ├── tools-tutorial-1.md
    │   │   └── tools.md
    ├── united
    │   ├── _LICENSE
    │   ├── __init__.py
    │   ├── base.html
    │   ├── content.html
    │   ├── css
    │   │   ├── base.css
    │   │   ├── bootstrap-custom.min.css
    │   │   ├── font-awesome-4.0.3.css
    │   │   └── highlight.css
    │   ├── fonts
    │   │   ├── fontawesome-webfont.eot
    │   │   ├── fontawesome-webfont.svg
    │   │   ├── fontawesome-webfont.ttf
    │   │   └── fontawesome-webfont.woff
    │   ├── img
    │   │   └── favicon.ico
    │   ├── js
    │   │   ├── base.js
    │   │   ├── bootstrap-3.0.3.min.js
    │   │   ├── highlight.pack.js
    │   │   └── jquery-1.10.2.min.js
    │   ├── main.html
    │   ├── nav-sub.html
    │   ├── nav.html
    │   └── toc.html
    └── update_docs.py
├── requirements.txt
└── tools
    ├── count_mol2.py
    ├── datatable_to_id.py
    ├── enumerate_conformers.py
    ├── funcgroup_distance_to_id.py
    ├── funcgroup_matching.py
    ├── funcgroup_matching_selection.py
    ├── funcgroup_presence_to_id.py
    ├── generate_conformers_obabel.py
    ├── generate_conformers_omega.py
    ├── id_to_mol2.py
    ├── merge_id_files.py
    ├── mol2_to_id.py
    ├── overlay_molecules_rocs.py
    ├── overlay_molecules_shapeit.py
    ├── pipelines
        ├── experimental
        │   ├── pipeline-example-1-config_obabel.yaml
        │   └── pipeline-example-1_obabel.py
        ├── pipeline-example-1-config.yaml
        └── pipeline-example-1.py
    └── sort_rocs_mol2.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Datafiles
  2 | *.graffle
  3 | example-files/
  4 | docs/workflow/
  5 | docs/html/
  6 | docs/sources/workflow/80698/
  7 | docs/sources/workflow/3keto-24sulfate/
  8 | docs/sources/workflow/example_1/
  9 | docs/sources/user_guide/tutorial-results/
 10 | docs/sources/user_guide/tk-tutorial_data
 11 | 
 12 | # MacOS
 13 | .DS_Store
 14 | 
 15 | # Jupyter Notebook
 16 | .ipynb_checkpoints
 17 | 
 18 | 
 19 | # Byte-compiled / optimized / DLL files
 20 | __pycache__/
 21 | *.py[cod]
 22 | *$py.class
 23 | 
 24 | # C extensions
 25 | *.so
 26 | 
 27 | # Distribution / packaging
 28 | .Python
 29 | env/
 30 | build/
 31 | develop-eggs/
 32 | dist/
 33 | downloads/
 34 | eggs/
 35 | .eggs/
 36 | lib/
 37 | lib64/
 38 | parts/
 39 | sdist/
 40 | var/
 41 | *.egg-info/
 42 | .installed.cfg
 43 | *.egg
 44 | 
 45 | # PyInstaller
 46 | #  Usually these files are written by a python script from a template
 47 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 48 | *.manifest
 49 | *.spec
 50 | 
 51 | # Installer logs
 52 | pip-log.txt
 53 | pip-delete-this-directory.txt
 54 | 
 55 | # Unit test / coverage reports
 56 | htmlcov/
 57 | .tox/
 58 | .coverage
 59 | .coverage.*
 60 | .cache
 61 | nosetests.xml
 62 | coverage.xml
 63 | *,cover
 64 | .hypothesis/
 65 | 
 66 | # Translations
 67 | *.mo
 68 | *.pot
 69 | 
 70 | # Django stuff:
 71 | *.log
 72 | local_settings.py
 73 | 
 74 | # Flask stuff:
 75 | instance/
 76 | .webassets-cache
 77 | 
 78 | # Scrapy stuff:
 79 | .scrapy
 80 | 
 81 | # Sphinx documentation
 82 | docs/_build/
 83 | 
 84 | # PyBuilder
 85 | target/
 86 | 
 87 | # IPython Notebook
 88 | .ipynb_checkpoints
 89 | 
 90 | # pyenv
 91 | .python-version
 92 | 
 93 | # celery beat schedule file
 94 | celerybeat-schedule
 95 | 
 96 | # dotenv
 97 | .env
 98 | 
 99 | # virtualenv
100 | venv/
101 | ENV/
102 | 
103 | # Spyder project settings
104 | .spyderproject
105 | 
106 | # Rope project settings
107 | .ropeproject
108 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
  1 | 
  2 |                                  Apache License
  3 |                            Version 2.0, January 2004
  4 |                         http://www.apache.org/licenses/
  5 | 
  6 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  7 | 
  8 |    1. Definitions.
  9 | 
 10 |       "License" shall mean the terms and conditions for use, reproduction,
 11 |       and distribution as defined by Sections 1 through 9 of this document.
 12 | 
 13 |       "Licensor" shall mean the copyright owner or entity authorized by
 14 |       the copyright owner that is granting the License.
 15 | 
 16 |       "Legal Entity" shall mean the union of the acting entity and all
 17 |       other entities that control, are controlled by, or are under common
 18 |       control with that entity. For the purposes of this definition,
 19 |       "control" means (i) the power, direct or indirect, to cause the
 20 |       direction or management of such entity, whether by contract or
 21 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 22 |       outstanding shares, or (iii) beneficial ownership of such entity.
 23 | 
 24 |       "You" (or "Your") shall mean an individual or Legal Entity
 25 |       exercising permissions granted by this License.
 26 | 
 27 |       "Source" form shall mean the preferred form for making modifications,
 28 |       including but not limited to software source code, documentation
 29 |       source, and configuration files.
 30 | 
 31 |       "Object" form shall mean any form resulting from mechanical
 32 |       transformation or translation of a Source form, including but
 33 |       not limited to compiled object code, generated documentation,
 34 |       and conversions to other media types.
 35 | 
 36 |       "Work" shall mean the work of authorship, whether in Source or
 37 |       Object form, made available under the License, as indicated by a
 38 |       copyright notice that is included in or attached to the work
 39 |       (an example is provided in the Appendix below).
 40 | 
 41 |       "Derivative Works" shall mean any work, whether in Source or Object
 42 |       form, that is based on (or derived from) the Work and for which the
 43 |       editorial revisions, annotations, elaborations, or other modifications
 44 |       represent, as a whole, an original work of authorship. For the purposes
 45 |       of this License, Derivative Works shall not include works that remain
 46 |       separable from, or merely link (or bind by name) to the interfaces of,
 47 |       the Work and Derivative Works thereof.
 48 | 
 49 |       "Contribution" shall mean any work of authorship, including
 50 |       the original version of the Work and any modifications or additions
 51 |       to that Work or Derivative Works thereof, that is intentionally
 52 |       submitted to Licensor for inclusion in the Work by the copyright owner
 53 |       or by an individual or Legal Entity authorized to submit on behalf of
 54 |       the copyright owner. For the purposes of this definition, "submitted"
 55 |       means any form of electronic, verbal, or written communication sent
 56 |       to the Licensor or its representatives, including but not limited to
 57 |       communication on electronic mailing lists, source code control systems,
 58 |       and issue tracking systems that are managed by, or on behalf of, the
 59 |       Licensor for the purpose of discussing and improving the Work, but
 60 |       excluding communication that is conspicuously marked or otherwise
 61 |       designated in writing by the copyright owner as "Not a Contribution."
 62 | 
 63 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 64 |       on behalf of whom a Contribution has been received by Licensor and
 65 |       subsequently incorporated within the Work.
 66 | 
 67 |    2. Grant of Copyright License. Subject to the terms and conditions of
 68 |       this License, each Contributor hereby grants to You a perpetual,
 69 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 70 |       copyright license to reproduce, prepare Derivative Works of,
 71 |       publicly display, publicly perform, sublicense, and distribute the
 72 |       Work and such Derivative Works in Source or Object form.
 73 | 
 74 |    3. Grant of Patent License. Subject to the terms and conditions of
 75 |       this License, each Contributor hereby grants to You a perpetual,
 76 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 77 |       (except as stated in this section) patent license to make, have made,
 78 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 79 |       where such license applies only to those patent claims licensable
 80 |       by such Contributor that are necessarily infringed by their
 81 |       Contribution(s) alone or by combination of their Contribution(s)
 82 |       with the Work to which such Contribution(s) was submitted. If You
 83 |       institute patent litigation against any entity (including a
 84 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 85 |       or a Contribution incorporated within the Work constitutes direct
 86 |       or contributory patent infringement, then any patent licenses
 87 |       granted to You under this License for that Work shall terminate
 88 |       as of the date such litigation is filed.
 89 | 
 90 |    4. Redistribution. You may reproduce and distribute copies of the
 91 |       Work or Derivative Works thereof in any medium, with or without
 92 |       modifications, and in Source or Object form, provided that You
 93 |       meet the following conditions:
 94 | 
 95 |       (a) You must give any other recipients of the Work or
 96 |           Derivative Works a copy of this License; and
 97 | 
 98 |       (b) You must cause any modified files to carry prominent notices
 99 |           stating that You changed the files; and
100 | 
101 |       (c) You must retain, in the Source form of any Derivative Works
102 |           that You distribute, all copyright, patent, trademark, and
103 |           attribution notices from the Source form of the Work,
104 |           excluding those notices that do not pertain to any part of
105 |           the Derivative Works; and
106 | 
107 |       (d) If the Work includes a "NOTICE" text file as part of its
108 |           distribution, then any Derivative Works that You distribute must
109 |           include a readable copy of the attribution notices contained
110 |           within such NOTICE file, excluding those notices that do not
111 |           pertain to any part of the Derivative Works, in at least one
112 |           of the following places: within a NOTICE text file distributed
113 |           as part of the Derivative Works; within the Source form or
114 |           documentation, if provided along with the Derivative Works; or,
115 |           within a display generated by the Derivative Works, if and
116 |           wherever such third-party notices normally appear. The contents
117 |           of the NOTICE file are for informational purposes only and
118 |           do not modify the License. You may add Your own attribution
119 |           notices within Derivative Works that You distribute, alongside
120 |           or as an addendum to the NOTICE text from the Work, provided
121 |           that such additional attribution notices cannot be construed
122 |           as modifying the License.
123 | 
124 |       You may add Your own copyright statement to Your modifications and
125 |       may provide additional or different license terms and conditions
126 |       for use, reproduction, or distribution of Your modifications, or
127 |       for any such Derivative Works as a whole, provided Your use,
128 |       reproduction, and distribution of the Work otherwise complies with
129 |       the conditions stated in this License.
130 | 
131 |    5. Submission of Contributions. Unless You explicitly state otherwise,
132 |       any Contribution intentionally submitted for inclusion in the Work
133 |       by You to the Licensor shall be under the terms and conditions of
134 |       this License, without any additional terms or conditions.
135 |       Notwithstanding the above, nothing herein shall supersede or modify
136 |       the terms of any separate license agreement you may have executed
137 |       with Licensor regarding such Contributions.
138 | 
139 |    6. Trademarks. This License does not grant permission to use the trade
140 |       names, trademarks, service marks, or product names of the Licensor,
141 |       except as required for reasonable and customary use in describing the
142 |       origin of the Work and reproducing the content of the NOTICE file.
143 | 
144 |    7. Disclaimer of Warranty. Unless required by applicable law or
145 |       agreed to in writing, Licensor provides the Work (and each
146 |       Contributor provides its Contributions) on an "AS IS" BASIS,
147 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 |       implied, including, without limitation, any warranties or conditions
149 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 |       PARTICULAR PURPOSE. You are solely responsible for determining the
151 |       appropriateness of using or redistributing the Work and assume any
152 |       risks associated with Your exercise of permissions under this License.
153 | 
154 |    8. Limitation of Liability. In no event and under no legal theory,
155 |       whether in tort (including negligence), contract, or otherwise,
156 |       unless required by applicable law (such as deliberate and grossly
157 |       negligent acts) or agreed to in writing, shall any Contributor be
158 |       liable to You for damages, including any direct, indirect, special,
159 |       incidental, or consequential damages of any character arising as a
160 |       result of this License or out of the use or inability to use the
161 |       Work (including but not limited to damages for loss of goodwill,
162 |       work stoppage, computer failure or malfunction, or any and all
163 |       other commercial damages or losses), even if such Contributor
164 |       has been advised of the possibility of such damages.
165 | 
166 |    9. Accepting Warranty or Additional Liability. While redistributing
167 |       the Work or Derivative Works thereof, You may choose to offer,
168 |       and charge a fee for, acceptance of support, warranty, indemnity,
169 |       or other liability obligations and/or rights consistent with this
170 |       License. However, in accepting such obligations, You may act only
171 |       on Your own behalf and on Your sole responsibility, not on behalf
172 |       of any other Contributor, and only if You agree to indemnify,
173 |       defend, and hold each Contributor harmless for any liability
174 |       incurred by, or claims asserted against, such Contributor by reason
175 |       of your accepting any such warranty or additional liability.
176 | 
177 |    END OF TERMS AND CONDITIONS
178 | 
179 | 
180 |    Copyright 2017 Michigan State University
181 | 
182 |    The screenlamp software was developed by Sebastian Raschka
183 |    and Leslie A. Kuhn in the Protein Structure Lab 
184 |    (http://www.kuhnlab.bmb.msu.edu) at Michigan State University.
185 | 
186 |    Licensed under the Apache License, Version 2.0 (the "License");
187 |    you may not use this file except in compliance with the License.
188 |    You may obtain a copy of the License at
189 | 
190 |        http://www.apache.org/licenses/LICENSE-2.0
191 | 
192 |    Unless required by applicable law or agreed to in writing, software
193 |    distributed under the License is distributed on an "AS IS" BASIS,
194 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
195 |    See the License for the specific language governing permissions and
196 |    limitations under the License.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | <img src="docs/sources/images/logo-transparent-bg.png" alt="screenlamp logo" width="75%">
 3 | 
 4 | 
 5 | 
 6 | 
 7 | ![Python 3.6](https://img.shields.io/badge/Python-3.6-orange.svg)
 8 | [![License](https://img.shields.io/badge/License-Apache_v2-orange.svg)](license)
  9 | [![GitHub](https://img.shields.io/badge/Source-GitHub-orange.svg)](https://github.com/psa-lab/screenlamp)
10 | 
11 | ### A toolkit for ligand-based virtual screening
12 | 
13 | 
14 | Screenlamp is a Python package for facilitating ligand-based virtual screening workflows and toolkits with hypothesis-driven filtering steps.
15 | 
16 | **The official documentation is available at https://psa-lab.github.io/screenlamp.**
17 | 
18 | 
19 | ## About
20 | 
 21 | The screenlamp toolkit was developed in the [Protein Structure Analysis & Design Laboratory](http://www.kuhnlab.bmb.msu.edu) at Michigan State University. For additional information about screenlamp, please refer to the accompanying research publication:
22 | 
23 | - Raschka, Sebastian, Anne M. Scott, Nan Liu, Santosh Gunturu, Mar Huertas, Weiming Li, and Leslie A. Kuhn (2018). "Enabling the hypothesis-driven prioritization of ligand candidates in big databases: 
24 | Screenlamp and its application to GPCR inhibitor discovery for invasive species control". *Journal of Computer-Aided Molecular Design* 32: 415.  
25 | [[biorxiv preprint](https://www.biorxiv.org/content/early/2018/01/17/249151)]
26 |  [[Journal article](https://link.springer.com/article/10.1007/s10822-018-0100-7)]
27 | 
28 | 
29 | Screenlamp is research software and has been made available to other researchers under a permissive [Apache v2 open source license](license). If you use screenlamp in your scientific projects or any derivative work, the authors of the screenlamp software must be acknowledged and the publication listed above should be cited.
30 | 
31 | 
 32 | ## Contact
33 | 
34 | If you encounter bugs or other technical issues with the screenlamp software package, please send an email to [kuhnlab@msu.edu](mailto:kuhnlab@msu.edu) or use the [Issue Tracker](https://github.com/psa-lab/screenlamp/issues). For questions about the [screenlamp research article](cite/index.html), please contact the publisher or [corresponding author](mailto:kuhnl@msu.edu) directly instead.
35 | 


--------------------------------------------------------------------------------
/docs/argparse_to_md.py:
--------------------------------------------------------------------------------
  1 | # Tool to generate a markdown documentation
  2 | # from argparse scripts.
  3 | #
  4 | # usage: python argparse_to_md.py scripts/ > doc.md
  5 | #
  6 | # Copyright (C) 2017 Sebastian Raschka
  7 | # License: MIT
  8 | #
  9 | # Author: Sebastian Raschka <http://sebastianraschka.com>
 10 | # Author email: mail@sebastianraschka.com
 11 | 
 12 | 
 13 | import subprocess
 14 | import os
 15 | import sys
 16 | 
 17 | 
 18 | def get_pyfiles(path):
 19 |     files = []
 20 |     for file in os.listdir(path):
 21 |         if file.endswith('.py') and not file.startswith('_'):
 22 |             files.append(os.path.join(os.path.abspath(path), file))
 23 |     return files
 24 | 
 25 | 
 26 | def get_help_messages(path):
 27 |     s = subprocess.check_output('python %s --help' % path, shell=True)
 28 |     return s.decode()
 29 | 
 30 | 
 31 | def help_to_md(s):
 32 |     out_lines = []
 33 | 
 34 | 
 35 |     example_section = False
 36 |     for line in s.split('\n'):
 37 | 
 38 |         lstripped = line.lstrip()
 39 |         stripped = lstripped.rstrip()
 40 | 
 41 |         if not stripped:
 42 |             continue
 43 | 
 44 |         if stripped == "-v, --version         show program's version number and exit":
 45 |             out_lines.append('- `-v, --version`  ')
 46 |             out_lines.append("Show program's version number and exit")
 47 | 
 48 |         elif stripped == "-h, --help            show this help message and exit":
 49 |             out_lines.append('- `-h, --help`  ')
 50 |             out_lines.append("Show this help message and exit")
 51 | 
 52 |         elif stripped.startswith('Example:'):
 53 |             example_section = True
 54 |             out_lines.append('\n**Example:**\n\n```')
 55 | 
 56 |         elif example_section:
 57 |             if stripped.startswith('#'):
 58 |                 out_lines.append('```\n')
 59 |                 example_section = False
 60 |             out_lines.append(stripped)
 61 | 
 62 |         elif stripped.startswith('[-'):
 63 |             out_lines.append('`%s`  ' % stripped)
 64 | 
 65 |         elif stripped.startswith('usage:'):
 66 |             usage = stripped.split('usage:')[-1]
 67 |             out_lines.append('\n**Usage:**\n\n    %s\n\n' % usage)
 68 | 
 69 |         elif stripped.startswith('optional arguments:'):
 70 |             out_lines.append('\n**Arguments:**\n\n')
 71 | 
 72 |         elif stripped.startswith('  --') or stripped.startswith('python'):
 73 |             out_lines.append('`%s`  ' % stripped)
 74 | 
 75 |         elif line.startswith('  -'):
 76 |             out_lines.append('- `%s`  ' % stripped)
 77 | 
 78 |             #usage = line.split('Example')[-1].strip().strip(':')
 79 |             
 80 |         else:
 81 |             if stripped.startswith('  '):
 82 |                 stripped = '     ' + stripped.strip()
 83 |             out_lines.append(stripped)
 84 | 
 85 |     if example_section:
 86 |         out_lines.append('```\n')
 87 | 
 88 |     return out_lines
 89 | 
 90 | 
 91 | def main(dir_path):
 92 |     s = ("This page serves as a quick lookup reference for the different"
 93 |          " modules within screenlamp. Please see the [Toolkit Tutorial](tools-tutorial-1)"
 94 |          " for a"
 95 |          " more detailed explanation of the different modules and how"
 96 |          " they can be combined in a typical virtual screening pipeline.")
 97 | 
 98 |     contents = ["# Tools", "\n", s]
 99 |     paths = get_pyfiles(dir_path)
100 |     for f in paths:
101 |         contents.append('\n\n## %s\n\n' % os.path.basename(f))
102 |         s = get_help_messages(f)
103 |         lines = help_to_md(s)
104 |         contents.extend(lines)
105 |     for line in contents:
106 |         print(line)
107 | 
108 | 
if __name__ == '__main__':
    # The first command-line argument is passed to main() as the
    # directory whose scripts are documented.
    main(sys.argv[1])
111 | 


--------------------------------------------------------------------------------
/docs/ipynb2markdown.py:
--------------------------------------------------------------------------------
  1 | # IPython Notebook to Markdown conversion script
  2 | #
  3 | # Sebastian Raschka 2014-2016
  4 | # mlxtend Machine Learning Library Extensions
  5 | #
  6 | # Author: Sebastian Raschka <sebastianraschka.com>
  7 | #
  8 | # License: BSD 3 clause
  9 | 
 10 | import subprocess
 11 | import glob
 12 | import shutil
 13 | import os
 14 | import markdown
 15 | from markdown.treeprocessors import Treeprocessor
 16 | from markdown.extensions import Extension
 17 | from nbconvert.exporters import MarkdownExporter
 18 | 
 19 | 
 20 | class ImgExtractor(Treeprocessor):
 21 |     def run(self, doc):
 22 |         self.markdown.images = []
 23 |         for image in doc.findall('.//img'):
 24 |             self.markdown.images.append(image.get('src'))
 25 | 
 26 | 
 27 | class ImgExtExtension(Extension):
 28 |     def extendMarkdown(self, md, md_globals):
 29 |         img_ext = ImgExtractor(md)
 30 |         md.treeprocessors.add('imgext', img_ext, '>inline')
 31 | 
 32 | 
 33 | def ipynb_to_md(ipynb_path):
 34 |     orig_path = os.getcwd()
 35 |     os.chdir(os.path.dirname(ipynb_path))
 36 |     file_name = os.path.basename(ipynb_path)
 37 |     subprocess.call(['python', '-m', 'nbconvert',
 38 |                      '--to', 'markdown', file_name])
 39 | 
 40 |     new_s = []
 41 |     md_name = file_name.replace('.ipynb', '.md')
 42 |     with open(md_name, 'r') as f:
 43 |         for line in f:
 44 |             if line.startswith('#'):
 45 |                 new_s.append(line)
 46 |                 break
 47 |         for line in f:
 48 |             if line.startswith('## API'):
 49 |                 new_s.append(line)
 50 |                 new_s.append('\n')
 51 |                 break
 52 |             new_s.append(line)
 53 |         for line in f:
 54 |             if line.lstrip().startswith('#'):
 55 |                 break
 56 |         for line in f:
 57 |             if line.lstrip().startswith('```'):
 58 |                 continue
 59 |             else:
 60 |                 new_s.append(line[4:])
 61 | 
 62 |     with open(md_name, 'w') as f:
 63 |         f.write(''.join(new_s))
 64 |     os.chdir(orig_path)
 65 | 
 66 | 
 67 | # md = markdown.Markdown(extensions=[ImgExtExtension()])
 68 | # html = md.convert(data)
 69 | # print(md.images)
 70 | 
 71 | 
 72 | if __name__ == "__main__":
 73 | 
 74 |     import argparse
 75 |     parser = argparse.ArgumentParser(
 76 |             description='Convert docstring into a markdown API documentation.',
 77 |             formatter_class=argparse.RawTextHelpFormatter)
 78 | 
 79 |     parser.add_argument('-i', '--ipynb',
 80 |                         help='Path to the IPython file')
 81 | 
 82 |     parser.add_argument('-a', '--all',
 83 |                         help='Path to parse all ipynb recursively')
 84 | 
 85 |     parser.add_argument('-v', '--version',
 86 |                         action='version',
 87 |                         version='v. 0.1')
 88 | 
 89 |     args = parser.parse_args()
 90 | 
 91 |     if args.all and args.ipynb:
 92 |         raise AttributeError('Conflicting flags --ipynb and --all; choose one')
 93 | 
 94 |     if args.ipynb:
 95 |         ipynb_to_md(ipynb_path=args.ipynb)
 96 |     else:
 97 |         tree = os.walk(args.all)
 98 |         for d in tree:
 99 |             filenames = glob.glob(os.path.join(d[0], '*'))
100 |             for f in filenames:
101 |                 if f.endswith('.ipynb'):
102 |                     print(f)
103 | 
104 |                     ipynb_to_md(ipynb_path=f)
105 | 


--------------------------------------------------------------------------------
/docs/mkdocs.yml:
--------------------------------------------------------------------------------
 1 | site_name: screenlamp
 2 | site_url: http://rasbt.github.io/screenlamp
 3 | site_author: Sebastian Raschka
 4 | site_description: A toolkit for ligand-based screening
 5 | 
 6 | repo_url: https://github.com/psa-lab/screenlamp
 7 | 
 8 | #include_search: true # not necessary for this theme
 9 | docs_dir: sources
10 | site_dir: html
11 | theme_dir: united
12 | 
13 | use_directory_urls: false
14 | site_favicon: favicon.ico
15 | 
16 | markdown_extensions:
17 |   - mathjax
18 |   - extra
19 |   - tables
20 |   - fenced_code
21 | extra_javascript:
22 |   - https://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS_HTML
23 |   - mathjaxhelper.js
24 | extra_css:
25 |   - extra.css
26 | 
27 | copyright: Copyright &copy; 2017 Michigan State University. Developed in the <a href="http://www.kuhnlab.bmb.msu.edu">PSA Lab</a>.<br><em><a href="cite">Sebastian Raschka et al. 2017</a></em>.
28 | 
29 | pages:
30 | - Home: index.md
31 | - Installation: installation.md
32 | - User Documentation: 
33 |   - Overview: user_guide/doc-overview.md
34 |   - Tools: user_guide/tools.md
35 |   - Toolkit Tutorial: user_guide/tools-tutorial-1.md
36 |   - Pipeline Tutorial: user_guide/pipeline-tutorial-1.md
37 | - About:
38 |   - Contact: contact.md
39 |   - Release Notes: changelog.md
40 |   - License: license.md
41 |   - Citing screenlamp: cite.md
42 | 


--------------------------------------------------------------------------------
/docs/sources/changelog.md:
--------------------------------------------------------------------------------
 1 | # Release Notes
 2 | 
 3 | ### Version 1.0.0 (2017-10-31)
 4 | 
 5 | - First release
 6 | 
 7 | ##### Downloads
 8 | 
 9 | - [Source code (zip)](https://github.com/psa-lab/screenlamp/archive/v1.0.0.zip)
10 | - [Source code (tar.gz)](https://github.com/psa-lab/screenlamp/archive/v1.0.0.tar.gz)
11 | 


--------------------------------------------------------------------------------
/docs/sources/cite.md:
--------------------------------------------------------------------------------
 1 | # Citing screenlamp
 2 | 
 3 | Screenlamp is research software and has been made available to other researchers under a permissive [Apache v2 open source license](license). If you use screenlamp in your scientific projects or any derivative work, the authors of the screenlamp software must be acknowledged and the following publication should be cited:
 4 | 
 5 | - Raschka, Sebastian, Anne M. Scott, Nan Liu, Santosh Gunturu, Mar Huertas, Weiming Li, and Leslie A. Kuhn (2018). "Enabling the hypothesis-driven prioritization of ligand candidates in big databases: 
 6 | Screenlamp and its application to GPCR inhibitor discovery for invasive species control". *Journal of Computer-Aided Molecular Design* 32: 415.  
 7 | [[biorxiv preprint](https://www.biorxiv.org/content/early/2018/01/17/249151)]
 8 |  [[Journal article](https://link.springer.com/article/10.1007/s10822-018-0100-7)]
 9 | 
10 | 
11 | 
12 | ### BibTeX Entry
13 | 
14 | ```tex
15 | @article{raschka2018, 
16 |   title={Enabling the hypothesis-driven prioritization of ligand 
17 |     candidates in big databases: Screenlamp and its application to GPCR 
18 |     inhibitor discovery for invasive species control}, 
19 |   volume={32}, 
20 |   DOI={10.1007/s10822-018-0100-7}, 
21 |   number={3}, 
22 |   journal={Journal of Computer-Aided Molecular Design}, 
23 |   author={Raschka, Sebastian and Scott, Anne M. 
24 |     and Liu, Nan and Gunturu, Santosh and Huertas, 
25 |     Mar and Li, Weiming and Kuhn, Leslie A.}, 
26 |   year={2018}, 
27 |   month={Mar}, 
28 |   pages={415–433}
29 | }
30 | 
31 | ```
32 | 
33 | 


--------------------------------------------------------------------------------
/docs/sources/contact.md:
--------------------------------------------------------------------------------
1 | # Contact
2 | 
3 | If you encounter bugs or other technical issues with the screenlamp software package, please send an email to [kuhnlab@msu.edu](mailto:kuhnlab@msu.edu) or use the [Issue Tracker](https://github.com/psa-lab/screenlamp/issues). For questions about the [screenlamp research article](cite/index.html), please contact the publisher or [corresponding author](mailto:kuhnl@msu.edu) directly instead.
4 | 


--------------------------------------------------------------------------------
/docs/sources/extra.css:
--------------------------------------------------------------------------------
 1 | h1, h2, h3, h4 {
 2 |   padding-top: 2em;
 3 |   padding-bottom: 0.5em;
 4 | }
 5 | 
 6 | h5, h6 {
 7 |   padding-top: 1em;
 8 |   padding-bottom: 0.2em;
 9 | }
10 | 


--------------------------------------------------------------------------------
/docs/sources/images/automated-pipeline-flowchart.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/images/automated-pipeline-flowchart.jpg


--------------------------------------------------------------------------------
/docs/sources/images/automated-pipeline-flowchart.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/images/automated-pipeline-flowchart.pdf


--------------------------------------------------------------------------------
/docs/sources/images/logo-transparent-bg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/images/logo-transparent-bg.png


--------------------------------------------------------------------------------
/docs/sources/images/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/images/logo.png


--------------------------------------------------------------------------------
/docs/sources/images/obtaining-screenlamp-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/images/obtaining-screenlamp-2.png


--------------------------------------------------------------------------------
/docs/sources/images/obtaining-screenlamp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/images/obtaining-screenlamp.png


--------------------------------------------------------------------------------
/docs/sources/images/toolkit-tutorial/dataset-overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/images/toolkit-tutorial/dataset-overview.png


--------------------------------------------------------------------------------
/docs/sources/index.md:
--------------------------------------------------------------------------------
 1 | <br>
 2 | <br>
 3 | 
 4 | <div style="max-width:50%;">
 5 | <img src="images/logo-transparent-bg.png" alt="screenlamp logo">
 6 | </div>
 7 | 
 8 | 
 9 | 
10 | ![Python 3.6](https://img.shields.io/badge/Python-3.6-orange.svg)
11 | [![License](https://img.shields.io/badge/License-Apache_v2-orange.svg)](license)
12 | [![GitHub](https://img.shields.io/badge/Source-GitHub-orange.svg)](https://github.com/psa-lab/screenlamp)
13 | 
14 | ### A toolkit for ligand-based virtual screening
15 | 
16 | 
17 | Screenlamp is a Python package for facilitating ligand-based virtual screening workflows and toolkits with hypothesis-driven filtering steps.
18 | 
19 | 
20 | ## About
21 | 
22 | The screenlamp toolkit was developed in the [Protein Structure Analysis & Design Laboratory](http://www.kuhnlab.bmb.msu.edu) at Michigan State University. For additional information about screenlamp, please refer to the accompanying research publication:
23 | 
24 | - Raschka, Sebastian, Anne M. Scott, Nan Liu, Santosh Gunturu, Mar Huertas, Weiming Li, and Leslie A. Kuhn (2018). "Enabling the hypothesis-driven prioritization of ligand candidates in big databases: 
25 | Screenlamp and its application to GPCR inhibitor discovery for invasive species control". *Journal of Computer-Aided Molecular Design* 32: 415.  
26 | [[biorxiv preprint](https://www.biorxiv.org/content/early/2018/01/17/249151)]
27 |  [[Journal article](https://link.springer.com/article/10.1007/s10822-018-0100-7)]
28 | 
29 | 
30 | 
31 | Screenlamp is research software and has been made available to other researchers under a permissive [Apache v2 open source license](license). If you use screenlamp in your scientific projects or any derivative work, the authors of the screenlamp software must be acknowledged and the publication listed above should be cited.
32 | 
33 | 
34 | 
35 | 


--------------------------------------------------------------------------------
/docs/sources/installation.md:
--------------------------------------------------------------------------------
 1 | # Installing screenlamp
 2 | 
 3 | ---
 4 | 
 5 | ## Obtaining screenlamp
 6 | 
 7 | You can download the latest version of screenlamp from the [GitHub repository](https://github.com/psa-lab/screenlamp) by clicking on "Download ZIP" or download the latest stable release from the ["release" list](https://github.com/psa-lab/screenlamp/releases):
 8 | 
 9 | [![](images/obtaining-screenlamp.png)](https://github.com/psa-lab/screenlamp)
10 | 
11 | ## Setting up your Python environment for screenlamp
12 | 
13 | Python package dependencies of screenlamp are listed in the `requirements.txt` file located in the screenlamp directory. To install all of these dependencies most conveniently, you can execute the following command:
14 | 
15 |     pip install -r requirements.txt
16 | 
17 | The main modules of screenlamp are located in the `tools/` subdirectory, and after satisfying the Python package requirements (see [`requirements.txt`](https://github.com/psa-lab/screenlamp/blob/master/requirements.txt)), they are ready to use. If you haven't used screenlamp before, it is recommended to read the screenlamp [tutorial](user_guide/doc-overview.md).
18 | 
19 | ## Other software requirements
20 | 
21 | Certain submodules within screenlamp require external software to sample low-energy conformations of molecules and to generate pair-wise overlays. The tools that are currently being used in the [pre-built, automated screening pipeline](user_guide/pipeline-tutorial-1/) are [OpenEye OMEGA](https://www.eyesopen.com/omega) and [OpenEye ROCS](https://www.eyesopen.com/rocs) to accomplish those tasks. However, screenlamp does not strictly require OMEGA and ROCS, and you are free to use any open source alternative, provided that the output files are compatible with the screenlamp tools, which use the MOL2 file format.
22 | 
23 | If you don't have access to OpenEye toolkits yet, you can visit the [OpenEye website](https://www.eyesopen.com/licensing-philosophy) for more details on their licensing terms (for example, OpenEye offers a free licensing model for academics engaged in public domain research or teaching).
24 | 
25 | ## Obtaining older versions of screenlamp
26 | 
27 | To obtain one of the previous versions of screenlamp, please see the [Release Notes](changelog), which contains download links for all release versions of screenlamp.
28 | 
29 | ### Development version
30 | 
31 | You can download the latest development version of screenlamp as [ZIP](https://github.com/psa-lab/screenlamp/archive/master.zip) file directly from GitHub:
32 | 
33 | [![](images/obtaining-screenlamp-2.png)](https://github.com/psa-lab/screenlamp)
34 | 
35 | <br>
36 | 
37 | Alternatively, you can clone the screenlamp development version to your local machine by executing the following command:
38 | 
39 |     git clone https://github.com/psa-lab/screenlamp.git
40 | 


--------------------------------------------------------------------------------
/docs/sources/license.md:
--------------------------------------------------------------------------------
  1 | 
  2 | ##########################################################################
  3 | 
  4 |                              Apache License
  5 |                        Version 2.0, January 2004
  6 |                     http://www.apache.org/licenses/
  7 | 
  8 | ##########################################################################
  9 | 
 10 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
 11 | 
 12 | 1. Definitions.
 13 | 
 14 |   "License" shall mean the terms and conditions for use, reproduction,
 15 |   and distribution as defined by Sections 1 through 9 of this document.
 16 | 
 17 |   "Licensor" shall mean the copyright owner or entity authorized by
 18 |   the copyright owner that is granting the License.
 19 | 
 20 |   "Legal Entity" shall mean the union of the acting entity and all
 21 |   other entities that control, are controlled by, or are under common
 22 |   control with that entity. For the purposes of this definition,
 23 |   "control" means (i) the power, direct or indirect, to cause the
 24 |   direction or management of such entity, whether by contract or
 25 |   otherwise, or (ii) ownership of fifty percent (50%) or more of the
 26 |   outstanding shares, or (iii) beneficial ownership of such entity.
 27 | 
 28 |   "You" (or "Your") shall mean an individual or Legal Entity
 29 |   exercising permissions granted by this License.
 30 | 
 31 |   "Source" form shall mean the preferred form for making modifications,
 32 |   including but not limited to software source code, documentation
 33 |   source, and configuration files.
 34 | 
 35 |   "Object" form shall mean any form resulting from mechanical
 36 |   transformation or translation of a Source form, including but
 37 |   not limited to compiled object code, generated documentation,
 38 |   and conversions to other media types.
 39 | 
 40 |   "Work" shall mean the work of authorship, whether in Source or
 41 |   Object form, made available under the License, as indicated by a
 42 |   copyright notice that is included in or attached to the work
 43 |   (an example is provided in the Appendix below).
 44 | 
 45 |   "Derivative Works" shall mean any work, whether in Source or Object
 46 |   form, that is based on (or derived from) the Work and for which the
 47 |   editorial revisions, annotations, elaborations, or other modifications
 48 |   represent, as a whole, an original work of authorship. For the purposes
 49 |   of this License, Derivative Works shall not include works that remain
 50 |   separable from, or merely link (or bind by name) to the interfaces of,
 51 |   the Work and Derivative Works thereof.
 52 | 
 53 |   "Contribution" shall mean any work of authorship, including
 54 |   the original version of the Work and any modifications or additions
 55 |   to that Work or Derivative Works thereof, that is intentionally
 56 |   submitted to Licensor for inclusion in the Work by the copyright owner
 57 |   or by an individual or Legal Entity authorized to submit on behalf of
 58 |   the copyright owner. For the purposes of this definition, "submitted"
 59 |   means any form of electronic, verbal, or written communication sent
 60 |   to the Licensor or its representatives, including but not limited to
 61 |   communication on electronic mailing lists, source code control systems,
 62 |   and issue tracking systems that are managed by, or on behalf of, the
 63 |   Licensor for the purpose of discussing and improving the Work, but
 64 |   excluding communication that is conspicuously marked or otherwise
 65 |   designated in writing by the copyright owner as "Not a Contribution."
 66 | 
 67 |   "Contributor" shall mean Licensor and any individual or Legal Entity
 68 |   on behalf of whom a Contribution has been received by Licensor and
 69 |   subsequently incorporated within the Work.
 70 | 
 71 | 2. Grant of Copyright License. Subject to the terms and conditions of
 72 |   this License, each Contributor hereby grants to You a perpetual,
 73 |   worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 74 |   copyright license to reproduce, prepare Derivative Works of,
 75 |   publicly display, publicly perform, sublicense, and distribute the
 76 |   Work and such Derivative Works in Source or Object form.
 77 | 
 78 | 3. Grant of Patent License. Subject to the terms and conditions of
 79 |   this License, each Contributor hereby grants to You a perpetual,
 80 |   worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 81 |   (except as stated in this section) patent license to make, have made,
 82 |   use, offer to sell, sell, import, and otherwise transfer the Work,
 83 |   where such license applies only to those patent claims licensable
 84 |   by such Contributor that are necessarily infringed by their
 85 |   Contribution(s) alone or by combination of their Contribution(s)
 86 |   with the Work to which such Contribution(s) was submitted. If You
 87 |   institute patent litigation against any entity (including a
 88 |   cross-claim or counterclaim in a lawsuit) alleging that the Work
 89 |   or a Contribution incorporated within the Work constitutes direct
 90 |   or contributory patent infringement, then any patent licenses
 91 |   granted to You under this License for that Work shall terminate
 92 |   as of the date such litigation is filed.
 93 | 
 94 | 4. Redistribution. You may reproduce and distribute copies of the
 95 |   Work or Derivative Works thereof in any medium, with or without
 96 |   modifications, and in Source or Object form, provided that You
 97 |   meet the following conditions:
 98 | 
 99 |   (a) You must give any other recipients of the Work or
100 |       Derivative Works a copy of this License; and
101 | 
102 |   (b) You must cause any modified files to carry prominent notices
103 |       stating that You changed the files; and
104 | 
105 |   (c) You must retain, in the Source form of any Derivative Works
106 |       that You distribute, all copyright, patent, trademark, and
107 |       attribution notices from the Source form of the Work,
108 |       excluding those notices that do not pertain to any part of
109 |       the Derivative Works; and
110 | 
111 |   (d) If the Work includes a "NOTICE" text file as part of its
112 |       distribution, then any Derivative Works that You distribute must
113 |       include a readable copy of the attribution notices contained
114 |       within such NOTICE file, excluding those notices that do not
115 |       pertain to any part of the Derivative Works, in at least one
116 |       of the following places: within a NOTICE text file distributed
117 |       as part of the Derivative Works; within the Source form or
118 |       documentation, if provided along with the Derivative Works; or,
119 |       within a display generated by the Derivative Works, if and
120 |       wherever such third-party notices normally appear. The contents
121 |       of the NOTICE file are for informational purposes only and
122 |       do not modify the License. You may add Your own attribution
123 |       notices within Derivative Works that You distribute, alongside
124 |       or as an addendum to the NOTICE text from the Work, provided
125 |       that such additional attribution notices cannot be construed
126 |       as modifying the License.
127 | 
128 |   You may add Your own copyright statement to Your modifications and
129 |   may provide additional or different license terms and conditions
130 |   for use, reproduction, or distribution of Your modifications, or
131 |   for any such Derivative Works as a whole, provided Your use,
132 |   reproduction, and distribution of the Work otherwise complies with
133 |   the conditions stated in this License.
134 | 
135 | 5. Submission of Contributions. Unless You explicitly state otherwise,
136 |   any Contribution intentionally submitted for inclusion in the Work
137 |   by You to the Licensor shall be under the terms and conditions of
138 |   this License, without any additional terms or conditions.
139 |   Notwithstanding the above, nothing herein shall supersede or modify
140 |   the terms of any separate license agreement you may have executed
141 |   with Licensor regarding such Contributions.
142 | 
143 | 6. Trademarks. This License does not grant permission to use the trade
144 |   names, trademarks, service marks, or product names of the Licensor,
145 |   except as required for reasonable and customary use in describing the
146 |   origin of the Work and reproducing the content of the NOTICE file.
147 | 
148 | 7. Disclaimer of Warranty. Unless required by applicable law or
149 |   agreed to in writing, Licensor provides the Work (and each
150 |   Contributor provides its Contributions) on an "AS IS" BASIS,
151 |   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
152 |   implied, including, without limitation, any warranties or conditions
153 |   of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
154 |   PARTICULAR PURPOSE. You are solely responsible for determining the
155 |   appropriateness of using or redistributing the Work and assume any
156 |   risks associated with Your exercise of permissions under this License.
157 | 
158 | 8. Limitation of Liability. In no event and under no legal theory,
159 |   whether in tort (including negligence), contract, or otherwise,
160 |   unless required by applicable law (such as deliberate and grossly
161 |   negligent acts) or agreed to in writing, shall any Contributor be
162 |   liable to You for damages, including any direct, indirect, special,
163 |   incidental, or consequential damages of any character arising as a
164 |   result of this License or out of the use or inability to use the
165 |   Work (including but not limited to damages for loss of goodwill,
166 |   work stoppage, computer failure or malfunction, or any and all
167 |   other commercial damages or losses), even if such Contributor
168 |   has been advised of the possibility of such damages.
169 | 
170 | 9. Accepting Warranty or Additional Liability. While redistributing
171 |   the Work or Derivative Works thereof, You may choose to offer,
172 |   and charge a fee for, acceptance of support, warranty, indemnity,
173 |   or other liability obligations and/or rights consistent with this
174 |   License. However, in accepting such obligations, You may act only
175 |   on Your own behalf and on Your sole responsibility, not on behalf
176 |   of any other Contributor, and only if You agree to indemnify,
177 |   defend, and hold each Contributor harmless for any liability
178 |   incurred by, or claims asserted against, such Contributor by reason
179 |   of your accepting any such warranty or additional liability.
180 | 
181 | <br>
182 | <br>
183 | 
184 | END OF TERMS AND CONDITIONS
185 | 
186 | 
187 | Copyright 2017 Michigan State University
188 | 
189 | The screenlamp software was developed by Sebastian Raschka
190 | and Leslie A. Kuhn in the Protein Structure Lab 
191 | (http://www.kuhnlab.bmb.msu.edu) at Michigan State University.
192 | 
193 | Licensed under the Apache License, Version 2.0 (the "License");
194 | you may not use this file except in compliance with the License.
195 | You may obtain a copy of the License at
196 | 
197 | http://www.apache.org/licenses/LICENSE-2.0
198 | 
199 | Unless required by applicable law or agreed to in writing, software
200 | distributed under the License is distributed on an "AS IS" BASIS,
201 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
202 | See the License for the specific language governing permissions and
203 | limitations under the License.


--------------------------------------------------------------------------------
/docs/sources/user_guide/doc-overview.md:
--------------------------------------------------------------------------------
1 | # Overview
2 | 
3 | The screenlamp toolkit is designed in a modular way to provide the building blocks for constructing efficient and flexible virtual screening pipelines. The user documentation consists of three parts:
4 | 
5 | 1. [Tools](tools): An overview of the different tools within screenlamp and a summary of their usage commands.
6 | 2. [Toolkit Tutorial](tools-tutorial-1): An example showing how to combine the different tools summarized in the Toolkit API to perform a typical virtual screening run.
7 | 3. [Pipeline Tutorial](pipeline-tutorial-1): A preconstructed, automated virtual screening pipeline based on the tools listed in the Toolkit API and the virtual screening run explained in the Toolkit Tutorial. 
8 | 
9 | If you are new to screenlamp and would like to get a top-down perspective of what you can do with this toolkit, I recommend starting with the [Pipeline Tutorial](pipeline-tutorial-1), which presents you with an automated virtual screening run on a small example dataset. To construct your own virtual screening pipelines and see how the different modules within screenlamp can be used in tandem, please read the [Toolkit Tutorial](tools-tutorial-1). While reading through the tutorials, the [Tools](tools) page can be used as a reference for more detailed descriptions of the tools that are available within screenlamp.


--------------------------------------------------------------------------------
/docs/sources/user_guide/images/tools-tutorial-1/3kpzs-keto-sulfur.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/user_guide/images/tools-tutorial-1/3kpzs-keto-sulfur.png


--------------------------------------------------------------------------------
/docs/sources/user_guide/images/tools-tutorial-1/5-mol2ids.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/user_guide/images/tools-tutorial-1/5-mol2ids.png


--------------------------------------------------------------------------------
/docs/sources/user_guide/images/tools-tutorial-1/atomtype-match-ex-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/user_guide/images/tools-tutorial-1/atomtype-match-ex-1.png


--------------------------------------------------------------------------------
/docs/sources/user_guide/images/tools-tutorial-1/atomtype-match-ex-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/user_guide/images/tools-tutorial-1/atomtype-match-ex-2.png


--------------------------------------------------------------------------------
/docs/sources/user_guide/images/tools-tutorial-1/charge-match-ex-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/user_guide/images/tools-tutorial-1/charge-match-ex-1.png


--------------------------------------------------------------------------------
/docs/sources/user_guide/images/tools-tutorial-1/fgroup-match-overlays-pymol.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/user_guide/images/tools-tutorial-1/fgroup-match-overlays-pymol.png


--------------------------------------------------------------------------------
/docs/sources/user_guide/images/tools-tutorial-1/open-fgroup-match-overlays.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/user_guide/images/tools-tutorial-1/open-fgroup-match-overlays.png


--------------------------------------------------------------------------------
/docs/sources/user_guide/images/tools-tutorial-1/pipe-step-1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/user_guide/images/tools-tutorial-1/pipe-step-1.jpg


--------------------------------------------------------------------------------
/docs/sources/user_guide/images/tools-tutorial-1/pipe-step-2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/user_guide/images/tools-tutorial-1/pipe-step-2.jpg


--------------------------------------------------------------------------------
/docs/sources/user_guide/images/tools-tutorial-1/pipe-step-3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/user_guide/images/tools-tutorial-1/pipe-step-3.jpg


--------------------------------------------------------------------------------
/docs/sources/user_guide/images/tools-tutorial-1/pipe-step-4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/user_guide/images/tools-tutorial-1/pipe-step-4.jpg


--------------------------------------------------------------------------------
/docs/sources/user_guide/images/tools-tutorial-1/pipe-step-5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/user_guide/images/tools-tutorial-1/pipe-step-5.jpg


--------------------------------------------------------------------------------
/docs/sources/user_guide/images/tools-tutorial-1/pipe-step-6.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/user_guide/images/tools-tutorial-1/pipe-step-6.jpg


--------------------------------------------------------------------------------
/docs/sources/user_guide/images/tools-tutorial-1/pipe-step-7.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/user_guide/images/tools-tutorial-1/pipe-step-7.jpg


--------------------------------------------------------------------------------
/docs/sources/user_guide/images/tools-tutorial-1/pipe-step-8.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/user_guide/images/tools-tutorial-1/pipe-step-8.jpg


--------------------------------------------------------------------------------
/docs/sources/user_guide/images/tools-tutorial-1/pipeline-overview.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/user_guide/images/tools-tutorial-1/pipeline-overview.jpg


--------------------------------------------------------------------------------
/docs/sources/user_guide/images/tools-tutorial-1/pymol-overlay-ex-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/user_guide/images/tools-tutorial-1/pymol-overlay-ex-1.png


--------------------------------------------------------------------------------
/docs/sources/user_guide/images/tools-tutorial-1/pymol-overlay-ex-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/user_guide/images/tools-tutorial-1/pymol-overlay-ex-2.png


--------------------------------------------------------------------------------
/docs/sources/user_guide/images/tools-tutorial-1/zincdata-spreadsheat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/sources/user_guide/images/tools-tutorial-1/zincdata-spreadsheat.png


--------------------------------------------------------------------------------
/docs/sources/user_guide/pipeline-tutorial-1.md:
--------------------------------------------------------------------------------
 1 | # Tutorial on Using a Pre-constructed Screenlamp Pipeline
 2 | 
 3 | ## Overview
 4 | 
 5 | This tutorial explains how to use a pre-built screenlamp pipeline to perform an automated virtual screening on a small example dataset.
 6 | 
 7 | In this particular screening pipeline, we are searching for mimics of a query molecule that contain a keto-group and sulfur atom at a specified distance from each other (13-20 angstroms) and have a high overall chemical and volumetric similarity towards the query. Then, we are selecting a subset of database molecules where the keto-group of the query molecule overlays with a keto-group in the database molecules, and where the sulfur atom in the query overlays with a sulfur atom in the database molecules. The overall virtual screening pipeline is summarized in the flowchart below. For more details on the individual screening steps, please see the [Toolkit Tutorial](./tools-tutorial-1), which walks you through these steps using the same database and approach.
 8 | 
 9 | ![](../images/automated-pipeline-flowchart.jpg)
10 | 
11 | (A higher-resolution PDF version of this flowchart is available [here](https://github.com/rasbt/screenlamp/blob/master/docs/sources/images/automated-pipeline-flowchart.pdf).)
12 | 
13 | ### Requirements
14 | 
15 | Before you continue with the tutorial, please see the [setup instructions](../installation/index.html#other-software-requirements) for screenlamp if this is your first screening run.
16 | 
17 | 
18 | ## Obtaining and Preparing the Dataset
19 | 
20 | 
21 | ### MOL2 Input Files
22 | 
23 | The automated screenlamp pipeline that is being used in this tutorial is compatible with Tripos MOL2 files of arbitrary database origin and size. A typical use case for this pipeline would be the screening of all ~18,000,000 *Drug-Like* molecules from [ZINC](http://zinc.docking.org), which is available in MOL2 format on ZINC [here](http://zinc.docking.org/subsets/drug-like). Please note that screenlamp supports both Tripos MOL2 (`*.mol2`) files and gzipped Tripos MOL2 files (`*.mol2.gz`) out of the box. Thus, if your input dataset is in gzipped format, you can use it right away without having to make any adjustments or decompressing it. However, please note that the decompressing and compressing operations that are performed when working with gzipped files have an additional toll on computational performance.
24 | 
25 | **Please keep in mind that this screening pipeline with 18,000,000 input molecules and the preconfigured settings takes about a day to complete on a multi-core desktop computer**. Thus, it is recommended to work through this tutorial using a smaller dataset. With kind permission from John Irwin and the ZINC team, we recommend using a random subset of 70,000 small molecules that we prepared for this tutorial. It takes approximately 10 minutes for a multi-core desktop computer to execute all steps in the automated, virtual screening pipeline described earlier. This subset from ZINC is split into 7 multi-MOL2 files with 10,000 molecules each: `partition_mol2_1.mol2` to `partition_mol2_7.mol2`. 
26 | 
27 | For this tutorial, please download the dataset by clicking the following link and unzip it on your machine that you are using for the virtual screening run: [https://sebastianraschka.com/datasets/screenlamp/pipeline-tutorial_1/partition_1-7.zip](https://sebastianraschka.com/datasets/screenlamp/pipeline-tutorial_1/partition_1-7.zip)
28 | 
29 | ### Data Table for Prefiltering
30 | 
31 | For this particular tutorial, you'll also need a data table containing general information about these molecules. Although the partitions you downloaded above are only a small, modified subset of [ZINC](http://zinc.docking.org) molecules, we are going to use the full ~18,000,000 molecule Drug-like table available for download at [http://zinc.docking.org/subsets/drug-like](http://zinc.docking.org/subsets/drug-like). To download the tab-separated table, click on the [Properties](http://zinc.docking.org/db/bysubset/3/3_prop.xls) link on the [ZINC Drug-like](http://zinc.docking.org/subsets/drug-like) page. Please note that the size of the data table is about 1.8 GB, and thus, the download may take a while depending on your internet connection. Alternatively, to download a smaller data table containing only ~170,000 molecules, please use the following link: [https://sebastianraschka.com/datasets/screenlamp/pipeline-tutorial_1/small_table_p1-7.txt](https://sebastianraschka.com/datasets/screenlamp/pipeline-tutorial_1/small_table_p1-7.txt)
32 | 
33 | 
34 | ### Query Molecule
35 | 
36 | The third data file you'll need for ligand-based virtual screening is the query molecule. For this tutorial, please download the following multi-conformer MOL2 file: [https://sebastianraschka.com/datasets/screenlamp/pipeline-tutorial_1/3kpzs_query.mol2](https://sebastianraschka.com/datasets/screenlamp/pipeline-tutorial_1/3kpzs_query.mol2)
37 | 
38 | ## Editing the Configuration File
39 | 
40 | Once you have obtained the database molecules (mol2 partitions), the data table of molecular properties, and the query molecule, you can prepare the configuration file that stores the information about your local file paths and screening settings.
41 | 
42 | As your configuration file template, you can use the following YAML file: [`screenlamp/tools/pipelines/pipeline-example-1-config.yaml`](https://github.com/rasbt/screenlamp/blob/master/tools/pipelines/pipeline-example-1-config.yaml). Create a local copy of it and modify the file paths according to your system's configuration.
43 | 
44 | ## Running the Automated Screening Pipeline
45 | 
46 | After you have customized your configuration file, you can start the screening pipeline as shown in the example command snippet below:
47 | 
48 | ```bash
49 | python path/to/screenlamp/tools/pipelines/pipeline-example-1.py --config_file /path/to/your/config/pipeline-example-1-config.yaml --incremental true
50 | ```
51 | 
52 | By setting `--incremental true`, you will be prompted to confirm each step by pressing enter, which is recommended for first-time use.
53 | 
54 | For your reference, a zip archive of all files being generated via the execution of the screenlamp pipeline can be obtained via the following download link: [https://sebastianraschka.com/datasets/screenlamp/pipeline-tutorial_1/pipeline-tutorial_1_outputs.zip](https://sebastianraschka.com/datasets/screenlamp/pipeline-tutorial_1/pipeline-tutorial_1_outputs.zip).
55 | 
56 | ## Canceling and Resuming a Screening Run
57 | 
58 | Note that throughout the screening pipeline execution, you will see a short description of the commands being executed. Also, the current pipeline step being executed will be shown in the terminal window. In case you cancel or abort a screening run, you can resume it at the last step being executed using the `--start_at_step` flag. For example, if you quit the screening run at Step 2 by pressing CTRL+C
59 | 
60 | ```
61 | ################################################
62 | Step 02: PREFILTER BY FUNCTIONAL GROUP PRESENCE
63 | ################################################
64 |     
65 | Running command:
66 | python /Users/sebastian/code/screenlamp/tools/funcgroup_presence_to_id.py --input /Users/sebastian/Desktop/screenlamp_pipe/01_selected-mol2s --output /Users/sebastian/Desktop/screenlamp_pipe/02_3keto-and-sulfur-mol2ids.txt --selection ((atom_type == 'S.3') | (atom_type == 'S.o2')) --> (atom_type == 'O.2') --processes 0
67 | 
68 | Press Enter to proceed or CTRL+C to quit
69 | ```
70 | 
71 | you can resume the run by using `--start_at_step 2` as shown in the example below:
72 | 
73 | ```bash
74 | python path/to/screenlamp/tools/pipelines/pipeline-example-1.py --config_file /path/to/your/config/pipeline-example-1-config.yaml --incremental true --start_at_step 2
75 | ```
76 | 
77 | 
78 | 
79 | 
80 | 


--------------------------------------------------------------------------------
/docs/sources/user_guide/tools:
--------------------------------------------------------------------------------
1 | ../../../tools/


--------------------------------------------------------------------------------
/docs/united/_LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright © 2015, Dougal Matthews. All rights reserved.
 2 | 
 3 | Redistribution and use in source and binary forms, with or
 4 | without modification, are permitted provided that the following
 5 | conditions are met:
 6 | 
 7 | Redistributions of source code must retain the above copyright
 8 | notice, this list of conditions and the following disclaimer.
 9 | Redistributions in binary form must reproduce the above copyright
10 | notice, this list of conditions and the following disclaimer in
11 | the documentation and/or other materials provided with the
12 | distribution.
13 | 
14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
15 | CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
16 | INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
17 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
19 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
21 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
22 | USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
23 | AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
25 | ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 | POSSIBILITY OF SUCH DAMAGE.
27 | 


--------------------------------------------------------------------------------
/docs/united/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/united/__init__.py


--------------------------------------------------------------------------------
/docs/united/base.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html lang="en">
  3 |     <head>
  4 |         {%- block site_meta %}
  5 |         <meta charset="utf-8">
  6 |         <meta http-equiv="X-UA-Compatible" content="IE=edge">
  7 |         <meta name="viewport" content="width=device-width, initial-scale=1.0">
  8 |         {% if config.site_description %}<meta name="description" content="{{ config.site_description }}">{% endif %}
  9 |         {% if config.site_author %}<meta name="author" content="{{ config.site_author }}">{% endif %}
 10 |         {% if page and page.canonical_url %}<link rel="canonical" href="{{ page.canonical_url }}">{% endif %}
 11 |         <link rel="shortcut icon" href="{{ base_url }}/img/favicon.ico">
 12 |         {%- endblock %}
 13 | 
 14 |         {%- block htmltitle %}
 15 |         <title>{% if page and page.title %}{{ page.title }} - {% endif %}{{ config.site_name }}</title>
 16 |         {%- endblock %}
 17 | 
 18 |         {%- block styles %}
 19 |         <link href="{{ base_url }}/css/bootstrap-custom.min.css" rel="stylesheet">
 20 |         <link href="{{ base_url }}/css/font-awesome-4.0.3.css" rel="stylesheet">
 21 |         <link rel="stylesheet" href="{{ base_url }}/css/highlight.css">
 22 |         <link href="{{ base_url }}/css/base.css" rel="stylesheet">
 23 |         {%- for path in extra_css %}
 24 |         <link href="{{ path }}" rel="stylesheet">
 25 |         {%- endfor %}
 26 |         {%- endblock %}
 27 | 
 28 |         {%- block libs %}
 29 |         <!-- HTML5 shim and Respond.js IE8 support of HTML5 elements and media queries -->
 30 |         <!--[if lt IE 9]>
 31 |             <script src="https://oss.maxcdn.com/libs/html5shiv/3.7.0/html5shiv.js"></script>
 32 |             <script src="https://oss.maxcdn.com/libs/respond.js/1.3.0/respond.min.js"></script>
 33 |         <![endif]-->
 34 | 
 35 |         <script src="{{ base_url }}/js/jquery-1.10.2.min.js"></script>
 36 |         <script src="{{ base_url }}/js/bootstrap-3.0.3.min.js"></script>
 37 |         <script src="{{ base_url }}/js/highlight.pack.js"></script>
 38 |         {%- endblock %}
 39 | 
 40 |         {%- block analytics %}
 41 |         {% if config.google_analytics %}
 42 |         <script>
 43 |             (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
 44 |             (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
 45 |             m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
 46 |             })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
 47 | 
 48 |             ga('create', '{{ config.google_analytics[0] }}', '{{ config.google_analytics[1] }}');
 49 |             ga('send', 'pageview');
 50 |         </script>
 51 |         {% endif %}
 52 |         {%- endblock %}
 53 |       
 54 |         {%- block extrahead %} {% endblock %}
 55 |     </head>
 56 | 
 57 |     <body>
 58 | 
 59 |         {% include "nav.html" %}
 60 | 
 61 |         <div class="container">
 62 |             {%- block content %}
 63 |                 <div class="col-md-3">{% include "toc.html" %}</div>
 64 |                 <div class="col-md-9" role="main">{% include "content.html" %}</div>
 65 |             {%- endblock %}
 66 |         </div>
 67 | 
 68 |         <footer class="col-md-12">
 69 |             {%- block footer %}
 70 |             <hr>
 71 |             {% if config.copyright %}
 72 |                 <center>{{ config.copyright }}</center>
 73 |             {% endif %}
 74 |             <center>Documentation built with <a href="http://www.mkdocs.org/">MkDocs</a>.</center>
 75 |             {%- endblock %}
 76 |         </footer>
 77 | 
 78 |         {%- block scripts %}
 79 |         <script>var base_url = '{{ base_url }}';</script>
 80 |         <script data-main="{{ base_url }}/mkdocs/js/search.js" src="{{ base_url }}/mkdocs/js/require.js"></script>
 81 |         <script src="{{ base_url }}/js/base.js"></script>
 82 |         {%- for path in extra_javascript %}
 83 |         <script src="{{ path }}"></script>
 84 |         {%- endfor %}
 85 |         {%- endblock %}
 86 | 
 87 |         <div class="modal" id="mkdocs_search_modal" tabindex="-1" role="dialog" aria-labelledby="Search Modal" aria-hidden="true">
 88 |             <div class="modal-dialog">
 89 |                 <div class="modal-content">
 90 |                     <div class="modal-header">
 91 |                         <button type="button" class="close" data-dismiss="modal"><span aria-hidden="true">&times;</span><span class="sr-only">Close</span></button>
 92 |                         <h4 class="modal-title" id="exampleModalLabel">Search</h4>
 93 |                     </div>
 94 |                     <div class="modal-body">
 95 |                         <p>
 96 |                             From here you can search these documents. Enter
 97 |                             your search terms below.
 98 |                         </p>
 99 |                         <form role="form">
100 |                             <div class="form-group">
101 |                                 <input type="text" class="form-control" placeholder="Search..." id="mkdocs-search-query">
102 |                             </div>
103 |                         </form>
104 |                         <div id="mkdocs-search-results"></div>
105 |                     </div>
106 |                     <div class="modal-footer">
107 |                     </div>
108 |                 </div>
109 |             </div>
110 |         </div>
111 |     </body>
112 | </html>
113 | 


--------------------------------------------------------------------------------
/docs/united/content.html:
--------------------------------------------------------------------------------
 1 | {% if page and page.meta and page.meta.source %}
 2 | <div class="source-links">
 3 | {% for filename in page.meta.source %}
 4 |     <span class="label label-primary">{{ filename }}</span>
 5 | {% endfor %}
 6 | </div>
 7 | {% endif %}
 8 | 
 9 | {% if page and page.content %}{{ page.content }}{% endif %}
10 | 


--------------------------------------------------------------------------------
/docs/united/css/base.css:
--------------------------------------------------------------------------------
  1 | body {
  2 |     padding-top: 70px;
  3 | }
  4 | 
  5 | /*
  6 |  * The code below adds some padding to the top of the current anchor target so
  7 |  * that, when navigating to it, the header isn't hidden by the navbar at the
  8 |  * top. This is especially complicated because we want to *remove* the padding
  9 |  * after navigation so that hovering over the header shows the permalink icon
 10 |  * correctly. Thus, we create a CSS animation to remove the extra padding after
 11 |  * a second. We have two animations so that navigating to an anchor within the
 12 |  * page always restarts the animation.
 13 |  *
 14 |  * See <https://github.com/mkdocs/mkdocs/issues/843> for more details.
 15 |  */
 16 | :target::before {
 17 |     content: "";
 18 |     display: block;
 19 |     margin-top: -75px;
 20 |     height: 75px;
 21 |     pointer-events: none;
 22 |     animation: 0s 1s forwards collapse-anchor-padding-1;
 23 | }
 24 | 
 25 | body.clicky :target::before {
 26 |     animation-name: collapse-anchor-padding-2;
 27 | }
 28 | 
 29 | @keyframes collapse-anchor-padding-1 {
 30 |     to {
 31 |         margin-top: 0;
 32 |         height: 0;
 33 |     }
 34 | }
 35 | 
 36 | @keyframes collapse-anchor-padding-2 {
 37 |     to {
 38 |         margin-top: 0;
 39 |         height: 0;
 40 |     }
 41 | }
 42 | 
 43 | ul.nav li.main {
 44 |     font-weight: bold;
 45 | }
 46 | 
 47 | div.col-md-3 {
 48 |     padding-left: 0;
 49 | }
 50 | 
 51 | div.col-md-9 {
 52 |     padding-bottom: 100px;
 53 | }
 54 | 
 55 | div.source-links {
 56 |     float: right;
 57 | }
 58 | 
 59 | div.col-md-9 img {
 60 |     max-width: 100%;
 61 | }
 62 | 
 63 | code {
 64 |     padding: 1px 3px;
 65 |     background: #f5f5f5;
 66 |     border: solid 1px #ccc;
 67 |     color: #333;
 68 | }
 69 | 
 70 | pre code {
 71 |     background: transparent;
 72 |     border: none;
 73 | }
 74 | 
 75 | a > code {
 76 |     color: #dd4814;
 77 | }
 78 | 
 79 | a > code:hover, a > code:focus {
 80 |     color: #97310e;
 81 | }
 82 | 
 83 | /*
 84 |  * Side navigation
 85 |  *
 86 |  * Scrollspy and affixed enhanced navigation to highlight sections and secondary
 87 |  * sections of docs content.
 88 |  */
 89 | 
 90 | /* By default it's not affixed in mobile views, so undo that */
 91 | .bs-sidebar.affix {
 92 |     position: static;
 93 | }
 94 | 
 95 | .bs-sidebar.well {
 96 |     padding: 0;
 97 | }
 98 | 
 99 | /* First level of nav */
100 | .bs-sidenav {
101 |     margin-top: 30px;
102 |     margin-bottom: 30px;
103 |     padding-top:    10px;
104 |     padding-bottom: 10px;
105 |     border-radius: 5px;
106 | }
107 | 
108 | /* All levels of nav */
109 | .bs-sidebar .nav > li > a {
110 |     display: block;
111 |     padding: 5px 20px;
112 |     z-index: 1;
113 | }
114 | .bs-sidebar .nav > li > a:hover,
115 | .bs-sidebar .nav > li > a:focus {
116 |     text-decoration: none;
117 |     border-right: 1px solid;
118 | }
119 | .bs-sidebar .nav > .active > a,
120 | .bs-sidebar .nav > .active:hover > a,
121 | .bs-sidebar .nav > .active:focus > a {
122 |     font-weight: bold;
123 |     background-color: transparent;
124 |     border-right: 1px solid;
125 | }
126 | 
127 | /* Nav: second level (shown on .active) */
128 | .bs-sidebar .nav .nav {
129 |     display: none; /* Hide by default, but at >768px, show it */
130 |     margin-bottom: 8px;
131 | }
132 | .bs-sidebar .nav .nav > li > a {
133 |     padding-top:    3px;
134 |     padding-bottom: 3px;
135 |     padding-left: 30px;
136 |     font-size: 90%;
137 | }
138 | 
139 | /* Show and affix the side nav when space allows it */
140 | @media (min-width: 992px) {
141 |     .bs-sidebar .nav > .active > ul {
142 |         display: block;
143 |     }
144 |     /* Widen the fixed sidebar */
145 |     .bs-sidebar.affix,
146 |     .bs-sidebar.affix-bottom {
147 |         width: 213px;
148 |     }
149 |     .bs-sidebar.affix {
150 |         position: fixed; /* Undo the static from mobile first approach */
151 |         top: 80px;
152 |     }
153 |     .bs-sidebar.affix-bottom {
154 |         position: absolute; /* Undo the static from mobile first approach */
155 |     }
156 |     .bs-sidebar.affix-bottom .bs-sidenav,
157 |     .bs-sidebar.affix .bs-sidenav {
158 |         margin-top: 0;
159 |         margin-bottom: 0;
160 |     }
161 | }
162 | @media (min-width: 1200px) {
163 |     /* Widen the fixed sidebar again */
164 |     .bs-sidebar.affix-bottom,
165 |     .bs-sidebar.affix {
166 |         width: 263px;
167 |     }
168 | }
169 | 
170 | .headerlink {
171 |     display: none;
172 |     padding-left: .5em;
173 | }
174 | 
175 | h1:hover .headerlink, h2:hover .headerlink, h3:hover .headerlink, h4:hover .headerlink, h5:hover .headerlink, h6:hover .headerlink{
176 |     display:inline-block;
177 | }
178 | 
179 | /* display submenu relative to parent*/
180 | .dropdown-submenu {
181 |     position: relative;
182 | }
183 | 
184 | /* sub menu style */
185 | .dropdown-submenu>.dropdown-menu {
186 |     top: 0;
187 |     left: 100%;
188 |     margin-top: 0px;
189 |     margin-left: -1px;
190 |     -webkit-border-radius: 0 4px 4px 4px;
191 |     -moz-border-radius: 0 4px 4px;
192 |     border-radius: 0 4px 4px 4px;
193 | }
194 | 
195 | /* display sub menu on hover*/
196 | .dropdown-submenu:hover>.dropdown-menu {
197 |     display: block;
198 | }
199 | 
200 | /* little arrow */
201 | .dropdown-submenu>a:after {
202 |     display: block;
203 |     content: " ";
204 |     float: right;
205 |     width: 0;
206 |     height: 0;
207 |     border-color: transparent;
208 |     border-style: solid;
209 |     border-width: 5px 0 5px 5px;
210 |     border-left-color: #ccc;
211 |     margin-top: 5px;
212 |     margin-right: -10px;
213 | }
214 | 
215 | /* little arrow of parent menu */
216 | .dropdown-submenu:hover>a:after {
217 |     border-left-color: #404040;
218 | }
219 | 


--------------------------------------------------------------------------------
/docs/united/css/highlight.css:
--------------------------------------------------------------------------------
  1 | /*
  2 | This is the GitHub theme for highlight.js
  3 | 
  4 | github.com style (c) Vasily Polovnyov <vast@whiteants.net>
  5 | 
  6 | */
  7 | 
  8 | .hljs {
  9 |   display: block;
 10 |   overflow-x: auto;
 11 |   color: #333;
 12 |   -webkit-text-size-adjust: none;
 13 | }
 14 | 
 15 | .hljs-comment,
 16 | .diff .hljs-header,
 17 | .hljs-javadoc {
 18 |   color: #998;
 19 |   font-style: italic;
 20 | }
 21 | 
 22 | .hljs-keyword,
 23 | .css .rule .hljs-keyword,
 24 | .hljs-winutils,
 25 | .nginx .hljs-title,
 26 | .hljs-subst,
 27 | .hljs-request,
 28 | .hljs-status {
 29 |   color: #333;
 30 |   font-weight: bold;
 31 | }
 32 | 
 33 | .hljs-number,
 34 | .hljs-hexcolor,
 35 | .ruby .hljs-constant {
 36 |   color: #008080;
 37 | }
 38 | 
 39 | .hljs-string,
 40 | .hljs-tag .hljs-value,
 41 | .hljs-phpdoc,
 42 | .hljs-dartdoc,
 43 | .tex .hljs-formula {
 44 |   color: #d14;
 45 | }
 46 | 
 47 | .hljs-title,
 48 | .hljs-id,
 49 | .scss .hljs-preprocessor {
 50 |   color: #900;
 51 |   font-weight: bold;
 52 | }
 53 | 
 54 | .hljs-list .hljs-keyword,
 55 | .hljs-subst {
 56 |   font-weight: normal;
 57 | }
 58 | 
 59 | .hljs-class .hljs-title,
 60 | .hljs-type,
 61 | .vhdl .hljs-literal,
 62 | .tex .hljs-command {
 63 |   color: #458;
 64 |   font-weight: bold;
 65 | }
 66 | 
 67 | .hljs-tag,
 68 | .hljs-tag .hljs-title,
 69 | .hljs-rule .hljs-property,
 70 | .django .hljs-tag .hljs-keyword {
 71 |   color: #000080;
 72 |   font-weight: normal;
 73 | }
 74 | 
 75 | .hljs-attribute,
 76 | .hljs-variable,
 77 | .lisp .hljs-body,
 78 | .hljs-name {
 79 |   color: #008080;
 80 | }
 81 | 
 82 | .hljs-regexp {
 83 |   color: #009926;
 84 | }
 85 | 
 86 | .hljs-symbol,
 87 | .ruby .hljs-symbol .hljs-string,
 88 | .lisp .hljs-keyword,
 89 | .clojure .hljs-keyword,
 90 | .scheme .hljs-keyword,
 91 | .tex .hljs-special,
 92 | .hljs-prompt {
 93 |   color: #990073;
 94 | }
 95 | 
 96 | .hljs-built_in {
 97 |   color: #0086b3;
 98 | }
 99 | 
100 | .hljs-preprocessor,
101 | .hljs-pragma,
102 | .hljs-pi,
103 | .hljs-doctype,
104 | .hljs-shebang,
105 | .hljs-cdata {
106 |   color: #999;
107 |   font-weight: bold;
108 | }
109 | 
110 | .hljs-deletion {
111 |   background: #fdd;
112 | }
113 | 
114 | .hljs-addition {
115 |   background: #dfd;
116 | }
117 | 
118 | .diff .hljs-change {
119 |   background: #0086b3;
120 | }
121 | 
122 | .hljs-chunk {
123 |   color: #aaa;
124 | }
125 | 


--------------------------------------------------------------------------------
/docs/united/fonts/fontawesome-webfont.eot:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/united/fonts/fontawesome-webfont.eot


--------------------------------------------------------------------------------
/docs/united/fonts/fontawesome-webfont.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/united/fonts/fontawesome-webfont.ttf


--------------------------------------------------------------------------------
/docs/united/fonts/fontawesome-webfont.woff:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/united/fonts/fontawesome-webfont.woff


--------------------------------------------------------------------------------
/docs/united/img/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rasbt/screenlamp/c0da04fa00472890880b0817bd1f61d013638c9b/docs/united/img/favicon.ico


--------------------------------------------------------------------------------
/docs/united/js/base.js:
--------------------------------------------------------------------------------
 1 | /* Search */
 2 | 
 3 | function getSearchTerm()
 4 | {
 5 |     var sPageURL = window.location.search.substring(1);
 6 |     var sURLVariables = sPageURL.split('&');
 7 |     for (var i = 0; i < sURLVariables.length; i++)
 8 |     {
 9 |         var sParameterName = sURLVariables[i].split('=');
10 |         if (sParameterName[0] == 'q') {
11 |             return sParameterName[1];
12 |         }
13 |     }
14 | }
15 | 
/* When the page was opened with a `q` query parameter, show the search
   modal right away; whenever the modal is shown, move keyboard focus to
   the search input. */
$(document).ready(function() {
    var search_term = getSearchTerm(),
        $search_modal = $('#mkdocs_search_modal');

    if(search_term) {
        $search_modal.modal();
    }

    $search_modal.on('shown.bs.modal', function () {
        $search_modal.find('#mkdocs-search-query').focus();
    });
});


/* Highlight */
/* Start highlight.js and apply the Bootstrap table classes to every table
   on the page. */
$( document ).ready(function() {
    hljs.initHighlightingOnLoad();
    $('table').addClass('table table-striped table-hover');
});


/* Enable Bootstrap scrollspy, targeting the sidebar navigation. */
$('body').scrollspy({
    target: '.bs-sidebar',
});

/* Toggle the `clicky` class on the body when clicking links to let us
   retrigger CSS animations. See ../css/base.css for more details. */
$('a').click(function(e) {
    $('body').toggleClass('clicky');
});

/* Prevent disabled links from causing a page reload.
   The handler must declare the event argument that jQuery passes in:
   relying on the implicit global `event` (window.event) is non-standard
   and throws a ReferenceError in browsers that do not provide it. */
$("li.disabled a").click(function(event) {
    event.preventDefault();
});
51 | 
52 | 
53 | 
54 | 


--------------------------------------------------------------------------------
/docs/united/main.html:
--------------------------------------------------------------------------------
1 | {% extends "base.html" %}
2 | 


--------------------------------------------------------------------------------
/docs/united/nav-sub.html:
--------------------------------------------------------------------------------
 1 | {% if not nav_item.children %}
 2 | <li {% if nav_item.active %}class="active"{% endif %}>
 3 |     <a href="{{ nav_item.url }}">{{ nav_item.title }}</a>
 4 | </li>
 5 | {% else %}
 6 |   <li class="dropdown-submenu">
 7 |     <a tabindex="-1" href="">{{ nav_item.title }}</a>
 8 |     <ul class="dropdown-menu">
 9 |         {% for nav_item in nav_item.children %}
10 |             {% include "nav-sub.html" %}
11 |         {% endfor %}
12 |     </ul>
13 |   </li>
14 | {% endif %}
15 | 


--------------------------------------------------------------------------------
/docs/united/nav.html:
--------------------------------------------------------------------------------
 1 | <div class="navbar {% if config.extra.theme_inverse %}navbar-inverse{% else %}navbar-default{% endif %} navbar-fixed-top" role="navigation">
 2 |     <div class="container">
 3 | 
 4 |         <!-- Collapsed navigation -->
 5 |         <div class="navbar-header">
 6 |             <!-- Expander button -->
 7 |             <button type="button" class="navbar-toggle" data-toggle="collapse" data-target=".navbar-collapse">
 8 |                 <span class="sr-only">Toggle navigation</span>
 9 |                 <span class="icon-bar"></span>
10 |                 <span class="icon-bar"></span>
11 |                 <span class="icon-bar"></span>
12 |             </button>
13 | 
14 |             {%- block site_name %}
15 |             <a class="navbar-brand" href="{{ nav.homepage.url }}">{{ config.site_name }}</a>
16 |             {%- endblock %}
17 |         </div>
18 | 
19 |         <!-- Expanded navigation -->
20 |         <div class="navbar-collapse collapse">
21 |             {%- block site_nav %}
22 |             <!-- Main navigation -->
23 |             <ul class="nav navbar-nav">
24 |             {% for nav_item in nav %}
25 |             {% if nav_item.children %}
26 |                 <li class="dropdown{% if nav_item.active %} active{% endif %}">
27 |                     <a href="#" class="dropdown-toggle" data-toggle="dropdown">{{ nav_item.title }} <b class="caret"></b></a>
28 |                     <ul class="dropdown-menu">
29 |                     {% for nav_item in nav_item.children %}
30 |                         {% include "nav-sub.html" %}
31 |                     {% endfor %}
32 |                     </ul>
33 |                 </li>
34 |             {% else %}
35 |                 <li {% if nav_item.active %}class="active"{% endif %}>
36 |                     <a href="{{ nav_item.url }}">{{ nav_item.title }}</a>
37 |                 </li>
38 |             {% endif %}
39 |             {% endfor %}
40 |             </ul>
41 |             {%- endblock %}
42 | 
43 |             <!-- Search, Navigation and Repo links -->
44 |             <ul class="nav navbar-nav navbar-right">
45 |                 {%- block search_button %}
46 |                 <li>
47 |                     <a href="#" data-toggle="modal" data-target="#mkdocs_search_modal">
48 |                         <i class="fa fa-search"></i> Search
49 |                     </a>
50 |                 </li>
51 |                 {%- endblock %}
52 |                 
53 |                 {%- block next_prev %}
54 |                 {%- if page and (page.next_page or page.previous_page) %}
55 |                     <li {% if not page.previous_page %}class="disabled"{% endif %}>
56 |                         <a rel="next" {% if page.previous_page %}href="{{ page.previous_page.url }}"{% endif %}>
57 |                             <i class="fa fa-arrow-left"></i> Previous
58 |                         </a>
59 |                     </li>
60 |                     <li {% if not page.next_page %}class="disabled"{% endif %}>
61 |                         <a rel="prev" {% if page.next_page %}href="{{ page.next_page.url }}"{% endif %}>
62 |                             Next <i class="fa fa-arrow-right"></i>
63 |                         </a>
64 |                     </li>
65 |                 {%- endif %}
66 |                 {%- endblock %}
67 |                 
68 |                 {%- block repo %}
69 |                 {% if config.repo_url %}
70 |                 <li>
71 |                     <a href="{{ config.repo_url }}">
72 |                         {% if config.repo_name == 'GitHub' %}
73 |                             <i class="fa fa-github"></i>
74 |                         {% elif config.repo_name == 'Bitbucket' %}
75 |                             <i class="fa fa-bitbucket"></i>
76 |                         {% endif %}
77 |                         {{ config.repo_name }}
78 |                     </a>
79 |                 </li>
80 |                 {% endif %}
81 |                 {%- endblock %}
82 |             </ul>
83 |         </div>
84 |     </div>
85 | </div>
86 | 


--------------------------------------------------------------------------------
/docs/united/toc.html:
--------------------------------------------------------------------------------
 1 | <div class="bs-sidebar hidden-print affix well" role="complementary">
 2 |     <ul class="nav bs-sidenav">
 3 |     {% if page %}
 4 |     {% for toc_item in page.toc %}
 5 |         <li class="main {% if toc_item.active %}active{% endif %}"><a href="{{ toc_item.url }}">{{ toc_item.title }}</a></li>
 6 |         {% for toc_item in toc_item.children %}
 7 |             <li><a href="{{ toc_item.url }}">{{ toc_item.title }}</a></li>
 8 |         {% endfor %}
 9 |     {% endfor %}
10 |     {% endif %}
11 |     </ul>
12 | </div>
13 | 


--------------------------------------------------------------------------------
/docs/update_docs.py:
--------------------------------------------------------------------------------
 1 | # Sebastian Raschka 2014-2016
 2 | # mlxtend Machine Learning Library Extensions
 3 | #
 4 | # Author: Sebastian Raschka <sebastianraschka.com>
 5 | #
 6 | # License: BSD 3 clause
 7 | 
 8 | import subprocess
 9 | 
10 | 
# Regenerate the tools reference page: run argparse_to_md.py on ../tools and
# capture everything it prints to stdout in tools.md (the file is truncated
# first). Paths are relative, so the script is presumably meant to be run
# from the docs/ directory -- TODO confirm.
tools_md_path = 'sources/user_guide/tools.md'
argparse_cmd = ['python', 'argparse_to_md.py', '../tools']
with open(tools_md_path, 'w') as tools_md:
    subprocess.call(argparse_cmd, stdout=tools_md)

# Run ipynb2markdown.py on the tutorial notebook. Exit codes of both
# subprocess calls are ignored.
notebook_cmd = ['python', 'ipynb2markdown.py',
                '--ipynb', 'sources/user_guide/tools-tutorial-1.ipynb']
subprocess.call(notebook_cmd)


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | mputil==0.1.0
2 | numpy>=1.12.1
3 | scipy>=0.19.0
4 | pandas>=0.19.2
5 | biopandas>=0.2.1
6 | pyaml>=3.0.0
7 | 


--------------------------------------------------------------------------------
/tools/count_mol2.py:
--------------------------------------------------------------------------------
  1 | # Sebastian Raschka 2017
  2 | #
  3 | # screenlamp is a Python toolkit
  4 | # for hypothesis-driven virtual screening.
  5 | #
  6 | # Copyright (C) 2017 Michigan State University
  7 | # License: Apache v2
  8 | #
  9 | # Software author: Sebastian Raschka <http://sebastianraschka.com>
 10 | # Software author email: mail@sebastianraschka.com
 11 | #
 12 | # Software source repository: https://github.com/rasbt/screenlamp
 13 | # Documentation: https://psa-lab.github.io/screenlamp
 14 | #
 15 | # screenlamp was developed in the
 16 | # Protein Structural Analysis & Design Laboratory
 17 | # (http://www.kuhnlab.bmb.msu.edu)
 18 | #
 19 | # If you are using screenlamp in your research, please cite
 20 | # the following journal article:
 21 | #
 22 | # Raschka, Sebastian,  Anne M. Scott, Nan Liu,
 23 | #   Santosh Gunturu, Mar Huertas, Weiming Li,
 24 | #   and Leslie A. Kuhn. 2017
 25 | #
 26 | # Enabling the hypothesis-driven prioritization of
 27 | #   ligand candidates in big databases:
 28 | #   Screenlamp and its application to GPCR inhibitor
 29 | #   discovery for invasive species control.
 30 | #
 31 | 
 32 | 
 33 | 
 34 | import subprocess
 35 | import argparse
 36 | import sys
 37 | import os
 38 | import gzip
 39 | 
 40 | 
 41 | def mol_count_python(input_file, zipped):
 42 | 
 43 |     if zipped:
 44 |         open_cmd = gzip.open
 45 |         look_up = b'@<TRIPOS>ATOM'
 46 |     else:
 47 |         open_cmd = open
 48 |         look_up = '@<TRIPOS>ATOM'
 49 |     cnt = 0
 50 |     with open_cmd(input_file, 'r') as f:
 51 |         for line in f:
 52 |             if line.startswith(look_up):
 53 |                 cnt += 1
 54 |     return cnt
 55 | 
 56 | 
 57 | def mol_count_shell(input_file, zipped):
 58 | 
 59 |     if zipped:
 60 |         grep = 'zgrep'
 61 |     else:
 62 |         grep = 'grep'
 63 | 
 64 |     ps = subprocess.Popen([grep, "@<TRIPOS>ATOM", input_file],
 65 |                           stdout=subprocess.PIPE)
 66 |     raw = subprocess.check_output(['wc', '-l'], stdin=ps.stdout)
 67 |     ps.wait()
 68 | 
 69 |     return int(raw.decode().rstrip())
 70 | 
 71 | 
 72 | def count_in_dir(path, windows):
 73 | 
 74 |     total = 0
 75 |     for f in os.listdir(path):
 76 |         if f.endswith(('.mol2', 'mol2.gz')):
 77 |             file_path = os.path.join(path, f)
 78 |             zipped = f.endswith('.mol2.gz')
 79 | 
 80 |             if windows:
 81 |                 cnt = mol_count_python(file_path, zipped)
 82 |             else:
 83 |                 cnt = mol_count_shell(file_path, zipped)
 84 | 
 85 |             sys.stdout.write('%s : %d\n' % (f, cnt))
 86 |             sys.stdout.flush()
 87 |             total += cnt
 88 |     return total
 89 | 
 90 | 
 91 | def main(input_name):
 92 |     is_dir = os.path.isdir(input_name)
 93 |     is_windows = os.system == 'Windows'
 94 | 
 95 |     if not is_dir:
 96 |         zipped = input_name.endswith('.gz')
 97 |         if is_windows:
 98 |             total = mol_count_python(input_name, zipped)
 99 |         else:
100 |             total = mol_count_shell(input_name, zipped)
101 | 
102 |     else:
103 |         total = count_in_dir(input_name, is_windows)
104 | 
105 |     sys.stdout.write('Total : %d\n' % total)
106 |     sys.stdout.flush()
107 | 
108 | 
if __name__ == '__main__':

    # Command line entry point: parse the arguments and run the counter.
    parser = argparse.ArgumentParser(
            description=('A command line tool for counting the number'
                         ' of molecules in MOL2 files.'),
            epilog=('Example: \n'
                    '    python count_mol2.py -i mol2_dir/\n'
                    '    python count_mol2.py -i partition_1.mol2'),
            formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('-i', '--input',
                        required=True,
                        type=str,
                        help='(Required.) Path to a `.mol2` or `.mol2.gz` file,'
                             '\nor a directory containing `.mol2`/`.mol2.gz`'
                             ' files.')

    parser.add_argument('-v', '--version', action='version', version='v. 1.0')

    args = parser.parse_args()

    # argparse rejects a missing --input; this branch only triggers for an
    # explicitly empty value such as `-i ""`.
    if not args.input:
        parser.print_help()

    else:
        main(args.input)
135 | 


--------------------------------------------------------------------------------
/tools/datatable_to_id.py:
--------------------------------------------------------------------------------
  1 | # Sebastian Raschka 2017
  2 | #
  3 | # screenlamp is a Python toolkit
  4 | # for hypothesis-driven virtual screening.
  5 | #
  6 | # Copyright (C) 2017 Michigan State University
  7 | # License: Apache v2
  8 | #
  9 | # Software author: Sebastian Raschka <http://sebastianraschka.com>
 10 | # Software author email: mail@sebastianraschka.com
 11 | #
 12 | # Software source repository: https://github.com/rasbt/screenlamp
 13 | # Documentation: https://psa-lab.github.io/screenlamp
 14 | #
 15 | # screenlamp was developed in the
 16 | # Protein Structural Analysis & Design Laboratory
 17 | # (http://www.kuhnlab.bmb.msu.edu)
 18 | #
 19 | # If you are using screenlamp in your research, please cite
 20 | # the following journal article:
 21 | #
 22 | # Raschka, Sebastian,  Anne M. Scott, Nan Liu,
 23 | #   Santosh Gunturu, Mar Huertas, Weiming Li,
 24 | #   and Leslie A. Kuhn. 2017
 25 | #
 26 | # Enabling the hypothesis-driven prioritization of
 27 | #   ligand candidates in big databases:
 28 | #   Screenlamp and its application to GPCR inhibitor
 29 | #   discovery for invasive species control.
 30 | #
 31 | 
 32 | 
 33 | import argparse
 34 | import sys
 35 | import os
 36 | import pandas as pd
 37 | import time
 38 | 
 39 | 
 40 | def read_and_write(source, target, selection,
 41 |                    columns, id_column, sep, verbose):
 42 | 
 43 |     if verbose:
 44 |         counter = 0
 45 |         sys.stdout.write('Using columns: %s\n' % columns)
 46 |         sys.stdout.write('Using selection: %s\n' % selection)
 47 |         sys.stdout.flush()
 48 | 
 49 |     reader = pd.read_table(source, chunksize=100000, usecols=columns, sep=sep)
 50 | 
 51 |     with open(target, 'w') as f:
 52 |         if verbose:
 53 |             start = time.time()
 54 |         for chunk in reader:
 55 | 
 56 |             if selection is not None:
 57 |                 mask = pd.eval(selection)
 58 |             else:
 59 |                 mask = chunk.index
 60 |             chunk.loc[mask, [id_column]].to_csv(f,
 61 |                                                 header=None,
 62 |                                                 index=None)
 63 |             if verbose:
 64 |                 counter += chunk.shape[0]
 65 | 
 66 |                 elapsed = time.time() - start
 67 |                 sys.stdout.write('\rProcessed %d rows | %d rows/sec' %
 68 |                                  (counter, counter / elapsed))
 69 |                 sys.stderr.flush()
 70 | 
 71 |     if verbose:
 72 |         n_lines = sum(1 for line in open(target, 'r'))
 73 |         sys.stdout.write('\nSelected: %d\n' % n_lines)
 74 |         sys.stdout.flush()
 75 | 
 76 | 
 77 | def parse_selection_string(s, df_name='chunk'):
 78 |     return s.replace('(', '(%s.' % df_name)
 79 | 
 80 | 
 81 | def columns_from_selection(s):
 82 |     return [c.replace('(', '') for c in s.split() if '(' in c]
 83 | 
 84 | 
 85 | def main(input_dir, output_file, verbose, selection, id_column):
 86 | 
 87 |     columns = [id_column]
 88 |     if selection is None:
 89 |         parsed_sele = None
 90 |     else:
 91 |         parsed_sele = parse_selection_string(selection, df_name='chunk')
 92 |         columns += columns_from_selection(selection)
 93 | 
 94 |     dirpath = os.path.dirname(output_file)
 95 |     if not os.path.exists(dirpath):
 96 |         os.mkdir(dirpath)
 97 | 
 98 |     read_and_write(source=args.input,
 99 |                    target=args.output,
100 |                    selection=parsed_sele,
101 |                    columns=columns,
102 |                    id_column=id_column,
103 |                    sep=args.separator,
104 |                    verbose=args.verbose)
105 | 
106 | 
107 | if __name__ == '__main__':
108 | 
109 |     parser = argparse.ArgumentParser(
110 |             description='Create a text file with molecule IDs from MOL2 files.',
111 |             epilog="""Example:
112 | python datatable_to_id.py\\
113 |   --input table.txt\\
114 |   --output ids.txt\\
115 |   --id_column ZINC_ID\\
116 |   --selection "(NRB <= 7) & (MWT > 200)" """,
117 |             formatter_class=argparse.RawTextHelpFormatter)
118 | 
119 |     parser.add_argument('-i', '--input',
120 |                         type=str,
121 |                         required=True,
122 |                         help='(Required.) Path to a datatable file where each'
123 |                              '\nrow represents a molecule and each columns'
124 |                              '\nstore the molecular features.')
125 |     parser.add_argument('-o', '--output',
126 |                         type=str,
127 |                         required=True,
128 |                         help='(Required.) Output path for the ID file'
129 |                              ' (for example, `ids.txt`).')
130 |     parser.add_argument('--id_column',
131 |                         type=str,
132 |                         required=True,
133 |                         help='(Required.) Name of the Molecule ID column.')
134 |     parser.add_argument('--separator',
135 |                         type=str,
136 |                         default='\t',
137 |                         help=('(Optional, default: `"\t"`.) Column separator used\nin the input table.\n'
138 |                               'Assumes tab-separated values by default.'))
139 |     parser.add_argument('-s', '--selection',
140 |                         type=str,
141 |                         default=None,
142 |                         help='(Optional, default: `None`.) A conditional selection string:\n'
143 |                         ' Single column selection example: `"(MWT > 500)"`. '
144 |                         ' Logical OR example: `"(MWT > 500) | (MWT < 200)"`.'
145 |                         ' Logical AND example: `"(NRB <= 7) & (MWT > 200)"`.')
146 |     parser.add_argument('-v', '--verbose',
147 |                         type=int,
148 |                         default=1,
149 |                         help='(Optional, default: `1`.) Verbosity level. If 0, does not print any'
150 |                              '\noutput.'
151 |                              '\nIf 1 (default), prints the file currently'
152 |                              '\nprocessing.')
153 | 
154 |     parser.add_argument('--version', action='version', version='v. 1.0')
155 | 
156 |     args = parser.parse_args()
157 | 
158 |     main(args.input, args.output, args.verbose, args.selection, args.id_column)
159 | 


--------------------------------------------------------------------------------
/tools/enumerate_conformers.py:
--------------------------------------------------------------------------------
  1 | # Sebastian Raschka 2017
  2 | #
  3 | # screenlamp is a Python toolkit
  4 | # for hypothesis-driven virtual screening.
  5 | #
  6 | # Copyright (C) 2017 Michigan State University
  7 | # License: Apache v2
  8 | #
  9 | # Software author: Sebastian Raschka <http://sebastianraschka.com>
 10 | # Software author email: mail@sebastianraschka.com
 11 | #
 12 | # Software source repository: https://github.com/rasbt/screenlamp
 13 | # Documentation: https://psa-lab.github.io/screenlamp
 14 | #
 15 | # screenlamp was developed in the
 16 | # Protein Structural Analysis & Design Laboratory
 17 | # (http://www.kuhnlab.bmb.msu.edu)
 18 | #
 19 | # If you are using screenlamp in your research, please cite
 20 | # the following journal article:
 21 | #
 22 | # Raschka, Sebastian,  Anne M. Scott, Nan Liu,
 23 | #   Santosh Gunturu, Mar Huertas, Weiming Li,
 24 | #   and Leslie A. Kuhn. 2017
 25 | #
 26 | # Enabling the hypothesis-driven prioritization of
 27 | #   ligand candidates in big databases:
 28 | #   Screenlamp and its application to GPCR inhibitor
 29 | #   discovery for invasive species control.
 30 | #
 31 | 
 32 | import os
 33 | import argparse
 34 | import sys
 35 | import time
 36 | import gzip
 37 | from biopandas.mol2 import split_multimol2
 38 | 
 39 | 
 40 | def get_mol2_files(dir_path):
 41 | 
 42 |     files = []
 43 |     if os.path.isdir(dir_path):
 44 |         for f in os.listdir(dir_path):
 45 |             if f.endswith(('.mol2', 'mol2.gz')):
 46 |                 file_path = os.path.join(dir_path, f)
 47 |                 files.append(file_path)
 48 | 
 49 |     elif (os.path.isfile(dir_path) and
 50 |           dir_path.endswith(('.mol2', 'mol2.gz'))):
 51 |         files.append(dir_path)
 52 | 
 53 |     return files
 54 | 
 55 | 
 56 | def read_and_write(inp_mol2_path, out_mol2_path, verbose):
 57 | 
 58 |     if verbose:
 59 |         sys.stdout.write('Processing %s' % os.path.basename(inp_mol2_path))
 60 |         sys.stdout.flush()
 61 |         start = time.time()
 62 | 
 63 | 
 64 |     if inp_mol2_path.endswith('.gz'):
 65 |         write_mode = 'wb'
 66 |         open_file = gzip.open
 67 |     else:
 68 |         write_mode = 'w'
 69 |         open_file = open
 70 | 
 71 |     """
 72 |     if query_path.endswith('.gz'):
 73 |         for id_, cont in split_multimol2(query_path):
 74 |             cnt += 1
 75 |             cont = b''.join(cont).decode('utf-8').split('\n')
 76 |             if multiconf_query:
 77 |                 mol_idx = '%s_%d' % (id_.decode('utf-8'), cnt)
 78 |             else:
 79 |                 mol_idx = id_
 80 |     """
 81 | 
 82 |     with open_file(out_mol2_path, write_mode) as outfile:
 83 | 
 84 |         prev_molecule = ''
 85 | 
 86 |         if inp_mol2_path.endswith('.gz'):
 87 |             for i, (id_, cont) in enumerate(split_multimol2(inp_mol2_path)):
 88 |                 if prev_molecule != id_:
 89 |                     cnt = 0
 90 |                 else:
 91 |                     cnt += 1
 92 | 
 93 |                 mol_idx = b'%s_%d' % (id_, cnt)
 94 | 
 95 |                 cont[1] = mol_idx + b'\n'
 96 |                 outfile.write(b''.join(cont))
 97 |                 prev_molecule = id_
 98 | 
 99 |         else:
100 |             for i, (id_, cont) in enumerate(split_multimol2(inp_mol2_path)):
101 |                 if prev_molecule != id_:
102 |                     cnt = 0
103 |                 else:
104 |                     cnt += 1
105 | 
106 |                 mol_idx = '%s_%d' % (id_, cnt)
107 | 
108 |                 cont[1] = mol_idx + '\n'
109 |                 outfile.write(''.join(cont))
110 |                 prev_molecule = id_
111 | 
112 |     if verbose:
113 |         elapsed = time.time() - start
114 |         n_molecules = i + 1
115 |         sys.stdout.write(' | scanned %d molecules | %d mol/sec\n' %
116 |                          (n_molecules, n_molecules / elapsed))
117 |         sys.stdout.flush()
118 | 
119 | 
def main(input_dir, output_dir, verbose):
    """Number the conformers of all MOL2 files found under ``input_dir``.

    Parameters
    ----------
    input_dir : str
        A MOL2 file or a directory containing MOL2 files.
    output_dir : str
        Directory that receives one output file per input file, under the
        same base name. Created (including parents) if missing.
    verbose : int or bool
        Passed on to ``read_and_write``.
    """
    if not os.path.exists(output_dir):
        # makedirs also handles nested output paths such as 'out/numbered'.
        os.makedirs(output_dir)

    for mol2_path in get_mol2_files(input_dir):
        out_mol2_path = os.path.join(output_dir, os.path.basename(mol2_path))
        read_and_write(mol2_path, out_mol2_path, verbose)
129 | 
130 | 
if __name__ == '__main__':

    # Command line entry point. The description's "before" example lists
    # the un-numbered names, so the example shows what the tool changes.
    parser = argparse.ArgumentParser(
            description='Numbers molecules in MOL2 files by'
                        ' adding a suffix as index.'
                        ' For example, if there are three'
                        ' molecules in a MOL2 file,'
                        ' moleculeabc, moleculeabc, and moleculedef,'
                        '\n those molecules will be relabeled to'
                        ' moleculeabc_0, moleculeabc_1, and moleculedef_0.',
            epilog="""Example:
python enumerate_conformers.py -i conformer_mol2s/\\
   --output numbered_conformers/""",
            formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('-i', '--input',
                        required=True,
                        type=str,
                        help='(Required.) Path to a `.mol2` or `.mol2.gz` file,'
                             '\nor a directory containing `.mol2`/`.mol2.gz`'
                             ' files.')
    parser.add_argument('-o', '--output',
                        type=str,
                        required=True,
                        help='(Required.) Directory path for writing the'
                             ' numbered MOL2s')
    parser.add_argument('-v', '--verbose',
                        type=int,
                        default=1,
                        help='Verbosity level. If 0, does not print any'
                             ' output.'
                             '\nIf 1 (default), prints the file currently'
                             '\nprocessing.')

    parser.add_argument('--version', action='version', version='v. 1.0')

    args = parser.parse_args()

    main(input_dir=args.input,
         output_dir=args.output,
         verbose=args.verbose)
172 | 


--------------------------------------------------------------------------------
/tools/funcgroup_distance_to_id.py:
--------------------------------------------------------------------------------
  1 | # Sebastian Raschka 2017
  2 | #
  3 | # screenlamp is a Python toolkit
  4 | # for hypothesis-driven virtual screening.
  5 | #
  6 | # Copyright (C) 2017 Michigan State University
  7 | # License: Apache v2
  8 | #
  9 | # Software author: Sebastian Raschka <http://sebastianraschka.com>
 10 | # Software author email: mail@sebastianraschka.com
 11 | #
 12 | # Software source repository: https://github.com/rasbt/screenlamp
 13 | # Documentation: https://psa-lab.github.io/screenlamp
 14 | #
 15 | # screenlamp was developed in the
 16 | # Protein Structural Analysis & Design Laboratory
 17 | # (http://www.kuhnlab.bmb.msu.edu)
 18 | #
 19 | # If you are using screenlamp in your research, please cite
 20 | # the following journal article:
 21 | #
 22 | # Raschka, Sebastian,  Anne M. Scott, Nan Liu,
 23 | #   Santosh Gunturu, Mar Huertas, Weiming Li,
 24 | #   and Leslie A. Kuhn. 2017
 25 | #
 26 | # Enabling the hypothesis-driven prioritization of
 27 | #   ligand candidates in big databases:
 28 | #   Screenlamp and its application to GPCR inhibitor
 29 | #   discovery for invasive species control.
 30 | #
 31 | 
 32 | 
 33 | import os
 34 | import argparse
 35 | import sys
 36 | import pandas as pd
 37 | import time
 38 | from mputil import lazy_imap
 39 | from multiprocessing import cpu_count
 40 | from biopandas.mol2 import split_multimol2
 41 | from biopandas.mol2 import PandasMol2
 42 | 
 43 | 
 44 | def parse_distance_string(s):
 45 |     dist = [int(p.strip()) for p in s.split('-')]
 46 |     return dist
 47 | 
 48 | 
 49 | def get_mol2_files(dir_path):
 50 | 
 51 |     files = []
 52 | 
 53 |     if os.path.isdir(dir_path):
 54 |         for f in os.listdir(dir_path):
 55 |             if f.endswith(('.mol2', 'mol2.gz')):
 56 |                 file_path = os.path.join(dir_path, f)
 57 |                 files.append(file_path)
 58 | 
 59 |     elif (os.path.isfile(dir_path) and
 60 |           dir_path.endswith(('.mol2', 'mol2.gz'))):
 61 |         files.append(dir_path)
 62 | 
 63 |     return files
 64 | 
 65 | 
 66 | def parse_selection_string(s, df_name='pdmol.df'):
 67 | 
 68 |     columns = ['(atom_id', '(atom_name', '(atom_type',
 69 |                '(subst_id', '(subst_name', '(charge']
 70 |     lst = [subs.strip() for subs in s.split('-->')]
 71 |     parsed = []
 72 | 
 73 |     for subs in lst:
 74 |         for c in columns:
 75 |             subs = subs.replace(c, '(%s.%s' % (df_name, c[1:]))
 76 |         parsed.append(subs)
 77 |     return parsed
 78 | 
 79 | 
 80 | def data_processor(mol2):
 81 | 
 82 |     pdmol = PandasMol2().read_mol2_from_list(mol2_lines=mol2[1],
 83 |                                              mol2_code=mol2[0])
 84 | 
 85 |     coordinates = pdmol.df.loc[pd.eval(SELECTION[0]), ['x', 'y', 'z']].values
 86 | 
 87 |     pdmol._df = pdmol._df[pd.eval(SELECTION[1])]
 88 | 
 89 |     for xyz in coordinates:
 90 | 
 91 |         distances = pdmol.distance(xyz)
 92 | 
 93 |         match = ((distances.values >= DISTANCE[0]).any() and
 94 |                  (distances.values <= DISTANCE[1]).any())
 95 | 
 96 |         if match:
 97 |             return mol2[0]
 98 | 
 99 |     return ''
100 | 
101 | 
def data_processor_gz(mol2_gz):
    """Gzip variant of the distance filter; returns the decoded ID.

    Reads the module-level ``SELECTION`` (two expressions) and
    ``DISTANCE`` (lower and upper bound).

    Parameters
    ----------
    mol2_gz : sequence
        ``mol2_gz[0]`` is the molecule ID as bytes and ``mol2_gz[1]`` the
        MOL2 lines, as passed to ``PandasMol2.read_mol2_from_list``.

    Returns
    -------
    str
        The UTF-8 decoded ID if some atom matching ``SELECTION[0]`` has a
        distance between ``DISTANCE[0]`` and ``DISTANCE[1]`` (inclusive)
        to some atom matching ``SELECTION[1]``; otherwise an empty string.
    """
    # The variable must stay named `pdmol`: the selection expressions refer
    # to `pdmol.df` and are resolved by pd.eval in this scope.
    pdmol = PandasMol2().read_mol2_from_list(mol2_lines=mol2_gz[1],
                                             mol2_code=mol2_gz[0])

    coordinates = pdmol.df.loc[pd.eval(SELECTION[0]), ['x', 'y', 'z']].values

    # Restrict the molecule to the second atom group before measuring.
    pdmol._df = pdmol._df[pd.eval(SELECTION[1])]

    for xyz in coordinates:

        distances = pdmol.distance(xyz).values

        # Both bounds must hold for the SAME atom. Two separate .any()
        # checks would accept one atom that is far enough plus a different
        # atom that is close enough.
        in_range = (distances >= DISTANCE[0]) & (distances <= DISTANCE[1])

        if in_range.any():
            return mol2_gz[0].decode('utf-8')

    return ''
122 | 
123 | 
def read_and_write(mol2_files, id_file_path, verbose, n_cpus):
    """Filter all MOL2 files and write the matching IDs to one file.

    Parameters
    ----------
    mol2_files : list of str
        MOL2 files to scan; files ending in ``.gz`` are handled by
        ``data_processor_gz``, all others by ``data_processor``.
    id_file_path : str
        Output file; receives one matching molecule ID per line.
    verbose : int or bool
        If true, progress information is written to stdout.
    n_cpus : int
        Forwarded to ``lazy_imap`` as ``n_cpus``.
    """
    if verbose:
        sys.stdout.write('Using selection: %s\n' % SELECTION)
        sys.stdout.flush()

    with open(id_file_path, 'w') as id_file:

        for path in mol2_files:
            if verbose:
                start = time.time()
                sys.stdout.write('Processing %s' % os.path.basename(path))
                sys.stdout.flush()

            processor = (data_processor_gz if path.endswith('.gz')
                         else data_processor)
            n_scanned = 0

            results = lazy_imap(data_processor=processor,
                                data_generator=split_multimol2(path),
                                n_cpus=n_cpus)
            for chunk in results:
                # Processors return '' for molecules that did not match.
                for mol2_id in chunk:
                    if mol2_id:
                        id_file.write('%s\n' % mol2_id)
                n_scanned += len(chunk)

            if verbose:
                elapsed = time.time() - start
                sys.stdout.write(' | %d mol/sec\n' % (n_scanned / elapsed))
                sys.stdout.flush()
155 | 
156 | 
def get_num_cpus(n_cpus):
    """Translate the ``--processes`` value into a process count.

    Parameters
    ----------
    n_cpus : int or None
        Requested number of processes. 0 or None selects all available
        CPUs; a negative value leaves that many CPUs unused.

    Returns
    -------
    int
        ``n_cpus`` itself if positive, ``cpu_count()`` for 0/None, and
        ``cpu_count() + n_cpus`` (at least 1) if negative.
    """
    if not n_cpus:
        return cpu_count()
    if n_cpus < 0:
        # n_cpus is negative here, so it has to be added; subtracting it
        # would request more processes than there are CPUs.
        return max(1, cpu_count() + n_cpus)
    return n_cpus
163 | 
164 | 
def main(input_dir, output_file, verbose, n_cpus):
    """Run the distance filter over all MOL2 files under ``input_dir``.

    Parameters
    ----------
    input_dir : str
        A MOL2 file or a directory containing MOL2 files.
    output_file : str
        Path of the ID file to write. Missing parent directories are
        created.
    verbose : int or bool
        If true, progress information is printed.
    n_cpus : int
        Requested number of processes (see ``get_num_cpus``).
    """
    n_cpus = get_num_cpus(n_cpus)

    # dirname() is '' for a bare file name such as 'ids.txt'; creating ''
    # would raise, so only create a directory when one is actually named.
    dirpath = os.path.dirname(output_file)
    if dirpath and not os.path.exists(dirpath):
        os.makedirs(dirpath)

    mol2_files = get_mol2_files(dir_path=input_dir)
    read_and_write(mol2_files=mol2_files,
                   id_file_path=output_file,
                   verbose=verbose,
                   n_cpus=n_cpus)

    if verbose:
        print('Finished')
179 | 
180 | 
if __name__ == '__main__':

    # Command line entry point. DISTANCE and SELECTION are set as module
    # globals below because the data processors read them.
    #
    # The epilog is assembled from single-line literals so that '\\#'
    # yields a backslash followed by '#' without an invalid escape.
    parser = argparse.ArgumentParser(
            description='A command line tool for filtering mol2 files'
                        '\nby the distance of two atoms or functional groups.',
            epilog=(
                'Example:\n'
                '\n'
                'python funcgroup_distance_to_id.py\\\n'
                '  --input mol2_dir/\\\n'
                '  --output ids.txt\\\n'
                "  --selection \"((atom_type == 'S.3') | (atom_type == 'S.o2')) --> (atom_type == 'O.2')\"\\\n"
                '  --distance 13-20\\\n'
                '  --processes 0\n'
                '  \n'
                '\n'
                '  \\# The example above selects those molecules\n'
                '  \\# that contain S.3 or S.o2 atom that is within\n'
                "  \\# a 13-20 angstroms distance to an 'O.2' (sp2/keto oxygen) atom\n"
                '\n'
                '  '),
            formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('-i', '--input',
                        type=str,
                        required=True,
                        help='(Required.) Path to a `.mol2` or `.mol2.gz` file,'
                             '\nor a directory containing `.mol2`/`.mol2.gz`'
                             ' files.')
    parser.add_argument('-o', '--output',
                        type=str,
                        required=True,
                        help='(Required.) Output path for the ID file'
                             ' (for example, `ids.txt`).')
    parser.add_argument('-s', '--selection',
                        type=str,
                        required=True,
                        help='(Required.) Selection condition for the atom distance'
                        ' checks.'
                        '\n1) Selection example to compare 2 atom types:'
                        '\n    `"(atom_type == \'S.o2\') -->'
                        ' (atom_type == \'O.2\')"`.'
                        '\n2) Selection example to consider either'
                        ' an S.o2 or S.3 atom to an O.2 atom:'
                        '\n    `"((atom_type == \'S.3\') |'
                        ' (atom_type == \'S.o2\')) -->'
                        ' (atom_type == \'O.2\')"`.'
                        '\n3) Selection example using logical ORs on '
                        'both sides:\n'
                        '    `"((atom_type == \'S.3\') | (atom_type == '
                        '\'S.o2\'))'
                        ' -->  ((atom_type == \'O.2\') |'
                        ' (atom_type == \'O.3\'))"`.')
    parser.add_argument('-d', '--distance',
                        type=str,
                        required=True,
                        help='(Required.) A distance range formatted'
                             '\n as "lowerbound-upperbound".'
                             '\nFor example, if 13-20 is provided as an'
                             '\nargument, two atoms are considered a match'
                             '\nif they are not closer than 13 angstroms and'
                             '\n not farther than 20 angstroms.')
    parser.add_argument('--processes',
                        type=int,
                        default=1,
                        help='(Optional, default: `1`.) Number of processes to run in parallel.'
                             '\nIf processes > 0, the specified number of CPUs'
                             '\nwill be used.'
                             '\nIf processes = 0, all available CPUs will'
                             '\nbe used.'
                             '\nIf processes = -1, all available CPUs'
                             '\nminus `processes` will be used.')
    parser.add_argument('-v', '--verbose',
                        type=int,
                        default=1,
                        help='(Optional, default: `1`.) Verbosity level. If 0, does not print any'
                             ' output.'
                             '\nIf 1 (default), prints the file currently'
                             ' processing.')

    parser.add_argument('--version', action='version', version='v. 1.0')

    args = parser.parse_args()
    DISTANCE = parse_distance_string(args.distance)
    if len(DISTANCE) != 2:
        raise ValueError("Make sure you only have a lower and upper bound"
                         " for --distance"
                         "\nFor example 13-20")

    SELECTION = parse_selection_string(args.selection)
    if len(SELECTION) != 2:
        raise ValueError("Make sure you have 2 --selection criteria"
                         " separated via '-->', for example,"
                         "\n\"((atom_type == 'S.3') |"
                         " (atom_type == 'S.o2')) -->"
                         " (atom_type == 'O.2')\"")

    main(input_dir=args.input,
         output_file=args.output,
         verbose=args.verbose,
         n_cpus=args.processes)
279 | 


--------------------------------------------------------------------------------
/tools/funcgroup_matching.py:
--------------------------------------------------------------------------------
  1 | # Sebastian Raschka 2017
  2 | #
  3 | # screenlamp is a Python toolkit
  4 | # for hypothesis-driven virtual screening.
  5 | #
  6 | # Copyright (C) 2017 Michigan State University
  7 | # License: Apache v2
  8 | #
  9 | # Software author: Sebastian Raschka <http://sebastianraschka.com>
 10 | # Software author email: mail@sebastianraschka.com
 11 | #
 12 | # Software source repository: https://github.com/rasbt/screenlamp
 13 | # Documentation: https://psa-lab.github.io/screenlamp
 14 | #
 15 | # screenlamp was developed in the
 16 | # Protein Structural Analysis & Design Laboratory
 17 | # (http://www.kuhnlab.bmb.msu.edu)
 18 | #
 19 | # If you are using screenlamp in your research, please cite
 20 | # the following journal article:
 21 | #
 22 | # Raschka, Sebastian,  Anne M. Scott, Nan Liu,
 23 | #   Santosh Gunturu, Mar Huertas, Weiming Li,
 24 | #   and Leslie A. Kuhn. 2017
 25 | #
 26 | # Enabling the hypothesis-driven prioritization of
 27 | #   ligand candidates in big databases:
 28 | #   Screenlamp and its application to GPCR inhibitor
 29 | #   discovery for invasive species control.
 30 | #
 31 | 
 32 | import os
 33 | import argparse
 34 | import sys
 35 | import time
 36 | from multiprocessing import cpu_count
 37 | from numpy import nan as np_nan
 38 | from mputil import lazy_imap
 39 | from biopandas.mol2 import PandasMol2
 40 | from biopandas.mol2 import split_multimol2
 41 | 
 42 | 
 43 | def get_mol2_files(dir_path):
 44 | 
 45 |     files = []
 46 |     if os.path.isdir(dir_path):
 47 |         for f in os.listdir(dir_path):
 48 |             if f.endswith(('.mol2', 'mol2.gz')):
 49 |                 file_path = os.path.join(dir_path, f)
 50 |                 files.append(file_path)
 51 | 
 52 |     elif (os.path.isfile(dir_path) and
 53 |           dir_path.endswith(('.mol2', 'mol2.gz'))):
 54 |         files.append(dir_path)
 55 | 
 56 |     return files
 57 | 
 58 | 
 59 | def get_dbase_query_pairs(all_mol2s):
 60 |     q_list, d_list = [], []
 61 |     for m in all_mol2s:
 62 |         if m.endswith(('_query.mol2.gz', '_query.mol2')):
 63 |             q_list.append(m)
 64 |         elif m.endswith(('_dbase.mol2.gz', '_dbase.mol2')):
 65 |             d_list.append(m)
 66 |     if len(q_list) != len(q_list):
 67 |         raise ValueError('The input directory contains an unequal number of'
 68 |                          '*_dbase* and *_query* files.')
 69 |     return q_list, d_list
 70 | 
 71 | 
 72 | def get_atom_matches(q_pdmol, d_pdmol):
 73 |     atoms, charges = [], []
 74 |     for xyz in q_pdmol.df[['x', 'y', 'z']].iterrows():
 75 |         distances = d_pdmol.distance(xyz=xyz[1].values)
 76 |         nearest_idx = distances.argmin()
 77 |         columns = ['atom_type', 'charge']
 78 |         if distances.iloc[nearest_idx] > THRESHOLD:
 79 |             atom, charge = '', np_nan
 80 |         else:
 81 |             atom, charge = d_pdmol.df[columns].iloc[nearest_idx].values
 82 |         atoms.append(atom)
 83 |         charges.append(charge)
 84 |     return atoms, charges
 85 | 
 86 | 
 87 | def data_processor(mol2s):
 88 | 
 89 |     q_pdmol = PandasMol2()
 90 |     d_pdmol = PandasMol2()
 91 | 
 92 |     d_pdmol.read_mol2_from_list(mol2_code=mol2s[0][0],
 93 |                                 mol2_lines=mol2s[0][1])
 94 | 
 95 |     q_pdmol.read_mol2_from_list(mol2_code=mol2s[1][0],
 96 |                                 mol2_lines=mol2s[1][1])
 97 | 
 98 |     atoms, charges = get_atom_matches(q_pdmol, d_pdmol)
 99 |     return mol2s[0][0], mol2s[1][0], atoms, charges
100 | 
101 | 
def data_processor_gz(mol2s_gz):
    """Compute the atom matches for one pair read from gzipped MOL2 files.

    Works like `data_processor`, but the two molecule IDs are decoded
    from UTF-8 before they are returned.

    Parameters
    ----------
    mol2s_gz : sequence
        `mol2s_gz[0]` is the database entry and `mol2s_gz[1]` the query
        entry; each entry holds the molecule ID at index 0 and the MOL2
        lines at index 1.

    Returns
    -------
    tuple
        `(database_id, query_id, atoms, charges)` with both IDs as `str`.

    """
    dbase_entry, query_entry = mol2s_gz[0], mol2s_gz[1]

    d_pdmol = PandasMol2()
    d_pdmol.read_mol2_from_list(mol2_code=dbase_entry[0],
                                mol2_lines=dbase_entry[1])

    q_pdmol = PandasMol2()
    q_pdmol.read_mol2_from_list(mol2_code=query_entry[0],
                                mol2_lines=query_entry[1])

    atoms, charges = get_atom_matches(q_pdmol, d_pdmol)

    dbase_id = dbase_entry[0].decode('utf-8')
    query_id = query_entry[0].decode('utf-8')
    return (dbase_id, query_id, atoms, charges)
117 | 
118 | 
def read_and_write(q_path, d_path, verbose,
                   cache, output_file, n_cpus):
    """Match the atoms of one query/database file pair and write the tables.

    The molecules of `d_path` and `q_path` are read in lockstep, matched in
    parallel via `lazy_imap`, and the results are written to
    `<output_file>_charge.tsv` and `<output_file>_atomtype.tsv`.

    Parameters
    ----------
    q_path : str
        Path to the `*_query.mol2[.gz]` file.
    d_path : str
        Path to the `*_dbase.mol2[.gz]` file.
    verbose : int or bool
        If truthy, progress and throughput are written to stdout.
    cache : dict
        Not used by this function; kept so existing callers keep working.
    output_file : str
        Output path prefix for the two `.tsv` files.
    n_cpus : int
        Number of worker processes passed to `lazy_imap`.

    """
    if verbose:
        start = time.time()
        sys.stdout.write('Processing %s/%s' % (os.path.basename(d_path),
                                               os.path.basename(q_path)))
        sys.stdout.flush()

    if q_path.endswith('.gz'):
        data_processor_fn = data_processor_gz
    else:
        data_processor_fn = data_processor

    # Each result is a (dbase_id, query_id, atoms, charges) tuple.
    results = []
    for chunk in lazy_imap(data_processor=data_processor_fn,
                           data_generator=zip(split_multimol2(d_path),
                                              split_multimol2(q_path)),
                           n_cpus=n_cpus):
        results.extend(chunk)

    with open(output_file + '_charge.tsv', 'w') as f1,\
            open(output_file + '_atomtype.tsv', 'w') as f2:

        # The column headers are the atom names of the molecule that
        # read_mol2 loads from the query file.
        columns = PandasMol2().read_mol2(q_path).df['atom_name'].values
        header = 'dbase\tquery\t%s\n' % '\t'.join(columns)
        f1.write(header)
        f2.write(header)

        for dbase_id, query_id, atoms, charges in results:
            f1.write('%s\t%s\t%s\n' % (dbase_id,
                                     query_id,
                                     '\t'.join(format(x, "1.2f")
                                               for x in charges)))
            f2.write('%s\t%s\t%s\n' % (dbase_id,
                                     query_id,
                                     '\t'.join(atoms)))

    if verbose:
        elapsed = time.time() - start
        # len(results) is already the exact number of processed pairs.
        n_molecules = len(results)
        sys.stdout.write(' | scanned %d molecules | %d mol/sec\n' %
                         (n_molecules, n_molecules / elapsed))
        sys.stdout.flush()
204 | 
205 | 
def get_num_cpus(n_cpus):
    """Translate the `--processes` option into a concrete worker count.

    Parameters
    ----------
    n_cpus : int or None
        Requested number of processes. `0` (or `None`) selects all
        available CPUs; a negative value leaves that many CPUs unused
        (e.g. `-1` means all CPUs but one); a positive value is returned
        unchanged.

    Returns
    -------
    int
        Number of processes to use, always at least 1.

    """
    if not n_cpus:
        return cpu_count()
    if n_cpus < 0:
        # n_cpus is negative here, so adding it subtracts CPUs. Never go
        # below a single worker.
        return max(1, cpu_count() + n_cpus)
    return n_cpus
212 | 
213 | 
def main(input_dir, output_dir, verbose, n_cpus):
    """Run the functional group matching for every file pair in `input_dir`.

    Parameters
    ----------
    input_dir : str
        Directory (or single file) passed to `get_mol2_files`.
    output_dir : str
        Directory for the output tables; created if it does not exist.
    verbose : int or bool
        Forwarded to `read_and_write`.
    n_cpus : int
        Requested number of processes, resolved via `get_num_cpus`.

    """
    n_cpus = get_num_cpus(n_cpus)

    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    q_list, d_list = get_dbase_query_pairs(get_mol2_files(input_dir))

    # Output prefix per pair: the database file name without its
    # '_dbase.mol2[.gz]' ending, placed in the output directory.
    out_prefixes = []
    for d_path in d_list:
        stem = os.path.basename(d_path)
        stem = stem.replace('_dbase.mol2.gz', '').replace('_dbase.mol2', '')
        out_prefixes.append(os.path.join(output_dir, stem))

    cache = {}
    for q_path, d_path, out_prefix in zip(q_list, d_list, out_prefixes):
        read_and_write(q_path=q_path,
                       d_path=d_path,
                       verbose=verbose,
                       cache=cache,
                       output_file=out_prefix,
                       n_cpus=n_cpus)
239 | 
240 | 
# Command-line entry point: build the argument parser, store the distance
# cutoff in the module-level THRESHOLD and hand the remaining options to
# main().
if __name__ == '__main__':

    # RawTextHelpFormatter keeps the explicit '\n' line breaks of the
    # description, epilog and help strings below.
    parser = argparse.ArgumentParser(
            description='Generates tab-separated tables with containing atom'
            '\n type and charge information from matching'
            '\n atoms in pair-wise overlays.\n',
            epilog="""Example:
python funcgroup_matching.py\\
   --input rocs_overlays_sorted/\\
   --output matching_tables/\\
   --max_distance 1.3\\
   --processes 0""",
            formatter_class=argparse.RawTextHelpFormatter)

    # Input directory with the *_query / *_dbase MOL2 file pairs.
    parser.add_argument('-i', '--input',
                        type=str,
                        required=True,
                        help='(Required.) Path to a directory containing pairs '
                             '\nof `*_query.mol2`/`.mol2.gz` '
                             '\nand `*_dbase.mol2`/`.mol2.gz` files')
    # Output directory for the generated tables.
    parser.add_argument('-o', '--output',
                        type=str,
                        required=True,
                        help='(Required.) Path to a directory for writing'
                             '\nthe output files')
    # Distance cutoff; becomes THRESHOLD below.
    parser.add_argument('-d', '--max_distance',
                        type=float,
                        default=1.3,
                        help='(Optional, default: `1.3`.) The maximum distance,'
                        '\nin angstroms, the'
                        '\noverlayed atoms can be apart from each'
                        '\nother for being considered a match.'
                        '\nFor instance, a --max_distance 1.3 (default)'
                        '\nwould count atoms as a match if they'
                        '\nare within 0 and 1.3 angstroms'
                        '\nto the target atom.')
    # Requested process count; passed to main() as n_cpus.
    parser.add_argument('--processes',
                        type=int,
                        default=1,
                        help='(Optional, default: `1`.) Number of processes to'
                             ' run in parallel.'
                             '\nIf processes > 0, the specified number of CPUs'
                             '\nwill be used.'
                             '\nIf processes = 0, all available CPUs will'
                             '\nbe used.'
                             '\nIf processes = -1, all available CPUs'
                             '\nminus `processes` will be used.')
    parser.add_argument('-v', '--verbose',
                        type=int,
                        default=1,
                        help='(Optional, default: `1`.) Verbosity level. If 0,'
                             ' does not print any output.'
                             '\nIf 1 (default), prints the file currently'
                             ' processing.')

    parser.add_argument('--version', action='version', version='v. 1.0')

    args = parser.parse_args()
    # get_atom_matches() reads THRESHOLD as a global.
    # NOTE(review): the name is only assigned inside this __main__ guard;
    # presumably worker processes see it only when they inherit the parent's
    # module state (fork start method) -- verify on spawn-based platforms.
    THRESHOLD = args.max_distance

    main(input_dir=args.input,
         output_dir=args.output,
         verbose=args.verbose,
         n_cpus=args.processes)
305 | 


--------------------------------------------------------------------------------
/tools/funcgroup_matching_selection.py:
--------------------------------------------------------------------------------
  1 | # Sebastian Raschka 2017
  2 | #
  3 | # screenlamp is a Python toolkit
  4 | # for hypothesis-driven virtual screening.
  5 | #
  6 | # Copyright (C) 2017 Michigan State University
  7 | # License: Apache v2
  8 | #
  9 | # Software author: Sebastian Raschka <http://sebastianraschka.com>
 10 | # Software author email: mail@sebastianraschka.com
 11 | #
 12 | # Software source repository: https://github.com/rasbt/screenlamp
 13 | # Documentation: https://psa-lab.github.io/screenlamp
 14 | #
 15 | # screenlamp was developed in the
 16 | # Protein Structural Analysis & Design Laboratory
 17 | # (http://www.kuhnlab.bmb.msu.edu)
 18 | #
 19 | # If you are using screenlamp in your research, please cite
 20 | # the following journal article:
 21 | #
 22 | # Raschka, Sebastian,  Anne M. Scott, Nan Liu,
 23 | #   Santosh Gunturu, Mar Huertas, Weiming Li,
 24 | #   and Leslie A. Kuhn. 2017
 25 | #
 26 | # Enabling the hypothesis-driven prioritization of
 27 | #   ligand candidates in big databases:
 28 | #   Screenlamp and its application to GPCR inhibitor
 29 | #   discovery for invasive species control.
 30 | #
 31 | 
 32 | import argparse
 33 | import os
 34 | import sys
 35 | import pandas as pd
 36 | import gzip
 37 | import time
 38 | from biopandas.mol2 import split_multimol2
 39 | 
 40 | 
 41 | def get_tsv_pairs(all_tsv):
 42 |     a_list, c_list = [], []
 43 |     for a in all_tsv:
 44 |         if a.endswith('_atomtype.tsv'):
 45 |             a_list.append(a)
 46 |         elif a.endswith('_charge.tsv'):
 47 |             c_list.append(a)
 48 |     if len(a_list) != len(c_list):
 49 |         raise ValueError('The input directory contains an unequal number of'
 50 |                          '*_atomtype.tsv* and *_charge.tsv* files.')
 51 |     return a_list, c_list
 52 | 
 53 | 
 54 | def parse_selection_string(s, columns, df_name='df'):
 55 | 
 56 |     for c in columns:
 57 |         if c in s:
 58 |             s = s.replace(c, '%s.%s' % (df_name, c))
 59 |     s = s.replace(' --> ', '-->').split('-->')
 60 |     s = ['%s[%s]' % (df_name, sub) for sub in s]
 61 |     return s
 62 | 
 63 | 
 64 | def main(input_dir, output_dir, atomtype_selection, charge_selection, 
 65 |          input_mol2, verbose):
 66 | 
 67 |     if not os.path.exists(output_dir):
 68 |         os.mkdir(output_dir)
 69 | 
 70 |     all_tsv_base = [f for f in os.listdir(input_dir) if f.endswith('.tsv')]
 71 |     all_tsv_full = [os.path.join(input_dir, f) for f in all_tsv_base]
 72 |     a_inlist, c_inlist = get_tsv_pairs(all_tsv_full)
 73 |     a_outlist, c_outlist = get_tsv_pairs(all_tsv_base)
 74 |     a_outlist = [os.path.join(output_dir, f) for f in a_outlist]
 75 |     c_outlist = [os.path.join(output_dir, f) for f in c_outlist]
 76 | 
 77 |     for a_in, a_out, c_in, c_out in zip(a_inlist, a_outlist,
 78 |                                         c_inlist, c_outlist):
 79 | 
 80 |         if verbose:
 81 |             start = time.time()
 82 |             sys.stdout.write('Processing %s/%s' % (os.path.basename(a_in),
 83 |                                                    os.path.basename(c_in)))
 84 |             sys.stdout.flush()
 85 | 
 86 |         df_charge = pd.read_table(c_in, sep='\t')
 87 |         for c in df_charge.columns[2:]:
 88 |             df_charge[c] = pd.to_numeric(df_charge[c])
 89 |         df_atom = pd.read_table(a_in, sep='\t')
 90 |         mol2_cnt = df_atom.shape[0]
 91 | 
 92 |         if atomtype_selection:
 93 |             atom_sele = parse_selection_string(s=atomtype_selection,
 94 |                                                columns=df_atom.columns,
 95 |                                                df_name='df_atom')
 96 | 
 97 |             for sele in atom_sele:
 98 |                 df_atom = pd.eval(sele)
 99 | 
100 |         if charge_selection:
101 |             charge_sele = parse_selection_string(s=charge_selection,
102 |                                                  columns=df_charge.columns,
103 |                                                  df_name='df_charge')
104 | 
105 |             for sele in charge_sele:
106 |                 df_charge = pd.eval(sele)
107 | 
108 |         selection_indices = set(df_charge.index).intersection(
109 |                             set(df_atom.index))
110 |         selection_indices = sorted(list(selection_indices))
111 | 
112 |         df_atom.ix[selection_indices].to_csv(a_out, sep='\t')
113 |         df_charge.ix[selection_indices].to_csv(c_out, sep='\t')
114 | 
115 |         if input_mol2:
116 |             input_mol2_path_query = os.path.join(input_mol2, os.path.basename(
117 |                                     c_out).replace('_charge.tsv',
118 |                                                    '_query.mol2'))
119 |             input_mol2_path_dbase = input_mol2_path_query.replace(
120 |                                     '_query.mol2', '_dbase.mol2')
121 | 
122 |             if not os.path.exists(input_mol2_path_query)\
123 |                     and os.path.exists(input_mol2_path_query + '.gz'):
124 |                 input_mol2_path_query += '.gz'
125 |             if not os.path.exists(input_mol2_path_dbase)\
126 |                     and os.path.exists(input_mol2_path_dbase + '.gz'):
127 |                 input_mol2_path_dbase += '.gz'
128 | 
129 |             output_mol2_path_query = os.path.join(output_dir,
130 |                                                   os.path.basename(
131 |                                                    c_out).replace(
132 |                                                    '_charge.tsv',
133 |                                                    '_query.mol2'))
134 |             output_mol2_path_dbase = output_mol2_path_query.replace(
135 |                                      '_query.mol2', '_dbase.mol2')
136 | 
137 |             if input_mol2_path_query.endswith('.gz'):
138 |                 output_mol2_path_query += '.gz'
139 |                 query_write_mode = 'wb'
140 |                 query_open_file = gzip.open
141 |             else:
142 |                 query_write_mode = 'w'
143 |                 query_open_file = open
144 |             if input_mol2_path_dbase.endswith('.gz'):
145 |                 output_mol2_path_dbase += '.gz'
146 |                 dbase_write_mode = 'wb'
147 |                 dbase_open_file = gzip.open
148 |             else:
149 |                 dbase_write_mode = 'w'
150 |                 dbase_open_file = open
151 | 
152 |             with query_open_file(output_mol2_path_query, query_write_mode) as opq,\
153 |                     dbase_open_file(output_mol2_path_dbase, dbase_write_mode) as opd:
154 |                 for i in selection_indices:
155 | 
156 |                     mol2_q_cont = ('DID NOT FIND %s\n'
157 |                                    % (df_atom.ix[i]['query']))
158 | 
159 |                     mol2_d_cont = ('DID NOT FIND %s\n'
160 |                                    % (df_atom.ix[i]['dbase']))
161 | 
162 |                     for idx, mol2 in enumerate(split_multimol2(
163 |                             input_mol2_path_query)):
164 |                         if idx == i:
165 |                             mol2_q_cont = mol2[1]
166 |                             break
167 | 
168 |                     for idx, mol2 in enumerate(split_multimol2(
169 |                             input_mol2_path_dbase)):
170 |                         if idx == i:
171 |                             mol2_d_cont = mol2[1]
172 |                             break
173 | 
174 |                     if query_write_mode == 'wb':
175 |                         opq.write(b''.join(mol2_q_cont))
176 |                     else:
177 |                         opq.write(''.join(mol2_q_cont))
178 | 
179 |                     if dbase_write_mode == 'wb':
180 |                         opd.write(b''.join(mol2_d_cont))
181 |                     else:
182 |                         opd.write(''.join(mol2_d_cont))
183 | 
184 |         if verbose:
185 |             elapsed = time.time() - start
186 |             n_molecules = mol2_cnt
187 |             sys.stdout.write(' | scanned %d molecules | %d mol/sec\n' %
188 |                              (n_molecules, n_molecules / elapsed))
189 |             sys.stdout.flush()
190 | 
191 | 
# Command-line entry point: build the argument parser and hand the parsed
# options to main().
if __name__ == '__main__':

    # NOTE(review): the example in the epilog passes an extra
    # `FGROUP_CHARGE` token after --charge_selection and places `# ...`
    # comments in front of the line-continuation backslashes -- it
    # presumably does not run as written when pasted into a shell; confirm
    # and fix the help text.
    parser = argparse.ArgumentParser(
            description='Selects molecules with certain functional group matching patterns after functional group matching.',
            epilog="""Example:
python funcgroup_matching_selection.py\\
  --input 07_fgroup_matching_tables # generated via funcgroup_matching.py\\
  --input_mol2 06_rocs_overlays_sorted # generated via sort_rocs_mol2.py\\
  --output 08_funcgroup_selection\\
  --atomtype_selection "((S1 == 'S.3') | (S1 == 'S.o2')) --> (O2 == 'O.2')"\\
  --charge_selection FGROUP_CHARGE "((S1 >= 1.0)) --> (O2 <= -0.5)" """,
            formatter_class=argparse.RawTextHelpFormatter)

    # Directory with the *_atomtype.tsv / *_charge.tsv tables.
    parser.add_argument('-i', '--input',
                        type=str,
                        required=True,
                        help=('(Required.) Input directory with input `.tsv` tables (functional group files'
                              ' generated via `funcgroup_matching.py`).'))
    # Optional directory with the MOL2 files; no default is given, so it is
    # None when omitted and main() then skips the MOL2 extraction.
    parser.add_argument('--input_mol2',
                        type=str,
                        help=('(Optional.) Input directory with input `.mol2` structures (ROCS overlays'
                              '\ngenerated via `sort_rocs_mol2.py`). If provided, the MOL2 structures'
                              '\ncorresponding to the selected matches will be extracted from the'
                              '\ninput_mol2 directory and written to the output directory for visual inspection,'
                              '\nfor example, using PyMOL.'))
    parser.add_argument('-o', '--output',
                        type=str,
                        required=True,
                        help='(Required.) Directory for writing the output files.')
    # The two selection strings default to "", which main() treats as
    # "no filtering" for the respective table.
    parser.add_argument('--atomtype_selection',
                        type=str,
                        default="",
                        help="""(Optional, default="") Selection condition for the atom types.
For example, the following selection query will make a selection based on
matching 2 atoms in the reference molecule, S1 and O2:
"((S1 == 'S.3') | (S1 == 'S.o2')) --> (O2 == 'O.2')".
Here, S1 can either match an S.3 or an S.o2 atom in the database molecule.
The second atom, O2, must match an atom of type O.2.""")
    parser.add_argument('--charge_selection',
                        type=str,
                        default="",
                        help="""(Optional, default="") Selection condition for the atom charges.
For example, the following selection query will make a selection based on
matching the charges in 2 atoms in the reference molecule, S1 and O2:
"((S1 >= 1.0)) --> (O2 <= -0.5)".
Here, the atom that matches S1 has to have a positive charge, 1 or greater. The charge
matching the second atom, O2, must be (partially) negative (-0.5 or smaller).""")
    parser.add_argument('-v', '--verbose',
                        type=int,
                        default=1,
                        help='(Optional, default: `1`.) Verbosity level. If 0, does not print any'
                             '\noutput.'
                             '\nIf 1 (default), prints the file currently'
                             '\nprocessing.')

    parser.add_argument('--version', action='version', version='v. 1.0')

    args = parser.parse_args()

    main(input_dir=args.input,
         output_dir=args.output,
         atomtype_selection=args.atomtype_selection,
         charge_selection=args.charge_selection,
         input_mol2=args.input_mol2,
         verbose=args.verbose)


--------------------------------------------------------------------------------
/tools/funcgroup_presence_to_id.py:
--------------------------------------------------------------------------------
  1 | # Sebastian Raschka 2017
  2 | #
  3 | # screenlamp is a Python toolkit
  4 | # for hypothesis-driven virtual screening.
  5 | #
  6 | # Copyright (C) 2017 Michigan State University
  7 | # License: Apache v2
  8 | #
  9 | # Software author: Sebastian Raschka <http://sebastianraschka.com>
 10 | # Software author email: mail@sebastianraschka.com
 11 | #
 12 | # Software source repository: https://github.com/rasbt/screenlamp
 13 | # Documentation: https://psa-lab.github.io/screenlamp
 14 | #
 15 | # screenlamp was developed in the
 16 | # Protein Structural Analysis & Design Laboratory
 17 | # (http://www.kuhnlab.bmb.msu.edu)
 18 | #
 19 | # If you are using screenlamp in your research, please cite
 20 | # the following journal article:
 21 | #
 22 | # Raschka, Sebastian,  Anne M. Scott, Nan Liu,
 23 | #   Santosh Gunturu, Mar Huertas, Weiming Li,
 24 | #   and Leslie A. Kuhn. 2017
 25 | #
 26 | # Enabling the hypothesis-driven prioritization of
 27 | #   ligand candidates in big databases:
 28 | #   Screenlamp and its application to GPCR inhibitor
 29 | #   discovery for invasive species control.
 30 | #
 31 | 
 32 | 
 33 | import os
 34 | import argparse
 35 | import sys
 36 | import pandas as pd
 37 | import time
 38 | from mputil import lazy_imap
 39 | from multiprocessing import cpu_count
 40 | from biopandas.mol2 import split_multimol2
 41 | from biopandas.mol2 import PandasMol2
 42 | 
 43 | 
 44 | def get_mol2_files(dir_path):
 45 | 
 46 |     files = []
 47 | 
 48 |     if os.path.isdir(dir_path):
 49 |         for f in os.listdir(dir_path):
 50 |             if f.endswith(('.mol2', 'mol2.gz')):
 51 |                 file_path = os.path.join(dir_path, f)
 52 |                 files.append(file_path)
 53 | 
 54 |     elif (os.path.isfile(dir_path) and
 55 |           dir_path.endswith(('.mol2', 'mol2.gz'))):
 56 |         files.append(dir_path)
 57 | 
 58 |     return files
 59 | 
 60 | 
 61 | def parse_selection_string(s, df_name='pdmol.df'):
 62 | 
 63 |     columns = ['(atom_id', '(atom_name', '(atom_type',
 64 |                '(subst_id', '(subst_name', '(charge']
 65 |     lst = [subs.strip() for subs in s.split('-->')]
 66 |     parsed = []
 67 | 
 68 |     for subs in lst:
 69 |         for c in columns:
 70 |             subs = subs.replace(c, '(%s.%s' % (df_name, c[1:]))
 71 |         parsed.append(subs)
 72 |     return parsed
 73 | 
 74 | 
 75 | def data_processor(mol2):
 76 | 
 77 |     pdmol = PandasMol2().read_mol2_from_list(mol2_lines=mol2[1],
 78 |                                              mol2_code=mol2[0])
 79 | 
 80 |     match = mol2[0]
 81 |     for sub_sele in SELECTION:
 82 |         if not pd.eval(sub_sele).any():
 83 |             match = ''
 84 |             break
 85 | 
 86 |     return match
 87 | 
 88 | def data_processor_gz(mol2_gz):
 89 | 
 90 |     pdmol = PandasMol2().read_mol2_from_list(mol2_lines=mol2_gz[1],
 91 |                                              mol2_code=mol2_gz[0])
 92 | 
 93 |     match = mol2_gz[0].decode('utf-8')
 94 |     for sub_sele in SELECTION:
 95 |         if not pd.eval(sub_sele).any():
 96 |             match = ''
 97 |             break
 98 | 
 99 |     return match
100 | 
101 | 
def read_and_write(mol2_files, id_file_path, verbose, n_cpus):
    """Write the IDs of all molecules matching `SELECTION` to a text file.

    Each file in `mol2_files` is processed in parallel via `lazy_imap`;
    the non-empty results are written to `id_file_path`, one ID per line.

    Parameters
    ----------
    mol2_files : list of str
        Paths of the `.mol2` / `.mol2.gz` files to scan.
    id_file_path : str
        Path of the ID file to write (opened in 'w' mode).
    verbose : int or bool
        If truthy, the selection, the current file and the throughput are
        written to stdout.
    n_cpus : int
        Number of worker processes passed to `lazy_imap`.

    """
    if verbose:
        sys.stdout.write('Using selection: %s\n' % SELECTION)
        sys.stdout.flush()

    with open(id_file_path, 'w') as id_file:

        for mol2_file in mol2_files:
            if verbose:
                start = time.time()
                sys.stdout.write('Processing %s' % os.path.basename(mol2_file))
                sys.stdout.flush()

            # Gzipped files need the variant that decodes the molecule ID.
            is_gz = mol2_file.endswith('.gz')
            processor = data_processor_gz if is_gz else data_processor

            chunks = lazy_imap(data_processor=processor,
                               data_generator=split_multimol2(mol2_file),
                               n_cpus=n_cpus)

            cnt = 0
            for chunk in chunks:
                # Empty strings mark molecules that did not match.
                id_file.writelines('%s\n' % mol2_id
                                   for mol2_id in chunk if mol2_id)
                cnt += len(chunk)

            if verbose:
                elapsed = time.time() - start
                sys.stdout.write(' | %d mol/sec\n' % (cnt / elapsed))
                sys.stdout.flush()
136 | 
137 | 
def get_num_cpus(n_cpus):
    """Translate the `--processes` option into a concrete worker count.

    Parameters
    ----------
    n_cpus : int or None
        Requested number of processes. `0` (or `None`) selects all
        available CPUs; a negative value leaves that many CPUs unused
        (e.g. `-1` means all CPUs but one); a positive value is returned
        unchanged.

    Returns
    -------
    int
        Number of processes to use, always at least 1.

    """
    if not n_cpus:
        return cpu_count()
    if n_cpus < 0:
        # n_cpus is negative here, so adding it subtracts CPUs. Never go
        # below a single worker.
        return max(1, cpu_count() + n_cpus)
    return n_cpus
144 | 
145 | 
def main(input_dir, output_file, verbose, n_cpus):
    """Scan the MOL2 files in `input_dir` and write the matching IDs.

    Parameters
    ----------
    input_dir : str
        Directory (or single file) passed to `get_mol2_files`.
    output_file : str
        Path of the ID file to write. Its parent directory is created if
        it does not exist.
    verbose : int or bool
        If truthy, progress is reported and 'Finished' is printed at the
        end.
    n_cpus : int
        Requested number of processes, resolved via `get_num_cpus`.

    """
    n_cpus = get_num_cpus(n_cpus)

    dirpath = os.path.dirname(output_file)
    # dirname is '' for a bare file name such as 'mol2ids.txt'; there is
    # nothing to create then, and os.mkdir('') would raise.
    if dirpath and not os.path.exists(dirpath):
        os.mkdir(dirpath)

    mol2_files = get_mol2_files(dir_path=input_dir)
    read_and_write(mol2_files=mol2_files,
                   id_file_path=output_file,
                   verbose=verbose,
                   n_cpus=n_cpus)
    if verbose:
        print('Finished')
158 | 
159 | 
160 | 
# Command-line entry point: build the argument parser, store the parsed
# selection in the module-level SELECTION and hand the remaining options to
# main().
if __name__ == '__main__':

    parser = argparse.ArgumentParser(
            description="""Checking molecules base on the presence
of certain atoms or functional groups and writing the results to a text file.""",
            epilog="""Example:
python funcgroup_presence_to_id.py --input mol2s/\\
  --output mol2ids.txt\\
  --selection "((atom_type == \'S.3\') | (atom_type == \'S.o2\')) --> (atom_type == \'O.2\')"\\
  --processes 0""",
            formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('-i', '--input',
                        type=str,
                        required=True,
                        help='(Required.) Input directory with `.mol2` and `.mol2.gz` files.')
    # NOTE(review): the help text calls this a directory, but main() passes
    # the value on as the path of the ID file that is opened for writing
    # (see the `--output mol2ids.txt` example above).
    parser.add_argument('-o', '--output',
                        type=str,
                        required=True,
                        help='(Required.) Directory for writing the output files.')
    # Raw selection string; converted by parse_selection_string() below.
    parser.add_argument('-s', '--selection',
                        type=str,
                        required=True,
                        help='Selection condition for the atom presence'
                        ' checks.'
                        '\n1) Require 2 atom types to be present:'
                        '\n    "(atom_type == \'S.o2\') -->'
                        ' (atom_type == \'O.2\')"'
                        '\n2) Selection example to consider either'
                        ' an S.o2 or S.3 atom and a O.2 atom to be present:'
                        '\n    "((atom_type == \'S.3\') |'
                        ' (atom_type == \'S.o2\')) -->'
                        ' (atom_type == \'O.2\')"'
                        '\n3) Selection example using logical ORs on '
                        'both sides:\n'
                        '    "((atom_type == \'S.3\') | (atom_type == '
                        '\'S.o2\'))'
                        ' -->  ((atom_type == \'O.2\') |'
                        ' (atom_type == \'O.3\'))"')
    # Requested process count; passed to main() as n_cpus.
    parser.add_argument('--processes',
                        type=int,
                        default=1,
                        help='(Optional, default: `1`.) Number of processes to run in parallel.'
                             '\nIf processes > 0, the specified number of CPUs'
                             '\nwill be used.'
                             '\nIf processes = 0, all available CPUs will'
                             '\nbe used.'
                             '\nIf processes = -1, all available CPUs'
                             '\nminus `processes` will be used.')
    parser.add_argument('-v', '--verbose',
                        type=int,
                        default=1,
                        help='(Optional, default: `1`.) Verbosity level. If 0, does not print any'
                             '\noutput.'
                             '\nIf 1 (default), prints the file currently'
                             '\nprocessing.')

    parser.add_argument('--version', action='version', version='v. 1.0')

    args = parser.parse_args()
    # data_processor() / data_processor_gz() read SELECTION as a global.
    # NOTE(review): the name is only assigned inside this __main__ guard;
    # presumably worker processes see it only when they inherit the parent's
    # module state (fork start method) -- verify on spawn-based platforms.
    SELECTION = parse_selection_string(args.selection)

    main(input_dir=args.input,
         output_file=args.output,
         verbose=args.verbose,
         n_cpus=args.processes)
227 | 


--------------------------------------------------------------------------------
/tools/generate_conformers_obabel.py:
--------------------------------------------------------------------------------
  1 | # Sebastian Raschka 2017
  2 | #
  3 | # screenlamp is a Python toolkit
  4 | # for hypothesis-driven virtual screening.
  5 | #
  6 | # Copyright (C) 2017 Michigan State University
  7 | # License: Apache v2
  8 | #
  9 | # Software author: Sebastian Raschka <http://sebastianraschka.com>
 10 | # Software author email: mail@sebastianraschka.com
 11 | #
 12 | # Software source repository: https://github.com/rasbt/screenlamp
 13 | # Documentation: https://psa-lab.github.io/screenlamp
 14 | #
 15 | # screenlamp was developed in the
 16 | # Protein Structural Analysis & Design Laboratory
 17 | # (http://www.kuhnlab.bmb.msu.edu)
 18 | #
 19 | # If you are using screenlamp in your research, please cite
 20 | # the following journal article:
 21 | #
 22 | # Raschka, Sebastian,  Anne M. Scott, Nan Liu,
 23 | #   Santosh Gunturu, Mar Huertas, Weiming Li,
 24 | #   and Leslie A. Kuhn. 2017
 25 | #
 26 | # Enabling the hypothesis-driven prioritization of
 27 | #   ligand candidates in big databases:
 28 | #   Screenlamp and its application to GPCR inhibitor
 29 | #   discovery for invasive species control.
 30 | #
 31 | 
 32 | import os
 33 | import subprocess
 34 | import sys
 35 | import argparse
 36 | 
 37 | 
 38 | def get_mol2_files(dir_path):
 39 | 
 40 |     files = []
 41 | 
 42 |     if os.path.isdir(dir_path):
 43 |         for f in os.listdir(dir_path):
 44 |             if f.endswith(('.mol2', 'mol2.gz')):
 45 |                 file_path = os.path.join(dir_path, f)
 46 |                 files.append(file_path)
 47 | 
 48 |     elif (os.path.isfile(dir_path) and
 49 |           dir_path.endswith(('.mol2', 'mol2.gz'))):
 50 |         files.append(dir_path)
 51 | 
 52 |     return files
 53 | 
 54 | 
 55 | def run_obabel(source_file, target_file, settings):
 56 | 
 57 |     sys.stdout.write('Processing %s\n' % source_file)
 58 |     sys.stdout.flush()
 59 | 
 60 |     cmd = [EXECUTABLE,
 61 |            source_file,
 62 |            '-O', target_file,
 63 |            '--original',
 64 |            '--confab']
 65 |     if settings:
 66 |         for s in settings.split():
 67 |             s = s.strip()
 68 |             if s:
 69 |                 cmd.append(s)
 70 | 
 71 |     if source_file.endswith('.gz'):
 72 |         cmd.extend(['-zin', '-z'])
 73 |     prefix = ''.join(target_file.split('.mol2')[:-1])
 74 | 
 75 |     with open(prefix + '.log', 'wb') as out, \
 76 |             open(prefix + '.err', 'wb') as err:
 77 | 
 78 |         subprocess.call(cmd, bufsize=1, stdout=out, stderr=err)
 79 | 
 80 | 
 81 | def main(input_dir, output_dir, settings):
 82 |     if not os.path.exists(output_dir):
 83 |         os.mkdir(output_dir)
 84 |     mol2_in_files = get_mol2_files(input_dir)
 85 |     mol2_out_files = [os.path.join(output_dir, os.path.basename(mol2))
 86 |                       for mol2 in mol2_in_files]
 87 | 
 88 |     for i, j in zip(mol2_in_files, mol2_out_files):
 89 |         run_obabel(source_file=i,
 90 |                    target_file=j,
 91 |                    settings=settings)
 92 | 
 93 | 
 94 | if __name__ == '__main__':
 95 | 
 96 |     parser = argparse.ArgumentParser(
 97 |             description='Wrapper running OpenBabel Confab on one'
 98 |                         '\nor more database partitions.'
 99 |                         ' Please see'
100 |                         '\nhttp://open-babel.readthedocs.io/en/latest/'
101 |                         '3DStructureGen/multipleconformers.html'
102 |                         '\nif you want to learn more about OpenBabel Confab.',
103 |             epilog="""Example:
104 | python generate_conformers_obabel.py\\
105 |    --input dbase_mol2/\\
106 |    --output dbase_conformers/\\
107 |    --executable /.../obabel""",
108 |             formatter_class=argparse.RawTextHelpFormatter)
109 | 
110 |     parser.add_argument('-i', '--input',
111 |                         type=str,
112 |                         required=True,
113 |                         help='Input directory with `.mol2`'
114 |                              ' and `.mol2.gz` files.')
115 |     parser.add_argument('-o', '--output',
116 |                         type=str,
117 |                         required=True,
118 |                         help='Directory for writing the output files.')
119 |     parser.add_argument('--executable',
120 |                         type=str,
121 |                         required=True,
122 |                         help="""(Required.) The path or command for running
123 | OpenBabel Confab on your system.""")
124 |     parser.add_argument('--settings',
125 |                         type=str,
126 |                         default='--conf 200 --ecutoff 50''--rcutoff 0.5',
127 |                         help='(Optional.) OpenBabel settings to use.')
128 | 
129 |     parser.add_argument('-v', '--version', action='version', version='v. 1.0')
130 | 
131 |     args = parser.parse_args()
132 | 
133 |     EXECUTABLE = args.executable
134 | 
135 |     main(input_dir=args.input,
136 |          output_dir=args.output,
137 |          settings=args.settings)
138 | 


--------------------------------------------------------------------------------
/tools/generate_conformers_omega.py:
--------------------------------------------------------------------------------
  1 | # Sebastian Raschka 2017
  2 | #
  3 | # screenlamp is a Python toolkit
  4 | # for hypothesis-driven virtual screening.
  5 | #
  6 | # Copyright (C) 2017 Michigan State University
  7 | # License: Apache v2
  8 | #
  9 | # Software author: Sebastian Raschka <http://sebastianraschka.com>
 10 | # Software author email: mail@sebastianraschka.com
 11 | #
 12 | # Software source repository: https://github.com/rasbt/screenlamp
 13 | # Documentation: https://psa-lab.github.io/screenlamp
 14 | #
 15 | # screenlamp was developed in the
 16 | # Protein Structural Analysis & Design Laboratory
 17 | # (http://www.kuhnlab.bmb.msu.edu)
 18 | #
 19 | # If you are using screenlamp in your research, please cite
 20 | # the following journal article:
 21 | #
 22 | # Raschka, Sebastian,  Anne M. Scott, Nan Liu,
 23 | #   Santosh Gunturu, Mar Huertas, Weiming Li,
 24 | #   and Leslie A. Kuhn. 2017
 25 | #
 26 | # Enabling the hypothesis-driven prioritization of
 27 | #   ligand candidates in big databases:
 28 | #   Screenlamp and its application to GPCR inhibitor
 29 | #   discovery for invasive species control.
 30 | #
 31 | 
 32 | import os
 33 | import subprocess
 34 | import sys
 35 | import argparse
 36 | from multiprocessing import cpu_count
 37 | 
 38 | 
 39 | def get_num_cpus(n_cpus):
 40 |     if not n_cpus:
 41 |         n_cpus = cpu_count()
 42 |     elif n_cpus < 0:
 43 |         n_cpus = cpu_count() - n_cpus
 44 |     return n_cpus
 45 | 
 46 | 
 47 | def get_mol2_files(dir_path):
 48 | 
 49 |     files = []
 50 | 
 51 |     if os.path.isdir(dir_path):
 52 |         for f in os.listdir(dir_path):
 53 |             if f.endswith(('.mol2', 'mol2.gz')):
 54 |                 file_path = os.path.join(dir_path, f)
 55 |                 files.append(file_path)
 56 | 
 57 |     elif (os.path.isfile(dir_path) and
 58 |           dir_path.endswith(('.mol2', 'mol2.gz'))):
 59 |         files.append(dir_path)
 60 | 
 61 |     return files
 62 | 
 63 | 
 64 | def run_omega(source_file, target_file, n_processes, settings):
 65 | 
 66 |     prefix = ''.join(target_file.split('.mol2')[:-1])
 67 | 
 68 |     sys.stdout.write('Processing %s\n' % source_file)
 69 |     sys.stdout.flush()
 70 | 
 71 |     cmd = [EXECUTABLE,
 72 |            '-in', source_file,
 73 |            '-out', target_file,
 74 |            '-prefix', prefix,
 75 |            '-mpi_np', str(n_processes)]
 76 |     if settings:
 77 |         for s in settings.split():
 78 |             s = s.strip()
 79 |             if s:
 80 |                 cmd.append(s)
 81 | 
 82 |     subprocess.call(cmd, stdout=subprocess.PIPE, bufsize=1)
 83 | 
 84 | 
 85 | def main(input_dir, output_dir, n_processes, settings):
 86 |     if not os.path.exists(output_dir):
 87 |         os.mkdir(output_dir)
 88 |     mol2_in_files = get_mol2_files(input_dir)
 89 |     mol2_out_files = [os.path.join(output_dir, os.path.basename(mol2))
 90 |                       for mol2 in mol2_in_files]
 91 | 
 92 |     n_processes = get_num_cpus(n_processes)
 93 | 
 94 |     for i, j in zip(mol2_in_files, mol2_out_files):
 95 |         run_omega(source_file=i,
 96 |                   target_file=j,
 97 |                   n_processes=n_processes,
 98 |                   settings=settings)
 99 | 
100 | 
if __name__ == '__main__':

    # Command-line entry point: parse the arguments, publish the executable
    # path as the module-level EXECUTABLE (read by run_omega), then process
    # the input.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter,
        description=('Wrapper running OpenEye OMEGA on one\n'
                     'or more database partitions.'),
        epilog=('Example:\n'
                'python generate_conformers_omega.py\\\n'
                '   --input dbase_mol2\\\n'
                '   --output dbase_conformers/\\\n'
                '   --executable /.../omega2-2.5.1.4\\\n'
                '   --processes 0'))

    parser.add_argument(
        '-i', '--input',
        required=True,
        type=str,
        help='Input directory with `.mol2` and `.mol2.gz` files.')
    parser.add_argument(
        '-o', '--output',
        required=True,
        type=str,
        help='Directory for writing the output files.')
    parser.add_argument(
        '--executable',
        required=True,
        type=str,
        help=('(Required.) The path or command for running\n'
              'OpenEye OMEGA2 on your system.'))
    parser.add_argument(
        '--settings',
        type=str,
        default='-maxconfs 200 -warts false -progress percent',
        help='(Optional.) OMEGA settings to use.')
    parser.add_argument(
        '--processes',
        type=int,
        default=1,
        help=('(Optional, default: `1`.) Number of processes to'
              ' run in parallel.\n'
              'If processes > 0, the specified number of CPUs\n'
              'will be used.\n'
              'If processes = 0, all available CPUs will\n'
              'be used.\n'
              'If processes = -1, all available CPUs\n'
              'minus `processes` will be used.'))

    parser.add_argument('-v', '--version', version='v. 1.0', action='version')

    args = parser.parse_args()

    EXECUTABLE = args.executable

    main(input_dir=args.input,
         output_dir=args.output,
         n_processes=args.processes,
         settings=args.settings)
154 | 


--------------------------------------------------------------------------------
/tools/id_to_mol2.py:
--------------------------------------------------------------------------------
  1 | # Sebastian Raschka 2017
  2 | #
  3 | # screenlamp is a Python toolkit
  4 | # for hypothesis-driven virtual screening.
  5 | #
  6 | # Copyright (C) 2017 Michigan State University
  7 | # License: Apache v2
  8 | #
  9 | # Software author: Sebastian Raschka <http://sebastianraschka.com>
 10 | # Software author email: mail@sebastianraschka.com
 11 | #
 12 | # Software source repository: https://github.com/rasbt/screenlamp
 13 | # Documentation: https://psa-lab.github.io/screenlamp
 14 | #
 15 | # screenlamp was developed in the
 16 | # Protein Structural Analysis & Design Laboratory
 17 | # (http://www.kuhnlab.bmb.msu.edu)
 18 | #
 19 | # If you are using screenlamp in your research, please cite
 20 | # the following journal article:
 21 | #
 22 | # Raschka, Sebastian,  Anne M. Scott, Nan Liu,
 23 | #   Santosh Gunturu, Mar Huertas, Weiming Li,
 24 | #   and Leslie A. Kuhn. 2017
 25 | #
 26 | # Enabling the hypothesis-driven prioritization of
 27 | #   ligand candidates in big databases:
 28 | #   Screenlamp and its application to GPCR inhibitor
 29 | #   discovery for invasive species control.
 30 | #
 31 | 
 32 | import argparse
 33 | import os
 34 | import sys
 35 | import time
 36 | import gzip
 37 | 
 38 | from biopandas.mol2.mol2_io import split_multimol2
 39 | 
 40 | 
 41 | def str2bool(v):
 42 |     if v.lower() in ('yes', 'true', 't', 'y', '1'):
 43 |         return True
 44 |     if v.lower() in ('no', 'false', 'f', 'n', '0'):
 45 |         return False
 46 |     else:
 47 |         raise argparse.ArgumentTypeError('Boolean value expected.')
 48 | 
 49 | 
 50 | def get_mol2_files(dir_path):
 51 | 
 52 |     files = []
 53 |     if os.path.isdir(dir_path):
 54 |         for f in os.listdir(dir_path):
 55 |             if f.endswith(('.mol2', 'mol2.gz')):
 56 |                 file_path = os.path.join(dir_path, f)
 57 |                 files.append(file_path)
 58 | 
 59 |     elif (os.path.isfile(dir_path) and
 60 |           dir_path.endswith(('.mol2', 'mol2.gz'))):
 61 |         files.append(dir_path)
 62 | 
 63 |     return files
 64 | 
 65 | 
 66 | def read_idfile(id_file_path):
 67 |     with open(id_file_path, 'r') as f:
 68 |         ids = {line.strip() for line in f if not line.startswith('#')}
 69 |     return ids
 70 | 
 71 | 
 72 | def filter_and_write(mol2_files, ids, output_dir, includelist_filter, verbose):
 73 |     for mol2_file in mol2_files:
 74 |         if verbose:
 75 |             sys.stdout.write('Processing %s' % os.path.basename(mol2_file))
 76 |             sys.stdout.flush()
 77 | 
 78 |         if not os.path.exists(output_dir):
 79 |             os.mkdir(output_dir)
 80 | 
 81 |         mol2_outpath = os.path.join(output_dir, os.path.basename(mol2_file))
 82 | 
 83 |         if mol2_outpath.endswith('.gz'):
 84 |             write_mode = 'wb'
 85 |             open_file = gzip.open
 86 |         else:
 87 |             write_mode = 'w'
 88 |             open_file = open
 89 | 
 90 |         with open_file(mol2_outpath, write_mode) as f:
 91 |             if verbose:
 92 |                 start = time.time()
 93 | 
 94 |             if includelist_filter:
 95 | 
 96 |                 if write_mode == 'w':
 97 |                     for idx, mol2 in enumerate(split_multimol2(mol2_file)):
 98 | 
 99 |                         if mol2[0] in ids:
100 |                             f.write(''.join(mol2[1]))
101 |                 else:
102 |                     for idx, mol2 in enumerate(split_multimol2(mol2_file)):
103 | 
104 |                         if mol2[0].decode('utf-8') in ids:
105 |                             f.write(b''.join(mol2[1]))
106 | 
107 |             else:
108 |                 if write_mode == 'w':
109 |                     for idx, mol2 in enumerate(split_multimol2(mol2_file)):
110 |                         if mol2[0] not in ids:
111 |                             f.write(''.join(mol2[1]))
112 |                 else:
113 |                     for idx, mol2 in enumerate(split_multimol2(mol2_file)):
114 |                         if mol2[0].decode('utf-8') not in ids:
115 |                             f.write(b''.join(mol2[1]))
116 |             if verbose:
117 |                 elapsed = time.time() - start
118 |                 n_molecules = idx + 1
119 |                 sys.stdout.write(' | scanned %d molecules | %d mol/sec\n' %
120 |                                  (n_molecules, n_molecules / elapsed))
121 |                 sys.stdout.flush()
122 | 
123 | 
124 | 
def main(input_dir, id_file_path, output_dir, includelist_filter, verbose):
    """Filter the MOL2 files at `input_dir` by the IDs in `id_file_path`.

    Parameters
    ----------
    input_dir : str
        Directory (or single file) passed to `get_mol2_files`.
    id_file_path : str
        ID file read with `read_idfile`.
    output_dir : str
        Directory receiving the filtered MOL2 files.
    includelist_filter : bool
        Forwarded to `filter_and_write`.
    verbose : int
        Forwarded to `filter_and_write`; if non-zero, 'Finished' is
        printed at the end.
    """
    filter_and_write(mol2_files=get_mol2_files(dir_path=input_dir),
                     ids=read_idfile(id_file_path),
                     output_dir=output_dir,
                     includelist_filter=includelist_filter,
                     verbose=verbose)
    if not verbose:
        return
    print('Finished')
136 | 
137 | 
if __name__ == '__main__':

    # Command-line entry point: declare the options, parse them and hand
    # the values to main().
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter,
        description='Create filtered MOL2 files from ID and input MOL2 files.',
        epilog=('Example:\n'
                'python id_to_mol2.py --input mol2_dir/\\\n'
                '   --id_file ids.txt\\\n'
                '   --includelist True\\\n'
                '   --output filtered_mol2_dir/'))

    parser.add_argument(
        '-i', '--input',
        required=True,
        type=str,
        help=('(Required.) Input `.mol2` or `.mol2.gz` file,'
              ' or a directory of MOL2 files.'))
    parser.add_argument(
        '--id_file',
        required=True,
        type=str,
        help=('(Required.) Input ID file that contains molecule\n'
              'IDs (one ID per line).'))
    parser.add_argument(
        '-o', '--output',
        required=True,
        type=str,
        help=('(Required.) Output directory path for the\n'
              'filtered MOL2 files.'))
    parser.add_argument(
        '-w', '--includelist',
        type=str2bool,
        default=True,
        help=('(Optional, default: `True`.) Uses ID file as includelist'
              ' if True (default).\n'
              'Uses ID file as excludelist if False.'))
    parser.add_argument(
        '-v', '--verbose',
        type=int,
        default=1,
        help=('(Optional, default: `1`.) Verbosity level.'
              ' If 0, does not print any\n'
              'output.\n'
              'If 1 (default), prints the file currently\n'
              'processing.'))

    parser.add_argument('--version', version='v. 1.0', action='version')

    args = parser.parse_args()

    main(input_dir=args.input,
         id_file_path=args.id_file,
         output_dir=args.output,
         includelist_filter=args.includelist,
         verbose=args.verbose)
187 | 


--------------------------------------------------------------------------------
/tools/merge_id_files.py:
--------------------------------------------------------------------------------
 1 | # Sebastian Raschka 2017
 2 | #
 3 | # screenlamp is a Python toolkit
 4 | # for hypothesis-driven virtual screening.
 5 | #
 6 | # Copyright (C) 2017 Michigan State University
 7 | # License: Apache v2
 8 | #
 9 | # Software author: Sebastian Raschka <http://sebastianraschka.com>
10 | # Software author email: mail@sebastianraschka.com
11 | #
12 | # Software source repository: https://github.com/rasbt/screenlamp
13 | # Documentation: https://psa-lab.github.io/screenlamp
14 | #
15 | # screenlamp was developed in the
16 | # Protein Structural Analysis & Design Laboratory
17 | # (http://www.kuhnlab.bmb.msu.edu)
18 | #
19 | # If you are using screenlamp in your research, please cite
20 | # the following journal article:
21 | #
22 | # Raschka, Sebastian,  Anne M. Scott, Nan Liu,
23 | #   Santosh Gunturu, Mar Huertas, Weiming Li,
24 | #   and Leslie A. Kuhn. 2017
25 | #
26 | # Enabling the hypothesis-driven prioritization of
27 | #   ligand candidates in big databases:
28 | #   Screenlamp and its application to GPCR inhibitor
29 | #   discovery for invasive species control.
30 | #
31 | 
32 | 
33 | import argparse
34 | 
35 | 
def read_idfile(id_file_path):
    """Read molecule IDs from a text file with one ID per line.

    Surrounding whitespace is stripped from every line. Blank lines and
    lines whose first non-blank character is '#' are skipped, so the
    result never contains an empty-string ID.

    Parameters
    ----------
    id_file_path : str
        Path to the ID file.

    Returns
    -------
    set of str
        The distinct IDs found in the file.
    """
    ids = set()
    with open(id_file_path, 'r') as f:
        for line in f:
            entry = line.strip()
            # Strip first so that indented comments and whitespace-only
            # lines are recognised as such.
            if entry and not entry.startswith('#'):
                ids.add(entry)
    return ids
40 | 
41 | 
def main(id_file_path_1, id_file_path_2, output_path):
    """Merge two ID files into one file without duplicate IDs.

    The IDs of the first file are written first, followed by those IDs of
    the second file that have not been written yet; within each file the
    original order is kept. Surrounding whitespace is stripped; blank
    lines and lines starting with '#' are not copied.

    Parameters
    ----------
    id_file_path_1 : str
        Path to the first input ID file (one ID per line).
    id_file_path_2 : str
        Path to the second input ID file (one ID per line).
    output_path : str
        Path of the merged ID file; it is overwritten if it exists.
    """
    seen = set()
    with open(output_path, 'w') as ofile:
        for id_file_path in (id_file_path_1, id_file_path_2):
            with open(id_file_path, 'r') as id_file:
                for line in id_file:
                    entry = line.strip()
                    # An empty entry is not an ID; writing it would add a
                    # blank line that downstream readers parse as the
                    # ID ''.
                    if not entry or entry.startswith('#') or entry in seen:
                        continue
                    ofile.write('%s\n' % entry)
                    seen.add(entry)
58 | 
59 | 
if __name__ == '__main__':

    # Command-line entry point: declare the options, parse them and hand
    # the three paths to main().
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter,
        description=('Merges two Molecule ID files\n'
                     '(e.g., created via `datatable_to_id.py`,'
                     ' `funcgroup_presence_to_id.py`\n'
                     ' or `mol2_to_id.py`) into a single ID file\n'
                     ' while preventing duplicate entries.'),
        epilog=('Example:\n'
                'python merge_id_files.py\\\n'
                '   --input1 mol2s_1.txt\\\n'
                '   --input2 mol2s_2.txt\\\n'
                '   --output merged.txt'))

    parser.add_argument(
        '-i1', '--input1',
        required=True,
        type=str,
        help=('(Required.) Input ID file that contains molecule\n'
              'IDs (one ID per line).'))
    parser.add_argument(
        '-i2', '--input2',
        required=True,
        type=str,
        help=('(Required.) Input ID file that contains molecule\n'
              'IDs (one ID per line).'))
    parser.add_argument(
        '-o', '--output',
        required=True,
        type=str,
        help='(Required.) Path to the output ID file.')

    parser.add_argument('--version', version='v. 1.0', action='version')

    args = parser.parse_args()

    main(id_file_path_1=args.input1,
         id_file_path_2=args.input2,
         output_path=args.output)
96 | 


--------------------------------------------------------------------------------
/tools/mol2_to_id.py:
--------------------------------------------------------------------------------
  1 | # Sebastian Raschka 2017
  2 | #
  3 | # screenlamp is a Python toolkit
  4 | # for hypothesis-driven virtual screening.
  5 | #
  6 | # Copyright (C) 2017 Michigan State University
  7 | # License: Apache v2
  8 | #
  9 | # Software author: Sebastian Raschka <http://sebastianraschka.com>
 10 | # Software author email: mail@sebastianraschka.com
 11 | #
 12 | # Software source repository: https://github.com/rasbt/screenlamp
 13 | # Documentation: https://psa-lab.github.io/screenlamp
 14 | #
 15 | # screenlamp was developed in the
 16 | # Protein Structural Analysis & Design Laboratory
 17 | # (http://www.kuhnlab.bmb.msu.edu)
 18 | #
 19 | # If you are using screenlamp in your research, please cite
 20 | # the following journal article:
 21 | #
 22 | # Raschka, Sebastian,  Anne M. Scott, Nan Liu,
 23 | #   Santosh Gunturu, Mar Huertas, Weiming Li,
 24 | #   and Leslie A. Kuhn. 2017
 25 | #
 26 | # Enabling the hypothesis-driven prioritization of
 27 | #   ligand candidates in big databases:
 28 | #   Screenlamp and its application to GPCR inhibitor
 29 | #   discovery for invasive species control.
 30 | #
 31 | 
 32 | 
 33 | import argparse
 34 | import os
 35 | import sys
 36 | import time
 37 | 
 38 | from biopandas.mol2.mol2_io import split_multimol2
 39 | 
 40 | 
 41 | def get_mol2_files(dir_path):
 42 | 
 43 |     files = []
 44 | 
 45 |     if os.path.isdir(dir_path):
 46 |         for f in os.listdir(dir_path):
 47 |             if f.endswith(('.mol2', 'mol2.gz')):
 48 |                 file_path = os.path.join(dir_path, f)
 49 |                 files.append(file_path)
 50 | 
 51 |     elif (os.path.isfile(dir_path) and
 52 |           dir_path.endswith(('.mol2', 'mol2.gz'))):
 53 |         files.append(dir_path)
 54 | 
 55 |     return files
 56 | 
 57 | 
 58 | def mol2_to_idfile(mol2_files, id_file_path, verbose=0):
 59 |     with open(id_file_path, 'w') as f:
 60 |         for mol2_file in mol2_files:
 61 | 
 62 |             if verbose:
 63 |                 sys.stdout.write('Processing %s' % os.path.basename(mol2_file))
 64 |                 sys.stdout.flush()
 65 |                 start = time.time()
 66 | 
 67 |             for idx, mol2 in enumerate(split_multimol2(mol2_file)):
 68 |                 f.write(mol2[0] + '\n')
 69 | 
 70 |             if verbose:
 71 |                 elapsed = time.time() - start
 72 |                 n_molecules = idx + 1
 73 |                 sys.stdout.write(' | scanned %d molecules | %d mol/sec\n' %
 74 |                                  (n_molecules, n_molecules / elapsed))
 75 |                 sys.stdout.flush()
 76 | 
 77 | 
 78 | def main(input_dir, output_file, verbose):
 79 |     mol2_files = get_mol2_files(dir_path=input_dir)
 80 |     mol2_to_idfile(mol2_files=mol2_files,
 81 |                    id_file_path=output_file,
 82 |                    verbose=verbose)
 83 |     if verbose:
 84 |         print('Finished')
 85 | 
 86 | 
 87 | if __name__ == '__main__':
 88 | 
 89 |     parser = argparse.ArgumentParser(
 90 |             description='Writes a file with molecule IDs from MOL2 files.',
 91 |             epilog="""Example:
 92 | python mol2_to_id.py\\
 93 |    --input mol2_dir\\
 94 |    --output ids.txt""",
 95 |             formatter_class=argparse.RawTextHelpFormatter)
 96 | 
 97 |     parser.add_argument('-i', '--input',
 98 |                         type=str,
 99 |                         required=True,
100 |                         help='(Required.) Input `.mol2` or `.mol2.gz` file,'
101 |                              'or a directory of MOL2 files.')
102 |     parser.add_argument('-o', '--output',
103 |                         type=str,
104 |                         required=True,
105 |                         help='(Required.) Output path for the ID file.'
106 |                              ' For example, `ids.txt`.')
107 |     parser.add_argument('-v', '--verbose',
108 |                         type=int,
109 |                         default=1,
110 |                         help='(Optional, default: `1`.)'
111 |                              ' Verbosity level. If 0, does not print any'
112 |                              ' output.'
113 |                              ' If 1 (default), prints the file currently'
114 |                              ' processing.')
115 | 
116 |     parser.add_argument('--version', action='version', version='v. 1.0')
117 | 
118 |     args = parser.parse_args()
119 | 
120 |     main(args.input, args.output, args.verbose)
121 | 


--------------------------------------------------------------------------------
/tools/overlay_molecules_rocs.py:
--------------------------------------------------------------------------------
  1 | # Sebastian Raschka 2017
  2 | #
  3 | # screenlamp is a Python toolkit
  4 | # for hypothesis-driven virtual screening.
  5 | #
  6 | # Copyright (C) 2017 Michigan State University
  7 | # License: Apache v2
  8 | #
  9 | # Software author: Sebastian Raschka <http://sebastianraschka.com>
 10 | # Software author email: mail@sebastianraschka.com
 11 | #
 12 | # Software source repository: https://github.com/rasbt/screenlamp
 13 | # Documentation: https://psa-lab.github.io/screenlamp
 14 | #
 15 | # screenlamp was developed in the
 16 | # Protein Structural Analysis & Design Laboratory
 17 | # (http://www.kuhnlab.bmb.msu.edu)
 18 | #
 19 | # If you are using screenlamp in your research, please cite
 20 | # the following journal article:
 21 | #
 22 | # Raschka, Sebastian,  Anne M. Scott, Nan Liu,
 23 | #   Santosh Gunturu, Mar Huertas, Weiming Li,
 24 | #   and Leslie A. Kuhn. 2017
 25 | #
 26 | # Enabling the hypothesis-driven prioritization of
 27 | #   ligand candidates in big databases:
 28 | #   Screenlamp and its application to GPCR inhibitor
 29 | #   discovery for invasive species control.
 30 | #
 31 | 
 32 | import os
 33 | import subprocess
 34 | import sys
 35 | import argparse
 36 | from multiprocessing import cpu_count
 37 | from biopandas.mol2.mol2_io import split_multimol2
 38 | 
 39 | 
 40 | def check_query(query_path):
 41 |     ids = [mol2[0] for mol2 in split_multimol2(query_path)]
 42 |     n_ids = len(ids)
 43 |     if n_ids > 1:
 44 |         n_unique_ids = len(set(ids))
 45 |         if n_unique_ids > 1:
 46 |             raise ValueError('Please Make sure that you only submit one'
 47 |                              ' molecule or, if you submit a multi-conformer'
 48 |                              ' query, that conformers of the molecule'
 49 |                              ' have all the same molecule ID labels.'
 50 |                              ' Found %d molecules and %d unique labels'
 51 |                              % (n_ids, n_unique_ids))
 52 | 
 53 | 
 54 | def get_num_cpus(n_cpus):
 55 |     if not n_cpus:
 56 |         n_cpus = cpu_count()
 57 |     elif n_cpus < 0:
 58 |         n_cpus = cpu_count() - n_cpus
 59 |     return n_cpus
 60 | 
 61 | 
 62 | def get_mol2_files(dir_path):
 63 | 
 64 |     files = []
 65 | 
 66 |     if os.path.isdir(dir_path):
 67 |         for f in os.listdir(dir_path):
 68 |             if f.endswith(('.mol2', 'mol2.gz')):
 69 |                 file_path = os.path.join(dir_path, f)
 70 |                 files.append(file_path)
 71 | 
 72 |     elif (os.path.isfile(dir_path) and
 73 |           dir_path.endswith(('.mol2', 'mol2.gz'))):
 74 |         files.append(dir_path)
 75 | 
 76 |     return files
 77 | 
 78 | 
 79 | def run_rocs(source_file, target_file, n_processes, settings):
 80 | 
 81 |     prefix = ''.join(target_file.split('.mol2')[:-1])
 82 | 
 83 |     sys.stdout.write('Processing %s\n' % os.path.basename(source_file))
 84 |     sys.stdout.flush()
 85 | 
 86 |     for idx, mol2 in enumerate(split_multimol2(QUERY_FILE)):
 87 |         if idx >= 1:
 88 |             mcquery = 'true'
 89 |             break
 90 |     if not idx:
 91 |         mcquery = 'false'
 92 | 
 93 |     cmd = [EXECUTABLE,
 94 |            '-ref', QUERY_FILE,
 95 |            '-dbase', source_file,
 96 |            '-outputquery', 'false',
 97 |            '-prefix', prefix,
 98 |            '-mcquery', mcquery,
 99 |            '-mpi_np', str(n_processes),
100 |            '-oformat', 'mol2']
101 | 
102 |     if settings:
103 |         for s in settings.split():
104 |             s = s.strip()
105 |             if s:
106 |                 cmd.append(s)
107 | 
108 |     subprocess.call(cmd, stdout=subprocess.PIPE, bufsize=1)
109 | 
110 | 
def main(input_dir, output_dir, n_processes, settings):
    """Run `run_rocs` on every MOL2 file found at `input_dir`.

    `output_dir` is created when it does not exist yet, and the query file
    named by the module-level `QUERY_FILE` is validated with `check_query`
    before any database file is processed. Each target path keeps the
    base name of its input file and is placed in `output_dir`.

    Parameters
    ----------
    input_dir : str
        Directory (or single file) passed to `get_mol2_files`.
    output_dir : str
        Directory receiving the output files.
    n_processes : int
        Requested process count; resolved once through `get_num_cpus`.
    settings : str
        Extra settings string forwarded unchanged to `run_rocs`.
    """
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    check_query(QUERY_FILE)

    sources = get_mol2_files(input_dir)
    destinations = []
    for path in sources:
        destinations.append(os.path.join(output_dir, os.path.basename(path)))

    worker_count = get_num_cpus(n_processes)

    for source, destination in zip(sources, destinations):
        run_rocs(source_file=source,
                 target_file=destination,
                 n_processes=worker_count,
                 settings=settings)
127 | 
128 | 
# Command line entry point of the ROCS wrapper script.
if __name__ == '__main__':

    # RawTextHelpFormatter keeps the manual line breaks ('\n') that are
    # embedded in the description, epilog, and help strings below.
    parser = argparse.ArgumentParser(
            description='Wrapper running OpenEye ROCS on one'
                        '\nor more database partitions.',
            epilog="""Example:
python overlay_molecules_rocs.py\\
   --input database_conformers/\\
   --output rocs_overlays/\\
   --executable /.../rocs-3.2.1.4\\
   --query query.mol2\\
   --settings "-rankby TanimotoCombo -maxhits 0 -besthits 0 -progress percent"\\
   --processes 0""",
            formatter_class=argparse.RawTextHelpFormatter)

    # Required arguments: input location, output directory, query
    # molecule, and the ROCS executable.
    parser.add_argument('-i', '--input',
                        type=str,
                        required=True,
                        help='Path to input directory containing the database'
                             '\nmolecules in `.mol2` and/or `.mol2.gz` format.'
                        )
    parser.add_argument('-o', '--output',
                        type=str,
                        required=True,
                        help='(Required.) Directory path for writing'
                             ' the `.mol2`'
                             '\noverlay ROCS status and ROCS report (`.rpt`)'
                             ' files.')
    parser.add_argument('--query',
                        type=str,
                        required=True,
                        help='(Required.) Path to the query molecule'
                             '\nin `.mol2` and/or `.mol2.gz` format.'
                             '\nThe query molecule file could be a single'
                             '\nstructure of multiple-conformers of the same'
                             '\nstructure. If a multi-conformer file is'
                             '\nsubmitted, please make sure that all'
                             '\nconformers in the mol2 file have the same'
                             '\nmolecule ID/Name.')
    parser.add_argument('--executable',
                        type=str,
                        required=True,
                        help="""(Required.) The path or command for running
OpenEye ROCS on your system.""")
    # Optional arguments: extra ROCS options and the process count.
    parser.add_argument('--settings',
                        type=str,
                        default='-rankby TanimotoCombo -maxhits 0'
                                ' -besthits 0 -progress percent',
                        help='(Optional, default:" -rankby TanimotoCombo -maxhits 0'
                             ' -besthits 0 -progress percent")\n ROCS settings to use.')
    parser.add_argument('--processes',
                        type=int,
                        default=1,
                        help='(Optional, default: `1`.) Number of processes to'
                             ' run in parallel.'
                             '\nIf processes > 0, the specified number of CPUs'
                             '\nwill be used.'
                             '\nIf processes = 0, all available CPUs will'
                             '\nbe used.'
                             '\nIf processes = -1, all available CPUs'
                             '\nminus `processes` will be used.')

    parser.add_argument('-v', '--version', action='version', version='v. 1.0')

    args = parser.parse_args()

    # Module-level globals: main() reads QUERY_FILE; EXECUTABLE is
    # presumably read by run_rocs (its beginning is not shown here).
    QUERY_FILE = args.query
    EXECUTABLE = args.executable

    main(input_dir=args.input,
         output_dir=args.output,
         n_processes=args.processes,
         settings=args.settings)
202 | 


--------------------------------------------------------------------------------
/tools/overlay_molecules_shapeit.py:
--------------------------------------------------------------------------------
  1 | # Sebastian Raschka 2017
  2 | #
  3 | # screenlamp is a Python toolkit
  4 | # for hypothesis-driven virtual screening.
  5 | #
  6 | # Copyright (C) 2017 Michigan State University
  7 | # License: Apache v2
  8 | #
  9 | # Software author: Sebastian Raschka <http://sebastianraschka.com>
 10 | # Software author email: mail@sebastianraschka.com
 11 | #
 12 | # Software source repository: https://github.com/rasbt/screenlamp
 13 | # Documentation: https://psa-lab.github.io/screenlamp
 14 | #
 15 | # screenlamp was developed in the
 16 | # Protein Structural Analysis & Design Laboratory
 17 | # (http://www.kuhnlab.bmb.msu.edu)
 18 | #
 19 | # If you are using screenlamp in your research, please cite
 20 | # the following journal article:
 21 | #
 22 | # Raschka, Sebastian,  Anne M. Scott, Nan Liu,
 23 | #   Santosh Gunturu, Mar Huertas, Weiming Li,
 24 | #   and Leslie A. Kuhn. 2017
 25 | #
 26 | # Enabling the hypothesis-driven prioritization of
 27 | #   ligand candidates in big databases:
 28 | #   Screenlamp and its application to GPCR inhibitor
 29 | #   discovery for invasive species control.
 30 | #
 31 | 
 32 | import os
 33 | import subprocess
 34 | import sys
 35 | import argparse
 36 | from biopandas.mol2.mol2_io import split_multimol2
 37 | 
 38 | 
 39 | def check_query(query_path):
 40 |     ids = [mol2[0] for mol2 in split_multimol2(query_path)]
 41 |     n_ids = len(ids)
 42 |     if n_ids > 1:
 43 |         n_unique_ids = len(set(ids))
 44 |         if n_unique_ids > 1:
 45 |             raise ValueError('Please Make sure that you only submit one'
 46 |                              ' molecule or, if you submit a multi-conformer'
 47 |                              ' query, that conformers of the molecule'
 48 |                              ' have all the same molecule ID labels.'
 49 |                              ' Found %d molecules and %d unique labels'
 50 |                              % (n_ids, n_unique_ids))
 51 | 
 52 | 
 53 | def get_mol2_files(dir_path):
 54 | 
 55 |     files = []
 56 | 
 57 |     if os.path.isdir(dir_path):
 58 |         for f in os.listdir(dir_path):
 59 |             if f.endswith(('.mol2', 'mol2.gz')):
 60 |                 file_path = os.path.join(dir_path, f)
 61 |                 files.append(file_path)
 62 | 
 63 |     elif (os.path.isfile(dir_path) and
 64 |           dir_path.endswith(('.mol2', 'mol2.gz'))):
 65 |         files.append(dir_path)
 66 | 
 67 |     return files
 68 | 
 69 | 
 70 | def run_shapeit(source_file, target_file, settings):
 71 | 
 72 |     prefix = ''.join(target_file.split('.mol2')[:-1])
 73 | 
 74 |     sys.stdout.write('Processing %s\n' % os.path.basename(source_file))
 75 |     sys.stdout.flush()
 76 | 
 77 |     if source_file.endswith('.gz'):
 78 |         sys.stdout.write('Shape-it does not support compressed files'
 79 |                          ' please decompress %s' %
 80 |                          os.path.basename(source_file))
 81 |         sys.stdout.flush()
 82 | 
 83 |     cmd = [EXECUTABLE,
 84 |            '--reference', QUERY_FILE,
 85 |            '--dbase', source_file,
 86 |            '--out', target_file,
 87 |            '--scores', prefix + '.rpt',
 88 |            '--noRef']
 89 | 
 90 |     if settings:
 91 |         for s in settings.split():
 92 |             s = s.strip()
 93 |             if s:
 94 |                 cmd.append(s)
 95 | 
 96 |     print(' '.join(cmd))
 97 |     subprocess.call(cmd, stdout=subprocess.PIPE, bufsize=1)
 98 | 
 99 | 
def main(input_dir, output_dir, settings):
    """Run Shape-it on every MOL2 file collected from `input_dir`.

    Parameters
    ----------
    input_dir : str
        Directory (or single file) passed to `get_mol2_files`.
    output_dir : str
        Directory receiving one overlay file per input file (same
        base name). It is created, together with any missing parent
        directories, if it does not exist yet.
    settings : str
        Additional Shape-it command line options forwarded to
        `run_shapeit`.

    """
    if not os.path.exists(output_dir):
        # makedirs instead of mkdir so that nested output paths work
        os.makedirs(output_dir)

    # QUERY_FILE is a module-level global assigned in the __main__ section
    check_query(QUERY_FILE)
    mol2_in_files = get_mol2_files(input_dir)
    mol2_out_files = [os.path.join(output_dir, os.path.basename(mol2))
                      for mol2 in mol2_in_files]

    for i, j in zip(mol2_in_files, mol2_out_files):
        run_shapeit(source_file=i,
                    target_file=j,
                    settings=settings)
113 | 
114 | 
# Command line entry point of the Shape-it wrapper script.
if __name__ == '__main__':

    # RawTextHelpFormatter keeps the manual line breaks ('\n') that are
    # embedded in the description, epilog, and help strings below.
    # The epilog uses ''' delimiters so that the example can end with the
    # closing double quote of the --settings value.
    parser = argparse.ArgumentParser(
            description='Wrapper running Silicos-it Shape-it on one'
                        '\nor more database partitions.'
                        '\nFor more information about Shape-it, please see'
                        ' http://silicos-it.be.s3-website-eu-west-1.'
                        'amazonaws.com/software/shape-it/1.0.1/shape-it.html',
            epilog='''Example:
python overlay_molecules_shapeit.py\\
   --input database_conformers/\\
   --output shapeit_overlays/\\
   --executable 'shape-it'\\
   --query query.mol2\\
   --settings "--rankBy Tanimoto"''',
            formatter_class=argparse.RawTextHelpFormatter)

    # Required arguments: input location, output directory, query
    # molecule, and the Shape-it executable.
    parser.add_argument('-i', '--input',
                        type=str,
                        required=True,
                        help='Path to input directory containing the database'
                             '\nmolecules in `.mol2` and/or `.mol2.gz` format.'
                        )
    parser.add_argument('-o', '--output',
                        type=str,
                        required=True,
                        help='(Required.) Directory path for writing'
                             ' the `.mol2`'
                             '\noverlays and Shape-it score/report (`.rpt`)'
                             ' files.')
    parser.add_argument('--query',
                        type=str,
                        required=True,
                        help='(Required.) Path to the query molecule'
                             '\nin `.mol2` and/or `.mol2.gz` format.'
                             '\nThe query molecule file could be a single'
                             '\nstructure of multiple-conformers of the same'
                             '\nstructure. If a multi-conformer file is'
                             '\nsubmitted, please make sure that all'
                             '\nconformers in the mol2 file have the same'
                             '\nmolecule ID/Name.')
    parser.add_argument('--executable',
                        type=str,
                        required=True,
                        help="""(Required.) The path or command for running
Silicos-it Shape-it on your system.""")
    # Optional argument: extra Shape-it options.
    parser.add_argument('--settings',
                        type=str,
                        default='--rankBy Tanimoto',
                        help='(Optional, default:" --rankBy Tanimoto")'
                             '\nshape-it settings to use.')

    parser.add_argument('-v', '--version', action='version', version='v. 1.0')

    args = parser.parse_args()

    # Module-level globals read by main() and run_shapeit()
    QUERY_FILE = args.query
    EXECUTABLE = args.executable

    main(input_dir=args.input,
         output_dir=args.output,
         settings=args.settings)
177 | 


--------------------------------------------------------------------------------
/tools/pipelines/experimental/pipeline-example-1-config_obabel.yaml:
--------------------------------------------------------------------------------
  1 | # Sebastian Raschka 2017
  2 | #
  3 | # screenlamp is a Python toolkit
  4 | # for hypothesis-driven virtual screening.
  5 | #
  6 | # Copyright (C) 2017 Michigan State University
  7 | # License: Apache v2
  8 | #
  9 | # Software author: Sebastian Raschka <http://sebastianraschka.com>
 10 | # Software author email: mail@sebastianraschka.com
 11 | #
 12 | # Software source repository: https://github.com/rasbt/screenlamp
 13 | # Documentation: https://psa-lab.github.io/screenlamp
 14 | #
 15 | # screenlamp was developed in the
 16 | # Protein Structural Analysis & Design Laboratory
 17 | # (http://www.kuhnlab.bmb.msu.edu)
 18 | #
 19 | # If you are using screenlamp in your research, please cite
 20 | # the following journal article:
 21 | #
 22 | # Raschka, Sebastian,  Anne M. Scott, Nan Liu,
 23 | #   Santosh Gunturu, Mar Huertas, Weiming Li,
 24 | #   and Leslie A. Kuhn. 2017
 25 | #
 26 | # Enabling the hypothesis-driven prioritization of
 27 | #   ligand candidates in big databases:
 28 | #   Screenlamp and its application to GPCR inhibitor
 29 | #   discovery for invasive species control.
 30 | #
 31 | 
 32 | general settings:
 33 |   screenlamp tools directory: /Users/sebastian/code/screenlamp/tools
 34 |   project output directory: /Users/sebastian/Desktop/screening-results
 35 |   input mol2 directory: /Users/sebastian/code/screenlamp/example-files/example_1/dataset/mol2
 36 |   number of cpus: 0 # 0 means all available CPUs (recommended)
 37 | 
 38 | ################################################
 39 | ### Step 01: SELECT MOLECULES FROM DATA TABLE
 40 | ################################################
 41 | molecule property filter settings:
 42 |   datatable path: /Users/sebastian/code/screenlamp/example-files/example_1/dataset/tables/3_prop.xls
 43 |   # the following filter key selects all molecules with 
 44 |   # fewer than 8 rotatable bonds and a molecular weight 
 45 |   # greater than 200 g/mol
 46 |   column filter: (NRB <= 7) & (MWT > 200)
 47 | 
 48 | ###################################################
 49 | ### Step 02: PREFILTER BY FUNCTIONAL GROUP PRESENCE
 50 | ###################################################
 51 | functional group presence filter settings:
 52 |   # the following selection key selects all molecules that
 53 |   # have an sp3-hybridized or sulfone sulfur atom (MOL2 atom type S.3 or S.o2)
 54 |   # and a keto group (MOL2 atom type O.2)
 55 |   selection key: ((atom_type == 'S.3') | (atom_type == 'S.o2')) --> (atom_type == 'O.2')
 56 | 
 57 | ###################################################
 58 | ### Step 03: PREFILTER BY FUNCTIONAL GROUP DISTANCE
 59 | ###################################################
 60 | functional group distance filter settings:
 61 |   # the following selection criteria select all molecules that
 62 |   # have an sp3-hybridized or sulfone sulfur atom (MOL2 atom type S.3 or S.o2)
 63 |   # and a keto group (MOL2 atom type O.2), and where the distance between
 64 |   # the sulfur and oxygen atoms is between 13 and 20 angstrom
 65 |   selection key: ((atom_type == 'S.3') | (atom_type == 'S.o2')) --> (atom_type == 'O.2')
 66 |   distance: 13-20
 67 | 
 68 | ################################################
 69 | ### Step 04: OpenBabel conformers
 70 | ################################################
 71 | OpenBabel Confab settings:
 72 |   OpenBabel executable: obabel
 73 | 
 74 | #########################################################
 75 | ### Step 05: ROCS OVERLAYS & Step 06: SORT ROCS OVERLAYS
 76 | #########################################################
 77 | ROCS settings:
 78 |   ROCS executable: /Applications/ROCS 3.2.1.4.app/Contents/MacOS/rocs-3.2.1.4
 79 |   ROCS run rankby: TanimotoCombo
 80 |   ROCS results sort by: TanimotoCombo,ColorTanimoto
 81 |   ROCS score threshold: (TanimotoCombo >= 0.75) & (ColorTanimoto >= 0.1)
 82 |   query molecule path: /Users/sebastian/code/screenlamp/example-files/example_1/dataset/query/3kpzs_conf_subset_nowarts.mol2
 83 |   # The query molecule above could be a single- or multi-conformer .mol2/.mol2.gz file.
 84 |   # However, for ROCS, please make sure that the query file does not have
 85 |   # "multi-conformer warts." I.e., all molecules in the multi-conformer .mol2/.mol2.gz
 86 |   # file must have exactly the same molecule ID without any enumerating prefixes
 87 |   # or suffixes.
 88 | 
 89 | ################################################
 90 | ### Step 07: MATCHING FUNCTIONAL GROUPS
 91 | ################################################
 92 | 
 93 | functional group matching selection settings:
 94 |   maximum pairwise atom distance: 1.3 # in angstrom
 95 | 
 96 | ################################################
 97 | ### Step 08: SELECTING FUNCTIONAL GROUP MATCHES
 98 | ################################################
 99 | 
100 | functional group match selection settings:
101 |   # the following selection key selects all database molecules
102 |   # that meet the two following criteria
103 |   #   a) an overlay with the S1 atom in the query molecules
104 |   #   b) an overlay with the O2 atom in the query molecule 
105 |   # To satisfy a), the matching atom must be of atom type S.3 or S.o2
106 |   #   and its charge must be at least +1 or more positive.
107 |   # To satisfy b), the matching atom must be of atom type O.2 (sp2 oxygen)
108 |   #   and its partial charge must be -0.5 or more negative
109 |   atomtype selection keys: ((S1 == 'S.3') | (S1 == 'S.o2')) --> (O2 == 'O.2')
110 |   charge selection keys: ((S1 >= 1.0)) --> (O2 <= -0.5)
111 |   write mol2 files: true
112 | 
113 | 
114 | 


--------------------------------------------------------------------------------
/tools/pipelines/pipeline-example-1-config.yaml:
--------------------------------------------------------------------------------
  1 | # Sebastian Raschka 2017
  2 | #
  3 | # screenlamp is a Python toolkit
  4 | # for hypothesis-driven virtual screening.
  5 | #
  6 | # Copyright (C) 2017 Michigan State University
  7 | # License: Apache v2
  8 | #
  9 | # Software author: Sebastian Raschka <http://sebastianraschka.com>
 10 | # Software author email: mail@sebastianraschka.com
 11 | #
 12 | # Software source repository: https://github.com/rasbt/screenlamp
 13 | # Documentation: https://psa-lab.github.io/screenlamp
 14 | #
 15 | # screenlamp was developed in the
 16 | # Protein Structural Analysis & Design Laboratory
 17 | # (http://www.kuhnlab.bmb.msu.edu)
 18 | #
 19 | # If you are using screenlamp in your research, please cite
 20 | # the following journal article:
 21 | #
 22 | # Raschka, Sebastian,  Anne M. Scott, Nan Liu,
 23 | #   Santosh Gunturu, Mar Huertas, Weiming Li,
 24 | #   and Leslie A. Kuhn. 2017
 25 | #
 26 | # Enabling the hypothesis-driven prioritization of
 27 | #   ligand candidates in big databases:
 28 | #   Screenlamp and its application to GPCR inhibitor
 29 | #   discovery for invasive species control.
 30 | #
 31 | 
 32 | general settings:
 33 |   screenlamp tools directory: /Users/sebastian/code/screenlamp/tools
 34 |   project output directory: /Users/sebastian/code/screenlamp/example-files/example_1/screening-results
 35 |   input mol2 directory: /Users/sebastian/code/screenlamp/example-files/example_1/dataset/mol2
 36 |   number of cpus: 0 # 0 means all available CPUs (recommended)
 37 | 
 38 | ################################################
 39 | ### Step 01: SELECT MOLECULES FROM DATA TABLE
 40 | ################################################
 41 | molecule property filter settings:
 42 |   datatable path: /Users/sebastian/code/screenlamp/example-files/example_1/dataset/tables/3_prop.xls
 43 |   # the following filter key selects all molecules with 
 44 |   # fewer than 8 rotatable bonds and a molecular weight 
 45 |   # greater than 200 g/mol
 46 |   column filter: (NRB <= 7) & (MWT > 200)
 47 | 
 48 | ###################################################
 49 | ### Step 02: PREFILTER BY FUNCTIONAL GROUP PRESENCE
 50 | ###################################################
 51 | functional group presence filter settings:
 52 |   # the following selection key selects all molecules that
 53 |   # have an sp3-hybridized or sulfone sulfur atom (MOL2 atom type S.3 or S.o2)
 54 |   # and a keto group (MOL2 atom type O.2)
 55 |   selection key: ((atom_type == 'S.3') | (atom_type == 'S.o2')) --> (atom_type == 'O.2')
 56 | 
 57 | ###################################################
 58 | ### Step 03: PREFILTER BY FUNCTIONAL GROUP DISTANCE
 59 | ###################################################
 60 | functional group distance filter settings:
 61 |   # the following selection criteria select all molecules that
 62 |   # have an sp3-hybridized or sulfone sulfur atom (MOL2 atom type S.3 or S.o2)
 63 |   # and a keto group (MOL2 atom type O.2), and where the distance between
 64 |   # the sulfur and oxygen atoms is between 13 and 20 angstrom
 65 |   selection key: ((atom_type == 'S.3') | (atom_type == 'S.o2')) --> (atom_type == 'O.2')
 66 |   distance: 13-20
 67 | 
 68 | ################################################
 69 | ### Step 04: OMEGA conformers
 70 | ################################################
 71 | OMEGA settings:
 72 |   OMEGA executable: /Applications/OMEGA 2.5.1.4.app/Contents/MacOS/omega2-2.5.1.4
 73 | 
 74 | #########################################################
 75 | ### Step 05: ROCS OVERLAYS & Step 06: SORT ROCS OVERLAYS
 76 | #########################################################
 77 | ROCS settings:
 78 |   ROCS executable: /Applications/ROCS 3.2.1.4.app/Contents/MacOS/rocs-3.2.1.4
 79 |   ROCS run rankby: TanimotoCombo
 80 |   ROCS results sort by: TanimotoCombo,ColorTanimoto
 81 |   ROCS score threshold: (TanimotoCombo >= 0.75) & (ColorTanimoto >= 0.1)
 82 |   query molecule path: /Users/sebastian/code/screenlamp/example-files/example_1/dataset/query/3kpzs_conf_subset_nowarts.mol2
 83 |   # The query molecule above could be a single- or multi-conformer .mol2/.mol2.gz file.
 84 |   # However, for ROCS, please make sure that the query file does not have
 85 |   # "multi-conformer warts." I.e., all molecules in the multi-conformer .mol2/.mol2.gz
 86 |   # file must have exactly the same molecule ID without any enumerating prefixes
 87 |   # or suffixes.
 88 | 
 89 | ################################################
 90 | ### Step 07: MATCHING FUNCTIONAL GROUPS
 91 | ################################################
 92 | 
 93 | functional group matching selection settings:
 94 |   maximum pairwise atom distance: 1.3 # in angstrom
 95 | 
 96 | ################################################
 97 | ### Step 08: SELECTING FUNCTIONAL GROUP MATCHES
 98 | ################################################
 99 | 
100 | functional group match selection settings:
101 |   # the following selection key selects all database molecules
102 |   # that meet the two following criteria
103 |   #   a) an overlay with the S1 atom in the query molecules
104 |   #   b) an overlay with the O2 atom in the query molecule 
105 |   # To satisfy a), the matching atom must be of atom type S.3 or S.o2
106 |   #   and its charge must be at least +1 or more positive.
107 |   # To satisfy b), the matching atom must be of atom type O.2 (sp2 oxygen)
108 |   #   and its partial charge must be -0.5 or more negative
109 |   atomtype selection keys: ((S1 == 'S.3') | (S1 == 'S.o2')) --> (O2 == 'O.2')
110 |   charge selection keys: ((S1 >= 1.0)) --> (O2 <= -0.5)
111 |   write mol2 files: true
112 | 
113 | 
114 | 


--------------------------------------------------------------------------------
/tools/pipelines/pipeline-example-1.py:
--------------------------------------------------------------------------------
  1 | # Sebastian Raschka 2017
  2 | #
  3 | # screenlamp is a Python toolkit
  4 | # for hypothesis-driven virtual screening.
  5 | #
  6 | # Copyright (C) 2017 Michigan State University
  7 | # License: Apache v2
  8 | #
  9 | # Software author: Sebastian Raschka <http://sebastianraschka.com>
 10 | # Software author email: mail@sebastianraschka.com
 11 | #
 12 | # Software source repository: https://github.com/rasbt/screenlamp
 13 | # Documentation: https://psa-lab.github.io/screenlamp
 14 | #
 15 | # screenlamp was developed in the
 16 | # Protein Structural Analysis & Design Laboratory
 17 | # (http://www.kuhnlab.bmb.msu.edu)
 18 | #
 19 | # If you are using screenlamp in your research, please cite
 20 | # the following journal article:
 21 | #
 22 | # Raschka, Sebastian,  Anne M. Scott, Nan Liu,
 23 | #   Santosh Gunturu, Mar Huertas, Weiming Li,
 24 | #   and Leslie A. Kuhn. 2017
 25 | #
 26 | # Enabling the hypothesis-driven prioritization of
 27 | #   ligand candidates in big databases:
 28 | #   Screenlamp and its application to GPCR inhibitor
 29 | #   discovery for invasive species control.
 30 | #
 31 | 
 32 | import subprocess
 33 | import os
 34 | import argparse
 35 | import yaml
 36 | 
 37 | 
 38 | ###############################################################################
 39 | 
 40 | parser = argparse.ArgumentParser(
 41 |         description='An example screenlamp pipeline ... [placeholder].',
 42 |         formatter_class=argparse.RawTextHelpFormatter)
 43 | 
 44 | parser.add_argument('-c', '--config_file',
 45 |                     type=str,
 46 |                     required=True,
 47 |                     default=0,
 48 |                     help='Path to the pipeline configuration file')
 49 | 
 50 | parser.add_argument('-s', '--start_at_step',
 51 |                     type=int,
 52 |                     required=False,
 53 |                     default=0,
 54 |                     help='Start the pipeline at a particular step')
 55 | 
 56 | parser.add_argument('-i', '--incremental',
 57 |                     type=str,
 58 |                     required=False,
 59 |                     default='false',
 60 |                     help='incremental mode. If enabled, stops before each step'
 61 |                     ' to ask the user to continue')
 62 | 
 63 | args = parser.parse_args()
 64 | start_at = args.start_at_step
 65 | config_path = args.config_file
 66 | 
 67 | print(args.incremental)
 68 | if args.incremental.lower() not in {'true', 'false'}:
 69 |     raise AttributeError('incremental must be true or false')
 70 | if args.incremental == 'true':
 71 |     incremental = True
 72 | else:
 73 |     incremental = False
 74 | 
 75 | with open(config_path, 'r') as stream:
 76 |     ymldct = yaml.load(stream)
 77 | 
 78 | PROJECT_PATH = ymldct['general settings']['project output directory']
 79 | SCREENLAMP_TOOLS_DIR = ymldct['general settings']['screenlamp tools directory']
 80 | INPUT_MOL2_PATH = ymldct['general settings']['input mol2 directory']
 81 | N_CPUS = str(ymldct['general settings']['number of cpus'])
 82 | DATATABLE_PATH = ymldct['molecule property filter settings']['datatable path']
 83 | DATATABLE_FILTER = ymldct['molecule property filter settings']['column filter']
 84 | FUNCTIONAL_GROUP_PRESENCE = ymldct[
 85 |     'functional group presence filter settings']['selection key']
 86 | FUNCTIONAL_GROUP_DISTANCE_SELECTION = ymldct[
 87 |     'functional group distance filter settings']['selection key']
 88 | FUNCTIONAL_GROUP_DISTANCE = ymldct[
 89 |     'functional group distance filter settings']['distance']
 90 | OMEGA_EXECUTABLE = ymldct['OMEGA settings']['OMEGA executable']
 91 | ROCS_EXECUTABLE = ymldct['ROCS settings']['ROCS executable']
 92 | ROCS_RANKBY = ymldct['ROCS settings']['ROCS run rankby']
 93 | ROCS_SORTBY = ymldct['ROCS settings']['ROCS results sort by']
 94 | ROCS_THRESHOLD = ymldct['ROCS settings']['ROCS score threshold']
 95 | QUERY_PATH = ymldct['ROCS settings']['query molecule path']
 96 | 
 97 | FGROUP_MATCH_DISTANCE = str(ymldct['functional group matching '
 98 |                                    'selection settings'][
 99 |                                    'maximum pairwise atom distance'])
100 | 
101 | WRITE_MATCH_OVERLAYS = False
102 | if ymldct['functional group match selection settings']['write mol2 files'] in (
103 |       'true', True):
104 |     WRITE_MATCH_OVERLAYS = True
105 | FGROUP_ATOMTYPE = ymldct['functional group match selection settings'][
106 |                          'atomtype selection keys']
107 | FGROUP_CHARGE = ymldct['functional group match selection settings'][
108 |                        'charge selection keys']
109 | 
110 | if not os.path.exists(PROJECT_PATH):
111 |     os.makedirs(PROJECT_PATH)
112 | 
113 | ###############################################################################
114 | 
# Step 00: count the molecules in the input MOL2 directory.
if start_at <= 0:
    s = """

################################################
COUNT MOLECULES IN DATATABLE_PATH
################################################
    """
    print(s)

    count_script = os.path.join(SCREENLAMP_TOOLS_DIR, 'count_mol2.py')
    cmd = ['python', count_script]
    cmd += ['--input', INPUT_MOL2_PATH]

    print('Running command:\n%s\n' % ' '.join(cmd))

    # In incremental mode the user has to confirm the command first
    if incremental:
        input('Press Enter to proceed or CTRL+C to quit')
    subprocess.call(cmd)
132 | 
133 | ###############################################################################
134 | 
# Step 01: select molecule IDs from the data table, extract the
# matching MOL2 structures, and count them.
if start_at <= 1:
    s = """

################################################
Step 01: SELECT MOLECULES FROM DATA TABLE
################################################
    """
    print(s)

    step_id_file = os.path.join(PROJECT_PATH, '01_ids_from_database.txt')
    step_mol2_dir = os.path.join(PROJECT_PATH, '01_selected-mol2s')

    # Pairs of (command, text printed after the command has finished)
    step_commands = (
        (['python', os.path.join(SCREENLAMP_TOOLS_DIR, 'datatable_to_id.py'),
          '--input', DATATABLE_PATH,
          '--output', step_id_file,
          '--id_column', 'ZINC_ID',
          '--selection', DATATABLE_FILTER],
         '\n\n'),
        (['python', os.path.join(SCREENLAMP_TOOLS_DIR, 'id_to_mol2.py'),
          '--input', INPUT_MOL2_PATH,
          '--id_file', step_id_file,
          '--output', step_mol2_dir,
          '--includelist', 'True'],
         '\n\nSELECTED MOL2s:'),
        (['python', os.path.join(SCREENLAMP_TOOLS_DIR, 'count_mol2.py'),
          '--input', step_mol2_dir],
         None),
    )

    for cmd, trailer in step_commands:
        print('Running command:\n%s\n' % ' '.join(cmd))
        # In incremental mode the user has to confirm each command
        if incremental:
            input('Press Enter to proceed or CTRL+C to quit')
        subprocess.call(cmd)
        if trailer is not None:
            print(trailer)
175 | 
176 | ###############################################################################
177 | 
# Step 02: keep the molecules that contain the requested functional
# groups, extract their MOL2 structures, and count them.
if start_at <= 2:
    s = """

################################################
Step 02: PREFILTER BY FUNCTIONAL GROUP PRESENCE
################################################
    """
    print(s)

    step_input_dir = os.path.join(PROJECT_PATH, '01_selected-mol2s')
    step_id_file = os.path.join(PROJECT_PATH,
                                '02_fgroup-presence_mol2ids.txt')
    step_mol2_dir = os.path.join(PROJECT_PATH, '02_fgroup-presence_mol2s')

    # Pairs of (command, text printed after the command has finished)
    step_commands = (
        (['python', os.path.join(SCREENLAMP_TOOLS_DIR,
                                 'funcgroup_presence_to_id.py'),
          '--input', step_input_dir,
          '--output', step_id_file,
          '--selection', FUNCTIONAL_GROUP_PRESENCE,
          '--processes', N_CPUS],
         '\n\n'),
        (['python', os.path.join(SCREENLAMP_TOOLS_DIR, 'id_to_mol2.py'),
          '--input', step_input_dir,
          '--id_file', step_id_file,
          '--output', step_mol2_dir,
          '--includelist', 'True'],
         '\n\nSELECTED MOL2s:'),
        (['python', os.path.join(SCREENLAMP_TOOLS_DIR, 'count_mol2.py'),
          '--input', step_mol2_dir],
         None),
    )

    for cmd, trailer in step_commands:
        print('Running command:\n%s\n' % ' '.join(cmd))
        # In incremental mode the user has to confirm each command
        if incremental:
            input('Press Enter to proceed or CTRL+C to quit')
        subprocess.call(cmd)
        if trailer is not None:
            print(trailer)
221 | 
222 | ###############################################################################
223 | 
# Step 03: keep the molecules whose functional groups lie within the
# configured distance range, extract their MOL2 structures, and count
# them.
if start_at <= 3:
    s = """

################################################
Step 03: PREFILTER BY FUNCTIONAL GROUP DISTANCE
################################################
    """
    print(s)

    step_input_dir = os.path.join(PROJECT_PATH, '02_fgroup-presence_mol2s')
    step_id_file = os.path.join(PROJECT_PATH,
                                '03_fgroup_distance_mol2ids.txt')
    step_mol2_dir = os.path.join(PROJECT_PATH, '03_fgroup_distance_mol2s')

    # Pairs of (command, text printed after the command has finished)
    step_commands = (
        (['python', os.path.join(SCREENLAMP_TOOLS_DIR,
                                 'funcgroup_distance_to_id.py'),
          '--input', step_input_dir,
          '--output', step_id_file,
          '--selection', FUNCTIONAL_GROUP_DISTANCE_SELECTION,
          '--distance', FUNCTIONAL_GROUP_DISTANCE,
          '--processes', N_CPUS],
         '\n\n'),
        (['python', os.path.join(SCREENLAMP_TOOLS_DIR, 'id_to_mol2.py'),
          '--input', step_input_dir,
          '--id_file', step_id_file,
          '--output', step_mol2_dir,
          '--includelist', 'True'],
         '\n\nSELECTED MOL2s:'),
        (['python', os.path.join(SCREENLAMP_TOOLS_DIR, 'count_mol2.py'),
          '--input', step_mol2_dir],
         None),
    )

    for cmd, trailer in step_commands:
        # Every command is echoed before it runs; the distance filter
        # command previously lacked this echo, unlike all other steps.
        print('Running command:\n%s\n' % ' '.join(cmd))
        # In incremental mode the user has to confirm each command
        if incremental:
            input('Press Enter to proceed or CTRL+C to quit')
        subprocess.call(cmd)
        if trailer is not None:
            print(trailer)
270 | 
271 | ###############################################################################
272 | 
if start_at <= 4:
    # Step 04: generate conformers for the molecules that passed Step 03
    # and then count the resulting structures.
    banner = """

################################################
Step 04: OMEGA conformers
################################################
    """
    print(banner)

    conformer_dir = os.path.join(PROJECT_PATH, '04_omega_conformers')

    omega_cmd = ['python',
                 os.path.join(SCREENLAMP_TOOLS_DIR,
                              'generate_conformers_omega.py'),
                 '--input', os.path.join(PROJECT_PATH,
                                         '03_fgroup_distance_mol2s'),
                 '--output', conformer_dir,
                 '--executable', OMEGA_EXECUTABLE,
                 '--processes', N_CPUS]
    count_cmd = ['python',
                 os.path.join(SCREENLAMP_TOOLS_DIR, 'count_mol2.py'),
                 '--input', conformer_dir]

    # Each entry pairs a command with the text printed after it finished
    # (None: print nothing).
    for cmd, trailer in ((omega_cmd, '\n\nSELECTED MOL2s:'),
                         (count_cmd, None)):
        print('Running command:\n%s\n' % ' '.join(cmd))
        if incremental:
            input('Press Enter to proceed or CTRL+C to quit')
        subprocess.call(cmd)
        if trailer is not None:
            print(trailer)
303 | 
304 | ###############################################################################
305 | 
if start_at <= 5:
    # Step 05: overlay the conformers from Step 04 onto the query with
    # ROCS and then count the overlaid structures.
    banner = """

################################################
Step 05: ROCS OVERLAYS
################################################
    """
    print(banner)

    overlay_dir = os.path.join(PROJECT_PATH, '05_rocs_overlays')
    rocs_settings = ('-rankby %s -maxhits 0 -besthits 0 -progress percent'
                     % ROCS_RANKBY)

    rocs_cmd = ['python',
                os.path.join(SCREENLAMP_TOOLS_DIR,
                             'overlay_molecules_rocs.py'),
                '--input', os.path.join(PROJECT_PATH, '04_omega_conformers'),
                '--output', overlay_dir,
                '--executable', ROCS_EXECUTABLE,
                '--query', QUERY_PATH,
                '--settings', rocs_settings,
                '--processes', N_CPUS]
    count_cmd = ['python',
                 os.path.join(SCREENLAMP_TOOLS_DIR, 'count_mol2.py'),
                 '--input', overlay_dir]

    for cmd in (rocs_cmd, count_cmd):
        print('Running command:\n%s\n' % ' '.join(cmd))
        if incremental:
            input('Press Enter to proceed or CTRL+C to quit')
        subprocess.call(cmd)
338 | 
339 | 
340 | ###############################################################################
341 | 
if start_at <= 6:
    # Step 06: sort the ROCS overlays from Step 05 by score and apply the
    # score threshold selection.
    banner = """

################################################
Step 06: SORT ROCS OVERLAYS
################################################
    """
    print(banner)

    sort_script = os.path.join(SCREENLAMP_TOOLS_DIR, 'sort_rocs_mol2.py')
    overlay_dir = os.path.join(PROJECT_PATH, '05_rocs_overlays')
    sorted_dir = os.path.join(PROJECT_PATH, '06_rocs_overlays_sorted')

    cmd = ['python', sort_script,
           '--input', overlay_dir,
           '--output', sorted_dir,
           '--query', QUERY_PATH,
           '--sortby', ROCS_SORTBY,
           '--selection', ROCS_THRESHOLD]

    print('Running command:\n%s\n' % ' '.join(cmd))
    if incremental:
        input('Press Enter to proceed or CTRL+C to quit')
    subprocess.call(cmd)
363 | 
364 | ###############################################################################
365 | 
if start_at <= 7:
    # Step 07: run the functional group matching on the sorted overlays
    # from Step 06.
    banner = """

################################################
Step 07: MATCHING FUNCTIONAL GROUPS
################################################
    """
    print(banner)

    matching_script = os.path.join(SCREENLAMP_TOOLS_DIR,
                                   'funcgroup_matching.py')
    sorted_dir = os.path.join(PROJECT_PATH, '06_rocs_overlays_sorted')
    matching_dir = os.path.join(PROJECT_PATH, '07_funcgroup_matching')

    cmd = ['python', matching_script,
           '--input', sorted_dir,
           '--output', matching_dir,
           '--max_distance', FGROUP_MATCH_DISTANCE,
           '--processes', N_CPUS]

    print('Running command:\n%s\n' % ' '.join(cmd))
    if incremental:
        input('Press Enter to proceed or CTRL+C to quit')
    subprocess.call(cmd)
387 | 
388 | ###############################################################################
389 | 
if start_at <= 8:
    # Step 08: select functional group matches from the Step 07 tables.
    banner = """

################################################
Step 08: SELECTING FUNCTIONAL GROUP MATCHES
################################################
    """
    print(banner)

    # The sorted overlays are only handed to the selection tool when
    # WRITE_MATCH_OVERLAYS is set; otherwise an empty path is passed.
    in_path = (os.path.join(PROJECT_PATH, '06_rocs_overlays_sorted')
               if WRITE_MATCH_OVERLAYS else '')

    selection_script = os.path.join(SCREENLAMP_TOOLS_DIR,
                                    'funcgroup_matching_selection.py')
    matching_dir = os.path.join(PROJECT_PATH, '07_funcgroup_matching')
    selection_dir = os.path.join(PROJECT_PATH, '08_funcgroup_selection')

    cmd = ['python', selection_script,
           '--input', matching_dir,
           '--output', selection_dir,
           '--atomtype_selection', FGROUP_ATOMTYPE,
           '--charge_selection', FGROUP_CHARGE,
           '--input_mol2', in_path]

    print('Running command:\n%s\n' % ' '.join(cmd))
    if incremental:
        input('Press Enter to proceed or CTRL+C to quit')
    subprocess.call(cmd)


--------------------------------------------------------------------------------
/tools/sort_rocs_mol2.py:
--------------------------------------------------------------------------------
  1 | # Sebastian Raschka 2017
  2 | #
  3 | # screenlamp is a Python toolkit
  4 | # for hypothesis-driven virtual screening.
  5 | #
  6 | # Copyright (C) 2017 Michigan State University
  7 | # License: Apache v2
  8 | #
  9 | # Software author: Sebastian Raschka <http://sebastianraschka.com>
 10 | # Software author email: mail@sebastianraschka.com
 11 | #
 12 | # Software source repository: https://github.com/rasbt/screenlamp
 13 | # Documentation: https://psa-lab.github.io/screenlamp
 14 | #
 15 | # screenlamp was developed in the
 16 | # Protein Structural Analysis & Design Laboratory
 17 | # (http://www.kuhnlab.bmb.msu.edu)
 18 | #
 19 | # If you are using screenlamp in your research, please cite
 20 | # the following journal article:
 21 | #
 22 | # Raschka, Sebastian,  Anne M. Scott, Nan Liu,
 23 | #   Santosh Gunturu, Mar Huertas, Weiming Li,
 24 | #   and Leslie A. Kuhn. 2017
 25 | #
 26 | # Enabling the hypothesis-driven prioritization of
 27 | #   ligand candidates in big databases:
 28 | #   Screenlamp and its application to GPCR inhibitor
 29 | #   discovery for invasive species control.
 30 | #
 31 | 
 32 | import os
 33 | import argparse
 34 | import sys
 35 | import time
 36 | import pandas as pd
 37 | from biopandas.mol2 import split_multimol2
 38 | import tempfile
 39 | import pickle
 40 | 
 41 | 
 42 | def get_mol2_files(dir_path):
 43 | 
 44 |     files = []
 45 |     if os.path.isdir(dir_path):
 46 |         for f in os.listdir(dir_path):
 47 |             if f.endswith(('.mol2', 'mol2.gz')):
 48 |                 file_path = os.path.join(dir_path, f)
 49 |                 files.append(file_path)
 50 | 
 51 |     elif (os.path.isfile(dir_path) and
 52 |           dir_path.endswith(('.mol2', 'mol2.gz'))):
 53 |         files.append(dir_path)
 54 | 
 55 |     return files
 56 | 
 57 | 
 58 | def parse_selection_string(s, df_name='df'):
 59 |     return s.replace('(', '(%s.' % df_name)
 60 | 
 61 | 
 62 | def read_and_write(inp_mol2_path, report_path, output_dir, query_path,
 63 |                    sortby, separator, verbose, id_suffix, selection):
 64 | 
 65 |     if verbose:
 66 |         sys.stdout.write('Processing %s' % os.path.basename(inp_mol2_path))
 67 |         sys.stdout.flush()
 68 | 
 69 |     df = pd.read_table(report_path, usecols=['Name', 'ShapeQuery'] + sortby,
 70 |                        sep=separator)
 71 | 
 72 |     if sortby:
 73 |         df.sort_values(sortby, inplace=True, ascending=False)
 74 | 
 75 |     if selection:
 76 |         selection_str = parse_selection_string(selection, df_name='df')
 77 |         mask = pd.eval(selection_str)
 78 |         df = df[mask]
 79 | 
 80 |     dbase_query_pairs = [(d, q) for d, q in
 81 |                          zip(df['Name'].values, df['ShapeQuery'].values)]
 82 |     query_names = {q for q in df['ShapeQuery'].values}
 83 | 
 84 |     query_mol2s = {}
 85 | 
 86 |     multiconf_query = False
 87 |     for idx, cont in enumerate(split_multimol2(query_path)):
 88 |         if idx >= 1:
 89 |             multiconf_query = True
 90 |             break
 91 | 
 92 |     cnt = -1
 93 | 
 94 |     if query_path.endswith('.gz'):
 95 |         for id_, cont in split_multimol2(query_path):
 96 |             cnt += 1
 97 |             cont = b''.join(cont).decode('utf-8').split('\n')
 98 |             if multiconf_query:
 99 |                 mol_idx = '%s_%d' % (id_.decode('utf-8'), cnt)
100 |             else:
101 |                 mol_idx = id_
102 |             if mol_idx in query_names:
103 |                 if id_suffix:
104 |                     cont[1] = mol_idx + '\n'
105 |                 query_mol2s[mol_idx] = ''.join(cont)
106 | 
107 |     else:
108 |         for id_, cont in split_multimol2(query_path):
109 |             cnt += 1
110 |             if multiconf_query:
111 |                 mol_idx = '%s_%d' % (id_, cnt)
112 |             else:
113 |                 mol_idx = id_
114 |             if mol_idx in query_names:
115 |                 if id_suffix:
116 |                     cont[1] = mol_idx + '\n'
117 |                 query_mol2s[mol_idx] = ''.join(cont)
118 | 
119 |     out_path_base = os.path.join(output_dir, os.path.basename(inp_mol2_path)
120 |                                  .split('.mol2')[0])
121 |     out_path_q = '%s_%s' % (out_path_base, 'query.mol2')
122 |     out_path_d = '%s_%s' % (out_path_base, 'dbase.mol2')
123 | 
124 |     with tempfile.TemporaryDirectory() as tmpdirname:
125 |         for id_, cont in split_multimol2(inp_mol2_path):
126 |             if id_:
127 |                 tmp_path = os.path.join(tmpdirname, id_)
128 |                 with open(tmp_path, 'wb') as f:
129 |                     pickle.dump(''.join(cont), f)
130 | 
131 |         with open(out_path_d, 'w') as dof,\
132 |                 open(out_path_q, 'w') as qof:
133 | 
134 |             if verbose:
135 |                 start = time.time()
136 | 
137 |             cnt = 0
138 |             for d, q in dbase_query_pairs:
139 |                 cnt += 1
140 |                 qof.write(query_mol2s[q])
141 |                 with open(os.path.join(tmpdirname, d), 'rb') as pkl:
142 |                     pkl_cont = pickle.load(pkl)
143 |                     dof.write(pkl_cont)
144 | 
145 |     if verbose:
146 |         elapsed = time.time() - start
147 |         n_molecules = cnt + 1
148 |         sys.stdout.write(' | scanned %d molecules | %d mol/sec\n' %
149 |                          (n_molecules, n_molecules / elapsed))
150 |         sys.stdout.flush()
151 | 
152 | 
def main(input_dir, output_dir, query_path,
         sortby, separator, verbose, id_suffix, selection):
    """Sort all ROCS result files found at `input_dir`.

    For each mol2 file returned by `get_mol2_files(input_dir)`, the name of
    the accompanying report is derived by replacing `.mol2` with `.rpt` and
    `_hits_` with `_` in the file name; the report is expected in the same
    directory as the mol2 file. Both paths are then passed to
    `read_and_write` together with the remaining arguments.

    Parameters
    ----------
    input_dir : str
        Directory with the mol2 and report files (or a single mol2 file).
    output_dir : str
        Directory for the output files; created, including missing parent
        directories, if it does not exist yet.
    query_path, sortby, separator, verbose, id_suffix, selection
        Passed through unchanged to `read_and_write`.

    """
    # Unlike os.mkdir, this also handles nested output paths and does not
    # fail if the directory appears between an existence check and creation.
    os.makedirs(output_dir, exist_ok=True)

    for mol2_path in get_mol2_files(input_dir):
        mol2_dir, mol2_name = os.path.split(mol2_path)
        report_name = mol2_name.replace('.mol2', '.rpt').replace('_hits_', '_')
        report_path = os.path.join(mol2_dir, report_name)
        read_and_write(mol2_path, report_path, output_dir, query_path,
                       sortby, separator, verbose, id_suffix, selection)
165 | 
166 | 
if __name__ == '__main__':

    # Command line interface: parse the arguments, convert the string
    # options into Python values, validate them, and hand over to main().
    parser = argparse.ArgumentParser(
            description='Sorts ROCS results by score and creates'
                        '\nseparate .mol2 files for the database'
                        ' and query molecules.',
            epilog="""Example:
python sort_rocs_mol2.py -i rocs_results/\\
   --output rocs_sorted/ --query mol.mol2\\
   --sortby TanimotoCombo,ColorTanimoto\\
   --selection "(TanimotoCombo >= 0.75) & (ColorTanimoto >= 0.1)" """,
            formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('-i', '--input',
                        type=str,
                        required=True,
                        help='(Required.) Input directory with results from a ROCS run.')
    parser.add_argument('-o', '--output',
                        type=str,
                        required=True,
                        help='(Required.) Directory path for writing the sorted `.mol2` files'
                             '\n(one `*_dbase.mol2` and one `*_query.mol2` file per input file).')
    parser.add_argument('--query',
                        type=str,
                        required=True,
                        help='(Required.) Path to the query molecule'
                             '\nin `.mol2` and/or `.mol2.gz` format.'
                             '\nThe query molecule file could be a single'
                             '\nstructure of multiple-conformers of the same'
                             '\nstructure. If a multi-conformer file is'
                             '\nsubmitted, please make sure that all'
                             '\nconformers in the mol2 file have the same'
                             '\nmolecule ID/Name.')
    parser.add_argument('-s', '--sortby',
                        type=str,
                        default='TanimotoCombo,ColorTanimoto',
                        help='(Optional, default: `"TanimotoCombo,ColorTanimoto"`)'
                             '\nScore column(s) in ROCS report files that'
                             '\nthe structures should be sorted by.')
    parser.add_argument('--selection',
                        type=str,
                        default='(TanimotoCombo >= 1.0)'
                                ' & (ColorTanimoto >= 0.25)',
                        help='(Optional, default: `"(TanimotoCombo >= 1.0) & (ColorTanimoto >= 0.25)"`)'
                             '\nSelection string to exclude molecules above'
                             '\nor below a certain score threshold. By default'
                             '\nall molecules with a ColorTanimoto score smaller than 0.25'
                             '\n and a TanimotoCombo score smaller than 1.0 will be disregarded.')
    parser.add_argument('--separator',
                        type=str,
                        default='\t',
                        help=('(Optional, default: `"\\t"`.) Column separator used\nin the input table.\n'
                              'Assumes tab-separated values by default.'))
    parser.add_argument('--id_suffix',
                        type=str,
                        default='False',
                        help='(Optional, default: `"False"`.)'
                             '\nIf `--id_suffix "True"`, a molecule ID suffix'
                             '\nwill be added to the query'
                             '\nmolecules in the order the ROCS query molecules'
                             '\nappear in a multi-conformer query file.'
                             '\nFor instance, if all query molecules are labeled "3kPZS",'
                             '\nthen the same structures in the output file are labeled'
                             '\n3kPZS_1, 3kPZS_2, ... Note that those modified conformer'
                             '\nwill correspond to the conformer names in the ROCS report'
                             '\ntables. However, they may appear in an unsorted order in'
                             '\nthe _query files, which are sorted by the overlay score'
                             '\nof the database molecules. For example, if the'
                             '\ndatabase molecule is called ZINC123_112, first'
                             '\nentry in the _query file that corresponds to *_dbase'
                             '\nfile may by labeled 3kPZS_11 if the 11th 3kPZS conformer'
                             '\nis the best match according to ROCS.')
    parser.add_argument('-v', '--verbose',
                        type=int,
                        default=1,
                        help='Verbosity level. If 0, does not print any'
                             ' output.'
                             '\nIf 1 (default), prints the file currently'
                             '\nprocessing.')

    parser.add_argument('--version', action='version', version='v. 1.0')

    args = parser.parse_args()

    # --id_suffix is passed as text; map the accepted spellings to a bool.
    if args.id_suffix.lower() in {'false', 'f', 'no', 'n'}:
        id_suffix = False
    elif args.id_suffix.lower() in {'true', 't', 'yes', 'y'}:
        id_suffix = True
    else:
        raise ValueError('--id_suffix must be true or false. Got %s' %
                         args.id_suffix)

    # Drop empty entries so that `--sortby ""` yields an empty list (no
    # sorting) instead of a list holding an empty column name.
    sortby = [name.strip() for name in args.sortby.split(',')
              if name.strip()]

    # Only the --sortby columns are read from the report tables, so every
    # column used in --selection must be one of them. A column name is the
    # text that follows an opening parenthesis. The check is done against
    # the parsed column list; testing against the raw --sortby string would
    # also accept mere substrings of column names.
    for token in args.selection.split():
        if not token.startswith('('):
            continue
        column = token[1:]
        if column and column not in sortby:
            raise ValueError('Selection columns are a subset of'
                             ' the --sortby columns. The column %s'
                             ' is currently not contained in the'
                             ' --sortby argument. Please add it '
                             'there to use this column as a '
                             'selection criterion.' % column)

    main(input_dir=args.input, output_dir=args.output, query_path=args.query,
         sortby=sortby,
         verbose=args.verbose,
         separator=args.separator,
         id_suffix=id_suffix,
         selection=args.selection)
276 | 


--------------------------------------------------------------------------------