├── .gitignore ├── LICENSE.txt ├── MANIFEST.in ├── README.md ├── README.rst ├── dev ├── build.sh ├── document.sh ├── package_plugin.sh └── rebuild.sh ├── docs ├── Makefile ├── pyvol_manual.pdf └── source │ ├── _static │ ├── basic_parameters_gui.png │ ├── basic_v01.png │ ├── display_parameters_gui.png │ ├── display_v01.png │ ├── installation_settings_gui.png │ ├── load_parameters_gui.png │ ├── overview_image_v01.png │ ├── partitioning_parameters_gui.png │ ├── pocket_specification_gui.png │ └── spec_v01.png │ ├── basic.rst │ ├── conf.py │ ├── development.rst │ ├── index.rst │ ├── install.rst │ ├── introduction.rst │ ├── load_previous.rst │ ├── output.rst │ ├── partitioning.rst │ ├── pocket_specification.rst │ ├── pyvol.rst │ └── shell.rst ├── installers └── pyvol-installer.zip ├── pyvol ├── __init__.py ├── __main__.py ├── cluster.py ├── configuration.py ├── construct.py ├── exceptions.py ├── identify.py ├── poses.py ├── pymol_interface.py ├── pymol_utilities.py ├── pyvol_gui │ ├── __init__.py │ └── pyvol_gui.ui ├── spheres.py └── utilities.py ├── setup.cfg ├── setup.py └── tests ├── 1uwh_B_lig.pdb ├── 1uwh_B_prot.pdb └── test_pyvol.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .eggs 3 | build/ 4 | dist/ 5 | bio_pyvol.egg-info/ 6 | pyvol/pyvol_gui/pyvol_gui.ui.autosave 7 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Ryan H.B. 
Smith 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20 | SOFTWARE. 21 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | PyVOL: Protein Pocket Visualization, Segmentation, and Characterization 3 | ======================================================================= 4 | 5 | ![image](https://anaconda.org/conda-forge/bio-pyvol/badges/version.svg) ![image](https://img.shields.io/pypi/v/bio_pyvol.svg) ![image](https://img.shields.io/pypi/l/bio_pyvol.svg) 6 | 7 | See the [Full Project Documentation](https://schlessinger-lab.github.io/pyvol/). 
8 | ![image](docs/source/_static/overview_image_v01.png) 9 | 10 | 11 | Overview 12 | ======== 13 | 14 | PyVOL is a python library packaged into a [PyMOL](https://pymol.org/2/) 15 | GUI for identifying protein binding pockets, partitioning them into 16 | sub-pockets, and calculating their volumes. PyVOL can be run as a PyMOL 17 | plugin through its GUI or the PyMOL prompt, as an imported python 18 | library, or as a command-line program. Visualization of results is 19 | exclusively supported through PyMOL though exported surfaces are 20 | compatible with standard 3D geometry visualization programs. The project 21 | is hosted on github by the Schlessinger Lab. Please access the 22 | repository to view code or submit bugs. The package has been most 23 | extensively tested with PyMOL 2.3+ running Python 3.7. Support for all 24 | python versions 2.7+ is intended but not as thoroughly tested. Support 25 | for PyMOL 1.7.4+ without the GUI is as yet incomplete. Unfortunately, 26 | PyVOL can not currently run on MacOS Catalina due to its restrictions on 27 | running 32-bit executables. The Mac-compatible MSMS executable is not 28 | yet available in a 64-bit form. 29 | 30 | Quick Installation into PyMOL 2.0+ 31 | ================================== 32 | 33 | PyVOL can be installed into any python environment, but installing 34 | directly into PyMOL 2.0+ is easiest. Download the 35 | [basic GUI installer](https://github.com/schlessinger-lab/pyvol/blob/master/installers/pyvol-installer.zip) 36 | and then use the PyMOL plugin manager to install that file: 37 | Plugins → Plugin Manager → Install New Plugin → Install from local file → 38 | Choose file... 39 | 40 | The following description is deprecated but will be updated soon: 41 | This installs the PyVOL GUI. Select 42 | Plugins → PyVOL → Settings → Install PyVOL from PyPI to 43 | fetch PyVOL and any missing dependencies. Once PyVOL has been installed, 44 | the location of MSMS must be added to the path.
In the MSMS Settings 45 | panel, common locations for the executable can be searched. Once an 46 | executable has been identified and is displayed, Change MSMS Path can be 47 | clicked to make that executable visible to the back-end. The GUI should 48 | then display that it can find MSMS. For academic users and non-academic 49 | users with the Schrodinger incentive PyMOL distribution, installation is 50 | now complete. For all others see the full [installation documentation](https://schlessinger-lab.github.io/pyvol/install.html). 51 | 52 | Example Basic Run 53 | ================= 54 | 55 | A simple calculation using the PyMOL prompt is to load a protein of 56 | interest and then run the pocket command. This is an example for the 57 | Sorafenib-bound structure of BRAF: 58 | 59 | ``` {.sourceCode .python} 60 | fetch '1uwh' 61 | pocket protein="1uwh and chain B" 62 | ``` 63 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | 2 | *********************************************************************** 3 | PyVOL: Protein Pocket Visualization, Segmentation, and Characterization 4 | *********************************************************************** 5 | 6 | |conda| |pypi| |license| 7 | 8 | .. |conda| image:: https://anaconda.org/conda-forge/bio-pyvol/badges/version.svg 9 | 10 | .. |pypi| image:: https://img.shields.io/pypi/v/bio_pyvol.svg 11 | 12 | .. |license| image:: https://img.shields.io/pypi/l/bio_pyvol.svg 13 | 14 | .. marker-start-introduction 15 | 16 | .. figure:: docs/source/_static/overview_image_v01.png 17 | :align: center 18 | 19 | 20 | Overview 21 | -------- 22 | 23 | PyVOL is a python library packaged into a `PyMOL `_ GUI for identifying protein binding pockets, partitioning them into sub-pockets, and calculating their volumes. 
PyVOL can be run as a PyMOL plugin through its GUI or the PyMOL prompt, as an imported python library, or as a command-line program. Visualization of results is exclusively supported through PyMOL though exported surfaces are compatible with standard 3D geometry visualization programs. The project is hosted on github by the Schlessinger Lab. Please access the repository to view code or submit bugs. The package has been most extensively tested with PyMOL 2.3+ running Python 3.7. Support for all python versions 2.7+ is intended but not as thoroughly tested. Support for PyMOL 1.7.4+ without the GUI is as yet incomplete. Unfortunately, PyVOL can not currently run on MacOS Catalina due to its restrictions on running 32-bit executables. The Mac-compatible MSMS executable is not yet available in a 64-bit form. 24 | 25 | Quick Installation into PyMOL 2.0+ 26 | ---------------------------------- 27 | 28 | PyVOL can be installed into any python environment, but installing directly into PyMOL 2.0+ is easiest. Download the :download:`basic GUI installer ` and then use the PyMOL plugin manager to install that file: :menuselection:`Plugins --> Plugin Manager --> Install New Plugin --> Install from local file -->` :guilabel:`Choose file...` 29 | 30 | This installs the PyVOL GUI. Select :menuselection:`Plugins --> PyVOL --> Settings -->` :guilabel:`Install PyVOL from PyPI` to fetch PyVOL and any missing dependencies. Once PyVOL has been installed, the location of MSMS must be added to the path. In the `MSMS Settings` panel, common locations for the executable can be searched. Once an executable has been identified and is displayed, :guilabel:`Change MSMS Path` can be clicked to make that executable visible to the back-end. The GUI should then display that it can find MSMS. For academic users and non-academic users with the Schrodinger incentive PyMOL distribution, installation is now complete. For all others :ref:`install:MSMS Installation`. 
31 | 32 | Example Basic Run 33 | ----------------- 34 | 35 | A simple calculation using the PyMOL prompt is to load a protein of interest and then run the `pocket` command. This is an example for the Sorafenib-bound structure of BRAF: 36 | 37 | .. code-block:: python 38 | 39 | fetch '1uwh' 40 | pocket protein="1uwh and chain B" 41 | -------------------------------------------------------------------------------- /dev/build.sh: -------------------------------------------------------------------------------- 1 | echo "This must be run from the dev directory" 2 | pyvol_root=".." 3 | 4 | cd $pyvol_root 5 | if [ -d "bio_pyvol.egg-info" ]; then 6 | rm -r "bio_pyvol.egg-info" 7 | fi 8 | 9 | if [ -d "build" ]; then 10 | rm -r "build" 11 | fi 12 | 13 | if [ -d "dist" ]; then 14 | rm -r "dist" 15 | fi 16 | 17 | python setup.py sdist bdist_wheel 18 | 19 | if [ -d "dist" ]; then 20 | rm pyvol/pyvol_gui/cached_source/bio-pyvol-*.tar.gz 21 | cp dist/bio-pyvol-*.tar.gz pyvol/pyvol_gui/cached_source/ 22 | 23 | twine check dist/* 24 | twine upload --repository-url https://upload.pypi.org/legacy/ dist/* 25 | fi 26 | 27 | if [ -d "bio_pyvol.egg-info" ]; then 28 | rm -r "bio_pyvol.egg-info" 29 | fi 30 | 31 | if [ -d "build" ]; then 32 | rm -r "build" 33 | fi 34 | 35 | if [ -d "dist" ]; then 36 | rm -r "dist" 37 | fi 38 | 39 | conda-build . 
-c bioconda 40 | -------------------------------------------------------------------------------- /dev/document.sh: -------------------------------------------------------------------------------- 1 | pyvol_dev_root="/home/rsmith/research/pyvol_development" 2 | docs_dir="${pyvol_dev_root}/pyvol/docs" 3 | 4 | if [ -d "${pyvol_dev_root}/_build" ]; then 5 | rm -r "${pyvol_dev_root}/_buil" 6 | fi 7 | 8 | cd "${pyvol_dev_root}/pyvol" 9 | # pandoc -s -o README.md README.rst 10 | 11 | cd ${docs_dir} 12 | make html 13 | 14 | if [ -f "${pyvol_dev_root}/pyvol-docs/_build/html/index.html" ]; then 15 | cd "${pyvol_dev_root}/pyvol-docs/gh-pages" 16 | 17 | rm *.html 18 | rm -r _sources/ 19 | rm -r _static/ 20 | 21 | cp -r ../_build/html/* . 22 | touch .nojekyll 23 | git add . 24 | git commit -m "[auto] rebuilt docs"; git push origin gh-pages 25 | git push origin gh-pages 26 | fi 27 | 28 | cd ${docs_dir} 29 | make latexpdf 30 | if [ -f "manual.pdf" ]; then 31 | mv manual.pdf pyvol_manual.pdf 32 | git add .; git commit -m "[auto] rebuilt pdf"; git push origin master 33 | fi 34 | 35 | # sphinx-apidoc run with: sphinx-apidoc -o docs/source/ pyvol/ 36 | # PyMOL 3k5v view 1: (-0.8930549025535583, 0.4392430782318115, 0.09748003631830215, -0.4189101457595825, -0.8907894492149353, 0.17604589462280273, 0.16416054964065552, 0.11639083176851273, 0.9795329570770264, 0.0001701563596725464, 0.00048378854990005493, -182.1396942138672, 17.222610473632812, 22.656993865966797, 59.59479522705078, 166.03463745117188, 198.02749633789062, -20.0) 37 | # view 2: (-0.7477022409439087, 0.4609299898147583, -0.47798359394073486, -0.43831321597099304, -0.8833208084106445, -0.16615529358386993, -0.49880295991897583, 0.08527922630310059, 0.8624975681304932, 0.0009277071803808212, 0.0003758147358894348, -110.71723937988281, 18.246036529541016, 12.564921379089355, 58.72468185424805, 89.2468032836914, 131.82928466796875, -20.0) 38 | -------------------------------------------------------------------------------- 
/dev/package_plugin.sh: -------------------------------------------------------------------------------- 1 | if [ -z $1 ]; then 2 | echo "You must pass a version to run this script" 3 | exit 4 | fi 5 | 6 | echo "Building gui zip file for version" $1 7 | 8 | pyvol_dev_root="/home/rsmith/research/pyvol_development" 9 | project_dir="${pyvol_dev_root}/pyvol/pyvol" 10 | 11 | zip_name="pyvol-${1}-installer.zip" 12 | 13 | cd ${project_dir} 14 | zip -r ${zip_name} pyvol_gui/ 15 | mv ${small_zip_name} ../installers/ 16 | cp ../installers/${small_zip_name} ../installers/pyvol-installer.zip 17 | 18 | cd .. 19 | git add installers/*.zip 20 | git commit -m "[auto] created new installer for version ${1}" 21 | git push origin master 22 | -------------------------------------------------------------------------------- /dev/rebuild.sh: -------------------------------------------------------------------------------- 1 | 2 | # update the version in: 3 | # setup.py 4 | # meta.yaml 5 | # pyvol/__init__.py 6 | # pyvol/pyvol_gui/__init__.py 7 | # login to anaconda with "anaconda login" 8 | # run this script from the pyvol/dev directory with the version number as the only argument 9 | 10 | if [ -z $1 ]; then 11 | echo "You must pass a version to run this script" 12 | exit 13 | fi 14 | 15 | sh document.sh 16 | sh build.sh 17 | sh package_plugin.sh $1 18 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = ../../pyvol-docs/_build 9 | PDFBUILDDIR = /tmp 10 | PDF = pyvol_manual.pdf 11 | 12 | # User-friendly check for sphinx-build 13 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 14 | $(error The '$(SPHINXBUILD)' command was not found. 
Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 15 | endif 16 | 17 | # Internal variables. 18 | PAPEROPT_a4 = -D latex_paper_size=a4 19 | PAPEROPT_letter = -D latex_paper_size=letter 20 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 21 | # the i18n builder cannot share the environment and doctrees with the others; it reads the same source directory 22 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 23 | 24 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest coverage gettext 25 | 26 | help: 27 | @echo "Please use \`make <target>' where <target> is one of" 28 | @echo " html to make standalone HTML files" 29 | @echo " dirhtml to make HTML files named index.html in directories" 30 | @echo " singlehtml to make a single large HTML file" 31 | @echo " pickle to make pickle files" 32 | @echo " json to make JSON files" 33 | @echo " htmlhelp to make HTML files and a HTML help project" 34 | @echo " qthelp to make HTML files and a qthelp project" 35 | @echo " applehelp to make an Apple Help Book" 36 | @echo " devhelp to make HTML files and a Devhelp project" 37 | @echo " epub to make an epub" 38 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 39 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 40 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 41 | @echo " text to make text files" 42 | @echo " man to make manual pages" 43 | @echo " texinfo to make Texinfo files" 44 | @echo " info to make Texinfo files and run them through makeinfo" 45 | @echo " gettext to make PO message catalogs" 46 | @echo " changes to make an overview of all changed/added/deprecated items" 47 | @echo " xml to make
Docutils-native XML files" 48 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 49 | @echo " linkcheck to check all external links for integrity" 50 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 51 | @echo " coverage to run coverage check of the documentation (if enabled)" 52 | 53 | clean: 54 | rm -rf $(BUILDDIR)/* 55 | 56 | html: 57 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 58 | @echo 59 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 60 | 61 | dirhtml: 62 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 63 | @echo 64 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 65 | 66 | singlehtml: 67 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 68 | @echo 69 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 70 | 71 | pickle: 72 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 73 | @echo 74 | @echo "Build finished; now you can process the pickle files." 75 | 76 | json: 77 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 78 | @echo 79 | @echo "Build finished; now you can process the JSON files." 80 | 81 | htmlhelp: 82 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 83 | @echo 84 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 85 | ".hhp project file in $(BUILDDIR)/htmlhelp." 86 | 87 | qthelp: 88 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 89 | @echo 90 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 91 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 92 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/pyvol.qhcp" 93 | @echo "To view the help file:" 94 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/pyvol.qhc" 95 | 96 | applehelp: 97 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 98 | @echo 99 | @echo "Build finished. 
The help book is in $(BUILDDIR)/applehelp." 100 | @echo "N.B. You won't be able to view it unless you put it in" \ 101 | "~/Library/Documentation/Help or install it in your application" \ 102 | "bundle." 103 | 104 | devhelp: 105 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 106 | @echo 107 | @echo "Build finished." 108 | @echo "To view the help file:" 109 | @echo "# mkdir -p $$HOME/.local/share/devhelp/pyvol" 110 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/pyvol" 111 | @echo "# devhelp" 112 | 113 | epub: 114 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 115 | @echo 116 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 117 | 118 | latex: 119 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 120 | @echo 121 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 122 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 123 | "(use \`make latexpdf' here to do that automatically)." 124 | 125 | latexpdf: 126 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(PDFBUILDDIR)/latex 127 | @echo "Running LaTeX files through pdflatex..." 128 | $(MAKE) -C $(PDFBUILDDIR)/latex all-pdf 129 | cp $(PDFBUILDDIR)/latex/*.pdf $(PDF) 130 | @echo "pdflatex finished; PDF moved to docs/pyvol_manual.pdf" 131 | 132 | latexpdfja: 133 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 134 | @echo "Running LaTeX files through platex and dvipdfmx..." 135 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 136 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 137 | 138 | text: 139 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 140 | @echo 141 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 142 | 143 | man: 144 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 145 | @echo 146 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 
147 | 148 | texinfo: 149 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 150 | @echo 151 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 152 | @echo "Run \`make' in that directory to run these through makeinfo" \ 153 | "(use \`make info' here to do that automatically)." 154 | 155 | info: 156 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 157 | @echo "Running Texinfo files through makeinfo..." 158 | make -C $(BUILDDIR)/texinfo info 159 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 160 | 161 | gettext: 162 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 163 | @echo 164 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 165 | 166 | changes: 167 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 168 | @echo 169 | @echo "The overview file is in $(BUILDDIR)/changes." 170 | 171 | linkcheck: 172 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 173 | @echo 174 | @echo "Link check complete; look for any errors in the above output " \ 175 | "or in $(BUILDDIR)/linkcheck/output.txt." 176 | 177 | doctest: 178 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 179 | @echo "Testing of doctests in the sources finished, look at the " \ 180 | "results in $(BUILDDIR)/doctest/output.txt." 181 | 182 | coverage: 183 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 184 | @echo "Testing of coverage in the sources finished, look at the " \ 185 | "results in $(BUILDDIR)/coverage/python.txt." 186 | 187 | xml: 188 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 189 | @echo 190 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 191 | 192 | pseudoxml: 193 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 194 | @echo 195 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 
196 | -------------------------------------------------------------------------------- /docs/pyvol_manual.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/schlessinger-lab/pyvol/9b2ef8f50d56d626d1f5d88888a75cbe3204ff14/docs/pyvol_manual.pdf -------------------------------------------------------------------------------- /docs/source/_static/basic_parameters_gui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/schlessinger-lab/pyvol/9b2ef8f50d56d626d1f5d88888a75cbe3204ff14/docs/source/_static/basic_parameters_gui.png -------------------------------------------------------------------------------- /docs/source/_static/basic_v01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/schlessinger-lab/pyvol/9b2ef8f50d56d626d1f5d88888a75cbe3204ff14/docs/source/_static/basic_v01.png -------------------------------------------------------------------------------- /docs/source/_static/display_parameters_gui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/schlessinger-lab/pyvol/9b2ef8f50d56d626d1f5d88888a75cbe3204ff14/docs/source/_static/display_parameters_gui.png -------------------------------------------------------------------------------- /docs/source/_static/display_v01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/schlessinger-lab/pyvol/9b2ef8f50d56d626d1f5d88888a75cbe3204ff14/docs/source/_static/display_v01.png -------------------------------------------------------------------------------- /docs/source/_static/installation_settings_gui.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/schlessinger-lab/pyvol/9b2ef8f50d56d626d1f5d88888a75cbe3204ff14/docs/source/_static/installation_settings_gui.png -------------------------------------------------------------------------------- /docs/source/_static/load_parameters_gui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/schlessinger-lab/pyvol/9b2ef8f50d56d626d1f5d88888a75cbe3204ff14/docs/source/_static/load_parameters_gui.png -------------------------------------------------------------------------------- /docs/source/_static/overview_image_v01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/schlessinger-lab/pyvol/9b2ef8f50d56d626d1f5d88888a75cbe3204ff14/docs/source/_static/overview_image_v01.png -------------------------------------------------------------------------------- /docs/source/_static/partitioning_parameters_gui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/schlessinger-lab/pyvol/9b2ef8f50d56d626d1f5d88888a75cbe3204ff14/docs/source/_static/partitioning_parameters_gui.png -------------------------------------------------------------------------------- /docs/source/_static/pocket_specification_gui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/schlessinger-lab/pyvol/9b2ef8f50d56d626d1f5d88888a75cbe3204ff14/docs/source/_static/pocket_specification_gui.png -------------------------------------------------------------------------------- /docs/source/_static/spec_v01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/schlessinger-lab/pyvol/9b2ef8f50d56d626d1f5d88888a75cbe3204ff14/docs/source/_static/spec_v01.png -------------------------------------------------------------------------------- 
/docs/source/basic.rst: -------------------------------------------------------------------------------- 1 | .. |rarr| unicode:: U+2192 .. right arrow 2 | 3 | =========== 4 | Basic Usage 5 | =========== 6 | 7 | PyVOL accepts inputs from the PyMOL prompt, the PyMOL GUI, and configuration files via the system shell. The PyMOL prompt and configuration file inputs are fully featured while the GUI contains a slightly simplified interface with a few enforced defaults. Programmatic invocation is also supported and covered through module documentation. 8 | 9 | The next few sections describe the parameters controlling Basic Usage, :ref:`pocket_specification:Pocket Specification`, :ref:`partitioning:Partitioning Options`, and :ref:`output:Output and Display Options`. With the exception of PyMOL-specific arguments, parameter names and argument handling are identical across all inputs. 10 | 11 | .. figure:: _static/basic_parameters_gui.png 12 | :align: center 13 | 14 | The Basic Parameters section of the PyVOL GUI with parameter mapping: Protein PyMOL Selection -> `protein`, Minimum Radius -> `min_rad`, and Maximum Radius -> `max_rad` 15 | 16 | Protein Selection 17 | ----------------- 18 | 19 | The `Protein` selection for processing can be provided using one of two arguments. The `prot_file` argument directs PyVOL to read a structure from file. All included atoms are considered to be part of the pocket boundary (all atoms that occlude space). Alternatively, input through either PyMOL interface allows specification of the `protein` argument. This is a PyMOL selection string that defines the pocket boundary. 20 | 21 | When providing input through PyMOL, the Boolean `protein_only` argument (checkbox in the GUI) can be set to restrict the `protein` selection to peptide atoms. This is enabled in the GUI by default and excludes all waters, solvent molecules, and other small-molecules from the pocket boundary. 
This is normally useful, but if a co-factor should be considered part of the binding site and excluded from the available volume, it can make sense to group it with the peptide for the purpose of calculating the solvent excluded surface. 22 | 23 | .. code-block:: python 24 | 25 | # arguments: protein, prot_file 26 | pocket prot_file=<protein_pdb_filename> 27 | pocket protein=<"PyMOL selection string">, protein_only=True 28 | 29 | .. figure:: _static/basic_v01.png 30 | :align: center 31 | 32 | Demonstration of the effects of varying the minimum and maximum probe radii. The smaller minimum radius of 1.2 Å (left column) shows surface topology slightly better than the larger minimum radius of 1.6 Å (right column). However, the smaller radius can connect regions for which the connections are smaller than that tolerated by small molecules. The smaller maximum radius of 2.8 Å (top row) excludes an extra region from the bulk solvent relative to the larger maximum radius of 3.4 Å (bottom row). In this case, all parameter combinations with the exception of the 1.2 minimum radius and 3.4 maximum radius identify a pocket closely conforming to the volume occupied by the bound small molecule. The top left panel was produced with the command: `pocket protein='3k5v and chain A', protein_only=True, min_rad=1.2, max_rad=2.8`. 33 | 34 | Minimum and Maximum Probe Radii 35 | ------------------------------- 36 | 37 | The most important parameters controlling PyVOL pocket identification and boundary location are the minimum and maximum radii (`min_rad` and `max_rad` arguments) used for surface identification. The maximum radius determines the size of the probe used to identify regions accessible to bulk solvent. This parameter should be chosen to exclude any binding pockets of interest while not overly distorting the surface of the protein. Generally, values around 3.4 Å are reasonable, but this parameter can be set lower to increase pocket detection stringency or set higher to reduce stringency.
The drawback to setting `max_rad` too high is that this can lead to the identification of unreasonable, shallow pockets that snake around the surface of the protein. The minimum radius controls two factors: the level of detail of the calculated binding pocket surfaces and the algorithmic lower limit to minimum internal radii of identified binding pockets. Lower minimum radii calculate the accessibility to smaller solvent molecules. This necessarily increases the number of nooks or crannies in the binding pocket surface that are calculated and can link adjacent pockets that can not accommodate even small organic molecules. If the purpose of volume calculations is to identify protein features relevant to compound binding, such behavior is undesirable. From such a perspective, setting the minimum radius to approximately that of the smallest pharmacophore radius of potential ligands makes sense for calculations. However, the radius of water is the default setting because it meets the typical expectation of users looking at solvent excluded surfaces. 38 | 39 | .. code-block:: python 40 | 41 | # arguments min_rad, max_rad 42 | pocket prot_file=<protein_pdb_filename>, min_rad=<1.4>, max_rad=<3.4> 43 | pocket protein=<"PyMOL selection string">, min_rad=<1.4>, max_rad=<3.4> 44 | 45 | Input Constraint 46 | ---------------- 47 | 48 | By default, basic input radii parameters are compared and constrained to tested ranges using the `constrain_radii` argument. This can be turned off when running outside of the GUI, but in practice it is never useful to disable this feature. While edge cases are possible in which violating constraints is useful, in practice these constraints represent effective ranges. In particular, if the minimum radius is set to absurdly low values, the software will start fitting pockets even within intramolecular spaces and provide meaningless output if not fully crashing the program. 49 | 50 | ..
code-block:: python 51 | 52 | # arguments constrain_radii 53 | pocket prot_file=, min_rad=<1.4>, max_rad=<3.4>, constrain_radii=True 54 | pocket protein=<"PyMOL selection string">, min_rad=<1.4>, max_rad=<3.4>, constrain_radii=True 55 | 56 | 57 | .. note:: 58 | 59 | Be careful about saving `.pse` PyMOL sessions with PyVOL-produced surfaces. PyMOL does not currently use plugins to load unfamiliar CGO objects, so calculated surfaces will not load correctly from a saved PyMOL session. On the other hand, it should be possible to recreate results using saved PyMOL `.pml` logs. Surfaces can be loaded back into a session using the `Load Pocket` (command-line `load_pocket`) commands. 60 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # pyvol documentation build configuration file, created by 5 | # sphinx-quickstart on Sun Oct 27 16:17:43 2019. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | import sys 17 | import os 18 | import shlex 19 | 20 | # If extensions (or modules to document with autodoc) are in another directory, 21 | # add these directories to sys.path here. If the directory is relative to the 22 | # documentation root, use os.path.abspath to make it absolute, like shown here. 23 | sys.path.insert(0, os.path.abspath('../..')) 24 | 25 | # -- General configuration ------------------------------------------------ 26 | 27 | # If your documentation needs a minimal Sphinx version, state it here. 28 | #needs_sphinx = '1.0' 29 | 30 | # Add any Sphinx extension module names here, as strings. 
They can be 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 32 | # ones. 33 | extensions = [ 34 | "sphinx.ext.autodoc", 35 | "sphinx.ext.autosummary", 36 | "sphinx.ext.autosectionlabel" 37 | ] 38 | 39 | # "sphinx.ext.intersphinx", 40 | # "sphinx.ext.napoleon", 41 | # "rinoh.frontend.sphinx" 42 | 43 | autodoc_member_order = 'bysource' 44 | autodoc_default_flags = ['members', 'show-inheritance'] 45 | autosectionlabel_prefix_document = True 46 | autosummary_generate = True 47 | 48 | # Add any paths that contain templates here, relative to this directory. 49 | templates_path = ['_templates'] 50 | 51 | # The suffix(es) of source filenames. 52 | # You can specify multiple suffix as a list of string: 53 | # source_suffix = ['.rst', '.md'] 54 | source_suffix = '.rst' 55 | 56 | # The encoding of source files. 57 | #source_encoding = 'utf-8-sig' 58 | 59 | # The master toctree document. 60 | master_doc = 'index' 61 | 62 | # General information about the project. 63 | project = 'PyVOL' 64 | copyright = '2019, Ryan H.B. Smith' 65 | author = 'Ryan H.B. Smith' 66 | 67 | # The version info for the project you're documenting, acts as replacement for 68 | # |version| and |release|, also used in various other places throughout the 69 | # built documents. 70 | # 71 | # The short X.Y version. 72 | version = '1.8.0' 73 | # The full version, including alpha/beta/rc tags. 74 | release = '1.8.0' 75 | 76 | # The language for content autogenerated by Sphinx. Refer to documentation 77 | # for a list of supported languages. 78 | # 79 | # This is also used if you do content translation via gettext catalogs. 80 | # Usually you set "language" from the command line for these cases. 81 | language = None 82 | 83 | # There are two options for replacing |today|: either, you set today to some 84 | # non-false value, then it is used: 85 | #today = '' 86 | # Else, today_fmt is used as the format for a strftime call. 
87 | #today_fmt = '%B %d, %Y' 88 | 89 | # List of patterns, relative to source directory, that match files and 90 | # directories to ignore when looking for source files. 91 | exclude_patterns = ['_build'] 92 | 93 | # The reST default role (used for this markup: `text`) to use for all 94 | # documents. 95 | #default_role = None 96 | 97 | # If true, '()' will be appended to :func: etc. cross-reference text. 98 | #add_function_parentheses = True 99 | 100 | # If true, the current module name will be prepended to all description 101 | # unit titles (such as .. function::). 102 | #add_module_names = True 103 | 104 | # If true, sectionauthor and moduleauthor directives will be shown in the 105 | # output. They are ignored by default. 106 | #show_authors = False 107 | 108 | # The name of the Pygments (syntax highlighting) style to use. 109 | pygments_style = 'sphinx' 110 | 111 | # A list of ignored prefixes for module index sorting. 112 | #modindex_common_prefix = [] 113 | 114 | # If true, keep warnings as "system message" paragraphs in the built documents. 115 | #keep_warnings = False 116 | 117 | # If true, `todo` and `todoList` produce output, else they produce nothing. 118 | todo_include_todos = False 119 | 120 | 121 | # -- Options for HTML output ---------------------------------------------- 122 | 123 | # The theme to use for HTML and HTML Help pages. See the documentation for 124 | # a list of builtin themes. 125 | html_theme = 'sphinx_rtd_theme' 126 | 127 | # Theme options are theme-specific and customize the look and feel of a theme 128 | # further. For a list of options available for each theme, see the 129 | # documentation. 130 | #html_theme_options = {} 131 | 132 | # Add any paths that contain custom themes here, relative to this directory. 133 | #html_theme_path = [] 134 | 135 | # The name for this set of Sphinx documents. If None, it defaults to 136 | # " v documentation". 137 | #html_title = None 138 | 139 | # A shorter title for the navigation bar. 
Default is the same as html_title. 140 | #html_short_title = None 141 | 142 | # The name of an image file (relative to this directory) to place at the top 143 | # of the sidebar. 144 | #html_logo = None 145 | 146 | # The name of an image file (within the static path) to use as favicon of the 147 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 148 | # pixels large. 149 | #html_favicon = None 150 | 151 | # Add any paths that contain custom static files (such as style sheets) here, 152 | # relative to this directory. They are copied after the builtin static files, 153 | # so a file named "default.css" will overwrite the builtin "default.css". 154 | html_static_path = ['_static'] 155 | 156 | # Add any extra paths that contain custom files (such as robots.txt or 157 | # .htaccess) here, relative to this directory. These files are copied 158 | # directly to the root of the documentation. 159 | #html_extra_path = [] 160 | 161 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 162 | # using the given strftime format. 163 | #html_last_updated_fmt = '%b %d, %Y' 164 | 165 | # If true, SmartyPants will be used to convert quotes and dashes to 166 | # typographically correct entities. 167 | #html_use_smartypants = True 168 | 169 | # Custom sidebar templates, maps document names to template names. 170 | #html_sidebars = {} 171 | 172 | # Additional templates that should be rendered to pages, maps page names to 173 | # template names. 174 | #html_additional_pages = {} 175 | 176 | # If false, no module index is generated. 177 | #html_domain_indices = True 178 | 179 | # If false, no index is generated. 180 | #html_use_index = True 181 | 182 | # If true, the index is split into individual pages for each letter. 183 | #html_split_index = False 184 | 185 | # If true, links to the reST sources are added to the pages. 186 | #html_show_sourcelink = True 187 | 188 | # If true, "Created using Sphinx" is shown in the HTML footer. 
Default is True. 189 | #html_show_sphinx = True 190 | 191 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 192 | #html_show_copyright = True 193 | 194 | # If true, an OpenSearch description file will be output, and all pages will 195 | # contain a tag referring to it. The value of this option must be the 196 | # base URL from which the finished HTML is served. 197 | #html_use_opensearch = '' 198 | 199 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 200 | #html_file_suffix = None 201 | 202 | # Language to be used for generating the HTML full-text search index. 203 | # Sphinx supports the following languages: 204 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' 205 | # 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr' 206 | #html_search_language = 'en' 207 | 208 | # A dictionary with options for the search language support, empty by default. 209 | # Now only 'ja' uses this config value 210 | #html_search_options = {'type': 'default'} 211 | 212 | # The name of a javascript file (relative to the configuration directory) that 213 | # implements a search results scorer. If empty, the default will be used. 214 | #html_search_scorer = 'scorer.js' 215 | 216 | # Output file base name for HTML help builder. 217 | htmlhelp_basename = 'pyvoldoc' 218 | 219 | # -- Options for LaTeX output --------------------------------------------- 220 | 221 | latex_elements = { 222 | # The paper size ('letterpaper' or 'a4paper'). 223 | 'papersize': 'letterpaper', 224 | 225 | # The font size ('10pt', '11pt' or '12pt'). 226 | 'pointsize': '10pt', 227 | 228 | # Additional stuff for the LaTeX preamble. 229 | 'preamble': '', 230 | 231 | # Latex figure (float) alignment 232 | 'figure_align': 'htbp', 233 | 'extraclassoptions': 'openany,oneside', 234 | } 235 | 236 | # Grouping the document tree into LaTeX files. List of tuples 237 | # (source start file, target name, title, 238 | # author, documentclass [howto, manual, or own class]). 
239 | latex_documents = [ 240 | (master_doc, 'pyvol.tex', 'PyVOL Documentation', 241 | 'Ryan H.B. Smith', 'manual'), 242 | ] 243 | 244 | # The name of an image file (relative to this directory) to place at the top of 245 | # the title page. 246 | #latex_logo = None 247 | 248 | # For "manual" documents, if this is true, then toplevel headings are parts, 249 | # not chapters. 250 | #latex_use_parts = False 251 | 252 | # If true, show page references after internal links. 253 | #latex_show_pagerefs = False 254 | 255 | # If true, show URL addresses after external links. 256 | #latex_show_urls = False 257 | 258 | # Documents to append as an appendix to all manuals. 259 | #latex_appendices = [] 260 | 261 | # If false, no module index is generated. 262 | #latex_domain_indices = True 263 | 264 | 265 | # -- Options for manual page output --------------------------------------- 266 | 267 | # One entry per manual page. List of tuples 268 | # (source start file, name, description, authors, manual section). 269 | man_pages = [ 270 | (master_doc, 'pyvol', 'pyvol Documentation', 271 | [author], 1) 272 | ] 273 | 274 | # If true, show URL addresses after external links. 275 | #man_show_urls = False 276 | 277 | 278 | # -- Options for Texinfo output ------------------------------------------- 279 | 280 | # Grouping the document tree into Texinfo files. List of tuples 281 | # (source start file, target name, title, author, 282 | # dir menu entry, description, category) 283 | texinfo_documents = [ 284 | (master_doc, 'pyvol', 'pyvol Documentation', 285 | author, 'pyvol', 'One line description of project.', 286 | 'Miscellaneous'), 287 | ] 288 | 289 | # Documents to append as an appendix to all manuals. 290 | #texinfo_appendices = [] 291 | 292 | # If false, no module index is generated. 293 | #texinfo_domain_indices = True 294 | 295 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 
296 | #texinfo_show_urls = 'footnote' 297 | 298 | # If true, do not generate a @detailmenu in the "Top" node's menu. 299 | #texinfo_no_detailmenu = False 300 | -------------------------------------------------------------------------------- /docs/source/development.rst: -------------------------------------------------------------------------------- 1 | =========== 2 | Development 3 | =========== 4 | 5 | Package Design 6 | -------------- 7 | 8 | The main PyVOL algorithm is run from `identify.pocket`. Input option sanitization and logger configuration have been split into `identify.pocket_wrapper`. The pocket identification logic occurs within `identify.pocket` with almost all direct data manipulation handled by the class methods of `Spheres`. If enabled, subpocket clustering occurs in `identify.subpockets` with data manipulation occurring in `cluster`. Frequently used functions have been split into `utilities`. Configuration file reading and writing as well as input parameter checking is done in `configuration`. 9 | 10 | The PyMOL interface is contained in `pymol_interface` though integration into the PyMOL environment is actually handled in `pyvol_gui.__init__`. Display and other PyMOL-specific functions are defined in `pymol_utilities`. 11 | 12 | The two primary interfaces are via configuration file (invoked through the command line using the entry point in `__main__` that is created on installation) and via PyMOL. PyMOL is extended with all commands, and the GUI provides a limited interface to these functions. Programmatic invocation is also supported. If standard output options are reasonable, using the `identify.pocket_wrapper` entry point is better. For more customization, directly call `identify.pocket` after calling `configuration.clean_opts` on a dictionary containing all required options. 
13 | 14 | Algorithm Design 15 | ---------------- 16 | 17 | The primary algorithmic logic is supplied in `identify.py` which acts as the only interface between the user-facing modules and the computational back-end. 18 | 19 | The Spheres class holds all of the geometric information about proteins and pockets. It represents any object as a collection of spheres by holding their coordinates, radii, and cluster identifications in a five-column numpy array. Surface triangulation using MSMS and many other convenience functions are included in the class itself. The methods contained in the separate `cluster.py` would largely work as methods in the Spheres class but have been separated due to that class becoming too large and the specificity of those methods to subpocket partitioning. 20 | 21 | GUI Design 22 | ---------- 23 | 24 | The GUI was developed using Qt Designer and run using PyQT5. PyQT does not run in PyMOL 1.x distributions, so the GUI is only available in PyMOL 2.0+. 25 | 26 | Version Incrementation 27 | ---------------------- 28 | 29 | PyVOL uses a standard incrementation scheme. The version of the back-end must be updated in `setup.py`, `pyvol/__init__.py`, and `docs/source/conf.py`. The GUI version is set in `pyvol/pyvol_gui/__init__.py`, and the version of the GUI that the back-end expects is set again in `pyvol/__init__.py`. Experimental code is pushed with an alpha or beta designation (a or b before the final digit). GUI versions should only change when the GUI files are changed, but the version is intended to catch up to the backend version rather than the next available incrementation. 30 | 31 | Distribution 32 | ------------ 33 | 34 | The code is hosted on github by the Schlessinger Lab. The PyVOL backend is distributed through PyPI. This process of uploading to PyPI is automated in the `dev/build.sh` script. Installers are packaged using the `dev/package_plugin.sh` script.
Documentation is generated and pushed to the github-hosted documentation website with the `dev/document.sh` script. All three are combined in the `dev/rebuild.sh` script. The plugin will be available both from the github page and (eventually) through the official PyMOL wiki. 35 | 36 | Documentation 37 | ------------- 38 | 39 | Documentation is in the Sphinx/RTD style. Module documentation is collated using `sphinx-apidoc`. The documentation website is built using the `sphinx-rtd-theme` and maintained on the gh-pages branch of PyVOL. The `pyvol_manual.pdf` is generated using sphinx's invocation of pdfTeX. PyPI apparently cannot parse rst files, so the README.rst is converted to a md file using pandoc just prior to deployment. 40 | 41 | Testing 42 | ------- 43 | 44 | Integration testing of the non-PyMOL components is performed using pytest out of `tests/test_pyvol.py`. These are invoked by running `python -m pytest` in the root pyvol directory. These tests have been run using pytest version 5.3.5. Installing `pytest-xdist` is recommended for efficiency's sake. 45 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. pyvol documentation master file, created by 2 | sphinx-quickstart on Sun Oct 27 16:17:43 2019. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | =================== 7 | PyVOL Documentation 8 | =================== 9 | 10 | .. only:: html 11 | 12 | .. include:: introduction.rst 13 | :start-after: marker-start-readme 14 | 15 | .. toctree:: 16 | 17 | introduction 18 | install 19 | basic 20 | pocket_specification 21 | partitioning 22 | output 23 | shell 24 | load_previous 25 | development 26 | pyvol 27 | 28 | .. only:: html 29 | 30 | Indices and tables 31 | ================== 32 | 33 | * :ref:`genindex` 34 | * :ref:`modindex` 35 | 36 | ..
* :ref:`search` 37 | -------------------------------------------------------------------------------- /docs/source/install.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Installation 3 | ============ 4 | 5 | PyVOL consists of a back-end and a GUI. The back-end has been packaged into installers that contain all dependencies, but normal distribution is through PyPI and accessed through `pip`. PyVOL can consequently be installed into any python environment. For convenience, the PyVOL GUI contains an installer for easy installation into PyMOL 2.0+. 6 | 7 | .. figure:: _static/installation_settings_gui.png 8 | :align: center 9 | 10 | GUI section that installs, updates, and uninstalls the PyVOL backend as well as confirming availability of the MSMS binary. 11 | 12 | GUI Installation into PyMOL from PyPI 13 | ------------------------------------- 14 | 15 | Download the :download:`basic GUI installer ` and then use the PyMOL plugin manager to install that file: :menuselection:`Plugins --> Plugin Manager --> Install New Plugin --> Install from local file -->` :guilabel:`Choose file...` 16 | 17 | This installs the PyVOL GUI. Select :menuselection:`Plugins --> PyVOL --> Settings -->` :guilabel:`Install PyVOL from PyPI` to fetch PyVOL and any missing dependencies. Once PyVOL has been installed, the location of MSMS must be added to the path. In the `MSMS Settings` panel, common locations for the executable can be searched. Once an executable has been identified and is displayed, :guilabel:`Change MSMS Path` can be clicked to make that executable visible to the back-end. The GUI should then display that it can find MSMS. For academic users and non-academic users with the Schrodinger incentive PyMOL distribution, installation is now complete. For all others refer to :ref:`install:MSMS Installation`. 
18 | 19 | 20 | GUI Installation into PyMOL from a Packaged Installer 21 | ----------------------------------------------------- 22 | 23 | A larger installer with cached copies of PyVOL and its dependencies is also available. This option is useful if deploying PyVOL onto computers without internet access or if accessing a stable snapshot of a working build is necessary for some reason. Download the :download:`full GUI installer ` and then use the PyMOL plugin manager to install that file: :menuselection:`Plugins --> Plugin Manager --> Install New Plugin --> Install from local file -->` :guilabel:`Choose file...` 24 | 25 | This installs the PyVOL GUI. Select :menuselection:`Plugins --> PyVOL --> Settings -->` :guilabel:`Install PyVOL from Cache` to install PyVOL and any missing dependencies from the installer. Once PyVOL has been installed, the location of MSMS must be added to the path. In the `MSMS Settings` panel, common locations for the executable can be searched. Once an executable has been identified and is displayed, :guilabel:`Change MSMS Path` can be clicked to make that executable visible to the back-end. The GUI should then display that it can find MSMS. For academic users and non-academic users with the Schrodinger incentive PyMOL distribution, installation is now complete. For all others refer to :ref:`install:MSMS Installation`. 26 | 27 | 28 | PyMOL Prompt Installation into PyMOL 29 | ------------------------------------ 30 | 31 | Installation of the PyMOL back-end using the PyMOL prompt is also supported. This should work even in earlier versions of PyMOL (1.7.4+) where the GUI is non-functional. Simply run the following command on the prompt: 32 | 33 | .. code-block:: python 34 | 35 | install_pyvol 36 | 37 | Installation from the packaged installer is also available using the PyMOL prompt: 38 | 39 | .. 
code-block:: python 40 | 41 | install_pyvol_local 42 | 43 | 44 | Manual Installation 45 | ------------------- 46 | 47 | PyVOL minimally requires `biopython`, `MSMS`, `numpy`, `pandas`, `scipy`, `scikit-learn`, `trimesh`, and `msms` in order to run. PyVOL is available for manual installation from `github `_ or through `PyPI `_. Most conveniently: 48 | 49 | .. code-block:: bash 50 | 51 | pip install bio-pyvol 52 | 53 | Again, for academic users and non-academic users with the Schrodinger incentive PyMOL distribution, installation is now complete. For all others, refer to manual :ref:`install:MSMS Installation`. 54 | 55 | .. note:: 56 | When using command-line installation commands, make sure to use the right python environment. By default, pip will use the system python, but PyMOL often includes its own python environment. To check which python environment to use, run `import sys; print(sys.executable)` on the PyMOL prompt. If that is anything besides the system default python, use ` -m pip install bio-pyvol` to install PyVOL into the PyMOL-accessible environment. 57 | 58 | MSMS Installation 59 | ----------------- 60 | 61 | MSMS is provided with PyVOL for ease of use for academic users. If MSMS is available on the system path, it is automatically detected. Common locations (including the bundled version for academic users) can be searched using the GUI :guilabel:`Settings` tab. Select the appropriate location to search and then click :guilabel:`Check Path`. If a viable MSMS executable is found at that location, it is displayed. In this case the :guilabel:`Change MSMS Path` button allows the default location for MSMS to be set. This stores the MSMS path under the PyMOL variable `pyvol_msms_exe` which can be manually accessed and edited via PyMOL's settings manager. 62 | 63 | MSMS can also be manually installed and then added to the path or provided as the `custom` location (i.e. `pyvol_msms_exe` variable). 
MSMS can be downloaded from `MGLTools `_ on all systems or installed on MacOS and Linux using the bioconda channel: 64 | 65 | .. code-block:: bash 66 | 67 | conda install -c bioconda msms 68 | 69 | 70 | Updating 71 | -------- 72 | 73 | PyVOL can be updated through the PyMOL GUI simply by navigating :menuselection:`PyVOL --> Settings -->` :guilabel:`Check for Updates`. This queries the PyPI server to detect if an update is available. If an update is available for download, the same button becomes :guilabel:`Update PyVOL` and will update the back-end. The new version of the PyVOL back-end will notify you if it expects an updated GUI. If the GUI also needs to be updated, uninstall the `pyvol_gui` using :menuselection:`Plugins --> Plugin Manager --> Installed Plugins --> pyvol_gui x.x.x -->` :guilabel:`Uninstall`. Restart PyMOL, download the updated GUI from :download:`github `, and install the updated GUI as described above. 74 | 75 | Alternatively, PyVOL can be manually updated via the command line: 76 | 77 | .. code-block:: bash 78 | 79 | pip install --upgrade bio-pyvol 80 | 81 | or the PyMOL prompt: 82 | 83 | .. code-block:: python 84 | 85 | update_pyvol 86 | 87 | 88 | Uninstalling 89 | ------------ 90 | 91 | PyVOL can be uninstalled through its GUI by navigating :menuselection:`PyVOL --> Settings -->` :guilabel:`Uninstall PyVOL`. This uninstalls the back-end. Then use the plugin manager to uninstall the `pyvol_gui` plugin. 92 | 93 | Again, PyVOL can also be uninstalled via the command line: 94 | 95 | .. code-block:: bash 96 | 97 | pip uninstall bio-pyvol 98 | -------------------------------------------------------------------------------- /docs/source/introduction.rst: -------------------------------------------------------------------------------- 1 | 2 | ============ 3 | Introduction 4 | ============ 5 | 6 | .. marker-start-readme 7 | 8 | ..
figure:: _static/overview_image_v01.png 9 | :align: center 10 | 11 | PyVOL Pocket Identification 12 | 13 | Overview 14 | -------- 15 | 16 | PyVOL is a python library packaged into a `PyMOL `_ GUI for identifying protein binding pockets, partitioning them into sub-pockets, and calculating their volumes. PyVOL can be run as a PyMOL plugin through its GUI or the PyMOL prompt, as an imported python library, or as a command-line program. Visualization of results is exclusively supported through PyMOL though exported surfaces are compatible with standard 3D geometry visualization programs. The project is hosted on github by the Schlessinger Lab. Please access the repository to view code or submit bugs. The package has been most extensively tested with PyMOL 2.3+ running Python 3.7. Support for all python versions 2.7+ is intended but not as thoroughly tested. Support for PyMOL 1.7.4+ without the GUI is as yet incomplete. Unfortunately, PyVOL can not currently run on MacOS Catalina due to its restrictions on running 32-bit executables. The Mac-compatible MSMS executable is not yet available in a 64-bit form. 17 | 18 | Quick Installation into PyMOL 2.0+ 19 | ---------------------------------- 20 | 21 | PyVOL can be installed into any python environment, but installing directly into PyMOL 2.0+ is easiest. Download the :download:`basic GUI installer ` and then use the PyMOL plugin manager to install that file: :menuselection:`Plugins --> Plugin Manager --> Install New Plugin --> Install from local file -->` :guilabel:`Choose file...` 22 | 23 | This installs the PyVOL GUI. Select :menuselection:`Plugins --> PyVOL --> Settings -->` :guilabel:`Install PyVOL from PyPI` to fetch PyVOL and any missing dependencies. Once PyVOL has been installed, the location of MSMS must be added to the path. In the `MSMS Settings` panel, common locations for the executable can be searched. 
Once an executable has been identified and is displayed, :guilabel:`Change MSMS Path` can be clicked to make that executable visible to the back-end. The GUI should then display that it can find MSMS. For academic users and non-academic users with the Schrodinger incentive PyMOL distribution, installation is now complete. For all others refer to :ref:`install:MSMS Installation`. 24 | 25 | Example Basic Run 26 | ----------------- 27 | 28 | A simple calculation using the PyMOL prompt is to load a protein of interest and then run the `pocket` command. This is an example for the Sorafenib-bound structure of BRAF: 29 | 30 | .. code-block:: python 31 | 32 | fetch '1uwh' 33 | pocket protein="1uwh and chain B" 34 | -------------------------------------------------------------------------------- /docs/source/load_previous.rst: -------------------------------------------------------------------------------- 1 | .. |rarr| unicode:: U+2192 .. right arrow 2 | 3 | ======================== 4 | Loading Previous Results 5 | ======================== 6 | 7 | PyMOL cannot load custom CGO objects back into sessions correctly, so any PyMOL session containing PyVOL surfaces will have issues. PyMOL log files can be used but can take a while to run on slower computers. 8 | 9 | .. figure:: _static/load_parameters_gui.png 10 | :align: center 11 | 12 | GUI section that loads previous calculations into PyMOL for visualization with parameter mapping: Pocket Directory |rarr| `data_dir` and Display Prefix |rarr| `prefix` 13 | 14 | The PyMOL `load_pocket` command allows previous results to be read back in from file and displayed. A display prefix and display options (previously described in :ref:`output:Display Options`) can be provided to overwrite configuration file values. `load_pocket` requires the directory holding the data from a previous calculation as its first parameter. This corresponds to the `output_dir` for new calculations and by default ends in '.pyvol'.
If a data file is provided instead, PyVOL processes the enclosing directory. 15 | 16 | .. code-block:: python 17 | 18 | load_pocket 19 | -------------------------------------------------------------------------------- /docs/source/output.rst: -------------------------------------------------------------------------------- 1 | .. |rarr| unicode:: U+2192 .. right arrow 2 | 3 | ========================== 4 | Output and Display Options 5 | ========================== 6 | 7 | .. figure:: _static/display_parameters_gui.png 8 | :align: center 9 | 10 | GUI section controlling output and display options with argument mapping: Display Mode radio button |rarr| `display_mode`, Palette |rarr| `palette`, Alpha |rarr| `alpha`, and Project Dir |rarr| `project_dir` 11 | 12 | File Output Options 13 | ------------------- 14 | 15 | PyVOL always creates an output directory. It looks for a project directory (`project_dir` argument) and then creates an output directory (`output_dir`) within it. If a project directory is not provided, the current working directory is used. The output directory and all included files are named by default using a prefix consisting of a timestamp followed by a system-compatible chunk of the protein identifier (filename or selection string). The prefix can be set using the `prefix` argument. 16 | 17 | A minimum of six files are produced in a completed calculation: 1) input protein geometry (`prefix`_prot.pdb), 2) a configuration file recording all options to recapitulate a calculation (`prefix`.cfg), 3) a detailed log output (`prefix`.log), 4) a report of all calculated volumes (`prefix`.rept), and pocket geometry information contained in two files: 5) surface geometry (`prefix`_p`n`.obj) and 6) tangent sphere definitions (`prefix`_p`n`.xyzrg). If multiple pockets are calculated (e.g., `all` and `subdivide` runs), pocket geometry files are written for each. 18 | 19 | ..
code-block:: python 20 | 21 | # arguments: project_dir, output_dir, prefix 22 | pocket prot_file=, project_dir=, output_dir=, prefix= 23 | pocket protein=<"PyMOL selection string">, project_dir=, output_dir=, prefix= 24 | 25 | 26 | Logger Options 27 | -------------- 28 | 29 | PyVOL output is done through loggers. Logger handler levels can be configured via the `logger_stream_level` and `logger_file_level` arguments. The `logger_stream_level` controls the level of information printed to the screen during runs. The `logger_file_level` sets the amount of information written to the log file. Default levels of the stream and file handlers are respectively "INFO" and "DEBUG". This provides a more limited summary at run time with further details written to file. 30 | 31 | .. code-block:: python 32 | 33 | # arguments: logger_stream_level, logger_file_level 34 | pocket prot_file=, logger_stream_level="INFO", logger_file_level="DEBUG" 35 | pocket protein=<"PyMOL selection string">, logger_stream_level="INFO", logger_file_level="DEBUG" 36 | 37 | 38 | Display Options 39 | --------------- 40 | 41 | .. figure:: _static/display_v01.png 42 | :align: center 43 | 44 | Examples of the three primary display modes. 45 | 46 | When running through PyMOL or loading previous results into a PyMOL session, calculated surfaces can be visualized in any of three different ways by setting the `display_mode` parameter. Surfaces can be represented using a solid mesh (`solid`), a wireframe mesh (`mesh`), or a group of spheres (`spheres`). Transparency (when applicable) is set with the `alpha` argument: a float in the range [0.0, 1.0] that is equal to (1 - transparency). The displayed color of surfaces is controlled using the `palette` parameter. For non-programmatic invocation, the palette is interpreted as a comma-separated list of PyMOL color strings or space-separated RGB floats.
A default palette is automatically selected, and additional colors are automatically interpolated when more surfaces need to be displayed. If 5 colors need to be chosen from an input palette of 4 colors, the first and fifth palette colors are the first and fourth of the input colors. The second color contains 2/3 of the first and 1/3 of the second input colors through linear interpolation. The second through fourth colors are similarly interpolated. 47 | 48 | .. code-block:: python 49 | 50 | # arguments: display_mode, alpha, palette 51 | pocket protein=<"PyMOL selection string">, display_mode=mesh 52 | pocket protein=<"PyMOL selection string">, display_mode=solid, alpha=0.85 53 | pocket protein=<"PyMOL selection string">, display_mode=spheres 54 | pocket protein=<"PyMOL selection string">, mode=all, display_mode=solid, palette="tv_red,tv_orange,0.34 0.26 0.74" 55 | pocket protein=<"PyMOL selection string">, mode=largest, subdivide=True, display_mode=mesh, palette="marine,forest_green,magenta,cyan" 56 | 57 | .. note:: 58 | 59 | Specifying non-standard colors for display purposes can be syntactically difficult. The easiest way to match RGB values, for instance to other figure elements, is to edit the produced configuration file and reload the results into PyMOL using :ref:`load_previous:Loading Previous Results`. 60 | -------------------------------------------------------------------------------- /docs/source/partitioning.rst: -------------------------------------------------------------------------------- 1 | .. |rarr| unicode:: U+2192 .. right arrow 2 | 3 | ==================== 4 | Partitioning Options 5 | ==================== 6 | 7 | PyVOL can deterministically divide a binding pocket into subpockets. This can be run on the output of any surface determination that results in a single returned surface. 
PyVOL currently calculates *de novo* complete binding pocket surfaces prior to partitioning because determination of the overall pocket is computationally trivial relative to subdivision. Processing time scales with the volume and complexity of the studied pocket. Most jobs take just seconds, but partitioning a pocket with total volume exceeding 1000-1500 Å:superscript:`3` can extend computation time past one minute. 8 | 9 | .. figure:: _static/partitioning_parameters_gui.png 10 | :align: center 11 | 12 | GUI section controlling user binding pocket partition into subpockets with argument mapping: Subdivide |rarr| `subdivide`, Max subpockets |rarr| `max_clusters`, and Subpocket radius |rarr| `min_subpocket_rad` 13 | 14 | Enabling Subpocket Partitioning 15 | ------------------------------- 16 | 17 | Subpocket partitioning is enabled by setting the `subdivide` argument to `True`. In the GUI, this is done by selecting the `Subdivide` checkbox. 18 | 19 | .. code-block:: python 20 | 21 | # arguments: subdivide 22 | pocket prot_file=, subdivide=True 23 | pocket protein=<"PyMOL selection string">, subdivide=True 24 | 25 | Controlling the Number of Subpockets 26 | ------------------------------------ 27 | 28 | Parameters controlling the number of sub-pockets identified generally perform well using defaults; however, they can be easily adjusted as needed. The two most important parameters are controlled with the `max_clusters` and `min_subpocket_rad` arguments. PyVOL clusters volume into the maximum number of regions that make physical sense according to its hierarchical clustering algorithm. This means that there is a maximum number of clusters that is determined by the `min_subpocket_rad` (the smallest sphere used to identify new regions). Larger values of the `min_subpocket_rad` can prohibit unique identification of smaller regions and can cause partitioning to fail altogether. 
Setting the maximum number of clusters simply sets an upper bound to the number of subpockets identified. If the number of clusters originally determined is greater than the supplied maximum, clusters are iteratively merged using a metric that is related to an edge-biased surface area between adjacent clusters. 29 | 30 | .. code-block:: python 31 | 32 | # arguments: min_subpocket_rad, max_clusters 33 | pocket prot_file=, subdivide=True, min_subpocket_rad=<1.7>, max_clusters=<10> 34 | pocket protein=<"PyMOL selection string">, subdivide=True, min_subpocket_rad=<1.7>, max_clusters=<10> 35 | 36 | Other Partitioning Parameters 37 | ----------------------------- 38 | 39 | The size of the probe used to calculate surface accessibility of subpockets can be set with the `min_subpocket_surf_rad` argument. Calculation stability is less sensitive to the value of this parameter than the overall minimum probe radius. In practice, it should be set to a value slightly smaller than the overall minimum radius but not less than 1.0 Å. Unless changing the minimum used for overall calculations, the default value should be left unchanged. 40 | 41 | PyVOL currently defaults to performing radial sampling frequency at 10 Å:superscript:`-1` but this can be adjusted using the `radial_sampling` argument. Larger `radial_sampling` values should significantly improve calculation speed but at the cost of pocket resolution. 42 | 43 | PyVOL isolates the pocket to be subdivided prior to running partitioning. The local environment of the pocket is isolated by identifying all atoms within a set distance of the surface calculated for the pocket of interest. This distance is set to the maximum radius used for bulk solvent surface identification plus a buffer. The magnitude of this buffer is by default 1 Å and can be set using the `inclusion_radius_buffer` argument. 44 | 45 | The maximum sampled internal radius of subpockets can be set with the `max_subpocket_rad` argument.
Varying this parameter above ~2.7 Å is unlikely to alter results. The only practical scenario for setting this variable is when an unusually low maximum radius is used in determining bulk solvent surfaces. If internal pocket cross sections are larger than the external probe used, setting the `max_subpocket_rad` to a higher value can permit proper clustering. For the majority of users, this parameter should never be adjusted. 46 | 47 | The minimum number of tangent surface spheres belonging to a subpocket can be set with the `min_cluster_size` argument. The purpose of this filter is to remove small, aphysical sphere groupings before clustering. In practice, this never needs to be adjusted. 48 | 49 | .. code-block:: python 50 | 51 | # arguments: min_subpocket_surf_rad, radial_sampling, max_subpocket_rad, min_cluster_size 52 | pocket prot_file=, subdivide=True, min_subpocket_rad=<1.7>, max_clusters=<10>, min_subpocket_surf_rad=<10>, radial_sampling=<0.1>, max_subpocket_rad=<3.4>, min_cluster_size=<50> 53 | pocket protein=<"PyMOL selection string">, subdivide=True, min_subpocket_rad=<1.7>, max_clusters=<10>, min_subpocket_surf_rad=<10>, radial_sampling=<0.1>, max_subpocket_rad=<3.4>, min_cluster_size=<50> 54 | -------------------------------------------------------------------------------- /docs/source/pocket_specification.rst: -------------------------------------------------------------------------------- 1 | .. |rarr| unicode:: U+2192 .. right arrow 2 | 3 | ==================== 4 | Pocket Specification 5 | ==================== 6 | 7 | PyVOL runs in one of three modes (`largest`, `specific` or `all`). By default it runs in `largest` mode and returns only the single volume and geometry corresponding to the largest pocket identified when calculating `all` pockets. However, manual identification of the pocket of interest is often preferable. This can be done through specification of a ligand, a residue, or a coordinate.
If any specification is given, the mode must be changed to `specific` in order to process that parameter. The `specific` mode is the fastest by a small margin because it calculates the fewest surfaces. 8 | 9 | .. figure:: _static/pocket_specification_gui.png 10 | :align: center 11 | 12 | GUI section controlling user specification of binding pockets with parameter mapping: primary radio buttons |rarr| `mode`, Minimum Volume |rarr| `min_volume`, Ligand: PyMOL Selection |rarr| `ligand`, Residue PyMOL Selection |rarr| `residue`, Residue Id |rarr| `resid`, and Coordinate |rarr| `coordinates` 13 | 14 | .. figure:: _static/spec_v01.png 15 | :align: center 16 | 17 | Example of output using specification parameters. PyMOL commands to generate these panels consist of a specification argument in addition to this basic command: `pocket protein='3kvg and chain A', protein_only=True, palette='tv_red,teal,magenta'`. The first panel is produced by adding `mode=all`, the second with `mode=largest`, and the final with any of the following: 1) `mode=specific, resid=A487`, 2) `mode=specific, residue='3k5v and chain A and resi 487'`, 3) `mode=specific, ligand=`, or 4) `mode=specific, coordinates='12.3,42.2,48.6'`. 18 | 19 | All Mode 20 | -------- 21 | 22 | When running in `all` mode, PyVOL determines all surfaces with inward-facing normals with volumes over a minimum threshold defined by the `Minimum Volume` (`min_volume` argument). This functions similarly to the `largest` mode except that 1) all surfaces are returned rather than just the largest, 2) if the largest surface has a volume less than `Minimum Volume`, no surface will be returned at all, and 3) subpocket partitioning cannot occur on the output from this mode. By default the minimum volume is set to 200 Å:superscript:`3`. This is a heuristically determined threshold that is generally useful at distinguishing between artifacts and interesting pockets. 23 | 24 | ..
code-block:: python 25 | 26 | # arguments: mode, min_volume 27 | pocket prot_file=, mode=all, min_volume=<200> 28 | pocket protein=<"PyMOL selection string">, mode=all, min_volume=<200> 29 | 30 | 31 | Largest Mode 32 | ------------ 33 | 34 | In the default `largest` mode, PyVOL determines all surfaces with inward-facing normals, calculates the volume of each, and selects the largest. The pocket selected with this mode is usually the pocket of pharmacological interest in a protein. However, sometimes changes in the minimum or maximum radius can lead to the unexpected selection of an alternative, normally superficial pocket. 35 | 36 | .. code-block:: python 37 | 38 | # arguments: mode 39 | pocket prot_file=, mode=largest 40 | pocket protein=<"PyMOL selection string">, mode=largest 41 | 42 | 43 | Specific Mode 44 | ------------- 45 | 46 | The final mode, the `specific` mode, is invoked through specification of a ligand, residue, or coordinate. PyVOL automatically switches to this mode if any specification is provided, but this behavior can be overridden. Manual specification of the `specific` mode is safer. There is an internal priority to which specification is used, but only a single option should be specified to avoid ambiguity. 47 | 48 | Ligand Specification 49 | ^^^^^^^^^^^^^^^^^^^^ 50 | 51 | A ligand occupying the binding pocket of interest can be specified similarly to protein definition. All inputs can accept a `lig_file` argument specifying a pdb file containing ligand geometry, and PyMOL inputs can accept a `ligand` argument containing a PyMOL selection string. If the `ligand` argument is provided, atoms belonging to the `ligand` are removed from the `protein` selection before pocket identification. In all cases, bulk solvent boundary determination includes the provided ligand, so ligands that extend somewhat beyond the convex hull of the protein can include some of that volume within the calculated binding pocket.
In these cases the calculated volumes depend on the exact identity and pose of the ligand provided. This option is improper for *apo* volumes with the trade-off that calculated volumes can be meaningfully compared to small molecule volumes. 52 | 53 | .. code-block:: python 54 | 55 | # arguments: lig_file, ligand 56 | pocket prot_file=, mode=specific, lig_file= 57 | pocket protein=<"PyMOL selection string">, mode=specific, ligand=<"ligand selection string"> 58 | 59 | # Trivial case in which a single organic small molecule is present in the protein selection 60 | pocket protein=<"PyMOL selection string">, ligand=<"'PyMOL selection string' and org"> 61 | 62 | Supplying a ligand opens up two additional options. `Inclusion Radius` (`lig_incl_rad` argument) prevents the exterior surface of the protein (bulk solvent surface definition) from being defined within that distance from the ligand. In cases where a ligand extends somewhat into solvent and calculated volumes would otherwise be smaller than the volume of the known ligand, this can be used to produce a more useful surface. `Exclusion Radius` (`lig_excl_rad` argument) limits the maximum scope of the identified surface as the locus of points that distance from the supplied ligand. Both of these options introduce a heuristic that alters reported results. They are most useful when standardizing volumes across a series of similar structures as they provide a mechanism to limit volume variability due to variation in bulk solvent boundary determination. 63 | 64 | .. code-block:: python 65 | 66 | # arguments: lig_incl_rad, lig_excl_rad 67 | pocket prot_file=, mode=specific, lig_file=, lig_incl_rad=<3.5>, lig_excl_rad=<5.2> 68 | pocket protein=<"PyMOL selection string">, mode=specific, ligand=<"ligand selection string">, lig_incl_rad=<3.5>, lig_excl_rad=<5.2> 69 | 70 | ..
note:: 71 | 72 | SDF format ligand files are not currently supported for input using `lig_file` because that would increase the number of software dependencies. Reading the sdf file into PyMOL and then passing the ligand into PyVOL using the `ligand` argument is the current solution. 73 | 74 | Residue Specification 75 | ^^^^^^^^^^^^^^^^^^^^^ 76 | 77 | A bordering residue can be supplied to localize a pocket. Once again, this can be done either by specifying a residue ID or with a PyMOL selection string when running through PyMOL. The `resid` argument accepts a string specifying a residue by chain and index (e.g., residue 35 of chain A would be "A35"). The chain is inferred if not explicitly included. Only sidechain atoms are considered. The PyMOL `residue` argument allows specification of a PyMOL selection bordering the pocket of interest. This selection can be of arbitrary size but has been primarily tested holding single residues. Only the sidechains of the provided selection are used for pocket specification. In both of these cases, PyVOL tries to identify the residue atom closest to an interior surface and uses that atom to specify the adjacent pocket of interest. Some residues are adjacent to multiple pockets and make specification computationally arbitrary and unpredictable. If having trouble, specify a single atom as a PyMOL selection string. 78 | 79 | .. code-block:: python 80 | 81 | # arguments: resid, residue 82 | pocket prot_file=, mode=specific, resid= 83 | pocket protein=<"PyMOL selection string">, mode=specific, residue=<"residue selection string"> 84 | 85 | 86 | Coordinate Specification 87 | ^^^^^^^^^^^^^^^^^^^^^^^^ 88 | 89 | The final method for specifying a pocket of interest is through providing a coordinate that is within the pocket using the `coordinates` argument. PyVOL identifies the closest atom in the protein selection to the supplied coordinate and uses it to define the surface of the calculated pocket.
The coordinate value is accepted as a string of three floats with spaces in between values (e.g., "23.1 47.2 -12.7"). 90 | 91 | .. code-block:: python 92 | 93 | # arguments: coordinates 94 | pocket prot_file=, mode=specific, coordinates="x,y,z" 95 | pocket protein=<"PyMOL selection string">, mode=specific, coordinates="x,y,z" 96 | -------------------------------------------------------------------------------- /docs/source/pyvol.rst: -------------------------------------------------------------------------------- 1 | ============= 2 | PyVOL Package 3 | ============= 4 | 5 | Submodules 6 | ---------- 7 | 8 | PyVOL Cluster Module 9 | -------------------- 10 | 11 | .. automodule:: pyvol.cluster 12 | :members: 13 | :undoc-members: 14 | :show-inheritance: 15 | 16 | PyVOL Identify Module 17 | --------------------- 18 | 19 | .. automodule:: pyvol.identify 20 | :members: 21 | :undoc-members: 22 | :show-inheritance: 23 | 24 | 25 | PyVOL Pymol Interface Module 26 | ---------------------------- 27 | 28 | .. automodule:: pyvol.pymol_interface 29 | :members: 30 | :undoc-members: 31 | :show-inheritance: 32 | 33 | PyVOL Pymol Utilities Module 34 | ---------------------------- 35 | 36 | .. automodule:: pyvol.pymol_utilities 37 | :members: 38 | :undoc-members: 39 | :show-inheritance: 40 | 41 | PyVOL Spheres Module 42 | -------------------- 43 | 44 | .. automodule:: pyvol.spheres 45 | :members: 46 | :undoc-members: 47 | :show-inheritance: 48 | 49 | PyVOL Utilities Module 50 | ---------------------- 51 | 52 | .. automodule:: pyvol.utilities 53 | :members: 54 | :undoc-members: 55 | :show-inheritance: 56 | -------------------------------------------------------------------------------- /docs/source/shell.rst: -------------------------------------------------------------------------------- 1 | =============== 2 | Shell Interface 3 | =============== 4 | 5 | PyVOL can also be run from the system command line using bash or any standard shell. 
If installed using ``pip``, a ``pyvol`` entry point should be automatically installed and made available on the path. Otherwise, manual invocation of ``pyvol/__main__.py`` should work. 6 | 7 | Running from the Shell 8 | ---------------------- 9 | 10 | From the command-line, PyVOL is run exclusively using a configuration file. 11 | 12 | .. code-block:: bash 13 | 14 | python -m pyvol 15 | 16 | Template Configuration File Generation 17 | -------------------------------------- 18 | 19 | A template configuration file with default values supplied can be generated using: 20 | 21 | .. code-block:: bash 22 | 23 | python -m pyvol -t 24 | 25 | Rerunning Previous Calculations 26 | -------------------------------- 27 | 28 | Each PyVOL job writes the configuration file to recapitulate the exact run. After modifying a configuration file, unset the `prefix` and `output_dir` parameters in order to direct the output of the new run into a new folder. 29 | 30 | 31 | .. note:: 32 | 33 | When unsetting parameters in the configuration file, delete the entire line including the parameter name rather than just leaving the definition blank. For some parameters, leaving the definition blank confuses the configuration file reader. 
34 | -------------------------------------------------------------------------------- /installers/pyvol-installer.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/schlessinger-lab/pyvol/9b2ef8f50d56d626d1f5d88888a75cbe3204ff14/installers/pyvol-installer.zip -------------------------------------------------------------------------------- /pyvol/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | __version__ = "1.7.8" 3 | __guiversion__ = "1.7.8" 4 | -------------------------------------------------------------------------------- /pyvol/__main__.py: -------------------------------------------------------------------------------- 1 | 2 | import argparse 3 | from pyvol import configuration, identify 4 | 5 | 6 | # if __name__ == "__main__": 7 | def main(): 8 | parser = argparse.ArgumentParser(prog="PyVOL", description="Identification, calculation, and segmentation of protein binding pockets", epilog="For complete documentation and tutorials on use, visit the project webpage: https://schlessingerlab.github.io/pyvol") 9 | parser.add_argument("cfg_file", help="input configuration file or output configuration file if specificying --template option") 10 | parser.add_argument("-t", "--template", action='store_true', help="write a template configuration file") 11 | 12 | args = parser.parse_args() 13 | 14 | if args.template: 15 | configuration.defaults_to_file(args.cfg_file) 16 | else: 17 | identify.pocket_wrapper(**configuration.file_to_opts(args.cfg_file)) 18 | -------------------------------------------------------------------------------- /pyvol/cluster.py: -------------------------------------------------------------------------------- 1 | 2 | """ Contains functions to cluster spheres objects in memory; used in subpocket clustering. 
""" 3 | 4 | from .spheres import Spheres 5 | import itertools 6 | import logging 7 | import numpy as np 8 | import scipy 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | def cluster_within_r(spheres, radius, allow_new=True): 13 | """ Cluster the still-ungrouped spheres (group id 0) of a single radius using DBSCAN, modifying input data in situ 14 | 15 | Args: 16 | spheres (Spheres): complete set of input spheres 17 | radius (float): radius at which clustering is to occur; also used as the DBSCAN eps 18 | allow_new (bool): permit new clusters? If False, the clusters found receive temporary ids below the smallest id at this radius (i.e., negative ids) instead of new positive ids (Default value = True) 19 | 20 | """ 21 | from sklearn.cluster import DBSCAN 22 | 23 | r_indices = np.where(spheres.r == radius)[0] 24 | selected = spheres.xyzrg[r_indices, :] 25 | 26 | ungrouped_indices = np.where(selected[:, 4] == 0)[0] 27 | 28 | if ungrouped_indices.shape[0] > 0: 29 | ungrouped_data = selected[ungrouped_indices] 30 | db = DBSCAN(eps=radius, min_samples=1).fit(ungrouped_data[:, 0:3]) 31 | 32 | if allow_new: 33 | starting_index = np.amax(selected[:, 4]) + 1 # new ids continue after the largest id already present at this radius 34 | np.put(spheres.g, r_indices[ungrouped_indices], db.labels_ + starting_index) 35 | else: 36 | starting_index = np.amin(selected[:, 4]) - 1 # ungrouped spheres exist here, so the minimum is <= 0 and every id assigned below is negative 37 | np.put(spheres.g, r_indices[ungrouped_indices], -1 * db.labels_ + starting_index) # cluster label k becomes id (starting_index - k) 38 | 39 | logger.debug("Clustered spheres at radius {0}".format(radius)) 40 | 41 | 42 | def cluster_between_r(spheres, ref_radius, target_radius): 43 | """ Cluster spheres from a target radius to a reference radius, modifying input data in situ 44 | 45 | Args: 46 | spheres (Spheres): complete set of input spheres 47 | ref_radius (float): radius from which cluster identities will be drawn 48 | target_radius (float): radius to which cluster identities will be propagated 49 | 50 | """ 51 | ref_data = spheres.xyzrg[spheres.r == ref_radius] 52 | r_indices = np.where(spheres.r == target_radius)[0] 53 | target_data = spheres.xyzrg[r_indices] 54 | 55 | if (ref_data.shape[0] > 0) and (target_data.shape[0] > 0): 56 | 57 | kdtree = scipy.spatial.cKDTree(ref_data[:, 0:3]) 58 | dist, indices =
kdtree.query(target_data[:, 0:3], distance_upper_bound=ref_radius, n_jobs=-1) 59 | 60 | target_indices = indices < ref_data.shape[0] # cKDTree.query reports an index equal to the number of tree points when no neighbor lies within the bound; keep only real matches 61 | ref_indices = indices[target_indices] 62 | 63 | np.put(spheres.g, r_indices[target_indices], ref_data[ref_indices, 4]) 64 | 65 | logger.debug("Clustered spheres at radius {0} against those at {1}".format(target_radius, ref_radius)) 66 | 67 | 68 | def cluster_improperly_grouped(spheres, radius, min_cluster_size=1, max_clusters=None): 69 | """ Reassigns improperly clustered spheres (groups with negative ids, then groups smaller than min_cluster_size) to 'proper' clusters and optionally merges groups until at most max_clusters remain, modifying input data in situ 70 | 71 | Args: 72 | spheres (Spheres): complete set of input spheres 73 | radius (float): radius at which closest groups are identified 74 | min_cluster_size (int): minimum number of spheres in a 'proper' cluster (Default value = 1) 75 | max_clusters (int): maximum number of 'proper' clusters; no merging is attempted when None (Default value = None) 76 | 77 | """ 78 | min_group = np.amin(spheres.g) 79 | group_list = np.flip(np.arange(min_group, 0), axis=0) # negative ids ordered -1, -2, ..., min_group 80 | reassign_groups_to_closest(spheres, group_list, radius) 81 | 82 | spheres.remove_ungrouped() # presumably drops spheres that still lack a positive group id; confirm in Spheres 83 | 84 | group_counts = np.bincount(spheres.g.astype(int)) 85 | small_groups = np.where(group_counts < min_cluster_size)[0] 86 | if len(small_groups) > 1: 87 | # always includes the 0 group 88 | reassign_groups_to_closest(spheres, small_groups[1:], radius) 89 | disconnected_small_groups = np.where(group_counts < min_cluster_size)[0] # NOTE(review): reuses group_counts computed before the reassignment above; confirm this is intended 90 | if len(disconnected_small_groups) > 1: 91 | spheres.remove_groups(disconnected_small_groups) 92 | 93 | group_counts = np.bincount(spheres.g.astype(int)) 94 | num_groups = np.count_nonzero(group_counts) 95 | 96 | if max_clusters is not None: 97 | if num_groups > max_clusters: 98 | reassign_groups_to_closest(spheres, np.where(group_counts > 0)[0], radius, iterations=(num_groups - max_clusters)) 99 | logger.debug("Improperly grouped spheres re-clustered yielding {0} groups".format(num_groups)) 100 | 101 | 102 | def extract_groups(spheres, surf_radius=None,
prefix=None, group_names=None): 103 | """ Extracts spheres belonging to each cluster from the complete input set and optionally calculates bounded surfaces 104 | 105 | Args: 106 | spheres (Spheres): complete set of input spheres 107 | surf_radius: probe radius used to calculate a bounding surface for each individual group; no surfaces are calculated when None (Default value = None) 108 | prefix: prefix to identify new surfaces; when given, groups are named "<prefix>_p0_sp<index>", otherwise names are taken by position from group_names if that is provided (Default value = None) 109 | 110 | Returns: 111 | group_list ([Spheres]): a list of Spheres objects each corresponding to a different cluster; ordered by decreasing surface volume when surf_radius is given 112 | 113 | """ 114 | groups = np.unique(spheres.g) 115 | 116 | group_list = [] 117 | for index, group in enumerate(groups): 118 | group_spheres = Spheres(xyzrg = spheres.xyzrg[spheres.g == group].copy()) 119 | if prefix is not None: 120 | group_spheres.name = "{0}_p0_sp{1}".format(prefix, index) 121 | elif group_names is not None: 122 | group_spheres.name = group_names[index] 123 | group_list.append(group_spheres) 124 | 125 | logger.debug("Extracting {0} groups from {1}".format(len(group_list), spheres.name)) 126 | 127 | if surf_radius is not None: 128 | exterior_list = [group_spheres.calculate_surface(probe_radius=surf_radius)[0] for group_spheres in group_list] 129 | reindices = np.flip(np.argsort([s.mesh.volume for s in exterior_list]), 0) # positions in group_list ordered from largest to smallest surface volume 130 | 131 | new_group_list = [] 132 | new_ext_list = [] 133 | for index in reindices: 134 | g_s = group_list[index] 135 | e_s = exterior_list[index] 136 | 137 | g_s.g = index 138 | e_s.g = index 139 | g_s.mesh = e_s.mesh.copy() 140 | 141 | e_s.name = g_s.name 142 | new_group_list.append(g_s) 143 | new_ext_list.append(e_s) # NOTE(review): new_ext_list is filled but never returned or read 144 | 145 | return new_group_list 146 | else: 147 | return group_list 148 | 149 | 150 | def hierarchically_cluster_spheres(spheres, ordered_radii, min_new_radius=None, min_cluster_size=10, max_clusters=None): 151 | """ Cluster spheres by grouping spheres at large radius and propagating those assignments down to smaller radii 152 | 153 | Args: 154 | spheres (Spheres): complete set of input
spheres 155 | ordered_radii ([float]): list of radii ordered from largest to smallest 156 | min_new_radius (float): smallest radius at which new clusters may still be created; defaults to the smallest value in ordered_radii (Default value = None) 157 | min_cluster_size (int): minimum number of spheres in a cluster (Default value = 10) 158 | max_clusters (int): maximum number of clusters (Default value = None) 159 | 160 | """ 161 | if min_new_radius is None: 162 | min_new_radius = np.amin(ordered_radii) 163 | 164 | for index, radius in enumerate(ordered_radii): 165 | initial_grouped = spheres.xyzrg[spheres.g != 0].shape[0] # NOTE(review): initial_grouped is computed but never read in the visible code 166 | if index > 0: 167 | cluster_between_r(spheres, ref_radius=ordered_radii[index - 1], target_radius=ordered_radii[index]) 168 | 169 | cluster_within_r(spheres, radius, allow_new=(radius >= min_new_radius)) 170 | logger.debug("Finished naive sphere clustering for spheres in {0}".format(spheres.name)) 171 | 172 | cluster_improperly_grouped(spheres, radius=ordered_radii[-1], min_cluster_size=min_cluster_size, max_clusters=max_clusters) 173 | logger.debug("Finished hierarchically clustering for spheres in {0}".format(spheres.name)) 174 | 175 | 176 | def identify_closest_grouped(spheres, group, radius): 177 | """ Identifies the closest 'properly' grouped cluster (positive group id) to a specified group 178 | 179 | Args: 180 | spheres (Spheres): complete set of input spheres 181 | group (float): group for which to identify the closest clusters 182 | radius (float): radius at which to perform the search 183 | 184 | Returns: 185 | group (float): passthrough of input group 186 | closest (float): id of the closest cluster 187 | magnitude (int): number of pairwise closest connections between the queried group and the closest identified cluster; [None, None, 0] is returned instead when no connection is found 188 | """ 189 | target_indices = np.where((spheres.r == radius) & (spheres.g == group))[0] 190 | grouped_indices = np.where((spheres.r == radius) & (spheres.g > 0) & (spheres.g != group))[0] 191 | 192 | target_data = spheres.xyzrg[target_indices] 193 | grouped_data = spheres.xyzrg[grouped_indices] 194 | 195 | if
(target_data.shape[0] > 0) and (grouped_data.shape[0] > 0): 196 | kdtree = scipy.spatial.cKDTree(grouped_data[:, 0:3]) 197 | dist, indices = kdtree.query(target_data[:, 0:3], distance_upper_bound=1.41 * radius, n_jobs=-1) 198 | # 1.41 factor allows the two spheres to intersect at pi/4 from the closest point 199 | 200 | t_indices = indices < grouped_data.shape[0] # an index equal to the number of tree points marks a target with no neighbor inside the bound 201 | group_indices = indices[t_indices] 202 | if len(group_indices) > 0: 203 | counts = np.bincount(grouped_data[group_indices,4].astype(int)) 204 | closest = np.argmax(counts) 205 | magnitude = counts[closest] 206 | return [group, closest, magnitude] 207 | else: 208 | return [None, None, 0] 209 | else: 210 | return [None, None, 0] 211 | 212 | 213 | def merge_sphere_list(s_list, r=None, g=None): 214 | """ Merges a list of Spheres objects into a single Spheres object, optionally keeping only the spheres that match a radius and/or a group id 215 | 216 | Args: 217 | s_list ([Spheres]): list of input spheres; None entries are skipped 218 | r (float): if given, only spheres whose radius equals this value are kept (Default value = None) 219 | g (float): if given, only spheres whose group id equals this value are kept (Default value = None) 220 | 221 | Returns: 222 | merged_spheres (Spheres): a single Spheres object containing the merged input lists, or None when no spheres remain after filtering 223 | """ 224 | selected_data_list = [] 225 | 226 | for i, s in enumerate(s_list): 227 | if s is None: 228 | continue 229 | selected_data = s.xyzrg 230 | 231 | if r is not None: 232 | selected_data = selected_data[selected_data[:, 3] == r] 233 | if g is not None: 234 | selected_data = selected_data[selected_data[:, 4] == g] 235 | 236 | if selected_data.shape[0] > 0: 237 | selected_data_list.append(selected_data) 238 | 239 | if len(selected_data_list) > 0: 240 | return Spheres(xyzrg=np.vstack(selected_data_list)) 241 | else: 242 | return None 243 | 244 | 245 | def reassign_group(spheres, source_group, target_group): 246 | """ Reassign a group in place 247 | 248 | Args: 249 | spheres (Spheres): complete set of input spheres 250 | source_group (float): group to change 251 | target_group (float): new group id 252 | 253 | """ 254 | source_indices = np.where(spheres.g ==
source_group) 255 | 256 | np.put(spheres.g, source_indices, target_group) 257 | 258 | 259 | def reassign_groups_to_closest(spheres, group_list, radius, iterations=None, preserve_largest=False): 260 | """ Iteratively merges groups: each pass finds the group in group_list with the strongest linkage to another group and reassigns it to that group; operates in place 261 | 262 | Args: 263 | spheres (Spheres): complete set of input spheres 264 | group_list ([float]): list of group ids which are to be iteratively reassigned 265 | radius (float): radius at which searches are to take place 266 | iterations (int): maximum number of merge passes; defaults to len(group_list), and passes stop early once no linkage is found (Default value = None) 267 | preserve_largest: keep the group id of the group with more members? (Default value = False) 268 | 269 | """ 270 | if iterations is None: 271 | iterations = len(group_list) 272 | 273 | for i in range(iterations): 274 | linkages = [] 275 | for group in group_list: 276 | linkages.append(identify_closest_grouped(spheres, group, radius)) 277 | 278 | nonzero_linkages = [link for link in linkages if link[2] > 0] 279 | if len(nonzero_linkages) > 0: 280 | best_link = sorted(nonzero_linkages, key=lambda x: x[2])[-1] # link with the largest connection count 281 | if preserve_largest: 282 | group_sizes = np.bincount(spheres.g.astype(int)) 283 | if group_sizes[best_link[0]] > group_sizes[best_link[1]]: 284 | best_link = [best_link[1], best_link[0]] # swap so the smaller group is the one relabelled 285 | 286 | reassign_group(spheres, best_link[0], best_link[1]) 287 | else: 288 | break 289 | 290 | 291 | def remove_interior(spheres): 292 | """ Remove all spheres which are completely enclosed in larger spheres; operates in place 293 | 294 | Args: 295 | spheres (Spheres): complete set of input spheres 296 | 297 | """ 298 | min_rad = np.amin(spheres.r) 299 | max_rad = np.amax(spheres.r) 300 | 301 | point_tree = scipy.spatial.cKDTree(spheres.xyz) 302 | neighbors = point_tree.query_ball_tree(point_tree, r=(max_rad - min_rad)) # an enclosed sphere's center can be at most (max_rad - min_rad) from the center of the sphere enclosing it 303 | 304 | interior_indices = [] 305 | for point_index, nlist in enumerate(neighbors): 306 | if point_index in interior_indices: 307 | continue
def remove_included_spheres(spheres, ref_spheres, radius):
    """ Removes all spheres with centers within radius of ref_spheres; operates in place

    Args:
      spheres (Spheres): sphere set to prune; its xyzrg array is replaced
      ref_spheres (Spheres): reference spheres whose centers define the exclusion zones
      radius (float): distance from each reference center within which spheres are removed

    """

    kdtree = scipy.spatial.cKDTree(spheres.xyz)
    try:
        groups = kdtree.query_ball_point(ref_spheres.xyz, radius, workers=-1)
    except TypeError:
        # older SciPy releases only know the parallelism keyword as n_jobs
        groups = kdtree.query_ball_point(ref_spheres.xyz, radius, n_jobs=-1)

    # np.unique of an empty list is a float array, which np.delete rejects as an index
    indices = np.unique(list(itertools.chain.from_iterable(groups))).astype(int)

    spheres.xyzrg = np.delete(spheres.xyzrg, indices, axis=0)

    logger.debug("Removed all spheres within {0} A of reference".format(radius))
np.unique(spheres.g)[:-1] 352 | 353 | if spheres.xyzrg.shape[0] == 0: 354 | logger.warning("Attempting to remove overlap in an empty sphere set") 355 | return 356 | 357 | if radii is None: 358 | radii = [np.amax(spheres.r)] 359 | spacing = radii[0] 360 | 361 | for radius in radii: 362 | for group in groups: 363 | group_indices = np.where((spheres.g == group) & (spheres.r > (radius - spacing)) & (spheres.r <= radius))[0] 364 | other_indices = np.where((spheres.g != group) & (spheres.r > (radius - spacing)) & (spheres.r <= radius))[0] 365 | 366 | if len(group_indices) == 0 or len(other_indices) == 0: 367 | continue 368 | 369 | group_data = spheres.xyzrg[group_indices] 370 | other_data = spheres.xyzrg[other_indices] 371 | 372 | other_tree = scipy.spatial.cKDTree(other_data[:, 0:3]) 373 | group_tree = scipy.spatial.cKDTree(group_data[:, 0:3]) 374 | 375 | neighbors = group_tree.query_ball_tree(other_tree, r=2 * radius) 376 | 377 | altered_group_indices = [] 378 | altered_other_indices = [] 379 | 380 | for iteration in range(iterations): 381 | overlaps = np.zeros(len(neighbors)) 382 | overlap_indices = -1 * np.ones(len(neighbors)) 383 | 384 | for group_index, nlist in enumerate(neighbors): 385 | if len(nlist) == 0: 386 | continue 387 | overlap = other_data[nlist, 3].reshape(-1, 1) + group_data[group_index, 3] - scipy.spatial.distance.cdist(other_data[nlist, 0:3], group_data[group_index, 0:3].reshape(1, -1)) 388 | most_overlapping_index = np.argmax(overlap) 389 | if overlap[most_overlapping_index] > 0: 390 | overlaps[group_index] = overlap[most_overlapping_index] 391 | overlap_indices[group_index] = nlist[most_overlapping_index] 392 | 393 | overlapped_group_indices = np.where(overlaps > tolerance)[0] 394 | if len(overlapped_group_indices) == 0: 395 | break 396 | 397 | overlaps = overlaps[overlapped_group_indices] 398 | overlap_indices = overlap_indices[overlapped_group_indices].astype(int) 399 | 400 | reorder = np.argsort(overlaps)[::-1] 401 | overlaps = overlaps[reorder] 
402 | overlap_indices = overlap_indices[reorder] 403 | overlapped_group_indices = overlapped_group_indices[reorder] 404 | 405 | foo, closest_indices = np.unique(overlap_indices, return_index=True) 406 | overlaps = overlaps[closest_indices] 407 | overlap_indices = overlap_indices[closest_indices] 408 | overlapped_group_indices = overlapped_group_indices[closest_indices] 409 | 410 | if not static_last_group: 411 | overlap_adjustment = 0.26 * overlaps # 0.25 should work but leads to a logarithmic approach of proper adjustment 412 | else: 413 | overlap_adjustment = 0.51 * overlaps # move the mobile group twice as much if the other group isn't moving 414 | 415 | vector = overlap_adjustment[:, np.newaxis] * normalize(group_data[overlapped_group_indices, 0:3] - other_data[overlap_indices, 0:3]) 416 | 417 | group_data[overlapped_group_indices, 0:3] = group_data[overlapped_group_indices, 0:3] + vector 418 | group_data[overlapped_group_indices, 3] = group_data[overlapped_group_indices, 3] - overlap_adjustment 419 | altered_group_indices.extend(list(overlapped_group_indices)) 420 | 421 | if not static_last_group: 422 | other_data[overlap_indices, 0:3] = other_data[overlap_indices, 0:3] - vector 423 | other_data[overlap_indices, 3] = other_data[overlap_indices, 3] - overlap_adjustment 424 | altered_other_indices.extend(list(overlap_indices)) 425 | 426 | altered_group_indices = np.unique(altered_group_indices).astype(int) 427 | altered_other_indices = np.unique(altered_other_indices).astype(int) 428 | 429 | spheres.xyzrg[group_indices[altered_group_indices]] = group_data[altered_group_indices] 430 | spheres.xyzrg[other_indices[altered_other_indices]] = other_data[altered_other_indices] 431 | -------------------------------------------------------------------------------- /pyvol/configuration.py: -------------------------------------------------------------------------------- 1 | 2 | """ Handles options for PyVOL. 
Reads configuration files and objects and converts them to option dictionaries and then back again. Accepts and parses string input for all parameters from commandline/gui inputs. """ 3 | 4 | from . import utilities 5 | import configparser 6 | import logging 7 | import numpy as np 8 | import os 9 | import re 10 | import tempfile 11 | from datetime import datetime 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | _option_constraints = { 16 | "general_min_rad_dflt": 1.4, 17 | "general_max_rad_dflt": 3.4, 18 | "general_min_rad_min": 1.2, 19 | "general_min_rad_max": 2.0, 20 | "general_max_rad_min": 2.0, 21 | "general_max_rad_max": 5.0, 22 | "general_constrain_radii": False, 23 | "specification_min_volume_dflt": 200, 24 | "partitioning_subdivide_dflt": False, 25 | "partitioning_max_clusters_dflt": 50, 26 | "partitioning_max_clusters_min": 2, 27 | "partitioning_min_subpocket_rad_dflt": 1.7, 28 | "partitioning_max_subpocket_rad_dflt": 3.4, 29 | "partitioning_min_subpocket_surf_rad_dflt": 1.0, 30 | "partitioning_radial_sampling_dflt": 0.1, 31 | "partitioning_inclusion_radius_buffer_dflt": 1.0, 32 | "partitioning_min_cluster_size_dflt": 5, 33 | "pymol_protein_only_dflt": False, 34 | "pymol_display_mode_dflt": "solid", 35 | "pymol_alpha_dflt": 0.85, 36 | } 37 | 38 | def clean_opts(input_opts): 39 | """ Cleans opts and then returns the sanitized contents 40 | 41 | Args: 42 | input_opts (dict): dictionary of all input options 43 | 44 | Returns: 45 | cleaned_opts (dict): dictionary containing all options for a PyVOL run with extraneous options removed and necessary defaults provided 46 | """ 47 | 48 | timestamp = datetime.now().strftime(r"%H%M%S-%f") 49 | 50 | trimmed_opts = {} 51 | for k, v in input_opts.items(): 52 | if v is not None: 53 | trimmed_opts[k] = v 54 | input_opts = trimmed_opts 55 | 56 | # Load options 57 | opts = {} 58 | opts["protein"] = input_opts.get("protein") 59 | opts["prot_file"] = input_opts.get("prot_file") 60 | opts["lig_file"] = 
input_opts.get("lig_file") 61 | 62 | try: 63 | opts["min_rad"] = float(input_opts.get("min_rad", _option_constraints.get("general_min_rad_dflt"))) 64 | except: 65 | logger.warning("Improper minimum radius parameter ({0}) removed and replaced with default ({1})".format(opts.get("min_rad"), _option_constraints.get("general_min_rad_dflt"))) 66 | opts["min_rad"] = _option_constraints.get("general_min_rad_dflt") 67 | try: 68 | opts["max_rad"] = float(input_opts.get("max_rad", _option_constraints.get("general_max_rad_dflt"))) 69 | except: 70 | logger.warning("Improper maximum radius parameter ({0}) removed and replaced with default ({1})".format(opts.get("max_rad"), _option_constraints.get("general_max_rad_dflt"))) 71 | opts["max_rad"] = _option_constraints.get("general_max_rad_dflt") 72 | opts["constrain_radii"] = input_opts.get("constrain_radii", _option_constraints.get("general_constrain_radii")) 73 | 74 | opts["mode"] = input_opts.get("mode") 75 | opts["coordinates"] = input_opts.get("coordinates") 76 | if opts.get("coordinates") is not None: 77 | if isinstance(opts.get("coordinates"), str): 78 | try: 79 | opts["coordinates"] = np.asarray([float(x) for x in opts.get("coordinates").split(" ")]).reshape(-1,3) 80 | except: 81 | logger.error("Coordinates argument not parsed from str correctly: {0}".format(opts.get("coordinates"))) 82 | raise ValueError 83 | if isinstance(opts.get("coordinates"), list): 84 | opts["coordinates"] = np.array([float(x) for x in opts.get("coordinates")]).reshape(-1,3) 85 | 86 | if opts.get("coordinates").shape != (1,3): 87 | logger.error("Coordinates argument contains the wrong number of dimensions: {0}".format(opts.get("coordinates").shape)) 88 | raise ValueError 89 | opts["resid"] = input_opts.get("resid") 90 | opts["lig_excl_rad"] = input_opts.get("lig_excl_rad") 91 | if opts.get("lig_excl_rad") is not None: 92 | try: 93 | opts["lig_excl_rad"] = float(opts.get("lig_excl_rad")) 94 | except: 95 | logger.warning("Improper ligand exclusion 
radius parameter removed ({0})".format(opts.get("lig_excl_rad"))) 96 | opts["lig_excl_rad"] = None 97 | opts["lig_incl_rad"] = input_opts.get("lig_incl_rad") 98 | if opts.get("lig_incl_rad") is not None: 99 | try: 100 | opts["lig_incl_rad"] = float(opts["lig_incl_rad"]) 101 | except: 102 | logger.warning("Improper ligand inclusion radius parameter removed ({0})".format(opts.get("lig_incl_rad"))) 103 | opts["lig_incl_rad"] = None 104 | opts["min_volume"] = input_opts.get("min_volume") 105 | if opts.get("min_volume") is not None: 106 | try: 107 | opts["min_volume"] = float(input_opts.get("min_volume")) 108 | except: 109 | logger.warning("Improper minimum volume parameter removed ({0})".format(opts.get("min_volume"))) 110 | opts["min_volume"] = None 111 | else: 112 | if opts.get("mode") == "all": 113 | logger.warning("Minimum volume parameter for pocket identification set to default ({0}) for a calculation in 'all' mode with no input parameter value".format(_option_constraints.get("specification_min_volume_dflt"))) 114 | opts["min_volume"] = _option_constraints.get("specification_min_volume_dflt") 115 | 116 | opts["subdivide"] = input_opts.get("subdivide", _option_constraints.get("partitioning_subdivide_dflt")) 117 | if not isinstance(opts.get("subdivide"), bool): 118 | logger.warning("Non-boolean subdivide parameter replaced by default ({0})".format(_option_constraints.get("partitioning_subdivide_dflt"))) 119 | opts["subdivide"] = _option_constraints.get("partitioning_subdivide_dflt") 120 | if opts["subdivide"]: 121 | try: 122 | opts["max_clusters"] = int(input_opts.get("max_clusters", _option_constraints.get("partitioning_max_clusters_dflt"))) 123 | opts["min_subpocket_rad"] = float(input_opts.get("min_subpocket_rad", _option_constraints.get("partitioning_min_subpocket_rad_dflt"))) 124 | opts["max_subpocket_rad"] = float(input_opts.get("max_subpocket_rad", _option_constraints.get("partitioning_max_subpocket_rad_dflt"))) 125 | opts["min_subpocket_surf_rad"] = 
float(input_opts.get("min_subpocket_surf_rad", _option_constraints.get("partitioning_min_subpocket_surf_rad_dflt"))) 126 | opts["radial_sampling"] = float(input_opts.get("radial_sampling", _option_constraints.get("partitioning_radial_sampling_dflt"))) 127 | opts["inclusion_radius_buffer"] = float(input_opts.get("inclusion_radius_buffer", _option_constraints.get("partitioning_inclusion_radius_buffer_dflt"))) 128 | opts["min_cluster_size"] = int(input_opts.get("min_cluster_size", _option_constraints.get("parttitioning_min_cluster_size_dflt"))) 129 | except: 130 | raise ValueError("provided partitioning parameter unable to be cast to int/float; check inputs for a non-numeric value") 131 | 132 | opts["project_dir"] = input_opts.get("project_dir") 133 | opts["output_dir"] = input_opts.get("output_dir") 134 | opts["prefix"] = input_opts.get("prefix") 135 | if opts["prefix"] is None: 136 | if opts.get("prot_file") is not None: 137 | opts["prefix"] = "{0}_{1}".format(timestamp, os.path.splitext(os.path.basename(opts["prot_file"]))[0]) 138 | elif opts.get("protein") is not None: 139 | opts["prefix"] = "{0}_{1}".format(timestamp, opts.get("protein").split()[0].strip("(").strip(")")) 140 | else: 141 | logger.error("No protein input detected: either prot_file or the PyMOL protein selection must be defined") 142 | raise ValueError("No protein geometry defined: provide either a protein file or protein PyMOL selection") 143 | logger.info("Run prefix set to: {0}".format(opts.get("prefix"))) 144 | 145 | if opts.get("output_dir") is None: 146 | if opts.get("project_dir") is not None: 147 | opts["output_dir"] = os.path.join(opts.get("project_dir"), "{0}.pyvol".format(opts.get("prefix"))) 148 | else: 149 | opts["output_dir"] = os.path.join(os.getcwd(), "{0}.pyvol".format(opts.get("prefix"))) 150 | logger.info("Output directory set to: {0}".format(opts.get("output_dir"))) 151 | 152 | utilities.check_dir(opts.get("output_dir")) 153 | 154 | if opts.get("prot_file") is None: 155 | 
opts["prot_file"] = os.path.join(opts.get("output_dir"), "{0}_prot.pdb".format(opts.get("prefix"))) 156 | 157 | opts["ligand"] = input_opts.get("ligand") 158 | opts["lig_file"] = input_opts.get("lig_file") 159 | if (opts.get("ligand") is not None) and (opts.get("lig_file") is None): 160 | opts["lig_file"] = os.path.join(opts.get("output_dir"), "{0}_lig.pdb".format(opts.get("prefix"))) 161 | 162 | opts["logger_stream_level"] = input_opts.get("logger_stream_level") 163 | if opts["logger_stream_level"] not in ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]: 164 | opts["logger_stream_level"] = None 165 | opts["logger_file_level"] = input_opts.get("logger_file_level") 166 | if opts["logger_file_level"] not in ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]: 167 | opts["logger_file_level"] = None 168 | 169 | opts["protein"] = input_opts.get("protein") 170 | opts["ligand"] = input_opts.get("ligand") 171 | opts["protein_only"] = input_opts.get("protein_only") 172 | if not isinstance(opts["protein_only"], bool): 173 | opts["protein_only"] = _option_constraints.get("pymol_protein_only_dflt") 174 | opts["display_mode"] = input_opts.get("display_mode") 175 | if not opts["display_mode"] in ["solid", "mesh", "spheres"]: 176 | opts["display_mode"] = "solid" 177 | opts["palette"] = input_opts.get("palette") 178 | if opts.get("palette") is not None: 179 | palette_valid = False 180 | if isinstance(opts.get("palette"), str): 181 | fragments = list(filter(None, re.split(",", opts.get("palette").strip("\'").strip("\"")))) 182 | cleaned_pieces = [] 183 | for fragment in fragments: 184 | pieces = list(filter(None, fragment.split(" "))) 185 | if len(pieces) > 1: 186 | try: 187 | rgb = [float(piece) for piece in pieces] 188 | cleaned_pieces.append(rgb) 189 | except: 190 | cleaned_pieces.extend(pieces) 191 | else: 192 | cleaned_pieces.append(pieces[0]) 193 | if len(cleaned_pieces) > 0: 194 | logger.info("PyMOL palette parsed: {0}".format(",".join(cleaned_pieces))) 195 | opts["palette"] 
= cleaned_pieces 196 | else: 197 | logger.warning("PyMOL palette unable to be parsed") 198 | 199 | if input_opts.get("alpha") is not None: 200 | try: 201 | opts["alpha"] = float(input_opts.get("alpha")) 202 | except: 203 | logger.warning("Improper alpha parameter removed ({0}) and set to default ({1})".format(opts.get("lig_incl_rad"), _option_constraints.get("pymol_alpha_dflt"))) 204 | opts["alpha"] = _option_constraints.get("pymol_alpha_dflt") 205 | else: 206 | opts["alpha"] = _option_constraints.get("pymol_alpha_dflt") 207 | 208 | # Clean options 209 | if opts.get("prot_file") is None: 210 | logger.error("A protein file must be provided--Terminating job") 211 | raise 212 | 213 | if opts.get("constrain_radii"): 214 | if opts.get("min_rad") < _option_constraints.get("general_min_rad_min"): 215 | logger.warning("Minimum radius constrained from {0} to {1}".format(opts.get("min_rad"), _option_constraints.get("general_min_rad_min"))) 216 | opts["min_rad"] = _option_constraints.get("general_min_rad_min") 217 | elif opts["min_rad"] > _option_constraints.get("general_min_rad_max"): 218 | logger.info("Minimum radius constrained from {0} to {1}".format(opts.get("min_rad"), _option_constraints.get("general_min_rad_max"))) 219 | opts["min_rad"] = _option_constraints.get("general_min_rad_max") 220 | if opts["max_rad"] < _option_constraints.get("general_max_rad_min"): 221 | logger.info("Maximum radius constrained from {0} to 2.0".format(opts.get("max_rad"), _option_constraints.get("general_max_rad_min"))) 222 | opts["max_rad"] = _option_constraints.get("general_max_rad_min") 223 | elif opts["max_rad"] > _option_constraints.get("general_max_rad_max"): 224 | logger.info("Maximum radius constrained from {0} to 5.0".format(opts.get("max_rad"), _option_constraints.get("general_max_rad_max"))) 225 | opts["max_rad"] = _option_constraints.get("general_max_rad_max") 226 | 227 | if opts["mode"] in ["all", "largest"]: 228 | logger.info("Running in all or largest mode: removing lig_file, 
coordinates, resid, lig_excl_rad, and lig_incl_rad parameters from input") 229 | opts["lig_file"] = None 230 | opts["coordinates"] = None 231 | opts["resid"] = None 232 | opts["lig_excl_rad"] = None 233 | opts["lig_incl_rad"] = None 234 | else: 235 | if opts["lig_file"] is not None: 236 | if opts["mode"] is None: 237 | logger.info("Running in specific mode using the provided input ligand: removing resid and coordinates parameters from input") 238 | opts["mode"] = "specific" 239 | opts["resid"] = None 240 | opts["coordinates"] = None 241 | elif opts["resid"] is not None: 242 | if opts["mode"] is None: 243 | logger.info("Running in specific mode using the provided residue id ({0}): removing coordinates parater from input".format(opts["resid"])) 244 | opts["mode"] = "specific" 245 | opts["coordinates"] = None 246 | elif opts["coordinates"] is not None: 247 | logger.info("Running in specific mode using the provided coordinates: {0}".format(opts["coordinates"])) 248 | opts["mode"] = "specific" 249 | else: 250 | logger.info("Defaulting to running in largest mode because no mode was specified and no parameters sufficient to specify a pocket were provided") 251 | opts["mode"] = "largest" 252 | 253 | if opts.get("subdivide"): 254 | if opts.get("max_clusters") < _option_constraints.get("partitioning_max_clusters_min"): 255 | logger.warning("Subpocket analysis impossible with maximum clusters of {0}; disabling subpocket analysis and removing max_clusters, min_subpocket_rad, max_cluster_rad, min_subpocket_surf_rad, radial_sampling, inclusion_radius_buffer, and min_cluster_size parameters if present".format(opts["max_clusters"])) 256 | opts["subdivide"] = False 257 | opts["max_clusters"] = None 258 | opts["min_subpocket_rad"] = None 259 | opts["max_cluster_rad"] = None 260 | opts["min_subpocket_surf_rad"] = None 261 | opts["radial_sampling"] = None 262 | opts["inclusion_radius_buffer"] = None 263 | opts["min_cluster_size"] = None 264 | 265 | # Remove all empty options 266 | 
def opts_to_cfg(opts):
    """ creates the configuration object corresponding to input options

    All five sections are always created; an option is written only when it is
    present in opts and not None. Values are stored as strings.

    Args:
      opts (dict): option dictionary for which to create a configuration object

    Returns:
      config (ConfigParser): configuration object containing formatted options
    """

    def _format_coordinates(value):
        # clean_opts reads string coordinates as space-separated floats, so
        # write that form rather than the bracketed str() of an array
        if isinstance(value, str):
            return value
        try:
            return " ".join(str(float(x)) for x in np.asarray(value).ravel())
        except (TypeError, ValueError):
            return str(value)

    def _format_palette(value):
        # clean_opts reads a string palette as comma-separated entries, where an
        # entry is either a name or space-separated numeric components
        if isinstance(value, str) or not isinstance(value, (list, tuple)):
            return str(value)
        try:
            return ",".join(
                entry if isinstance(entry, str)
                else " ".join(str(float(x)) for x in np.asarray(entry).ravel())
                for entry in value
            )
        except (TypeError, ValueError):
            return str(value)

    # section -> option names, in the order they are written
    layout = (
        ("General", ("prot_file", "lig_file", "min_rad", "max_rad", "constrain_radii")),
        ("Specification", ("mode", "resid", "coordinates", "lig_excl_rad", "lig_incl_rad", "min_volume")),
        ("Partitioning", ("subdivide", "max_clusters", "min_subpocket_rad", "max_subpocket_rad",
                          "min_subpocket_surf_rad", "radial_sampling", "inclusion_radius_buffer",
                          "min_cluster_size")),
        ("Output", ("project_dir", "output_dir", "prefix", "logger_stream_level", "logger_file_level")),
        ("PyMOL", ("protein", "ligand", "protein_only", "display_mode", "palette", "alpha")),
    )
    formatters = {"coordinates": _format_coordinates, "palette": _format_palette}

    config = configparser.ConfigParser()
    for section, option_names in layout:
        config.add_section(section)
        for option in option_names:
            value = opts.get(option)
            if value is not None:
                config.set(section, option, formatters.get(option, str)(value))

    return config
def cfg_to_opts(config):
    """ converts a config to opts

    Options that are absent, valueless, or blank take the listed fallback.
    Numeric and boolean options are converted here; the remaining options are
    returned as the strings stored in the configuration.

    Args:
      config (ConfigParser): configuration object from which options are to be extracted

    Returns:
      opts (dict): dictionary of options read in from the configuration object

    Raises:
      ValueError: if a numeric option or the subdivide/constrain_radii flag holds text that cannot be converted
    """

    def _text(section, option, fallback=None):
        # a key with no value (None) or only whitespace counts as unset
        value = config.get(section, option, fallback=None)
        if value is None or not str(value).strip():
            return fallback
        return value

    def _number(section, option, cast, fallback):
        value = _text(section, option)
        return fallback if value is None else cast(value)

    def _flag(section, option, fallback, strict=True):
        value = _text(section, option)
        if value is None:
            return fallback
        try:
            return config.getboolean(section, option)
        except ValueError:
            if strict:
                raise
            # hand the raw text back so the caller's own validation can reject it
            return value

    opts = {}
    opts["prot_file"] = _text("General", "prot_file")
    opts["lig_file"] = _text("General", "lig_file")
    opts["min_rad"] = _number("General", "min_rad", float, 1.4)
    opts["max_rad"] = _number("General", "max_rad", float, 3.4)
    opts["constrain_radii"] = _flag("General", "constrain_radii", None)

    opts["mode"] = _text("Specification", "mode")
    opts["resid"] = _text("Specification", "resid")
    opts["coordinates"] = _text("Specification", "coordinates")
    opts["lig_excl_rad"] = _number("Specification", "lig_excl_rad", float, -1)
    opts["lig_incl_rad"] = _number("Specification", "lig_incl_rad", float, -1)

    opts["subdivide"] = _flag("Partitioning", "subdivide", False)
    # opts_to_cfg writes min_volume under Specification as a float string;
    # Partitioning is still consulted for files that were written by hand
    min_volume = _number("Specification", "min_volume", float, None)
    if min_volume is None:
        min_volume = _number("Partitioning", "min_volume", float, 200)
    opts["min_volume"] = min_volume
    opts["max_clusters"] = _number("Partitioning", "max_clusters", int, 100)
    opts["min_subpocket_rad"] = _number("Partitioning", "min_subpocket_rad", float, 1.7)
    opts["max_subpocket_rad"] = _number("Partitioning", "max_subpocket_rad", float, 3.4)
    opts["min_subpocket_surf_rad"] = _number("Partitioning", "min_subpocket_surf_rad", float, 1.0)
    opts["radial_sampling"] = _number("Partitioning", "radial_sampling", float, 0.1)
    opts["inclusion_radius_buffer"] = _number("Partitioning", "inclusion_radius_buffer", float, 1.0)
    opts["min_cluster_size"] = _number("Partitioning", "min_cluster_size", int, 50)

    opts["project_dir"] = _text("Output", "project_dir")
    opts["output_dir"] = _text("Output", "output_dir")
    opts["prefix"] = _text("Output", "prefix")
    opts["logger_stream_level"] = _text("Output", "logger_stream_level", fallback="INFO")
    opts["logger_file_level"] = _text("Output", "logger_file_level", fallback="DEBUG")

    opts["protein"] = _text("PyMOL", "protein")
    opts["ligand"] = _text("PyMOL", "ligand")
    # NOTE(review): the True fallback is kept from the previous implementation;
    # it differs from pymol_protein_only_dflt (False) -- confirm which is intended
    opts["protein_only"] = _flag("PyMOL", "protein_only", True, strict=False)
    opts["display_mode"] = _text("PyMOL", "display_mode", fallback="solid")
    opts["palette"] = _text("PyMOL", "palette")
    opts["alpha"] = _text("PyMOL", "alpha", fallback=0.85)

    return opts
def file_to_cfg(filename):
    """ reads a cfg file into a configuration object

    A file that is missing or unreadable does not raise: the returned
    configuration is simply empty and a warning is logged.

    Args:
      filename (str): input filename of a configuration file

    Returns:
      config (ConfigParser): configuration object holding the contents of the file
    """

    # defaults_to_cfg builds its template with allow_no_value=True and bare
    # keys, so the reader has to accept valueless keys as well
    config = configparser.ConfigParser(allow_no_value=True)
    # read() skips files it cannot open and returns the names it did parse
    if config.read(filename):
        logger.info("Configuration file read from {0}".format(filename))
    else:
        logger.warning("Configuration file could not be read from {0}".format(filename))
    return config
gc=None, g1=None, g2=None): 14 | 15 | neg_xyz = np.array([ 16 | [0, 0, 1.1], 17 | [1.1, 0, 1.1], 18 | [0.55, 0.95, 1.1], 19 | [-0.55, 0.95, 1.1], 20 | [-1.1, 0, 1.1], 21 | [-0.55, -0.95, 1.1], 22 | [0.55, -0.95, 1.1], 23 | [2.2, 0, 1.1], 24 | [1.9, 1.1, 1.1], 25 | [1.1, 1.9, 1.1], 26 | [0, 2.2, 1.1], 27 | [-1.1, 1.9, 1.1], 28 | [-1.9, 1.1, 1.1], 29 | [-2.2, 0, 1.1], 30 | [-1.9, -1.1, 1.1], 31 | [-1.1, -1.9, 1.1], 32 | [0, -2.2, 1.1], 33 | [1.1, -1.9, 1.1], 34 | [1.9, -1.1, 1.1], 35 | [0, 0, -1.1], 36 | [1.1, 0, -1.1], 37 | [0.55, 0.95, -1.1], 38 | [-0.55, 0.95, -1.1], 39 | [-1.1, 0, -1.1], 40 | [-0.55, -0.95, -1.1], 41 | [0.55, -0.95, -1.1], 42 | [2.2, 0, -1.1], 43 | [1.9, 1.1, -1.1], 44 | [1.1, 1.9, -1.1], 45 | [0, 2.2, -1.1], 46 | [-1.1, 1.9, -1.1], 47 | [-1.9, 1.1, -1.1], 48 | [-2.2, 0, -1.1], 49 | [-1.9, -1.1, -1.1], 50 | [-1.1, -1.9, -1.1], 51 | [0, -2.2, -1.1], 52 | [1.1, -1.9, -1.1], 53 | [1.9, -1.1, -1.1], 54 | ]) 55 | p2_xyz = np.array([ 56 | [0, 0, 3.2], 57 | [1.1, 0, 3.2], 58 | [0.55, 0.95,3.21], 59 | [-0.55, 0.95, 3.2], 60 | [-1.1, 0, 3.2], 61 | [-0.55, -0.95, 3.2], 62 | [0.55, -0.95, 3.2], 63 | [2.2, 0, 3.2], 64 | [1.9, 1.1, 3.2], 65 | [1.1, 1.9, 3.2], 66 | [0, 2.2, 3.2], 67 | [-1.1, 1.9, 3.2], 68 | [-1.9, 1.1, 3.2], 69 | [-2.2, 0, 3.2], 70 | [-1.9, -1.1, 3.2], 71 | [-1.1, -1.9, 3.2], 72 | [0, -2.2, 3.2], 73 | [1.1, -1.9, 3.2], 74 | [1.9, -1.1, 3.2], 75 | 76 | [4.3, 0, 3.2], 77 | [3.7, 2.15, 3.2], 78 | [2.15, 3.7, 3.2], 79 | [0, 4.3, 3.2], 80 | [-2.15, 3.7, 3.2], 81 | [-3.7, 2.15, 3.2], 82 | [-4.3, 0, 3.2], 83 | [-3.7, -2.15, 3.2], 84 | [-2.15, -3.7, 3.2], 85 | [0, -4.3, 3.2], 86 | [2.15, -3.7, 3.2], 87 | [3.7, -2.15, 3.2], 88 | 89 | [4.3, 0, 2.0], 90 | [3.7, 2.15, 2.0], 91 | [2.15, 3.7, 2.0], 92 | [0, 4.3, 2.0], 93 | [-2.15, 3.7, 2.0], 94 | [-3.7, 2.15, 2.0], 95 | [-4.3, 0, 2.0], 96 | [-3.7, -2.15, 2.0], 97 | [-2.15, -3.7, 2.0], 98 | [0, -4.3, 2.0], 99 | [2.15, -3.7, 2.0], 100 | [3.7, -2.15, 2.0], 101 | 102 | [4.3, 0, 0.8], 103 | [3.7, 2.15, 0.8], 104 
| [2.15, 3.7, 0.8], 105 | [0, 4.3, 0.8], 106 | [-2.15, 3.7, 0.8], 107 | [-3.7, 2.15, 0.8], 108 | [-4.3, 0, 0.8], 109 | [-3.7, -2.15, 0.8], 110 | [-2.15, -3.7, 0.8], 111 | [0, -4.3, 0.8], 112 | [2.15, -3.7, 0.8], 113 | [3.7, -2.15, 0.8], 114 | ]) 115 | p1_xyz = np.array([ 116 | [0, 0, -3.2], 117 | [1.1, 0, -3.2], 118 | [0.55, 0.95, -3.21], 119 | [-0.55, 0.95, -3.2], 120 | [-1.1, 0, -3.2], 121 | [-0.55, -0.95, -3.2], 122 | [0.55, -0.95, -3.2], 123 | [2.2, 0, -3.2], 124 | [1.9, 1.1, -3.2], 125 | [1.1, 1.9, -3.2], 126 | [0, 2.2, -3.2], 127 | [-1.1, 1.9, -3.2], 128 | [-1.9, 1.1, -3.2], 129 | [-2.2, 0, -3.2], 130 | [-1.9, -1.1, -3.2], 131 | [-1.1, -1.9, -3.2], 132 | [0, -2.2, -3.2], 133 | [1.1, -1.9, -3.2], 134 | [1.9, -1.1, -3.2], 135 | 136 | [4.3, 0, -3.2], 137 | [3.7, 2.15, -3.2], 138 | [2.15, 3.7, -3.2], 139 | [0, 4.3, -3.2], 140 | [-2.15, 3.7, -3.2], 141 | [-3.7, 2.15, -3.2], 142 | [-4.3, 0, -3.2], 143 | [-3.7, -2.15, -3.2], 144 | [-2.15, -3.7, -3.2], 145 | [0, -4.3, -3.2], 146 | [2.15, -3.7, -3.2], 147 | [3.7, -2.15, -3.2], 148 | 149 | [4.3, 0, -2.0], 150 | [3.7, 2.15, -2.0], 151 | [2.15, 3.7, -2.0], 152 | [0, 4.3, -2.0], 153 | [-2.15, 3.7, -2.0], 154 | [-3.7, 2.15, -2.0], 155 | [-4.3, 0, -2.0], 156 | [-3.7, -2.15, -2.0], 157 | [-2.15, -3.7, -2.0], 158 | [0, -4.3, -2.0], 159 | [2.15, -3.7, -2.0], 160 | [3.7, -2.15, -2.0], 161 | 162 | [4.3, 0, -0.8], 163 | [3.7, 2.15, -0.8], 164 | [2.15, 3.7, -0.8], 165 | [0, 4.3, -0.8], 166 | [-2.15, 3.7, -0.8], 167 | [-3.7, 2.15, -0.8], 168 | [-4.3, 0, -0.8], 169 | [-3.7, -2.15, -0.8], 170 | [-2.15, -3.7, -0.8], 171 | [0, -4.3, -0.8], 172 | [2.15, -3.7, -0.8], 173 | [3.7, -2.15, -0.8], 174 | ]) 175 | 176 | 177 | rot_matrix = utilities.calculate_rotation_matrix(np.array([0, 0, 1]), normal) 178 | 179 | neg_spheres = Spheres(xyz=neg_xyz * rot_matrix + center, r=np.float64(rn), g=np.float64(gc), name="negative_connection") 180 | p1p_spheres = Spheres(xyz=p1_xyz * rot_matrix + center, r=np.float64(rp), g=g1, name="p1_connection") 181 | 
def construct_3d_surfaces(*domains, **kwargs):
    """Build and display per-domain surfaces joined by a printed-model style connector (experimental).

    Each selection is saved from PyMOL (polymer atoms only), read back as a
    Spheres object tagged with its own group number, and merged. Overlap
    between groups is removed, connector geometry from ``make_interface`` is
    spliced between the first two domains, and every resulting group is
    displayed in PyMOL.

    Args:
        *domains (str): PyMOL selection strings, at least two; the first
            whitespace-delimited token of each is used as its display name
        **kwargs: optional settings
            surface_rad (float): probe radius passed to cluster.extract_groups (Default value = 1.4)
            reference (str): PyMOL selection whose coordinates are passed as
                ``ref_coordinates`` when locating the interface
                (Default value = "2wtk and chain B and resi 185")

    Raises:
        ValueError: if fewer than two domain selections are supplied

    """

    if len(domains) < 2:
        # The connector is always built between groups 1 and 2
        raise ValueError("construct_3d_surfaces requires at least two domain selections")

    # Values arriving through the PyMOL command line are strings
    surface_rad = float(kwargs.get("surface_rad", 1.4))
    reference = kwargs.get("reference", "2wtk and chain B and resi 185")

    atomic_spheres = None
    domain_names = []
    for index, domain in enumerate(domains):
        # NOTE(review): the temporary directory is never removed; confirm that
        # Spheres has finished reading the pdb before adding cleanup here
        output_dir = tempfile.mkdtemp()
        prefix = "domain_{0}".format(index)
        domain_names.append(domain.split(" ")[0])
        domain_pdb_file = os.path.join(output_dir, "{0}.pdb".format(prefix))
        cmd.save(domain_pdb_file, "{0} and poly".format(domain))

        # Group numbers are 1-based and follow the argument order
        d_p = Spheres(pdb=domain_pdb_file, g=np.float64(index + 1))
        atomic_spheres = d_p if atomic_spheres is None else atomic_spheres + d_p

    cluster.remove_overlap(atomic_spheres)

    domains = cluster.extract_groups(atomic_spheres, surf_radius=surface_rad, group_names=domain_names)

    # add the connectors
    g1 = 2
    g2 = 1
    gc = len(domains) + 1

    residue_coordinates = cmd.get_coords(reference, 1)

    coord, normal = utilities.closest_vertex_normals(domains[g1 - 1].mesh, domains[g2 - 1].mesh, ref_coordinates=residue_coordinates)
    p1p, p1n, p2p, p2n, connector = make_interface(coord, normal, gc=gc, g1=g1, g2=g2)

    # Addition produces a new Spheres object, so the name and group are restored afterwards
    g1_name = domains[g1 - 1].name
    cluster.remove_included_spheres(domains[g1 - 1], p2n, 1.5)
    domains[g1 - 1] = domains[g1 - 1] + p1p
    domains[g1 - 1].g = g1
    domains[g1 - 1].name = g1_name

    g2_name = domains[g2 - 1].name
    cluster.remove_included_spheres(domains[g2 - 1], p1n, 1.5)
    domains[g2 - 1] = domains[g2 - 1] + p2p
    domains[g2 - 1].g = g2
    domains[g2 - 1].name = g2_name

    domains.append(connector)
    domains[-1].g = gc

    gestalt = cluster.merge_sphere_list(domains)

    print(gestalt, gestalt.xyzrg.shape, np.unique(gestalt.g))
    cluster.remove_overlap(gestalt, static_last_group=True)
    print(gestalt.xyzrg.shape, np.unique(gestalt.g))
    gestalt_names = [domain.name for domain in domains]
    domains = cluster.extract_groups(gestalt, surf_radius=surface_rad, group_names=gestalt_names)
    print(domains)

    colors = ['tv_red', 'tv_orange', 'tv_blue', 'tv_green']
    for index, domain in enumerate(domains):
        print(domain.name, domain.xyzrg.shape)
        # Cycle through the colors so more than four groups do not overrun the list
        color = colors[index % len(colors)]
        mode = "spheres" if "connection" in domain.name else "mesh"
        pymol_utilities.display_spheres_object(domain, domain.name, mode=mode, color=color)
def load_calculation(data_dir, input_opts=None):
    """ load the results of a calculation from file

    Exactly one ``*.cfg`` file must be present in ``data_dir``; it supplies the
    options (including the prefix) used to locate the ``.rept`` report, and the
    report's ``name`` column identifies one ``.xyzrg`` file per pocket.

    Args:
      data_dir (str): directory where previous calculation results are stored
      input_opts (dict): dictionary of pyvol options that is used to update the options read in from file

    Returns:
      pockets ([Spheres]): a list of Spheres objects each of which contains the geometric information describing a distinct pocket or subpocket
      opts (dict): updated PyVOL options dictionary

    Raises:
      FileNotFoundError: if data_dir is not a directory, if it does not contain exactly one cfg file, or if the rept file is missing

    """

    if not os.path.isdir(data_dir):
        msg = "{0} is not a directory".format(data_dir)
        logger.error(msg)
        raise FileNotFoundError(msg)

    cfg_files = glob.glob(os.path.join(data_dir, "*.cfg"))
    if not cfg_files:
        msg = "No cfg file found in {0}".format(data_dir)
        logger.error(msg)
        raise FileNotFoundError(msg)
    if len(cfg_files) > 1:
        # Kept as FileNotFoundError so existing callers catching it still work
        msg = "Multiple cfg files found in {0}".format(data_dir)
        logger.error(msg)
        raise FileNotFoundError(msg)

    opts = configuration.file_to_opts(cfg_files[0])
    if isinstance(input_opts, dict):
        opts.update(input_opts)
    opts = configuration.clean_opts(opts)

    rept_file = os.path.join(data_dir, "{0}.rept".format(opts.get("prefix")))
    if not os.path.isfile(rept_file):
        msg = "No rept file found at {0}".format(rept_file)
        logger.error(msg)
        raise FileNotFoundError(msg)

    rept_df = pd.read_csv(rept_file)
    # Only the name column is needed; reading it directly avoids the per-row
    # dtype coercion that iterrows applies across columns
    pockets = [
        Spheres(spheres_file=os.path.join(data_dir, "{0}.xyzrg".format(name)))
        for name in rept_df["name"]
    ]

    return pockets, opts
def pocket(**opts):
    """Calculates the SES for a binding pocket

    Input files are staged into the output directory, the bulk-solvent boundary
    is computed with the large probe (``max_rad``), and pockets are then found
    with the small probe (``min_rad``) according to ``mode``. Results are
    written with write_report and write_cfg before returning.

    Args:
      opts (dict): dictionary containing all PyVOL options (see pyvol.pymol_interface.pymol_pocket_cmdline for details)

    Returns:
      pockets ([Spheres]): a list of Spheres objects each of which contains the geometric information describing a distinct pocket or subpocket; empty if the pocket could not be specified or identified
      opts (dict): the options used, with prot_file/lig_file pointing at the staged copies

    """

    output_dir = opts.get("output_dir")

    def _stage_input(path):
        """Copy path into output_dir unless it already lives there; return the path to use."""
        staged = os.path.join(output_dir, os.path.basename(path))
        if os.path.abspath(staged) == os.path.abspath(path):
            # copyfile raises SameFileError when source and destination coincide
            return path
        shutil.copyfile(path, staged)
        return staged

    opts["prot_file"] = _stage_input(opts.get("prot_file"))
    if opts.get("lig_file") is not None:
        opts["lig_file"] = _stage_input(opts.get("lig_file"))

    p_s = Spheres(pdb=opts.get("prot_file"), name="{0}_prot".format(opts.get("prefix")))
    logger.debug("Protein geometry read from {0}".format(opts.get("prot_file")))

    pl_s = p_s.copy()
    if opts.get("lig_file") is not None:
        l_s = Spheres(pdb=opts.get("lig_file"), r=opts.get("lig_incl_rad"), name="{0}_lig_incl".format(opts.get("prefix")))
        logger.debug("Ligand geometry read from {0}".format(opts.get("lig_file")))
        if opts.get("lig_incl_rad") is not None:
            pl_s = p_s + l_s
            logger.debug("Ligand-inclusion radius of {0} applied".format(opts.get("lig_incl_rad")))
    else:
        l_s = None

    pl_s.name = "{0}_interior".format(opts.get("prefix"))

    pl_bs = pl_s.calculate_surface(probe_radius=opts.get("max_rad"))[0]
    logger.debug("Outer bulk-solvent surface calculated")
    pl_bs.name = "{0}_boundary".format(opts.get("prefix"))

    pa_s = p_s + pl_bs
    pa_s.name = "{0}_exterior".format(opts.get("prefix"))
    if (l_s is not None) and (opts.get("lig_excl_rad") is not None):
        le_s = Spheres(xyz=l_s.xyzr, r=opts.get("lig_excl_rad"), name="{0}_lig_excl".format(opts.get("prefix")))
        le_bs = le_s.calculate_surface(probe_radius=opts.get("max_rad"))[0]
        pa_s = pa_s + le_bs
        logger.debug("Ligand-excluded radius of {0} applied".format(opts.get("lig_excl_rad")))

    if opts.get("mode") == "all":
        all_pockets = pa_s.calculate_surface(probe_radius=opts.get("min_rad"), all_components=True, min_volume=opts.get("min_volume"))
        for index, found in enumerate(all_pockets):
            found.name = "{0}_p{1}".format(opts.get("prefix"), index)
        logger.info("Pockets calculated using mode 'all': {0}".format(len(all_pockets)))
        if opts.get("subdivide"):
            logger.warning("Subpocket clustering not currently supported when calculating all independent pockets")
    else:
        if opts.get("mode") == "largest":
            bp_bs = pa_s.calculate_surface(probe_radius=opts.get("min_rad"), all_components=True, largest_only=True)[0]
            logger.info("Largest pocket identified")
        elif opts.get("mode") == "specific":
            # Specification priority: explicit coordinate, then residue, then ligand centroid
            if opts.get("coordinates") is not None:
                coordinate = opts.get("coordinates")
                logger.info("Specific pocket identified from coordinate: {0}".format(opts.get("coordinates")))
            elif opts.get("resid") is not None:
                resid = str(opts.get("resid"))
                chain = None
                if not resid[0].isdigit():
                    # A leading non-digit character is taken as the chain identifier
                    chain = resid[0]
                    resid = int(resid[1:])
                else:
                    resid = int(resid)
                coordinate = utilities.coordinates_for_resid(opts.get("prot_file"), resid=resid, chain=chain)
                logger.info("Specific pocket identified from residue: {0} -> {1} (truncated)".format(opts.get("resid"), coordinate[0,:]))
            elif l_s is not None:
                coordinate = np.mean(l_s.xyz, axis=0).reshape(1, -1)
                logger.info("Specific pocket identified from mean ligand position: {0}".format(coordinate))
            else:
                logger.error("A coordinate, ligand, or residue must be supplied to run in specific mode")
                # Same shape as every other return so callers can always unpack two values
                return [], opts

            p_bs = p_s.calculate_surface(probe_radius=opts.get("min_rad"))[0]
            id_coord = p_bs.nearest_coord_to_external(coordinate).reshape(1, -1)
            bp_bs = pa_s.calculate_surface(probe_radius=opts.get("min_rad"), coordinate=id_coord)[0]
        else:
            logger.error("Unrecognized mode <{0}>--should be 'all', 'largest', or 'specific'".format(opts.get("mode")))
            return [], opts

        bp_bs.name = "{0}_p0".format(opts.get("prefix"))

        # A "pocket" larger than the bulk-solvent boundary means the wrong surface component was selected
        if bp_bs.mesh.volume > pl_bs.mesh.volume:
            logger.error("Binding pocket not correctly identified--try an alternative method to specify the binding pocket")
            return [], opts

        all_pockets = [bp_bs]

        if opts.get("subdivide"):
            all_pockets.extend(subpockets(bounding_spheres = pa_s, ref_spheres = bp_bs, **opts))
            logger.info("Subpockets identified: {0}".format(len(all_pockets) - 1))

    write_report(all_pockets, **opts)
    write_cfg(**opts)

    return all_pockets, opts
def subpockets(bounding_spheres, ref_spheres, **opts):
    """ Partition an identified pocket into subpockets by clustering spheres sampled over a range of probe radii

    Args:
      bounding_spheres (Spheres): a Spheres object containing both the peptide and solvent exposed face external spheres
      ref_spheres (Spheres): a Spheres object holding the interior spheres that define the pocket to be subdivided
      opts (dict): a dictionary containing all PyVOL options (see pyvol.configuration.clean_opts for details)

    Returns:
      grouped_list ([Spheres]): a list of Spheres objects each of which contains the geometric information describing a distinct subpocket

    """

    min_rad = opts.get("min_rad")
    radial_step = opts.get("radial_sampling")

    # Restrict the bounding spheres to those within reach of the reference pocket
    cutoff = min_rad + opts.get("max_rad") + opts.get("inclusion_radius_buffer")
    relevant = bounding_spheres.identify_nonextraneous(ref_spheres=ref_spheres, radius=cutoff)

    # Probe radii ordered from largest to smallest
    ascending_radii = np.arange(min_rad, opts.get("max_subpocket_rad"), radial_step)
    sampling_radii = np.flip(ascending_radii, axis=0)

    per_radius = utilities.sphere_multiprocessing(relevant, sampling_radii, all_components=True)
    merged = cluster.merge_sphere_list(itertools.chain.from_iterable(per_radius))

    cluster.hierarchically_cluster_spheres(
        merged,
        ordered_radii=sampling_radii,
        min_new_radius=opts.get("min_subpocket_rad"),
        min_cluster_size=opts.get("min_cluster_size"),
        max_clusters=opts.get("max_clusters"),
    )

    # Clean-up passes operate on the merged object in place
    cluster.remove_overlap(merged, radii=sampling_radii, spacing=radial_step)
    cluster.remove_overlap(merged)
    cluster.remove_interior(merged)

    return cluster.extract_groups(
        merged,
        surf_radius=opts.get("min_subpocket_surf_rad"),
        prefix=opts.get("prefix"),
    )
def write_report(all_pockets, **opts):
    """ Write each pocket's spheres to an xyzrg file and a csv-formatted volume summary to <prefix>.rept

    Args:
      all_pockets ([Spheres]): a list of Spheres objects each of which contains the complete information about a distinct pocket or subpocket
      output_dir (str): output directory, relative or absolute
      prefix (str): identifying prefix for output files

    """
    import os
    import pandas as pd

    target_dir = opts.get("output_dir")
    utilities.check_dir(target_dir)

    rows = []
    for entry in all_pockets:
        # One geometry file per pocket, named after the pocket itself
        entry.write(os.path.join(target_dir, "{0}.xyzrg".format(entry.name)))
        rows.append({"name": entry.name, "volume": entry.mesh.volume})

    rept_name = os.path.join(target_dir, "{0}.rept".format(opts.get("prefix")))
    pd.DataFrame(rows).to_csv(rept_name, index=False)
    logger.info("Report written to: {0}".format(rept_name))
def compound_occupancy(pose_file, pocket_file, output_dir, output_prefix=None, name_parameter="_Name", scoring_parameter="r_i_glide_gscore", pocket_tolerance=3, panelx=250, panely=200, molsPerRow=4, rowsPerPage=6, palette=[(1,0.2,0.2), (1,0.55,0.15), (1,1,0.2), (0.2,1,0.2), (0.3,0.3,1), (0.5,1,1), (1,0.5,1)]):
    """ Creates a report that highlights 2D compound representations by subpocket occupancy according to the poses in a provided sdf file

    Args:
      pose_file (str): input SDF file containing docked compound poses
      pocket_file (str): input space-delimited text file containing the spheres' 5 dimensional (xyzrg) array describing subpocket geometry
      output_dir (str): output directory for all files
      output_prefix (str): output prefix
      name_parameter (str): SDF property key for the molecule name
      scoring_parameter (str): SDF property key for whichever property should be shown in the report
      pocket_tolerance (float): maximum distance (Angstrom) at which an atom outside of the defined subpocket volumes is still associated with a subpocket
      panelx (int): horizontal width of the drawing space for each molecule
      panely (int): vertical height of the drawing space for each molecule
      molsPerRow (int): number of molecules to fit on a row (total width is <= panelx * molsPerRow)
      rowsPerPage (int): number of rows of molecules to fit on each page (total height is <= panely * rowsPerPage)
      palette ([(float,float,float)]): list of tuples of fractional RGB values that controls the highlighting colors; read only, never modified

    """

    # Entries the supplier could not parse come back as None and are skipped
    molecules = [x for x in Chem.ForwardSDMolSupplier(pose_file) if x is not None]

    # The geometry comes from the pocket_file argument; the previous code read
    # an undefined name here and failed with NameError on every call
    xyzrg = np.loadtxt(pocket_file, delimiter=' ')
    subpocket_spheres = Spheres(xyzrg=xyzrg)

    for molecule in molecules:
        # Only the first conformer of each pose is used for the assignment
        positions = molecule.GetConformers()[0].GetPositions()
        groups = subpocket_spheres.propagate_groups_to_external(positions, tolerance=pocket_tolerance)
        set_atom_subpockets(molecule, groups)

    draw_molecules_with_subpockets(molecules, output_dir, output_prefix, palette, panelx, panely, molsPerRow, rowsPerPage, name_parameter, scoring_parameter)
def draw_molecules_with_subpockets(molecules, output_dir, output_prefix, palette=[(1,0.2,0.2), (1,0.55,0.15), (1,1,0.2), (0.2,1,0.2), (0.3,0.3,1), (0.5,1,1), (1,0.5,1)], panelx=250, panely=200, molsPerRow=4, rowsPerPage=6, name_parameter="_Name", scoring_parameter="r_i_glide_gscore"):
    """ Creates a report that highlights the 2D representation of docked molecules to indicate subpocket occupancy

    Every atom must already carry an integer "subpocket" property (see
    set_atom_subpockets). Atoms with a negative value are left unhighlighted,
    and a bond is highlighted only when both of its atoms share the same
    non-negative subpocket. Pages are written as
    "<output_prefix>_table_<page>.png" in output_dir.

    Args:
      molecules ([rdkit.Chem.rdchem.Mol]): list of rdkit molecules to draw
      output_dir (str): output directory
      output_prefix (str): output prefix
      palette ([(float,float,float)]): list of tuples of fractional RGB values; colors repeat cyclically when there are more subpockets than entries; TODO: allow input of pymol style color strings
      panelx (int): horizontal width of the drawing space for each molecule
      panely (int): vertical height of the drawing space for each molecule
      molsPerRow (int): number of molecules to fit on a row (total width is <= panelx * molsPerRow)
      rowsPerPage (int): number of rows of molecules to fit on each page (total height is <= panely * rowsPerPage)
      name_parameter (str): SDF property key for the molecule name
      scoring_parameter (str): SDF property key for whichever property should be shown in the report

    """
    # Imported locally because this module's import header does not provide os
    import os

    n_colors = len(palette)

    hats = []
    hbnds = []
    hatcolors = []
    hbndcolors = []
    prepped_mols = []
    legends = []
    for index, m in enumerate(molecules):
        ats = []
        bnds = []
        atcolors = {}
        bndcolors = {}

        for atom in m.GetAtoms():
            subpocket = atom.GetIntProp("subpocket")
            if subpocket >= 0:
                idx = atom.GetIdx()
                ats.append(idx)
                atcolors[idx] = palette[subpocket % n_colors]

        for bnd in m.GetBonds():
            begin_group = bnd.GetBeginAtom().GetIntProp("subpocket")
            end_group = bnd.GetEndAtom().GetIntProp("subpocket")
            # Two unassigned atoms (negative group) must not be colored via a negative palette index
            if begin_group == end_group and begin_group >= 0:
                bnds.append(bnd.GetIdx())
                bndcolors[bnd.GetIdx()] = palette[begin_group % n_colors]

        hats.append(ats)
        hbnds.append(bnds)
        hatcolors.append(atcolors)
        hbndcolors.append(bndcolors)

        AllChem.Compute2DCoords(m)
        prepped_mols.append(rdMolDraw2D.PrepareMolForDrawing(m))
        legends.append("{0}: {1} ({2})".format(index, m.GetProp(name_parameter), m.GetProp(scoring_parameter)))

    molsPerPage = molsPerRow * rowsPerPage
    # Ceiling division: a partially filled final page still needs to be drawn
    nPages = -(-len(molecules) // molsPerPage)

    page_data = []
    for page in range(nPages):
        start_mol = page * molsPerPage
        end_mol = min(start_mol + molsPerPage, len(molecules))

        # The canvas is only as tall as the rows actually used on this page
        nRows = -(-(end_mol - start_mol) // molsPerRow)
        canvasx = panelx * molsPerRow
        canvasy = panely * nRows

        drawer = rdMolDraw2D.MolDraw2DCairo(canvasx, canvasy, panelx, panely)
        drawer.DrawMolecules(
            prepped_mols[start_mol:end_mol],
            highlightAtoms=hats[start_mol:end_mol],
            highlightBonds=hbnds[start_mol:end_mol],
            highlightAtomColors=hatcolors[start_mol:end_mol],
            highlightBondColors=hbndcolors[start_mol:end_mol],
            legends=legends[start_mol:end_mol],
        )
        drawer.FinishDrawing()
        page_data.append(drawer.GetDrawingText())

    for page_index, png_bytes in enumerate(page_data):
        filename = os.path.join(output_dir, "{0}_table_{1}.png".format(output_prefix, page_index))
        with open(filename, 'wb') as f:
            f.write(png_bytes)
def display_pockets(pockets, **opts):
    """ Log the volume of each pocket and draw it in PyMOL

    Args:
      pockets ([Spheres]): list of spheres object to display
      opts (dict): a dictionary containing all PyVOL options (see pyvol.pymol_interface.pymol_pocket_cmdline for details)

    """

    n_pockets = len(pockets)
    # The palette is expanded to cover every pocket before anything is drawn
    opts["palette"] = pymol_utilities.construct_palette(color_list=opts.get("palette"), max_value=n_pockets)

    if not n_pockets:
        logger.info("No pockets found to display.")

    for position, current in enumerate(pockets):
        rounded_volume = np.round(current.mesh.volume)
        logger.info("Pocket {0} ({1}) \tVolume: {2} A^3".format(position, current.name, rounded_volume))
        pymol_utilities.display_spheres_object(
            current,
            current.name,
            state=1,
            color=opts.get("palette")[position],
            alpha=opts.get("alpha"),
            mode=opts.get("display_mode"),
        )
def load_calculation_cmdline(data_dir, prefix=None, display_mode=None, palette=None, alpha=None):
    """ Loads a previously calculated pocket from disk and displays it in PyMOL

    Args:
      data_dir (str): directory containing PyVOL output (by default ends in .pyvol); if a file is given instead, its parent directory is used
      prefix (str): internal display name (Default value = None)
      display_mode (str): display mode; the stored value is used when None (Default value = None)
      palette (str): comma-separated list of PyMOL color strings (Default value = None)
      alpha (float): transparency value; the stored value is used when None (Default value = None)

    Raises:
      ValueError: if data_dir is neither an existing directory nor an existing file

    """

    if not os.path.isdir(data_dir):
        if os.path.isfile(data_dir):
            data_dir = os.path.dirname(data_dir)
        else:
            msg = "Ambiguous/unparseable data_dir input: {0}".format(data_dir)
            logger.error(msg)
            raise ValueError(msg)

    # Only explicitly supplied display options override the values stored with the calculation
    overrides = {
        "display_prefix": prefix,
        "display_mode": display_mode,
        "palette": palette,
        "alpha": alpha,
    }
    input_opts = {key: value for key, value in overrides.items() if value is not None}

    pockets, opts = identify.load_calculation(data_dir, input_opts=input_opts)
    display_pockets(pockets, **opts)
    # Report what was loaded; the previous message referenced an undefined
    # variable and raised NameError after the pockets had been drawn
    logger.info("Loading {0} with mode {1}".format(data_dir, opts.get("display_mode")))
containing a ligand--redundant with ligand argument (Default value = None) 85 | min_rad (float): radius for SES calculations (Default value = 1.4) 86 | max_rad (float): radius used to identify the outer, bulk solvent exposed surface (Default value = 3.4) 87 | constrain_radii (bool): restrict input radii to tested values? (Default value = False) 88 | mode (str): pocket identification mode (can be largest, all, or specific) (Default value = "largest") 89 | coordinates ([float]): 3D coordinate used for pocket specification (Default value = None) 90 | residue (str): Pymol-only PyMOL selection string for a residue to use for pocket specification (Default value=None) 91 | resid (str): residue identifier for pocket specification (Default value = None) 92 | lig_excl_rad (float): maximum distance from a provided ligand that can be included in calculated pockets (Default value = None) 93 | lig_incl_rad (float): minimum distance from a provided ligand that should be included in calculated pockets when solvent border is ambiguous (Default value = None) 94 | min_volume (float): minimum volume of pockets returned when running in 'all' mode (Default value = 200) 95 | subdivide (bool): calculate subpockets? 
(Default value = False) 96 | max_clusters (int): maximum number of clusters (Default value = None) 97 | min_subpocket_rad (float): minimum radius that identifies distinct subpockets (Default value = 1.7) 98 | max_subpocket_rad (float): maximum sampling radius used in subpocket identification (Default value = 3.4) 99 | min_subpocket_surf_rad (float): radius used to calculate subpocket surfaces (Default value = 1.0) 100 | inclusion_radius_buffer (float): buffer radius in excess of the nonextraneous radius from the identified pocket used to identify atoms pertinent to subpocket clustering (Default value = 1.0) 101 | radial_sampling (float): radial sampling used for subpocket clustering (Default value = 0.1) 102 | min_cluster_size (int): minimum number of spheres in a proper cluster; used to eliminate insignificant subpockets (Default value = 50) 103 | project_dir (str): parent directory in which to create the output directory if the output directory is unspecified (Default value = None) 104 | output_dir (str): filename of the directory in which to place all output; can be absolute or relative (Default value = None) 105 | prefix (str): identifying string for output (Default value = None) 106 | logger_stream_level (str): sets the logger level for stdio output (Default value = "INFO") 107 | logger_file_level (str): sets the logger level for file output (Default value = "DEBUG") 108 | protein_only (bool): PyMOL-only include only peptides in protein file 109 | display_mode (str): PyMOL-only display mode for calculated pockets (Default value = "solid") 110 | alpha (float): PyMOL-only display option specifying translucency of CGO objects (Default value = 1.0) 111 | palette (str): PyMOL-only display option representing a comma separated list of PyMOL color strings (Default value = None) 112 | 113 | """ 114 | 115 | opts = { 116 | "protein": protein, 117 | "ligand": ligand, 118 | "prot_file": prot_file, 119 | "lig_file": lig_file, 120 | "min_rad": min_rad, 121 | "max_rad": 
def pymol_pocket(**opts):
    """ Perform PyMOL-dependent processing of inputs to generate input files for PyVOL pocket processing

    Args:
      opts (dict): dictionary containing all PyVOL options (see pyvol.pymol_interface.pymol_pocket_cmdline for details)

    Returns:
      pockets ([Spheres]): a list of Spheres objects each of which contains the geometric information describing a distinct pocket or subpocket
      output_opts (dict): dictionary containing the actual options used in the pocket calculation

      A bare None is returned instead when the protein selection contains no atoms.

    Raises:
      ValueError: if neither a protein selection nor a protein file is supplied

    """

    true_strings = ("True", "true", "t", "1")
    false_strings = ("False", "false", "f", "0")

    # PyMOL hands command-line arguments over as strings; coerce the boolean ones
    for arg in ("constrain_radii", "subdivide", "protein_only"):
        value = opts.get(arg)
        if value is None or isinstance(value, bool):
            # absent/unset options are left for clean_opts to default
            continue
        if value in true_strings:
            opts[arg] = True
        elif value in false_strings:
            opts[arg] = False
        else:
            logger.warning("Boolean argument {0} ({1}) not parsed correctly and reverting to default".format(arg, value))
    opts = configuration.clean_opts(opts)

    if opts.get("protein") is None:
        if opts.get("prot_file") is None:
            logger.error("No protein input: prot_file and protein inputs are empty")
            raise ValueError("No protein input: prot_file and protein inputs are empty")
        logger.debug("Protein file already specified on disk; skipping protein processing.")
    else:
        if opts.get("protein_only"):
            opts["protein"] = "({0}) and (poly)".format(opts.get("protein"))

        # the ligand must never be counted as part of the protein surface
        if opts.get("ligand") is not None:
            opts["protein"] = "({0}) and not ({1})".format(opts.get("protein"), opts.get("ligand"))

        logger.debug("Final protein selection: {0}".format(opts.get("protein")))
        prot_atoms = cmd.count_atoms(opts.get("protein"))
        if prot_atoms == 0:
            logger.error("No atoms included in protein selection--ending calculation")
            return
        elif prot_atoms < 100:
            logger.warning("Only {0} atoms included in protein selection".format(prot_atoms))

        cmd.save(opts.get("prot_file"), opts.get("protein"))
        logger.debug("Protein '{0}' saved to {1}".format(opts.get("protein"), opts.get("prot_file")))

    if (opts.get("mode") == "specific") and (opts.get("ligand") is not None):
        cmd.save(opts.get("lig_file"), opts.get("ligand"))
        logger.debug("Ligand selection: {0}".format(opts.get("ligand")))

    # explicit coordinates take precedence over a residue selection
    if opts.get("coordinates") is not None:
        opts["residue"] = None
    elif opts.get("residue") is not None:
        opts["coordinates"] = cmd.get_coords("{0} and sidechain".format(opts.get("residue")), 1)

    pockets, output_opts = identify.pocket_wrapper(**opts)

    display_pockets(pockets, **output_opts)
    return pockets, output_opts
import logging
import math

logger = logging.getLogger(__name__)

try:
    from pymol import cgo, cmd, CmdException
except Exception:
    # best effort: the module must stay importable outside of PyMOL
    logger.warning("PyMOL not imported")


def _to_hex(color):
    """ Format an RGB triple of 0-1 floats as a PyMOL '0xRRGGBB' color string """
    return '0x%02x%02x%02x' % tuple([int(255 * x) for x in color])


def construct_palette(color_list=None, max_value=7, min_value=0):
    """ Construct a palette

    Args:
      color_list ([str]): list of PyMOL color strings or RGB tuples of 0-1 floats; the supplied list is not modified (Default value = None)
      max_value (int): max palette index (Default value = 7)
      min_value (int): min palette index (Default value = 0)

    Returns:
      palette ([str]): list of color definitions formatted as '0xRRGGBB'

    """
    output_range = max_value - min_value

    default_color_list = ['tv_red', 'tv_orange', 'tv_yellow', 'tv_green', 'tv_blue', 'aquamarine', 'violet']
    if color_list is None:
        color_list = default_color_list
    else:
        # work on a copy so that the caller's list is never extended in place
        color_list = list(color_list)
        if (output_range > 1) and len(color_list) == 1:
            logger.warning("Only a single color has been provided for multi-output visualization--supplementing the input palette with default values")
            color_list.extend(default_color_list)

    colors = []
    for color in color_list:
        if isinstance(color, str):
            colors.append(cmd.get_color_tuple(color))
        else:
            colors.append(tuple(color))

    palette = []
    if output_range <= len(colors):
        # enough input colors: use them directly
        for color in colors[:max_value]:
            palette.append(_to_hex(color))
    elif len(colors) > 1:
        # more outputs than inputs: interpolate linearly between neighboring input colors
        step = float(len(colors)) / float(output_range)
        for i in range(output_range):
            ix = float(i) * step

            # get the indices of the surrounding colors correcting for floating point imprecision
            lower_ind = max(int(math.floor(ix)), 0)
            upper_ind = min(int(math.ceil(ix)), len(colors) - 1)
            fx = ix - lower_ind

            if lower_ind == upper_ind:
                # special case where interpolation is exactly at an input color
                palette.append(_to_hex(colors[lower_ind]))
            else:
                # fx is the fraction of the way from the lower to the upper color
                lower, upper = colors[lower_ind], colors[upper_ind]
                palette.append(_to_hex([(1 - fx) * lower[c] + fx * upper[c] for c in range(3)]))

    logger.debug("Palette constructed with {0} colors".format(len(palette)))
    return palette


def display_pseudoatom_group(spheres, name, color='gray60', palette=None):
    """ Displays a collection of pseudoatoms

    Args:
      spheres (Spheres): Spheres object holding pocket geometry
      name (str): display name
      color (str): PyMOL color string used when no palette is given (Default value = 'gray60')
      palette ([str]): palette indexed by each sphere's group number minus one (Default value = None)

    Returns:
      group_name (str): name of the PyMOL group holding the pseudoatoms; None if spheres is None

    """

    if spheres is None:
        return

    for index, xyzrg in enumerate(spheres.xyzrg):
        # columns are x, y, z, radius, group
        atom_color = color if palette is None else palette[int(xyzrg[4] - 1)]
        cmd.pseudoatom("{0}.{1}".format(name, index), pos=list(xyzrg[0:3]), vdw=float(xyzrg[3]), color=atom_color)

    group_name = "{0}_g".format(name)
    cmd.group(group_name, "{0}.*".format(name))
    cmd.show("spheres", group_name)
    logger.debug("Pseudoatom group of {0} spheres created with group name {1}".format(spheres.xyzrg.shape[0], group_name))
    return group_name


def display_spheres_object(spheres, name, state=1, color='marine', alpha=1.0, mode="solid"):
    """ Loads a mesh object into a cgo list for display in PyMOL

    Args:
      spheres (Spheres): Spheres object containing all geometry
      name (str): display name
      state (int): model state (Default value = 1)
      color (str): PyMOL color string (Default value = 'marine')
      alpha (float): transparency value (Default value = 1.0)
      mode (str): display mode; one of "solid", "mesh", or "spheres" (Default value = "solid")

    Returns:
      group_name (str): name of the pseudoatom group when mode is "spheres"; None otherwise

    """

    alpha = float(alpha)
    if spheres is None:
        return None

    if mode in ("mesh", "solid"):
        if spheres.mesh is None:
            return None
        if mode == "solid":
            cmd.load_cgo(mesh_to_solid_CGO(spheres.mesh, color=color, alpha=alpha), name, state)
        else:
            cmd.load_cgo(mesh_to_wireframe_CGO(spheres.mesh, color, alpha=alpha), name, state)
        return None
    elif mode == "spheres":
        return display_pseudoatom_group(spheres, name, color=color)

    logger.warning("Unrecognized display mode: {0}".format(mode))
    return None


def mesh_to_solid_CGO(mesh, color, alpha=1.0):
    """Creates a solid CGO object for a mesh for display in PyMOL

    Args:
      mesh (Trimesh): Trimesh mesh object
      color (str): PyMOL color string
      alpha (float): transparency value (Default value = 1.0)

    Returns:
      cgobuffer (list): CGO buffer that contains the instruction to load a solid object

    """

    cgobuffer = [cgo.BEGIN, cgo.TRIANGLES, cgo.ALPHA, alpha]
    color_values = cmd.get_color_tuple(cmd.get_color_index(color))

    for face in mesh.faces:
        for v_index in face:
            cgobuffer.append(cgo.COLOR)
            cgobuffer.extend(color_values)

            cgobuffer.append(cgo.NORMAL)
            cgobuffer.extend([mesh.vertex_normals[v_index][i] for i in range(3)])
            cgobuffer.append(cgo.VERTEX)
            cgobuffer.extend([mesh.vertices[v_index][i] for i in range(3)])
    cgobuffer.append(cgo.END)
    logger.debug("CGO solid object created for mesh: {0}".format(mesh))
    return cgobuffer


def mesh_to_wireframe_CGO(mesh, color_tuple, alpha=1.0):
    """Creates a wireframe CGO object for a mesh for display in PyMOL

    Args:
      mesh (Trimesh): Trimesh mesh object
      color_tuple (str or tuple): PyMOL color string, or an RGB tuple that is used as given
      alpha (float): transparency value (Default value = 1.0)

    Returns:
      cgobuffer (list): CGO buffer that contains the instruction to load a wireframe object

    """

    # color names are resolved through PyMOL; anything else is taken to already be RGB values
    if isinstance(color_tuple, str):
        color_values = cmd.get_color_tuple(cmd.get_color_index(color_tuple))
    else:
        color_values = tuple(color_tuple)

    cgobuffer = [cgo.BEGIN, cgo.LINES, cgo.ALPHA, alpha]

    cgobuffer.append(cgo.COLOR)
    cgobuffer.extend(color_values)

    for edge in mesh.edges:
        cgobuffer.append(cgo.VERTEX)
        cgobuffer.extend(mesh.vertices[edge[0]])
        cgobuffer.append(cgo.VERTEX)
        cgobuffer.extend(mesh.vertices[edge[1]])

    cgobuffer.append(cgo.END)
    logger.debug("CGO wireframe object created for mesh: {0}".format(mesh))
    return cgobuffer
__version__ = "1.7.8"

import logging
import os
from pymol.Qt import QtCore, QtWidgets
import subprocess
import sys
import time

logger = logging.getLogger("pyvol.plugin")


def __init_plugin__(app=None):
    """ PyMOL plugin entry point: registers the PyVOL commands, checks for MSMS, and adds the GUI menu item

    Args:
      app: unused; accepted because PyMOL passes it to plugin entry points (Default value = None)

    """
    # Load the plugin in three steps: 1) import PyVOL 2) find msms if necessary 3) try to add gui
    import shutil

    # Import PyVOL
    try:
        from pymol import cmd
        from pyvol import pymol_interface
        cmd.extend('pocket', pymol_interface.pymol_pocket_cmdline)
        cmd.extend('load_pocket', pymol_interface.load_calculation_cmdline)
        logger.debug("PyVOL successfully imported")
    except Exception:
        logger.info("PyVOL not imported; install from conda (or PyPI with manual msms installation)")

    # shutil.which replaces distutils.spawn.find_executable (distutils is gone from Python 3.12)
    msms_exe = shutil.which("msms")
    if msms_exe is None:
        logger.info("MSMS not found in the path; confirm installation manually or reinstall using conda")

    # Try to link the GUI
    try:
        from pymol.plugins import addmenuitemqt
        addmenuitemqt('PyVOL', pyvol_window)
    except Exception:
        logger.warning("PyVOL GUI not able to be loaded. This is most often seen when using older PyMOL distributions that use tkinter for GUIs rather thean QT. Update PyMOL to enable the GUI")

    # NOTE(review): install_pypi_pyvol, install_cached_pyvol and update_pypi_pyvol are not
    # defined in this module, so this currently always falls through to the warning
    try:
        from pymol import cmd
        cmd.extend("install_pyvol", install_pypi_pyvol)
        cmd.extend("install_cached_pyvol", install_cached_pyvol)
        cmd.extend("update_pyvol", update_pypi_pyvol)
    except Exception:
        logger.warning("PyVOL installation commands not able to be added to command-line interface")


def pyvol_window():
    """ Builds the PyVOL dialog from pyvol_gui.ui, wires up its buttons, and shows it

    """

    from pymol.Qt.utils import loadUi

    dialog = QtWidgets.QDialog()
    uifile = os.path.join(os.path.dirname(__file__), 'pyvol_gui.ui')
    form = loadUi(uifile, dialog)

    refresh_installation_status(form)

    form.close_button.clicked.connect(dialog.close)
    form.run_button.clicked.connect(lambda: run_gui_pyvol(form))

    form.browse_button.clicked.connect(lambda: browse_pocket_file(form))
    form.load_button.clicked.connect(lambda: run_gui_load(form))

    # NOTE(review): install_local_pyvol and toggle_included_msms are not defined in this
    # module; the lambdas only fail once the corresponding widget is used
    form.install_remote_button.clicked.connect(lambda: install_remote_pyvol(form))
    form.install_cache_button.clicked.connect(lambda: install_local_pyvol(form))

    form.check_updates_button.clicked.connect(lambda: refresh_installation_status(form, check_for_updates=True))
    form.update_button.clicked.connect(lambda: update_pyvol(form))

    form.msms_included_cbox.stateChanged.connect(lambda: toggle_included_msms(form))

    dialog.show()


def browse_pocket_file(form):
    """ Launches a window to select a file and writes the first selection into the pocket file field

    Args:
      form: loaded PyVOL dialog

    """

    selected = QtWidgets.QFileDialog.getOpenFileNames(None, 'Open file', os.getcwd(), filter='Pocket Files (*.pyvol)')[0]
    if not selected:
        # dialog cancelled: leave the current text untouched
        return
    form.pocket_dir_ledit.setText(selected[0])


def install_remote_pyvol(form):
    """ Attempts a de novo PyVOL installation using conda

    Args:
      form: loaded PyVOL dialog whose installation status is refreshed afterwards

    """
    from conda.cli import python_api
    python_api.run_command(python_api.Commands.INSTALL, "bio-pyvol")

    # register the same commands under the same names as __init_plugin__
    try:
        from pymol import cmd
        from pyvol import pymol_interface
        cmd.extend('pocket', pymol_interface.pymol_pocket_cmdline)
        cmd.extend('load_pocket', pymol_interface.load_calculation_cmdline)
    except Exception:
        logger.warning("PyVOL commands could not be registered after installation", exc_info=True)
    refresh_installation_status(form)


def update_pyvol(form=None):
    """ Attempts to update PyVOL using conda

    Args:
      form: loaded PyVOL dialog whose installation status is refreshed afterwards; skipped when None (Default value = None)

    """
    from conda.cli import python_api
    python_api.run_command(python_api.Commands.UPDATE, "bio-pyvol")

    msg = QtWidgets.QMessageBox()
    msg.setIcon(QtWidgets.QMessageBox.Information)
    msg.setWindowTitle("PyVOL Updated")
    msg.setInformativeText("The PyVOL backend has been updated; however, PyMOL will not load the new code until it is restarted.")
    msg.setStandardButtons(QtWidgets.QMessageBox.Ok)
    msg.setMinimumSize(QtCore.QSize(600, 200))  # Doesn't seem to work
    msg.exec_()

    if form is not None:
        refresh_installation_status(form)
(Default value = False) 128 | 129 | """ 130 | import distutils 131 | import distutils.util 132 | import json 133 | 134 | def apply_color(string, color): 135 | """ Applies a color to html text 136 | 137 | Args: 138 | string (str): text 139 | color (str): color to apply 140 | 141 | Returns: 142 | colored_text (str): html formatted text 143 | 144 | """ 145 | return "{1}".format(color, string) 146 | 147 | all_pckgs = subprocess.check_output([sys.executable, "-m", "pip", "list", "--format=json"]).decode('utf-8').strip() 148 | pckgs = json.loads(all_pckgs) 149 | 150 | status_msg = "" 151 | pyvol_version = None 152 | biopython_version = None 153 | numpy_version = None 154 | pandas_version = None 155 | scipy_version = None 156 | sklearn_version = None 157 | trimesh_version = None 158 | 159 | remote_msg = None 160 | 161 | pyvol_installed = False 162 | for pckg in pckgs: 163 | if pckg["name"] == "bio-pyvol": 164 | pyvol_version = pckg["version"] 165 | pyvol_installed = True 166 | 167 | for pckg in pckgs: 168 | if pckg["name"] == "biopython": 169 | biopython_version = pckg["version"] 170 | if pyvol_installed: 171 | biopython_version = apply_color(biopython_version, "green") 172 | elif pckg["name"] == "numpy": 173 | numpy_version = pckg["version"] 174 | if pyvol_installed: 175 | numpy_version = apply_color(numpy_version, "green") 176 | elif pckg["name"] == "pandas": 177 | pandas_version = pckg["version"] 178 | if pyvol_installed: 179 | pandas_version = apply_color(pandas_version, "green") 180 | elif pckg["name"] == "scipy": 181 | scipy_version = pckg["version"] 182 | if pyvol_installed: 183 | scipy_version = apply_color(scipy_version, "green") 184 | elif pckg["name"] == "scikit-learn": 185 | sklearn_version = pckg["version"] 186 | if pyvol_installed: 187 | sklearn_version = apply_color(sklearn_version, "green") 188 | elif pckg["name"] == "trimesh": 189 | trimesh_version = pckg["version"] 190 | if pyvol_installed: 191 | trimesh_version = apply_color(trimesh_version, "green") 
192 | 193 | if pyvol_version is None: 194 | pyvol_version = apply_color("not found", "red") 195 | if biopython_version is None: 196 | biopython_version = apply_color("not found", "red") 197 | if numpy_version is None: 198 | numpy_version = apply_color("not found", "red") 199 | if pandas_version is None: 200 | pandas_version = apply_color("not found", "red") 201 | if scipy_version is None: 202 | scipy_version = apply_color("not found", "red") 203 | if sklearn_version is None: 204 | sklearn_version = apply_color("not found", "red") 205 | if trimesh_version is None: 206 | trimesh_version = apply_color("not found", "red") 207 | 208 | # new options for finding msms 209 | msms_installed = False 210 | included_msms_present = False 211 | included_msms_exe = None 212 | 213 | if pyvol_installed: 214 | msms_exe = distutils.spawn.find_executable("msms") 215 | if msms_exe is not None: 216 | if os.path.exists(msms_exe): 217 | msms_installed = True 218 | else: 219 | msms_exe = None 220 | 221 | if msms_installed: 222 | form.msms_system_label.setText("{0}".format(apply_color(msms_exe, "blue"))) 223 | else: 224 | form.msms_system_label.setText("{0}".format(apply_color("not found", "red"))) 225 | 226 | if not pyvol_installed: 227 | gui_version = __version__ 228 | form.run_tab.setEnabled(False) 229 | form.run_button.setEnabled(False) 230 | form.load_tab.setEnabled(False) 231 | form.tabWidget.setCurrentIndex(2) 232 | form.check_updates_button.setEnabled(False) 233 | form.update_button.setEnabled(False) 234 | 235 | form.install_remote_browser.setText("Conda has not yet been queried.
") 236 | form.install_remote_button.setEnabled(True) 237 | 238 | cache_present = False 239 | cache_version = None 240 | installer_dir = os.path.dirname(os.path.realpath(__file__)) 241 | cache_dir = os.path.join(installer_dir, "cached_source") 242 | 243 | if pyvol_installed: 244 | form.setWindowTitle("PyVOL v{0}".format(pyvol_version)) 245 | form.install_cache_button.setEnabled(False) 246 | form.install_remote_button.setEnabled(False) 247 | form.check_updates_button.setEnabled(True) 248 | form.update_button.setEnabled(False) 249 | 250 | if check_for_updates: 251 | update_available = False 252 | 253 | avail_pckgs = subprocess.check_output([sys.executable, "-m", "pip", "list", "--outdated", "--format=json"]).decode('utf-8').strip() 254 | avail = json.loads(avail_pckgs) 255 | for pckg in avail: 256 | if pckg["name"] == "bio-pyvol": 257 | update_available = True 258 | form.install_remote_browser.setText(("A new version of PyVOL is available through Conda:
" 259 | "  pyvol: {0} -> {1}").format(pyvol_version, apply_color(pckg['latest_version'], "blue"))) 260 | break 261 | 262 | if update_available: 263 | form.update_button.setEnabled(True) 264 | else: 265 | form.update_button.setEnabled(True) 266 | form.install_remote_browser.setText(("Local PyVOL is up to date (version {0})
").format(pyvol_version)) 267 | 268 | if msms_installed: 269 | form.run_tab.setEnabled(True) 270 | form.run_button.setEnabled(True) 271 | form.load_tab.setEnabled(True) 272 | status_msg = "PyVOL seems to be correctly installed.
" 273 | else: 274 | form.run_tab.setEnabled(False) 275 | form.run_button.setEnabled(False) 276 | form.load_tab.setEnabled(False) 277 | form.tabWidget.setCurrentIndex(2) 278 | status_msg = apply_color("Error: MSMS must be installed for PyVOL to run.
", "red") 279 | 280 | gui_version = None 281 | expected_gui_version = None 282 | try: 283 | import pyvol 284 | expected_gui_version = pyvol.__guiversion__ 285 | if __version__ == expected_gui_version: 286 | gui_version = apply_color(__version__, "green") 287 | else: 288 | gui_version = apply_color("{0} ({1} expected)".format(__version__, expected_gui_version), "blue") 289 | status_msg = status_msg + "{0}--check whether the PyVOL backend is up-to-date and using the PyMOL plugin manager reinstall the newest version of the plugin from github.
".format(apply_color("GUI version mismatch", "red")) 290 | except: 291 | gui_version = __version__ 292 | form.install_status_browser.setText(( 293 | "  pyvol: {0}
" 294 | "  pyvol gui: {7}
" 295 | "  biopython: {1}
" 296 | "  numpy: {2}
" 297 | "  pandas: {3}
" 298 | "  scipy: {4}
" 299 | "  sklearn: {5}
" 300 | "  trimesh: {6}

def run_gui_load(form):
    """ Loads a precalculated pocket into PyMOL

    Args:
      form: loaded PyVOL dialog from which the loading parameters are read

    """
    from pyvol import pymol_interface

    # Loading Parameters; empty fields are passed on as None
    pocket_dir = form.pocket_dir_ledit.text()
    prefix = form.load_prefix_ledit.text() or None
    palette = form.load_color_ledit.text() or None
    alpha = form.load_alpha_ledit.text() or None

    # falls back to "solid" if no display button is checked
    display_mode = "solid"
    if form.load_solid_rbutton.isChecked():
        display_mode = "solid"
    elif form.load_mesh_rbutton.isChecked():
        display_mode = "mesh"
    elif form.load_spheres_rbutton.isChecked():
        display_mode = "spheres"

    pymol_interface.load_calculation_cmdline(pocket_dir, prefix=prefix, display_mode=display_mode, palette=palette, alpha=alpha)


def run_gui_pyvol(form):
    """ Runs a PyVOL calculation

    Args:
      form: loaded PyVOL dialog from which the calculation parameters are read

    """
    from pyvol import pymol_interface

    # Basic Parameters
    protein = form.prot_sele_ledit.text()
    protein_only = form.excl_org_cbox.isChecked()
    min_rad = form.min_rad_ledit.text()
    max_rad = form.max_rad_ledit.text()

    # Pocket Selection; falls back to "largest" if no selection button is checked
    mode = "largest"
    min_volume = None
    ligand = None
    residue = None
    resid = None
    coordinate = None

    if form.all_rbutton.isChecked():
        mode = "all"
        min_volume = form.min_volume_ledit.text()
    elif form.largest_rbutton.isChecked():
        mode = "largest"
    elif form.ligand_rbutton.isChecked():
        mode = "specific"
        ligand = form.lig_sele_ledit.text()
    elif form.residue_rbutton.isChecked():
        mode = "specific"
        residue = form.residue_sele_ledit.text()
    elif form.resid_rbutton.isChecked():
        mode = "specific"
        resid = form.resid_ledit.text()
    elif form.coordinate_rbutton.isChecked():
        mode = "specific"
        coordinate = form.coordinate_ledit.text()

    # Partitioning Parameters
    subdivide = form.subdivide_cbox.isChecked()
    max_clusters = form.max_clusters_ledit.text()
    min_subpocket_rad = form.min_internal_rad_ledit.text()

    # Display and Output Options; falls back to "solid" if no display button is checked
    display_mode = "solid"
    if form.solid_rbutton.isChecked():
        display_mode = "solid"
    elif form.mesh_rbutton.isChecked():
        display_mode = "mesh"
    elif form.spheres_rbutton.isChecked():
        display_mode = "spheres"
    alpha = form.alpha_ledit.text()
    palette = form.palette_ledit.text() or None
    project_dir = form.project_dir_ledit.text() or None

    pymol_interface.pymol_pocket_cmdline(protein=protein, protein_only=protein_only, min_rad=min_rad, max_rad=max_rad, mode=mode, min_volume=min_volume, ligand=ligand, residue=residue, resid=resid, coordinates=coordinate, display_mode=display_mode, palette=palette, alpha=alpha, project_dir=project_dir, subdivide=subdivide, min_subpocket_rad=min_subpocket_rad, max_clusters=max_clusters)
import utilities 5 | from .exceptions import * 6 | from Bio.PDB import PDBParser 7 | from Bio.PDB.ResidueDepth import _get_atom_radius 8 | import glob 9 | import itertools 10 | import logging 11 | import numpy as np 12 | import os 13 | import pandas as pd 14 | import scipy 15 | import shutil 16 | import sys 17 | import tempfile 18 | import trimesh 19 | 20 | logger = logging.getLogger(__name__) 21 | 22 | 23 | class Spheres(object): 24 | """ """ 25 | 26 | def __init__(self, xyz=None, r=None, xyzr=None, xyzrg=None, g=None, pdb=None, bv=None, mesh=None, name=None, spheres_file=None): 27 | """ 28 | A Spheres object contains a list of xyz centers with r radii and g groups. It can be defined using xyzrg, xyzr (and optionally g), xyz (and optionally r or g), a pdb file (and optionally r or g), or a list of vertices with normals bounded by the spheres (requires r and optionally includes g) 29 | 30 | Args: 31 | xyz (float nx3): Array containing centers (Default value = None) 32 | r (float nx1): Array containing radii (Default value = None) 33 | xyzr (float nx4): Array containing centers and radii (Default value = None) 34 | xyzrg (float nx5): Array containing centers, radii, and groups (Default value = None) 35 | g (float nx1): Array containing groups (Default value = None) 36 | pdb (str): filename of a pdb to be processed into spheres (Default value = None) 37 | bv (float nx6): Array containing vertices and normals (Default value = None) 38 | mesh (Trimesh): mesh object describing the surface (Default value = None) 39 | name (str): descriptive identifier (Default value = None) 40 | spheres_file (str): filename of a Spheres file to be read from disk (Default value = None) 41 | 42 | """ 43 | 44 | if xyzrg is not None: 45 | self.xyzrg = xyzrg 46 | elif xyzr is not None: 47 | self.xyzr = xyzr 48 | 49 | if g is not None: 50 | self.g = g 51 | elif xyz is not None: 52 | self.xyz = xyz 53 | 54 | if r is not None: 55 | self.r = r 56 | if g is not None: 57 | self.g = g 58 | elif pdb 
is not None: 59 | if not sys.warnoptions: 60 | import warnings 61 | warnings.simplefilter("ignore") 62 | 63 | p = PDBParser(PERMISSIVE=1, QUIET=True) 64 | structure = p.get_structure("prot", pdb) 65 | 66 | self.xyz = np.array([atom.get_coord() for atom in structure[0].get_atoms()]) 67 | 68 | if r is not None: 69 | self.r = r 70 | else: 71 | self.r = [_get_atom_radius(atom, rtype='united') for atom in structure[0].get_atoms()] 72 | 73 | if g is not None: 74 | self.g = g 75 | 76 | elif bv is not None and r is not None: 77 | self.xyz = bv[:, 0:3] + r * bv[:, 3:6] 78 | self.r = r 79 | self.remove_duplicates() 80 | 81 | if g is not None: 82 | self.g = g 83 | elif spheres_file is not None: 84 | xyzr_file = None 85 | obj_file = None 86 | 87 | base, ext = os.path.splitext(spheres_file) 88 | if ext == ".xyzrg": 89 | xyzrg_file = spheres_file 90 | obj_file = "{0}.obj".format(base) 91 | elif ext == ".obj": 92 | xyzrg_file = "{0}.xyzrg".format(base) 93 | if not os.path.isfile(xyzrg_file): 94 | logger.error("No spheres file found with the name: {0}.xyzr or {0}.xyzrg".format(base)) 95 | obj_file = spheres_file 96 | else: 97 | logger.error("Invalid filename given to read in spheres object: {0}".format(spheres_file)) 98 | raise ValueError("Spheres objects must be .xyzrg or .obj ({0} provided)".format(spheres_file)) 99 | spheres_data = np.loadtxt(xyzrg_file, delimiter=' ') 100 | 101 | if spheres_data.shape[1] == 5: 102 | self.xyzrg = spheres_data 103 | elif spheres_data.shape[1] == 4: 104 | self.xyzr = spheres_data 105 | else: 106 | logger.error("Spheres csv file contains the wrong number of columns") 107 | raise ValueError("{0} columns found in file {1}; must contain 4 or 5".format(spheres_data.shape[1], spheres_file)) 108 | mesh = trimesh.load_mesh(obj_file) 109 | 110 | if name is None: 111 | name = os.path.basename(base) 112 | 113 | if mesh is not None: 114 | self.mesh = mesh 115 | else: 116 | self.mesh = None 117 | 118 | if name is not None: 119 | self.name = name 120 | else: 
121 | self.name = None 122 | 123 | unique_ind = np.unique(self.xyzrg, axis=0, return_index=True)[1] 124 | self.xyzrg = self.xyzrg[sorted(unique_ind), :] 125 | 126 | 127 | def __add__(self, other): 128 | """ Create a new Spheres object by overloading addition to concatenate xyzr contents; does not add meshes (just spheres) 129 | 130 | Args: 131 | other (Spheres): Spheres object to add 132 | 133 | Returns: 134 | (Spheres): Spheres object representing concatenation 135 | 136 | """ 137 | 138 | if other is not None: 139 | return Spheres(xyzrg=np.concatenate([self.xyzrg, other.xyzrg], axis=0)) 140 | else: 141 | return Spheres(xyzrg=np.copy(self.xyzrg)) 142 | 143 | 144 | def copy(self): 145 | """ Creates a copy in memory of itself 146 | """ 147 | return Spheres(xyzrg=np.copy(self.xyzrg)) 148 | 149 | 150 | def calculate_surface(self, probe_radius=1.4, cavity_atom=None, coordinate=None, all_components=False, exclusionary_radius=2.5, largest_only=False, noh=True, min_volume=200): 151 | """Calculate the SAS for a given probe radius 152 | 153 | Args: 154 | probe_radius (float): radius for surface calculations (Default value = 1.4) 155 | cavity_atom (int): id of a single atom which lies on the surface of the interior cavity of interest (Default value = None) 156 | coordinate ([float]): 3D coordinate to identify a cavity atom (Default value = None) 157 | all_components (bool): return all pockets? (Default value = False) 158 | exclusionary_radius (float): maximum permissibile distance to the closest identified surface element from the supplied coordinate (Default value = 2.5) 159 | largest_only (bool): return only the largest pocket? (Default value = False) 160 | noh (bool): remove waters before surface calculation? 
(Default value = True) 161 | minimum_volume (int): minimum volume of pockets returned when using 'all_components' (Default value = 200) 162 | 163 | """ 164 | 165 | tmpdir = tempfile.mkdtemp() 166 | xyzr_file = os.path.join(tmpdir, "pyvol.xyzr") 167 | msms_template = os.path.join(tmpdir, "pyvol_msms") 168 | 169 | np.savetxt(xyzr_file, self.xyzr, delimiter=' ', fmt='% 1.3f'+' % 1.3f'+' % 1.3f'+'% 1.2f') 170 | if (cavity_atom is None) and (coordinate is not None): 171 | cavity_atom = self.nearest(coordinate, max_radius=exclusionary_radius) 172 | 173 | msms_cmd = ["msms", "-if", xyzr_file, "-of", msms_template, "-probe_radius", "{0}".format(probe_radius), "-no_area"] 174 | if noh: 175 | msms_cmd.append("-noh") 176 | if cavity_atom is not None: 177 | msms_cmd.extend(["-one_cavity", 1, cavity_atom]) 178 | elif all_components: 179 | msms_cmd.append("-all_components") 180 | 181 | utilities.run_cmd(msms_cmd) 182 | 183 | sphere_list = [] 184 | 185 | def read_msms_output(msms_template): 186 | """ Read the results of a MSMS run 187 | 188 | Args: 189 | msms_template (str): file prefix for the output from MSMS 190 | 191 | Returns: 192 | verts_raw (float nx6): raw contents of vertices file 193 | vertices (float nx3): 1-indexed 3D coordinates of vertices 194 | faces (float nx3): vertex connectivity graph 195 | """ 196 | try: 197 | verts_raw = pd.read_csv("{0}.vert".format(msms_template), sep=r'\s+', skiprows=3, dtype=np.float_, header=None, encoding='latin1').values 198 | faces = pd.read_csv("{0}.face".format(msms_template), sep=r'\s+', skiprows=3, usecols=[0, 1, 2], dtype=np.int_, header=None, encoding='latin1').values 199 | except IOError: 200 | logger.error("MSMS failed to run correctly for {0}".format(msms_template)) 201 | raise MSMSError("MSMS failed to run correctly for {0}".format(msms_template)) 202 | else: 203 | vertices = np.zeros((verts_raw.shape[0] + 1, 3)) 204 | vertices[1:, :] = verts_raw[:, 0:3] 205 | return verts_raw, vertices, faces 206 | 207 | if not 
all_components: 208 | verts_raw, vertices, faces = read_msms_output(msms_template) 209 | 210 | mesh = trimesh.base.Trimesh(vertices=vertices, faces=faces) 211 | if mesh.volume < 0: 212 | faces = np.flip(faces, axis=1) 213 | mesh = trimesh.base.Trimesh(vertices=vertices, faces=faces) 214 | bspheres = Spheres(bv=verts_raw, r=probe_radius, mesh=mesh) 215 | shutil.rmtree(tmpdir) 216 | logger.debug("Single volume calculated for {0}".format(self.name)) 217 | return [bspheres] 218 | 219 | else: 220 | spheres_list = [] 221 | ac_template_list = [os.path.splitext(x)[0] for x in glob.glob("{0}_*.face".format(msms_template))] 222 | logger.debug("{0} volumes calculated for {1}".format(len(ac_template_list), msms_template)) 223 | 224 | largest_mesh = None 225 | for ac_template in ac_template_list: 226 | verts_raw, vertices, faces = read_msms_output(ac_template) 227 | 228 | tm = trimesh.base.Trimesh(vertices=vertices, faces=faces) 229 | if tm.volume < 0: 230 | tm = trimesh.base.Trimesh(vertices=vertices, faces=np.flip(faces, axis=1)) 231 | 232 | if largest_only: 233 | if largest_mesh is None: 234 | largest_mesh = tm 235 | bspheres = Spheres(bv=verts_raw, r=probe_radius, mesh=tm) 236 | elif tm.volume > largest_mesh.volume: 237 | largest_mesh = tm 238 | bspheres = Spheres(bv=verts_raw, r=probe_radius, mesh=tm) 239 | else: 240 | if min_volume is not None: 241 | if tm.volume < min_volume: 242 | continue 243 | bspheres = Spheres(bv=verts_raw, r=probe_radius, mesh=tm) 244 | spheres_list.append(bspheres) 245 | 246 | shutil.rmtree(tmpdir) 247 | if largest_only: 248 | logger.debug("Largest volume identified for {0}".format(msms_template)) 249 | return [bspheres] 250 | else: 251 | logger.debug("{0} volumes identified with sufficient volume for {0}".format(len(spheres_list), msms_template)) 252 | return sorted(spheres_list, key=lambda s: s.mesh.volume, reverse=True) 253 | 254 | 255 | def identify_nonextraneous(self, ref_spheres, radius): 256 | """Returns all spheres less than radius away 
def nearest(self, coordinate, max_radius=None):
    """ Returns the index of the sphere closest to a coordinate; if max_radius is specified, the sphere returned must have a radius <= max_radius

    The returned index always refers to a row of the full sphere array
    (self.xyz), even when max_radius restricts the candidate set.

    Args:
      coordinate (float nx3): 3D input coordinate(s); a single 3-element coordinate is also accepted
      max_radius (float): maximum permissible radius of the returned sphere (Default value = None)

    Returns:
      nearest_index: index (into the full sphere array) of the sphere closest to any supplied coordinate

    Raises:
      ValueError: if no sphere satisfies the max_radius restriction

    """

    # cdist requires 2D input; promote a bare (x, y, z) triple to shape (1, 3)
    coordinate = np.atleast_2d(np.asarray(coordinate, dtype=float))

    # keep track of the original row numbers so that filtering by radius
    # does not shift the index that is handed back to the caller
    if max_radius is None:
        candidates = np.arange(self.xyz.shape[0])
    else:
        candidates = np.flatnonzero(self.r <= max_radius)

    if candidates.size == 0:
        raise ValueError("No spheres available to search for the nearest sphere")

    distances = scipy.spatial.distance.cdist(self.xyz[candidates], coordinate)
    # reduce over the query coordinates first so that argmin yields a sphere
    # row rather than a position in the flattened distance matrix
    return candidates[np.argmin(distances.min(axis=1))]
def remove_ungrouped(self):
    """ Remove all spheres that did not adequately cluster with the remainder of the set

    Spheres whose group id is below 1 are dropped from the internal xyzrg
    array. The mesh is always reset to None.

    """
    ungrouped_mask = self.g < 1
    # count the affected rows directly; np.where returns a tuple, so taking
    # its len() would report the number of array dimensions instead
    num_ungrouped = int(np.count_nonzero(ungrouped_mask))

    self.xyzrg = self.xyzrg[~ungrouped_mask, :]
    self.mesh = None
    if num_ungrouped > 0:
        logger.debug("{0} ungrouped spheres removed".format(num_ungrouped))
@property
def xyzrg(self):
    """ Return the internal array holding coordinates, radii, and group ids

    """
    stored = self._xyzrg
    return stored


@xyzrg.setter
def xyzrg(self, value):
    """ Replace the internal array of coordinates, radii, and group ids

    Args:
      value (float nx5): one row per sphere holding x, y, z, radius, and group id

    Raises:
      ValueError: if value does not have exactly 5 columns

    """
    column_count = value.shape[1]
    if column_count == 5:
        # store an independent float copy so later edits to the caller's
        # array do not leak into this object
        duplicate = np.copy(value)
        self._xyzrg = duplicate.astype(float)
    else:
        raise ValueError("number of xyzrg array columns must equal 5")
@property
def r(self):
    """ Retrieve the radii (column 3 of the internal xyzrg array)

    """
    return self._xyzrg[:, 3]


@r.setter
def r(self, value):
    """ Selectively set the radius column

    Args:
      value (float 1xn or scalar): radii; an array must supply one value per sphere, while a scalar is applied to every sphere

    Raises:
      ValueError: if an array is supplied whose length differs from the number of spheres

    """
    # isinstance is required here: `value is np.ndarray` compares against the
    # class object itself and is never true for an actual array, which made
    # the length validation below unreachable
    if isinstance(value, np.ndarray) and value.ndim > 0:
        if self._xyzrg.shape[0] == value.shape[0]:
            self._xyzrg[:, 3] = np.copy(value).astype(float)
        else:
            raise ValueError("Number of radii values must match the number of rows in the internal xyz array")
    else:
        # scalars (including 0-d arrays) and plain sequences are handed to
        # numpy assignment unchanged
        self._xyzrg[:, 3] = value
def calculate_rotation_matrix(ref_vector, new_vector):
    """ Calculates the 3D rotation matrix to convert from ref_vector to new_vector; not used in main PyVOL calculations

    The matrix is built with the axis-angle (Rodrigues) formula using a
    negated angle, so it maps ref_vector onto new_vector when applied to row
    vectors, i.e. (unit ref_vector) * rot_matrix == (unit new_vector).

    Args:
      ref_vector (3x1 ndarray): original vector
      new_vector (3x1 ndarray): target vector

    Returns:
      rot_matrix (3x3 ndarray): rotation matrix to convert the original vector to the target vector
    """

    # flatten so that both (3,) and (3, 1) inputs are accepted
    ref_vector = np.asarray(ref_vector, dtype=float).ravel()
    new_vector = np.asarray(new_vector, dtype=float).ravel()

    ref_vector = ref_vector / np.linalg.norm(ref_vector)
    new_vector = new_vector / np.linalg.norm(new_vector)

    # rounding can push the dot product of unit vectors marginally outside
    # [-1, 1], which would make math.acos raise a domain error
    cos_angle = float(np.clip(np.dot(ref_vector, new_vector), -1.0, 1.0))

    rot_axis = np.cross(ref_vector, new_vector)
    axis_norm = np.linalg.norm(rot_axis)
    if axis_norm != 0:
        rot_axis = rot_axis / axis_norm
    elif cos_angle < 0:
        # antiparallel vectors leave the axis undefined; with a zero axis the
        # formula below degenerates to -I (an inversion, not a rotation), so
        # pick any axis perpendicular to ref_vector for the half turn
        helper = np.zeros(3)
        helper[np.argmin(np.abs(ref_vector))] = 1.0
        rot_axis = np.cross(ref_vector, helper)
        rot_axis = rot_axis / np.linalg.norm(rot_axis)

    rot_angle = -1 * math.acos(cos_angle)
    ca = math.cos(rot_angle)
    sa = math.sin(rot_angle)

    rot_matrix = np.matrix([
        [
            1.0 + (1.0 - ca) * (rot_axis[0]**2 - 1.0),
            -rot_axis[2] * sa + (1.0 - ca) * rot_axis[0] * rot_axis[1],
            rot_axis[1] * sa + (1.0 - ca) * rot_axis[0] * rot_axis[2]
        ], [
            rot_axis[2] * sa + (1.0 - ca) * rot_axis[0] * rot_axis[1],
            1.0 + (1.0 - ca) * (rot_axis[1]**2 - 1.0),
            -1.0 * rot_axis[0] * sa + (1.0 - ca) * rot_axis[1] * rot_axis[2]
        ], [
            -1.0 * rot_axis[1] * sa + (1.0 - ca) * rot_axis[0] * rot_axis[2],
            rot_axis[0] * sa + (1.0 - ca) * rot_axis[1] * rot_axis[2],
            1.0 + (1.0 - ca) * (rot_axis[2]**2 - 1.0)
        ]])

    return rot_matrix
closest_vertex_normals(ref_mesh, query_mesh, ref_coordinates=None, ref_radius=2, interface_gap=2): 56 | """ Returns the location and normal for the closest point between two meshes 57 | 58 | Args: 59 | ref_mesh (trimesh): origin mesh 60 | query_mesh (trimesh): target mesh 61 | ref_coordinates (3xN ndarray): coordinates used to specify the pertinent subregion on the ref_mesh 62 | ref_radius (float): radius used to identify points on the ref_mesh that are sufficiently close to the ref_coordinates 63 | interface_gap (float): maximum distance between the ref and query meshes at the identified point 64 | 65 | Returns: 66 | mean_pos (3x1 ndarray): coordinate of the central point between the meshes 67 | mean_normal (3x1 ndarray): normalized vector pointing from the ref_mesh to the query_mesh 68 | """ 69 | 70 | if ref_coordinates is not None: 71 | reftree = scipy.spatial.cKDTree(ref_mesh.vertices) 72 | ref_groups = reftree.query_ball_point(ref_coordinates, ref_radius, n_jobs=-1) 73 | ref_indices = np.unique(list(itertools.chain.from_iterable(ref_groups))) 74 | else: 75 | ref_indices = np.arange(1, ref_mesh.vertices.shape[0]) 76 | 77 | querytree = scipy.spatial.cKDTree(query_mesh.vertices) 78 | query_groups = querytree.query_ball_point(ref_mesh.vertices[ref_indices, :], interface_gap, n_jobs=-1) 79 | query_indices = np.unique(list(itertools.chain.from_iterable(query_groups))) 80 | 81 | kdtree = scipy.spatial.cKDTree(ref_mesh.vertices[ref_indices, :]) 82 | dist, indices = kdtree.query(query_mesh.vertices[query_indices, :], n_jobs=-1) 83 | 84 | reorder = np.argsort(dist) 85 | for query_index in reorder: 86 | closest_ref_index = ref_indices[indices[query_index]] 87 | closest_query_index = query_indices[query_index] 88 | 89 | dp = np.dot(query_mesh.vertex_normals[closest_query_index], ref_mesh.vertex_normals[closest_ref_index]) 90 | 91 | if dp < -0.95: 92 | mean_pos = np.mean(np.array([ref_mesh.vertices[closest_ref_index, :], query_mesh.vertices[closest_query_index, :]]), 
def check_dir(location):
    """ Ensure that a specified directory exists

    Creation is best-effort: a failure is logged but not raised, so callers
    that subsequently open files in the directory surface the real error.

    Args:
      location (str): target directory

    """
    # an empty string is what os.path.dirname returns for a bare filename;
    # it denotes the current directory, so there is nothing to create
    if location == "":
        return

    if not os.path.isdir(location):
        try:
            os.makedirs(location)
        except OSError as err:
            # another process may have created the directory between the
            # check and the call; only report genuine failures
            if not os.path.isdir(location):
                logger.warning("Unable to create directory {0}: {1}".format(location, err))
def run_cmd(options, in_directory=None):
    """ Run a program using the command line

    Args:
      options ([str]): list of command line options; every entry is converted with str()
      in_directory (str): directory in which to run the command (Default value = None)

    Raises:
      subprocess.CalledProcessError: if the command exits with a non-zero status

    """
    opt_strs = [str(opt) for opt in options]

    try:
        # let the child process start in in_directory rather than changing
        # this process's working directory: a chdir that is only undone on
        # success leaves the caller stranded in in_directory after a failure
        subprocess.check_output(opt_strs, stderr=subprocess.STDOUT, cwd=in_directory)
    except subprocess.CalledProcessError:
        logger.error("Process Failed: {0}".format(" ".join(opt_strs)))
        raise

    logger.debug("Shell command: {0}".format(" ".join(opt_strs)))
HERE = pathlib.Path(__file__).parent
# read explicitly as UTF-8 so the build does not depend on the locale encoding
README = (HERE / "README.md").read_text(encoding="utf-8")

setup(
    name="bio-pyvol",
    version="1.8.0",
    description="a PyMOL plugin and python package for visualization, comparison, and volume calculation of protein drug-binding sites",
    long_description=README,
    # the long description is read from README.md, so it must be declared as
    # Markdown; declaring reStructuredText makes the package index render or
    # reject it as malformed RST
    long_description_content_type="text/markdown",
    url="https://github.com/schlessinger-lab/pyvol",
    author="Ryan H.B. Smith",
    author_email="ryan.smith@icahn.mssm.edu",
    license="MIT",
    classifiers=[
        "License :: OSI Approved :: MIT License",
        "Programming Language :: Python :: 3.7",
    ],
    packages=["pyvol"],
    install_requires=[
        "biopython>=1.73",
        "numpy>=1.16.1",
        "pandas>=0.24.1",
        "scipy>=1.2.1",
        "scikit-learn>=0.20.2",
        "trimesh>=2.36.29",
        "configparser",
    ],
    entry_points={
        "console_scripts": [
            "pyvol=pyvol.__main__:main",
        ]
    },
)
# Fixture structures live next to this test module; resolving them relative to
# __file__ lets the suite run from any checkout instead of one developer's
# home directory.
TESTS_DIR = os.path.dirname(os.path.abspath(__file__))
PROT_FILE = os.path.join(TESTS_DIR, "1uwh_B_prot.pdb")
LIG_FILE = os.path.join(TESTS_DIR, "1uwh_B_lig.pdb")


def _assert_outputs_exist(opts):
    """ Check that every expected output file of a pocket calculation was written

    Args:
      opts (dict): options returned by pocket_wrapper; "output_dir" and "prefix" are read

    """
    output_dir = opts.get("output_dir")
    prefix = opts.get("prefix")
    for suffix in [".log", ".rept", ".cfg", "_p0.xyzrg", "_p0.obj"]:
        assert os.path.isfile(os.path.join(output_dir, "{0}{1}".format(prefix, suffix)))


@pytest.mark.parametrize("prot_file", [PROT_FILE])
@pytest.mark.parametrize("min_rad", [1.4, 1.6])
@pytest.mark.parametrize("max_rad", [3.2, 3.4, 3.6])
@pytest.mark.parametrize("mode,lig_file,resid,coordinates", [("all", None, None, None), ("largest", None, None, None), ("specific", None, "B513", None), ("specific", LIG_FILE, None, None), ("specific", None, None, "95.6 29.8 68.5")])
def test_specification(prot_file, min_rad, max_rad, mode, lig_file, resid, coordinates, tmp_path):
    """ Run pocket identification for each pocket specification mode and check the output files

    """
    opts = {
        "prot_file": prot_file,
        "min_rad": min_rad,
        "max_rad": max_rad,
        "mode": mode,
        "lig_file": lig_file,
        "resid": resid,
        "coordinates": coordinates,
        # a per-test temporary directory keeps parallel workers from sharing
        # one project directory
        "project_dir": str(tmp_path)
    }
    pockets, opts = pocket_wrapper(**opts)

    _assert_outputs_exist(opts)


@pytest.mark.parametrize("prot_file", [PROT_FILE])
@pytest.mark.parametrize("min_rad", [1.2, 1.4])
@pytest.mark.parametrize("max_rad", [3.4, 3.6])
@pytest.mark.parametrize("max_clusters", [2, 10])
@pytest.mark.parametrize("min_subpocket_rad", [1.5, 1.7])
@pytest.mark.parametrize("max_subpocket_rad", [3.2, 3.4])
@pytest.mark.parametrize("min_subpocket_surf_rad", [1.0, 1.2])
@pytest.mark.parametrize("radial_sampling", [0.1, 0.2])
@pytest.mark.parametrize("inclusion_radius_buffer", [1.0])
@pytest.mark.parametrize("min_cluster_size", [50])
def test_subdivide(prot_file, min_rad, max_rad, max_clusters, min_subpocket_rad, max_subpocket_rad, radial_sampling, min_subpocket_surf_rad, inclusion_radius_buffer, min_cluster_size, tmp_path):
    """ Run subpocket partitioning of the largest pocket across a parameter grid and check the output files

    """
    opts = {
        "prot_file": prot_file,
        "min_rad": min_rad,
        "max_rad": max_rad,
        "max_clusters": max_clusters,
        "min_subpocket_rad": min_subpocket_rad,
        "max_subpocket_rad": max_subpocket_rad,
        "radial_sampling": radial_sampling,
        "min_subpocket_surf_rad": min_subpocket_surf_rad,
        "inclusion_radius_buffer": inclusion_radius_buffer,
        "min_cluster_size": min_cluster_size,
        "mode": "largest",
        "subdivide": True,
        "project_dir": str(tmp_path)
    }
    pockets, opts = pocket_wrapper(**opts)

    _assert_outputs_exist(opts)