├── .gitignore ├── .travis.yml ├── LICENSE ├── Makefile ├── README.md ├── devtools ├── conda-recipe │ ├── README.md │ ├── build.bat │ ├── build.sh │ ├── meta.yaml │ └── run_test.py ├── travis-ci │ ├── deploy_anaconda.sh │ └── install_miniconda.sh └── wheel │ └── sstmap-1.1.4-cp36-cp36m-linux_x86_64.whl ├── setup.py └── sstmap ├── Example.ipynb ├── __init__.py ├── _sstmap_entropy.cpp ├── _sstmap_ext.c ├── _sstmap_probable.cpp ├── grid_water_analysis.py ├── io_core.py ├── io_helpers.py ├── io_spatial.py ├── kdhsa102.cpp ├── kdhsa102.h ├── kdhsa102_main.cpp ├── make_clust_brute.cpp ├── probable.cpp ├── probable.h ├── probable_main.cpp ├── renum_pdb.cpp ├── scripts ├── __init__.py ├── desmond_extract_nbparams.py ├── dtr_to_netcdf.py ├── run_gist.py └── run_hsa.py ├── site_water_analysis.py ├── testing ├── __init__.py └── test_gist_output.py ├── utils.py └── water_analysis.py /.gitignore: -------------------------------------------------------------------------------- 1 | attic/ 2 | *.zip 3 | sstmap/tests/new_gist_tests 4 | test_build.sh 5 | local_build.sh 6 | *.nc 7 | #python object files 8 | *.pyc 9 | 10 | # python packages files 11 | *.egg 12 | *.egg-info 13 | test_build 14 | dist 15 | build 16 | eggs 17 | parts 18 | bin 19 | var 20 | sdist 21 | develop-eggs 22 | .installed.cfg 23 | lib 24 | 25 | # Object files 26 | *.o 27 | *.ko 28 | *.obj 29 | *.elf 30 | 31 | # Precompiled Headers 32 | *.gch 33 | *.pch 34 | 35 | # Libraries 36 | *.lib 37 | *.a 38 | *.la 39 | *.lo 40 | 41 | # Shared objects (inc. 
Windows DLLs) 42 | *.dll 43 | *.so 44 | *.so.* 45 | *.dylib 46 | 47 | # Executables 48 | *.exe 49 | *.out 50 | *.app 51 | *.i*86 52 | *.x86_64 53 | *.hex 54 | 55 | # Debug files 56 | *.dSYM/ 57 | 58 | #OS junk files 59 | [Tt]humbs.db 60 | *.DS_Store 61 | 62 | #build folder from python setup 63 | build/ 64 | 65 | #test outputs during local testing 66 | sstmap/tests/*.txt 67 | sstmap/tests/*.log 68 | sstmap/tests/*_data 69 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # Credits: Adapted from https://github.com/choderalab/pymbar/.travis.yml 2 | # with some modifications 3 | language: C 4 | sudo: false 5 | 6 | addons: 7 | apt: 8 | sources: 9 | - ubuntu-toolchain-r-test 10 | 11 | before_install: 12 | - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew install md5sha1sum; fi 13 | 14 | branches: 15 | only: 16 | - master 17 | 18 | install: 19 | - source devtools/travis-ci/install_miniconda.sh 20 | - export PYTHONUNBUFFERED=true 21 | - export PATH=$HOME/miniconda/bin:$PATH 22 | - conda config --add channels conda-forge # highest priority 23 | - conda config --add channels omnia 24 | 25 | script: 26 | # Add org channel 27 | #- conda config --add channels ${ORGNAME} 28 | # Create a test environment 29 | - conda create --quiet --yes -n test python=$python 30 | # Activate the test environment 31 | - source activate test 32 | # Build recipe 33 | - conda install -y mdtraj 34 | 35 | # Install the pip package 36 | - pip install devtools/wheel/sstmap-1.1.4-cp36-cp36m-linux_x86_64.whl 37 | # Install locally-built package 38 | # - conda install --yes --quiet --use-local ${PACKAGENAME} 39 | #- conda install --yes --quiet pip nose nose-timer 40 | # Test the package (will do this once unit tests are ready) 41 | #- cd devtools && nosetests $PACKAGENAME --nocapture --verbosity=2 --with-doctest --with-timer
&& cd .. 42 | 43 | os: 44 | #- osx 45 | - linux 46 | 47 | 48 | env: 49 | matrix: 50 | # - python=2.7 CONDA_PY=27 51 | # - python=3.4 CONDA_PY=34 52 | # - python=3.5 CONDA_PY=35 53 | - python=3.6 CONDA_PY=36 54 | 55 | global: 56 | 57 | - PACKAGENAME="sstmap" 58 | - ORGNAME="solvationtools" 59 | 60 | #after_success: 61 | # - source devtools/travis-ci/deploy_anaconda.sh 62 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Kamran Haider, Steven Ramsay, Anthony Cruz Balberdy, Tobias Wulsdorf, Crystal Nguyen, Tom Kurtzman, Michael Gilson, Kurtzman Lab, Gilson Lab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
# Build the stand-alone SSTMap command-line helpers with $(CC) and move each
# finished binary into $(INSTALLDIR).
#
# Variables can be overridden on the command line, e.g.
#     make INSTALLDIR=/usr/local/bin all
CC = g++
SOURCEDIR = ./sstmap
INSTALLDIR = ~/anaconda2/bin

# These targets never produce a file of the same name, so they must always run.
.PHONY: all clean test

# `&&` (not `;`) so a failed compile does not go on to run `mv` on a missing
# binary and so the recipe reports the compiler's failure.
bruteclust: $(SOURCEDIR)/make_clust_brute.cpp
	$(CC) -o bruteclust $(SOURCEDIR)/make_clust_brute.cpp && mv bruteclust $(INSTALLDIR)


kdhsa102: $(SOURCEDIR)/kdhsa102.cpp $(SOURCEDIR)/kdhsa102_main.cpp $(SOURCEDIR)/kdhsa102.h
	$(CC) -o kdhsa102 $(SOURCEDIR)/kdhsa102.cpp $(SOURCEDIR)/kdhsa102_main.cpp && mv kdhsa102 $(INSTALLDIR)

# NOTE(review): this rule used to name 6dim_main.cpp / 6dimprobable.cpp /
# 6dimprobable.h, none of which are in the source tree. probable.cpp,
# probable_main.cpp and probable.h appear to be their current names; the
# target and binary name are kept so existing `make 6dimprobable` calls work.
# Confirm the mapping.
6dimprobable: $(SOURCEDIR)/probable.cpp $(SOURCEDIR)/probable_main.cpp $(SOURCEDIR)/probable.h
	$(CC) -o 6dimprobable $(SOURCEDIR)/probable.cpp $(SOURCEDIR)/probable_main.cpp && mv 6dimprobable $(INSTALLDIR)

all: bruteclust kdhsa102 6dimprobable

clean:
	rm -f bruteclust
	rm -f kdhsa102
	rm -f 6dimprobable

# The command has to be a recipe line; written after the colon it was parsed
# as three prerequisites ("pytest", "-m", "tests") and nothing was ever run.
# NOTE(review): `-m tests` selects tests by marker, not by directory; verify
# that this is the intended selection.
test:
	pytest -m tests
If you run into a bug, please report it on the GitHub issue tracker and we will work to resolve it quickly. 8 | 9 | Thank you, 10 | 11 | Kamran and the Development Team. 12 | 13 | 14 | SSTMap 15 | ====== 16 | 17 | SSTMap is a tool to study structural and thermodynamic properties of water molecules on solute surfaces. It combines grid inhomogeneous solvation theory (GIST) with measures of water structure to produce maps of solvation structure and thermodynamics in protein binding sites and on the surfaces of other molecules of interest, such as small molecules or host-guest systems. It provides both site-based and grid-based calculations in one package, supports multiple MD packages, and can be integrated into Python’s scientific computing environment for advanced applications. The cross-platform support is enabled by the trajectory and topology parsers of MDTraj and ParmEd. 18 | 19 | Installation 20 | ------------ 21 | 22 | Please check the instructions Here. 23 | 72 | Usage 73 | ----- 74 | 75 | SSTMap provides command-line tools for hydration site analysis (HSA) and Grid Inhomogeneous Solvation Theory (GIST), `run_hsa` and `run_gist`, respectively. The functionality of SSTMap is also available through its Python API, exposed as the `sstmap` module. For more details, please visit [sstmap.org](http://sstmap.org/). 76 | 77 | The following is an example of running a quick HSA and GIST calculation on a test system available in [sstmap_test_suite](https://github.com/KurtzmanLab/sstmap_test_suite). You can download the full test suite from [here](https://www.dropbox.com/sh/hrijgk8n5z12bgi/AABSigcBf9PN_7-Z26VCCPePa?dl=0) (since the GitHub repository doesn't contain trajectory files). For a given platform, `cd` to its sub-directory and run the commands as shown below.
78 | ```bash 79 | cd sstmap_test_suite/platforms/amber 80 | $ run_hsa -i testcase.prmtop -t md100ps.nc -l ligand.pdb -s 0 -f 100 -o testcase 81 | $ run_gist -i testcase.prmtop -t md100ps.nc -l ligand.pdb -g 20 20 20 -s 0 -f 100 -o testcase 82 | ``` 83 | For examples using MD simulations generated from other packages, such as [Amber](http://ambermd.org/), [Charmm](https://www.charmm.org), [Gromacs](http://www.gromacs.org/), [NAMD](http://www.ks.uiuc.edu/Research/namd/), [OpenMM](http://openmm.org/) and [Desmond](https://www.deshawresearch.com/resources_desmond.html), please follow [this tutorial](http://sstmap.org/2017/06/03/simple-examples/) on [sstmap.org](http://sstmap.org/). SSTMap can also be used as a Python module: 84 | 85 | ```python 86 | import sstmap as sm 87 | # Example 1: Run a grid-based calculation 88 | # with all quantities. 89 | gist = sm.GridWaterAnalysis( 90 | "casp3.prmtop", 91 | "casp3.netcdf", 92 | ligand_file="casp3_ligand.pdb", 93 | grid_dimensions=[40, 40, 40], 94 | prefix="casp3") 95 | gist.calculate_grid_quantities() 96 | # Example 2: Run gist with only energy calculations. 97 | gist.calculate_grid_quantities(energy=True) 98 | # Example 3: Initialize gist with a grid center position. 99 | gist = sm.GridWaterAnalysis( 100 | "casp3.prmtop", 101 | "casp3.netcdf", 102 | grid_center=[35.33, 52.23, 54.96], 103 | grid_dimensions=[40, 40, 40], 104 | prefix="casp3") 105 | ``` 106 | 107 | Principal Developer(s) 108 | --------------------- 109 | * Kamran Haider 110 | 111 | Co-Developers 112 | ------------- 113 | * Steven Ramsey 114 | * Anthony Cruz Balberdi 115 | * Tobias Wulsdorf 116 | 117 | Principal Investigators 118 | --------------------- 119 | * Tom Kurtzman 120 | * Michael Gilson 121 | 122 | References 123 | ---------- 124 | Please cite the following when you use this software. 125 | 126 | [1] Haider K, Cruz A, Ramsey S, Gilson M. and Kurtzman T. 
Solvation Structure and Thermodynamic Mapping (SSTMap): An Open-Source, Flexible Package for the Analysis of Water in Molecular Dynamics Trajectories. J. Chem. Theory Comput. (10.1021/acs.jctc.7b00592) 2017. 127 | [2] Crystal N. Nguyen, Michael K. Gilson, Tom Young. Structure and Thermodynamics of Molecular Hydration via Grid Inhomogeneous Solvation Theory. eprint arXiv:1108.4876, (2011). 128 | 129 | [3] Crystal N. Nguyen, Tom Kurtzman Young, and Michael K. Gilson. Grid inhomogeneous solvation theory: hydration structure and thermodynamics of the miniature receptor cucurbit[7]uril. J. Chem. Phys. 137, 044101 (2012) 130 | 131 | [4] Haider K, Wickstrom L, Ramsey S, Gilson MK and Kurtzman T. Enthalpic Breakdown of Water Structure on Protein Active Site Surfaces. J Phys Chem B. 120:8743-8756, (2016). http://dx.doi.org/10.1021/acs.jpcb.6b01094. 132 | 133 | [5] Themis Lazaridis. Inhomogeneous Fluid Approach to Solvation Thermodynamics. 1. Theory. The Journal of Physical Chemistry B 102 (18), 3531-3541, (1998). DOI: 10.1021/jp9723574 134 | 135 | 136 | License 137 | ------- 138 | 139 | `SSTMap` is free software and is licensed under the MIT license. 140 | 141 | 142 | Acknowledgements 143 | -------- 144 | This project is funded through the NIH Grant: R01-GM100946 145 | 146 | -------------------------------------------------------------------------------- /devtools/conda-recipe/README.md: -------------------------------------------------------------------------------- 1 | This is a recipe for building the current development package into a conda binary. 
2 | 3 | 4 | -------------------------------------------------------------------------------- /devtools/conda-recipe/build.bat: -------------------------------------------------------------------------------- 1 | python setup.py install 2 | ::if errorlevel 1 exit 1 -------------------------------------------------------------------------------- /devtools/conda-recipe/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | $PYTHON setup.py clean 3 | $PYTHON setup.py install -------------------------------------------------------------------------------- /devtools/conda-recipe/meta.yaml: -------------------------------------------------------------------------------- 1 | package: 2 | name: sstmap 3 | version: 1.1.4 4 | 5 | source: 6 | path: ../../ 7 | 8 | 9 | requirements: 10 | build: 11 | - python 12 | - numpy 1.14.0 13 | - setuptools 14 | - gcc 15 | - toolchain 16 | - libgcc 17 | - gsl 2.2.1 18 | - libopenblas 19 | 20 | run: 21 | - python 22 | - libgcc 23 | - numpy 24 | - scipy 25 | - mdtraj 26 | - parmed 27 | - matplotlib 28 | - gsl 2.2.1 29 | - libopenblas 30 | 31 | #test: 32 | # commands: 33 | # - run_gist -h 34 | # - run_hsa -h 35 | # imports: 36 | # - sstmap 37 | 38 | about: 39 | home: https://github.com/KurtzmanLab/SSTMap 40 | license: MIT 41 | summary: Python tools for analysis of water molecules in MD trajectories. 
#!/usr/bin/env bash
# Deploy every locally built conda package of ${PACKAGENAME} to the
# ${ORGNAME} channel on anaconda.org (the solvation tools channel).
#
# Reads from the environment:
#   PACKAGENAME         package whose */${PACKAGENAME}*.tar.bz2 builds are uploaded
#   ORGNAME             anaconda.org organisation that owns the channel
#   CONDA_UPLOAD_TOKEN  API token passed to `anaconda -t`
#
# Requires the anaconda client:
#   conda install --yes anaconda-client
#
# NOTE(review): .travis.yml invokes this file with `source` (currently
# commented out), so the script deliberately avoids `exit` and does not change
# shell options; confirm before adding either.

# Only touch the channel if the build directory could actually be entered;
# otherwise the glob below would be expanded in the caller's directory.
if pushd "$HOME/miniconda/conda-bld"; then
    for filename in */"${PACKAGENAME}"*.tar.bz2; do
        # With no match the pattern is left as a literal string; skip it
        # instead of handing a non-existent path to the anaconda client.
        [ -e "$filename" ] || continue
        anaconda -t "$CONDA_UPLOAD_TOKEN" remove --force "${ORGNAME}/${PACKAGENAME}/${filename}"
        anaconda -t "$CONDA_UPLOAD_TOKEN" upload --force -u "${ORGNAME}" -p "${PACKAGENAME}" "${filename}"
    done
    popd
else
    echo "deploy_anaconda.sh: cannot enter $HOME/miniconda/conda-bld" >&2
fi

# Example of a manual upload:
#anaconda upload /home/travis/miniconda3/conda-bld/linux-64/sstmap-1.1.0-py36_0.tar.bz2
"""Build and install script for the sstmap package and its compiled extensions."""
from setuptools import setup, Extension, find_packages
import numpy

__version__ = "1.1.4"

# Compiled extension modules, in build order:
#   _sstmap_ext            C extension; needs the NumPy headers and links GSL
#   _sstmap_entropy        C++ extension built together with kdhsa102.cpp
#   _sstmap_probableconfig C++ extension built together with probable.cpp
extensions = [
    Extension(
        '_sstmap_ext',
        sources=['sstmap/_sstmap_ext.c'],
        include_dirs=[numpy.get_include()],
        extra_link_args=['-lgsl', '-lgslcblas'],
    ),
    Extension(
        '_sstmap_entropy',
        sources=['sstmap/_sstmap_entropy.cpp', 'sstmap/kdhsa102.cpp'],
        language="c++",
    ),
    Extension(
        '_sstmap_probableconfig',
        sources=['sstmap/_sstmap_probable.cpp', 'sstmap/probable.cpp'],
        language="c++",
    ),
]

# Command-line tools exposed by the installed package.
console_scripts = [
    'run_hsa = sstmap.scripts.run_hsa:entry_point',
    'run_gist = sstmap.scripts.run_gist:entry_point',
]

setup(
    name='sstmap',
    author='Kamran Haider',
    author_email='kamranhaider.mb@gmail.com',
    description='SSTMap: A computational tool for studying structure and thermodynamics of water molecules on solute surfaces',
    version=__version__,
    license='MIT',
    url='https://github.com/KurtzmanLab/SSTMap',
    platforms=['Linux', 'Mac OS X'],
    install_requires=['parmed==3.2.0', 'matplotlib==2.2.3'],
    packages=find_packages(),
    ext_modules=extensions,
    zip_safe=False,
    entry_points={'console_scripts': console_scripts},
)
Note that these are not stored, but generated upon retrieval.\"\n", 34 | "print f.get_centers_real()\n", 35 | "print \"\"\n", 36 | "print \"Now retrieve same set of coordinates in fractional space.\"\n", 37 | "print f.get_centers_frac()\n", 38 | "print \"Retrieve a set of real space coordinates in fractional space.\"\n", 39 | "print \"e.g. Origin(real) -> Origin(frac)\"\n", 40 | "print f.get_frac(Origin)\n", 41 | "print \"Now do the same with but in the opposite direction\"\n", 42 | "print \"e.g. Origin(frac) -> Origin(real)\"\n", 43 | "print f.get_real([0,0,0])\n", 44 | "print f.dim" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "### Now we use the gist class, which inherits directly from\n", 54 | "### the field class. That means it holds all spatial information\n", 55 | "### members and functions of field class, but also holds grid\n", 56 | "### quantities.\n", 57 | "\n", 58 | "from io_core import gist\n", 59 | "import numpy as np\n", 60 | "\n", 61 | "Bins = np.array([50,50,50])\n", 62 | "Delta = np.array([0.5,0.5,0.5])\n", 63 | "Origin = np.array([0.2121,1.,2])\n", 64 | "\n", 65 | "g = gist(Bins=Bins, Delta=Delta, Origin=Origin)\n", 66 | "\n", 67 | "### We can use all functionalities from the field class but also\n", 68 | "### have gist numpy arrays and different options for basic manipulation.\n", 69 | "print \"Currently, our gist object is empty. E.g., this is the population array:\"\n", 70 | "print g.Pop\n", 71 | "print \"\"\n", 72 | "print \"... 
##############################################################################
# SSTMap: A Python library for the calculation of water structure and
#         thermodynamics on solute surfaces from molecular dynamics
#         trajectories.
# MIT License
# Copyright 2016-2017 Lehman College City University of New York and the Authors
#
# Authors: Kamran Haider, Steven Ramsey, Anthony Cruz Balberdy, Tom Kurtzman
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
###############################################################################
"""
SSTMap: calculation of water structure and thermodynamics on solute surfaces
from molecular dynamics trajectories.

Importing this package loads the ``site_water_analysis``,
``grid_water_analysis`` and ``utils`` submodules and makes the
``SiteWaterAnalysis`` and ``GridWaterAnalysis`` classes available directly
from ``sstmap``.

Please reference the following if you use this code in your research:
[1] Haider K, Wickstrom L, Ramsey S, Gilson MK and Kurtzman T. Enthalpic Breakdown
of Water Structure on Protein Active Site Surfaces. J Phys Chem B. 120:8743-8756,
2016. http://dx.doi.org/10.1021/acs.jpcb.6b01094.
[2] Haider K, Cruz A, Ramsey S, Gilson MK, and Kurtzman T. Solvation Structure and
Thermodynamic Mapping (SSTMap): An open-source, flexible package for the analysis
of water in molecular dynamics trajectories. JCTC 14(1):418-425, 2017.
http://dx.doi.org/10.1021/acs.jctc.7b00592.
"""

__author__ = "Kamran Haider, Anthony Cruz, Steven Ramsey, Tobias Wulsdorf, Tom Kurtzman"
__license__ = "MIT"
__maintainer__ = "Kamran Haider"
__email__ = "kamranhaider.mb@gmail.com"
# Keep in step with the version declared in setup.py and the conda recipe.
__version__ = "1.1.4"


from sstmap import site_water_analysis, grid_water_analysis, utils
from sstmap.site_water_analysis import SiteWaterAnalysis
from sstmap.grid_water_analysis import GridWaterAnalysis
strtemp; 76 | /* 77 | ifstream input(expfile.c_str()); 78 | //getline(input, strtemp); //skip header 79 | while (!input.eof()) { 80 | getline(input, strtemp); 81 | if (!strtemp.empty()) { 82 | temp = atof(strtemp.substr(31, 7).c_str()); 83 | tmp.push_back(temp); 84 | temp = atof(strtemp.substr(39, 7).c_str()); 85 | tmp.push_back(temp); 86 | temp = atof(strtemp.substr(47, 7).c_str()); 87 | tmp.push_back(temp); 88 | } 89 | } 90 | vector tmp2; 91 | for (int i = 0; i < tmp.size(); i++) { 92 | if (i%9 == 0 || i%9==1 || i%9==2) { 93 | tmp2.push_back(tmp[i]); 94 | } 95 | } 96 | * / 97 | /* 98 | ofstream tout("test.dat"); tout.precision(16); 99 | tout << tmp2.size() << endl; 100 | for (int i = 0; i < tmp2.size(); i++) { 101 | tout << tmp2[i] << "\t"; 102 | if (i%3== 2 && i!=0) { 103 | tout << endl; 104 | } 105 | } 106 | */ 107 | //kdtree trans(tmp2); 108 | //cout << "made trans tree" << endl; 109 | 110 | vector tmp4; 111 | ifstream stput(infile.c_str()); 112 | getline(stput, strtemp); //skip header 113 | while (!stput.eof()) { 114 | getline(stput, strtemp); 115 | if (!strtemp.empty()) { 116 | temp = atof(strtemp.substr(31, 7).c_str()); 117 | tmp4.push_back(temp); 118 | tmp.push_back(temp); 119 | temp = atof(strtemp.substr(39, 7).c_str()); 120 | tmp4.push_back(temp); 121 | tmp.push_back(temp); 122 | temp = atof(strtemp.substr(47, 7).c_str()); 123 | tmp4.push_back(temp); 124 | tmp.push_back(temp); 125 | } 126 | } 127 | vector tmp5; 128 | for (int i = 0; i < tmp4.size(); i++) { 129 | if (i%9 == 0 || i%9 == 1 || i%9 == 2) { 130 | tmp5.push_back(tmp4[i]); 131 | } 132 | } 133 | 134 | kdtree trans(tmp5); 135 | int transi = 0; //index of closest trans 136 | int* indt; 137 | indt = new int[1]; 138 | double* distt; 139 | distt = new double[1]; 140 | double winner = 10000.00; 141 | for (i = 0; i < trans.npts; i++) { 142 | trans.nnearest(i, indt, distt, 1); 143 | if (distt[0] < winner) { 144 | winner = distt[0]; 145 | transi = indt[0]; 146 | } 147 | } 148 | 149 | delete distt; 150 | 
delete indt; 151 | //s = trans.run_tree_trans(tmp5); 152 | //transout << s << endl; 153 | //transout.close(); 154 | /* 155 | Begin orientational code 156 | */ 157 | vector tmp3; 158 | double pi = 3.14159265359; double cenndist = 10000; 159 | int x_ref[3]; int y_ref[3]; int z_ref[3]; 160 | x_ref[0] = 1; x_ref[1] = 0; x_ref[2] = 0; 161 | y_ref[0] = 0; y_ref[1] = 1; y_ref[2] = 0; 162 | z_ref[0] = 0; z_ref[1] = 0; z_ref[2] = 1; 163 | double ar[3]; double ar2; double h12; double h22; double h1length; double h2length; double arlength; double dotprohah1; double theta; 164 | double crossp_x_ref_h1[3]; double crossp_x_ref_h1_sign; double q[4]; double htemp[3]; double z_mol_vect[3]; double z_mol_vect2; 165 | double z_mol_vectlength; double dotproductz_mol_vectz_ref; double theta3p; double crossp_z_mol_vectz_ref[3]; double crossp_z_mol_vectz_ref_sign; 166 | double q2[4]; double e[4]; double singtest; 167 | for (int i = 0; i < tmp4.size(); i+=9) { 168 | tmp4[i+8] -= tmp4[i+2]; 169 | tmp4[i+5] -= tmp4[i+2]; 170 | tmp4[i+2] -= tmp4[i+2]; 171 | tmp4[i+7] -= tmp4[i+1]; 172 | tmp4[i+4] -= tmp4[i+1]; 173 | tmp4[i+1] -= tmp4[i+1]; 174 | tmp4[i+6] -= tmp4[i]; 175 | tmp4[i+3] -= tmp4[i]; 176 | tmp4[i] -= tmp4[i]; 177 | h12 = pow(tmp4[i+3],2) + pow(tmp4[i+4],2) + pow(tmp4[i+5],2); 178 | h22 = pow(tmp4[i+6],2) + pow(tmp4[i+7],2) + pow(tmp4[i+8],2); 179 | h1length = pow(h12, 0.5); 180 | h2length = pow(h22, 0.5); 181 | if (tmp4[i+3] != 0) { 182 | tmp4[i+3] /= h1length; 183 | } 184 | if (tmp4[i+4] != 0) { 185 | tmp4[i+4] /= h1length; 186 | } 187 | if (tmp4[i+5] != 0) { 188 | tmp4[i+5] /= h1length; 189 | } 190 | if (tmp4[i+6] != 0) { 191 | tmp4[i+6] /= h1length; 192 | } 193 | if (tmp4[i+7] != 0) { 194 | tmp4[i+7] /= h1length; 195 | } 196 | if (tmp4[i+7] != 0) { 197 | tmp4[i+7] /= h1length; 198 | } 199 | ar[0]=tmp4[i+4]*x_ref[2] - tmp4[i+5]*x_ref[1]; 200 | ar[1]=tmp4[i+5]*x_ref[0] - tmp4[i+3]*x_ref[2]; 201 | ar[2]=tmp4[i+3]*x_ref[1] - tmp4[i+4]*x_ref[0]; 202 | ar2 = pow(ar[0],2) + 
pow(ar[1],2) + pow(ar[2],2); 203 | arlength = pow(ar2, 0.5); 204 | if (ar[0] != 0) { 205 | ar[0] /= arlength; 206 | } 207 | if (ar[1] != 0) { 208 | ar[1] /= arlength; 209 | } 210 | if (ar[2] != 0) { 211 | ar[2] /= arlength; 212 | } 213 | dotprohah1 = 0; 214 | dotprohah1 += x_ref[0]*tmp4[i+3]; 215 | dotprohah1 += x_ref[1]*tmp4[i+4]; 216 | dotprohah1 += x_ref[2]*tmp4[i+5]; 217 | theta = acos(dotprohah1); 218 | crossp_x_ref_h1[0]=tmp4[i+4]*x_ref[2] - tmp4[i+5]*x_ref[1]; 219 | crossp_x_ref_h1[1]=tmp4[i+5]*x_ref[0] - tmp4[i+3]*x_ref[2]; 220 | crossp_x_ref_h1[2]=tmp4[i+3]*x_ref[1] - tmp4[i+4]*x_ref[0]; 221 | crossp_x_ref_h1_sign=crossp_x_ref_h1[0]*tmp4[i+3]+crossp_x_ref_h1[1]*tmp4[i+4]+crossp_x_ref_h1[2]*tmp4[i+5]; 222 | if (crossp_x_ref_h1_sign > 0) { 223 | theta /=2; 224 | } 225 | else { 226 | theta /=-2; 227 | } 228 | q[0]=cos(theta); 229 | q[1]=ar[0]*sin(theta); 230 | q[2]=ar[1]*sin(theta); 231 | q[3]=ar[2]*sin(theta); 232 | 233 | htemp[0]= ((pow(q[0],2)+pow(q[1],2))-(pow(q[2],2)+pow(q[3],2)))* tmp4[i+3]; 234 | htemp[0]= (2*(q[1]*q[2] + q[0]*q[3]) * tmp4[i+4] ) + htemp[0]; 235 | htemp[0]= (2*(q[1]*q[3]-q[0]*q[2])*tmp4[i+5]) + htemp[0]; 236 | htemp[1]= 2*( q[1]*q[2] - q[0]*q[3] ) * tmp4[i+3]; 237 | htemp[1]= ( ( q[0]*q[0]-q[1]*q[1]+q[2]*q[2]-q[3]*q[3] ) * tmp4[i+4] ) + htemp[1]; 238 | htemp[1]= ( 2*( q[2]*q[3] + q[0]*q[1] ) * tmp4[i+5] ) + htemp[1]; 239 | htemp[2]= 2*( q[1]*q[3] + q[0]*q[2]) * tmp4[i+3]; 240 | htemp[2]= ( 2*( q[2]*q[3]-q[0]*q[1] ) * tmp4[i+4] ) + htemp[2]; 241 | htemp[2]= ( ( q[0]*q[0]-q[1]*q[1]-q[2]*q[2]+q[3]*q[3] ) * tmp4[i+5] ) + htemp[2]; 242 | tmp4[i+3]=htemp[0]; 243 | tmp4[i+4]=htemp[1]; 244 | tmp4[i+5]=htemp[2]; 245 | htemp[0]= ((pow(q[0],2)+pow(q[1],2))-(pow(q[2],2)+pow(q[3],2)))* tmp4[i+6]; 246 | htemp[0]= (2*(q[1]*q[2] + q[0]*q[3]) * tmp4[i+7] ) + htemp[0]; 247 | htemp[0]= (2*(q[1]*q[3]-q[0]*q[2])*tmp4[i+8]) + htemp[0]; 248 | htemp[1]= 2*( q[1]*q[2] - q[0]*q[3] ) * tmp4[i+6]; 249 | htemp[1]= ( ( q[0]*q[0]-q[1]*q[1]+q[2]*q[2]-q[3]*q[3] ) * 
tmp4[i+7] ) + htemp[1]; 250 | htemp[1]= ( 2*( q[2]*q[3] + q[0]*q[1] ) * tmp4[i+8] ) + htemp[1]; 251 | htemp[2]= 2*( q[1]*q[3] + q[0]*q[2]) * tmp4[i+6]; 252 | htemp[2]= ( 2*( q[2]*q[3]-q[0]*q[1] ) * tmp4[i+7] ) + htemp[2]; 253 | htemp[2]= ( ( q[0]*q[0]-q[1]*q[1]-q[2]*q[2]+q[3]*q[3] ) * tmp4[i+8] ) + htemp[2]; 254 | tmp4[i+6]=htemp[0]; 255 | tmp4[i+7]=htemp[1]; 256 | tmp4[i+8]=htemp[2]; 257 | z_mol_vect[0]=tmp4[i+4]*tmp4[i+8] - tmp4[i+5]*tmp4[i+7]; 258 | z_mol_vect[1]=tmp4[i+5]*tmp4[i+6] - tmp4[i+3]*tmp4[i+8]; 259 | z_mol_vect[2]=tmp4[i+3]*tmp4[i+7] - tmp4[i+4]*tmp4[i+6]; 260 | z_mol_vect2= pow(z_mol_vect[0],2) + pow(z_mol_vect[1],2) + pow(z_mol_vect[2],2); 261 | z_mol_vectlength=pow(z_mol_vect2,0.5); 262 | if (z_mol_vect[0] !=0) { 263 | z_mol_vect[0] /= z_mol_vectlength; 264 | } 265 | if (z_mol_vect[1] !=0) { 266 | z_mol_vect[1] /= z_mol_vectlength; 267 | } 268 | if (z_mol_vect[2] !=0) { 269 | z_mol_vect[2] /= z_mol_vectlength; 270 | } 271 | dotproductz_mol_vectz_ref=0; 272 | for(int j=0;j<3;j++) { 273 | dotproductz_mol_vectz_ref+=z_mol_vect[j]*z_ref[j]; 274 | } 275 | theta3p= acos(dotproductz_mol_vectz_ref); 276 | 277 | crossp_z_mol_vectz_ref[0]=z_mol_vect[1]*z_ref[2] - z_mol_vect[2]*z_ref[1]; 278 | crossp_z_mol_vectz_ref[1]=z_mol_vect[2]*z_ref[0] - z_mol_vect[0]*z_ref[2]; 279 | crossp_z_mol_vectz_ref[2]=z_mol_vect[0]*z_ref[1] - z_mol_vect[1]*z_ref[0]; 280 | 281 | crossp_z_mol_vectz_ref_sign=crossp_z_mol_vectz_ref[0]*tmp4[i+3]+crossp_z_mol_vectz_ref[1]*tmp4[i+4]+crossp_z_mol_vectz_ref[2]*tmp4[i+5]; 282 | 283 | if (crossp_z_mol_vectz_ref_sign < 0) { 284 | theta3p /=2; 285 | } 286 | else { 287 | theta3p /=-2; 288 | } 289 | 290 | q2[0]=cos(theta3p); 291 | q2[1]=x_ref[0]*sin(theta3p); 292 | q2[2]=x_ref[1]*sin(theta3p); 293 | q2[3]=x_ref[2]*sin(theta3p); 294 | 295 | e[0]= q[0]*q2[0] - q[1]*q2[1] - q[2]*q2[2] - q[3]*q2[3]; 296 | e[1]= q[0]*q2[1] + q[1]*q2[0] + q[2]*q2[3] - q[3]*q2[2]; 297 | e[2]= q[0]*q2[2] - q[1]*q2[3] + q[2]*q2[0] + q[3]*q2[1]; 298 | e[3]= q[0]*q2[3] + 
q[1]*q2[2] - q[2]*q2[1] + q[3]*q2[0]; 299 | 300 | singtest=((e[1]*e[2]) + (e[3]*e[0])); 301 | if (singtest > 0.4999) { 302 | tmp3.push_back(sin(pi/2)); 303 | tmp3.push_back(0); 304 | tmp3.push_back(2*atan2(e[1],e[0])); 305 | } 306 | else if (singtest < -0.4999) { 307 | tmp3.push_back(sin(pi/-2)); 308 | tmp3.push_back(0); 309 | tmp3.push_back(-2*atan2(e[1], e[0])); 310 | } 311 | else { 312 | tmp3.push_back(sin(asin(2*singtest))); 313 | tmp3.push_back(atan2(((2*e[1]*e[0])-(2*e[2]*e[3])) , (1 - (2*pow(e[1],2)) - (2*pow(e[3],2))))); 314 | tmp3.push_back(atan2(((2*e[2]*e[0])-(2*e[1]*e[3])) , (1 - (2*pow(e[2],2)) - (2*pow(e[3],2))))); 315 | } 316 | } 317 | kdtree orient(tmp3); 318 | //s = orient.run_tree_orient(); 319 | //orientout << s << endl; 320 | //orientout.close(); 321 | int orienti = 0; //index of closest orient 322 | int* indo; 323 | indo = new int[1]; 324 | double* disto; 325 | disto = new double[1]; 326 | winner = 10000.00; 327 | for (i = 0; i < orient.npts; i++) { 328 | orient.nnearest(i, indo, disto, 1); 329 | if (disto[0] < winner) { 330 | winner = disto[0]; 331 | orienti = indo[0]; 332 | } 333 | } 334 | 335 | delete disto; 336 | delete indo; 337 | 338 | /* 339 | Determined the best water orientation as orienti in array of pts 340 | * Best oxygen position is the position of water at transi in pts 341 | * need to find oxygen position of water with orienti in tmp array 342 | * tmp array is for each water 9 343 | * therefore position of oxygen is orienti * 9, orienti*9 + 1, and orienti*9 +2 344 | * need to find distance of that water to transi 345 | * translate 346 | */ 347 | 348 | double orientO_x = tmp[orienti*9]; 349 | double orientO_y = tmp[orienti*9 + 1]; 350 | double orientO_z = tmp[orienti*9 + 2]; 351 | double orientH1_x = tmp[orienti*9 + 3]; 352 | double orientH1_y = tmp[orienti*9 + 4]; 353 | double orientH1_z = tmp[orienti*9 + 5]; 354 | double orientH2_x = tmp[orienti*9 + 6]; 355 | double orientH2_y = tmp[orienti*9 + 7]; 356 | double orientH2_z = 
tmp[orienti*9 + 8]; 357 | 358 | string name = "ATOM"; 359 | string atom = "O"; 360 | string chainid = "X"; 361 | int resseq = 1; 362 | string resname = "T3P"; 363 | string testfile = "test.pdb"; 364 | //FILE * tFile; 365 | //tFile = fopen(testfile.c_str(), "a"); 366 | //fprintf (tFile, "%-6s%5i %-4s %3s %1s%4i %8.3f%8.3f%8.3f%6.2f%6.2f\n", name.c_str(), i, atom.c_str(), resname.c_str(), chainid.c_str(), resseq, orientO_x, orientO_y, orientO_z, 0.0, 0.0); 367 | //fprintf (tFile, "%-6s%5i %-4s %3s %1s%4i %8.3f%8.3f%8.3f%6.2f%6.2f\n", name.c_str(), i, "H", resname.c_str(), chainid.c_str(), resseq, orientH1_x, orientH1_y, orientH1_z, 0.0, 0.0); 368 | //fprintf (tFile, "%-6s%5i %-4s %3s %1s%4i %8.3f%8.3f%8.3f%6.2f%6.2f\n", name.c_str(), i, "H", resname.c_str(), chainid.c_str(), resseq, orientH2_x, orientH2_y, orientH2_z, 0.0, 0.0); 369 | 370 | 371 | double transO_x = trans.pts[transi].x[0]; 372 | double transO_y = trans.pts[transi].x[1]; 373 | double transO_z = trans.pts[transi].x[2]; 374 | 375 | double distx = (transO_x - orientO_x); 376 | double disty = (transO_y - orientO_y); 377 | double distz = (transO_z - orientO_z); 378 | 379 | orientO_x += distx; 380 | orientO_y += disty; 381 | orientO_z += distz; 382 | orientH1_x += distx; 383 | orientH1_y += disty; 384 | orientH1_z += distz; 385 | orientH2_x += distx; 386 | orientH2_y += disty; 387 | orientH2_z += distz; 388 | double ox = 0.0, oy = 0.0, oz = 0.0; 389 | for (i = 0; i < 3; i++) { 390 | if (i > 0) {atom = "H";} 391 | if (i == 0) { 392 | ox = orientO_x; oy = orientO_y; oz = orientO_z; 393 | } 394 | if (i == 1) { 395 | ox = orientH1_x; oy = orientH1_y; oz = orientH1_z; 396 | } 397 | if (i == 2){ 398 | ox = orientH2_x; oy = orientH2_y; oz = orientH2_z; 399 | } 400 | fprintf (pFile, "%-6s%5i %-4s %3s %1s%4i %8.3f%8.3f%8.3f%6.2f%6.2f\n", name.c_str(), i, atom.c_str(), resname.c_str(), chainid.c_str(), resseq, ox, oy, oz, 0.0, 0.0); 401 | } 402 | fclose(pFile); 403 | } 404 | 405 | int renum(string infile) { 406 | //int 
# encoding: utf-8
# ----- sstmap/io_helpers.py -----
import numpy as np


def are_you_numpy(a):
    """
    Returns True if the type of `a` is defined in the top-level numpy
    module (e.g. ndarray or a numpy scalar). False otherwise.
    """

    return type(a).__module__ == np.__name__


def make_grid(arrays, out=None):
    """
    !!! Adapted from:
    !!! http://stackoverflow.com/questions/1208118/using-numpy-to-build-an-array-of-all-combinations-of-two-arrays

    Generate a cartesian product of input arrays.

    Parameters
    ----------
    arrays : list of array-like
        1-D arrays to form the cartesian product of.
    out : ndarray
        Array to place the cartesian product in. When omitted, a new array
        is allocated with the dtype of the FIRST input array; values of the
        remaining arrays are cast to that dtype on assignment.

    Returns
    -------
    out : ndarray
        2-D array of shape (M, len(arrays)) containing cartesian products
        formed of input arrays. M is the product of the input sizes, so the
        result has zero rows if any input is empty.

    Examples
    --------
    >>> make_grid(([1, 2, 3], [4, 5], [6, 7]))
    array([[1, 4, 6],
           [1, 4, 7],
           [1, 5, 6],
           [1, 5, 7],
           [2, 4, 6],
           [2, 4, 7],
           [2, 5, 6],
           [2, 5, 7],
           [3, 4, 6],
           [3, 4, 7],
           [3, 5, 6],
           [3, 5, 7]])

    """

    arrays = [np.asarray(x) for x in arrays]
    dtype = arrays[0].dtype

    # Total number of rows in the product.
    n = int(np.prod([x.size for x in arrays]))

    if out is None:
        out = np.zeros([n, len(arrays)], dtype=dtype)

    # An empty axis yields an empty product; return before dividing by a
    # zero size below.
    if n == 0:
        return out

    # Floor division: m is used as a repeat count and as a slice bound, so
    # it must stay an integer (true division yields a float on Python 3).
    m = n // arrays[0].size

    out[:, 0] = np.repeat(arrays[0], m)

    if arrays[1:]:
        # Fill the first block of the remaining columns recursively, then
        # copy that block once for every further value of the first array.
        make_grid(arrays[1:], out=out[0:m, 1:])
        for j in range(1, arrays[0].size):
            out[j * m:(j + 1) * m, 1:] = out[0:m, 1:]

    return out


def bounding_box_frac(frac_structure, delta=np.ones(3), _buffer=0., verbose=False):
    """
    Input is structure in cart. or frac. coordinates as
    nx3 array (n= number of coordinates).
    Output is coordinate meshgrid array with coordinates of
    bounding box lattice as integers.

    Parameters
    ----------
    frac_structure : ndarray
        (n, 3) coordinate array; only columns 0-2 are read.
    delta : array-like of length 3
        Step along each axis, passed to np.arange.
    _buffer : float
        Padding added on both sides of the box; rounded to the nearest
        integer before use.
    verbose : bool
        If True, print the box limits and the grid values along axis 2.

    Returns
    -------
    ndarray
        Integer array of shape (M, 3) produced by make_grid.
    """

    # NOTE(review): the extrema are converted with dtype=int, i.e. truncated
    # toward zero, so a negative minimum is moved toward the origin rather
    # than floored. Kept as in the original; confirm this is intended.
    bounding_min = np.array([np.min(frac_structure[:, axis]) for axis in range(3)],
                            dtype=int)
    bounding_max = np.array([np.max(frac_structure[:, axis]) for axis in range(3)],
                            dtype=int)

    pad = int(np.round(_buffer))
    bounding_min -= pad
    bounding_max += pad

    # One integer axis per dimension; the upper limit is inclusive.
    axes = [np.arange(bounding_min[axis], bounding_max[axis] + 1, delta[axis], dtype=int)
            for axis in range(3)]

    if verbose:
        # Single-argument print calls behave the same on Python 2 and 3.
        print("Bounding min.  %s" % (bounding_min,))
        print("Bounding max.  %s" % (bounding_max,))
        print(axes[2])

    return make_grid(axes)


# ----- sstmap/io_spatial.py -----
import numpy as np


def rotate_check(matrix):
    """
    Raise Warning unless the determinant of `matrix` lies strictly between
    0.99 and 1.01. Returns None when the check passes.
    """

    det = np.linalg.det(matrix)

    if not (0.99 < det < 1.01):

        raise Warning("Warning: Determinant of rotation matrix is %s. Should be close to +1.0." % det)


def do_rotation(crds, origin, rot_mat):
    """
    Shift `crds` by -origin, right-multiply by `rot_mat` and shift back,
    i.e. apply `rot_mat` to the coordinates about the point `origin`.
    """

    return (crds - origin).dot(rot_mat) + origin
%np.linalg.det(matrix)) 8 | 9 | 10 | def do_rotation (crds, origin, rot_mat): 11 | 12 | return (crds - origin).dot(rot_mat) + origin 13 | -------------------------------------------------------------------------------- /sstmap/kdhsa102.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "kdhsa102.h" 9 | 10 | using namespace std; 11 | 12 | point::point () { 13 | dim = D; 14 | x = new double[dim]; 15 | for (int i = 0; i < dim; i++) { 16 | x[i] = 0; 17 | } 18 | 19 | } 20 | 21 | void point::set_point(double* vals) { 22 | //dim = d; 23 | //x = new double[dim]; 24 | for (int i = 0; i < dim; i++) { 25 | x = &vals[i]; 26 | } 27 | } 28 | void point::set_point(const point &p) { 29 | dim = p.dim; 30 | //x = new double[dim]; 31 | for (int i = 0; i < dim; i++) { 32 | x[i] = p.x[i]; 33 | } 34 | } 35 | 36 | /*point::~point () { 37 | delete x; 38 | }*/ 39 | 40 | void point::zeros() { 41 | for (int i = 0; i < dim; i++) { 42 | x[i] = 0; 43 | } 44 | } 45 | 46 | void point::ones() { 47 | for (int i = 0; i < dim; i++) { 48 | x[i] = 1; 49 | } 50 | } 51 | 52 | void point::print_point() { 53 | for (int i = 0; i < dim; i++) { 54 | cout << x[i] << "\t"; 55 | } 56 | cout << endl; 57 | } 58 | 59 | 60 | double dist(const point &p, const point &q) { 61 | if (p.dim != q.dim) { 62 | cerr << "Dimensions of points do not match in distance comparison!!\n"; 63 | exit(EXIT_FAILURE); 64 | } 65 | double distance = 0.0; 66 | for (int i = 0; i < p.dim; i++) { 67 | distance += pow((p.x[i] - q.x[i]), 2); 68 | } 69 | double large = 10000; 70 | if (distance == 0) return large; 71 | return sqrt(distance); 72 | 73 | } 74 | 75 | bool same_points (const point &p, const point &q) { 76 | if (p.dim != q.dim) { 77 | return false; 78 | } 79 | else { 80 | for (int i = 0; i < p.dim; i++) { 81 | if (p.x[i] != q.x[i]) { 82 | return false; 83 | } 84 | } 85 | } 86 | return true; 87 | } 88 
| 89 | boxnode::boxnode() { 90 | mom = 0; dau1 = 0; dau2 = 0; pthi = 0; ptlo = 0; 91 | //By default the points will be set to zero respectively 92 | } 93 | 94 | void boxnode::set_boxnode(point mylo, point myhi, int mymom, int myd1, int myd2, int myptlo, int mypthi) { 95 | //mybox.set_box(mylo, myhi); 96 | hi.set_point(myhi); 97 | lo.set_point(mylo); 98 | //cout << "set box\n"; 99 | mom = mymom; 100 | dau1 = myd1; 101 | dau2 = myd2; 102 | ptlo = myptlo; 103 | pthi = mypthi; 104 | //cout << "done set box\n"; 105 | } 106 | 107 | double dist(const boxnode &b, const point &p) { 108 | double distance = 0.0; 109 | if (p.dim != b.lo.dim || p.dim != b.hi.dim) { 110 | cerr << "Point and Box Points do not have the same dimensionality in distance calculation!!\n"; 111 | exit(EXIT_FAILURE); 112 | } 113 | for (int i = 0; i < p.dim; i++) { 114 | if (p.x[i] < b.lo.x[i]) distance += pow((p.x[i]-b.lo.x[i]), 2); 115 | if (p.x[i] > b.hi.x[i]) distance += pow((p.x[i]-b.hi.x[i]), 2); 116 | } 117 | return sqrt(distance); 118 | //This will return 0 if the point is in the box 119 | } 120 | 121 | int selecti(const int k, int *indx, int n, double *arr) { 122 | int i, ia, ir, j, l, mid; 123 | double a; 124 | 125 | l = 0; 126 | 127 | ir = n-1; 128 | for (;;) { 129 | if (ir <= l+1) { 130 | if (ir == l+1 && arr[indx[ir]] < arr[indx[l]]) { 131 | swap(indx[l], indx[ir]); 132 | } 133 | return indx[k]; //final end point 134 | } 135 | else { 136 | mid = (l+ir) >> 1; 137 | swap(indx[mid], indx[l+1]); 138 | if (arr[indx[l]] > arr[indx[ir]]) swap(indx[l], indx[ir]); 139 | if (arr[indx[l+1]] > arr[indx[ir]]) swap(indx[l+1], indx[ir]); 140 | if (arr[indx[l]] > arr[indx[l+1]]) swap(indx[l], indx[l+1]); 141 | i = l+1; 142 | j = ir; 143 | ia = indx[l+1]; 144 | a = arr[ia]; 145 | for (;;) { 146 | do i++; while (arr[indx[i]] < a); 147 | do j--; while (arr[indx[j]] > a); 148 | if (j < i) break; //inner endpoint 149 | swap(indx[i], indx[j]); 150 | 151 | } 152 | 153 | indx[l+1] = indx[j]; 154 | indx[j] = ia; 155 
| if (j >= k) ir=j-1; 156 | if (j <= k) l = i; 157 | } 158 | } 159 | 160 | } 161 | 162 | const double kdtree::BIG(1.0e99); 163 | 164 | kdtree::kdtree(std::vector< double > &vals) { 165 | /* 166 | This function assumes the doubles fed in through vals are only the pertinent ones. IE If this is 3d its the positions or orientations and nothing else. 167 | */ 168 | /*for (int i = 0; i < 3; i++) { 169 | minn[i] = 10000; 170 | maxx[i] = 0; 171 | }*/ 172 | //BIG = 1.0e99; 173 | dim = D; 174 | nd = new int[1]; 175 | dn = new double[1]; 176 | //within1 = new int[3000]; 177 | npts = vals.size()/D; 178 | //cout << npts << endl; 179 | pts = new point[npts]; 180 | /* 181 | cenn[0] = x; 182 | cenn[1] = y; 183 | cenn[2] = z; 184 | */ 185 | int foo = 0; 186 | for (int i = 0; i < vals.size(); i++) { 187 | if (i%D==0) { 188 | pts[foo].x[0] = vals[i]; 189 | //minn[0] = min(minn[0], vals[i]); 190 | //maxx[0] = max(maxx[0], vals[i]); 191 | } 192 | if (i%D==1) { 193 | pts[foo].x[1] = vals[i]; 194 | //minn[1] = min(minn[1], vals[i]); 195 | //maxx[1] = max(maxx[1], vals[i]); 196 | } 197 | if (i%D==2) { 198 | pts[foo].x[2] = vals[i]; 199 | //minn[2] = min(minn[2], vals[i]); 200 | //maxx[2] = max(maxx[2], vals[i]); 201 | foo++; 202 | } 203 | } 204 | 205 | /*for (int i = 0; i < 3; i++) { 206 | cenn[i] = (maxx[i]+minn[i])/2; 207 | }*/ 208 | 209 | ptindx = new int[npts]; rptindx = new int[npts]; 210 | int ntmp, m, kk, k, j, nowtask, jbox, np, tmom, tdim, ptlo, pthi; 211 | int *hp; 212 | double *cp; 213 | int taskmom[50], taskdim[50]; 214 | for (k = 0; k < npts; k++) ptindx[k] = k; 215 | m = 1; 216 | for (ntmp = npts; ntmp; ntmp >>= 1) { 217 | m <<= 1; 218 | } 219 | //cout << "npts: " << npts << endl; 220 | numbox = 2*npts - (m>>1); 221 | if (m < numbox) numbox = m; 222 | //m = pow(2, (log(npts)/log(2))); 223 | //numbox = 2*npts - m/2; 224 | if (m < numbox) numbox = m; 225 | numbox--; 226 | //cout << "about to make boxes\n"; 227 | //cout << numbox << endl; 228 | boxes = new boxnode[numbox]; 229 
| //cout << "made boxes\n"; 230 | coord = new double[D*npts]; 231 | //cout << "made coords\n"; 232 | for (j = 0, kk = 0; j < D; j++, kk+= npts) { 233 | for (k = 0; k < npts; k++) { 234 | //cout << k << endl; 235 | coord[kk+k] = pts[k].x[j]; 236 | } 237 | } 238 | //cout << "set coords\n"; 239 | //double* nums; 240 | //double* nnums; 241 | point lo, hi; 242 | for (int i = 0; i < 3; i++) { 243 | hi.x[i] = BIG; 244 | lo.x[i] = -BIG; 245 | } 246 | 247 | //cout << hi.x[0] << "\t" << hi.x[1] << "\t" << hi.x[2] << endl; 248 | //cout << lo.x[0] << "\t" << lo.x[1] << "\t" << lo.x[2] << endl; 249 | boxes[0].set_boxnode(lo, hi, 0, 0, 0, 0, npts-1); 250 | /*if (D == 3) { 251 | //cout << "start D3 if\n"; 252 | nums = new double[3]; 253 | nnums = new double[3]; 254 | for (int i =0; i <3; i++) { 255 | nums[i] = BIG; 256 | nnums[i] = -BIG; 257 | } 258 | cout << nums[0] << "\t" << nums[1] << "\t" << nums[2] << endl; 259 | cout << nnums[0] << "\t" << nnums[1] << "\t" << nnums[2] << endl; 260 | lo.set_point(nnums, D); 261 | hi.set_point(nums, D); 262 | //cout << "made BIG points\n"; 263 | cout << hi.x[0] << "\t" << hi.x[1] << "\t" << hi.x[2] << endl; 264 | cout << lo.x[0] << "\t" << lo.x[1] << "\t" << lo.x[2] << endl; 265 | boxes[0].set_boxnode(lo, hi, 0, 0, 0, 0, npts-1); 266 | //cout << "made first box\n"; 267 | } 268 | if (D == 6) { 269 | //cout << "start D6 if\n"; 270 | nums = new double[6]; 271 | nnums = new double[6]; 272 | for (int i = 0; i < 6; i++) { 273 | nums[i] = BIG; 274 | nnums[i] = -BIG; 275 | } 276 | lo.set_point(nnums, D), hi.set_point(nums, D); 277 | boxes[0].set_boxnode(lo, hi, 0, 0, 0, 0, npts-1); 278 | } 279 | delete nums; 280 | delete nnums; 281 | */ 282 | //cout << "Set initial box: \n\n"; 283 | //cout << boxes[0].hi.x[0] << "\t" << boxes[0].hi.x[1] << "\t" << boxes[0].hi.x[2] << endl; 284 | //cout << boxes[0].lo.x[0] << "\t" << boxes[0].lo.x[1] << "\t" << boxes[0].lo.x[2] << endl; 285 | 286 | for (int i = 0; i < 3; i++) { 287 | boxes[0].hi.x[i] = BIG; 288 | 
boxes[0].lo.x[i] = -BIG; 289 | } 290 | 291 | //cout << "Fix initial box: \n\n"; 292 | //cout << boxes[0].hi.x[0] << "\t" << boxes[0].hi.x[1] << "\t" << boxes[0].hi.x[2] << endl; 293 | //cout << boxes[0].lo.x[0] << "\t" << boxes[0].lo.x[1] << "\t" << boxes[0].lo.x[2] << endl; 294 | 295 | jbox = 0; 296 | taskmom[1] = 0; 297 | taskdim[1] = 0; 298 | nowtask = 1; 299 | //cout << "got to while loop\n"; 300 | while (nowtask) { 301 | tmom = taskmom[nowtask]; 302 | tdim = taskdim[nowtask--]; 303 | ptlo = boxes[tmom].ptlo; 304 | pthi = boxes[tmom].pthi; 305 | hp = &ptindx[ptlo]; 306 | cp = &coord[tdim*npts]; 307 | np = pthi - ptlo + 1; 308 | kk = (np-1)/2; 309 | selecti(kk, hp, np, cp); 310 | hi = boxes[tmom].hi; 311 | lo = boxes[tmom].lo; 312 | //hi.x[tdim] = lo.x[tdim] = coord[tdim*npts + hp[kk]]; 313 | //cout << jbox << endl; 314 | boxes[++jbox].set_boxnode(boxes[tmom].lo, hi, tmom, 0, 0, ptlo, ptlo+kk); 315 | boxes[jbox].hi.x[tdim] = coord[tdim*npts + hp[kk]]; 316 | //cout << jbox << endl; 317 | boxes[++jbox].set_boxnode(lo, boxes[tmom].hi, tmom, 0 , 0, ptlo+kk+1, pthi); 318 | boxes[jbox].lo.x[tdim] = coord[tdim*npts + hp[kk]]; 319 | boxes[tmom].dau1 = jbox-1; 320 | boxes[tmom].dau2 = jbox; 321 | if (kk > 1) { 322 | taskmom[++nowtask] = jbox-1; 323 | taskdim[nowtask] = (tdim+1)%D; 324 | } 325 | if (np - kk > 3) { 326 | taskmom[++nowtask] = jbox; 327 | taskdim[nowtask] = (tdim+1)%D; 328 | } 329 | } 330 | for (j = 0; j < npts; j++) rptindx[ptindx[j]] = j; 331 | //cout << "made tree" << endl; 332 | 333 | //cout << "delete coord" << endl; 334 | } 335 | 336 | 337 | kdtree::~kdtree () { 338 | delete boxes; 339 | delete ptindx; 340 | delete rptindx; 341 | delete dn; 342 | delete nd; 343 | delete coord; 344 | //delete within1; 345 | } 346 | 347 | double kdtree::disti(int jpt, int kpt) { 348 | if (jpt == kpt) return BIG; //to avoid the closest neighbor is itself 349 | else return dist(pts[jpt], pts[kpt]); 350 | } 351 | 352 | int kdtree::locate(point pt) { 353 | int nb, d1, jdim; 
354 | nb = jdim = 0; 355 | while (boxes[nb].dau1) { //basically keep going until bottom from root 356 | d1 = boxes[nb].dau1; 357 | if (pt.x[jdim] <= boxes[d1].hi.x[jdim]) nb = d1; 358 | else nb = boxes[nb].dau2; 359 | jdim = ++jdim%D; 360 | } 361 | return nb; 362 | } 363 | 364 | int kdtree::locate(int jpt) { 365 | int nb, d1, jh; 366 | jh = rptindx[jpt]; 367 | nb = 0; 368 | while (boxes[nb].dau1) { 369 | d1 = boxes[nb].dau1; 370 | if (jh <= boxes[d1].pthi) nb = d1; 371 | else nb = boxes[nb].dau2; 372 | } 373 | return nb; 374 | } 375 | 376 | double kdtree::dnearest(point pt) { 377 | int i, k, nrst, ntask; 378 | int task[50]; 379 | double dnrst = BIG, d; 380 | k = locate(pt); 381 | for (i = boxes[k].ptlo; i <= boxes[k].pthi; i++) { 382 | d = dist(pts[ptindx[i]], pt); 383 | if (d < dnrst && d != 0) { 384 | //this fix for != 0 may result in some uncertain behavior, will be necessary to check this out. 385 | nrst = ptindx[i]; 386 | dnrst = d; 387 | } 388 | } 389 | task[1] = 0; 390 | ntask = 1; 391 | while (ntask) { 392 | k = task[ntask--]; 393 | if (dist(boxes[k], pt) < dnrst) { 394 | if (boxes[k].dau1) { 395 | task[++ntask] = boxes[k].dau1; 396 | task[++ntask] = boxes[k].dau2; 397 | } 398 | else { 399 | for (i = boxes[k].ptlo; i <= boxes[k].pthi; i++) { 400 | d = dist(pts[ptindx[i]], pt); 401 | if (d < dnrst && d != 0) { 402 | nrst = ptindx[i]; 403 | dnrst = d; 404 | } 405 | } 406 | } 407 | } 408 | } 409 | return dnrst; 410 | } 411 | 412 | void kdtree::nnearest(int jpt, int* nn, double* dn, int n) { 413 | int i, k, ntask, kp; 414 | int task[50]; 415 | double d; 416 | if (n > npts-1) throw("you're asking for too much buddy (nn > npts)"); 417 | for (i = 0; i < n; i++) dn[i] = BIG; 418 | kp = boxes[locate(jpt)].mom; 419 | while (boxes[kp].pthi - boxes[kp].ptlo < n) kp = boxes[kp].mom; 420 | for (i = boxes[kp].ptlo; i <= boxes[kp].pthi; i++) { 421 | if (jpt == ptindx[i]) continue; 422 | d = disti(ptindx[i], jpt); 423 | if (d < dn[0]) { 424 | dn[0] = d; 425 | nn[0] = 
ptindx[i]; 426 | if (n>1) sift_down(dn, nn, n); 427 | } 428 | } 429 | task[1] = 0; 430 | ntask = 1; 431 | while (ntask) { 432 | k = task[ntask--]; 433 | if (k == kp) continue; 434 | if (dist(boxes[k], pts[jpt]) < dn[0]) { 435 | if (boxes[k].dau1) { 436 | task[++ntask] = boxes[k].dau1; 437 | task[++ntask] = boxes[k].dau2; 438 | } 439 | else { 440 | for (i = boxes[k].ptlo; i <= boxes[k].pthi; i++) { 441 | d = disti(ptindx[i], jpt); 442 | if (d < dn[0]) { 443 | dn[0] = d; 444 | nn[0] = ptindx[i]; 445 | if (n > 1) sift_down(dn, nn, n); 446 | } 447 | } 448 | } 449 | } 450 | } 451 | return; 452 | } 453 | 454 | void kdtree::sift_down(double* heap, int* ndx, int nn) { 455 | int n = nn - 1; 456 | int j, jold, ia; 457 | double a; 458 | a = heap[0]; 459 | ia = ndx[0]; 460 | jold = 0; 461 | j = 1; 462 | while (j <= n) { 463 | if (j < n && heap[j] < heap[j+1]) j++; 464 | if (a >= heap[j]) break; 465 | heap[jold] = heap[j]; 466 | ndx[jold] = ndx[j]; 467 | jold = j; 468 | j = 2*j+1; 469 | } 470 | heap[jold] = a; 471 | ndx[jold] = ia; 472 | } 473 | 474 | 475 | int kdtree::locatenear(point pt, double r, int *v, int nmax) { 476 | /* 477 | This fuction returns all the points within some distance of a target point. I dont think we will ever use it. 
478 | */ 479 | int k, i, nb, nbold, nret, ntask, jdim, d1, d2; 480 | int task[50]; 481 | nb = jdim = nret = 0; 482 | if (r < 0.0) throw("radius must be nonnegative"); 483 | while (boxes[nb].dau1) { 484 | nbold = nb; 485 | d1 = boxes[nb].dau1; 486 | d2 = boxes[nb].dau2; 487 | if (pt.x[jdim] + r <= boxes[d1].hi.x[jdim]) nb = d1; 488 | else if (pt.x[jdim] - r >= boxes[d2].lo.x[jdim]) nb = d2; 489 | jdim = ++jdim%D; 490 | if (nb == nbold) break; 491 | } 492 | //cout << nb << endl; 493 | task[1] = nb; 494 | ntask = 1; 495 | while (ntask) { 496 | k = task[ntask--]; 497 | if (dist(boxes[k], pt) > r) { 498 | //cout << "box out of range: " << dist(boxes[k], pt) << endl; 499 | //cout << boxes[k].hi.x[0] << "\t" << boxes[k].hi.x[1] << "\t" << boxes[k].hi.x[2] << endl; 500 | continue; 501 | } 502 | else { 503 | //cout << "box in range\n"; 504 | } 505 | if (boxes[k].dau1) { 506 | task[++ntask] = boxes[k].dau1; 507 | task[++ntask] = boxes[k].dau2; 508 | } 509 | else { 510 | for (i = boxes[k].ptlo; i <= boxes[k].pthi; i++) { 511 | if (dist(pts[ptindx[i]], pt) <= r && nret < nmax) { 512 | v[nret++] = ptindx[i]; 513 | } 514 | if (nret == nmax) return nmax; 515 | } 516 | } 517 | } 518 | return nret; 519 | } 520 | /* 521 | double kdtree::run_tree() { 522 | //ofstream output; 523 | //output.open(OUT.c_str()); 524 | //output.precision(16); 525 | //cout << "run tree start" << endl; 526 | double gd = 0; 527 | double s = 0.0; 528 | double T = 300.; 529 | double R = 8.314472; 530 | double pi = 3.14159265359; 531 | double cenndist = 10000; 532 | int npts2 = 0; 533 | int fcount = 10000; 534 | for (int i = 0; i < npts; i++) { 535 | //if (pts[i].x[0] > maxx[0]-5 || pts[i].x[0] < minn[0]+5 || pts[i].x[1] > maxx[1]-5 || pts[i].x[1] < minn[1]+5 || pts[i].x[2] > maxx[2]-5 || pts[i].x[2] < minn[2]+5) { 536 | // continue; 537 | //} 538 | //else { 539 | cenndist = pow((pts[i].x[0] - cenn[0]), 2) + pow((pts[i].x[1] - cenn[1]), 2) + pow((pts[i].x[2] - cenn[2]), 2); 540 | //if (abs(pts[i].x[0] - 
cenn[0]) < 1 && abs(pts[i].x[1] - cenn[1]) < 1 && abs(pts[i].x[2] - cenn[2]) < 1) { 541 | if (cenndist <= 1) { 542 | nnearest(i, nd, dn, 1); 543 | //cout << dn[0] << endl; 544 | gd += log((0.0329223149*fcount*4*pi*pow(dn[0], 3))/3); 545 | npts2++; 546 | } 547 | } 548 | //cout << "\n\n"; 549 | //cout << gd << endl; 550 | //cout << npts2 << endl; 551 | s = R*T*0.239*(gd/npts2 + 0.5772156649)/1000; 552 | //cout << s << endl; 553 | return s; 554 | } 555 | */ 556 | double kdtree::run_tree_trans(std::vector &cls) { 557 | point pt; 558 | double* dh; 559 | int numvals = cls.size()/3; 560 | dh = new double[numvals]; 561 | int vecpos = 0; 562 | for (int i = 0; i < cls.size(); i+=3) { 563 | //run through the vector of the acknowledged standard cluster file, skipping hydrogens until the end 564 | pt.x[0] = cls[i]; pt.x[1] = cls[i+1]; pt.x[2] = cls[i+2]; 565 | dh[vecpos] = dnearest(pt); 566 | vecpos++; 567 | } 568 | 569 | double gd = 0; 570 | double s = 0.0; 571 | double T = 300.; 572 | double R = 8.314472; 573 | double pi = 3.14159265359; 574 | 575 | int fcount = 10000; 576 | 577 | for (int i = 0; i < numvals; i++) { 578 | gd += log((0.0329223149*fcount*4*pi*pow(dh[i], 3))/3); 579 | } 580 | 581 | s = R*T*0.239*(gd/numvals + 0.5772156649)/1000; 582 | 583 | delete dh; 584 | return s; 585 | } 586 | 587 | double kdtree::run_tree_orient() { 588 | //ofstream ori("orientdists.txt"); ori.precision(16); 589 | double gd = 0; 590 | double s = 0.0; 591 | double T = 300.; 592 | double R = 8.314472; 593 | double pi = 3.14159265359; 594 | double de = 10000; 595 | point z; 596 | for (int i = 0; i < npts; i++) { 597 | nnearest(i, nd, dn, 1); 598 | //in order to implement this need to use a function which takes a point and returns a distance. 599 | //dnearest from before. 
600 | if (pts[i].x[0] > pi/2) { 601 | z.x[0] = pts[i].x[0] - 2*pi; 602 | z.x[1] = pts[i].x[1]; 603 | z.x[2] = pts[i].x[2]; 604 | de = dnearest(z); 605 | if (de < dn[0] && de != 0) { 606 | dn[0] = de; 607 | } 608 | } 609 | else if (pts[i].x[0] < -pi/2) { 610 | z.x[0] = pts[i].x[0] + 2*pi; 611 | z.x[1] = pts[i].x[1]; 612 | z.x[2] = pts[i].x[2]; 613 | de = dnearest(z); 614 | if (de < dn[0] && de != 0) { 615 | dn[0] = de; 616 | } 617 | } 618 | else if (pts[i].x[1] > pi/2) { 619 | z.x[0] = pts[i].x[0]; 620 | z.x[1] = pts[i].x[1] - 2*pi; 621 | z.x[2] = pts[i].x[2]; 622 | de = dnearest(z); 623 | if (de < dn[0] && de != 0) { 624 | dn[0] = de; 625 | } 626 | } 627 | else if (pts[i].x[1] < -pi/2) { 628 | z.x[0] = pts[i].x[0]; 629 | z.x[1] = pts[i].x[1] + 2*pi; 630 | z.x[2] = pts[i].x[2]; 631 | de = dnearest(z); 632 | if (de < dn[0] && de != 0) { 633 | dn[0] = de; 634 | } 635 | } 636 | else if (pts[i].x[2] > pi/2) { 637 | z.x[0] = pts[i].x[0]; 638 | z.x[1] = pts[i].x[1]; 639 | z.x[2] = pts[i].x[2] - 2*pi; 640 | de = dnearest(z); 641 | if (de < dn[0] && de != 0) { 642 | dn[0] = de; 643 | } 644 | } 645 | else if (pts[i].x[2] < -pi/2) { 646 | z.x[0] = pts[i].x[0]; 647 | z.x[1] = pts[i].x[1]; 648 | z.x[2] = pts[i].x[2] + 2*pi; 649 | de = dnearest(z); 650 | if (de < dn[0] && de != 0) { 651 | dn[0] = de; 652 | } 653 | } 654 | //ori << dn[0] << endl; 655 | gd += log((pow(dn[0], 3)*npts)/(6*pi)); 656 | } 657 | //cout << gd << endl; 658 | //cout << npts << endl; 659 | s = R*T*0.239*(gd/npts + 0.5772156649)/1000; 660 | //cout << s << endl; 661 | return s; 662 | } 663 | 664 | /*void kdtree::run_locate() { 665 | double dn = BIG; 666 | double d; 667 | int nr = 0; 668 | for (int i = 0; i < npts; i++) { 669 | nr = locatenear(pts[i], 0.25, within1, 3000); 670 | //cout << nr << endl; 671 | for (int j = 0; j < nr; j++) { 672 | d = disti(i, within1[j]); 673 | if (d < dn) { 674 | dn = d; 675 | } 676 | } 677 | //cout << dn << endl; 678 | } 679 | }*/ 680 | 681 | 682 | 
-------------------------------------------------------------------------------- /sstmap/kdhsa102.h: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | 11 | /* 12 | Changes since kdhsa102: 13 | 14 | Instituting a new system in which the larger cluster files will be used for translational but orientational will use non-expanded cluster files (typical 1A) 15 | So what this means is that an expanded file will be read to create a tree for trans 16 | The standard file will be read to create the search region for trans, and converted to do orientational. 17 | 18 | Primarily this will be accomplished in the main body code, however one adjustment needs to be made to the existing methods: that dnearest cannot return a distance of 0. 19 | This is due to searching the tree from a different vector than it was created on...resulting in a 0 return from the normal nnearest code. 20 | 21 | No need to change the orientational tree. Need to add a new function for searching translational tree. 
22 | 23 | */ 24 | 25 | 26 | static int D = 3; 27 | 28 | struct point { 29 | int dim; 30 | double* x; 31 | point(); 32 | void set_point(double* vals); 33 | void set_point(const point &p); 34 | //~point(); 35 | void print_point(); 36 | void zeros(); 37 | void ones(); 38 | //void set_dimension(int y); 39 | 40 | 41 | }; 42 | 43 | bool same_points (point p, point q); 44 | 45 | double dist(const point &p, const point &q); 46 | 47 | /*struct box { 48 | point lo, hi; //diagonally opposite points in the box (min, max) 49 | //box () {} //empty normal constructor 50 | void set_box(const point &mylo, const point &myhi); //copy those points to be our lo and hi 51 | };*/ 52 | 53 | 54 | 55 | struct boxnode { 56 | int mom, dau1, dau2, ptlo, pthi; //these are all integers which will work to point towards the specified thing in their data structure 57 | point lo, hi; 58 | boxnode(); 59 | void set_boxnode(point mylo, point myhi, int mymom, int myd1, int myd2, int myptlo, int mypthi); 60 | 61 | /* 62 | Feed it 2 points and the necessary indices, save those indices and create a box from the points. 
63 | In other words this is the data structure which actively creates the box data structure, but will be used 64 | recursively to create the entire tree 65 | */ 66 | }; 67 | 68 | double dist(const boxnode &b, const point &p, int d); 69 | 70 | struct kdtree { 71 | static const double BIG; //this value is a placeholder for starting box size (will be absurd) 72 | int dim; 73 | int numbox, npts; //integer counts of boxes and points 74 | point* pts; 75 | boxnode *boxes; 76 | int* ptindx; 77 | int* rptindx; //point index and reverse point index 78 | int* nd; 79 | double* dn; 80 | double* coord; 81 | //int* within1; 82 | //double cenn[3]; 83 | //double maxx[3]; 84 | //double minn[3]; 85 | kdtree(std::vector< double > &vals); 86 | ~kdtree(); 87 | //utility functions for use after tree is constructed 88 | double disti(int jpt, int kpt); 89 | int locate(point pt); 90 | int locate(int jpt); 91 | //applications to use tree 92 | //int nearest(point pt); 93 | double dnearest(point pt); 94 | void nnearest(int jpt, int *nn, double *dn, int n); 95 | static void sift_down(double *heap, int *ndx, int nn); 96 | int locatenear(point pt, double r, int *v, int nmax); 97 | //double run_tree(); 98 | double run_tree_orient(); 99 | double run_tree_trans(std::vector &cls); 100 | //void run_locate(); 101 | //void print_boxes(); 102 | //void print_tree(int y); 103 | //void print_box(int y); 104 | 105 | }; 106 | 107 | 108 | 109 | 110 | -------------------------------------------------------------------------------- /sstmap/kdhsa102_main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "kdhsa102.h" 10 | 11 | using namespace std; 12 | 13 | int main(int argc, char** argv) { 14 | if (argc <= 1) { 15 | cerr << "\nUSAGE:\n\n" 16 | << "./kd102 [-i inputfile][-e expanded inputfile]\n\n" 17 | << "where\n\n" 18 | << "inputfile is the file to read from 
(standard 1A cluster)\n" 19 | << "expanded inputfile is the cluster file with 2A included\n\n"; 20 | //<< "x coordinate of center is from clustercenterfile\n" 21 | //<< "y coordinate of center is from clustercenterfile\n" 22 | //<< "z coordinate of center is from clustercenterfile\n\n"; 23 | exit(0); 24 | } 25 | 26 | double s = 0; 27 | 28 | ofstream transout("trans.dat", ios::app); transout.precision(16); 29 | ofstream orientout("orient.dat", ios::app); orientout.precision(16); 30 | 31 | clock_t t; 32 | t = clock(); 33 | int i = 0; string infile; string expfile; 34 | //double x = 0, y = 0, z = 0; 35 | while (i tmp; 73 | double temp; 74 | string strtemp; 75 | ifstream input(expfile.c_str()); 76 | //getline(input, strtemp); //skip header 77 | while (!input.eof()) { 78 | getline(input, strtemp); 79 | if (!strtemp.empty()) { 80 | temp = atof(strtemp.substr(31, 7).c_str()); 81 | tmp.push_back(temp); 82 | temp = atof(strtemp.substr(39, 7).c_str()); 83 | tmp.push_back(temp); 84 | temp = atof(strtemp.substr(47, 7).c_str()); 85 | tmp.push_back(temp); 86 | } 87 | } 88 | vector tmp2; 89 | for (int i = 0; i < tmp.size(); i++) { 90 | if (i%9 == 0 || i%9==1 || i%9==2) { 91 | tmp2.push_back(tmp[i]); 92 | } 93 | } 94 | /* 95 | ofstream tout("test.dat"); tout.precision(16); 96 | tout << tmp2.size() << endl; 97 | for (int i = 0; i < tmp2.size(); i++) { 98 | tout << tmp2[i] << "\t"; 99 | if (i%3== 2 && i!=0) { 100 | tout << endl; 101 | } 102 | } 103 | */ 104 | kdtree trans(tmp2); 105 | //cout << "made trans tree" << endl; 106 | 107 | vector tmp4; 108 | ifstream stput(infile.c_str()); 109 | getline(stput, strtemp); //skip header 110 | while (!stput.eof()) { 111 | getline(stput, strtemp); 112 | if (!strtemp.empty()) { 113 | temp = atof(strtemp.substr(31, 7).c_str()); 114 | tmp4.push_back(temp); 115 | temp = atof(strtemp.substr(39, 7).c_str()); 116 | tmp4.push_back(temp); 117 | temp = atof(strtemp.substr(47, 7).c_str()); 118 | tmp4.push_back(temp); 119 | } 120 | } 121 | vector tmp5; 122 
| for (int i = 0; i < tmp4.size(); i++) { 123 | if (i%9 == 0 || i%9 == 1 || i%9 == 2) { 124 | tmp5.push_back(tmp4[i]); 125 | } 126 | } 127 | 128 | s = trans.run_tree_trans(tmp5); 129 | transout << s << endl; 130 | transout.close(); 131 | /* 132 | Begin orientational code 133 | */ 134 | vector tmp3; 135 | double pi = 3.14159265359; double cenndist = 10000; 136 | int x_ref[3]; int y_ref[3]; int z_ref[3]; 137 | x_ref[0] = 1; x_ref[1] = 0; x_ref[2] = 0; 138 | y_ref[0] = 0; y_ref[1] = 1; y_ref[2] = 0; 139 | z_ref[0] = 0; z_ref[1] = 0; z_ref[2] = 1; 140 | double ar[3]; double ar2; double h12; double h22; double h1length; double h2length; double arlength; double dotprohah1; double theta; 141 | double crossp_x_ref_h1[3]; double crossp_x_ref_h1_sign; double q[4]; double htemp[3]; double z_mol_vect[3]; double z_mol_vect2; 142 | double z_mol_vectlength; double dotproductz_mol_vectz_ref; double theta3p; double crossp_z_mol_vectz_ref[3]; double crossp_z_mol_vectz_ref_sign; 143 | double q2[4]; double e[4]; double singtest; 144 | for (int i = 0; i < tmp4.size(); i+=9) { 145 | tmp4[i+8] -= tmp4[i+2]; 146 | tmp4[i+5] -= tmp4[i+2]; 147 | tmp4[i+2] -= tmp4[i+2]; 148 | tmp4[i+7] -= tmp4[i+1]; 149 | tmp4[i+4] -= tmp4[i+1]; 150 | tmp4[i+1] -= tmp4[i+1]; 151 | tmp4[i+6] -= tmp4[i]; 152 | tmp4[i+3] -= tmp4[i]; 153 | tmp4[i] -= tmp4[i]; 154 | h12 = pow(tmp4[i+3],2) + pow(tmp4[i+4],2) + pow(tmp4[i+5],2); 155 | h22 = pow(tmp4[i+6],2) + pow(tmp4[i+7],2) + pow(tmp4[i+8],2); 156 | h1length = pow(h12, 0.5); 157 | h2length = pow(h22, 0.5); 158 | if (tmp4[i+3] != 0) { 159 | tmp4[i+3] /= h1length; 160 | } 161 | if (tmp4[i+4] != 0) { 162 | tmp4[i+4] /= h1length; 163 | } 164 | if (tmp4[i+5] != 0) { 165 | tmp4[i+5] /= h1length; 166 | } 167 | if (tmp4[i+6] != 0) { 168 | tmp4[i+6] /= h1length; 169 | } 170 | if (tmp4[i+7] != 0) { 171 | tmp4[i+7] /= h1length; 172 | } 173 | if (tmp4[i+7] != 0) { 174 | tmp4[i+7] /= h1length; 175 | } 176 | ar[0]=tmp4[i+4]*x_ref[2] - tmp4[i+5]*x_ref[1]; 177 | 
ar[1]=tmp4[i+5]*x_ref[0] - tmp4[i+3]*x_ref[2]; 178 | ar[2]=tmp4[i+3]*x_ref[1] - tmp4[i+4]*x_ref[0]; 179 | ar2 = pow(ar[0],2) + pow(ar[1],2) + pow(ar[2],2); 180 | arlength = pow(ar2, 0.5); 181 | if (ar[0] != 0) { 182 | ar[0] /= arlength; 183 | } 184 | if (ar[1] != 0) { 185 | ar[1] /= arlength; 186 | } 187 | if (ar[2] != 0) { 188 | ar[2] /= arlength; 189 | } 190 | dotprohah1 = 0; 191 | dotprohah1 += x_ref[0]*tmp4[i+3]; 192 | dotprohah1 += x_ref[1]*tmp4[i+4]; 193 | dotprohah1 += x_ref[2]*tmp4[i+5]; 194 | theta = acos(dotprohah1); 195 | crossp_x_ref_h1[0]=tmp4[i+4]*x_ref[2] - tmp4[i+5]*x_ref[1]; 196 | crossp_x_ref_h1[1]=tmp4[i+5]*x_ref[0] - tmp4[i+3]*x_ref[2]; 197 | crossp_x_ref_h1[2]=tmp4[i+3]*x_ref[1] - tmp4[i+4]*x_ref[0]; 198 | crossp_x_ref_h1_sign=crossp_x_ref_h1[0]*tmp4[i+3]+crossp_x_ref_h1[1]*tmp4[i+4]+crossp_x_ref_h1[2]*tmp4[i+5]; 199 | if (crossp_x_ref_h1_sign > 0) { 200 | theta /=2; 201 | } 202 | else { 203 | theta /=-2; 204 | } 205 | q[0]=cos(theta); 206 | q[1]=ar[0]*sin(theta); 207 | q[2]=ar[1]*sin(theta); 208 | q[3]=ar[2]*sin(theta); 209 | 210 | htemp[0]= ((pow(q[0],2)+pow(q[1],2))-(pow(q[2],2)+pow(q[3],2)))* tmp4[i+3]; 211 | htemp[0]= (2*(q[1]*q[2] + q[0]*q[3]) * tmp4[i+4] ) + htemp[0]; 212 | htemp[0]= (2*(q[1]*q[3]-q[0]*q[2])*tmp4[i+5]) + htemp[0]; 213 | htemp[1]= 2*( q[1]*q[2] - q[0]*q[3] ) * tmp4[i+3]; 214 | htemp[1]= ( ( q[0]*q[0]-q[1]*q[1]+q[2]*q[2]-q[3]*q[3] ) * tmp4[i+4] ) + htemp[1]; 215 | htemp[1]= ( 2*( q[2]*q[3] + q[0]*q[1] ) * tmp4[i+5] ) + htemp[1]; 216 | htemp[2]= 2*( q[1]*q[3] + q[0]*q[2]) * tmp4[i+3]; 217 | htemp[2]= ( 2*( q[2]*q[3]-q[0]*q[1] ) * tmp4[i+4] ) + htemp[2]; 218 | htemp[2]= ( ( q[0]*q[0]-q[1]*q[1]-q[2]*q[2]+q[3]*q[3] ) * tmp4[i+5] ) + htemp[2]; 219 | tmp4[i+3]=htemp[0]; 220 | tmp4[i+4]=htemp[1]; 221 | tmp4[i+5]=htemp[2]; 222 | htemp[0]= ((pow(q[0],2)+pow(q[1],2))-(pow(q[2],2)+pow(q[3],2)))* tmp4[i+6]; 223 | htemp[0]= (2*(q[1]*q[2] + q[0]*q[3]) * tmp4[i+7] ) + htemp[0]; 224 | htemp[0]= (2*(q[1]*q[3]-q[0]*q[2])*tmp4[i+8]) + 
htemp[0]; 225 | htemp[1]= 2*( q[1]*q[2] - q[0]*q[3] ) * tmp4[i+6]; 226 | htemp[1]= ( ( q[0]*q[0]-q[1]*q[1]+q[2]*q[2]-q[3]*q[3] ) * tmp4[i+7] ) + htemp[1]; 227 | htemp[1]= ( 2*( q[2]*q[3] + q[0]*q[1] ) * tmp4[i+8] ) + htemp[1]; 228 | htemp[2]= 2*( q[1]*q[3] + q[0]*q[2]) * tmp4[i+6]; 229 | htemp[2]= ( 2*( q[2]*q[3]-q[0]*q[1] ) * tmp4[i+7] ) + htemp[2]; 230 | htemp[2]= ( ( q[0]*q[0]-q[1]*q[1]-q[2]*q[2]+q[3]*q[3] ) * tmp4[i+8] ) + htemp[2]; 231 | tmp4[i+6]=htemp[0]; 232 | tmp4[i+7]=htemp[1]; 233 | tmp4[i+8]=htemp[2]; 234 | z_mol_vect[0]=tmp4[i+4]*tmp4[i+8] - tmp4[i+5]*tmp4[i+7]; 235 | z_mol_vect[1]=tmp4[i+5]*tmp4[i+6] - tmp4[i+3]*tmp4[i+8]; 236 | z_mol_vect[2]=tmp4[i+3]*tmp4[i+7] - tmp4[i+4]*tmp4[i+6]; 237 | z_mol_vect2= pow(z_mol_vect[0],2) + pow(z_mol_vect[1],2) + pow(z_mol_vect[2],2); 238 | z_mol_vectlength=pow(z_mol_vect2,0.5); 239 | if (z_mol_vect[0] !=0) { 240 | z_mol_vect[0] /= z_mol_vectlength; 241 | } 242 | if (z_mol_vect[1] !=0) { 243 | z_mol_vect[1] /= z_mol_vectlength; 244 | } 245 | if (z_mol_vect[2] !=0) { 246 | z_mol_vect[2] /= z_mol_vectlength; 247 | } 248 | dotproductz_mol_vectz_ref=0; 249 | for(int j=0;j<3;j++) { 250 | dotproductz_mol_vectz_ref+=z_mol_vect[j]*z_ref[j]; 251 | } 252 | theta3p= acos(dotproductz_mol_vectz_ref); 253 | 254 | crossp_z_mol_vectz_ref[0]=z_mol_vect[1]*z_ref[2] - z_mol_vect[2]*z_ref[1]; 255 | crossp_z_mol_vectz_ref[1]=z_mol_vect[2]*z_ref[0] - z_mol_vect[0]*z_ref[2]; 256 | crossp_z_mol_vectz_ref[2]=z_mol_vect[0]*z_ref[1] - z_mol_vect[1]*z_ref[0]; 257 | 258 | crossp_z_mol_vectz_ref_sign=crossp_z_mol_vectz_ref[0]*tmp4[i+3]+crossp_z_mol_vectz_ref[1]*tmp4[i+4]+crossp_z_mol_vectz_ref[2]*tmp4[i+5]; 259 | 260 | if (crossp_z_mol_vectz_ref_sign < 0) { 261 | theta3p /=2; 262 | } 263 | else { 264 | theta3p /=-2; 265 | } 266 | 267 | q2[0]=cos(theta3p); 268 | q2[1]=x_ref[0]*sin(theta3p); 269 | q2[2]=x_ref[1]*sin(theta3p); 270 | q2[3]=x_ref[2]*sin(theta3p); 271 | 272 | e[0]= q[0]*q2[0] - q[1]*q2[1] - q[2]*q2[2] - q[3]*q2[3]; 273 | e[1]= 
q[0]*q2[1] + q[1]*q2[0] + q[2]*q2[3] - q[3]*q2[2]; 274 | e[2]= q[0]*q2[2] - q[1]*q2[3] + q[2]*q2[0] + q[3]*q2[1]; 275 | e[3]= q[0]*q2[3] + q[1]*q2[2] - q[2]*q2[1] + q[3]*q2[0]; 276 | 277 | singtest=((e[1]*e[2]) + (e[3]*e[0])); 278 | if (singtest > 0.4999) { 279 | tmp3.push_back(sin(pi/2)); 280 | tmp3.push_back(0); 281 | tmp3.push_back(2*atan2(e[1],e[0])); 282 | } 283 | else if (singtest < -0.4999) { 284 | tmp3.push_back(sin(pi/-2)); 285 | tmp3.push_back(0); 286 | tmp3.push_back(-2*atan2(e[1], e[0])); 287 | } 288 | else { 289 | tmp3.push_back(sin(asin(2*singtest))); 290 | tmp3.push_back(atan2(((2*e[1]*e[0])-(2*e[2]*e[3])) , (1 - (2*pow(e[1],2)) - (2*pow(e[3],2))))); 291 | tmp3.push_back(atan2(((2*e[2]*e[0])-(2*e[1]*e[3])) , (1 - (2*pow(e[2],2)) - (2*pow(e[3],2))))); 292 | } 293 | } 294 | kdtree orient(tmp3); 295 | s = orient.run_tree_orient(); 296 | orientout << s << endl; 297 | orientout.close(); 298 | 299 | 300 | //t = clock() -t; 301 | //printf("It took me %d clicks to calc (%f seconds)\n", t, ((float)t)/CLOCKS_PER_SEC); 302 | //cout << ((float)t)/CLOCKS_PER_SEC << endl; 303 | } 304 | 305 | -------------------------------------------------------------------------------- /sstmap/make_clust_brute.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | using namespace std; 11 | 12 | /* 13 | struct water { 14 | double oxygen[3]; 15 | double hyd1[3]; 16 | double hyd2[3]; 17 | water() { 18 | for (int i = 0; i < 3; i++) { 19 | oxygen[i] = 0; 20 | hyd1[i] = 0; 21 | hyd2[i] = 0; 22 | } 23 | } //default constructor, will set all to 0 24 | }; 25 | */ 26 | 27 | int main (int argc, char** argv) { 28 | if (argc <= 1) { 29 | cerr << "\nUSAGE:\n\n" 30 | << "./bruteclust[-c clustercenterfile][-w within5Aofligand]\n" 31 | << "where:\n\n" 32 | << "clustercenterfile contains the coordinates of the chosen clusters\n" 33 | << "within5Aofligand 
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

using namespace std;

// Print the option summary shared by both error paths.
static void print_options() {
    cerr << "./bruteclust[-c clustercenterfile][-w within5Aofligand]\n"
         << "where:\n\n"
         << "clustercenterfile contains the coordinates of the chosen clusters\n"
         << "within5Aofligand contains all coordinates of waters within a certain distance of the ligand\n\n";
}

// Append the x, y, z fields (fixed columns 31/39/47, width 7) of every
// non-empty line of `path` to `out`. Exits if the file cannot be opened,
// because looping on eof() would never terminate for a failed stream.
static void read_coords(const string &path, bool skip_header, vector<double> &out) {
    ifstream input(path.c_str());
    if (!input) {
        cerr << "Cannot open input file: " << path << "\n";
        exit(EXIT_FAILURE);
    }
    string line;
    if (skip_header) getline(input, line);
    while (getline(input, line)) {
        if (line.empty()) continue;
        if (line.size() < 47) {
            // substr(47, 7) would throw std::out_of_range on such a line.
            cerr << "Skipping short line in " << path << "\n";
            continue;
        }
        out.push_back(atof(line.substr(31, 7).c_str()));
        out.push_back(atof(line.substr(39, 7).c_str()));
        out.push_back(atof(line.substr(47, 7).c_str()));
    }
}

// Write one ATOM record. The four blanks before the first coordinate put x, y
// and z in the columns that read_coords() slices (31, 39 and 47).
static void write_atom(FILE *out, int serial, const char *element, const double *xyz) {
    fprintf(out, "%-6s%5i %-4s %3s %1s%4i    %8.3f%8.3f%8.3f%6.2f%6.2f\n",
            "ATOM", serial, element, "T3P", "C", 1, xyz[0], xyz[1], xyz[2], 0.0, 0.0);
}

// For every cluster centre, write cluster.NNNNNN.pdb containing each water
// whose oxygen lies within 2 A of that centre.
//
//   -c  cluster centre file (first line is a header and is skipped)
//   -w  water file (no header; nine values, O/H/H, per water)
int main (int argc, char** argv) {
    if (argc <= 1) {
        cerr << "\nUSAGE:\n\n";
        print_options();
        exit(0);
    }

    // An option with no value after it is ignored and caught just below,
    // instead of reading argv[argc].
    string cfile, wfile;
    for (int i = 1; i < argc; i++) {
        if (!strcmp(argv[i], "-c") && i + 1 < argc) cfile = argv[++i];
        else if (!strcmp(argv[i], "-w") && i + 1 < argc) wfile = argv[++i];
    }
    if (cfile.empty() || wfile.empty()) {
        cerr << "Define the damn input files:\n";
        print_options();
        exit(0);
    }

    vector<double> cens;
    vector<double> wats;
    read_coords(cfile, true, cens);
    read_coords(wfile, false, wats);

    const int numclust = cens.size()/3;
    char fileName[80];
    for (int i = 0; i < numclust; i++) {
        const int val = i+1;
        cout << val << endl;
        if (i < 9) {
            sprintf(fileName, "cluster.00000%i.pdb", val);
        }
        else if (i < 99) {
            sprintf(fileName, "cluster.0000%i.pdb", val);
        }
        else {
            sprintf(fileName, "cluster.000%i.pdb", val);
        }
        FILE* pFile = fopen(fileName, "w");
        if (!pFile) {
            cerr << "Cannot open output file: " << fileName << "\n";
            exit(EXIT_FAILURE);
        }

        int pos = 0;
        const double *cen = &cens[i*3];
        // Only complete nine-value waters are considered.
        for (size_t k = 0; k + 8 < wats.size(); k += 9) {
            // Squared oxygen-centre distance, compared against 2 A squared.
            const double dist = pow((cen[0] - wats[k]), 2) + pow((cen[1] - wats[k+1]), 2) + pow((cen[2] - wats[k+2]), 2);
            if (dist <= 4) {
                write_atom(pFile, pos++, "O", &wats[k]);
                write_atom(pFile, pos++, "H", &wats[k+3]);
                write_atom(pFile, pos++, "H", &wats[k+6]);
            }
        }

        fclose(pFile);
    }
}
chainid.c_str(), resseq, wats[k+3], wats[k+4], wats[k+5], occupancy, T); 147 | pos++; 148 | fprintf (pFile, "%-6s%5i %-4s %3s %1s%4i %8.3f%8.3f%8.3f%6.2f%6.2f\n", name.c_str(), pos, atom.c_str(), resname.c_str(), chainid.c_str(), resseq, wats[k+6], wats[k+7], wats[k+8], occupancy, T); 149 | pos++; 150 | } 151 | } 152 | //} 153 | 154 | fclose(pFile); 155 | } 156 | 157 | 158 | } 159 | -------------------------------------------------------------------------------- /sstmap/probable.cpp: -------------------------------------------------------------------------------- 1 | #include "probable.h" 2 | 3 | using namespace std; 4 | 5 | point::point () { 6 | dim = D; 7 | x = new double[dim]; 8 | for (int i = 0; i < dim; i++) { 9 | x[i] = 0; 10 | } 11 | 12 | } 13 | 14 | void point::set_point(double* vals) { 15 | //dim = d; 16 | //x = new double[dim]; 17 | for (int i = 0; i < dim; i++) { 18 | x = &vals[i]; 19 | } 20 | } 21 | void point::set_point(const point &p) { 22 | dim = p.dim; 23 | //x = new double[dim]; 24 | for (int i = 0; i < dim; i++) { 25 | x[i] = p.x[i]; 26 | } 27 | } 28 | 29 | /*point::~point () { 30 | delete x; 31 | }*/ 32 | 33 | void point::zeros() { 34 | for (int i = 0; i < dim; i++) { 35 | x[i] = 0; 36 | } 37 | } 38 | 39 | void point::ones() { 40 | for (int i = 0; i < dim; i++) { 41 | x[i] = 1; 42 | } 43 | } 44 | 45 | void point::print_point() { 46 | for (int i = 0; i < dim; i++) { 47 | cout << x[i] << "\t"; 48 | } 49 | cout << endl; 50 | } 51 | 52 | 53 | double dist(const point &p, const point &q) { 54 | if (p.dim != q.dim) { 55 | cerr << "Dimensions of points do not match in distance comparison!!\n"; 56 | exit(EXIT_FAILURE); 57 | } 58 | double distance = 0.0; 59 | for (int i = 0; i < p.dim; i++) { 60 | distance += pow((p.x[i] - q.x[i]), 2); 61 | } 62 | double large = 10000; 63 | if (distance == 0) return large; 64 | return sqrt(distance); 65 | 66 | } 67 | 68 | bool same_points (const point &p, const point &q) { 69 | if (p.dim != q.dim) { 70 | return false; 71 | 
// Construct a node with every link and point-range index set to 0.
boxnode::boxnode() {
    mom = 0; dau1 = 0; dau2 = 0; pthi = 0; ptlo = 0;
    //By default the points will be set to zero respectively
}

// Copy the corner coordinates of mylo/myhi into this node's own lo/hi points
// and store the parent, daughter and point-range indices.
void boxnode::set_boxnode(point mylo, point myhi, int mymom, int myd1, int myd2, int myptlo, int mypthi) {
    //mybox.set_box(mylo, myhi);
    hi.set_point(myhi);
    lo.set_point(mylo);
    //cout << "set box\n";
    mom = mymom;
    dau1 = myd1;
    dau2 = myd2;
    ptlo = myptlo;
    pthi = mypthi;
    //cout << "done set box\n";
}

// Distance from point p to box b: along each axis only the part of p lying
// outside [lo, hi] contributes. Exits if the dimensions disagree.
double dist(const boxnode &b, const point &p) {
    double distance = 0.0;
    if (p.dim != b.lo.dim || p.dim != b.hi.dim) {
        cerr << "Point and Box Points do not have the same dimensionality in distance calculation!!\n";
        exit(EXIT_FAILURE);
    }
    for (int i = 0; i < p.dim; i++) {
        if (p.x[i] < b.lo.x[i]) distance += pow((p.x[i]-b.lo.x[i]), 2);
        if (p.x[i] > b.hi.x[i]) distance += pow((p.x[i]-b.hi.x[i]), 2);
    }
    return sqrt(distance);
    //This will return 0 if the point is in the box
}

// Partition the n entries of indx in place, ordering them by arr[indx[.]],
// and return indx[k]. Only indx is permuted; arr is read but never written.
// NOTE(review): this has the shape of a median-of-three quickselect, i.e. on
// return indx[k] should index the k-th smallest value with smaller values
// before it and larger values after it - confirm against the reference
// algorithm before relying on more than the return value.
int selecti(const int k, int *indx, int n, double *arr) {
    int i, ia, ir, j, l, mid;
    double a;

    l = 0;

    ir = n-1;
    for (;;) {
        // Active range [l, ir] holds at most two entries: order them and stop.
        if (ir <= l+1) {
            if (ir == l+1 && arr[indx[ir]] < arr[indx[l]]) {
                swap(indx[l], indx[ir]);
            }
            return indx[k]; //final end point
        }
        else {
            // Move the middle entry to l+1, then order positions l, l+1 and ir
            // so that arr[indx[l]] <= arr[indx[l+1]] <= arr[indx[ir]]; the
            // entry at l+1 becomes the pivot `a`.
            mid = (l+ir) >> 1;
            swap(indx[mid], indx[l+1]);
            if (arr[indx[l]] > arr[indx[ir]]) swap(indx[l], indx[ir]);
            if (arr[indx[l+1]] > arr[indx[ir]]) swap(indx[l+1], indx[ir]);
            if (arr[indx[l]] > arr[indx[l+1]]) swap(indx[l], indx[l+1]);
            i = l+1;
            j = ir;
            ia = indx[l+1];
            a = arr[ia];
            // Scan inwards from both ends, swapping entries that sit on the
            // wrong side of the pivot, until the scans cross.
            for (;;) {
                do i++; while (arr[indx[i]] < a);
                do j--; while (arr[indx[j]] > a);
                if (j < i) break; //inner endpoint
                swap(indx[i], indx[j]);

            }

            // Put the pivot at position j, then keep only the side of the
            // range that still contains position k.
            indx[l+1] = indx[j];
            indx[j] = ia;
            if (j >= k) ir=j-1;
            if (j <= k) l = i;
        }
    }

}
141 | if (j < i) break; //inner endpoint 142 | swap(indx[i], indx[j]); 143 | 144 | } 145 | 146 | indx[l+1] = indx[j]; 147 | indx[j] = ia; 148 | if (j >= k) ir=j-1; 149 | if (j <= k) l = i; 150 | } 151 | } 152 | 153 | } 154 | 155 | const double kdtree::BIG(1.0e99); 156 | 157 | kdtree::kdtree(std::vector< double > &vals) { 158 | /* 159 | This function assumes the doubles fed in through vals are only the pertinent ones. IE If this is 3d its the positions or orientations and nothing else. 160 | */ 161 | /*for (int i = 0; i < 3; i++) { 162 | minn[i] = 10000; 163 | maxx[i] = 0; 164 | }*/ 165 | //BIG = 1.0e99; 166 | dim = D; 167 | nd = new int[1]; 168 | dn = new double[1]; 169 | //within1 = new int[3000]; 170 | npts = vals.size()/D; 171 | //cout << npts << endl; 172 | pts = new point[npts]; 173 | /* 174 | cenn[0] = x; 175 | cenn[1] = y; 176 | cenn[2] = z; 177 | */ 178 | int foo = 0; 179 | for (int i = 0; i < vals.size(); i++) { 180 | if (i%D==0) { 181 | pts[foo].x[0] = vals[i]; 182 | //minn[0] = min(minn[0], vals[i]); 183 | //maxx[0] = max(maxx[0], vals[i]); 184 | } 185 | if (i%D==1) { 186 | pts[foo].x[1] = vals[i]; 187 | //minn[1] = min(minn[1], vals[i]); 188 | //maxx[1] = max(maxx[1], vals[i]); 189 | } 190 | if (i%D==2) { 191 | pts[foo].x[2] = vals[i]; 192 | //minn[2] = min(minn[2], vals[i]); 193 | //maxx[2] = max(maxx[2], vals[i]); 194 | foo++; 195 | } 196 | } 197 | 198 | /*for (int i = 0; i < 3; i++) { 199 | cenn[i] = (maxx[i]+minn[i])/2; 200 | }*/ 201 | 202 | ptindx = new int[npts]; rptindx = new int[npts]; 203 | int ntmp, m, kk, k, j, nowtask, jbox, np, tmom, tdim, ptlo, pthi; 204 | int *hp; 205 | double *cp; 206 | int taskmom[50], taskdim[50]; 207 | for (k = 0; k < npts; k++) ptindx[k] = k; 208 | m = 1; 209 | for (ntmp = npts; ntmp; ntmp >>= 1) { 210 | m <<= 1; 211 | } 212 | //cout << "npts: " << npts << endl; 213 | numbox = 2*npts - (m>>1); 214 | if (m < numbox) numbox = m; 215 | //m = pow(2, (log(npts)/log(2))); 216 | //numbox = 2*npts - m/2; 217 | if (m < 
numbox) numbox = m; 218 | numbox--; 219 | //cout << "about to make boxes\n"; 220 | //cout << numbox << endl; 221 | boxes = new boxnode[numbox]; 222 | //cout << "made boxes\n"; 223 | coord = new double[D*npts]; 224 | //cout << "made coords\n"; 225 | for (j = 0, kk = 0; j < D; j++, kk+= npts) { 226 | for (k = 0; k < npts; k++) { 227 | //cout << k << endl; 228 | coord[kk+k] = pts[k].x[j]; 229 | } 230 | } 231 | //cout << "set coords\n"; 232 | //double* nums; 233 | //double* nnums; 234 | point lo, hi; 235 | for (int i = 0; i < 3; i++) { 236 | hi.x[i] = BIG; 237 | lo.x[i] = -BIG; 238 | } 239 | 240 | //cout << hi.x[0] << "\t" << hi.x[1] << "\t" << hi.x[2] << endl; 241 | //cout << lo.x[0] << "\t" << lo.x[1] << "\t" << lo.x[2] << endl; 242 | boxes[0].set_boxnode(lo, hi, 0, 0, 0, 0, npts-1); 243 | /*if (D == 3) { 244 | //cout << "start D3 if\n"; 245 | nums = new double[3]; 246 | nnums = new double[3]; 247 | for (int i =0; i <3; i++) { 248 | nums[i] = BIG; 249 | nnums[i] = -BIG; 250 | } 251 | cout << nums[0] << "\t" << nums[1] << "\t" << nums[2] << endl; 252 | cout << nnums[0] << "\t" << nnums[1] << "\t" << nnums[2] << endl; 253 | lo.set_point(nnums, D); 254 | hi.set_point(nums, D); 255 | //cout << "made BIG points\n"; 256 | cout << hi.x[0] << "\t" << hi.x[1] << "\t" << hi.x[2] << endl; 257 | cout << lo.x[0] << "\t" << lo.x[1] << "\t" << lo.x[2] << endl; 258 | boxes[0].set_boxnode(lo, hi, 0, 0, 0, 0, npts-1); 259 | //cout << "made first box\n"; 260 | } 261 | if (D == 6) { 262 | //cout << "start D6 if\n"; 263 | nums = new double[6]; 264 | nnums = new double[6]; 265 | for (int i = 0; i < 6; i++) { 266 | nums[i] = BIG; 267 | nnums[i] = -BIG; 268 | } 269 | lo.set_point(nnums, D), hi.set_point(nums, D); 270 | boxes[0].set_boxnode(lo, hi, 0, 0, 0, 0, npts-1); 271 | } 272 | delete nums; 273 | delete nnums; 274 | */ 275 | //cout << "Set initial box: \n\n"; 276 | //cout << boxes[0].hi.x[0] << "\t" << boxes[0].hi.x[1] << "\t" << boxes[0].hi.x[2] << endl; 277 | //cout << 
boxes[0].lo.x[0] << "\t" << boxes[0].lo.x[1] << "\t" << boxes[0].lo.x[2] << endl; 278 | 279 | for (int i = 0; i < 3; i++) { 280 | boxes[0].hi.x[i] = BIG; 281 | boxes[0].lo.x[i] = -BIG; 282 | } 283 | 284 | //cout << "Fix initial box: \n\n"; 285 | //cout << boxes[0].hi.x[0] << "\t" << boxes[0].hi.x[1] << "\t" << boxes[0].hi.x[2] << endl; 286 | //cout << boxes[0].lo.x[0] << "\t" << boxes[0].lo.x[1] << "\t" << boxes[0].lo.x[2] << endl; 287 | 288 | jbox = 0; 289 | taskmom[1] = 0; 290 | taskdim[1] = 0; 291 | nowtask = 1; 292 | //cout << "got to while loop\n"; 293 | while (nowtask) { 294 | tmom = taskmom[nowtask]; 295 | tdim = taskdim[nowtask--]; 296 | ptlo = boxes[tmom].ptlo; 297 | pthi = boxes[tmom].pthi; 298 | hp = &ptindx[ptlo]; 299 | cp = &coord[tdim*npts]; 300 | np = pthi - ptlo + 1; 301 | kk = (np-1)/2; 302 | selecti(kk, hp, np, cp); 303 | hi = boxes[tmom].hi; 304 | lo = boxes[tmom].lo; 305 | //hi.x[tdim] = lo.x[tdim] = coord[tdim*npts + hp[kk]]; 306 | //cout << jbox << endl; 307 | boxes[++jbox].set_boxnode(boxes[tmom].lo, hi, tmom, 0, 0, ptlo, ptlo+kk); 308 | boxes[jbox].hi.x[tdim] = coord[tdim*npts + hp[kk]]; 309 | //cout << jbox << endl; 310 | boxes[++jbox].set_boxnode(lo, boxes[tmom].hi, tmom, 0 , 0, ptlo+kk+1, pthi); 311 | boxes[jbox].lo.x[tdim] = coord[tdim*npts + hp[kk]]; 312 | boxes[tmom].dau1 = jbox-1; 313 | boxes[tmom].dau2 = jbox; 314 | if (kk > 1) { 315 | taskmom[++nowtask] = jbox-1; 316 | taskdim[nowtask] = (tdim+1)%D; 317 | } 318 | if (np - kk > 3) { 319 | taskmom[++nowtask] = jbox; 320 | taskdim[nowtask] = (tdim+1)%D; 321 | } 322 | } 323 | for (j = 0; j < npts; j++) rptindx[ptindx[j]] = j; 324 | //cout << "made tree" << endl; 325 | 326 | //cout << "delete coord" << endl; 327 | } 328 | 329 | 330 | kdtree::~kdtree () { 331 | delete boxes; 332 | delete ptindx; 333 | delete rptindx; 334 | delete dn; 335 | delete nd; 336 | delete coord; 337 | //delete within1; 338 | } 339 | 340 | double kdtree::disti(int jpt, int kpt) { 341 | if (jpt == kpt) return BIG; 
//to avoid the closest neighbor is itself 342 | else return dist(pts[jpt], pts[kpt]); 343 | } 344 | 345 | int kdtree::locate(point pt) { 346 | int nb, d1, jdim; 347 | nb = jdim = 0; 348 | while (boxes[nb].dau1) { //basically keep going until bottom from root 349 | d1 = boxes[nb].dau1; 350 | if (pt.x[jdim] <= boxes[d1].hi.x[jdim]) nb = d1; 351 | else nb = boxes[nb].dau2; 352 | jdim = ++jdim%D; 353 | } 354 | return nb; 355 | } 356 | 357 | int kdtree::locate(int jpt) { 358 | int nb, d1, jh; 359 | jh = rptindx[jpt]; 360 | nb = 0; 361 | while (boxes[nb].dau1) { 362 | d1 = boxes[nb].dau1; 363 | if (jh <= boxes[d1].pthi) nb = d1; 364 | else nb = boxes[nb].dau2; 365 | } 366 | return nb; 367 | } 368 | 369 | double kdtree::dnearest(point pt) { 370 | int i, k, nrst, ntask; 371 | int task[50]; 372 | double dnrst = BIG, d; 373 | k = locate(pt); 374 | for (i = boxes[k].ptlo; i <= boxes[k].pthi; i++) { 375 | d = dist(pts[ptindx[i]], pt); 376 | if (d < dnrst && d != 0) { 377 | //this fix for != 0 may result in some uncertain behavior, will be necessary to check this out. 
378 | nrst = ptindx[i]; 379 | dnrst = d; 380 | } 381 | } 382 | task[1] = 0; 383 | ntask = 1; 384 | while (ntask) { 385 | k = task[ntask--]; 386 | if (dist(boxes[k], pt) < dnrst) { 387 | if (boxes[k].dau1) { 388 | task[++ntask] = boxes[k].dau1; 389 | task[++ntask] = boxes[k].dau2; 390 | } 391 | else { 392 | for (i = boxes[k].ptlo; i <= boxes[k].pthi; i++) { 393 | d = dist(pts[ptindx[i]], pt); 394 | if (d < dnrst && d != 0) { 395 | nrst = ptindx[i]; 396 | dnrst = d; 397 | } 398 | } 399 | } 400 | } 401 | } 402 | return dnrst; 403 | } 404 | 405 | void kdtree::nnearest(int jpt, int* nn, double* dn, int n) { 406 | int i, k, ntask, kp; 407 | int task[50]; 408 | double d; 409 | if (n > npts-1) throw("you're asking for too much buddy (nn > npts)"); 410 | for (i = 0; i < n; i++) dn[i] = BIG; 411 | kp = boxes[locate(jpt)].mom; 412 | while (boxes[kp].pthi - boxes[kp].ptlo < n) kp = boxes[kp].mom; 413 | for (i = boxes[kp].ptlo; i <= boxes[kp].pthi; i++) { 414 | if (jpt == ptindx[i]) continue; 415 | d = disti(ptindx[i], jpt); 416 | if (d < dn[0]) { 417 | dn[0] = d; 418 | nn[0] = ptindx[i]; 419 | if (n>1) sift_down(dn, nn, n); 420 | } 421 | } 422 | task[1] = 0; 423 | ntask = 1; 424 | while (ntask) { 425 | k = task[ntask--]; 426 | if (k == kp) continue; 427 | if (dist(boxes[k], pts[jpt]) < dn[0]) { 428 | if (boxes[k].dau1) { 429 | task[++ntask] = boxes[k].dau1; 430 | task[++ntask] = boxes[k].dau2; 431 | } 432 | else { 433 | for (i = boxes[k].ptlo; i <= boxes[k].pthi; i++) { 434 | d = disti(ptindx[i], jpt); 435 | if (d < dn[0]) { 436 | dn[0] = d; 437 | nn[0] = ptindx[i]; 438 | if (n > 1) sift_down(dn, nn, n); 439 | } 440 | } 441 | } 442 | } 443 | } 444 | return; 445 | } 446 | 447 | void kdtree::sift_down(double* heap, int* ndx, int nn) { 448 | int n = nn - 1; 449 | int j, jold, ia; 450 | double a; 451 | a = heap[0]; 452 | ia = ndx[0]; 453 | jold = 0; 454 | j = 1; 455 | while (j <= n) { 456 | if (j < n && heap[j] < heap[j+1]) j++; 457 | if (a >= heap[j]) break; 458 | heap[jold] = 
heap[j]; 459 | ndx[jold] = ndx[j]; 460 | jold = j; 461 | j = 2*j+1; 462 | } 463 | heap[jold] = a; 464 | ndx[jold] = ia; 465 | } 466 | 467 | 468 | int kdtree::locatenear(point pt, double r, int *v, int nmax) { 469 | /* 470 | This fuction returns all the points within some distance of a target point. I dont think we will ever use it. 471 | */ 472 | int k, i, nb, nbold, nret, ntask, jdim, d1, d2; 473 | int task[50]; 474 | nb = jdim = nret = 0; 475 | if (r < 0.0) throw("radius must be nonnegative"); 476 | while (boxes[nb].dau1) { 477 | nbold = nb; 478 | d1 = boxes[nb].dau1; 479 | d2 = boxes[nb].dau2; 480 | if (pt.x[jdim] + r <= boxes[d1].hi.x[jdim]) nb = d1; 481 | else if (pt.x[jdim] - r >= boxes[d2].lo.x[jdim]) nb = d2; 482 | jdim = ++jdim%D; 483 | if (nb == nbold) break; 484 | } 485 | //cout << nb << endl; 486 | task[1] = nb; 487 | ntask = 1; 488 | while (ntask) { 489 | k = task[ntask--]; 490 | if (dist(boxes[k], pt) > r) { 491 | //cout << "box out of range: " << dist(boxes[k], pt) << endl; 492 | //cout << boxes[k].hi.x[0] << "\t" << boxes[k].hi.x[1] << "\t" << boxes[k].hi.x[2] << endl; 493 | continue; 494 | } 495 | else { 496 | //cout << "box in range\n"; 497 | } 498 | if (boxes[k].dau1) { 499 | task[++ntask] = boxes[k].dau1; 500 | task[++ntask] = boxes[k].dau2; 501 | } 502 | else { 503 | for (i = boxes[k].ptlo; i <= boxes[k].pthi; i++) { 504 | if (dist(pts[ptindx[i]], pt) <= r && nret < nmax) { 505 | v[nret++] = ptindx[i]; 506 | } 507 | if (nret == nmax) return nmax; 508 | } 509 | } 510 | } 511 | return nret; 512 | } 513 | /* 514 | double kdtree::run_tree() { 515 | //ofstream output; 516 | //output.open(OUT.c_str()); 517 | //output.precision(16); 518 | //cout << "run tree start" << endl; 519 | double gd = 0; 520 | double s = 0.0; 521 | double T = 300.; 522 | double R = 8.314472; 523 | double pi = 3.14159265359; 524 | double cenndist = 10000; 525 | int npts2 = 0; 526 | int fcount = 10000; 527 | for (int i = 0; i < npts; i++) { 528 | //if (pts[i].x[0] > maxx[0]-5 || 
pts[i].x[0] < minn[0]+5 || pts[i].x[1] > maxx[1]-5 || pts[i].x[1] < minn[1]+5 || pts[i].x[2] > maxx[2]-5 || pts[i].x[2] < minn[2]+5) { 529 | // continue; 530 | //} 531 | //else { 532 | cenndist = pow((pts[i].x[0] - cenn[0]), 2) + pow((pts[i].x[1] - cenn[1]), 2) + pow((pts[i].x[2] - cenn[2]), 2); 533 | //if (abs(pts[i].x[0] - cenn[0]) < 1 && abs(pts[i].x[1] - cenn[1]) < 1 && abs(pts[i].x[2] - cenn[2]) < 1) { 534 | if (cenndist <= 1) { 535 | nnearest(i, nd, dn, 1); 536 | //cout << dn[0] << endl; 537 | gd += log((0.0329223149*fcount*4*pi*pow(dn[0], 3))/3); 538 | npts2++; 539 | } 540 | } 541 | //cout << "\n\n"; 542 | //cout << gd << endl; 543 | //cout << npts2 << endl; 544 | s = R*T*0.239*(gd/npts2 + 0.5772156649)/1000; 545 | //cout << s << endl; 546 | return s; 547 | } 548 | */ 549 | double kdtree::run_tree_trans(std::vector &cls) { 550 | point pt; 551 | double* dh; 552 | int numvals = cls.size()/3; 553 | dh = new double[numvals]; 554 | int vecpos = 0; 555 | for (int i = 0; i < cls.size(); i+=3) { 556 | //run through the vector of the acknowledged standard cluster file, skipping hydrogens until the end 557 | pt.x[0] = cls[i]; pt.x[1] = cls[i+1]; pt.x[2] = cls[i+2]; 558 | dh[vecpos] = dnearest(pt); 559 | vecpos++; 560 | } 561 | 562 | double gd = 0; 563 | double s = 0.0; 564 | double T = 300.; 565 | double R = 8.314472; 566 | double pi = 3.14159265359; 567 | 568 | int fcount = 10000; 569 | 570 | for (int i = 0; i < numvals; i++) { 571 | gd += log((0.0329223149*fcount*4*pi*pow(dh[i], 3))/3); 572 | } 573 | 574 | s = R*T*0.239*(gd/numvals + 0.5772156649)/1000; 575 | 576 | delete dh; 577 | return s; 578 | } 579 | 580 | double kdtree::run_tree_orient() { 581 | //ofstream ori("orientdists.txt"); ori.precision(16); 582 | double gd = 0; 583 | double s = 0.0; 584 | double T = 300.; 585 | double R = 8.314472; 586 | double pi = 3.14159265359; 587 | double de = 10000; 588 | point z; 589 | for (int i = 0; i < npts; i++) { 590 | nnearest(i, nd, dn, 1); 591 | //in order to implement 
this need to use a function which takes a point and returns a distance. 592 | //dnearest from before. 593 | if (pts[i].x[0] > pi/2) { 594 | z.x[0] = pts[i].x[0] - 2*pi; 595 | z.x[1] = pts[i].x[1]; 596 | z.x[2] = pts[i].x[2]; 597 | de = dnearest(z); 598 | if (de < dn[0] && de != 0) { 599 | dn[0] = de; 600 | } 601 | } 602 | else if (pts[i].x[0] < -pi/2) { 603 | z.x[0] = pts[i].x[0] + 2*pi; 604 | z.x[1] = pts[i].x[1]; 605 | z.x[2] = pts[i].x[2]; 606 | de = dnearest(z); 607 | if (de < dn[0] && de != 0) { 608 | dn[0] = de; 609 | } 610 | } 611 | else if (pts[i].x[1] > pi/2) { 612 | z.x[0] = pts[i].x[0]; 613 | z.x[1] = pts[i].x[1] - 2*pi; 614 | z.x[2] = pts[i].x[2]; 615 | de = dnearest(z); 616 | if (de < dn[0] && de != 0) { 617 | dn[0] = de; 618 | } 619 | } 620 | else if (pts[i].x[1] < -pi/2) { 621 | z.x[0] = pts[i].x[0]; 622 | z.x[1] = pts[i].x[1] + 2*pi; 623 | z.x[2] = pts[i].x[2]; 624 | de = dnearest(z); 625 | if (de < dn[0] && de != 0) { 626 | dn[0] = de; 627 | } 628 | } 629 | else if (pts[i].x[2] > pi/2) { 630 | z.x[0] = pts[i].x[0]; 631 | z.x[1] = pts[i].x[1]; 632 | z.x[2] = pts[i].x[2] - 2*pi; 633 | de = dnearest(z); 634 | if (de < dn[0] && de != 0) { 635 | dn[0] = de; 636 | } 637 | } 638 | else if (pts[i].x[2] < -pi/2) { 639 | z.x[0] = pts[i].x[0]; 640 | z.x[1] = pts[i].x[1]; 641 | z.x[2] = pts[i].x[2] + 2*pi; 642 | de = dnearest(z); 643 | if (de < dn[0] && de != 0) { 644 | dn[0] = de; 645 | } 646 | } 647 | //ori << dn[0] << endl; 648 | gd += log((pow(dn[0], 3)*npts)/(6*pi)); 649 | } 650 | //cout << gd << endl; 651 | //cout << npts << endl; 652 | s = R*T*0.239*(gd/npts + 0.5772156649)/1000; 653 | //cout << s << endl; 654 | return s; 655 | } 656 | 657 | /*void kdtree::run_locate() { 658 | double dn = BIG; 659 | double d; 660 | int nr = 0; 661 | for (int i = 0; i < npts; i++) { 662 | nr = locatenear(pts[i], 0.25, within1, 3000); 663 | //cout << nr << endl; 664 | for (int j = 0; j < nr; j++) { 665 | d = disti(i, within1[j]); 666 | if (d < dn) { 667 | dn = d; 668 | } 
// A point whose coordinates live in a heap array `x`.
// Callers in this project read the coordinates as x[0], x[1], x[2].
struct point {
    int dim;      // number of coordinates; presumably equal to the file-level D (3) -- TODO confirm in probable.cpp
    double* x;    // coordinate array; no destructor is declared here, so this struct does not release it itself
    point();
    void set_point(double* vals);     // presumably copies `dim` values from vals -- confirm in probable.cpp
    void set_point(const point &p);   // presumably copies the coordinates of p -- confirm in probable.cpp
    //~point();
    void print_point();
    void zeros();                     // presumably sets every coordinate to 0 -- confirm
    void ones();                      // presumably sets every coordinate to 1 -- confirm
    //void set_dimension(int y);


};
63 | In other words this is the data structure which actively creates the box data structure, but will be used 64 | recursively to create the entire tree 65 | */ 66 | }; 67 | 68 | double dist(const boxnode &b, const point &p, int d); 69 | 70 | struct kdtree { 71 | static const double BIG; //this value is a placeholder for starting box size (will be absurd) 72 | int dim; 73 | int numbox, npts; //integer counts of boxes and points 74 | point* pts; 75 | boxnode *boxes; 76 | int* ptindx; 77 | int* rptindx; //point index and reverse point index 78 | int* nd; 79 | double* dn; 80 | double* coord; 81 | //int* within1; 82 | //double cenn[3]; 83 | //double maxx[3]; 84 | //double minn[3]; 85 | kdtree(std::vector< double > &vals); 86 | ~kdtree(); 87 | //utility functions for use after tree is constructed 88 | double disti(int jpt, int kpt); 89 | int locate(point pt); 90 | int locate(int jpt); 91 | //applications to use tree 92 | //int nearest(point pt); 93 | double dnearest(point pt); 94 | void nnearest(int jpt, int *nn, double *dn, int n); 95 | static void sift_down(double *heap, int *ndx, int nn); 96 | int locatenear(point pt, double r, int *v, int nmax); 97 | //double run_tree(); 98 | double run_tree_orient(); 99 | double run_tree_trans(std::vector &cls); 100 | //void run_locate(); 101 | //void print_boxes(); 102 | //void print_tree(int y); 103 | //void print_box(int y); 104 | 105 | }; 106 | 107 | -------------------------------------------------------------------------------- /sstmap/probable_main.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 
5 | */ 6 | 7 | /* 8 | * File: main.cpp 9 | * Author: stevenramsey 10 | * 11 | * Created on March 16, 2016, 9:06 AM 12 | */ 13 | 14 | 15 | #include "probable.h" 16 | 17 | using namespace std; 18 | 19 | int main(int argc, char** argv) { 20 | if (argc <= 1) { 21 | cerr << "\nUSAGE:\n\n" 22 | << "./probable [-i inputfile][-o outfile]\n\n" 23 | << "where\n\n" 24 | << "inputfile is the file to read from (a clusterfile with hydrogen atoms)\n" 25 | << "outfile is an outfile to append probable configs to\n" 26 | << "\t if not specified will be printed to probable.pdb \n\n"; 27 | exit(0); 28 | } 29 | 30 | double s = 0; 31 | 32 | 33 | clock_t t; 34 | t = clock(); 35 | int i = 0; string infile; string outfile; 36 | while (i tmp; 65 | double temp; 66 | string strtemp; 67 | /* 68 | ifstream input(expfile.c_str()); 69 | //getline(input, strtemp); //skip header 70 | while (!input.eof()) { 71 | getline(input, strtemp); 72 | if (!strtemp.empty()) { 73 | temp = atof(strtemp.substr(31, 7).c_str()); 74 | tmp.push_back(temp); 75 | temp = atof(strtemp.substr(39, 7).c_str()); 76 | tmp.push_back(temp); 77 | temp = atof(strtemp.substr(47, 7).c_str()); 78 | tmp.push_back(temp); 79 | } 80 | } 81 | vector tmp2; 82 | for (int i = 0; i < tmp.size(); i++) { 83 | if (i%9 == 0 || i%9==1 || i%9==2) { 84 | tmp2.push_back(tmp[i]); 85 | } 86 | } 87 | * / 88 | /* 89 | ofstream tout("test.dat"); tout.precision(16); 90 | tout << tmp2.size() << endl; 91 | for (int i = 0; i < tmp2.size(); i++) { 92 | tout << tmp2[i] << "\t"; 93 | if (i%3== 2 && i!=0) { 94 | tout << endl; 95 | } 96 | } 97 | */ 98 | //kdtree trans(tmp2); 99 | //cout << "made trans tree" << endl; 100 | 101 | vector tmp4; 102 | ifstream stput(infile.c_str()); 103 | //getline(stput, strtemp); //skip header 104 | while (!stput.eof()) { 105 | getline(stput, strtemp); 106 | if (!strtemp.empty()) { 107 | temp = atof(strtemp.substr(31, 7).c_str()); 108 | tmp4.push_back(temp); 109 | tmp.push_back(temp); 110 | temp = atof(strtemp.substr(39, 
7).c_str()); 111 | tmp4.push_back(temp); 112 | tmp.push_back(temp); 113 | temp = atof(strtemp.substr(47, 7).c_str()); 114 | tmp4.push_back(temp); 115 | tmp.push_back(temp); 116 | } 117 | } 118 | vector tmp5; 119 | for (int i = 0; i < tmp4.size(); i++) { 120 | if (i%9 == 0 || i%9 == 1 || i%9 == 2) { 121 | tmp5.push_back(tmp4[i]); 122 | } 123 | } 124 | 125 | kdtree trans(tmp5); 126 | int transi = 0; //index of closest trans 127 | int* indt; 128 | indt = new int[1]; 129 | double* distt; 130 | distt = new double[1]; 131 | double winner = 10000.00; 132 | for (i = 0; i < trans.npts; i++) { 133 | trans.nnearest(i, indt, distt, 1); 134 | if (distt[0] < winner) { 135 | winner = distt[0]; 136 | transi = indt[0]; 137 | } 138 | } 139 | 140 | delete distt; 141 | delete indt; 142 | //s = trans.run_tree_trans(tmp5); 143 | //transout << s << endl; 144 | //transout.close(); 145 | /* 146 | Begin orientational code 147 | */ 148 | vector tmp3; 149 | double pi = 3.14159265359; double cenndist = 10000; 150 | int x_ref[3]; int y_ref[3]; int z_ref[3]; 151 | x_ref[0] = 1; x_ref[1] = 0; x_ref[2] = 0; 152 | y_ref[0] = 0; y_ref[1] = 1; y_ref[2] = 0; 153 | z_ref[0] = 0; z_ref[1] = 0; z_ref[2] = 1; 154 | double ar[3]; double ar2; double h12; double h22; double h1length; double h2length; double arlength; double dotprohah1; double theta; 155 | double crossp_x_ref_h1[3]; double crossp_x_ref_h1_sign; double q[4]; double htemp[3]; double z_mol_vect[3]; double z_mol_vect2; 156 | double z_mol_vectlength; double dotproductz_mol_vectz_ref; double theta3p; double crossp_z_mol_vectz_ref[3]; double crossp_z_mol_vectz_ref_sign; 157 | double q2[4]; double e[4]; double singtest; 158 | for (int i = 0; i < tmp4.size(); i+=9) { 159 | tmp4[i+8] -= tmp4[i+2]; 160 | tmp4[i+5] -= tmp4[i+2]; 161 | tmp4[i+2] -= tmp4[i+2]; 162 | tmp4[i+7] -= tmp4[i+1]; 163 | tmp4[i+4] -= tmp4[i+1]; 164 | tmp4[i+1] -= tmp4[i+1]; 165 | tmp4[i+6] -= tmp4[i]; 166 | tmp4[i+3] -= tmp4[i]; 167 | tmp4[i] -= tmp4[i]; 168 | h12 = 
pow(tmp4[i+3],2) + pow(tmp4[i+4],2) + pow(tmp4[i+5],2); 169 | h22 = pow(tmp4[i+6],2) + pow(tmp4[i+7],2) + pow(tmp4[i+8],2); 170 | h1length = pow(h12, 0.5); 171 | h2length = pow(h22, 0.5); 172 | if (tmp4[i+3] != 0) { 173 | tmp4[i+3] /= h1length; 174 | } 175 | if (tmp4[i+4] != 0) { 176 | tmp4[i+4] /= h1length; 177 | } 178 | if (tmp4[i+5] != 0) { 179 | tmp4[i+5] /= h1length; 180 | } 181 | if (tmp4[i+6] != 0) { 182 | tmp4[i+6] /= h1length; 183 | } 184 | if (tmp4[i+7] != 0) { 185 | tmp4[i+7] /= h1length; 186 | } 187 | if (tmp4[i+7] != 0) { 188 | tmp4[i+7] /= h1length; 189 | } 190 | ar[0]=tmp4[i+4]*x_ref[2] - tmp4[i+5]*x_ref[1]; 191 | ar[1]=tmp4[i+5]*x_ref[0] - tmp4[i+3]*x_ref[2]; 192 | ar[2]=tmp4[i+3]*x_ref[1] - tmp4[i+4]*x_ref[0]; 193 | ar2 = pow(ar[0],2) + pow(ar[1],2) + pow(ar[2],2); 194 | arlength = pow(ar2, 0.5); 195 | if (ar[0] != 0) { 196 | ar[0] /= arlength; 197 | } 198 | if (ar[1] != 0) { 199 | ar[1] /= arlength; 200 | } 201 | if (ar[2] != 0) { 202 | ar[2] /= arlength; 203 | } 204 | dotprohah1 = 0; 205 | dotprohah1 += x_ref[0]*tmp4[i+3]; 206 | dotprohah1 += x_ref[1]*tmp4[i+4]; 207 | dotprohah1 += x_ref[2]*tmp4[i+5]; 208 | theta = acos(dotprohah1); 209 | crossp_x_ref_h1[0]=tmp4[i+4]*x_ref[2] - tmp4[i+5]*x_ref[1]; 210 | crossp_x_ref_h1[1]=tmp4[i+5]*x_ref[0] - tmp4[i+3]*x_ref[2]; 211 | crossp_x_ref_h1[2]=tmp4[i+3]*x_ref[1] - tmp4[i+4]*x_ref[0]; 212 | crossp_x_ref_h1_sign=crossp_x_ref_h1[0]*tmp4[i+3]+crossp_x_ref_h1[1]*tmp4[i+4]+crossp_x_ref_h1[2]*tmp4[i+5]; 213 | if (crossp_x_ref_h1_sign > 0) { 214 | theta /=2; 215 | } 216 | else { 217 | theta /=-2; 218 | } 219 | q[0]=cos(theta); 220 | q[1]=ar[0]*sin(theta); 221 | q[2]=ar[1]*sin(theta); 222 | q[3]=ar[2]*sin(theta); 223 | 224 | htemp[0]= ((pow(q[0],2)+pow(q[1],2))-(pow(q[2],2)+pow(q[3],2)))* tmp4[i+3]; 225 | htemp[0]= (2*(q[1]*q[2] + q[0]*q[3]) * tmp4[i+4] ) + htemp[0]; 226 | htemp[0]= (2*(q[1]*q[3]-q[0]*q[2])*tmp4[i+5]) + htemp[0]; 227 | htemp[1]= 2*( q[1]*q[2] - q[0]*q[3] ) * tmp4[i+3]; 228 | htemp[1]= ( ( 
q[0]*q[0]-q[1]*q[1]+q[2]*q[2]-q[3]*q[3] ) * tmp4[i+4] ) + htemp[1]; 229 | htemp[1]= ( 2*( q[2]*q[3] + q[0]*q[1] ) * tmp4[i+5] ) + htemp[1]; 230 | htemp[2]= 2*( q[1]*q[3] + q[0]*q[2]) * tmp4[i+3]; 231 | htemp[2]= ( 2*( q[2]*q[3]-q[0]*q[1] ) * tmp4[i+4] ) + htemp[2]; 232 | htemp[2]= ( ( q[0]*q[0]-q[1]*q[1]-q[2]*q[2]+q[3]*q[3] ) * tmp4[i+5] ) + htemp[2]; 233 | tmp4[i+3]=htemp[0]; 234 | tmp4[i+4]=htemp[1]; 235 | tmp4[i+5]=htemp[2]; 236 | htemp[0]= ((pow(q[0],2)+pow(q[1],2))-(pow(q[2],2)+pow(q[3],2)))* tmp4[i+6]; 237 | htemp[0]= (2*(q[1]*q[2] + q[0]*q[3]) * tmp4[i+7] ) + htemp[0]; 238 | htemp[0]= (2*(q[1]*q[3]-q[0]*q[2])*tmp4[i+8]) + htemp[0]; 239 | htemp[1]= 2*( q[1]*q[2] - q[0]*q[3] ) * tmp4[i+6]; 240 | htemp[1]= ( ( q[0]*q[0]-q[1]*q[1]+q[2]*q[2]-q[3]*q[3] ) * tmp4[i+7] ) + htemp[1]; 241 | htemp[1]= ( 2*( q[2]*q[3] + q[0]*q[1] ) * tmp4[i+8] ) + htemp[1]; 242 | htemp[2]= 2*( q[1]*q[3] + q[0]*q[2]) * tmp4[i+6]; 243 | htemp[2]= ( 2*( q[2]*q[3]-q[0]*q[1] ) * tmp4[i+7] ) + htemp[2]; 244 | htemp[2]= ( ( q[0]*q[0]-q[1]*q[1]-q[2]*q[2]+q[3]*q[3] ) * tmp4[i+8] ) + htemp[2]; 245 | tmp4[i+6]=htemp[0]; 246 | tmp4[i+7]=htemp[1]; 247 | tmp4[i+8]=htemp[2]; 248 | z_mol_vect[0]=tmp4[i+4]*tmp4[i+8] - tmp4[i+5]*tmp4[i+7]; 249 | z_mol_vect[1]=tmp4[i+5]*tmp4[i+6] - tmp4[i+3]*tmp4[i+8]; 250 | z_mol_vect[2]=tmp4[i+3]*tmp4[i+7] - tmp4[i+4]*tmp4[i+6]; 251 | z_mol_vect2= pow(z_mol_vect[0],2) + pow(z_mol_vect[1],2) + pow(z_mol_vect[2],2); 252 | z_mol_vectlength=pow(z_mol_vect2,0.5); 253 | if (z_mol_vect[0] !=0) { 254 | z_mol_vect[0] /= z_mol_vectlength; 255 | } 256 | if (z_mol_vect[1] !=0) { 257 | z_mol_vect[1] /= z_mol_vectlength; 258 | } 259 | if (z_mol_vect[2] !=0) { 260 | z_mol_vect[2] /= z_mol_vectlength; 261 | } 262 | dotproductz_mol_vectz_ref=0; 263 | for(int j=0;j<3;j++) { 264 | dotproductz_mol_vectz_ref+=z_mol_vect[j]*z_ref[j]; 265 | } 266 | theta3p= acos(dotproductz_mol_vectz_ref); 267 | 268 | crossp_z_mol_vectz_ref[0]=z_mol_vect[1]*z_ref[2] - z_mol_vect[2]*z_ref[1]; 269 | 
crossp_z_mol_vectz_ref[1]=z_mol_vect[2]*z_ref[0] - z_mol_vect[0]*z_ref[2]; 270 | crossp_z_mol_vectz_ref[2]=z_mol_vect[0]*z_ref[1] - z_mol_vect[1]*z_ref[0]; 271 | 272 | crossp_z_mol_vectz_ref_sign=crossp_z_mol_vectz_ref[0]*tmp4[i+3]+crossp_z_mol_vectz_ref[1]*tmp4[i+4]+crossp_z_mol_vectz_ref[2]*tmp4[i+5]; 273 | 274 | if (crossp_z_mol_vectz_ref_sign < 0) { 275 | theta3p /=2; 276 | } 277 | else { 278 | theta3p /=-2; 279 | } 280 | 281 | q2[0]=cos(theta3p); 282 | q2[1]=x_ref[0]*sin(theta3p); 283 | q2[2]=x_ref[1]*sin(theta3p); 284 | q2[3]=x_ref[2]*sin(theta3p); 285 | 286 | e[0]= q[0]*q2[0] - q[1]*q2[1] - q[2]*q2[2] - q[3]*q2[3]; 287 | e[1]= q[0]*q2[1] + q[1]*q2[0] + q[2]*q2[3] - q[3]*q2[2]; 288 | e[2]= q[0]*q2[2] - q[1]*q2[3] + q[2]*q2[0] + q[3]*q2[1]; 289 | e[3]= q[0]*q2[3] + q[1]*q2[2] - q[2]*q2[1] + q[3]*q2[0]; 290 | 291 | singtest=((e[1]*e[2]) + (e[3]*e[0])); 292 | if (singtest > 0.4999) { 293 | tmp3.push_back(sin(pi/2)); 294 | tmp3.push_back(0); 295 | tmp3.push_back(2*atan2(e[1],e[0])); 296 | } 297 | else if (singtest < -0.4999) { 298 | tmp3.push_back(sin(pi/-2)); 299 | tmp3.push_back(0); 300 | tmp3.push_back(-2*atan2(e[1], e[0])); 301 | } 302 | else { 303 | tmp3.push_back(sin(asin(2*singtest))); 304 | tmp3.push_back(atan2(((2*e[1]*e[0])-(2*e[2]*e[3])) , (1 - (2*pow(e[1],2)) - (2*pow(e[3],2))))); 305 | tmp3.push_back(atan2(((2*e[2]*e[0])-(2*e[1]*e[3])) , (1 - (2*pow(e[2],2)) - (2*pow(e[3],2))))); 306 | } 307 | } 308 | kdtree orient(tmp3); 309 | //s = orient.run_tree_orient(); 310 | //orientout << s << endl; 311 | //orientout.close(); 312 | int orienti = 0; //index of closest orient 313 | int* indo; 314 | indo = new int[1]; 315 | double* disto; 316 | disto = new double[1]; 317 | winner = 10000.00; 318 | for (i = 0; i < orient.npts; i++) { 319 | orient.nnearest(i, indo, disto, 1); 320 | if (disto[0] < winner) { 321 | winner = disto[0]; 322 | orienti = indo[0]; 323 | } 324 | } 325 | 326 | delete disto; 327 | delete indo; 328 | 329 | /* 330 | Determined the best water 
orientation as orienti in array of pts 331 | * Best oxygen position is the position of water at transi in pts 332 | * need to find oxygen position of water with orienti in tmp array 333 | * tmp array is for each water 9 334 | * therefore position of oxygen is orienti * 9, orienti*9 + 1, and orienti*9 +2 335 | * need to find distance of that water to transi 336 | * translate 337 | */ 338 | 339 | double orientO_x = tmp[orienti*9]; 340 | double orientO_y = tmp[orienti*9 + 1]; 341 | double orientO_z = tmp[orienti*9 + 2]; 342 | double orientH1_x = tmp[orienti*9 + 3]; 343 | double orientH1_y = tmp[orienti*9 + 4]; 344 | double orientH1_z = tmp[orienti*9 + 5]; 345 | double orientH2_x = tmp[orienti*9 + 6]; 346 | double orientH2_y = tmp[orienti*9 + 7]; 347 | double orientH2_z = tmp[orienti*9 + 8]; 348 | 349 | string name = "ATOM"; 350 | string atom = "O"; 351 | string chainid = "X"; 352 | int resseq = 1; 353 | string resname = "T3P"; 354 | string testfile = "test.pdb"; 355 | //FILE * tFile; 356 | //tFile = fopen(testfile.c_str(), "a"); 357 | //fprintf (tFile, "%-6s%5i %-4s %3s %1s%4i %8.3f%8.3f%8.3f%6.2f%6.2f\n", name.c_str(), i, atom.c_str(), resname.c_str(), chainid.c_str(), resseq, orientO_x, orientO_y, orientO_z, 0.0, 0.0); 358 | //fprintf (tFile, "%-6s%5i %-4s %3s %1s%4i %8.3f%8.3f%8.3f%6.2f%6.2f\n", name.c_str(), i, "H", resname.c_str(), chainid.c_str(), resseq, orientH1_x, orientH1_y, orientH1_z, 0.0, 0.0); 359 | //fprintf (tFile, "%-6s%5i %-4s %3s %1s%4i %8.3f%8.3f%8.3f%6.2f%6.2f\n", name.c_str(), i, "H", resname.c_str(), chainid.c_str(), resseq, orientH2_x, orientH2_y, orientH2_z, 0.0, 0.0); 360 | 361 | 362 | double transO_x = trans.pts[transi].x[0]; 363 | double transO_y = trans.pts[transi].x[1]; 364 | double transO_z = trans.pts[transi].x[2]; 365 | 366 | double distx = (transO_x - orientO_x); 367 | double disty = (transO_y - orientO_y); 368 | double distz = (transO_z - orientO_z); 369 | 370 | orientO_x += distx; 371 | orientO_y += disty; 372 | orientO_z += distz; 
373 | orientH1_x += distx; 374 | orientH1_y += disty; 375 | orientH1_z += distz; 376 | orientH2_x += distx; 377 | orientH2_y += disty; 378 | orientH2_z += distz; 379 | double ox = 0.0, oy = 0.0, oz = 0.0; 380 | for (i = 0; i < 3; i++) { 381 | if (i > 0) {atom = "H";} 382 | if (i == 0) { 383 | ox = orientO_x; oy = orientO_y; oz = orientO_z; 384 | } 385 | if (i == 1) { 386 | ox = orientH1_x; oy = orientH1_y; oz = orientH1_z; 387 | } 388 | if (i == 2){ 389 | ox = orientH2_x; oy = orientH2_y; oz = orientH2_z; 390 | } 391 | fprintf (pFile, "%-6s%5i %-4s %3s %1s%4i %8.3f%8.3f%8.3f%6.2f%6.2f\n", name.c_str(), i, atom.c_str(), resname.c_str(), chainid.c_str(), resseq, ox, oy, oz, 0.0, 0.0); 392 | } 393 | } 394 | 395 | 396 | 397 | -------------------------------------------------------------------------------- /sstmap/renum_pdb.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | using namespace std; 8 | 9 | int main (int argc, char** argv) { 10 | int i = 0; string infile; 11 | while (i < argc) { 12 | if (!strcmp(argv[i], "-i")) { 13 | infile = argv[++i]; 14 | } 15 | i++; 16 | } 17 | 18 | int pos = 0; int watnum = 0; 19 | string temp; 20 | ifstream input(infile.c_str()); 21 | ofstream output("fixedprob.pdb"); 22 | while (!input.eof()) { 23 | getline(input, temp); 24 | if (!temp.empty()) { 25 | if (pos%3==0 && pos!=0) { 26 | watnum++; 27 | } 28 | if (pos < 10) { 29 | if (watnum < 10) { 30 | output << temp.substr(0,9) << " " << pos << " " <. 22 | ############################################################################## 23 | """ 24 | This module contains implementation of a parent class for water analysis in 25 | molecular dynamics simulation trajectories. This class provides methods for 26 | index all atoms in the simulation, calculations of the energy and hydrogen 27 | bonding of water molecules with other atoms in the system. 
28 | 29 | Please reference the following if you use this code in your research: 30 | [1] Haider K, Wickstrom L, Ramsey S, Gilson MK and Kurtzman T. Enthalpic Breakdown 31 | of Water Structure on Protein Active Site Surfaces. J Phys Chem B. 120:8743-8756, 32 | 2016. http://dx.doi.org/10.1021/acs.jpcb.6b01094. 33 | """ 34 | 35 | __author__ = "Kamran Haider" 36 | __license__ = "MIT" 37 | __maintainer__ = "Kamran Haider" 38 | __email__ = "kamranhaider.mb@gmail.com" 39 | 40 | 41 | from sstmap.testing import test_gist_output 42 | -------------------------------------------------------------------------------- /sstmap/testing/test_gist_output.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test scripts 3 | General purpose: Take in two files as input and run the tests 4 | Should be able to import the module and run for hsa and gist calculations run 5 | as part of installation. 6 | 7 | Place it inside the test suite, at the end of test scripts import and run tests 8 | Use numpy testing module. 
class TestGistOutput:
    """
    Compare a GIST summary table under test against a reference table.

    Both tables are 2-D arrays indexed as ``[row, column]``. Every check
    prints the failure message and returns ``False`` instead of raising, so
    a caller can run all checks and report each outcome.
    """

    def __init__(self, test_data, ref_data):
        """
        Store the two tables to be compared.

        Args:
            test_data: 2-D array produced by the calculation under test.
            ref_data: 2-D array holding the reference values.
        """
        self.test_data = test_data
        self.ref_data = ref_data

    def test_grid(self):
        """
        Check that columns 1-3 of both tables agree to 3 decimals.

        Returns:
            bool: True when the columns match, False otherwise.
        """
        try:
            # Slicing stays inside the try block so that a malformed table
            # is reported as a failure rather than raised.
            candidate = self.test_data[:, 1:4]
            reference = self.ref_data[:, 1:4]
            npt.assert_almost_equal(candidate, reference, decimal=3)
        except Exception as err:
            print(err)
            return False
        return True

    def test_voxel_number(self):
        """
        Check that both tables have the same shape.

        Returns:
            bool: True when the shapes are equal, False otherwise.
        """
        try:
            npt.assert_equal(self.test_data.shape, self.ref_data.shape)
        except Exception as err:
            print(err)
            return False
        return True

    def test_quantity(self, quantity_index):
        """
        Check that one column agrees between the tables to 2 decimals.

        Args:
            quantity_index: Column to compare in both tables.

        Returns:
            bool: True when the column matches, False otherwise.
        """
        try:
            candidate = self.test_data[:, quantity_index]
            reference = self.ref_data[:, quantity_index]
            npt.assert_array_almost_equal(candidate, reference, decimal=2)
        except Exception as err:
            print(err)
            return False
        return True
def read_gist_cpptraj(cpptraj_gist_summary):
    """
    Load the compared columns of a cpptraj GIST summary table.

    The first two lines of the file are skipped. Columns 0-5, 7-16, 21 and
    22 are kept, in that order, giving 18 columns per row.

    Args:
        cpptraj_gist_summary: Path (or file object) of the cpptraj summary.

    Returns:
        numpy.ndarray: The selected columns as returned by ``np.loadtxt``.
    """
    wanted_columns = list(range(0, 6)) + list(range(7, 17)) + [21, 22]
    return np.loadtxt(cpptraj_gist_summary, skiprows=2, usecols=wanted_columns)
def parse_args():
    """Parse the command line arguments and perform some validation on the
    arguments

    Both ``-t/--test_gist_summary`` and ``-r/--ref_gist_summary`` are
    required strings; argparse exits with a usage message if either is
    missing.

    Note: ``main`` forwards both values to ``run_all_gist_tests``, which
    treats them as directories even though the help text calls them summary
    files.

    Returns
    -------
    args : argparse.Namespace
        The namespace containing the arguments
    """
    # The module's top-level imports do not provide ArgumentParser, so it is
    # imported here; without this the function fails with NameError.
    from argparse import ArgumentParser

    parser = ArgumentParser(description='''Run tests of GIST calculations against validated output.''')

    parser.add_argument('-t', '--test_gist_summary', required=True, type=str,
                        help='''Summary file of GIST calculation to be tested.''')
    parser.add_argument('-r', '--ref_gist_summary', required=True, type=str,
                        help='''A refeeence summary file of GIST calculation''')
    args = parser.parse_args()
    return args
% (test_dir, ref_dir)) 208 | else: 209 | test_dir_files = os.listdir(test_dir_path) 210 | ref_dir_files = os.listdir(ref_dir_path) 211 | test_dx_files = [f for f in test_dir_files if f.endswith(".dx")] 212 | test_dx_files = [f for f in test_dx_files if f[f.find("_") + 1:][:-3] in DX_TEST_FILES] 213 | ref_dx_files = [f for f in ref_dir_files if f.endswith(".dx")] 214 | ref_dx_files = [f for f in ref_dx_files if f[5:][:-3] in DX_TEST_FILES] 215 | assert len(test_dx_files) == len(ref_dx_files), "Couldn't obtain all DX files, tests won't run." 216 | for f in ref_dx_files: 217 | suffix = f[5:] 218 | corresponding_test_file = [t for t in test_dx_files if suffix in t] 219 | if len(corresponding_test_file) == 0: 220 | raise Exception(ValueError, "%s: corresponding test file not found.", f) 221 | else: 222 | file_dict[f] = corresponding_test_file[0] 223 | test_data_file = [test_dir_path + f for f in test_dir_files if f.endswith("gist_data.txt")] 224 | test_data = read_gist_sstmap(test_data_file[0]) 225 | ref_data_file = [ref_dir_path + f for f in ref_dir_files if f.endswith("all.out")] 226 | ref_data = read_gist_cpptraj(ref_data_file[0]) 227 | assert test_data.shape == ref_data.shape, "GIST columns/rows in summary files are not equal, tests won't run" 228 | diff_nwat = [] 229 | for row in range(test_data.shape[0]): 230 | if test_data[row, 4] <= 1: 231 | test_data[row, 6:14] *= 0.0 232 | # record voxels with different water number but exclude them for tests 233 | else: 234 | if abs(int(test_data[row, 4]) - int(ref_data[row, 4])) >= 1: 235 | diff_nwat.append([test_data[row, :], ref_data[row, :]]) 236 | test_data[row, 4:] *= 0.0 237 | ref_data[row, 4:] *= 0.0 238 | # Run tests 239 | print("Checking grid and voxel placement ...") 240 | testcase = TestGistOutput(test_data, ref_data) 241 | result = testcase.test_voxel_number() 242 | result = testcase.test_grid() 243 | print("\t" + test_result[bool(result)]) 244 | print("Checking: %s" % quantities[4]) 245 | result = 
def main():
    """
    Command-line entry point: parse the arguments and run the comparison.

    The values given for ``--test_gist_summary`` and ``--ref_gist_summary``
    are passed, in that order, to ``run_all_gist_tests``.
    """
    args = parse_args()
    run_all_gist_tests(args.test_gist_summary, args.ref_gist_summary)

# Run the comparison only when executed as a script, not on import.
if __name__ == '__main__':
    main()
5 | # MIT License 6 | # Copyright 2016-2017 Lehman College City University of New York and the Authors 7 | # 8 | # Authors: Kamran Haider, Steven Ramsay, Anthony Cruz Balberdy 9 | # 10 | # Permission is hereby granted, free of charge, to any person obtaining a copy 11 | # of this software and associated documentation files (the "Software"), to deal 12 | # in the Software without restriction, including without limitation the rights 13 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 14 | # copies of the Software, and to permit persons to whom the Software is 15 | # furnished to do so, subject to the following conditions: 16 | 17 | # The above copyright notice and this permission notice shall be included in all 18 | # copies or substantial portions of the Software. 19 | 20 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 21 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 22 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 23 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 24 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 25 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 26 | # SOFTWARE. 
def print_progress_bar(count, total):
    """
    Create and update progress bar during a loop.

    The bar is written to standard output followed by a carriage return, so
    successive calls overwrite the same console line. A newline is printed
    once ``count == total``.

    Parameters
    ----------
    count : int
        The number of the current iteration, used to calculate progress.
    total : int
        Total number of iterations. A total of zero is reported as a
        completed bar instead of raising ZeroDivisionError.

    Notes
    -----
    Based on:
    http://stackoverflow.com/questions/3173320/text-progress-bar-in-the-console
    """
    bar_len = 20
    if total:
        filled_len = int(round(bar_len * count / float(total)))
        percents = round(100.0 * count / float(total), 1)
    else:
        # Nothing to iterate over: there is no ratio to compute, so show the
        # bar as complete.
        filled_len = bar_len
        percents = 100.0

    bar = "=" * filled_len + ' ' * (bar_len - filled_len)

    sys.stdout.write('Progress |%s| %s%s Done.\r' % (bar, percents, '%'))
    sys.stdout.flush()
    if count == total:
        print()
89 | sites: a list of keys which represent site labels 90 | data: a dictionary of sites 91 | x_values: data points on x-axis 92 | nbr_norm: Normalize by number of neighbors 93 | outname: name of output file 94 | 95 | Parameters 96 | ---------- 97 | data_dir : TYPE 98 | Description 99 | site_indices : None, optional 100 | Description 101 | nbr_norm : bool, optional 102 | Description 103 | ref_data : None, optional 104 | Description 105 | ref_nbrs : None, optional 106 | Description 107 | """ 108 | enbr_files = [] 109 | enbr = {} 110 | ref_enbr = None 111 | nbr_files = [] 112 | nbr_values = [] 113 | 114 | if not os.path.isdir(data_dir): 115 | sys.exit( 116 | "Data directory not found, please check path of the directory again.") 117 | 118 | if site_indices is None: 119 | enbr_files = [ 120 | f for f in os.listdir(data_dir) if f.endswith("Ewwnbr.txt")] 121 | if nbr_norm: 122 | nbr_files = [ 123 | f for f in os.listdir(data_dir) if f.endswith("Nnbrs.txt")] 124 | else: 125 | enbr_files = [f for f in os.listdir(data_dir) if f.endswith( 126 | "Ewwnbr.txt") and int(f[0:3]) in site_indices] 127 | if nbr_norm: 128 | nbr_files = [f for f in os.listdir(data_dir) if f.endswith( 129 | "Nnbrs.txt") and int(f[0:3]) in site_indices] 130 | 131 | for index, file in enumerate(enbr_files): 132 | site_i = int(file[0:3]) 133 | enbr[site_i] = np.loadtxt(data_dir + "/" + file) 134 | if nbr_norm: 135 | nbrs = np.loadtxt(data_dir + "/" + nbr_files[index]) 136 | nbr_values.append(np.sum(nbrs) /nbrs.shape[0]) 137 | if ref_data is not None: 138 | ref_enbr = np.loadtxt(ref_data) 139 | if nbr_norm: 140 | ref_enbr *= ref_nbrs 141 | 142 | for index, site_i in enumerate(enbr.keys()): 143 | print(("Generating Enbr plot for: ", site_i, enbr_files[index])) 144 | # Get x and p_x for current site 145 | site_enbr = enbr[site_i]*0.5 146 | x_low, x_high = -5.0, 3.0 147 | enbr_min, enbr_max = np.min(site_enbr), np.max(site_enbr) 148 | if enbr_min < x_low: 149 | x_low = enbr_min 150 | if enbr_max > x_high: 151 
def plot_rtheta(data_dir, site_indices=None):
    """Plot a 2D (theta, r) kernel density surface for each hydration site.

    Every file in ``data_dir`` whose name ends with ``r_theta.txt`` is loaded
    with ``np.loadtxt``; the first three characters of the file name are
    parsed as the integer site index.  Column 0 of each file is used as the
    angle (theta) and column 1 as the distance (r).  A Gaussian KDE of the
    (theta, r) samples is evaluated on a fixed grid (theta 0-130 in 131
    points, r 2.0-6.0 in 41 points), scaled, divided by the expected bulk
    count of the corresponding spherical shell, and written as
    ``<data_dir>/<NNN>_rtheta_plot.png``.

    Parameters
    ----------
    data_dir : string
        Directory containing the ``*r_theta.txt`` files; plots are written
        to the same directory.
    site_indices : None or container of int, optional
        When given, only files whose site index is in this container are
        plotted.  Default: None (all sites).

    Notes
    -----
    The process exits via ``sys.exit`` when ``data_dir`` is not a directory.
    """
    print(data_dir)
    if not os.path.isdir(data_dir):
        sys.exit(
            "Data directory not found, please check path of the directory again.")

    rtheta_files = [
        f for f in os.listdir(data_dir) if f.endswith("r_theta.txt")]
    if site_indices is not None:
        rtheta_files = [f for f in rtheta_files if int(f[0:3]) in site_indices]

    # Keep the file name next to its data so the progress message always
    # names the file that was actually loaded for the site.
    site_data = {}
    for fname in rtheta_files:
        site_data[int(fname[0:3])] = (
            fname, np.loadtxt(os.path.join(data_dir, fname)))

    integ_counts = 16.3624445886
    shell_width = 0.1       # equals the radial spacing of the grid below
    bulk_density = 0.0329   # NOTE(review): presumably bulk water number density per A^3 -- confirm

    # The evaluation grid and the per-shell bulk counts are identical for
    # every site, so compute them once.
    X, Y = np.mgrid[0:130:131j, 2.0:6.0:41j]
    positions = np.vstack([X.ravel(), Y.ravel()])
    radii = Y[0, :]
    vol = (4.0 / 3.0) * np.pi * (radii**3)
    vol_low = (4.0 / 3.0) * np.pi * ((radii - shell_width)**3)
    counts_bulk = bulk_density * (vol - vol_low)

    for site_i, (fname, data) in site_data.items():
        print(("Generating r_theta plot for: ", site_i, fname))
        fig = plt.figure()
        # Figure.gca(projection=...) was removed from Matplotlib; add_subplot
        # is the supported way to request 3D axes.
        ax = fig.add_subplot(111, projection='3d')
        theta = data[:, 0]
        r = data[:, 1]
        # generate kernel density estimates on the grid
        kernel = stats.gaussian_kde(np.vstack([theta, r]))
        Z = np.reshape(kernel(positions).T, X.shape)
        Z *= integ_counts * shell_width
        sum_counts_kernel = np.sum(Z)
        # Normalise every radial column by the bulk count of its shell
        # (broadcast over the theta axis).
        Z /= counts_bulk
        print(sum_counts_kernel)

        legend_label = "%03d_" % site_i
        ax.plot_surface(X, Y, Z, rstride=1, cstride=1, linewidth=0.5,
                        antialiased=True, alpha=1.0, cmap=cm.coolwarm,
                        label=legend_label)
        x_label = r"$\theta^\circ$"
        y_label = r"$r (\AA)$"
        ax.set_xlabel(x_label)
        ax.set_xlim(0, 130)
        ax.set_ylabel(y_label)
        ax.set_ylim(2.0, 6.0)
        z_label = r'$\mathrm{P(\theta, \AA)}$'
        ax.set_zlabel(z_label)
        plt.savefig(data_dir + "/" + legend_label + "rtheta_plot.png", dpi=300)
        plt.close()
def write_watpdb_from_list(coords, filename, water_id_list, full_water_res=False):
    """Write selected water molecules to ``<filename>.pdb``.

    Parameters
    ----------
    coords : array-like
        Coordinate array indexed as ``coords[a, b, :]`` where ``(a, b)`` is
        an entry of ``water_id_list`` and the last axis holds x, y, z.
        Values are written as-is (no unit conversion).
    filename : string
        Output path without extension; ``".pdb"`` is appended.
    water_id_list : sequence of 2-element sequences
        One entry per water.  The second element is the water oxygen atom
        index (hydrogens are read at ``+1`` and ``+2``); the first element
        is presumably the frame index -- confirm against callers.
    full_water_res : bool, optional
        When True the two hydrogens following each oxygen are written too.
        Default: False (oxygen only).

    Notes
    -----
    Residue numbers wrap modulo 10000.  After the residue numbered 9999 a
    TER record is written and the atom serial restarts at 1.
    """
    # NOTE(review): confirm the spacing of these templates against the PDB
    # fixed-column layout.
    pdb_line_format = "{0:6}{1:>5} {2:<3}{3:<1}{4:>3} {5:1}{6:>4}{7:1} {8[0]:>8.3f}{8[1]:>8.3f}{8[2]:>8.3f}{9:>6.2f}{10:>6.2f}{11:>12s}\n"
    ter_line_format = "{0:3} {1:>5} {2:>3} {3:1}{4:4} \n"
    chain_id = "A"
    at = 1
    res = 1
    with open(filename + ".pdb", 'w') as f:
        for wat in water_id_list:
            at_index = at
            res_index = res % 10000
            f.write(pdb_line_format.format(
                "ATOM", at_index, "O", " ", "WAT", chain_id, res_index, " ",
                coords[wat[0], wat[1], :], 0.00, 0.00, "O"))

            if full_water_res:
                f.write(pdb_line_format.format(
                    "ATOM", at_index + 1, "H1", " ", "WAT", chain_id, res_index, " ",
                    coords[wat[0], wat[1] + 1, :], 0.00, 0.00, "H"))
                f.write(pdb_line_format.format(
                    "ATOM", at_index + 2, "H2", " ", "WAT", chain_id, res_index, " ",
                    coords[wat[0], wat[1] + 2, :], 0.00, 0.00, "H"))
                at += 3
            else:
                at += 1
            res += 1

            if res_index == 9999:
                # The TER record used to be formatted and then discarded;
                # emit it so the serial restart below is marked in the file.
                f.write(ter_line_format.format(
                    "TER", at, "WAT", chain_id, res_index))
                at = 1
res_index = res % 10000 435 | # wat_coords = md.utils.in_units_of( 436 | # coords[wat[0], wat[1], :], "nanometers", "angstroms") 437 | wat_coords = coords[wat_i] 438 | # chain_id = possible_chains[chain_id_index] 439 | chain_id = "A" 440 | pdb_line = pdb_line_format.format( 441 | "ATOM", at_index, "O", " ", "WAT", chain_id, res_index, " ", wat_coords, 0.00, 0.00, "O") 442 | # pdb_lines.append(pdb_line) 443 | f.write(pdb_line) 444 | wat_i += 1 445 | if full_water_res: 446 | # H1_coords = md.utils.in_units_of( 447 | # coords[wat[0], wat[1] + 1, :], "nanometers", "angstroms") 448 | H1_coords = coords[wat_i] 449 | pdb_line_H1 = pdb_line_format.format("ATOM", at_index + 1, "H1", " ", "WAT", chain_id, res_index, " ", 450 | H1_coords, 0.00, 0.00, "H") 451 | # pdb_lines.append(pdb_line_H1) 452 | f.write(pdb_line_H1) 453 | # H2_coords = md.utils.in_units_of( 454 | # coords[wat[0], wat[1] + 2, :], "nanometers", "angstroms") 455 | H2_coords = coords[wat_i + 1] 456 | pdb_line_H2 = pdb_line_format.format("ATOM", at_index + 2, "H2", " ", "WAT", chain_id, res_index, " ", 457 | H2_coords, 0.00, 0.00, "H") 458 | # pdb_lines.append(pdb_line_H2) 459 | f.write(pdb_line_H2) 460 | at += 3 461 | res += 1 462 | wat_i += 2 463 | else: 464 | at += 1 465 | res += 1 466 | if res_index == 9999: 467 | ter_line = ter_line_format.format("TER", at, "WAT", chain_id, res_index) 468 | at = 1 469 | # pdb_lines.append(ter_line) 470 | # pdb_lines.append("END") 471 | # np.savetxt(filename + ".pdb", np.asarray(pdb_lines), fmt="%s") 472 | 473 | """ 474 | pdb_line_format = "{0:6}{1:>5} {2:<3}{3:<1}{4:>3} {5:1}{6:>4}{7:1} {8[0]:>8.3f}{8[1]:>8.3f}{8[2]:>8.3f}{9:>6.2f}{10:>6.2f}{11:>12s}\n" 475 | ter_line_format = "{0:3} {1:>5} {2:>3} {3:1}{4:4} \n" 476 | pdb_lines = ["REMARK Initial number of clusters: N/A\n"] 477 | # write form the list of (water, frame) tuples 478 | for at in range(len(wat_coords)): 479 | wat_coord = wat_coords[at] 480 | at_index = at % 10000 481 | res_index = at % 10000 482 | chain_id = 
class HSAFields:
    """Column positions of the per-site quantities in an HSA summary row.

    Each class attribute is the integer column index of the quantity with
    that name.  ``data_titles`` lists the same names in column order, so
    ``data_titles[HSAFields.<name>] == "<name>"`` holds for every field.
    """
    # Column titles in index order.  This list previously duplicated the
    # GIST titles and did not correspond to the constants defined below.
    data_titles = ['index', 'x', 'y', 'z',
                   'nwat', 'occupancy',
                   'Esw', 'EswLJ', 'EswElec',
                   'Eww', 'EwwLJ', 'EwwElec',
                   'Etot', 'Ewwnbr',
                   'TSsw_trans', 'TSsw_orient', 'TStot',
                   'Nnbrs', 'Nhbww', 'Nhbsw', 'Nhbtot',
                   'f_hb_ww', 'f_enc',
                   'Acc_ww', 'Don_ww', 'Acc_sw', 'Don_sw',
                   'solute_acceptors', 'solute_donors']
    index = 0
    x = 1
    y = 2
    z = 3
    nwat = 4
    occupancy = 5
    Esw = 6
    EswLJ = 7
    EswElec = 8
    Eww = 9
    EwwLJ = 10
    EwwElec = 11
    Etot = 12
    Ewwnbr = 13
    TSsw_trans = 14
    TSsw_orient = 15
    TStot = 16
    Nnbrs = 17
    Nhbww = 18
    Nhbsw = 19
    Nhbtot = 20
    f_hb_ww = 21
    f_enc = 22
    Acc_ww = 23
    Don_ww = 24
    Acc_sw = 25
    Don_sw = 26
    solute_acceptors = 27
    solute_donors = 28
5 | # MIT License 6 | # Copyright 2016-2017 Lehman College City University of New York and the Authors 7 | # 8 | # Authors: Kamran Haider, Steven Ramsay, Anthony Cruz Balberdy 9 | # 10 | # Permission is hereby granted, free of charge, to any person obtaining a copy 11 | # of this software and associated documentation files (the "Software"), to deal 12 | # in the Software without restriction, including without limitation the rights 13 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 14 | # copies of the Software, and to permit persons to whom the Software is 15 | # furnished to do so, subject to the following conditions: 16 | 17 | # The above copyright notice and this permission notice shall be included in all 18 | # copies or substantial portions of the Software. 19 | 20 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 21 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 22 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 23 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 24 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 25 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 26 | # SOFTWARE. 27 | ############################################################################### 28 | """ 29 | This module contains implementation of a parent class for water analysis in 30 | molecular dynamics simulation trajectories. The data and methods in this class 31 | are common to site-based and grid-based analysis classes. 32 | 33 | Please reference the following if you use this code in your research: 34 | 35 | [1] Haider K, Cruz A, Ramsey S, Gilson M. and Kurtzman T. Solvation Structure and Thermodynamic Mapping (SSTMap): An 36 | Open-Source, Flexible Package for the Analysis of Water in Molecular Dynamics Trajectories. J. Chem. Theory Comput. 
37 | (10.1021/acs.jctc.7b00592) 2017. 38 | [2] Crystal N. Nguyen, Michael K. Gilson, Tom Young. Structure and Thermodynamics of Molecular Hydration via Grid 39 | Inhomogeneous Solvation Theory. eprint arXiv:1108.4876, (2011). 40 | [3] Crystal N. Nguyen, Tom Kurtzman Young, and Michael K. Gilson. Grid inhomogeneous solvation theory: hydration 41 | structure and thermodynamics of the miniature receptor cucurbit[7]uril. J. Chem. Phys. 137, 044101 (2012) 42 | [4] Haider K, Wickstrom L, Ramsey S, Gilson MK and Kurtzman T. Enthalpic Breakdown of Water Structure on Protein Active 43 | Site Surfaces. J Phys Chem B. 120:8743-8756, (2016). http://dx.doi.org/10.1021/acs.jpcb.6b01094. 44 | """ 45 | 46 | ############################################################################## 47 | # Imports 48 | ############################################################################## 49 | import numpy as np 50 | import parmed as pmd 51 | import mdtraj as md 52 | from parmed.charmm import CharmmParameterSet 53 | from sstmap.utils import * 54 | 55 | ############################################################################## 56 | # Globals 57 | ############################################################################## 58 | 59 | DON_ACC_LIST = ["oxygen", "nitrogen", "sulfur"] 60 | _WATER_RESNAMES = ['H2O', 'HHO', 'OHH', 'HOH', 'OH2', 'SOL', 'WAT', 'TIP', 'TIP2', 'TIP3', 'TIP4', 'T3P', 'T4P', 'T5P'] 61 | ANGLE_CUTOFF_RAD = 0.523599 62 | requirements = { 63 | "prmtop": ["prmtop", "", "lorentz-bertholot"], 64 | "parm7": ["parm7", "", "lorentz-bertholot"], 65 | "psf": ["toppar", "Please provide a folder named toppar that contains charmm parameter/topology files.", 66 | "lorentz-bertholot"], 67 | "gro": ["top", "Please provide graomcs .top file corresponding to your system and also make sure that .itp files " 68 | "are present in the directory where calculations are being run. 
class WaterAnalysis(object):
    """Parent class for setting up water analysis calculations in molecular
    dynamics trajectories.

    On construction the object loads the first trajectory frame, partitions
    atom indices into water / protein / other groups, builds the non-bonded
    parameter matrices and assigns hydrogen-bond types to solute atoms.
    """

    def __init__(self, topology_file, trajectory, supporting_file=None):
        """Initialize WaterAnalysis object for a trajectory and
        corresponding topology file.

        Parameters
        ----------
        topology_file : string
            Filename of the system topology file.
        trajectory : string
            Filename of the molecular dynamics trajectory.
        supporting_file : None, optional
            Filename of additional file containing non-bonded parameters for
            every particle in the system. Default: None

        Raises
        ------
        IOError
            If the topology or trajectory file does not exist.
        SystemExit
            If the topology file extension is not supported, or if water
            molecules are not ordered as oxygen, hydrogen, hydrogen.
        AssertionError
            If unit cell information or water residues cannot be detected,
            or the atom partition / parameter matrices are inconsistent.
        """
        if not os.path.exists(topology_file) or not os.path.exists(trajectory):
            raise IOError("File %s or %s does not exist." % (topology_file, trajectory))
        self.topology_file = topology_file
        self.trajectory = trajectory

        # Validate the extension BEFORE indexing the requirements table;
        # otherwise an unsupported extension fails with a bare KeyError and
        # the explanatory message below is never shown.
        topology_extension = self.topology_file.split(".")[-1]
        if topology_extension not in requirements:
            message = """SSTMap currently does not support %s topology file type.
            If this is a non-standard force-filed, consider using a PDB file as a topplogy
            and provide a text file containing non-bonded parameters for each atom in your system.
            See sstmap.org for more details.
            """ % topology_extension
            sys.exit(message)
        required_support = requirements[topology_extension][0]
        self.comb_rule = requirements[topology_extension][-1]
        if required_support == topology_extension:
            # The topology file itself carries the non-bonded parameters.
            self.supporting_file = self.topology_file
        else:
            self.supporting_file = supporting_file

        # Load the first frame and perform sanity check on PBC's in the trajectory
        if self.topology_file.endswith(".h5"):
            print("topology ends with h5")
            first_frame = md.load_frame(self.trajectory, 0)
        else:
            first_frame = md.load_frame(self.trajectory, 0, top=self.topology_file)
        assert first_frame.unitcell_lengths is not None, "Could not detect unit cell information."
        self.topology = first_frame.topology

        # Create index arrays for iteration over groups of atoms and perform
        # some sanity checks on system topology
        super_wat_select_exp = " or ".join(
            "resname %s" % wat_res for wat_res in _WATER_RESNAMES)
        self.all_atom_ids = self.topology.select("all")
        self.prot_atom_ids = self.topology.select("protein")
        self.wat_atom_ids = self.topology.select("water")
        # NOTE(review): neighbors are selected with the "water" keyword even
        # when the resname-based fallback below is needed -- confirm intent.
        self.set_neighbors("water and name O")
        if self.wat_atom_ids.shape[0] == 0:
            self.wat_atom_ids = self.topology.select(super_wat_select_exp)
        assert (self.wat_atom_ids.shape[0] != 0), \
            "Unable to recognize water residues in the system!"
        assert (self.topology.atom(self.wat_atom_ids[0]).name == "O"), \
            "Failed while constructing water oxygen atom indices!"
        self.wat_oxygen_atom_ids = np.asarray(
            [atom for atom in self.wat_atom_ids if self.topology.atom(atom).name == "O"])
        # Number of sites per water = index distance between consecutive
        # oxygens; with a single water fall back to its atom count.
        if self.wat_oxygen_atom_ids.shape[0] > 1:
            self.water_sites = self.wat_oxygen_atom_ids[1] - self.wat_oxygen_atom_ids[0]
        else:
            self.water_sites = self.wat_atom_ids.shape[0]
        for i in self.wat_oxygen_atom_ids:
            O, H1, H2 = (self.topology.atom(i).name[0],
                         self.topology.atom(i + 1).name[0],
                         self.topology.atom(i + 2).name[0])
            if O != "O" or H1 != "H" or H2 != "H":
                sys.exit("Water molecules in the topology must be organized as Oxygen, Hydrogen, Hydrogen, Virtual-sites.")
        self.non_water_atom_ids = np.setdiff1d(self.all_atom_ids, self.wat_atom_ids)
        # ions or ligands
        self.non_prot_atom_ids = np.setdiff1d(self.non_water_atom_ids, self.prot_atom_ids)
        # if no protein, then set other solute to protein index variable for
        # energy calculation purposes
        if self.prot_atom_ids.shape[0] == 0:
            self.prot_atom_ids = self.non_water_atom_ids
        assert (self.wat_atom_ids.shape[0] + self.non_water_atom_ids.shape[0] == self.all_atom_ids.shape[0]), \
            "Failed to partition atom indices in the system correctly!"

        # Obtain non-bonded parameters for the system
        print("Obtaining non-bonded parameters for the system ...")
        self.chg_product, self.acoeff, self.bcoeff = self.generate_nonbonded_params()
        assert self.chg_product.shape == self.acoeff.shape == self.bcoeff.shape, \
            "Mismatch in non-bonded parameter matrices, exiting."
        print("Done.")

        # Assign a hydrogen bond type to atoms
        print("Assigning hydrogen bond types ...")
        self.don_H_pair_dict = {}
        self.prot_hb_types = np.zeros(len(self.all_atom_ids), dtype=np.int_)
        self.solute_acc_ids, self.solute_don_ids, self.solute_acc_don_ids = self.assign_hb_types()
        print("Done.")

    def set_neighbors(self, mask):
        """Method for setting atoms that should be used during shell-wise breakdown
        of energies in HSA and first shell neighbor count in GIST.

        Sets ``self.neighbor_ids`` to the selected atom indices and
        ``self.wat_nbrs_shell`` to a zero-filled integer array of the same
        length.

        Parameters
        ----------
        mask : string
            Atom selection expression passed to ``self.topology.select``.
        """
        self.neighbor_ids = self.topology.select(mask)
        # np.int was removed from NumPy; np.int_ is the supported spelling.
        self.wat_nbrs_shell = np.zeros(self.neighbor_ids.shape[0], dtype=np.int_)

    def _donor_hydrogen_pairs(self, bonds):
        """Return ``[heavy_atom, hydrogen]`` index pairs for every bond in
        ``bonds`` that has a hydrogen at either end."""
        pairs = []
        for at1, at2 in bonds:
            if self.topology.atom(at2).element.name == "hydrogen":
                pairs.append([at1, at2])
            if self.topology.atom(at1).element.name == "hydrogen":
                pairs.append([at2, at1])
        return pairs

    @staticmethod
    def _water_pair_triplets(water, nbrs):
        """Return the four angle triplets per neighbor for a water-water pair.

        For each neighbor oxygen the triplets place either the neighbor's or
        the central water's hydrogens (oxygen index + 1 / + 2) last.
        """
        triplets = []
        for wat_nbr in nbrs:
            triplets.extend(
                [[water, wat_nbr, wat_nbr + 1], [water, wat_nbr, wat_nbr + 2],
                 [wat_nbr, water, water + 1], [wat_nbr, water, water + 2]])
        return triplets

    @function_timer
    def assign_hb_types(self):
        """Assigns a hydrogen-bond type to each solute atom and records, for
        every donor, the hydrogens bonded to it.

        Returns
        -------
        solute_acc_ids : numpy.ndarray
            An array of indices corresponding to solute acceptor atoms
        solute_don_ids : numpy.ndarray
            An array of indices corresponding to solute donor atoms
        solute_acc_don_ids : numpy.ndarray
            An array of indices corresponding to solute atoms that are both acceptors and donors

        Notes
        -----
        The following attributes of the object are also updated.
        self.don_H_pair_dict:
            Keys are indices of atoms in solute_don_ids and solute_acc_don_ids.
            The value for each key is a list of two-element lists; first
            index is the donor atom and second index is the bonded hydrogen.
        self.prot_hb_types:
            Array with one entry per atom in the system, holding the numeric
            h-bond type: 0=non_hb, 1=acceptor, 2=donor, 3=both.

        Classification as implemented: nitrogen with a bonded hydrogen is a
        donor, oxygen/sulfur with a bonded hydrogen is acceptor+donor, and
        any of the three elements without a bonded hydrogen is an acceptor.
        """
        self.topology.create_standard_bonds()
        # Index pairs of all bonds whose first atom is not in a water
        # residue; dict.fromkeys drops exact duplicates, keeping bond order.
        non_water_bonds = list(dict.fromkeys(
            (bond[0].index, bond[1].index)
            for bond in self.topology.bonds
            if bond[0].residue.name not in _WATER_RESNAMES))
        # Map atom index -> its bonds once, instead of scanning the whole
        # bond list for every candidate atom.
        bonds_by_atom = {}
        for bond in non_water_bonds:
            for atom_index in dict.fromkeys(bond):
                bonds_by_atom.setdefault(atom_index, []).append(bond)

        acc_list = []
        don_list = []
        acc_don_list = []
        for at in self.prot_atom_ids:
            element_name = self.topology.atom(at).element.name
            if element_name not in DON_ACC_LIST:
                continue
            if element_name == "nitrogen":
                with_hydrogen = don_list
            elif element_name in ["oxygen", "sulfur"]:
                with_hydrogen = acc_don_list
            else:
                continue

            don_h_pairs = self._donor_hydrogen_pairs(bonds_by_atom.get(int(at), []))
            if len(don_h_pairs) != 0:
                for donor, hydrogen in don_h_pairs:
                    self.don_H_pair_dict.setdefault(donor, []).append([donor, hydrogen])
                if at not in with_hydrogen:
                    with_hydrogen.append(at)
            else:
                # if no bonds with hydrogen found, add to acceptors
                acc_list.append(at)

        solute_acc_ids = np.array(acc_list, dtype=np.int_)
        solute_acc_don_ids = np.array(acc_don_list, dtype=np.int_)
        solute_don_ids = np.array(don_list, dtype=np.int_)

        self.prot_hb_types[solute_acc_ids] = 1
        self.prot_hb_types[solute_don_ids] = 2
        self.prot_hb_types[solute_acc_don_ids] = 3

        return solute_acc_ids, solute_don_ids, solute_acc_don_ids

    @function_timer
    def generate_nonbonded_params(self):
        """
        Generates non-bonded parameters for energy calculations.

        Charges and (sigma, epsilon) pairs are read from a text table
        (``.txt`` / ``.txth5`` supporting file: column 0 charge, remaining
        columns vdW parameters) or from a ParmEd topology object (CHARMM
        ``.psf`` plus a parameter directory, or any other file ParmEd can
        load).

        Returns
        -------
        chg_product : numpy.ndarray
            An N_sites x N_particles matrix where N_sites is the number of sites in the water model and
            N_particles is the total number of particles in the system, each entry of the matrix is the
            product of the charges q_i*q_j used for the calculation of electrostatic interactions.
        acoeff : numpy.ndarray
            Matrix of the same shape holding the A coefficient in the
            AB form of Lennard Jones potential.
        bcoeff : numpy.ndarray
            Matrix of the same shape holding the B coefficient in the
            AB form of Lennard Jones potential.

        Raises
        ------
        Exception
            If ``self.comb_rule`` is not a recognised combination rule.
        """
        if self.supporting_file.endswith((".txt", ".txth5")):
            nb_data = np.loadtxt(self.supporting_file)
            chg = nb_data[:, 0]
            vdw = nb_data[:, 1:]
        else:
            if self.topology_file.endswith(".psf"):
                parmed_topology_object = pmd.load_file(self.topology_file)
                param_dir = os.path.abspath(self.supporting_file)
                param_files = [
                    os.path.join(param_dir, f) for f in os.listdir(param_dir)
                    if os.path.isfile(os.path.join(param_dir, f))
                    and f.endswith((".rtf", ".top", ".par", ".prm", ".inp", ".str"))]
                params = CharmmParameterSet(*param_files)
                # Best effort: a parameter-loading failure is reported but
                # does not abort here.
                try:
                    parmed_topology_object.load_parameters(params)
                except Exception as e:
                    print(e)
            else:
                parmed_topology_object = pmd.load_file(self.supporting_file)
            atoms = parmed_topology_object.atoms
            vdw = [[atoms[at].sigma, atoms[at].epsilon] for at in self.all_atom_ids]
            chg = [atoms[at].charge for at in self.all_atom_ids]

        # Charges from a user-provided .txt file are assumed to be in correct
        # units; every other source is scaled by 18.2223.
        # TODO: Make the units for charges explicit in docstring
        chg = np.asarray(chg)
        if not self.supporting_file.endswith(".txt"):
            chg = chg * 18.2223
        vdw = np.asarray(vdw)

        # Parameters of the first water molecule define the water model sites.
        water_site_ids = self.wat_atom_ids[0:self.water_sites]
        water_chg = chg[water_site_ids].reshape(self.water_sites, 1)
        chg_product = water_chg * chg[self.all_atom_ids]

        water_sig = vdw[water_site_ids, 0].reshape(self.water_sites, 1)
        water_eps = vdw[water_site_ids, 1].reshape(self.water_sites, 1)
        mixed_sig, mixed_eps = None, None
        if self.comb_rule is None or self.comb_rule == "lorentz-bertholot":
            mixed_sig = 0.5 * (water_sig + vdw[self.all_atom_ids, 0])
            mixed_eps = np.sqrt(water_eps * vdw[self.all_atom_ids, 1])
        if self.comb_rule == "geometric":
            mixed_sig = np.sqrt(water_sig * vdw[self.all_atom_ids, 0])
            mixed_eps = np.sqrt(water_eps * vdw[self.all_atom_ids, 1])

        if mixed_eps is None or mixed_sig is None:
            raise Exception("Couldn't assign vdw params")
        acoeff = 4 * mixed_eps * (mixed_sig**12)
        bcoeff = 4 * mixed_eps * (mixed_sig**6)
        return chg_product, acoeff, bcoeff

    def calculate_hydrogen_bonds(self, traj, water, nbrs, water_water=True):
        """Calculates hydrogen bonds made by a water molecule with its first shell
        water and solute neighbors.

        Parameters
        ----------
        traj : md.trajectory
            MDTraj trajectory object for which hydrogen bonds are to be calculates.
        water : int
            The index of water oxygen atom
        nbrs : np.ndarray
            Indices of the water oxygen or solute atoms in the first solvation shell of the water molecule.
        water_water : bool
            Boolean for whether water-water or solute-water hbonds are desired

        Returns
        -------
        hbonds : np.ndarray
            Array of atom-index triplets whose angle in the first frame of
            ``traj`` is at most ANGLE_CUTOFF_RAD.  As built here each triplet
            is ordered [acceptor atom, donor atom, donor hydrogen].  An empty
            (0, 3) array is returned when there are no candidate triplets.
        """
        if water_water:
            angle_triplets = self._water_pair_triplets(water, nbrs)
        else:
            angle_triplets = []
            for solute_nbr in nbrs:
                hb_type = self.prot_hb_types[solute_nbr]
                if hb_type == 1 or hb_type == 3:
                    # solute accepts from either water hydrogen
                    angle_triplets.extend(
                        [[solute_nbr, water, water + 1], [solute_nbr, water, water + 2]])
                if hb_type == 2 or hb_type == 3:
                    # solute donates through each of its bonded hydrogens
                    for don_H_pair in self.don_H_pair_dict[solute_nbr]:
                        angle_triplets.append([water, solute_nbr, don_H_pair[1]])

        if len(angle_triplets) == 0:
            # Nothing to measure; avoid handing an empty index array to mdtraj.
            return np.empty((0, 3), dtype=np.int_)

        angle_triplets = np.asarray(angle_triplets)
        angles = md.compute_angles(traj, angle_triplets)
        angles[np.isnan(angles)] = 0.0
        hbonds = angle_triplets[np.where(angles[0, :] <= ANGLE_CUTOFF_RAD)]
        return hbonds

    def water_nbr_orientations(self, traj, water, nbrs):
        """Calculates orientations of the neighboring water molecules of a given water molecule. The orientation is
        defined as the miniumum of the four angles computed for each water-neighbor pair.

        Parameters
        ----------
        traj : md.trajectory
            MDTraj trajectory object for which hydrogen bonds are to be calculates.
        water : int
            The index of water oxygen atom
        nbrs : np.ndarray, int, shape=(N^{ww}_nbr, )
            Indices of the water oxygen atoms in the first solvation shell of the water molecule.

        Returns
        -------
        wat_orientations : list
            One angle in degrees per neighbor, taken from the first frame of
            ``traj``; empty when ``nbrs`` is empty.
        """
        angle_triplets = self._water_pair_triplets(water, nbrs)
        if len(angle_triplets) == 0:
            return []
        angles = md.compute_angles(traj, np.asarray(angle_triplets))
        angles[np.isnan(angles)] = 0.0
        # Four consecutive angles belong to each neighbor; keep the smallest.
        wat_orientations = list(np.rad2deg(angles[0, :].reshape(-1, 4).min(axis=1)))
        return wat_orientations