├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── devtools ├── conda-recipe │ ├── README.md │ ├── build.sh │ └── meta.yaml └── travis-ci │ ├── after_success.sh │ ├── index.html │ ├── install.sh │ └── push-docs-to-s3.py ├── dist └── smarty-0.1.0-py2.7.egg ├── examples ├── README.md ├── parm@frosst │ ├── README.md │ ├── atomtypes │ │ ├── README.md │ │ ├── basetypes-elemental.smarts │ │ ├── basetypes.smarts │ │ ├── decorators-simple.smarts │ │ ├── decorators.smarts │ │ └── substitutions.smarts │ ├── make_subset.py │ ├── molecules │ │ ├── zinc-subset-500-parm@frosst.mol2.gz │ │ ├── zinc-subset-500-tripos.mol2.gz │ │ ├── zinc-subset-parm@frosst.mol2.gz │ │ └── zinc-subset-tripos.mol2.gz │ └── scripts │ │ ├── README.md │ │ └── convert-atom-names-to-tripos.py ├── smarty_simulations │ ├── AlkEthOH.csv │ ├── AlkEthOH.log │ ├── AlkEthOH.pdf │ ├── Hydrogen.csv │ ├── Hydrogen.log │ ├── Hydrogen.pdf │ ├── README.md │ ├── Simple-Decorators.csv │ ├── Simple-Decorators.log │ └── Simple-Decorators.pdf └── smirky │ ├── README.md │ ├── atom_AND_decorators.smarts │ ├── atom_OR_bases.smarts │ ├── atom_OR_decorators.smarts │ ├── atom_odds_forTorsions.smarts │ ├── bond_AND_decorators.smarts │ ├── bond_OR_bases.smarts │ ├── bond_odds_forTorsions.smarts │ ├── initial_Torsions.smarts │ ├── output.csv │ ├── output.log │ ├── output.pdf │ ├── output_results.smarts │ └── substitutions.smarts ├── oe_license.txt.enc ├── setup.py ├── smarty ├── __init__.py ├── atomtyper.py ├── cli_smarty.py ├── cli_smirky.py ├── data │ ├── README.md │ ├── __init__.py │ ├── atomtypes │ │ ├── README.md │ │ ├── basetypes.smarts │ │ ├── decorators-simple.smarts │ │ ├── decorators.smarts │ │ ├── initial_AlkEthOH.smarts │ │ ├── initialtypes.smarts │ │ ├── new-decorators.smarts │ │ └── replacements.smarts │ └── odds_files │ │ ├── atom_OR_bases.smarts │ │ ├── atom_decorators.smarts │ │ ├── atom_index_odds.smarts │ │ ├── bond_AND_decorators.smarts │ │ ├── bond_OR_bases.smarts │ │ ├── bond_index_odds.smarts │ │ 
└── substitutions.smarts ├── sampler.py ├── sampler_smirky.py ├── score_utils.py ├── tests │ ├── __init__.py │ ├── test_atomtyper.py │ ├── test_sampler.py │ ├── test_smirky_sampler.py │ └── test_utils.py └── utils.py └── utilities ├── README.md └── test_smirks_or_environment_speed ├── README.md ├── Torsion_0_0.00e+00_results.smarts └── testing_smirks_speed.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 29 | *.manifest 30 | *.spec 31 | 32 | # Extracted archive 33 | AlkEthOH_inputfiles/ 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *,cover 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | 56 | # Sphinx documentation 57 | docs/_build/ 58 | 59 | # PyBuilder 60 | target/ 61 | 62 | # Ipython notebook checkpoints 63 | *.ipynb_checkpoints/ 64 | 65 | # ignore files created during tests 66 | smarty/tests/*.pdf 67 | smarty/tests/*.log 68 | smarty/tests/*.csv 69 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: c 2 | sudo: false 3 | 4 | branches: 5 | only: 6 | - master 7 | 8 | install: 9 | - source devtools/travis-ci/install.sh 10 | - export 
PYTHONUNBUFFERED=true 11 | # Unpack encrypted OpenEye license file 12 | - if [ "$TRAVIS_SECURE_ENV_VARS" == true ]; then openssl aes-256-cbc -K $encrypted_e60be1d1adc8_key -iv $encrypted_e60be1d1adc8_iv -in oe_license.txt.enc -out $OE_LICENSE -d; fi 13 | - if [ "$TRAVIS_SECURE_ENV_VARS" == false ]; then echo "OpenEye license will not be installed in forks."; fi 14 | 15 | script: 16 | # Add omnia channel 17 | - conda config --add channels ${ORGNAME} 18 | # Create and activate test environment 19 | - conda create --yes -n test python=$python 20 | - source activate test 21 | # Install OpenEye toolkit 22 | #- pip install $OPENEYE_CHANNEL openeye-toolkits && python -c "import openeye; print(openeye.__version__)" 23 | # Use beta version for partial bond orders 24 | - pip install --pre -i https://pypi.anaconda.org/openeye/label/beta/simple openeye-toolkits && python -c "import openeye; print(openeye.__version__)" 25 | # Install openforcefield tools 26 | # TODO if changes to openforcefield become less dynamic switch to conda install? 27 | - pip install git+https://github.com/openforcefield/openforcefield.git 28 | # Build the recipe 29 | - conda build devtools/conda-recipe 30 | # Install 31 | - conda install --yes --use-local smarty 32 | # Run tests 33 | - conda install --yes nose nose-timer 34 | - cd devtools && nosetests -vv --nocapture --with-timer $PACKAGENAME && cd .. 
35 | 36 | env: 37 | matrix: 38 | - python=2.7 CONDA_PY=27 39 | - python=3.4 CONDA_PY=34 40 | - python=3.5 CONDA_PY=35 41 | 42 | global: 43 | - ORGNAME="omnia" 44 | - PACKAGENAME="smarty" 45 | # OpenEye toolkit 46 | - OE_LICENSE="$HOME/oe_license.txt" 47 | - OPENEYE_CHANNEL="-i https://pypi.anaconda.org/openeye/channel/main/simple" 48 | # encrypted BINSTAR_TOKEN for push of dev package to binstar 49 | - secure: "Iw2yv40ElSbS/TstXS9YnsbJFbxsbFQ25fkWlq8H/O3SPJwpX2/PRoCo99R1Scc0mO9BiVMwGDJQeM9y1VoYo3ozv5SIhPvc+0cMOE3AzkRiFEpZeTtDUTxOWsb+k/x5dH5/AapXRtJeKhY3cWe3lhKdv9N+yWrhY29lawXgfU4WsOEl6ON9BPwwPzvKK1sP4z8kIMzDNjt6gJ3m1HzdEQe/ibrOJIEk6Z4kTLQo9z4F9dm73/L4scEgnW6SOACC39nuYCL8PK4zPNKTqpAoVkm18uyrRz62+qPYSl3RCBNOFtbAuz7fz+ShSMA6g//LpAobNptpQeQpWXkHhYk5ALc6xzH2zScVgrPytKAPwi8mYKq9gYZnUPYgpOdjK3bNyfkGjeV9I4sQwNCBYlKtGHoqZ1l+l6oYsbx+Ti+nIeK67ufGmAugH4GJ3dhZvP6ZR73/irOrvSWiJJgqI1/k4c9Ela4wDpQHDp9sRf03HgSrRTX2gQ3E/JmPx8s56tMdkmrIDIgy6Edc80AN6zEKX0+3YVGcH6ltUViDidRGDlZ7xbUUXYtjqMJXuJEh2SV/wbeVmrBM8Pn+IfsBzLKnd1jqe3pXfoCqbCtvNwW8Sr4qMgWBEHvEtB4C5KvO5CydmRx95q/0ziRGb/VEV6QOnGxT7EIJDfyQeUqNqJD7Bdo=" 50 | # encrypted AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY 51 | #- secure: "SrSd1JoI8dBXQxDAX0xBTYBinTusRBQoPETnxHrBAgKdoty1pkzaghTKNMsrGsk78iwkkj1hAyttIY9trdFQkmx+OTx0fLKFmDHsMkgko4RzTtrgLgoxuRIs/gruID2cN1XKEbxlhRmQF14+q8/X1q6iGGdYMrxo51JcYPuEOSo=" 52 | #- secure: "br6QRMYXhHltYTEh/d+zejxcunT3GsqwQvxxLmqnLxi+LIxX4j7eymR6p4fPBd5mCRxyvkQEjnSZxF6e7JlEKxWVcMG28I/dBWzVIRW3EKQQNRmyI+JL1dfNaqj68kHJD+FknBwHK9LD238JPcyqXPdVrm9iPkDijPczvPBxvDs=" 53 | 54 | #after_success: 55 | # - echo "after_success" 56 | # - if [ "$TRAVIS_SECURE_ENV_VARS" == true ]; then ./devtools/travis-ci/after_success.sh; fi 57 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016, Open Forcefield Group 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 
a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 14 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 15 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 16 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 17 | DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 18 | OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 19 | OR OTHER DEALINGS IN THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.org/openforcefield/smarty.svg?branch=master)](https://travis-ci.org/openforcefield/smarty?branch=master) 2 | [![DOI](https://zenodo.org/badge/60921138.svg)](https://zenodo.org/badge/latestdoi/60921138) 3 | 4 | # `smarty`: Exploring Bayesian atom type sampling 5 | 6 | This is a simple example of how Bayesian atom type sampling using reversible-jump Markov chain Monte Carlo (RJMCMC) [1] over SMARTS types might work. 
7 | 8 | All tools for implementation of the SMIRNOFF in OpenMM have been moved to the [openforcefield repository](https://github.com/openforcefield/openforcefield). 9 | 10 | ## Manifest 11 | 12 | * `examples/` - some toy examples - look here to get started 13 | * `smarty/` - simple toolkit illustrating the use of RJMCMC to sample over SMARTS-specified atom types and SMIRKS-specified bonded and non-bonded parameter types. 14 | * `devtools/` - continuous integration and packaging scripts and utilities 15 | * `oe_license.txt.enc` - encrypted OpenEye license for continuous integration testing 16 | * `.travis.yml` - travis-ci continuous integration file 17 | * `utilities/` - some utility functionality relating to the project, specifically testing the speed of ChemicalEnvironments for sampling in SMIRKY. 18 | 19 | ## Prerequisites 20 | 21 | Install [miniconda](http://conda.pydata.org/miniconda.html) first. On `osx` with `bash`, this is: 22 | ``` 23 | wget https://repo.continuum.io/miniconda/Miniconda2-latest-MacOSX-x86_64.sh 24 | bash Miniconda2-latest-MacOSX-x86_64.sh -b -p $HOME/miniconda 25 | export PATH="$HOME/miniconda/bin:${PATH}" 26 | ``` 27 | 28 | You must first install the OpenEye toolkit: 29 | ``` 30 | pip install -i https://pypi.anaconda.org/OpenEye/simple OpenEye-toolkits 31 | ``` 32 | 33 | You can then use conda to install smarty: 34 | ``` 35 | conda config --add channels omnia 36 | conda install -c omnia smarty 37 | ``` 38 | 39 | ## Installation 40 | 41 | Install `smarty` from the `smarty/` directory with: 42 | ```bash 43 | pip install . 44 | ``` 45 | If you modify the `smarty` source code (rather than the examples), reinstall with 46 | ```bash 47 | pip install . --upgrade 48 | ``` 49 | 50 | ## Documentation 51 | 52 | 53 | ## SMARTY atom type sampler 54 | 55 | Check out the example in `examples/smarty_simulations/`: 56 | 57 | Atom types are specified by SMARTS matches with corresponding parameter names. 
58 | 59 | First, we start with a number of initial "base types" which are essentially indestructible (often generic) atom types, specified in `atomtypes/basetypes.smarts`: 60 | ``` 61 | % atom types 62 | [#1] hydrogen 63 | [#6] carbon 64 | [#7] nitrogen 65 | [#8] oxygen 66 | [#9] fluorine 67 | [#15] phosphorous 68 | [#16] sulfur 69 | [#17] chlorine 70 | [#35] bromine 71 | [#53] iodine 72 | ``` 73 | Note that lines beginning with `%` are comment lines. 74 | 75 | We also specify a number of starting types, "initial types" which can be the same or different from the base types. These follow the same format, and `atomtypes/basetypes.smarts` can be reused unless alternate behavior is desired (such as starting from more sophisticated initial types). 76 | 77 | We have two sampler options for SMARTY which differ in how focused the sampling is. The original sampler samples over all elements/patterns at once, whereas the elemental sampler focuses on sampling only one specific element. The principle of sampling is the same; the only change is in which elements we sample over. To sample only over a single element, such as oxygen, for example, we use the elemental sampler to focus on that element. 78 | 79 | 80 | ### Generating New SMARTS patterns 81 | 82 | There are two options for how to change SMARTS patterns when creating new atom types. 83 | One is using combinatorial decorators (default) and the other is using simple decorators (`--decoratorbehavior=simple-decorators`). However, it should be noted that we have found the simple decorators insufficient at distinguishing atomtypes even for the most simple sets of molecules. 84 | 85 | **Combinatorial Decorators** 86 | 87 | The first option (combinatorial-decorators) attempts to create a new atomtype by adding an Alpha or Beta substituent to a basetype or an atomtype. 88 | These decorators are different from those of the simple-decorators option and do not have atom type or bond information in them. 
89 | The new decorators are listed in `AlkEthOH/atomtypes/new-decorators.smarts` and `parm@frosst/atomtypes/new-decorators.smarts`: 90 | 91 | ``` 92 | % total connectivity 93 | X1 connections-1 94 | X2 connections-2 95 | X3 connections-3 96 | X4 connections-4 97 | % total-h-count 98 | H0 total-h-count-0 99 | H1 total-h-count-1 100 | H2 total-h-count-2 101 | H3 total-h-count-3 102 | % formal charge 103 | +0 neutral 104 | +1 cationic+1 105 | -1 anionic-1 106 | % aromatic/aliphatic 107 | a aromatic 108 | A aliphatic 109 | ``` 110 | Each decorator has a corresponding string token (no spaces allowed!) that is used to create human-readable versions of the corresponding atom types. 111 | 112 | For example, we may find the atom type ```[#6]&H3``` which is `carbon total-h-count-3` for a C atom bonded to three hydrogens. 113 | 114 | **Simple Decorators** 115 | The second option (simple-decorators) attempts to split off a new atom type from a parent atom type by combining (via an "and" operator, `&`) the parent atom type with a "decorator". 
116 | The decorators are listed in `AlkEthOH/atomtypes/decorators.smarts` or `parm@frosst/atomtypes/decorators.smarts`: 117 | ``` 118 | % bond order 119 | $([*]=[*]) double-bonded 120 | $([*]#[*]) triple-bonded 121 | $([*]:[*]) aromatic-bonded 122 | % bonded to atoms 123 | $(*~[#1]) hydrogen-adjacent 124 | $(*~[#6]) carbon-adjacent 125 | $(*~[#7]) nitrogen-adjacent 126 | $(*~[#8]) oxygen-adjacent 127 | $(*~[#9]) fluorine-adjacent 128 | $(*~[#15]) phosphorous-adjacent 129 | $(*~[#16]) sulfur-adjacent 130 | $(*~[#17]) chlorine-adjacent 131 | $(*~[#35]) bromine-adjacent 132 | $(*~[#53]) iodine-adjacent 133 | % degree 134 | D1 degree-1 135 | D2 degree-2 136 | D3 degree-3 137 | D4 degree-4 138 | D5 degree-5 139 | D6 degree-6 140 | % valence 141 | v1 valence-1 142 | v2 valence-2 143 | v3 valence-3 144 | v4 valence-4 145 | v5 valence-5 146 | v6 valence-6 147 | % total-h-count 148 | H1 total-h-count-1 149 | H2 total-h-count-2 150 | H3 total-h-count-3 151 | % aromatic/aliphatic 152 | a atomatic 153 | A aliphatic 154 | ``` 155 | This option also has the corresponding string tokens. 156 | 157 | Newly proposed atom types are added to the end of the list. 158 | After a new atom type is proposed, all molecules are reparameterized using the new set of atom types. 159 | Atom type matching proceeds by trying to see if each SMARTS match can be applied working from top to bottom of the list. 160 | This means the atom type list is hierarchical, with more general types appearing at the top of the list and more specific subtypes appearing at the bottom. 161 | 162 | If a proposed type matches zero atoms, the RJMCMC move is rejected. 163 | 164 | Currently, the acceptance criteria does not include the full Metropolis-Hastings acceptance criteria that would include the reverse probability. This needs to be added in. 165 | 166 | ### Elemental Decomposition 167 | 168 | The input option `--element` allows a user to specify which atoms types to sample based on atomic number. 
The default input is 0 (corresponding to no specified atomic number) and will attempt to match all atom types. If an element number is given (i.e. `--element=1` for hydrogen) only atoms with that atomic number are considered. Specifying an element number does not affect any other smarty behavior. 169 | 170 | Finally, here is a complete list of input options for smarty. Under `usage` all bracketed parameters are optional. 171 | ``` 172 | Usage: Sample over atom types, optionally attempting to match atom types in a reference typed set of molecules. 173 | 174 | usage: smarty --basetypes smartsfile --initialtypes smartsfile 175 | --decorators smartsfile --molecules molfile 176 | [--element atomicnumber --substitutions smartsfile --reference molfile 177 | --decoratorbehavior combinatorial-decorators/simple-decorators 178 | --iterations niterations --temperature temperature --trajectory trajectorfile 179 | --plot plotfile] 180 | 181 | example: 182 | python smarty --basetypes=atomtypes/basetypes.smarts --initialtypes=atomtypes/initialtypes.smarts \ 183 | --decorators=atomtypes/decorators.smarts --substitutions=atomtypes/substitutions.smarts \ 184 | --molecules=molecules/zinc-subset-tripos.mol2.gz --reference=molecules/zinc-subset-parm@frosst.mol2.gz \ 185 | --iterations 1000 --temperature=0.1 186 | 187 | 188 | Options: 189 | --version show program's version number and exit 190 | -h, --help show this help message and exit 191 | -e ELEMENT, --element=ELEMENT 192 | By default the element value is 0 corresponding to 193 | sampling all atomtypes. If another atomic number is 194 | specified only atoms with that atomic number are 195 | sampled (i.e. --element=8 will only sample atomtypes 196 | for oxygen atoms). 197 | -b BASETYPES, --basetypes=BASETYPES 198 | Filename defining base or generic atom types as SMARTS 199 | atom matches; these are indestructible and normally 200 | are elemental atom types. 
201 | -f BASETYPES, --initialtypes=BASETYPES 202 | Filename defining initial (first) atom types as SMARTS 203 | atom matches. 204 | -d DECORATORS, --decorators=DECORATORS 205 | Filename defining decorator atom types as SMARTS atom 206 | matches. 207 | -s SUBSTITUTIONS, --substitutions=SUBSTITUTIONS 208 | Filename defining substitution definitions for SMARTS 209 | atom matches (OPTIONAL). 210 | -r REFMOL, --reference=REFMOL 211 | Reference typed molecules for computing likelihood 212 | (must match same molecule and atom ordering in 213 | molecules file) (OPTIONAL). 214 | -m MOLECULES, --molecules=MOLECULES 215 | Small molecule set (in any OpenEye compatible file 216 | format) containing 'dG(exp)' fields with experimental 217 | hydration free energies. 218 | -i ITERATIONS, --iterations=ITERATIONS 219 | MCMC iterations. 220 | -t TEMPERATURE, --temperature=TEMPERATURE 221 | Effective temperature for Monte Carlo acceptance, 222 | indicating fractional tolerance of mismatched atoms 223 | (default: 0.1). If 0 is specified, will behave in a 224 | greedy manner. 225 | -l TRAJECTORY_FILE, --trajectory=TRAJECTORY_FILE 226 | Name for trajectory file output, trajectory saves only 227 | changes to the list of 'atomtypes' for each iteration. 228 | If the file already exists, it is overwritten. 229 | -p PLOT_FILE, --plot=PLOT_FILE 230 | Name for output file of a plot of the score versus 231 | time. If not specified, none will be written. If 232 | provided, needs to use a file extension suitable for 233 | matplotlib/pylab. Currently requires a trajectory file 234 | to be written using -l or --trajectory. 235 | -x DECORATOR_BEHAVIOR, --decoratorbehavior=DECORATOR_BEHAVIOR 236 | Choose between simple-decorators or combinatorial- 237 | decorators (default = combinatorial-decorators). 
238 | ``` 239 | 240 | --- 241 | 242 | ## smirky 243 | 244 | Check out examples in `examples/smirky/`: 245 | 246 | This tool can sample any chemical environment type relevant to SMIRNOFFs, that is atoms, bonds, angles, and proper and improper torsions, one at a time. 247 | Scoring is analogous to smarty (explained above), but uses a SMIRNOFF with existing parameters as a reference instead of atomtyped molecules. 248 | 249 | Input for this tool can require up to four different file types: 250 | * MOLECULES - any file format readable by OpenEye (mol2, sdf, oeb, etc.) 251 | * ODDSFILES - File with the form "smarts odds" for the different decorator or bond options 252 | * SMARTS - .smarts file type with the form "smarts/smirks label/typename" 253 | * REFERENCE - a SMIRNOFF file with reference atoms, bonds, angles, torsions, and impropers 254 | 255 | ``` 256 | Usage: Sample over fragment types (atoms, bonds, angles, torsions, or impropers) 257 | optionally attempting to match created types to an established SMIRNOFF. 258 | For all files left blank, they will be taken from this module's 259 | data/odds_files/ subdirectory. 
260 | 261 | usage: smirky --molecules molfile --typetag fragmentType 262 | [--atomORbases AtomORbaseFile --atomORdecors AtomORdecorFile 263 | --atomANDdecors AtomANDdecorFile --bondORbase BondORbaseFile 264 | --bondANDdecors BondANDdecorFile --atomIndexOdds AtomIndexFile 265 | --bondIndexOdds BondIndexFile --replacements substitutions 266 | --initialFragments initialFragments --SMIRNOFF referenceSMIRNOFF 267 | --temperature float --verbose verbose 268 | --iterations iterations --output outputFile] 269 | 270 | example: 271 | smirky --molecules AlkEthOH_test_filt1_ff.mol2 --typetag Angle 272 | 273 | 274 | 275 | Options: 276 | --version show program's version number and exit 277 | -h, --help show this help message and exit 278 | -m MOLECULES, --molecules=MOLECULES 279 | Small molecule set (in any OpenEye compatible file 280 | format) containing 'dG(exp)' fields with experimental 281 | hydration free energies. This filename can also be an 282 | option in this module's data/molecules sub-directory 283 | -T TYPETAG, --typetag=TYPETAG 284 | type of fragment being sampled, options are 'VdW', 285 | 'Bond', 'Angle', 'Torsion', 'Improper' 286 | -e ODDFILES, --atomORbases=ODDFILES 287 | Filename defining atom OR bases and associated 288 | probabilities. These are combined with atom OR 289 | decorators in SMIRKS, for example in 290 | '[#6X4,#7X3;R2:2]' '#6' and '#7' are atom OR bases. 291 | (OPTIONAL) 292 | -O ODDFILES, --atomORdecors=ODDFILES 293 | Filename defining atom OR decorators and associated 294 | probabilities. These are combined with atom bases in 295 | SMIRKS, for example in '[#6X4,#7X3;R2:2]' 'X4' and 296 | 'X3' are ORdecorators. (OPTIONAL) 297 | -A ODDFILES, --atomANDdecors=ODDFILES 298 | Filename defining atom AND decorators and associated 299 | probabilities. These are added to the end of an atom's 300 | SMIRKS, for example in '[#6X4,#7X3;R2:2]' 'R2' is an 301 | AND decorator. 
(OPTIONAL) 302 | -o ODDFILES, --bondORbase=ODDFILES 303 | Filename defining bond OR bases and their associated 304 | probabilities. These are OR'd together to describe a 305 | bond, for example in '[#6]-,=;@[#6]' '-' and '=' are 306 | OR bases. (OPTIONAL) 307 | -a ODDFILES, --bondANDdecors=ODDFILES 308 | Filename defining bond AND decorators and their 309 | associated probabilities. These are AND'd to the end 310 | of a bond, for example in '[#6]-,=;@[#7]' '@' is an 311 | AND decorator. (OPTIONAL) 312 | -D ODDSFILE, --atomOddsFile=ODDSFILE 313 | Filename defining atom descriptors and probabilities 314 | with making changes to that kind of atom. Options for 315 | descriptors are integers corresponding to that indexed 316 | atom, 'Indexed', 'Unindexed', 'Alpha', 'Beta', 'All'. 317 | (OPTIONAL) 318 | -d ODDSFILE, --bondOddsFile=ODDSFILE 319 | Filename defining bond descriptors and probabilities 320 | with making changes to that kind of bond. Options for 321 | descriptors are integers corresponding to that indexed 322 | bond, 'Indexed', 'Unindexed', 'Alpha', 'Beta', 'All'. 323 | (OPTIONAL) 324 | -s SMARTS, --substitutions=SMARTS 325 | Filename defining substitution definitions for SMARTS 326 | atom matches. (OPTIONAL). 327 | -f SMARTS, --initialtypes=SMARTS 328 | Filename defining initial (first) fragment types as 329 | 'SMIRKS typename'. If this is left blank the 330 | initial type will be a generic form of the given 331 | fragment, for example '[*:1]~[*:2]' for a bond 332 | (OPTIONAL) 333 | -r REFERENCE, --smirff=REFERENCE 334 | Filename defining a SMIRNOFF force field used to 335 | determine reference fragment types in provided set of 336 | molecules. It may be an absolute file path, a path 337 | relative to the current working directory, or a path 338 | relative to this module's data subdirectory (for built 339 | in force fields). (OPTIONAL) 340 | -i ITERATIONS, --iterations=ITERATIONS 341 | MCMC iterations. 
342 | -t TEMPERATURE, --temperature=TEMPERATURE 343 | Effective temperature for Monte Carlo acceptance, 344 | indicating fractional tolerance of mismatched atoms 345 | (default: 0.1). If 0 is specified, will behave in a 346 | greedy manner. 347 | -p OUTPUT, --output=OUTPUT 348 | Filename base for output information. This same base 349 | will be used for all output files created. If None 350 | provided then it is set to 'typetag_temperature' 351 | (OPTIONAL). 352 | -v VERBOSE, --verbose=VERBOSE 353 | If True prints minimal information to the commandline 354 | during iterations. (OPTIONAL) 355 | ``` 356 | 357 | ## The SMIRNOFF force field format 358 | 359 | The SMIRNOFF force field format is documented [here](https://github.com/openforcefield/openforcefield/blob/master/The-SMIRNOFF-force-field-format.md). 360 | It was previously available in this repository, but has been moved. 361 | SMIRNOFF99Frosst, a version of SMIRNOFF mirroring the parameters found in the parm@Frosst force field, is now housed in its own [repository](https://github.com/openforcefield/smirnoff99Frosst). 362 | `forcefield.py` and other modules required to implement the SMIRNOFF format for simulations in OpenMM have also been moved. These scripts and examples on how to use them can be found at [openforcefield/openforcefield](https://github.com/openforcefield/openforcefield). 363 | 364 | ## References 365 | 366 | [1] Green PJ. Reversible jump Markov chain Monte Carlo computation and Bayesian model determination. Biometrika 82:711, 1995. 367 | http://dx.doi.org/10.1093/biomet/82.4.711 368 | -------------------------------------------------------------------------------- /devtools/conda-recipe/README.md: -------------------------------------------------------------------------------- 1 | This is a recipe for building the current development package into a conda 2 | binary. 
3 | 4 | The installation on travis-ci is done by building the conda package, installing 5 | it, running the tests, and then if successful pushing the package to binstar 6 | (and the docs to AWS S3). The binstar auth token is an encrypted environment 7 | variable generated using: 8 | 9 | binstar auth -n yank-travis -o omnia --max-age 22896000 -c --scopes api:write 10 | 11 | and then saved in the environment variable BINSTAR_TOKEN. 12 | 13 | You can set up travis to store an encrypted token via 14 | 15 | gem install travis && travis encrypt BINSTAR_TOKEN=xx 16 | 17 | where xx is the token output by binstar. The final command should print a line (containing 'secure') for inclusion in your .travis.yml file. 18 | -------------------------------------------------------------------------------- /devtools/conda-recipe/build.sh: -------------------------------------------------------------------------------- 1 | pip install . 2 | -------------------------------------------------------------------------------- /devtools/conda-recipe/meta.yaml: -------------------------------------------------------------------------------- 1 | package: 2 | name: smarty 3 | version: 0.0.0 4 | 5 | source: 6 | path: ../.. 
7 | 8 | build: 9 | preserve_egg_dir: True 10 | number: 0 11 | 12 | requirements: 13 | build: 14 | - python 15 | - setuptools 16 | - pandas 17 | 18 | run: 19 | - python 20 | - numpy 21 | - networkx 22 | - lxml 23 | - openmoltools >=0.7.3 24 | - parmed 25 | - matplotlib 26 | - pandas 27 | 28 | test: 29 | requires: 30 | - nose 31 | - nose-timer 32 | imports: 33 | - smarty 34 | 35 | about: 36 | home: https://github.com/openforcefield/smarty 37 | license: MIT 38 | -------------------------------------------------------------------------------- /devtools/travis-ci/after_success.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Must be invoked with $PACKAGENAME 3 | 4 | echo $TRAVIS_PULL_REQUEST $TRAVIS_BRANCH 5 | PUSH_DOCS_TO_S3=false 6 | 7 | if [ "$TRAVIS_PULL_REQUEST" = true ]; then 8 | echo "This is a pull request. No deployment will be done."; exit 0 9 | fi 10 | 11 | 12 | if [ "$TRAVIS_BRANCH" != "master" ]; then 13 | echo "No deployment on BRANCH='$TRAVIS_BRANCH'"; exit 0 14 | fi 15 | 16 | 17 | # Deploy to binstar 18 | conda install --yes anaconda-client jinja2 19 | pushd . 
20 | cd $HOME/miniconda/conda-bld 21 | FILES=*/${PACKAGENAME}-dev-*.tar.bz2 22 | for filename in $FILES; do 23 | anaconda -t $BINSTAR_TOKEN remove --force ${ORGNAME}/${PACKAGENAME}-dev/${filename} 24 | anaconda -t $BINSTAR_TOKEN upload --force -u ${ORGNAME} -p ${PACKAGENAME}-dev ${filename} 25 | done 26 | popd 27 | 28 | if [ $PUSH_DOCS_TO_S3 = true ]; then 29 | # Create the docs and push them to S3 30 | # ----------------------------------- 31 | conda install --yes pip 32 | conda config --add channels $ORGNAME 33 | conda install --yes `conda build devtools/conda-recipe --output` 34 | pip install numpydoc s3cmd msmb_theme 35 | conda install --yes `cat docs/requirements.txt | xargs` 36 | 37 | conda list -e 38 | 39 | (cd docs && make html && cd -) 40 | ls -lt docs/_build 41 | pwd 42 | python devtools/ci/push-docs-to-s3.py 43 | fi 44 | -------------------------------------------------------------------------------- /devtools/travis-ci/index.html: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /devtools/travis-ci/install.sh: -------------------------------------------------------------------------------- 1 | # Temporarily change directory to $HOME to install software 2 | pushd . 
#!/usr/bin/env python

"""
Push the built documentation in docs/_build/ to Amazon S3 using s3cmd.

Must have the following environment variables defined:
* AWS_ACCESS_KEY_ID : AWS access key written to the temporary s3cmd config
* AWS_SECRET_ACCESS_KEY : AWS secret key written to the temporary s3cmd config

The destination is controlled by the module-level constants below:
* BUCKET_NAME : AWS bucket name
* PREFIX : 'latest' or other version number

The script exits with a non-zero status if either s3cmd sync fails.
"""

import os
import pip
import subprocess
import sys
import tempfile
import thermopyl.version


# NOTE(review): the bucket name and the version module both refer to thermopyl;
# this looks copied from that project -- confirm the intended bucket and
# version source for smarty before relying on this script.
BUCKET_NAME = 'thermopyl.org'
if not thermopyl.version.release:
    PREFIX = 'latest'
else:
    PREFIX = thermopyl.version.short_version

# NOTE(review): pip.get_installed_distributions() is not a public pip API and
# is presumably unavailable in newer pip releases -- confirm the pip version
# used on CI.
if not any(d.project_name == 's3cmd' for d in pip.get_installed_distributions()):
    raise ImportError('The s3cmd package is required. try $ pip install s3cmd')

# The secret key is available as a secure environment variable
# on travis-ci to push the build documentation to Amazon S3.
with tempfile.NamedTemporaryFile('w') as f:
    f.write('''[default]
access_key = {AWS_ACCESS_KEY_ID}
secret_key = {AWS_SECRET_ACCESS_KEY}
'''.format(**os.environ))
    # s3cmd reads the config by file name, so the contents must be on disk
    # before it is launched.
    f.flush()

    # Sync the built documentation tree.
    template = ('s3cmd --guess-mime-type --config {config} '
                'sync docs/_build/ s3://{bucket}/{prefix}/')
    cmd = template.format(
        config=f.name,
        bucket=BUCKET_NAME,
        prefix=PREFIX)
    docs_status = subprocess.call(cmd.split())

    # Sync index file (it lives in devtools/travis-ci/ in this repository).
    template = ('s3cmd --guess-mime-type --config {config} '
                'sync devtools/travis-ci/index.html s3://{bucket}/')
    cmd = template.format(
        config=f.name,
        bucket=BUCKET_NAME)
    index_status = subprocess.call(cmd.split())

# Surface upload failures instead of silently discarding the exit codes.
if docs_status != 0 or index_status != 0:
    sys.exit(1)
-------------------------------------------------------------------------------- 1 | # Example application of SMARTY atom type sampler to recover parm@frosst typing 2 | 3 | In this example, the SMARTY `AtomTypeSampler` is used to attempt to recover SMARTS atom types that recapitulate the typing rules from a referenced set of typed molecules. 4 | 5 | ## Manifest 6 | * `smarty.py` - example command-line driver 7 | * `atomtypes/` - input atom type sample specification files 8 | * `molecules/` - typed molecule datasets 9 | * `scripts/` - useful conversion scripts 10 | 11 | ## Usage 12 | 13 | Usage 14 | 15 | Example: 16 | ``` 17 | smarty --basetypes=atomtypes/basetypes-elemental.smarts --initialtypes=atomtypes/basetypes-elemental.smarts --decorators=atomtypes/decorators.smarts --substitutions=atomtypes/substitutions.smarts \ 18 | --molecules=molecules/zinc-subset-tripos.mol2.gz --reference=molecules/zinc-subset-parm@frosst.mol2.gz --iterations 1000 --temperature=0.1 19 | ``` 20 | 21 | Initially, the base atom types are added to the pool of current atom types, and the number of atoms and molecules matched by each atom type are shown: 22 | ``` 23 | INDEX ATOMS MOLECULES TYPE NAME SMARTS REF TYPE FRACTION OF REF TYPED MOLECULES MATCHED 24 | 1 : 88148 7487 | hydrogen [#1] HA 28720 / 28720 (100.000%) 25 | 2 : 90146 7505 | carbon [#6] CA 37143 / 37143 (100.000%) 26 | 3 : 20838 6806 | nitrogen [#7] NB 7612 / 7612 (100.000%) 27 | 4 : 12829 5946 | oxygen [#8] O 4876 / 4876 (100.000%) 28 | 5 : 1001 444 | fluorine [#9] F 1001 / 1001 (100.000%) 29 | 6 : 5 5 | phosphorous [#15] P 5 / 5 (100.000%) 30 | 7 : 3171 2593 | sulfur [#16] S 2544 / 2544 (100.000%) 31 | 8 : 574 463 | chlorine [#17] CL 574 / 574 (100.000%) 32 | 9 : 84 73 | bromine [#35] BR 84 / 84 (100.000%) 33 | 10 : 8 8 | iodine [#53] I 8 / 8 (100.000%) 34 | TOTAL : 216804 7505 | 82567 / 216804 match (38.084 %) 35 | ``` 36 | After a few iterations, the pool of current atom types will have diverged, with some children having 
been added to the set or atom types removed from the original set. 37 | ``` 38 | INDEX ATOMS MOLECULES TYPE NAME SMARTS REF TYPE FRACTION OF REF TYPED MOLECULES MATCHED 39 | 1 : 88148 7487 | hydrogen [#1] HA 28720 / 28720 (100.000%) 40 | 2 : 90068 7505 | carbon [#6] CA 37109 / 37143 ( 99.908%) 41 | 3 : 78 73 | carbon bromine-adjacent [#6&$(*~[#35])] CW 15 / 4850 ( 0.309%) 42 | 4 : 9689 5835 | nitrogen [#7] N 3161 / 3161 (100.000%) 43 | 5 : 11149 5300 | nitrogen degree-2 [#7&D2] NB 7480 / 7612 ( 98.266%) 44 | 6 : 12829 5946 | oxygen [#8] O 4876 / 4876 (100.000%) 45 | 7 : 1001 444 | fluorine [#9] F 1001 / 1001 (100.000%) 46 | 8 : 5 5 | phosphorous [#15] P 5 / 5 (100.000%) 47 | 9 : 3171 2593 | sulfur [#16] S 2544 / 2544 (100.000%) 48 | 10 : 574 463 | chlorine [#17] CL 574 / 574 (100.000%) 49 | 11 : 84 73 | bromine [#35] BR 84 / 84 (100.000%) 50 | 12 : 8 8 | iodine [#53] I 8 / 8 (100.000%) 51 | TOTAL : 216804 7505 | 85577 / 216804 match (39.472 %) 52 | ``` 53 | or even 54 | ``` 55 | Iteration 241 / 1000 56 | Attempting to destroy atom type [#9] : fluorine... 57 | Typing failed; rejecting. 58 | Rejected. 
59 | INDEX ATOMS MOLECULES TYPE NAME SMARTS REF TYPE FRACTION OF REF TYPED MOLECULES MATCHED 60 | 1 : 88148 7487 | hydrogen [#1] HA 28720 / 28720 (100.000%) 61 | 2 : 63417 7402 | carbon [#6] CA 36300 / 37143 ( 97.730%) 62 | 3 : 4293 2349 | carbon sulfur-adjacent [#6&$(*~[#16])] CW 1497 / 4850 ( 30.866%) 63 | 4 : 14861 5134 | carbon degree-4 [#6&D4] CT 14509 / 22084 ( 65.699%) 64 | 5 : 7575 4235 | carbon total-h-count-3 [#6&H3] 65 | 6 : 20253 6767 | nitrogen [#7] NB 7612 / 7612 (100.000%) 66 | 7 : 585 504 | nitrogen degree-1 [#7&D1] NL 585 / 585 (100.000%) 67 | 8 : 12829 5946 | oxygen [#8] O 4876 / 4876 (100.000%) 68 | 9 : 1001 444 | fluorine [#9] F 1001 / 1001 (100.000%) 69 | 10 : 5 5 | phosphorous [#15] P 5 / 5 (100.000%) 70 | 11 : 2593 2144 | sulfur [#16] S 2544 / 2544 (100.000%) 71 | 12 : 578 563 | sulfur valence-6 [#16&v6] SO 578 / 627 ( 92.185%) 72 | 13 : 574 463 | chlorine [#17] CL 574 / 574 (100.000%) 73 | 14 : 84 73 | bromine [#35] BR 84 / 84 (100.000%) 74 | 15 : 8 8 | iodine [#53] I 8 / 8 (100.000%) 75 | TOTAL : 216804 7505 | 98893 / 216804 match (45.614 %) 76 | ``` 77 | -------------------------------------------------------------------------------- /examples/parm@frosst/atomtypes/README.md: -------------------------------------------------------------------------------- 1 | # Atom type SMARTS components 2 | 3 | ## Formats 4 | 5 | ### Initial types 6 | 7 | A `basetypes` file specifies the initial atom types used to initialize the sampler. 8 | 9 | Comments beginning with `%` are ignored throughout the file. 10 | Each line has the format 11 | ``` 12 | 13 | ``` 14 | where `` is an [OpenEye SMARTS string](https://docs.eyesopen.com/toolkits/cpp/oechemtk/SMARTS.html) and `` is a human-readable typename associated with that atom type. 15 | 16 | Atom type definitions are hierarchical, with the last match in the file taking precedence over earlier matches. 
17 | 18 | For example, we could use the elemental base types: 19 | ``` 20 | % atom types 21 | H hydrogen 22 | C carbon 23 | N nitrogen 24 | O oxygen 25 | F fluorine 26 | P phosphorous 27 | S sulfur 28 | Cl chlorine 29 | Br bromine 30 | I iodine 31 | ``` 32 | 33 | ### Decorators 34 | 35 | A `decorators` file contains a list of SMARTS components that can be used to decorate existing atom types. 36 | 37 | Comments beginning with `%` are ignored throughout the file. 38 | Each line has the format 39 | ``` 40 | <SMARTS> <decoratorname> 41 | ``` 42 | where `<SMARTS>` is an [OpenEye SMARTS string](https://docs.eyesopen.com/toolkits/cpp/oechemtk/SMARTS.html) and `<decoratorname>` is a human-readable typename associated with that decorator. 43 | 44 | The SMARTS component is ANDed together (using the `&` operator) with a parent atom type to create a new proposed child atom type. 45 | The human-readable `<decoratorname>` is appended (with a space) to the parent name to keep a human-readable annotation of the proposed child atom type. 46 | 47 | ### Substitutions 48 | 49 | It is often convenient to define various tokens that are substituted for more sophisticated SMARTS expressions. 50 | 51 | % Substitution definitions 52 | % Format: 53 | % <SMARTS> <replacement-string> 54 | 55 | Comments beginning with `%` are ignored throughout the file. 56 | Each line has the format 57 | ``` 58 | <SMARTS> <replacement-string> 59 | ``` 60 | where `<SMARTS>` is an [OpenEye SMARTS string](https://docs.eyesopen.com/toolkits/cpp/oechemtk/SMARTS.html) and `<replacement-string>` is the token that will be substituted for this. 61 | 62 | For example, we could define some elemental substitutions along with some substitutions for halogens: 63 | ``` 64 | % elements 65 | [#9] fluorine 66 | [#17] chlorine 67 | [#35] bromine 68 | [#53] iodine 69 | 70 | % halogens 71 | [$smallhals,$largehals] halogen 72 | [$fluorine,$chlorine] smallhals 73 | [$bromine,$iodine] largehals 74 | ``` 75 | 76 | The [`OESmartsLexReplace`](http://docs.eyesopen.com/toolkits/python/oechemtk/OEChemFunctions/OESmartsLexReplace.html) function is used to implement these replacements.
77 | 78 | ## Manifest 79 | * `basetypes-elemental.smarts` - basetypes file with elemental atom types - this is a good choice to begin with 80 | * `basetypes.smarts` - basetypes file with more sophisticated atom types 81 | * `decorators.smarts` - `decorators` file with a variety of decorators 82 | * `decorators-simple.smarts` - minimal `decorators` file for testing 83 | * `substitutions.smarts` - minimal `substitutions` file 84 | -------------------------------------------------------------------------------- /examples/parm@frosst/atomtypes/basetypes-elemental.smarts: -------------------------------------------------------------------------------- 1 | % atom types 2 | [#1] hydrogen 3 | [#6] carbon 4 | [#7] nitrogen 5 | [#8] oxygen 6 | [#9] fluorine 7 | [#15] phosphorous 8 | [#16] sulfur 9 | [#17] chlorine 10 | [#35] bromine 11 | [#53] iodine 12 | -------------------------------------------------------------------------------- /examples/parm@frosst/atomtypes/basetypes.smarts: -------------------------------------------------------------------------------- 1 | % atom types 2 | [#1] hydrogen 3 | [#6] carbon 4 | [#6&a] carbon aromatic 5 | [#7] nitrogen 6 | [#8] oxygen 7 | [#9] fluorine 8 | [#15] phosphorous 9 | [#16] sulfur 10 | [#17] chlorine 11 | [#35] bromine 12 | [#53] iodine 13 | -------------------------------------------------------------------------------- /examples/parm@frosst/atomtypes/decorators-simple.smarts: -------------------------------------------------------------------------------- 1 | % aromatic/aliphatic 2 | a aromatic 3 | A aliphatic 4 | % halogens 5 | $(*~[$halogen]) halogen-adjacent 6 | -------------------------------------------------------------------------------- /examples/parm@frosst/atomtypes/decorators.smarts: -------------------------------------------------------------------------------- 1 | % bond order 2 | $([*]=[*]) double-bonded 3 | $([*]#[*]) triple-bonded 4 | $([*]:[*]) aromatic-bonded 5 | % bonded to atoms 6 | $(*~[#1]) 
#!/usr/bin/env python

"""Take the ZINC subset here and make a smaller subset of it for testing purposes."""

import openeye.oechem as oechem

nmols = 500  # Number of molecules to retain out of full ~7500
# Currently the above are taken as the first 500. We could also take randomly.


def write_subset(infile, outfile, max_mols, flavor=None):
    """Copy the first `max_mols` molecules from `infile` to `outfile`.

    Parameters
    ----------
    infile : str
        Path of the molecule file to read.
    outfile : str
        Path of the molecule file to write.
    max_mols : int
        Maximum number of molecules to copy.
    flavor : int, optional
        If given, this MOL2 flavor is set on both the input and the output
        stream.

    Returns
    -------
    int
        Number of molecules written.
    """
    ifs = oechem.oemolistream()
    if not ifs.open(infile):
        oechem.OEThrow.Fatal("Unable to open %s for reading" % infile)
    ofs = oechem.oemolostream()
    if not ofs.open(outfile):
        oechem.OEThrow.Fatal("Unable to open %s for writing" % outfile)

    if flavor is not None:
        # Use flavors here to ensure writing doesn't mangle atom types.
        # NOTE(review): the same (input) flavor constant is applied to the
        # output stream, as in the original script -- confirm this is intended.
        ifs.SetFlavor(oechem.OEFormat_MOL2, flavor)
        ofs.SetFlavor(oechem.OEFormat_MOL2, flavor)

    mol = oechem.OEMol()
    count = 0
    # Test the limit first so no molecule is read beyond the ones written.
    while count < max_mols and oechem.OEReadMolecule(ifs, mol):
        oechem.OEWriteConstMolecule(ofs, mol)
        count += 1

    # Close explicitly so the output is finalized before the script ends.
    ifs.close()
    ofs.close()
    return count


def main():
    """Write the tripos-typed and parm@frosst-typed subsets."""
    # Read set with tripos types, write subset
    write_subset('molecules/zinc-subset-tripos.mol2.gz',
                 'molecules/zinc-subset-%s-tripos.mol2.gz' % nmols,
                 nmols)

    # Read set with parm@frosst types, write subset
    flavor = (oechem.OEIFlavor_Generic_Default
              | oechem.OEIFlavor_MOL2_Default
              | oechem.OEIFlavor_MOL2_Forcefield)
    write_subset('molecules/zinc-subset-parm@frosst.mol2.gz',
                 'molecules/zinc-subset-%s-parm@frosst.mol2.gz' % nmols,
                 nmols,
                 flavor=flavor)


if __name__ == "__main__":
    main()
https://raw.githubusercontent.com/openforcefield/smarty/882d54b6d6d0fada748c71789964b07be2210a6a/examples/parm@frosst/molecules/zinc-subset-parm@frosst.mol2.gz -------------------------------------------------------------------------------- /examples/parm@frosst/molecules/zinc-subset-tripos.mol2.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openforcefield/smarty/882d54b6d6d0fada748c71789964b07be2210a6a/examples/parm@frosst/molecules/zinc-subset-tripos.mol2.gz -------------------------------------------------------------------------------- /examples/parm@frosst/scripts/README.md: -------------------------------------------------------------------------------- 1 | # Useful scripts for parm@frosst test 2 | 3 | ## Manifest 4 | 5 | * `convert-atom-names-to-tripos.py` - utility to convert atom names to Tripos in mol2 files 6 | -------------------------------------------------------------------------------- /examples/parm@frosst/scripts/convert-atom-names-to-tripos.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Convert file of molecules from forcefield atom types to Tripos atom types. 4 | 5 | Example: 6 | 7 | > python ../convert-atom-names-to-tripos.py zinc-subset-parm@frosst.mol2.gz zinc-subset-tripos.mol2.gz 8 | """ 9 | ################################################################ 10 | # Copyright (C) 2006-2015 OpenEye Scientific Software, Inc. 
11 | ################################################################ 12 | from __future__ import division 13 | from __future__ import print_function 14 | import os,sys 15 | import openeye.oechem as oechem 16 | 17 | def main(argv=sys.argv): 18 | if len(argv) != 3: 19 | oechem.OEThrow.Usage("%s " % argv[0]) 20 | 21 | ifs = oechem.oemolistream() 22 | flavor = oechem.OEIFlavor_Generic_Default | oechem.OEIFlavor_MOL2_Default | oechem.OEIFlavor_MOL2_Forcefield 23 | ifs.SetFlavor(oechem.OEFormat_MOL2, flavor) 24 | if not ifs.open(argv[1]): 25 | oechem.OEThrow.Fatal("Unable to open %s for reading" % argv[1]) 26 | 27 | ofs = oechem.oemolostream() 28 | if not ofs.open(argv[2]): 29 | oechem.OEThrow.Fatal("Unable to open %s for writing" % argv[2]) 30 | 31 | for mol in ifs.GetOEMols(): 32 | oechem.OETriposAtomNames(mol) 33 | oechem.OEWriteConstMolecule(ofs, mol) 34 | 35 | ifs.close() 36 | ofs.close() 37 | 38 | if __name__ == "__main__": 39 | sys.exit(main(sys.argv))#!/usr/bin/env python 40 | -------------------------------------------------------------------------------- /examples/smarty_simulations/AlkEthOH.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openforcefield/smarty/882d54b6d6d0fada748c71789964b07be2210a6a/examples/smarty_simulations/AlkEthOH.pdf -------------------------------------------------------------------------------- /examples/smarty_simulations/Hydrogen.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openforcefield/smarty/882d54b6d6d0fada748c71789964b07be2210a6a/examples/smarty_simulations/Hydrogen.pdf -------------------------------------------------------------------------------- /examples/smarty_simulations/README.md: -------------------------------------------------------------------------------- 1 | # Example application of SMARTY atom type sampler to recover parm99 typing of alkanes, ethers, and alcohols 2 | 
3 | These are example outputs for a variety of smarty uses. Each example is listed below with the associated command line call. 4 | Each example has three output files, named after the title of the example: 5 | * `*.csv` - example trajectory file, a csv file that is readable with the `score_utils.py` methods 6 | * `*.log` - stored command line output for that simulation 7 | * `*.pdf` - plot showing the score versus iteration for the simulation 8 | 9 | These are only examples of how to use smarty. All input files are those included in the smarty package, 10 | available at `smarty/data/`; the utility here allows those files to be used in simulations. 11 | 12 | ## AlkEthOH 13 | 14 | Typical smarty behavior with the AlkEthOH molecule set 15 | with combinatorial decorators and sampling all atoms 16 | 17 | ``` 18 | smarty --basetypes atomtypes/basetypes.smarts \ 19 | --initialtypes atomtypes/basetypes.smarts \ 20 | --decorators atomtypes/new-decorators.smarts \ 21 | --molecules AlkEthOH_test_filt1_tripos.mol2 \ 22 | --reference AlkEthOH_test_filt1_ff.mol2 \ 23 | --iterations 1000 \ 24 | --temperature 0.01 \ 25 | --trajectory AlkEthOH.csv \ 26 | --plot AlkEthOH.pdf >> AlkEthOH.log 27 | ``` 28 | 29 | **Example Output** 30 | This output shows how smarty samples atomtypes 31 | and compares them to the parm@frosst typed reference molecules. 32 | 33 | ##### Initializing smarty: 34 | ``` 35 | Loading molecules from '/Users/bannanc/anaconda/lib/python2.7/site-packages/smarty/data/molecules/AlkEthOH_test_filt1_tripos.mol2'... 36 | 42 molecules read 37 | 0.006 s elapsed 38 | Loading molecules from '/Users/bannanc/anaconda/lib/python2.7/site-packages/smarty/data/molecules/AlkEthOH_test_filt1_ff.mol2'... 
39 | 42 molecules read 40 | 0.006 s elapsed 41 | Sampling all atomtypes 42 | ``` 43 | Store bond types that are used in these molecules 44 | ``` 45 | USED BOND TYPES: 46 | INDEX ATOMS MOLECULES TYPE NAME SMARTS 47 | 1 : 803 42 | singly - 48 | 2 : 0 0 | doubly = 49 | 3 : 0 0 | triply # 50 | 4 : 0 0 | aromatic : 51 | TOTAL : 803 42 52 | ``` 53 | Type molecules with base types and store those with matches 54 | ``` 55 | MATCHED BASETYPES: 56 | INDEX ATOMS MOLECULES TYPE NAME SMARTS 57 | 1 : 464 42 | c_hydrogen [#1] 58 | 2 : 232 42 | c_carbon [#6] 59 | 3 : 0 0 | c_nitrogen [#7] 60 | 4 : 107 42 | c_oxygen [#8] 61 | 5 : 0 0 | c_fluorine [#9] 62 | 6 : 0 0 | c_phosphorous [#15] 63 | 7 : 0 0 | c_sulfur [#16] 64 | 8 : 0 0 | c_chlorine [#17] 65 | 9 : 0 0 | c_selenium [#34] 66 | 10 : 0 0 | c_bromine [#35] 67 | 11 : 0 0 | c_iodine [#53] 68 | TOTAL : 803 42 69 | Removing basetype '[#7]' ('c_nitrogen'), which is unused. 70 | Removing basetype '[#9]' ('c_fluorine'), which is unused. 71 | Removing basetype '[#15]' ('c_phosphorous'), which is unused. 72 | Removing basetype '[#16]' ('c_sulfur'), which is unused. 73 | Removing basetype '[#17]' ('c_chlorine'), which is unused. 74 | Removing basetype '[#34]' ('c_selenium'), which is unused. 75 | Removing basetype '[#35]' ('c_bromine'), which is unused. 76 | Removing basetype '[#53]' ('c_iodine'), which is unused. 
77 | ``` 78 | Type molecules with initial types and store the ones that are used 79 | ``` 80 | MATCHED INITIAL TYPES: 81 | INDEX ATOMS MOLECULES TYPE NAME SMARTS 82 | 1 : 464 42 | c_hydrogen [#1] 83 | 2 : 232 42 | c_carbon [#6] 84 | 3 : 0 0 | c_nitrogen [#7] 85 | 4 : 107 42 | c_oxygen [#8] 86 | 5 : 0 0 | c_fluorine [#9] 87 | 6 : 0 0 | c_phosphorous [#15] 88 | 7 : 0 0 | c_sulfur [#16] 89 | 8 : 0 0 | c_chlorine [#17] 90 | 9 : 0 0 | c_selenium [#34] 91 | 10 : 0 0 | c_bromine [#35] 92 | 11 : 0 0 | c_iodine [#53] 93 | TOTAL : 803 42 94 | Removing initial atom type '[#7]', as it matches no atoms 95 | Removing initial atom type '[#9]', as it matches no atoms 96 | Removing initial atom type '[#15]', as it matches no atoms 97 | Removing initial atom type '[#16]', as it matches no atoms 98 | Removing initial atom type '[#17]', as it matches no atoms 99 | Removing initial atom type '[#34]', as it matches no atoms 100 | Removing initial atom type '[#35]', as it matches no atoms 101 | Removing initial atom type '[#53]', as it matches no atoms 102 | ``` 103 | Use bi-partite scoring sceme to score current atomtypes against reference 104 | ``` 105 | Creating graph matching current atom types with reference atom types... 106 | Graph creation took 0.008 s 107 | Computing maximum weight match... 108 | Maximum weight match took 0.001 s 109 | ``` 110 | Initial types and which reference they are paired with and initial score (67.746 %) 111 | ``` 112 | Atom type matches: 113 | c_hydrogen matches HC : 244 atoms matched 114 | c_carbon matches CT : 232 atoms matched 115 | c_oxygen matches OH : 68 atoms matched 116 | 544 / 803 total atoms match (67.746 %) 117 | ``` 118 | ##### Example move in chemical space 119 | ``` 120 | Iteration 16 / 1000 121 | Attempting to create new subtype: '[#1]' (c_hydrogen) -> '[#1$(*~[#6])]' (c_hydrogen any c_carbon ) 122 | Proposal is valid... 
123 | ``` 124 | Score proposed atomtypes against reference 125 | ``` 126 | Creating graph matching current atom types with reference atom types... 127 | Graph creation took 0.007 s 128 | Computing maximum weight match... 129 | Maximum weight match took 0.001 s 130 | PROPOSED: 131 | Atom type matches: 132 | c_hydrogen matches HO : 68 atoms matched 133 | c_carbon matches CT : 232 atoms matched 134 | c_oxygen matches OH : 68 atoms matched 135 | c_hydrogen any c_carbon matches HC : 244 atoms matched 136 | 612 / 803 total atoms match (76.214 %) 137 | ``` 138 | ##### Accepting or Rejecting a Move 139 | A move that leads to an increased score will always be accepted. 140 | A move that decreases the score has a probability of being accepted that depends on the temperature. 141 | A temperature of 0.0 leads to a pure optimizer where only moves leading to an increased score are accepted; 142 | however, such an optimizer can get stuck in local optima. By using a non-zero temperature we allow more moves to be accepted 143 | and a larger chemical space to be explored. 144 | ``` 145 | Proposal score: 544 >> 612 : log_P_accept = 8.46824e+00 146 | Accepted. 147 | ``` 148 | Score by reference atomtype 149 | ``` 150 | INDEX ATOMS MOLECULES TYPE NAME SMARTS REF TYPE FRACTION OF REF TYPED MOLECULES MATCHED 151 | 1 : 68 42 | c_hydrogen [#1] HO 68 / 68 (100.000%) 152 | 2 : 232 42 | c_carbon [#6] CT 232 / 232 (100.000%) 153 | 3 : 107 42 | c_oxygen [#8] OH 68 / 68 (100.000%) 154 | 4 : 396 42 | c_hydrogen any c_carbon [#1$(*~[#6])] HC 244 / 244 (100.000%) 155 | TOTAL : 803 42 | 612 / 803 match (76.214 %) 156 | ``` 157 | Atomtype hierarchy shows which parent type a child descends from 158 | ``` 159 | Atom type hierarchy: 160 | [#6] 161 | [#8] 162 | [#1] 163 | [#1$(*~[#6])] 164 | ``` 165 | ##### Final iteration of this simulation 166 | ``` 167 | Iteration 999 / 1000 168 | Attempting to destroy atom type [#6] : c_carbon... 
169 | Destruction rejected for atom type [#6] because this is a generic type which was initially populated. 170 | Rejected. 171 | INDEX ATOMS MOLECULES TYPE NAME SMARTS REF TYPE FRACTION OF REF TYPED MOLECULES MATCHED 172 | 1 : 291 42 | c_hydrogen [#1] HC 244 / 244 (100.000%) 173 | 2 : 232 42 | c_carbon [#6] CT 232 / 232 (100.000%) 174 | 3 : 39 30 | c_oxygen [#8] OS 39 / 39 (100.000%) 175 | 4 : 68 42 | c_hydrogen any c_oxygen [#1$(*~[#8])] HO 68 / 68 (100.000%) 176 | 5 : 27 21 | c_hydrogen any c_carbon any c_carbon (any c_oxygen) (singly c_oxygen) [#1$(*~[#6](-[#8])(~[#8])~[#6])] H2 27 / 33 ( 81.818%) 177 | 6 : 78 25 | c_hydrogen any c_carbon any c_carbon (any c_oxygen) (singly c_hydrogen) [#1$(*~[#6](-[#1])(~[#8])~[#6])] H1 78 / 116 ( 67.241%) 178 | 7 : 68 42 | c_oxygen any c_hydrogen [#8$(*~[#1])] OH 68 / 68 (100.000%) 179 | TOTAL : 803 42 | 756 / 803 match (94.147 %) 180 | 181 | Atom type hierarchy: 182 | [#1] 183 | [#1$(*~[#8])] 184 | [#1$(*~[#6](-[#8])(~[#8])~[#6])] 185 | [#1$(*~[#6](-[#1])(~[#8])~[#6])] 186 | [#8] 187 | [#8$(*~[#1])] 188 | [#6] 189 | Maximum score achieved: 0.99 190 | ``` 191 | 192 | ## Hydrogen 193 | 194 | This is an example of how to implement the elemental sampler for smarty 195 | you only need to add the `--element` option. In this case instead of considering 196 | all atoms, we only sample atom types for hydrogen. 197 | This allows for more efficient testing of the smarty tool as we can 198 | focus on the chemical perception sampling around one element. 199 | In the AlkEthOH, there is only 1 carbon and 2 oxygens, so the 5 hydrogen types 200 | are the best example of this behavior. 
201 | 202 | ``` 203 | smarty --element 1 \ 204 | --basetypes atomtypes/basetypes.smarts \ 205 | --initialtypes atomtypes/basetypes.smarts \ 206 | --decorators atomtypes/new-decorators.smarts \ 207 | --molecules AlkEthOH_test_filt1_tripos.mol2 \ 208 | --reference AlkEthOH_test_filt1_ff.mol2 \ 209 | --iterations 1000 \ 210 | --temperature 0.01 \ 211 | --trajectory Hydrogen.csv \ 212 | --plot Hydrogen.pdf >> Hydrogen.log 213 | ``` 214 | 215 | ## Simple-Decorators 216 | 217 | With the simple decorator option new atomtypes are generated by ANDing 218 | decorator SMARTS patterns to the end of a parent atomtype. 219 | This method cannot capture even the complexity in the AlkEthOH 220 | molecule set as it does not allow for beta substitution from the primary atom. 221 | 222 | ``` 223 | smarty --basetypes atomtypes/basetypes.smarts \ 224 | --initialtypes atomtypes/basetypes.smarts \ 225 | --decorators atomtypes/decorators.smarts \ 226 | --substitutions atomtypes/replacements.smarts \ 227 | --molecules AlkEthOH_test_filt1_tripos.mol2 \ 228 | --reference AlkEthOH_test_filt1_ff.mol2 \ 229 | --iterations 1000 \ 230 | --temperature 0.01 \ 231 | --trajectory Simple-decorators.csv \ 232 | --plot Simple-decorators.pdf \ 233 | --decoratorbehavior simple-decorators >> Simple-decorators.log 234 | ``` 235 | 236 | ## More smarty tests 237 | We have done more extensive testing of this tool, but the results are 238 | a bit bulky to keep on GitHub. We maintain a public [Google Drive Directory](https://drive.google.com/drive/folders/0BwF2-3puCvfEeWNuNnlsTm1CTlU?usp=sharing) 239 | with these results. Please note it is a work in progress, so documentation is ongoing. 
240 | -------------------------------------------------------------------------------- /examples/smarty_simulations/Simple-Decorators.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openforcefield/smarty/882d54b6d6d0fada748c71789964b07be2210a6a/examples/smarty_simulations/Simple-Decorators.pdf -------------------------------------------------------------------------------- /examples/smirky/README.md: -------------------------------------------------------------------------------- 1 | # smirky sampling of Torsions 2 | 3 | This is an example of how to use smirky, a command line tool for sampling chemical perception of Bonds, Angles, proper or improper Torsions, or van der Waals parameters. Default smirky behavior only requires two inputs; this example shows all of the input options for smirky. 4 | 5 | ### Input files explained 6 | 7 | * `atom_OR_bases.smarts` - element numbers that form the base of atoms, such as `"#6"` and their associated odds 8 | * `atom_OR_decorators.smarts` - decorators and associated odds that are combined with element numbers such as `X4` in `"[#6X3,#7]"` 9 | * `atom_AND_decorators.smarts` - decorators and associated odds for patterns that are "AND'd" to the end of an atom for example `r5` in `"[#6X4,#7X3;r5]"` 10 | * `bond_OR_bases.smarts` - bond bases and their associated odds, that is '-', '=', ':', or '#' typically 11 | * `bond_AND_decorators.smarts` - bond decorators that can be "AND'd" in a bond, such as '@' in `"[#6r6]-,:;@[#7r6]"` 12 | * `atom_odds_forTorsions.smarts` - keywords or indices for atoms in torsions and odds of making changes to them 13 | * `bond_odds_forTorsions.smarts` - keywords or indices for bonds in torsions and odds of making changes to them 14 | * `initial_Torsions.smarts` - SMIRKS patterns for initial patterns 15 | * `substitutions.smarts` - SMIRKS patterns and the short hand they can be replaced with 16 | 17 | ### Command line call 18 | 19 | ``` 
20 | smirky --molecules AlkEthOH_test_filt1_ff.mol2 \ 21 | --typetag Torsion \ 22 | --atomORbases atom_OR_bases.smarts \ 23 | --atomORdecors atom_OR_decorators.smarts \ 24 | --atomANDdecors atom_AND_decorators.smarts \ 25 | --bondORbase bond_OR_bases.smarts \ 26 | --bondANDdecors bond_AND_decorators.smarts \ 27 | --atomOddsFile atom_odds_forTorsions.smarts \ 28 | --bondOddsFile bond_odds_forTorsions.smarts \ 29 | --initialtypes initial_Torsions.smarts \ 30 | --substitutions substitutions.smarts \ 31 | --smirff forcefield/Frosst_AlkEthOH.ffxml \ 32 | --iterations 1000 \ 33 | --temperature 0.001 \ 34 | --verbose True \ 35 | --output output 36 | ``` 37 | 38 | ### Output files created 39 | * output.log - detailed log of each iteration, changes made and if it was accepted or rejected 40 | * output.csv - a "trajectory" file that describes the torsions at each iteration 41 | * output.pdf - plot showing the overall score vs iteration 42 | * output_results.smarts - smarts file showing the final SMIRKS and their matched results 43 | 44 | ### Detailed output explained 45 | 46 | Here is a segment of output.log with an explanation of what happens in a smirky simulation 47 | 48 | ##### Match initial input 49 | 50 | Type initial parameters 51 | ``` 52 | INDEX TORSIONS MOLECULES TYPE NAME: SMIRKS 53 | 1 : 0 0 | 0: [*:1]~[*:2]~[*:3]~[*:4] 54 | 2 : 1737 42 | C-C: [*:1]~[#6:2]~[#6:3]~[*:4] 55 | 3 : 438 42 | C-O: [*:1]~[#6:2]~[#8:3]~[*:4] 56 | TOTAL : 2175 42 57 | ``` 58 | Remove elements that are not used in this molecule set (remember AlkEthOH only has carbon, oxygen, and hydrogen) 59 | ``` 60 | removing unused element ([#5]) from list 61 | removing unused element ([#7]) from list 62 | removing unused element ([#9]) from list 63 | removing unused element ([#14]) from list 64 | removing unused element ([#15]) from list 65 | removing unused element ([#16]) from list 66 | removing unused element ([#17]) from list 67 | removing unused element ([#35]) from list 68 | removing unused 
element ([#53]) from list 69 | ``` 70 | ##### Comparing to SMIRNOFF99Frosst 71 | 72 | Use the forcefield tools to type all molecules with the SMIRNOFF reference. 73 | Compare reference types to initial parameter types 74 | 75 | ``` 76 | Creating labeler from forcefield/Frosst_AlkEthOH.ffxml... 77 | Creating graph matching current types with reference types... 78 | Graph creation took 0.304 s 79 | Computing maximum weight match... 80 | Maximum weight match took 0.001 s 81 | PROPOSED: 82 | Torsion type matches: 83 | 0: [*:1]~[*:2]~[*:3]~[*:4] no match 84 | C-C: [*:1]~[#6:2]~[#6:3]~[*:4] matches t0004: [#1:1]-[#6X4:2]-[#6X4:3]-[#1:4]: 574 Torsion types matched 85 | C-O: [*:1]~[#6:2]~[#8:3]~[*:4] matches t0003: [a,A:1]-[#6X4:2]-[#8X2:3]-[!#1:4]: 156 Torsion types matched 86 | 730 / 2175 total Torsions match (33.563 %) 87 | ``` 88 | Show current statistics before sampling begins 89 | ``` 90 | INDEX TORSIONS MOLECULES TYPE NAME: SMIRKS REF TYPE: SMIRKS FRACTION OF REF TYPED MOLECULES MATCHED 91 | 1 : 0 0 | 0: [*:1]~[*:2]~[*:3]~[*:4] 92 | 2 : 1737 42 | C-C: [*:1]~[#6:2]~[#6:3]~[*:4] t0004: [#1:1]-[#6X4:2]-[#6X4:3]-[#1:4] 574 / 574 (100.000%) 93 | 3 : 438 42 | C-O: [*:1]~[#6:2]~[#8:3]~[*:4] t0003: [a,A:1]-[#6X4:2]-[#8X2:3]-[!#1:4] 156 / 156 (100.000%) 94 | TOTAL : 2175 42 | 730 / 2175 match (33.563 %) 95 | ``` 96 | 97 | ##### Example move to generate a new Torsion 98 | 99 | Create a new torsion, in this case by changing the 4th atom from generic (*) to an oxygen bound to at least one hydrogen (`#8!H0`) 100 | 101 | ``` 102 | Iteration 1 / 1000 103 | Attempting to create new subtype: '4778' ([*:1]~[#6:2]~[#6:3]~[#8!H0:4]) from parent type 'C-C' ([*:1]~[#6:2]~[#6:3]~[*:4]) 104 | Probability of making this environment is 0.004 %Proposal is valid... 105 | ``` 106 | Compare proposed types to the SMIRNOFF reference types 107 | ``` 108 | Creating graph matching current types with reference types... 109 | Graph creation took 0.176 s 110 | Computing maximum weight match...
111 | Maximum weight match took 0.001 s 112 | PROPOSED: 113 | Torsion type matches: 114 | C-C: [*:1]~[#6:2]~[#6:3]~[*:4] matches t0004: [#1:1]-[#6X4:2]-[#6X4:3]-[#1:4]: 574 Torsion types matched 115 | C-O: [*:1]~[#6:2]~[#8:3]~[*:4] matches t0003: [a,A:1]-[#6X4:2]-[#8X2:3]-[!#1:4]: 156 Torsion types matched 116 | 4778: [*:1]~[#6:2]~[#6:3]~[#8!H0:4] matches t0012: [#1:1]-[#6X4:2]-[#6X4:3]-[#8X2:4]: 190 Torsion types matched 117 | 920 / 2175 total Torsions match (42.299 %) 118 | ``` 119 | ##### Using temperature and score to accept or reject move 120 | Use change in score and temperature to calculate the probability of accepting the move. 121 | A move with an increased score will always be accepted, the higher the temperature the 122 | more probable a move with a decreased score will be accepted 123 | ``` 124 | Proposal score: 730 >> 920 : log_P_accept = 8.73563e+01 125 | Accepted. 126 | INDEX TORSIONS MOLECULES TYPE NAME: SMIRKS REF TYPE: SMIRKS FRACTION OF REF TYPED MOLECULES MATCHED 127 | 1 : 1436 42 | C-C: [*:1]~[#6:2]~[#6:3]~[*:4] t0004: [#1:1]-[#6X4:2]-[#6X4:3]-[#1:4] 574 / 574 (100.000%) 128 | 2 : 438 42 | C-O: [*:1]~[#6:2]~[#8:3]~[*:4] t0003: [a,A:1]-[#6X4:2]-[#8X2:3]-[!#1:4] 156 / 156 (100.000%) 129 | 3 : 301 42 | 4778: [*:1]~[#6:2]~[#6:3]~[#8!H0:4] t0012: [#1:1]-[#6X4:2]-[#6X4:3]-[#8X2:4] 190 / 307 ( 61.889%) 130 | TOTAL : 2175 42 | 920 / 2175 match (42.299 %) 131 | 132 | ``` 133 | Hierarchy shows which parent types lead to the generation of child types 134 | ``` 135 | Torsion type hierarchy: 136 | C-C ([*:1]~[#6:2]~[#6:3]~[*:4]) 137 | 4778 ([*:1]~[#6:2]~[#6:3]~[#8!H0:4]) 138 | C-O ([*:1]~[#6:2]~[#8:3]~[*:4]) 139 | ``` 140 | ##### Final Iteration in this example 141 | ``` 142 | Iteration 999 / 1000 143 | Attempting to destroy type 1876 : [#1:1]~[#6:2]~[#6:3]~[#1:4]... 144 | Proposal is valid... 145 | Creating graph matching current types with reference types... 146 | Graph creation took 0.249 s 147 | Computing maximum weight match... 
148 | Maximum weight match took 0.004 s 149 | PROPOSED: 150 | Torsion type matches: 151 | C-C: [*:1]~[#6:2]~[#6:3]~[*:4] matches t0004: [#1:1]-[#6X4:2]-[#6X4:3]-[#1:4]: 574 Torsion types matched 152 | C-O: [*:1]~[#6:2]~[#8:3]~[*:4] matches t0003: [a,A:1]-[#6X4:2]-[#8X2:3]-[!#1:4]: 156 Torsion types matched 153 | 4808: [*:1]~[#6:2]~[#8:3]~[#1!X4:4] matches t0002: [a,A:1]-[#6X4:2]-[#8X2:3]-[#1:4]: 101 Torsion types matched 154 | 8090: [#6!H1:1]~[#6:2]~[#8:3]~[#1!X4:4] matches t0006: [#6X4:1]-[#6X4:2]-[#8X2:3]-[#1:4]: 87 Torsion types matched 155 | 7751: [*:1]~[#6:2]~[#6:3]~[#6:4] matches t0001: [a,A:1]-[#6X4:2]-[#6X4:3]-[a,A:4]: 146 Torsion types matched 156 | 1068: [#6!H3:1]~[#6:2]~[#6:3]~[#6:4] matches t0007: [#6X4:1]-[#6X4:2]-[#6X4:3]-[#6X4:4]: 131 Torsion types matched 157 | 6774: [#1H0:1]~[#6:2]~[#6:3]~[#6:4] matches t0005: [#1:1]-[#6X4:2]-[#6X4:3]-[#6X4:4]: 552 Torsion types matched 158 | 8025: [#6:1]~[#6:2]~[#8:3]~[#6!H3:4] matches t0008: [#6X4:1]-[#6X4:2]-[#8X2:3]-[#6X4:4]: 66 Torsion types matched 159 | 1813 / 2175 total Torsions match (83.356 %) 160 | Proposal score: 2120 >> 1813 : log_P_accept = -1.41149e+02 161 | Rejected. 
162 | INDEX TORSIONS MOLECULES TYPE NAME: SMIRKS REF TYPE: SMIRKS FRACTION OF REF TYPED MOLECULES MATCHED 163 | 1 : 334 42 | C-C: [*:1]~[#6:2]~[#6:3]~[*:4] t0012: [#1:1]-[#6X4:2]-[#6X4:3]-[#8X2:4] 307 / 307 (100.000%) 164 | 2 : 168 30 | C-O: [*:1]~[#6:2]~[#8:3]~[*:4] t0003: [a,A:1]-[#6X4:2]-[#8X2:3]-[!#1:4] 156 / 156 (100.000%) 165 | 3 : 117 42 | 4808: [*:1]~[#6:2]~[#8:3]~[#1!X4:4] t0002: [a,A:1]-[#6X4:2]-[#8X2:3]-[#1:4] 101 / 101 (100.000%) 166 | 4 : 87 42 | 8090: [#6!H1:1]~[#6:2]~[#8:3]~[#1!X4:4] t0006: [#6X4:1]-[#6X4:2]-[#8X2:3]-[#1:4] 87 / 103 ( 84.466%) 167 | 5 : 146 40 | 7751: [*:1]~[#6:2]~[#6:3]~[#6:4] t0001: [a,A:1]-[#6X4:2]-[#6X4:3]-[a,A:4] 146 / 146 (100.000%) 168 | 6 : 131 37 | 1068: [#6!H3:1]~[#6:2]~[#6:3]~[#6:4] t0007: [#6X4:1]-[#6X4:2]-[#6X4:3]-[#6X4:4] 131 / 131 (100.000%) 169 | 7 : 552 40 | 6774: [#1H0:1]~[#6:2]~[#6:3]~[#6:4] t0005: [#1:1]-[#6X4:2]-[#6X4:3]-[#6X4:4] 552 / 552 (100.000%) 170 | 8 : 66 30 | 8025: [#6:1]~[#6:2]~[#8:3]~[#6!H3:4] t0008: [#6X4:1]-[#6X4:2]-[#8X2:3]-[#6X4:4] 66 / 66 (100.000%) 171 | 9 : 574 42 | 1876: [#1:1]~[#6:2]~[#6:3]~[#1:4] t0004: [#1:1]-[#6X4:2]-[#6X4:3]-[#1:4] 574 / 574 (100.000%) 172 | TOTAL : 2175 42 | 2120 / 2175 match (97.471 %) 173 | 174 | Torsion type hierarchy: 175 | C-C ([*:1]~[#6:2]~[#6:3]~[*:4]) 176 | 7751 ([*:1]~[#6:2]~[#6:3]~[#6:4]) 177 | 1068 ([#6!H3:1]~[#6:2]~[#6:3]~[#6:4]) 178 | 6774 ([#1H0:1]~[#6:2]~[#6:3]~[#6:4]) 179 | 1876 ([#1:1]~[#6:2]~[#6:3]~[#1:4]) 180 | C-O ([*:1]~[#6:2]~[#8:3]~[*:4]) 181 | 4808 ([*:1]~[#6:2]~[#8:3]~[#1!X4:4]) 182 | 8090 ([#6!H1:1]~[#6:2]~[#8:3]~[#1!X4:4]) 183 | 8025 ([#6:1]~[#6:2]~[#8:3]~[#6!H3:4]) 184 | 185 | ``` 186 | 187 | ## More smirky tests 188 | 189 | The results from smirky tests get a bit bulky so we are not storing them on GitHub. 190 | We maintain a public [Google Drive Directory](https://drive.google.com/drive/folders/0BwF2-3puCvfEeWNuNnlsTm1CTlU?usp=sharing) 191 | storing extensive tests on smirky and smarty.
Please keep in mind these tests are on going so documentation for the Google Drive is a work in progress. 192 | -------------------------------------------------------------------------------- /examples/smirky/atom_AND_decorators.smarts: -------------------------------------------------------------------------------- 1 | % Decorator Odds 2 | % Size of smallest ring 3 | r3 0 4 | r4 0 5 | r5 0 6 | r6 0 7 | % Number of rings 8 | R0 0 9 | R1 0 10 | R2 0 11 | R3 0 12 | R4 0 13 | R 0 14 | !R0 0 15 | !R1 0 16 | !R2 0 17 | !R3 0 18 | !R4 0 19 | !R 0 20 | % total connectivity 21 | X1 0 22 | X2 0 23 | X3 0 24 | X4 0 25 | !X1 0 26 | !X2 0 27 | !X3 0 28 | !X4 0 29 | % total hydrogen count 30 | H0 0 31 | !H0 0 32 | H1 0 33 | !H1 0 34 | H2 0 35 | !H2 0 36 | H3 0 37 | !H3 0 38 | % aromatic/aliphatic 39 | a 0 40 | !a 0 41 | A 0 42 | !A 0 43 | % charges 44 | -1 0 45 | +0 0 46 | +1 0 47 | % no decorator 48 | '' 1 49 | -------------------------------------------------------------------------------- /examples/smirky/atom_OR_bases.smarts: -------------------------------------------------------------------------------- 1 | % Decorator Odds 2 | % elements 3 | [#1] 1 4 | [#5] 0 5 | [#6] 1 6 | [#7] 0 7 | [#8] 1 8 | [#9] 0 9 | [#14] 0 10 | [#15] 0 11 | [#16] 0 12 | [#17] 0 13 | [#35] 0 14 | [#53] 0 15 | % substitution groups 16 | $ewg1 0 17 | $ewg2 0 18 | -------------------------------------------------------------------------------- /examples/smirky/atom_OR_decorators.smarts: -------------------------------------------------------------------------------- 1 | % Decorator Odds 2 | % Size of smallest ring 3 | r3 0 4 | r4 0 5 | r5 0 6 | r6 0 7 | % Number of rings 8 | R0 0 9 | R1 0 10 | R2 0 11 | R3 0 12 | R4 0 13 | R 0 14 | !R0 0 15 | !R1 0 16 | !R2 0 17 | !R3 0 18 | !R4 0 19 | !R 0 20 | % total connectivity 21 | X1 0 22 | X2 1 23 | X3 0 24 | X4 1 25 | !X1 0 26 | !X2 1 27 | !X3 0 28 | !X4 1 29 | % total hydrogen count 30 | H0 1 31 | !H0 1 32 | H1 1 33 | !H1 1 34 | H2 1 35 | !H2 1 36 | H3 1 
37 | !H3 1 38 | % aromatic/aliphatic 39 | a 0 40 | !a 0 41 | A 0 42 | !A 0 43 | % charges 44 | -1 0 45 | +0 0 46 | +1 0 47 | % OR base with no decorator 48 | '' 10 49 | -------------------------------------------------------------------------------- /examples/smirky/atom_odds_forTorsions.smarts: -------------------------------------------------------------------------------- 1 | % Descriptor odds 2 | % used in the default, all equally likely 3 | all 0 4 | % 5 | % Other options remember to use indices appropriately 6 | 1 10 7 | 2 1 8 | 3 1 9 | 4 10 10 | Indexed 0 11 | Unindexed 5 12 | Alpha 0 13 | Beta 0 14 | -------------------------------------------------------------------------------- /examples/smirky/bond_AND_decorators.smarts: -------------------------------------------------------------------------------- 1 | % Decorator Odds 2 | @ 0 3 | !@ 0 4 | !# 0 5 | '' 1 6 | -------------------------------------------------------------------------------- /examples/smirky/bond_OR_bases.smarts: -------------------------------------------------------------------------------- 1 | % Decorator Odds 2 | % bond types 3 | - 1 4 | : 0 5 | = 0 6 | # 0 7 | % not bond types 8 | !- 0 9 | !: 0 10 | != 0 11 | !# 0 12 | -------------------------------------------------------------------------------- /examples/smirky/bond_odds_forTorsions.smarts: -------------------------------------------------------------------------------- 1 | % Descriptor odds 2 | % used in the default, all equally likely 3 | all 0 4 | % 5 | % Other options remember to use indices appropriately 6 | 1 10 7 | 2 1 8 | 3 10 9 | Indexed 0 10 | Unindexed 20 11 | Alpha 0 12 | Beta 0 13 | -------------------------------------------------------------------------------- /examples/smirky/initial_Torsions.smarts: -------------------------------------------------------------------------------- 1 | % Van Der Waal fragments to begin with 2 | [*:1]~[*:2]~[*:3]~[*:4] 0 3 | [*:1]~[#6:2]~[#6:3]~[*:4] C-C 4 | [*:1]~[#6:2]~[#8:3]~[*:4] 
C-O 5 | -------------------------------------------------------------------------------- /examples/smirky/output.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/openforcefield/smarty/882d54b6d6d0fada748c71789964b07be2210a6a/examples/smirky/output.pdf -------------------------------------------------------------------------------- /examples/smirky/output_results.smarts: -------------------------------------------------------------------------------- 1 | % Results for sampling Torsions at 1.00e-03 2 | %% SMIRKS patterns for final results are below 3 | % followed by a their matched reference SMIRKS from forcefield/Frosst_AlkEthOH.ffxml 4 | %Final Score was 97.471 % 5 | %% 6 | [*:1]~[#6:2]~[#6:3]~[*:4] C-C 7 | % [#1:1]-[#6X4:2]-[#6X4:3]-[#8X2:4] t0012 8 | [*:1]~[#6:2]~[#8:3]~[*:4] C-O 9 | % [a,A:1]-[#6X4:2]-[#8X2:3]-[!#1:4] t0003 10 | [*:1]~[#6:2]~[#8:3]~[#1!X4:4] 4808 11 | % [a,A:1]-[#6X4:2]-[#8X2:3]-[#1:4] t0002 12 | [#6!H1:1]~[#6:2]~[#8:3]~[#1!X4:4] 8090 13 | % [#6X4:1]-[#6X4:2]-[#8X2:3]-[#1:4] t0006 14 | [*:1]~[#6:2]~[#6:3]~[#6:4] 7751 15 | % [a,A:1]-[#6X4:2]-[#6X4:3]-[a,A:4] t0001 16 | [#6!H3:1]~[#6:2]~[#6:3]~[#6:4] 1068 17 | % [#6X4:1]-[#6X4:2]-[#6X4:3]-[#6X4:4] t0007 18 | [#1H0:1]~[#6:2]~[#6:3]~[#6:4] 6774 19 | % [#1:1]-[#6X4:2]-[#6X4:3]-[#6X4:4] t0005 20 | [#6:1]~[#6:2]~[#8:3]~[#6!H3:4] 8025 21 | % [#6X4:1]-[#6X4:2]-[#8X2:3]-[#6X4:4] t0008 22 | [#1:1]~[#6:2]~[#6:3]~[#1:4] 1876 23 | % [#1:1]-[#6X4:2]-[#6X4:3]-[#1:4] t0004 24 | -------------------------------------------------------------------------------- /examples/smirky/substitutions.smarts: -------------------------------------------------------------------------------- 1 | % Substitution definitions 2 | % Format: 3 | % 4 | % halogens 5 | [#7!-1,#8,#16] ewg2 6 | [#7!-1,#8!-1,#16!-1,$halogen] ewg1 7 | [$smallhals,$largehals] halogen 8 | [#9,#17] smallhals 9 | [#35,#53] largehals 10 | 
def read(fname):
    """Return the full text of *fname*, resolved relative to this file's directory.

    setup() uses this to load README.md as the package's long description.

    Parameters
    ----------
    fname : str
        File name (or path) joined onto the directory containing this script.

    Returns
    -------
    str
        The complete contents of the file.
    """
    path = os.path.join(os.path.dirname(__file__), fname)
    # Close the handle deterministically instead of leaving it to the
    # garbage collector, which is what the bare open(...).read() relied on.
    with open(path) as handle:
        return handle.read()
class AtomTyper(object):
    """
    Atom typer based on SMARTS-defined atom types.

    Based on 'Patty' implementation by Pat Walters.

    Every [smarts, typename] entry is compiled into a substructure search.
    The searches are applied in list order, so when several patterns match
    the same atom the LAST matching entry determines its type.

    """

    class TypingException(Exception):
        """
        Atom typing exception.

        Raised by `assignTypes` when an atom is left without a type.

        """
        def __init__(self, molecule, atom):
            self.molecule = molecule
            self.atom = atom

        def __str__(self):
            msg = "Atom not assigned: molecule %s : atom index %6d name %8s element %8s" % (self.molecule.GetTitle(), self.atom.GetIdx(), self.atom.GetName(), OEGetAtomicSymbol(self.atom.GetAtomicNum()))
            msg += '\n'
            for atom in self.molecule.GetAtoms():
                # Report each atom's OWN element; the listing used to repeat
                # the element of the unassigned atom on every row.
                msg += 'atom %8d : name %8s element %8s' % (atom.GetIdx(), atom.GetName(), OEGetAtomicSymbol(atom.GetAtomicNum()))
                if atom == self.atom:
                    # Flag the atom that could not be typed.
                    msg += ' ***'
                msg += '\n'

            return msg

    def __init__(self, typelist, tagname, replacements=None):
        """
        Create an atom typer instance.

        ARGUMENTS

        typelist : list of [smarts, typename]
            Atom type definitions, applied in order (later entries win).
        tagname : str
            Tag name under which the assigned type is stored on each atom.
        replacements : list of [smarts, shortname], optional
            Substitution/replacement bindings expanded inside each SMARTS.

        """

        self.pattyTag = OEGetTag(tagname)

        # Create bindings list; OESmartsLexReplace is handed (shortname, smarts) pairs.
        bindings = list()
        if replacements is not None:
            for [smarts, shortname] in replacements:
                bindings.append((shortname, smarts))

        # Create table of search objects.
        self.smartsList = []
        for [smarts, typename] in typelist:
            # Perform binding replacements
            smarts = OESmartsLexReplace(smarts, bindings)
            # Create SMARTS search
            pat = OESubSearch()
            pat.Init(smarts)
            # NOTE(review): 0 presumably means "no limit on matches" -- confirm against OEChem docs.
            pat.SetMaxMatches(0)
            self.smartsList.append([pat, typename, smarts])

        return

    def dump(self):
        """Print every (search object, typename, expanded SMARTS) entry."""
        for pat, typename, smarts in self.smartsList:
            print(pat, typename, smarts)
        return

    def assignTypes(self, mol, element=0):
        """
        Assign a type to every atom of `mol`, stored under this typer's tag.

        Parameters
        ----------
        mol : OpenEye molecule
            Molecule to type; modified in place.
        element : int, optional
            If greater than 0, only atoms with this atomic number are
            required to end up typed; otherwise every atom is checked.

        Raises
        ------
        AtomTyper.TypingException
            If a checked atom matched none of the patterns.

        """
        # Assign null types.
        for atom in mol.GetAtoms():
            atom.SetStringData(self.pattyTag, "")

        # Assign atom types using rules; later patterns overwrite earlier ones.
        OEAssignAromaticFlags(mol)
        for pat, typename, smarts in self.smartsList:
            OEPrepareSearch(mol, pat)
            for matchbase in pat.Match(mol):
                for matchpair in matchbase.GetAtoms():
                    matchpair.target.SetStringData(self.pattyTag, typename)

        # Check if any atoms remain unassigned.
        if element > 0:
            mol_atoms = mol.GetAtoms(OEHasAtomicNum(element))
        else:
            mol_atoms = mol.GetAtoms()
        for atom in mol_atoms:
            if atom.GetStringData(self.pattyTag) == "":
                raise AtomTyper.TypingException(mol, atom)
        return

    def debugTypes(self, mol):
        """Print index, element symbol and assigned type for each atom of `mol`."""
        for atom in mol.GetAtoms():
            print("%6d %8s %8s" % (atom.GetIdx(), OEGetAtomicSymbol(atom.GetAtomicNum()), atom.GetStringData(self.pattyTag)))
        return

    def getTypeList(self, mol):
        """Return the assigned type of each atom of `mol`, in atom iteration order."""
        return [atom.GetStringData(self.pattyTag) for atom in mol.GetAtoms()]

    @classmethod
    def read_typelist(cls, filename):
        """
        Read an atomtype or decorator list from a file.

        Everything after a '%' on a line is a comment. The first
        whitespace-separated token is the SMARTS; the remaining tokens,
        joined by single spaces, form the type name. Lines with fewer than
        two tokens are ignored.

        Parameters
        ----------
        filename : str
            The name of the file to be read. If it does not exist as given,
            it is looked up via utils.get_data_filename.

        Returns
        -------
        typelist : list of lists, or None
            typelist[i] is entry i in the form [smarts, typename];
            None if `filename` is None.

        Raises
        ------
        Exception
            If the file cannot be found, or a type name is used twice.

        """
        if filename is None:
            return None

        if not os.path.exists(filename):
            built_in = utils.get_data_filename(filename)
            if not os.path.exists(built_in):
                raise Exception("File '%s' not found." % filename)
            filename = built_in

        typelist = list()
        used_typenames = set()
        # The context manager closes the file even when a duplicate name
        # aborts parsing part-way through.
        with open(filename) as ifs:
            for line in ifs:
                # Strip trailing comments
                line = line.split('%', 1)[0]
                # Split into tokens.
                tokens = line.split()
                # Process only if we have enough tokens
                if len(tokens) < 2:
                    continue
                smarts = tokens[0]
                typename = ' '.join(tokens[1:])
                if typename in used_typenames:
                    raise Exception("Error in file '%s' -- each entry must "
                                    "have a unique name." % filename )
                typelist.append([smarts, typename])
                used_typenames.add(typename)

        return typelist
25 | 26 | usage: %prog --basetypes smartsfile --initialtypes smartsfile --decorators smartsfile [--substitutions smartsfile] --molecules molfile [--reference molfile] --iterations niterations [--temperature temperature] 27 | 28 | example: 29 | 30 | python %prog --basetypes=atomtypes/basetypes.smarts --initialtypes=atomtypes/initialtypes.smarts --decorators=atomtypes/decorators.smarts --substitutions=atomtypes/substitutions.smarts \ 31 | --molecules=molecules/zinc-subset-tripos.mol2.gz --reference=molecules/zinc-subset-parm@frosst.mol2.gz --iterations 1000 --temperature=0.1 32 | 33 | """ 34 | version_string = "%prog %__version__" 35 | parser = OptionParser(usage=usage_string, version=version_string) 36 | 37 | parser.add_option("-e", "--element", metavar='ELEMENT', 38 | action="store", type="int", dest='element', default=0, 39 | help= "By default the element value is 0 corresponding to sampling all atomtypes. If another atomic number is specified only atoms with that atomic number are sampled (i.e. 
--element=8 will only sample atomtypes for oxygen atoms).") 40 | 41 | 42 | parser.add_option("-b", "--basetypes", metavar='BASETYPES', 43 | action="store", type="string", dest='basetypes_filename', default=None, 44 | help="Filename defining base or generic atom types as SMARTS atom matches; these are indestructible and normally are elemental atom types.") 45 | 46 | parser.add_option("-f", "--initialtypes", metavar='BASETYPES', 47 | action="store", type="string", dest='initialtypes_filename', default=None, 48 | help="Filename defining initial (first) atom types as SMARTS atom matches.") 49 | 50 | parser.add_option("-d", "--decorators", metavar='DECORATORS', 51 | action="store", type="string", dest='decorators_filename', default=None, 52 | help="Filename defining decorator atom types as SMARTS atom matches.") 53 | 54 | parser.add_option("-s", "--substitutions", metavar="SUBSTITUTIONS", 55 | action="store", type="string", dest='substitutions_filename', default=None, 56 | help="Filename defining substitution definitions for SMARTS atom matches (OPTIONAL).") 57 | 58 | parser.add_option("-r", "--reference", metavar="REFMOL", 59 | action="store", type="string", dest='reference_molecules_filename', default=None, 60 | help="Reference typed molecules for computing likelihood (must match same molecule and atom ordering in molecules file) (OPTIONAL).") 61 | 62 | parser.add_option("-m", "--molecules", metavar='MOLECULES', 63 | action="store", type="string", dest='molecules_filename', default=None, 64 | help="Small molecule set (in any OpenEye compatible file format) containing 'dG(exp)' fields with experimental hydration free energies.") 65 | 66 | parser.add_option("-i", "--iterations", metavar='ITERATIONS', 67 | action="store", type="int", dest='iterations', default=150, 68 | help="MCMC iterations.") 69 | 70 | parser.add_option("-t", "--temperature", metavar='TEMPERATURE', 71 | action="store", type="float", dest='temperature', default=0.1, 72 | help="Effective temperature for 
Monte Carlo acceptance, indicating fractional tolerance of mismatched atoms (default: 0.1). If 0 is specified, will behave in a greedy manner.") 73 | 74 | parser.add_option("-l", '--trajectory', metavar="TRAJECTORY_FILE", 75 | action = "store", dest = "traj_file", default = "trajectory.csv", 76 | help = "Name for trajectory file output, trajectory saves only changes to the list of 'atomtypes' for each iteration. If the file already exists, it is overwritten.") 77 | 78 | parser.add_option("-p", '--plot', metavar="PLOT_FILE", 79 | action = "store", dest = "plot_file", default = None, 80 | help = "Name for output file of a plot of the score versus time. If not specified, none will be written. If provided, needs to use a file extension suitable for matplotlib/pylab. Currently requires a trajectory file to be written using -l or --trajectory.") 81 | 82 | 83 | parser.add_option("-x", "--decoratorbehavior", metavar='DECORATOR_BEHAVIOR', 84 | action="store", type="string", dest='decorator_behavior', default='combinatorial-decorators', 85 | help="Choose between simple-decorators or combinatorial-decorators (default = combinatorial-decorators).") 86 | 87 | verbose = True 88 | 89 | # Parse command-line arguments. 90 | (options,args) = parser.parse_args() 91 | 92 | # Ensure all required options have been specified. 93 | if (options.basetypes_filename is None) or (options.decorators_filename is None) or (options.molecules_filename is None): 94 | parser.print_help() 95 | parser.error("All input files must be specified.") 96 | 97 | # Ensure the Decorator Behavior option has been specified right 98 | if not (options.decorator_behavior == 'simple-decorators' or options.decorator_behavior == 'combinatorial-decorators'): 99 | parser.print_help() 100 | parser.error("Option not valid for decorator behavior.") 101 | 102 | # Load and type all molecules in the specified dataset. 
103 | molecules = utils.read_molecules(options.molecules_filename, verbose=True) 104 | 105 | # Read reference typed molecules, if specified. 106 | reference_typed_molecules = None 107 | if options.reference_molecules_filename is not None: 108 | reference_typed_molecules = utils.read_molecules(options.reference_molecules_filename, verbose=True) 109 | 110 | # Construct atom type sampler. 111 | if options.element == 0: 112 | if verbose: print("Sampling all atomtypes") 113 | elif options.element > 0: 114 | if verbose: print("Sampling atoms with atomic number %i" % options.element) 115 | else: 116 | parser.print_help() 117 | parser.error("Element number must be 0 for all atoms or an integer greater than 0 for an atomic number") 118 | atomtype_sampler = smarty.AtomTypeSampler(molecules, options.basetypes_filename, options.initialtypes_filename, options.decorators_filename, replacements_filename=options.substitutions_filename, reference_typed_molecules=reference_typed_molecules, verbose=verbose, temperature=options.temperature, decorator_behavior=options.decorator_behavior, element = options.element) 119 | 120 | # Start sampling atom types. 121 | atomtype_sampler.run(options.iterations, options.traj_file) 122 | 123 | if options.plot_file is not None: 124 | if options.traj_file is None: 125 | print("Cannot create plot file without a trajectory file") 126 | else: 127 | smarty.score_utils.create_plot_file(options.traj_file, options.plot_file, False, verbose) 128 | -------------------------------------------------------------------------------- /smarty/cli_smirky.py: -------------------------------------------------------------------------------- 1 | """ 2 | Command-line driver example for SMIRKY. 
def main():
    """
    Command-line entry point for smirky.

    Parses the command-line options, reads the molecule set and the odds
    files, builds a ``smarty.FragmentSampler`` for the requested fragment
    type, runs the requested number of iterations, writes the resulting
    SMARTS file and finally plots the score trajectory
    (``<output>.csv`` -> ``<output>.pdf``).

    Notes
    -----
    * ``--molecules`` is required; the parser exits with an error otherwise.
    * ``--verbose`` is a string choice ('True'/'False'); it is converted to
      a real boolean before being passed on.
    * The usage text lists the option names exactly as they are registered
      below (``--atomOddsFile``, ``--bondOddsFile``, ``--substitutions``,
      ``--smirff``).
    """
    # Create command-line argument options.
    usage_string = """\
Sample over fragment types (atoms, bonds, angles, torsions, or impropers)
optionally attempting to match created types to an established SMIRFF.
For all files left blank, they will be taken from this module's
data/odds_files/ subdirectory.

usage %prog --molecules molfile --typetag fragmentType
        [--atomORbases AtomORbaseFile --atomORdecors AtomORdecorFile
        --atomANDdecors AtomANDdecorFile --bondORbase BondORbaseFile
        --bondANDdecors BondANDdecorFile --atomOddsFile AtomIndexFile
        --bondOddsFile BondIndexFile --substitutions substitutions
        --initialtypes initialFragmentsFile --smirff referenceSMIRFF
        --temperature float --verbose verbose
        --iterations iterations --output outputFile]

example:
smirky --molecules AlkEthOH_test_filt1_ff.mol2 --typetag Angle

"""
    # optparse only expands "%prog"; the package version has to be
    # interpolated explicitly (falls back to 'unknown' if not defined).
    version_string = "%prog " + str(getattr(smarty, "__version__", "unknown"))
    parser = OptionParser(usage=usage_string, version=version_string)

    parser.add_option("-m", "--molecules", metavar='MOLECULES',
            action="store", type="string", dest='molecules_filename', default=None,
            help="Small molecule set (in any OpenEye compatible file format) containing 'dG(exp)' fields with experimental hydration free energies. This filename can also be an option in this module's data/molecules sub-directory")
    #TODO: ask about the the dG(exp) fields?

    parser.add_option("-T", "--typetag", metavar='TYPETAG',
            action = "store", type="choice", dest='typetag',
            default=None, choices = ['VdW', 'Bond', 'Angle', 'Torsion', 'Improper'],
            help="type of fragment being sampled, options are 'VdW', 'Bond', 'Angle', 'Torsion', 'Improper'")

    parser.add_option('-e', '--atomORbases', metavar="DECORATORS",
            action='store', type='string', dest='atom_OR_bases',
            default = 'odds_files/atom_OR_bases.smarts',
            help="Filename defining atom OR bases and associated probabilities. These are combined with atom OR decorators in SMIRKS, for example in '[#6X4,#7X3;R2:2]' '#6' and '#7' are atom OR bases. (OPTIONAL)")

    parser.add_option("-O", "--atomORdecors", metavar="DECORATORS",
            action='store', type='string', dest='atom_OR_decorators',
            default = 'odds_files/atom_decorators.smarts',
            help="Filename defining atom OR decorators and associated probabilities. These are combined with atom bases in SMIRKS, for example in '[#6X4,#7X3;R2:2]' 'X4' and 'X3' are ORdecorators. (OPTIONAL)")

    parser.add_option('-A', '--atomANDdecors', metavar="DECORATORS",
            action='store', type='string', dest='atom_AND_decorators',
            default='odds_files/atom_decorators.smarts',
            help="Filename defining atom AND decorators and associated probabilities. These are added to the end of an atom's SMIRKS, for example in '[#6X4,#7X3;R2:2]' 'R2' is an AND decorator. (OPTIONAL)")

    parser.add_option('-o', '--bondORbase', metavar="DECORATORS",
            action='store', type='string', dest='bond_OR_bases',
            default='odds_files/bond_OR_bases.smarts',
            help="Filename defining bond OR bases and their associated probabilities. These are OR'd together to describe a bond, for example in '[#6]-,=;@[#6]' '-' and '=' are OR bases. (OPTIONAL)")

    parser.add_option('-a', '--bondANDdecors', metavar="DECORATORS",
            action="store", type='string', dest='bond_AND_decorators',
            default='odds_files/bond_AND_decorators.smarts',
            help="Filename defining bond AND decorators and their associated probabilities. These are AND'd to the end of a bond, for example in '[#6]-,=;@[#7]' '@' is an AND decorator.(OPTIONAL)")

    parser.add_option('-D', '--atomOddsFile', metavar="ODDSFILE",
            action="store", type="string", dest="atom_odds",
            default='odds_files/atom_index_odds.smarts',
            help="Filename defining atom descriptors and probabilities with making changes to that kind of atom. Options for descriptors are integers corresponding to that indexed atom, 'Indexed', 'Unindexed', 'Alpha', 'Beta', 'All'. (OPTIONAL)")

    parser.add_option('-d', '--bondOddsFile', metavar="ODDSFILE",
            action="store", type="string", dest="bond_odds",
            default='odds_files/bond_index_odds.smarts',
            help="Filename defining bond descriptors and probabilities with making changes to that kind of bond. Options for descriptors are integers corresponding to that indexed bond, 'Indexed', 'Unindexed', 'Alpha', 'Beta', 'All'. (OPTIONAL)")

    parser.add_option("-s", "--substitutions", metavar="SUBSTITUTIONS",
            action="store", type="string", dest='substitutions_filename',
            default=None,
            help="Filename defining substitution definitions for SMARTS atom matches. (OPTIONAL).")

    parser.add_option("-f", "--initialtypes", metavar='INITIALTYPES',
            action="store", type="string", dest='initialtypes_filename',
            default=None,
            help="Filename defining initial fragment types. The file is formatted with two columns: 'SMIRKS typename'. For the default the initial type will be a generic form of the given fragment, for example '[*:1]~[*:2]' for a bond (OPTIONAL)")

    parser.add_option('-r', '--smirff', metavar='REFERENCE',
            action='store', type='string', dest='SMIRFF',
            default=None,
            help="Filename defining a SMIRFF force field used to determine reference fragment types in provided set of molecules. It may be an absolute file path, a path relative to the current working directory, or a path relative to this module's data subdirectory (for built in force fields). (OPTIONAL)")

    parser.add_option("-i", "--iterations", metavar='ITERATIONS',
            action="store", type="int", dest='iterations',
            default=150,
            help="MCMC iterations.")

    parser.add_option("-t", "--temperature", metavar='TEMPERATURE',
            action="store", type="float", dest='temperature',
            default=0.1,
            help="Effective temperature for Monte Carlo acceptance, indicating fractional tolerance of mismatched atoms (default: 0.1). If 0 is specified, will behave in a greedy manner.")

    parser.add_option("-p", "--output", metavar='OUTPUT',
            action="store", type="string", dest='outputfile',
            default=None,
            help="Filename base for output information. This same base will be used for all output files created. If None provided then it is set to 'typetag_temperature' (OPTIONAL).")

    parser.add_option('-v', '--verbose', metavar='VERBOSE',
            action='store', type='choice', dest='verbose',
            default=False, choices = ['True', 'False'],
            help="If True prints minimal information to the commandline during iterations. (OPTIONAL)")

    # Parse command-line arguments.
    (option, args) = parser.parse_args()

    # Molecules are required
    if option.molecules_filename is None:
        parser.print_help()
        parser.error("Molecules input files must be specified.")

    # The option is a string choice; only the literal 'True' enables output.
    verbose = option.verbose == 'True'
    # Load and type all molecules in the specified dataset.
    molecules = utils.read_molecules(option.molecules_filename, verbose=verbose)

    # Parse input odds files
    atom_OR_bases = smarty.parse_odds_file(option.atom_OR_bases, verbose)
    atom_OR_decorators = smarty.parse_odds_file(option.atom_OR_decorators, verbose)
    atom_AND_decorators = smarty.parse_odds_file(option.atom_AND_decorators, verbose)
    bond_OR_bases = smarty.parse_odds_file(option.bond_OR_bases, verbose)
    bond_AND_decorators = smarty.parse_odds_file(option.bond_AND_decorators, verbose)
    atom_odds = smarty.parse_odds_file(option.atom_odds, verbose)
    bond_odds = smarty.parse_odds_file(option.bond_odds, verbose)

    # get initial types if provided, otherwise none
    if option.initialtypes_filename is None:
        initialtypes = None
    else:
        initialtypes = smarty.AtomTyper.read_typelist(option.initialtypes_filename)

    # Default output base name is '<typetag>_<temperature>'
    output = option.outputfile
    if output is None:
        output = "%s_%.2e" % ( option.typetag, option.temperature)

    # get replacements
    if option.substitutions_filename is None:
        sub_file = smarty.get_data_filename('odds_files/substitutions.smarts')
    else:
        sub_file = option.substitutions_filename
    replacements = smarty.AtomTyper.read_typelist(sub_file)
    # The file lists (smarts, short name); the sampler is given (short name, smarts)
    replacements = [ (short, smarts) for (smarts, short) in replacements]

    start_sampler = time.time()
    fragment_sampler = smarty.FragmentSampler(
            molecules, option.typetag, atom_OR_bases, atom_OR_decorators,
            atom_AND_decorators, bond_OR_bases, bond_AND_decorators,
            atom_odds, bond_odds, replacements, initialtypes,
            option.SMIRFF, option.temperature, output)
    # report time
    finish_sampler = time.time()
    elapsed = finish_sampler - start_sampler
    if verbose: print("Creating %s sampler took %.3f s" % (option.typetag, elapsed))

    # Make iterations
    frac_found = fragment_sampler.run(option.iterations, verbose)
    fragment_sampler.write_results_smarts_file()
    finished = time.time()
    elapsed = finished - finish_sampler
    # Guard against '--iterations 0', which previously divided by zero.
    per_it = elapsed / float(option.iterations) if option.iterations else 0.0
    if verbose: print("%i iterations took %.3f s (%.3f s / iteration)" % (option.iterations, elapsed, per_it))
    if verbose: print("Final score was %.3f %%" % (frac_found*100.0))

    # plot results
    plot_file = "%s.pdf" % output
    traj = "%s.csv" % output
    smarty.score_utils.create_plot_file(traj, plot_file, False, verbose)
7 | 8 | Comments beginning with `%` are ignored throughout the file. 9 | Each line has the format 10 | ``` 11 | <SMARTS> <typename> 12 | ``` 13 | where `<SMARTS>` is an [OpenEye SMARTS string](https://docs.eyesopen.com/toolkits/cpp/oechemtk/SMARTS.html) and `<typename>` is a human-readable typename associated with that atom type. 14 | 15 | Atom type definitions are hierarchical, with the last match in the file taking precedence over earlier matches. 16 | 17 | ### Initial and Base types 18 | 19 | These are both used to initialize the smarty sampler. 20 | `basetypes` are considered more generic. 21 | These are the atomtypes used to create new atomtypes. 22 | See the file `basetypes.smarts`. 23 | 24 | `initial` types can be more complex 25 | for example the files 26 | `initialtypes.smarts` or `initial_AlkEthOH.smarts` 27 | 28 | Best practices should have base and initial types that are listed from most to 29 | least general 30 | 31 | ### Simple and Combinatorial Decorators 32 | 33 | A `decorators` file contains a list of SMARTS 34 | 35 | In smarty, when using simple decorators, the new atomtypes are created only 36 | by ANDing the decorator SMARTS component to the parent atomtype (using the `&` operator). 37 | The human-readable `<typename>` is appended (with a space) to the parent name to keep a human-readable annotation of the proposed child atom type. 38 | 39 | 40 | Example simple decorators are in *`decorators.smarts`* and are typically more complicated as they must include all 41 | ways of generating new atomtypes 42 | 43 | Combinatorial decorators use a more complex set of rules to generate new SMARTS strings. 44 | In this case, bonded atoms are found in the basetypes, so only "non-bonding decorators" need to be 45 | in the decorator file. 46 | For example, see *`new-decorators.smarts`* 47 | 48 | ### Substitutions 49 | 50 | It is often convenient to define various tokens that are substituted for more sophisticated SMARTS expressions.
51 | 52 | For example, we could define some elemental substitutions along with some substitutions for halogens: 53 | ``` 54 | % elements 55 | [#9] fluorine 56 | [#17] chlorine 57 | [#35] bromine 58 | [#53] iodine 59 | 60 | % halogens 61 | [$smallhals,$largehals] halogen 62 | [$fluorine,$chlorine] smallhals 63 | [$bromine,$iodine] largehals 64 | ``` 65 | 66 | The [`OESmartsLexReplace`](http://docs.eyesopen.com/toolkits/python/oechemtk/OEChemFunctions/OESmartsLexReplace.html) function is used to implement these replacements. 67 | 68 | ## Manifest 69 | * `basetypes.smarts` - basetypes file with elemental atom types - this is a good choice to begin with 70 | * `initialtypes.smarts` - basetypes file with more sophisticated atom types 71 | * `initial_AlkEthOH.smarts` - the "answer" SMARTS strings for the AlkEthOH molecule set 72 | * `decorators.smarts` - `decorators` file with a variety of decorators 73 | * `decorators-simple.smarts` - minimal `decorators` file for testing 74 | * `new-decorators.smarts` - decorators file without bond information (new modular framework) 75 | * `replacements.smarts` - minimal `substitutions` file 76 | -------------------------------------------------------------------------------- /smarty/data/atomtypes/basetypes.smarts: -------------------------------------------------------------------------------- 1 | % atom types 2 | [*] any_atom 3 | [$ewg1] ewg1 4 | [$ewg2] ewg2 5 | [#1] hydrogen 6 | [#6] carbon 7 | [#7] nitrogen 8 | [#8] oxygen 9 | [#9] fluorine 10 | [#15] phosphorous 11 | [#16] sulfur 12 | [#17] chlorine 13 | [#34] selenium 14 | [#35] bromine 15 | [#53] iodine 16 | -------------------------------------------------------------------------------- /smarty/data/atomtypes/decorators-simple.smarts: -------------------------------------------------------------------------------- 1 | % aromatic/aliphatic 2 | a aromatic 3 | A aliphatic 4 | % halogens 5 | $(*~[$halogen]) halogen-adjacent 6 | 
-------------------------------------------------------------------------------- /smarty/data/atomtypes/decorators.smarts: -------------------------------------------------------------------------------- 1 | % bond order 2 | $([*]=[*]) double-bonded 3 | $([*]#[*]) triple-bonded 4 | $([*]:[*]) aromatic-bonded 5 | % bonded to atoms 6 | $(*~[#1]) hydrogen-adjacent 7 | $(*~[#6]) carbon-adjacent 8 | $(*~[#7]) nitrogen-adjacent 9 | $(*~[#8]) oxygen-adjacent 10 | $(*~[#9]) fluorine-adjacent 11 | $(*~[#15]) phosphorous-adjacent 12 | $(*~[#16]) sulfur-adjacent 13 | $(*~[#17]) chlorine-adjacent 14 | $(*~[#35]) bromine-adjacent 15 | $(*~[#53]) iodine-adjacent 16 | % degree 17 | D1 degree-1 18 | D2 degree-2 19 | D3 degree-3 20 | D4 degree-4 21 | D5 degree-5 22 | D6 degree-6 23 | % valence 24 | v1 valence-1 25 | v2 valence-2 26 | v3 valence-3 27 | v4 valence-4 28 | v5 valence-5 29 | v6 valence-6 30 | % total-h-count 31 | H1 total-h-count-1 32 | H2 total-h-count-2 33 | H3 total-h-count-3 34 | % aromatic/aliphatic 35 | a aromatic 36 | A aliphatic 37 | % halogens 38 | $(*~[$halogen]) halogen-adjacent 39 | $(*~[$smallhals]) small-halogen-adjacent 40 | $(*~[$largehals]) large-halogen-adjacent 41 | -------------------------------------------------------------------------------- /smarty/data/atomtypes/initial_AlkEthOH.smarts: -------------------------------------------------------------------------------- 1 | % atom types 2 | [$([#1]-C)] hydrogen-carbon 3 | [$([#1]-C-[#7,#8,F,#16,Cl,Br])] hydrogen-carbon-ewd 4 | [$([#1]-C(-[#7,#8,F,#16,Cl,Br])-[#7,#8,F,#16,Cl,Br])] hydrogen-carbon-ewd2 5 | [$([#1]-C(-[#7,#8,F,#16,Cl,Br])(-[#7,#8,F,#16,Cl,Br])-[#7,#8,F,#16,Cl,Br])] hydrogen-carbon-ewd3 6 | [#1$(*-[#8])] hydrogen-oxygen 7 | [#6X4] carbon-tet 8 | [#8X2] oxygen-dival 9 | [#8X2+0$(*-[#1])] oxygen-hydrogen 10 | -------------------------------------------------------------------------------- /smarty/data/atomtypes/initialtypes.smarts: 
-------------------------------------------------------------------------------- 1 | % atom types 2 | [#1] hydrogen 3 | [#6] carbon 4 | [#6&a] carbon aromatic 5 | [#7] nitrogen 6 | [#8] oxygen 7 | [#9] fluorine 8 | [#15] phosphorous 9 | [#16] sulfur 10 | [#17] chlorine 11 | [#35] bromine 12 | [#53] iodine 13 | -------------------------------------------------------------------------------- /smarty/data/atomtypes/new-decorators.smarts: -------------------------------------------------------------------------------- 1 | % Size of smallest ring 2 | r3 3 | r4 4 | r5 5 | r6 6 | % Number of ring bonds 7 | R0 8 | R2 9 | R3 10 | R4 11 | R 12 | % total connectivity 13 | X1 connections-1 14 | X2 connections-2 15 | X3 connections-3 16 | X4 connections-4 17 | % total-h-count 18 | H0 total-h-count-0 19 | H1 total-h-count-1 20 | H2 total-h-count-2 21 | H3 total-h-count-3 22 | % formal charge 23 | +0 neutral 24 | +1 cationic+1 25 | -1 anionic-1 26 | % aromatic/aliphatic 27 | a aromatic 28 | A aliphatic 29 | -------------------------------------------------------------------------------- /smarty/data/atomtypes/replacements.smarts: -------------------------------------------------------------------------------- 1 | % Substitution definitions 2 | % Format: 3 | % 4 | 5 | % elements 6 | [#1] hydrogen 7 | [#6] carbon 8 | [#7] nitrogen 9 | [#8] oxygen 10 | [#9] fluorine 11 | [#15] phosphorous 12 | [#16] sulfur 13 | [#17] chlorine 14 | [#35] bromine 15 | [#53] iodine 16 | 17 | % electron withdrawing groups 18 | [#7!-1,#8,#16] ewg2 19 | [#7!-1,#8!-1,#16!-1,$halogen] ewg1 20 | 21 | % halogens 22 | [$smallhals,$largehals] halogen 23 | [$fluorine,$chlorine] smallhals 24 | [$bromine,$iodine] largehals 25 | -------------------------------------------------------------------------------- /smarty/data/odds_files/atom_OR_bases.smarts: -------------------------------------------------------------------------------- 1 | % Decorator Odds 2 | % elements 3 | [#1] 4 | [#5] 5 | [#6] 6 | [#7] 7 | [#8] 8 
| [#9] 9 | [#14] 10 | [#15] 11 | [#16] 12 | [#17] 13 | [#35] 14 | [#53] 15 | % substitution groups 16 | $ewg1 17 | $ewg2 18 | -------------------------------------------------------------------------------- /smarty/data/odds_files/atom_decorators.smarts: -------------------------------------------------------------------------------- 1 | % Decorator Odds 2 | % Size of smallest ring 3 | r3 4 | r4 5 | r5 6 | r6 7 | % Number of ring bonds 8 | R0 9 | R2 10 | R3 11 | R4 12 | R 13 | !R0 14 | !R2 15 | !R3 16 | !R4 17 | !R 18 | % total connectivity 19 | X1 20 | X2 21 | X3 22 | X4 23 | !X1 24 | !X2 25 | !X3 26 | !X4 27 | % total hydrogen count 28 | H0 29 | !H0 30 | H1 31 | !H1 32 | H2 33 | !H2 34 | H3 35 | !H3 36 | % aromatic/aliphatic 37 | a 38 | !a 39 | A 40 | !A 41 | % charges 42 | -1 43 | +0 44 | +1 45 | % no decorator 46 | '' 47 | -------------------------------------------------------------------------------- /smarty/data/odds_files/atom_index_odds.smarts: -------------------------------------------------------------------------------- 1 | % Descriptor odds 2 | % used in the default, all equally likely 3 | all 1 4 | % 5 | % Other options remember to use indices appropriately 6 | 1 0 7 | 2 0 8 | 3 0 9 | 4 0 10 | Indexed 0 11 | Unindexed 0 12 | Alpha 0 13 | Beta 0 14 | -------------------------------------------------------------------------------- /smarty/data/odds_files/bond_AND_decorators.smarts: -------------------------------------------------------------------------------- 1 | % Decorator Odds 2 | @ 1 3 | !@ 1 4 | !# 0 5 | -------------------------------------------------------------------------------- /smarty/data/odds_files/bond_OR_bases.smarts: -------------------------------------------------------------------------------- 1 | % Decorator Odds 2 | % bond types 3 | - 4 | : 5 | = 6 | # 7 | % not bond types 8 | !- 9 | !: 10 | != 11 | !# 12 | -------------------------------------------------------------------------------- 
/smarty/data/odds_files/bond_index_odds.smarts: -------------------------------------------------------------------------------- 1 | % Descriptor odds 2 | % used in the default, all equally likely 3 | all 1 4 | % 5 | % Other options remember to use indices appropriately 6 | 1 0 7 | 2 0 8 | 3 0 9 | Indexed 0 10 | Unindexed 0 11 | Alpha 0 12 | Beta 0 13 | -------------------------------------------------------------------------------- /smarty/data/odds_files/substitutions.smarts: -------------------------------------------------------------------------------- 1 | % Substitution definitions 2 | % Format: 3 | % 4 | % halogens 5 | [#7!-1,#8,#16] ewg2 6 | [#7!-1,#8!-1,#16!-1,$halogen] ewg1 7 | [$smallhals,$largehals] halogen 8 | [#9,#17] smallhals 9 | [#35,#53] largehals 10 | -------------------------------------------------------------------------------- /smarty/sampler.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | #============================================================================================= 4 | # MODULE DOCSTRING 5 | #============================================================================================= 6 | 7 | """ 8 | smarty.py 9 | Example illustrating a scheme to create and destroy atom types automatically using SMARTS. 10 | AUTHORS 11 | John Chodera , Memorial Sloan Kettering Cancer Center. 12 | Additional contributions from the Mobley lab, UC Irvine, including David Mobley, Caitlin Bannan, and Camila Zanette. 13 | The AtomTyper class is based on 'patty' by Pat Walters, Vertex Pharmaceuticals. 
def __init__(self, molecules, basetypes_filename, initialtypes_filename, decorators_filename, replacements_filename=None, reference_typed_molecules=None, temperature=0.1, verbose=False, decorator_behavior='combinatorial-decorators', element = 0):
    """
    Initialize an atom type sampler.

    The constructor reads the type/decorator lists, de-duplicates the input
    molecules by isomeric SMILES, discards base and initial types that match
    nothing in the molecule set, types the molecules with the surviving
    initial types and, when reference molecules are given, computes the
    initial best match against the reference atom types.

    ARGUMENTS
    molecules : list of molecules for typing
        List of molecules for typing
    basetypes_filename : str
        File defining base/generic atom types (which cannot be destroyed); often these are elemental types
    initialtypes_filename :
        File defining initial atom types (which CAN be destroyed, except for those which occur in basetypes_filename
    decorators_filename : str
        File containing decorators that can be added to existing types to generate subtypes
    replacements_filename : str, optional, default=None
        If specified, SMARTS replacement definitions will be read from this file
    reference_typed_molecules : list of OEMol, optional, default=None
        List of molecules with reference types for use in Monte Carlo acceptance.
        If specified, the likelihood function will utilize the maximal number of matched atom types with these molecules.
        If not specified, no likelihood function will be employed.
        Entries are looked up by the same index as `molecules`, so the two
        lists are expected to be parallel.
    temperature : float, optional, default=0.1
        Temperature for Monte Carlo acceptance/rejection
    verbose : bool, optional, default=False
        If True, verbose output will be printed.
    decorator_behavior : string either "combinatorial-decorators" or "simple-decorators"
        simple decorators include bonded atoms as decorators
    element : integer >= 0
        If 0 all atomtypes sampled, otherwise only atomtypes of that atomic number are sampled
    Notes
    -----
    This is just a proof of concept for chemical perception sampling.
    Scoring for proposed atomtypes is based on reference atomtypes.
    No scoring of molecular properties is performed
    """
    # store simple input information
    self.verbose = verbose
    self.decorator_behavior = decorator_behavior
    self.typetag = 'atomtype' # internal tag
    self.temperature = temperature
    self.element = element

    # Read atomtypes (initial and base) and decorators.
    self.atomtypes = AtomTyper.read_typelist(initialtypes_filename)
    self.basetypes = AtomTyper.read_typelist(basetypes_filename)
    self.decorators = AtomTyper.read_typelist(decorators_filename)
    # NOTE(review): replacements_filename may be None here; smarts_matches
    # checks `self.replacements is not None`, so read_typelist presumably
    # returns None in that case - confirm against AtomTyper.
    self.replacements = AtomTyper.read_typelist(replacements_filename)

    # Store a deep copy of the molecules since they will be annotated
    # loop through input molecules to remove repeats
    self.molecules = list()
    if reference_typed_molecules is not None:
        self.reference_typed_molecules = list()
    else: self.reference_typed_molecules = None

    # Isomeric SMILES strings already seen; used to skip duplicate molecules
    smiles = set()
    for idx, mol in enumerate(molecules):
        smile = OECreateIsoSmiString(mol)
        if not smile in smiles:
            self.molecules.append(OEMol(mol))
            smiles.add(smile)
            # Keep the reference list parallel to self.molecules
            if reference_typed_molecules is not None:
                ref_mol = OEMol(reference_typed_molecules[idx])
                ref_smile = OECreateIsoSmiString(ref_mol)
                # TODO: add ref_smile == smile check?
                self.reference_typed_molecules.append(OEMol(ref_mol))

    # Save bond list to use throughout
    bondset = [("-","singly"), ("=", "doubly"), ("#","triply"), (":", "aromatic")]

    used_basetypes = list()
    # SMARTS strings known to match nothing in the molecule set
    self.atomtypes_with_no_matches = set()
    # Check all SMART strings that are used as a base type
    for (smarts, atom_type) in self.basetypes:
        check_basetype = self.smarts_matches(smarts)
        if check_basetype:
            # Keep used base types
            used_basetypes.append( ( smarts, atom_type) )
        else:
            # Remove unused base types
            self.atomtypes_with_no_matches.add( smarts )
    self.basetypes = copy.deepcopy(used_basetypes)
    if verbose:
        print("USED BASE TYPES:")
        for (smarts, typename) in self.basetypes:
            print("%10s %25s" % (smarts, typename))

    # Calculate which bonds in set are used
    # (typed on a scratch copy so self.molecules is not annotated here)
    bond_typelist = [("[*]%s[*]" %bond, name) for (bond, name) in bondset]
    tmpmolecules = copy.deepcopy(self.molecules)
    self.type_molecules(bond_typelist, tmpmolecules, 0)
    [bond_typecounts, molecule_bond_typecounts] = self.compute_type_statistics( bondset, tmpmolecules, 0)
    if self.verbose:
        print("USED BOND TYPES:")
        self.show_type_statistics(bondset, bond_typecounts, molecule_bond_typecounts)

    # only save bonds that are used; the generic '~' bond is always kept
    self.bondset = [ ('~', 'any') ]
    for (bond, name) in bondset:
        if bond_typecounts[name] > 0:
            self.bondset.append( (bond, name) )

    # Rename base/initial types to ensure their names are unique
    # clashes between initial and target types will cause problems
    for idx, [smarts, typename] in enumerate(self.atomtypes):
        self.atomtypes[idx] = (smarts, 'c_'+typename)
    for idx, [smarts, typename] in enumerate(self.basetypes):
        self.basetypes[idx] = (smarts, 'c_'+typename)

    # Store smarts for basetypes
    self.basetypes_smarts = [ smarts for (smarts, name) in self.basetypes ]

    # Add any base types not already there to the initial types
    initial_smarts = [ smarts for (smarts, name) in self.atomtypes ]
    missing_basetypes = list()
    for [smarts, typename] in self.basetypes:
        if smarts not in initial_smarts:
            missing_basetypes.append( (smarts, typename) )
            if self.verbose: print("Added base (generic) type `%s`, name %s, to initial types." % (smarts, typename))

    # Missing base types are placed in front of the initial types
    self.atomtypes = missing_basetypes + self.atomtypes

    # Type all molecules with current typelist to ensure that starting types are sufficient.
    self.type_molecules(self.atomtypes, self.molecules, self.element)

    # Compute atomtype statistics on molecules for current atomtype set
    [atom_typecounts, molecule_typecounts] = self.compute_type_statistics(self.atomtypes, self.molecules, self.element)
    if self.verbose:
        print("MATCHED INITIAL TYPES:")
        self.show_type_statistics(self.atomtypes, atom_typecounts, molecule_typecounts)

    # Track only used atomtypes and add unused to atomtypes with no matches
    used_initial_atomtypes = list()
    for (smarts, atom_type) in self.atomtypes:
        if atom_typecounts[atom_type] > 0:
            used_initial_atomtypes.append( (smarts, atom_type) )
        else:
            self.atomtypes_with_no_matches.add( smarts )
            if self.verbose: print("Removing initial atom type '%s', as it matches no atoms" % smarts)
    self.atomtypes = copy.deepcopy(used_initial_atomtypes)
    self.initial_atomtypes = copy.deepcopy(used_initial_atomtypes)

    # Type molecules again with the updated atomtype list
    self.type_molecules(self.atomtypes, self.molecules, self.element)

    # These are atomtypes where not all children have been matched
    self.unmatched_atomtypes = copy.deepcopy(self.atomtypes)

    # Create dictionary to store children of initial atom types
    self.parents = dict()
    for [smarts, typename] in self.atomtypes:
        # store empty list of children for each atomtype
        self.parents[smarts] = []
    # store reverse parent dictionary with child to parent
    self.child_to_parent = self._switch_parent_dict()

    # Compute total atoms (kept as a float; used as a denominator elsewhere)
    self.total_atoms = 0.0
    for molecule in self.molecules:
        for atom in self._GetAtoms(molecule, self.element):
            self.total_atoms += 1.0

    # Store reference molecules
    self.reference_atomtypes = set()
    self.current_atom_matches = None
    # NOTE: the attributes assigned in this branch (atom_type_matches,
    # total_atom_type_matches, reference_atomtypes_atomcount) are only
    # created when reference molecules were supplied.
    if self.reference_typed_molecules is not None:
        # Extract list of reference atom types
        for molecule in self.reference_typed_molecules:
            for atom in self._GetAtoms(molecule, self.element):
                atomtype = atom.GetType()
                self.reference_atomtypes.add(atomtype)
        self.reference_atomtypes = list(self.reference_atomtypes)
        # Compute current atom matches
        [self.atom_type_matches, self.total_atom_type_matches] = self.best_match_reference_types(self.atomtypes, self.molecules)
        # Count atom types.
        self.reference_atomtypes_atomcount = { atomtype : 0 for atomtype in self.reference_atomtypes }
        for molecule in self.reference_typed_molecules:
            for atom in self._GetAtoms(molecule, self.element):
                atomtype = atom.GetType()
                self.reference_atomtypes_atomcount[atomtype] += 1
    return
iterator over the atoms based on the molecule and element number 269 | """ 270 | if element > 0: 271 | return molecule.GetAtoms(OEHasAtomicNum(element)) 272 | else: 273 | return molecule.GetAtoms() 274 | 275 | def best_match_reference_types(self, atomtypes, molecules): 276 | """ 277 | Determine best match for each parameter with reference atom types 278 | Parameters 279 | ---------- 280 | atomtypes : 281 | Current atom types 282 | molecules : list of OEMol 283 | Typed molecules, where types are stored in self.atomtypetag string data. 284 | Returns 285 | ------- 286 | atom_type_matches : list of tuples (current_atomtype, reference_atomtype, counts) 287 | Best correspondence between current and reference atomtypes, along with number of atoms equivalently typed in reference molecule set. 288 | total_atom_type_matches : int 289 | The total number of correspondingly typed atoms in the reference molecule set. 290 | * Currently, types for reference typed molecules are accessed via atom.GetType(), while types for current typed molecules are accessed via atom.GetStringData(self.typetag). 291 | This should be homogenized. 292 | Contributor: 293 | * Josh Fass contributed this algorithm. 294 | """ 295 | if self.reference_typed_molecules is None: 296 | if self.verbose: print('No reference molecules specified, so skipping likelihood calculation.') 297 | return None 298 | 299 | # Create bipartite graph (U,V,E) matching current atom types U with reference atom types V via edges E with weights equal to number of atoms typed in common. 
300 | if self.verbose: print('Creating graph matching current atom types with reference atom types...') 301 | initial_time = time.time() 302 | graph = nx.Graph() 303 | 304 | # Get current atomtypes and reference atom types 305 | current_atomtypes = [ typename for (smarts, typename) in atomtypes ] 306 | reference_atomtypes = [ typename for typename in self.reference_atomtypes ] 307 | # check that current atom types are not in reference atom types 308 | if set(current_atomtypes) & set(reference_atomtypes): 309 | raise Exception("Current and reference atom types must be unique") 310 | # Add current atom types 311 | for atomtype in current_atomtypes: 312 | graph.add_node(atomtype, bipartite=0) 313 | # Add reference atom types 314 | for atomtype in reference_atomtypes: 315 | graph.add_node(atomtype, bipartite=1) 316 | # Add edges. 317 | atoms_in_common = dict() 318 | # Make an entry in the dictionary for each pair of types 319 | for current_atomtype in current_atomtypes: 320 | for reference_atomtype in reference_atomtypes: 321 | atoms_in_common[(current_atomtype,reference_atomtype)] = 0 322 | # Loop through all molecules 323 | for (current_typed_molecule, reference_typed_molecule) in zip(molecules, self.reference_typed_molecules): 324 | current_atoms = self._GetAtoms(current_typed_molecule, self.element) 325 | reference_atoms = self._GetAtoms(reference_typed_molecule, self.element) 326 | # For each atom add a count to the current/referance atomtype pair 327 | for (current_typed_atom, reference_typed_atom) in zip(current_atoms, reference_atoms): 328 | current_atomtype = current_typed_atom.GetStringData(self.typetag) 329 | reference_atomtype = reference_typed_atom.GetType() 330 | atoms_in_common[(current_atomtype,reference_atomtype)] += 1 331 | # Make weighted edges connecting the current and reference nodes 332 | for current_atomtype in current_atomtypes: 333 | for reference_atomtype in reference_atomtypes: 334 | weight = 
atoms_in_common[(current_atomtype,reference_atomtype)] 335 | graph.add_edge(current_atomtype, reference_atomtype, weight=weight) 336 | elapsed_time = time.time() - initial_time 337 | if self.verbose: print('Graph creation took %.3f s' % elapsed_time) 338 | 339 | # Compute maximum match using networkx algorithm 340 | if self.verbose: print('Computing maximum weight match...') 341 | initial_time = time.time() 342 | mate = nx.algorithms.max_weight_matching(graph, maxcardinality=False) 343 | elapsed_time = time.time() - initial_time 344 | if self.verbose: print('Maximum weight match took %.3f s' % elapsed_time) 345 | 346 | # Compute match dictionary and total number of matches. 347 | atom_type_matches = list() 348 | total_atom_type_matches = 0 349 | for current_atomtype in current_atomtypes: 350 | if current_atomtype in mate: 351 | reference_atomtype = mate[current_atomtype] 352 | counts = graph[current_atomtype][reference_atomtype]['weight'] 353 | total_atom_type_matches += counts 354 | atom_type_matches.append( (current_atomtype, reference_atomtype, counts) ) 355 | else: 356 | atom_type_matches.append( (current_atomtype, None, None) ) 357 | 358 | # Report on matches 359 | if self.verbose: 360 | print("PROPOSED:") 361 | self.show_type_matches(atom_type_matches) 362 | 363 | return (atom_type_matches, total_atom_type_matches) 364 | 365 | def show_type_matches(self, atom_type_matches): 366 | """ 367 | Show pairing of current to reference atom types. 
def AtomDecorator(self, atom1type, decorator):
    """
    Given an atom and a decorator amend the SMARTS string with that decorator

    Parameters
    -----------
    atom1type : atomtype tuple in form (smarts, typename)
    decorator : decorator being added to current atom, as a tuple
        (smarts fragment, decorator name)

    Returns
    -------
    decorated atomtype as a tuple (smarts, typename)
    """
    smarts = atom1type[0]
    if self.HasAlpha(atom1type):
        # decorators should go before the $ sign on the atom, so the SMARTS is
        # split at the first '$' and the decorator is inserted between the halves
        dollar = smarts.find('$')
        proposed_atomtype = smarts[:dollar] + decorator[0] + smarts[dollar:]
    else:
        # No alpha atom so the decorator goes before the ']'
        proposed_atomtype = smarts[:-1] + decorator[0] + ']'
    proposed_typename = atom1type[1] + ' ' + decorator[1]

    return (proposed_atomtype, proposed_typename)
def HasAlpha(self, atom1type):
    """
    Report whether an atomtype already carries an alpha substituent.

    Parameter
    ---------
    atom1type : an atomtype tuple (smarts, typename)

    Returns
    -------
    True if atomtype has at least 1 alpha substituent otherwise False
    """
    # An alpha substituent shows up as [#1] --> [#1$(*~[#6])],
    # so the marker to look for in the SMARTS is '$(*'
    primary_smarts = atom1type[0]
    return '$(*' in primary_smarts

def AddAlphaSubstituentAtom(self, atom1type, bondset, atom2type):
    """
    Append a new alpha substituent to the primary atom.

    The substituent is placed after any alpha atoms already present, e.g.
    '[#1$(*~[#6])]' plus the alpha atom [#8] becomes '[#1$(*~[#6])$(*~[#8])]'

    Parameters
    ----------
    atom1type : current atomtype (smarts, typename)
    bondset : bondtype to connect two atoms (smarts, bondname)
    atom2type : atom to be added (smarts, typename)

    Returns
    -------
    Atomtype with new alpha substituent (smarts, typename)
    """
    parent_smarts, parent_name = atom1type[0], atom1type[1]
    # Drop the final character (the closing ']') and re-close after the new piece
    opened = parent_smarts[:-1]
    substituent = '$(*' + bondset[0] + atom2type[0] + ')]'
    name_parts = [parent_name, bondset[1], atom2type[1], '']
    return (opened + substituent, ' '.join(name_parts))
def sample_atomtypes(self):
    """
    Perform one step of atom type sampling.

    With equal probability the move either removes a current atomtype or
    creates a child atom type from an existing one. The proposed atomtype
    list is then scored against the reference types (when reference
    molecules are available) and the move is accepted or rejected.

    Returns
    -------
    True if the proposal was accepted (the sampler state is updated),
    False if it was rejected (the sampler state is left untouched).
    """
    # Copy current atomtypes for proposal.
    proposed_atomtypes = copy.deepcopy(self.atomtypes)
    proposed_molecules = copy.deepcopy(self.molecules)
    proposed_parents = copy.deepcopy(self.parents)

    if random.random() < 0.5:
        # Pick a random index and remove atomtype at that index
        (atomtype, typename) = self.PickAnAtom(proposed_atomtypes)
        if self.verbose: print("Attempting to destroy atom type %s : %s..." % (atomtype, typename))

        # Reject deletion of (populated) base types as we want to retain
        # generics even if empty
        if atomtype in self.basetypes_smarts:
            if self.verbose: print("Destruction rejected for atom type %s because this is a generic type which was initially populated." % atomtype )
            return False

        # remove chosen atomtype
        proposed_atomtypes.remove( (atomtype, typename) )
        # update proposed parent dictionary: the removed type's children are
        # handed to its parent before the type's own entry is dropped
        for parent, children in proposed_parents.items():
            if atomtype in [at for (at, tn) in children]:
                children += proposed_parents[atomtype]
                children.remove( (atomtype, typename) )

        del proposed_parents[atomtype]

        # Try to type all molecules.
        try:
            self.type_molecules(proposed_atomtypes, proposed_molecules, self.element)
        except AtomTyper.TypingException as e:
            # Reject since typing failed.
            if self.verbose: print("Typing failed; rejecting.")
            return False
    else:
        if self.decorator_behavior == 'simple-decorators':
            # Pick an atomtype to subtype.
            atom1type = self.PickAnAtom(self.atomtypes)
            # Pick a decorator to add.
            (decorator, decorator_typename) = self.PickAnAtom(self.decorators)

            # Create new atomtype to insert by appending decorator with 'and' operator.
            # (raw string: '\[' is not a valid escape in an ordinary string literal)
            result = re.match(r'\[(.+)\]', atom1type[0])
            proposed_atomtype = '[' + result.group(1) + '&' + decorator + ']'
            proposed_typename = atom1type[1] + ' ' + decorator_typename
            if self.verbose: print("Attempting to create new subtype: '%s' (%s) + '%s' (%s) -> '%s' (%s)" % (atom1type[0], atom1type[1], decorator, decorator_typename, proposed_atomtype, proposed_typename))

        else: # combinatorial-decorators
            # Pick an atomtype
            atom1type = self.PickAnAtom(self.atomtypes)
            # Check if we need to add an alpha or beta substituent
            if self.HasAlpha(atom1type):
                # Has alpha
                bondtype = self.PickAnAtom(self.bondset)
                atom2type = self.PickAnAtom(self.basetypes)
                # Hydrogen is written '[#1...'; the character after the '1' must not be
                # a digit, otherwise elements #10-#19 would be mistaken for hydrogen.
                primary_smarts = atom1type[0]
                is_hydrogen = primary_smarts[2:3] == '1' and not primary_smarts[3:4].isdigit()
                if random.random() < 0.5 or is_hydrogen: # Add Beta Substituent Atom randomly or when it is Hydrogen
                    proposed_atomtype, proposed_typename = self.AddBetaSubstituentAtom(atom1type, bondtype, atom2type)
                else: # Add another Alpha Substituent if it is not a Hydrogen
                    proposed_atomtype, proposed_typename = self.AddAlphaSubstituentAtom(atom1type, bondtype, atom2type)
                if self.verbose: print("Attempting to create new subtype: -> '%s' (%s)" % (proposed_atomtype, proposed_typename))
            else:
                # Has no alpha
                if random.random() < 0.5: # add decorator to primary atom
                    decorator = self.PickAnAtom(self.decorators)
                    proposed_atomtype, proposed_typename = self.AtomDecorator(atom1type, decorator)
                    if self.verbose: print("Attempting to create new subtype: '%s' (%s) + '%s' (%s) -> '%s' (%s)" % (atom1type[0], atom1type[1], decorator[0], decorator[1], proposed_atomtype, proposed_typename))
                else: # add Alpha substituent
                    bondtype = self.PickAnAtom(self.bondset)
                    atom2type = self.PickAnAtom(self.basetypes)
                    proposed_atomtype, proposed_typename = self.AddAlphaSubstituentAtom(atom1type, bondtype, atom2type)
                    if self.verbose: print("Attempting to create new subtype: '%s' (%s) -> '%s' (%s)" % (atom1type[0], atom1type[1], proposed_atomtype, proposed_typename))

        # Check that we haven't already determined this atom type isn't matched in the dataset.
        if proposed_atomtype in self.atomtypes_with_no_matches:
            if self.verbose: print("Atom type '%s' (%s) unused in dataset; rejecting." % (proposed_atomtype, proposed_typename))
            return False

        # Check that it is a new SMARTS pattern
        if proposed_atomtype in [smarts for (smarts, typename) in self.atomtypes]:
            if self.verbose: print("Atom type '%s' (%s) is in the existing atomtype list; rejecting." % (proposed_atomtype, proposed_typename))
            return False

        # Check the proposed type name is unique; append random digits until it is
        current_typenames = [typename for (smarts, typename) in self.atomtypes]
        while proposed_typename in current_typenames:
            proposed_typename += '%i' % random.randint(0,10)

        # for either decorator - update proposed parent dictionary
        proposed_parents[atom1type[0]].append( (proposed_atomtype, proposed_typename) )
        proposed_parents[proposed_atomtype] = []

        # Append the new atomtype at the end of the proposed list.
        proposed_atomtypes.append( (proposed_atomtype, proposed_typename) )
        # Try to type all molecules.
        try:
            # Type molecules.
            self.type_molecules(proposed_atomtypes, proposed_molecules, self.element)
            # Compute updated statistics.
            [proposed_atom_typecounts, proposed_molecule_typecounts] = self.compute_type_statistics(proposed_atomtypes, proposed_molecules, self.element)
        except AtomTyper.TypingException as e:
            print("Exception: %s" % str(e))
            # Reject since typing failed.
            if self.verbose: print("Typing failed for one or more molecules using proposed atomtypes; rejecting.")
            return False

        # Reject if new type is unused.
        if (proposed_atom_typecounts[proposed_typename] == 0):
            # Reject because new type is unused in dataset.
            if self.verbose: print("Atom type '%s' (%s) unused in dataset; rejecting." % (proposed_atomtype, proposed_typename))
            # Store this atomtype to speed up future rejections
            self.atomtypes_with_no_matches.add(proposed_atomtype)
            return False

        # Reject if any type is emptied (UNLESS it is a basetype)
        for (smarts, typename) in proposed_atomtypes:
            if not smarts in self.basetypes_smarts: # not a base type
                if proposed_atom_typecounts[typename] == 0: # no matches
                    if self.verbose: print("Atomtype '%s' (%s) is now unused in dataset; rejecting." % (smarts, typename))
                    return False

    if self.verbose: print('Proposal is valid...')

    # Accept automatically if no reference molecules. The match results are
    # initialized here so the acceptance step below never reads an unbound name
    # when there is nothing to score against.
    accept = False
    proposed_atom_type_matches = None
    proposed_total_atom_type_matches = None
    if self.reference_typed_molecules is None:
        accept = True
    else:
        # Find number of matches for current set
        (proposed_atom_type_matches, proposed_total_atom_type_matches) = self.best_match_reference_types(proposed_atomtypes, proposed_molecules)
        score_dif = (proposed_total_atom_type_matches - self.total_atom_type_matches)
        # if temperature is zero only accept increased scores
        if self.temperature == 0.0:
            print('Proposal score: %d >> %d' % (self.total_atom_type_matches, proposed_total_atom_type_matches))
            accept = score_dif > 0.0

        # If finite temperature compute effective temperature and log_P_accept
        else:
            # Compute effective temperature
            effective_temperature = (self.total_atoms * self.temperature)
            # Compute likelihood for accept/reject
            log_P_accept = score_dif / effective_temperature
            print('Proposal score: %d >> %d : log_P_accept = %.5e' % (self.total_atom_type_matches, proposed_total_atom_type_matches, log_P_accept))
            accept = (log_P_accept > 0.0) or (numpy.random.uniform() < numpy.exp(log_P_accept))

    # Accept or reject
    if not accept:
        return False

    self.atomtypes = proposed_atomtypes
    self.molecules = proposed_molecules
    self.parents = proposed_parents
    self.child_to_parent = self._switch_parent_dict()
    self.atom_type_matches = proposed_atom_type_matches
    # Without reference molecules there is no new score; keep the stored total as is
    if proposed_total_atom_type_matches is not None:
        self.total_atom_type_matches = proposed_total_atom_type_matches
    return True
718 | for molecule in molecules: 719 | types_in_this_molecule = set() 720 | for atom in self._GetAtoms(molecule, element): 721 | atomtype = atom.GetStringData(self.typetag) 722 | types_in_this_molecule.add(atomtype) 723 | atom_typecounts[atomtype] += 1 724 | for atomtype in types_in_this_molecule: 725 | molecule_typecounts[atomtype] += 1 726 | 727 | return (atom_typecounts, molecule_typecounts) 728 | 729 | def show_type_statistics(self, typelist, atom_typecounts, molecule_typecounts, atomtype_matches=None): 730 | """ 731 | Print atom type statistics to the commandline 732 | Parameters 733 | ---------- 734 | typelist : atomtype list of form (smarts, typename) 735 | atom_typecounts : dictionary result from compute_type_statistics 736 | molecule_typecounts : dictionary result from compute_type_statistics 737 | atomtype_matches : dictionary result from best_match_references_types 738 | if there are reference molecules 739 | """ 740 | index = 1 741 | natoms = 0 742 | 743 | if atomtype_matches is not None: 744 | reference_type_info = dict() 745 | for (typename, reference_atomtype, count) in atomtype_matches: 746 | reference_type_info[typename] = (reference_atomtype, count) 747 | 748 | # Print header 749 | if atomtype_matches is not None: 750 | print("%5s %10s %10s %64s %32s %8s %46s" % ('INDEX', 'ATOMS', 'MOLECULES', 'TYPE NAME', 'SMARTS', 'REF TYPE', 'FRACTION OF REF TYPED MOLECULES MATCHED')) 751 | else: 752 | print("%5s %10s %10s %64s %32s" % ('INDEX', 'ATOMS', 'MOLECULES', 'TYPE NAME', 'SMARTS')) 753 | 754 | # Print counts 755 | for [smarts, typename] in typelist: 756 | if atomtype_matches is not None: 757 | (reference_atomtype, reference_count) = reference_type_info[typename] 758 | if reference_atomtype is not None: 759 | reference_total = self.reference_atomtypes_atomcount[reference_atomtype] 760 | reference_fraction = float(reference_count) / float(reference_total) 761 | print("%5d : %10d %10d | %64s %32s %8s %16d / %16d (%7.3f%%)" % (index, 
def save_type_statistics(self, typelist, atom_typecounts, molecule_typecounts, atomtype_matches=None):
    """
    Saves the match information in format for a trajectory file

    Parameters
    ----------
    typelist : atomtype list of form (smarts, typename)
    atom_typecounts : dictionary result from compute_type_statistics
    molecule_typecounts : dictionary result from compute_type_statistics
    atomtype_matches : list of (typename, reference_atomtype, count) tuples
        result from best_match_reference_types if there are reference
        molecules, otherwise None

    Returns
    -------
    output : list of strings, one csv line per atomtype followed by a
        final 'total' line (index -1)
    """
    # Map each current type name to its (reference type, matched count) pair
    reference_type_info = dict()
    if atomtype_matches is not None:
        for (typename, reference_atomtype, count) in atomtype_matches:
            reference_type_info[typename] = (reference_atomtype, count)

    # INDEX, SMARTS, TYPENAME, PARENT SMARTS, REF TYPE, ATOMS, MOLECULES, MATCHES, OUT OF
    # The same layout is used for every row so that all rows parse identically.
    row_format = "%i,'%s','%s','%s','%s',%i,%i,%i,%i"

    output = []
    ntypes = 0
    for index, (smarts, typename) in enumerate(typelist, 1):
        parent = str(self.child_to_parent[smarts])

        reference_atomtype = None
        reference_count = None
        if atomtype_matches is not None:
            (reference_atomtype, reference_count) = reference_type_info[typename]

        if reference_atomtype is not None:
            reference_label = reference_atomtype
            reference_total = self.reference_atomtypes_atomcount[reference_atomtype]
        else:
            # No reference information or no partner for this type
            reference_label = 'NONE'
            reference_count = 0
            reference_total = 0

        output.append(row_format % (index, smarts, typename, parent, reference_label, atom_typecounts[typename], molecule_typecounts[typename], reference_count, reference_total))
        ntypes += atom_typecounts[typename]

    nmolecules = len(self.molecules)
    if atomtype_matches is None:
        output.append("-1,'total','all','None','all',%i,%i,0,0" % (ntypes, nmolecules))
    else:
        output.append("-1,'total','all','None','all',%i,%i,%i,%i" % (ntypes,nmolecules,self.total_atom_type_matches,self.total_atoms))
    return output
def run(self, niterations, trajFile=None):
    """
    Run atomtype sampler for the specified number of iterations.

    Parameters
    ----------
    niterations : int
        The specified number of iterations
    trajFile : str, optional, default=None
        Output trajectory filename; an existing file is overwritten

    Returns
    ----------
    fraction_matched_atoms : float
        fraction of total atoms matched successfully at end of run, or None
        when self.atom_type_matches is None (nothing to report on)
    """
    if trajFile is not None:
        # make "trajectory" file
        if os.path.isfile(trajFile):
            print("trajectory file already exists, it was overwritten")
        self.traj = open(trajFile, 'w')
        self.traj.write('Iteration,Index,Smarts,Typename,ParentSMARTS,RefType,Matches,Molecules,FractionMatched,Denominator\n')

    try:
        for iteration in range(niterations):
            if self.verbose:
                print("Iteration %d / %d" % (iteration, niterations))

            accepted = self.sample_atomtypes()
            [atom_typecounts, molecule_typecounts] = self.compute_type_statistics(self.atomtypes, self.molecules, self.element)

            if trajFile is not None:
                # Get data as list of csv strings
                lines = self.save_type_statistics(self.atomtypes, atom_typecounts, molecule_typecounts, atomtype_matches=self.atom_type_matches)
                # Add lines to trajectory with iteration number:
                for l in lines:
                    self.traj.write('%i,%s \n' % (iteration, l))

            if self.verbose:
                if accepted:
                    print('Accepted.')
                else:
                    print('Rejected.')

                # Compute atomtype statistics on molecules.
                self.show_type_statistics(self.atomtypes, atom_typecounts, molecule_typecounts, atomtype_matches=self.atom_type_matches)
                print('')

                # Print parent tree as it is now.
                roots = [r for r in self.child_to_parent.keys() if self.child_to_parent[r] is None]

                print("Atom type hierarchy:")
                self.print_parent_tree(roots, '\t')
    finally:
        # Close the trajectory even if sampling raised, so the handle is not leaked
        if trajFile is not None:
            self.traj.close()

    if trajFile is not None:
        # Get/print some stats on trajectory
        # Load timeseries
        timeseries = load_trajectory( trajFile )
        time_fractions = scores_vs_time( timeseries )
        print("Maximum score achieved: %.2f" % max(time_fractions['all']))

    #Compute final type stats
    [atom_typecounts, molecule_typecounts] = self.compute_type_statistics(self.atomtypes, self.molecules, self.element)
    # show_type_matches iterates over its argument, so it cannot be handed None
    if self.atom_type_matches is None:
        fraction_matched_atoms = None
    else:
        fraction_matched_atoms = self.show_type_matches(self.atom_type_matches)

    # If verbose print parent tree:
    if self.verbose:
        # Roots are the parent keys that never appear as anybody's child.
        # (A list is built instead of mutating self.parents.keys(), which is a
        # read-only view on Python 3.)
        child_smarts = set()
        for parent, children in self.parents.items():
            for (smarts, name) in children:
                child_smarts.add(smarts)
        roots = [smarts for smarts in self.parents if smarts not in child_smarts]

        print("Atom type hierarchy:")
        self.print_parent_tree(roots, '\t')
    return fraction_matched_atoms
11 | 12 | Parameters 13 | ---------- 14 | 15 | trajFile (str) : filename to read from 16 | 17 | Returns 18 | ------- 19 | timeseries (dict) : status by iteration number 20 | Dictionary, keyed by iteration, storing the state at each iteration 21 | Subsequent keys are by reference types, (i.e. timeseries[1]['HO']) 22 | and an entry for total if included in the trajectory file at timeseries[1]['total'] 23 | gives data at step 1 on what (if anything) matches 'HO'. Subsequent 24 | keys are 'smarts', 'matches', 'molecules', 'fractionmatched', 'index' (serial # 25 | of match), `ParNum` (parameter number/label), `ParentParNum` (parameter number/label of parent) 26 | `denominator` (number of possible matches of this type), `fraction` 27 | (fraction of this type matched). 28 | 29 | """ 30 | data = pd.read_csv(trajFile, quotechar="'") 31 | data_dict = data.to_dict() 32 | # If the number if headers is not as expected, this is a different version and we can't parse 33 | if len(data.columns) != 10: 34 | raise Exception("Number of headers in trajectory not as expected; can't parse.") 35 | 36 | # Initialize storage 37 | timeseries = {} 38 | 39 | # Number of lines 40 | max_lines = data.index[-1] 41 | 42 | # How many iterations are we looking at? 
def scores_vs_time(timeseries, numerator = 'fractionmatched'):
    """Process a timeseries as read by load_trajectory and return the fraction of each reference atom type found at each time.


    Parameters
    ----------
    timeseries : dict
        Trajectory information as output by load_trajectory, keyed by
        iteration number and then by reference type
    numerator : str, optional, default='fractionmatched'
        Key of the per-reference-type entry holding the matched count that is
        summed when the overall score has to be computed here

    Returns
    -------
    time_fractions : dict
        Dictionary of NumPy arrays, keyed by reference type, with one entry
        per iteration from 0 up to and including the largest iteration present.
        The full score across all types is under `all`.
        'all' is from the total list if available or calculated from other references
    """
    time_fractions = {}
    if not timeseries:
        # Nothing recorded: there is no largest iteration to size the arrays from
        time_fractions['all'] = numpy.zeros(0, float)
        return time_fractions

    # Iterations are numbered from zero, so the count is the largest index plus one
    n_its = int(numpy.max([k for k in timeseries])) + 1

    # Retrieve keys of all reference types
    reftypes = set()
    for it in timeseries:
        reftypes.update(timeseries[it])

    # Allocate storage
    time_fractions['all'] = numpy.zeros( n_its, float)
    for reftype in reftypes:
        time_fractions[reftype] = numpy.zeros( n_its, float)

    # Update with data
    for it in range(n_its):
        # An iteration absent from the trajectory contributes nothing
        state = timeseries.get(it, {})
        # Update reference types occuring at this iteration
        denom = 0
        numer = 0
        for reftype in reftypes:
            if reftype in state:
                try:
                    time_fractions[reftype][it] = state[reftype]['fraction']
                except KeyError:
                    print("Can't find key set %s, %s, %s for timeseries." % (it, reftype, 'fraction'))
                    print("Available keys:", state[reftype])
                denom += state[reftype]['denominator']
                numer += state[reftype][numerator]

            # Any reference type which does not appear at this time point has zero matches so we just leave the value at zero

        # Handle 'all' case last; with nothing to divide by the value stays at zero
        if time_fractions['all'][it] == 0 and denom != 0:
            time_fractions['all'][it] = numer/float(denom)

    return time_fractions
class TestAtomTyper(TestCase):
    # Smoke tests: they pass as long as reading the type lists and assigning
    # types raises no exception; no results are inspected.

    def test_read_typelist(self):
        # Read, in turn, the base types, the decorators and the replacements
        for relative_path in ('atomtypes/basetypes.smarts',
                              'atomtypes/decorators.smarts',
                              'atomtypes/replacements.smarts'):
            AtomTyper.read_typelist(get_data_filename(relative_path))

    def test_atomtyper(self):
        tag = 'atomtype'
        base_path = get_data_filename('atomtypes/basetypes.smarts')
        base_types = AtomTyper.read_typelist(base_path)
        replacement_path = get_data_filename('atomtypes/replacements.smarts')
        substitutions = AtomTyper.read_typelist(replacement_path)
        mols = read_molecules('zinc-subset-tripos.mol2.gz', verbose=False)

        # Assign a type to every molecule of the test set
        typer = AtomTyper(base_types, tag, replacements=substitutions)
        for mol in mols:
            typer.assignTypes(mol)
40 | decorator_behavior = 'simple-decorators', element =0) 41 | atomtype_sampler.run(2) 42 | 43 | def test_atomtyper_combinatorial(self): 44 | """ 45 | Test atomtype sampler with combinatorial-decorators and optional output files 46 | """ 47 | atomtype_sampler = smarty.AtomTypeSampler(self.mols_zinc, 48 | self.basetypes, self.basetypes, self.combine_decs, 49 | replacements_filename = self.replacements, 50 | reference_typed_molecules =self.mols_zinc_ref, 51 | temperature = 0.1, verbose = False) 52 | 53 | # run sampler with optional outputs 54 | traj = 'test_smarty.csv' 55 | plot = 'test_smarty.pdf' 56 | atomtype_sampler.run(5, traj) 57 | # test trajectory analysis functions on smarty output 58 | timeseries = score_utils.load_trajectory(traj) 59 | scores_vs_time = score_utils.scores_vs_time(timeseries) 60 | score_utils.create_plot_file(traj, plot, True, False) 61 | 62 | # check if score is 100% at first iteration 63 | if scores_vs_time['all'][0] == 1.0: 64 | raise Exception("Scoring problem, 100% at first iteration for total") 65 | 66 | def test_atomtyper_elemental(self): 67 | """ 68 | Test elemental atomtype sampler for hydrogen 69 | """ 70 | atomtype_sampler = smarty.AtomTypeSampler(self.mols_alkethoh, 71 | self.basetypes, self.basetypes, self.combine_decs, 72 | replacements_filename = self.replacements, 73 | reference_typed_molecules = self.mols_alkethoh_ref, 74 | temperature = 0.1, verbose = False, 75 | decorator_behavior = 'combinatorial-decorators', element=1) 76 | # run sampler with optional outputs 77 | traj = 'test_smarty.csv' 78 | plot = 'test_smarty.pdf' 79 | atomtype_sampler.run(5, traj) 80 | # test trajectory analysis functions on smarty output 81 | timeseries = score_utils.load_trajectory(traj) 82 | scores_vs_time = score_utils.scores_vs_time(timeseries) 83 | score_utils.create_plot_file(traj, plot, True, False) 84 | 85 | # check if score is 100% at first iteration 86 | if scores_vs_time['all'][0] == 1.0: 87 | raise Exception("Scoring problem, 100% at 
first iteration for total") 88 | 89 | 90 | def test_atomtyper_AlkEthOH(self): 91 | """ 92 | Test atomtype sampler with correct "answers" 93 | """ 94 | atomtype_sampler = smarty.AtomTypeSampler(self.mols_alkethoh, 95 | self.basetypes, self.alkethoh_answers, self.combine_decs, 96 | replacements_filename = self.replacements, 97 | reference_typed_molecules = self.mols_alkethoh_ref, 98 | temperature = 0, verbose = False) 99 | # Start sampling atom types. 100 | fracfound = atomtype_sampler.run(2) 101 | # Ensure fraction found is 1.0 102 | if fracfound < 1.0: 103 | raise Exception("Not finding 100% of AlkEthOH when starting from" 104 | " correct SMARTS.") 105 | 106 | def test_atomtyper_elemental_AlkEthOH(self): 107 | """ 108 | Test elemental sampler with correct "answers" 109 | """ 110 | atomtype_sampler = smarty.AtomTypeSampler(self.mols_alkethoh, 111 | self.basetypes, self.alkethoh_answers, self.combine_decs, 112 | replacements_filename = self.replacements, 113 | reference_typed_molecules = self.mols_alkethoh_ref, 114 | temperature = 0, verbose = False, 115 | decorator_behavior = 'combinatorial-decorators',element = 1) 116 | # Start sampling atom types. 
117 | fracfound = atomtype_sampler.run(2) 118 | 119 | # Ensure fraction found is 1.0 120 | if fracfound < 1.0: 121 | raise Exception("Not finding 100% of Hydrogens of AlkEthOH when starting from" 122 | " correct SMARTS.") 123 | 124 | -------------------------------------------------------------------------------- /smarty/tests/test_smirky_sampler.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import smarty 3 | from openforcefield.typing.chemistry.environment import * 4 | from openforcefield.utils.utils import read_molecules 5 | from smarty.sampler_smirky import * 6 | from smarty import utils 7 | from smarty import score_utils 8 | from operator import itemgetter, attrgetter 9 | import openeye.oechem 10 | from openeye.oechem import * 11 | import copy 12 | import sys # used to exit while testing 13 | 14 | class TestSmirkySampler(unittest.TestCase): 15 | def __init__(self, *args, **kwargs): 16 | """ 17 | Initialize TestCase and then read in odds from files in smarty/data 18 | """ 19 | unittest.TestCase.__init__(self,*args, **kwargs) 20 | 21 | self.atom_OR_bases = utils.parse_odds_file("odds_files/atom_OR_bases.smarts" , False) 22 | self.atom_OR_decors = utils.parse_odds_file("odds_files/atom_decorators.smarts", False) 23 | self.atom_AND_decors = utils.parse_odds_file("odds_files/atom_decorators.smarts", False) 24 | self.bond_OR_bases = utils.parse_odds_file("odds_files/bond_OR_bases.smarts", False) 25 | self.bond_AND_decors = utils.parse_odds_file("odds_files/bond_AND_decorators.smarts", False) 26 | self.atom_odds = utils.parse_odds_file("odds_files/atom_index_odds.smarts", False) 27 | self.bond_odds = utils.parse_odds_file("odds_files/bond_index_odds.smarts", False) 28 | self.molecules = read_molecules("test_filt1_tripos.mol2", False) 29 | self.SMIRFF = "forcefield/Frosst_AlkEthOH.ffxml" 30 | self.outputFile = 'test_smirky' 31 | replacement_file = utils.get_data_filename("odds_files/substitutions.smarts") 
32 | self.replacements = smarty.AtomTyper.read_typelist(replacement_file) 33 | self.replacements = [ [short, smarts] for [smarts, short] in self.replacements] 34 | 35 | self.correctDict = {'VdW': [ ["[#1:1]-[#6]", 'HC'], [ "[#1:1]-[#6]-[#7,#8,F,#16,Cl,Br]", 'H1'], [ "[#1:1]-[#6](-[#7,#8,F,#16,Cl,Br])-[#7,#8,F,#16,Cl,Br]", 'H2'], [ "[#1:1]-[#6](-[#7,#8,F,#16,Cl,Br])(-[#7,#8,F,#16,Cl,Br])-[#7,#8,F,#16,Cl,Br]", 'H3'], [ "[#1:1]-[#8]", 'HO'], [ "[#6X4:1]", 'CT'], [ "[#8X2:1]", 'OS'], [ "[#8X2+0:1]-[#1]", 'OH'] ], 36 | 'Bond': [ ["[#6X4:1]-[#6X4:2]", 'CT-CT'], [ "[#6X4:1]-[#1:2]", 'CT-H'], [ "[#8:1]~[#1:2]", 'O~H'], [ "[#6X4:1]-[#8;X2;H1:2]", "CT-OH"], [ "[#6X4:1]-[#8;X2;H0:2]", "CT-OS"] ], 37 | 'Angle': [ [ "[a,A:1]-[#6&X4:2]-[a,A:3]", 'any-CT-any'], [ "[#1:1]-[#6&X4:2]-[#1:3]", "H-CT-H"], [ "[#6&X4:1]-[#6&X4:2]-[#6&X4:3]", 'CT-CT-CT'], [ "[#8&X2:1]-[#6&X4:2]-[#8&X2:3]", 'O-CT-O'], [ "[#6&X4:1]-[#8&X2:2]-[#1:3]", 'CT-OH-HO'], [ "[#6X4:1]-[#8X2:2]-[#6X4:3]", 'CT-OS-CT'] ], 38 | 'Torsion': [["[a,A:1]-[#6&X4:2]-[#6&X4:3]-[a,A:4]", 'any-CT-CT-any'], [ "[a,A:1]-[#6&X4:2]-[#8&X2:3]-[#1:4]", 'any-CT-OH-HO'], [ "[a,A:1]-[#6&X4:2]-[#8&X2:3]-[!#1:4]", 'any-CT-OS-!H'], [ "[#1:1]-[#6&X4:2]-[#6&X4:3]-[#1:4]", 'H-CT-CT-H'], [ "[#1:1]-[#6&X4:2]-[#6&X4:3]-[#6&X4:4]", 'H-CT-CT-CT'], [ "[#6&X4:1]-[#6&X4:2]-[#8&X2:3]-[#1:4]", 'CT-CT-OH-HO'], [ "[#6&X4:1]-[#6&X4:2]-[#6&X4:3]-[#6&X4:4]", 'CT-CT-CT-CT'], [ "[#6&X4:1]-[#6&X4:2]-[#8&X2:3]-[#6&X4:4]", 'CT-CT-OS-CT'], [ "[#6&X4:1]-[#8&X2:2]-[#6&X4:3]-[O&X2&H0:4]", 'CT-OS-CT-OS'], [ "[#8&X2:1]-[#6&X4:2]-[#6&X4:3]-[#8&X2:4]", 'O-CT-CT-O'], [ "[#8&X2:1]-[#6&X4:2]-[#6&X4:3]-[#1:4]", 'O-CT-CT-H'], [ "[#1:1]-[#6&X4:2]-[#6&X4:3]-[O&X2:4]", 'H-CT-CT-O'] ]} 39 | 40 | def test_correct_fragments(self): 41 | """ 42 | Test score is 100% if correct VdW, Bond, Angles, or Torsions 43 | from AlkEthOH are used as input to the FragmentSampler 44 | """ 45 | 46 | for typetag, initialtypes in self.correctDict.items(): 47 | sampler = FragmentSampler(self.molecules, 
typetag, 48 | self.atom_OR_bases, self.atom_OR_decors, self.atom_AND_decors, 49 | self.bond_OR_bases, self.bond_AND_decors, 50 | AtomIndexOdds = self.atom_odds, BondIndexOdds = self.bond_odds, 51 | replacements = self.replacements, initialtypes = initialtypes, 52 | SMIRFF = self.SMIRFF, temperature = 0.0, outputFile =self.outputFile) 53 | 54 | fracfound = sampler.run(1) 55 | self.assertAlmostEqual(fracfound, 1.0, msg = "Not finding 100%% of AlkEthOH when starting from correct %s SMIRKS." % typetag) 56 | 57 | def test_random_sampler(self): 58 | """ 59 | Test FragmentSampler runs for 10 iterations with no failures 60 | Test score_utils functions with the outputFile 61 | """ 62 | typetag = 'Torsion' 63 | sampler = FragmentSampler(self.molecules, typetag, self.atom_OR_bases, 64 | self.atom_OR_decors, self.atom_AND_decors, self.bond_OR_bases, 65 | self.bond_AND_decors, 66 | AtomIndexOdds = self.atom_odds, BondIndexOdds = self.bond_odds, 67 | replacements = self.replacements, initialtypes = None, 68 | SMIRFF = self.SMIRFF, temperature = 0.0, outputFile = self.outputFile) 69 | fracfound = sampler.run(10) 70 | # load_trajectory converts csv file to dictionary 71 | timeseries = score_utils.load_trajectory('%s.csv' % self.outputFile) 72 | # scores_vs_time converts num/den entries to fractional scores 73 | scores_vs_time = score_utils.scores_vs_time(timeseries) 74 | # test plotting function 75 | score_utils.create_plot_file('%s.csv' % self.outputFile, '%s.pdf' % self.outputFile) 76 | 77 | 78 | def test_sampler_functions(self): 79 | """ 80 | Test fragment sampler functions are working 81 | """ 82 | typetag = 'Angle' 83 | sampler = FragmentSampler(self.molecules, typetag, self.atom_OR_bases, 84 | self.atom_OR_decors, self.atom_AND_decors, self.bond_OR_bases, 85 | self.bond_AND_decors, 86 | AtomIndexOdds = self.atom_odds, BondIndexOdds = self.bond_odds, 87 | replacements = self.replacements, initialtypes = None, 88 | SMIRFF = self.SMIRFF, temperature = 0.0, outputFile = 
self.outputFile) 89 | 90 | typetags = [ ('VdW', 'NonbondedGenerator'), 91 | ('Bond', 'HarmonicBondGenerator'), 92 | ('Angle', 'HarmonicAngleGenerator'), 93 | ('Torsion', 'PeriodicTorsionGenerator'), 94 | ('Improper','PeriodicTorsionGenerator'), 95 | ('None', None)] 96 | 97 | for (tag, expected) in typetags: 98 | sample_tag, edges, sym_odds = sampler.get_type_info(tag) 99 | self.assertEqual(sample_tag, expected, msg = "get_force_type(%s) should return %s, but %s was returned instead" % (tag, expected, sample_tag)) 100 | 101 | # Running each method just to make sure they work 102 | # get environment 103 | env = sampler.envList[0] 104 | new_env, prob = sampler.create_new_environment(env) 105 | # check atom methods 106 | atom,prob = sampler.pick_an_atom(new_env) 107 | removeable = sampler.isremoveable(new_env,atom) 108 | prob = sampler.add_atom(new_env,atom) 109 | prob = sampler.change_atom(new_env, atom) 110 | atom.addORtype('#6', ['X4']) 111 | prob = sampler.change_ORdecorator(atom, self.atom_OR_decors) 112 | prob = sampler.change_ORbase(atom, self.atom_OR_bases, self.atom_OR_decors) 113 | prob = sampler.change_ANDdecorators(atom, self.atom_AND_decors) 114 | 115 | # check bond methods 116 | bond,prob = sampler.pick_a_bond(new_env) 117 | prob = sampler.change_bond(new_env, bond) 118 | prob = sampler.change_ORbase(bond, self.bond_OR_bases, sampler.BondORdecorators) 119 | prob = sampler.change_ANDdecorators(bond, self.bond_AND_decors) 120 | 121 | def test_no_reference_smirff(self): 122 | """ 123 | Test that sampling still works with no reference SMIRFF provided 124 | """ 125 | typetag = 'Bond' 126 | sampler = FragmentSampler(self.molecules, typetag, self.atom_OR_bases, 127 | self.atom_OR_decors, self.atom_AND_decors, self.bond_OR_bases, 128 | self.bond_AND_decors, 129 | AtomIndexOdds = self.atom_odds, BondIndexOdds = self.bond_odds, 130 | replacements = self.replacements, initialtypes = None, 131 | SMIRFF = None, temperature = 0.0, outputFile = self.outputFile) 132 | 
import os

#=============================================================================================
# UTILITY ROUTINES
#=============================================================================================

def get_data_filename(relative_path):
    """Get the full path to one of the data files shipped with smarty.

    The path is resolved with ``pkg_resources.resource_filename`` against the
    ``data`` directory of the ``smarty`` package.

    Parameters
    ----------
    relative_path : str
        Path of the file to load, relative to the ``smarty/data`` folder.

    Returns
    -------
    fn : str
        Path to the requested file.

    Raises
    ------
    ValueError
        If the resolved path does not exist.

    """

    from pkg_resources import resource_filename
    fn = resource_filename('smarty', os.path.join('data', relative_path))

    if not os.path.exists(fn):
        raise ValueError("Sorry! %s does not exist. If you just added it, you'll have to re-install" % fn)

    return fn


def parse_odds_file(filename, verbose = False):
    """
    parses files that have the form
    decorator       odds
    if only one column odds will be assumed equally probable

    Everything after a '%' on a line is a comment.  Blank lines and lines
    holding only a comment are skipped.  A decorator written as '' or ""
    stands for the empty string.

    Parameters
    -----------
    filename: string or file object
        may be an absolute file path, a path relative to the current working
        directory, a path relative to this module's data subdirectory (for
        built in decorator files), or an open file-like object with a
        readlines() method.  A file object passed in is read but not closed;
        it stays owned by the caller.
    verbose: boolean
        if True, prints which file is being parsed

    Returns
    --------
    choices: 2-tuple of the form ( [decorators], [odds] )
        odds is None when any entry has no odds column or when all odds are
        zero.  None is returned instead of a tuple when filename is None.

    Raises
    ------
    ValueError
        if a path is found neither on disk nor in the package data directory
    Exception
        if the file cannot be read, or an entry has more than two columns
    """
    # if no file return None
    if filename is None:
        if verbose: print("No filename provided, returning None")
        return None

    if hasattr(filename, 'readlines'):
        # Input is an already opened file object.  In-memory files have no
        # `name`, so fall back to the object itself for the message.
        if verbose: print("Attempting to parse file '%s'" % getattr(filename, 'name', filename))
        try:
            input_lines = filename.readlines()
        except Exception as e:
            msg = str(e) + '\n'
            msg += "Could not read data from file %s" % filename
            raise Exception(msg)
    else:
        if verbose: print("Attempting to parse file '%s'" % filename)
        try:
            with open(filename, 'r') as ifs:
                input_lines = ifs.readlines()
        except IOError:
            # Not readable as given: retry as a file in the package data
            # directory (get_data_filename raises ValueError if missing).
            with open(get_data_filename(filename), 'r') as ifs:
                input_lines = ifs.readlines()
        except Exception as e:
            raise Exception("%s\nProvided file (%s) could not be parsed" % (str(e), filename))

    decorators = []
    odds = []
    noOdds = False
    for l in input_lines:
        # remove any comment, then split the remainder into columns
        entry = l.split('%', 1)[0].split()

        # skip blank lines and lines that only hold a comment
        if not entry:
            continue

        # add decorator
        if entry[0] == "''" or entry[0] == '""':
            decorators.append('')
        else:
            decorators.append(entry[0])

        if len(entry) == 2:
            odds.append(float(entry[1]))
        elif len(entry) == 1:
            noOdds = True
        else:
            raise Exception("Error entry (%s) in decorator file '%s' is invalid" % (l, filename))

    # A single entry without odds (or all-zero odds) disables weighting.
    if (odds.count(0) == len(odds)) or noOdds:
        odds = None
        #TODO: handle case where 1 line is missing odds entry

    return (decorators, odds)
It uses `Torsion_0_0.00e+00_results.smarts` as an example of the complex SMIRKS patterns that can be generated during a smirky simulation. 6 | 7 | Below are the results for this test. For each test, data is reported in this order: 8 | * Parameter type list 9 | - generic: starts with only `"[*:1]~[*:2]~[*:3]~[*:4]"` 10 | - short: starts with first 10 SMIRKS in `*_results.smarts` 11 | - long: starts with all 82 SMIRKS in `*_results.smarts` 12 | * Time in minutes to do X iterations storing SMIRKS strings 13 | * Time in minutes to do X iterations storing Chemical Environments for each input SMIRKS 14 | * Difference in Chemical Environment and SMIRKS time in minutes 15 | 16 | ``` 17 | ------------------------------ 2 Iterations ------------------------------ 18 | short 1.97e-05 6.54e-05 4.57e-05 19 | long 1.93e-05 4.58e-04 4.39e-04 20 | generic 1.34e-05 1.82e-05 4.84e-06 21 | 22 | 23 | ------------------------------ 10 Iterations ------------------------------ 24 | short 7.12e-05 1.16e-04 4.53e-05 25 | long 8.27e-05 5.40e-04 4.58e-04 26 | generic 6.60e-05 6.47e-05 -1.23e-06 27 | 28 | 29 | ------------------------------ 100 Iterations ------------------------------ 30 | short 6.19e-04 7.01e-04 8.20e-05 31 | long 7.44e-04 1.36e-03 6.12e-04 32 | generic 5.49e-04 6.28e-04 7.92e-05 33 | 34 | 35 | ------------------------------ 1000 Iterations ------------------------------ 36 | short 7.59e-03 1.73e-02 9.76e-03 37 | long 8.42e-03 2.10e-02 1.26e-02 38 | generic 6.89e-03 1.61e-02 9.20e-03 39 | 40 | 41 | ------------------------------ 10000 Iterations ------------------------------ 42 | short 8.89e-02 1.09e+00 9.98e-01 43 | long 9.37e-02 1.17e+00 1.08e+00 44 | generic 7.18e-02 1.12e+00 1.05e+00 45 | 46 | 47 | ------------------------------ 30000 Iterations ------------------------------ 48 | short 3.61e-01 1.04e+01 1.00e+01 49 | long 4.51e-01 1.08e+01 1.04e+01 50 | generic 3.13e-01 1.01e+01 9.76e+00 51 | ``` 52 | 53 | We concluded from this that while the timing difference isn't
so significant on the number of iterations typically run with smirky, future move proposal engines would probably benefit from storing SMIRKS patterns rather than Chemical Environments. 54 | -------------------------------------------------------------------------------- /utilities/test_smirks_or_environment_speed/Torsion_0_0.00e+00_results.smarts: -------------------------------------------------------------------------------- 1 | % Results for sampling Torsions at 0.00e+00 2 | %% SMIRKS patterns for final results are below 3 | % followed by a their matched reference SMIRKS from /beegfs/DATA/mobley/bannanc/smirky_testing/SMIRKY/inputFiles//smirff99Frosst.ffxml 4 | %Final Score was 51.963 % 5 | %% 6 | [*:1]~[#6:2]~[#6:3]~[*:4] C~C 7 | % [*:1]~[#6X3:2]:[#6X3:3]~[*:4] t45 8 | [*:1]~[#6:2]~[#7:3]~[*:4] C~N 9 | % [*:1]-[#6X4:2]-[#7X3$(*~[#6X3,#6X2]):3]~[*:4] t59 10 | [*:1]~[#6:2]~[#8:3]~[*:4] C~O 11 | % [*:1]-[#6X4:2]-[#8X2H0:3]-[*:4] t85 12 | [*:1]~[#6:2]~[#15:3]~[*:4] C~P 13 | % [*:1]~[#15:2]-[#6:3]-[*:4] t112 14 | [*:1]~[#6:2]~[#16:3]~[*:4] C~S 15 | % [*:1]-[#16X2,#16X3+1:2]-[#6:3]~[*:4] t104 16 | [*:1]~[#7:2]~[#7:3]~[*:4] N~N 17 | % [*:1]~[#7X2:2]-[#7X3:3]~[*:4] t124 18 | [*:1]~[#7:2]~[#8:3]~[*:4] N~O 19 | % [*:1]-[#8X2r5:2]-;@[#7X3r5:3]~[*:4] t115 20 | [*:1]~[#7:2]~[#16:3]~[*:4] N~S 21 | % [#8X1:1]~[#16X4,#16X3+0:2]-[#7X3:3]-[#6X3:4] t139 22 | [*:1]~[#8:2]~[#15:3]~[*:4] O~P 23 | % [*:1]-[#8X2:2]-[#15:3]~[*:4] t146 24 | [*:1]~[#8:2]~[#16:3]~[*:4] O~S 25 | % [*:1]~[#16X4,#16X3+0:2]-[#8X2:3]-[*:4] t144 26 | [*:1]~[#16:2]~[#16:3]~[*:4] S~S 27 | % [*:1]-[#16X2,#16X3+1:2]-[#16X2,#16X3+1:3]-[*:4] t145 28 | [*;X3:1](~[#6:2]~[#7:3]~[*:4])~[#6] 2936 29 | % [*:1]-,:[#6X3:2]=[#7X2:3]-[*:4] t76 30 | [*;X3:1](~[#6:2]~[#7:3]~[*;a:4])~[$ewg1] 8087 31 | % [*:1]=[#7X2,#7X3+1:2]-[#6X3:3]=,:[*:4] t73 32 | [*;X4:1]~[#7:2]~[#16:3](~[#8])~[*:4] 7890 33 | % [#6X3:1]-[#16X4,#16X3+0:2]-[#7X4,#7X3:3]-[#6X4:4] t134 34 | [#7!R:1]~[#7:2]~[#7:3]~[*:4] 2323 35 | % 
[*:1]~[*:2]=[#6,#7,#16,#15;X2:3]=[*:4] t150 36 | [*;X3:1]~[#6:2]~[#7:3]~[*;a:4]~[#1] 2632 37 | % [#6X3:1]:[#7X2:2]:[#6X3:3]:[#6X3:4] t75 38 | [#1:1]~[#7:2]~[#7:3]~[#1:4] 2525 39 | % [*:1]-[#7X4,#7X3:2]-[#7X3$(*~[#6X3,#6X2]):3]~[*:4] t121 40 | [*;X4:1]~[#7:2]~[#16:3](~[#8])~[$ewg2&:4] 4181 41 | % [#8X1:1]~[#16X4,#16X3+0:2]-[#7X4,#7X3:3]-[#6X4:4] t136 42 | [*;!X4:1]~[#7:2]~[#16:3]~[#6H2:4] 3345 43 | % [#6X4:1]-[#16X4,#16X3+0:2]-[#7X4,#7X3:3]-[#1:4] t131 44 | [*;!R:1]~[#6:2]~[#6:3]~[*;!R:4] 8243 45 | % [*:1]-[#6X4;r3:2]-@[#6X4;r3:3]-[*:4] t16 46 | [*;!R:1]~[#6:2]~[#6:3]~[*;!R:4]~[#1] 5859 47 | % [#1:1]-[#6X4:2]-[#6X4:3]-[#6X4:4] t5 48 | [*;a:1]~[#6:2]~[#16:3]~[*:4] 1983 49 | % [#6X3:1]-@[#16X2,#16X1-1,#16X3+1:2]-@[#6X3,#7X2;r5:3]=@[#6,#7;r5:4] t106 50 | [#8!X4:1]~[#6:2]~[#7:3]~[*:4] 5660 51 | % [*:1]~[#7X3,#7X2-1:2]-[#6X3:3]~[*:4] t67 52 | [*;!R:1](~[#6:2]~[#6:3]~[*;!R:4]~[#1])~[$ewg2] 9500 53 | % [#6X4:1]-[#6X4:2]-[#6X3:3]-[#7X3:4] t24 54 | [*;!R:1]~[#6:2]~[#6:3]~[$ewg1&X2;!R:4] 1122 55 | % [#1:1]-[#6X4:2]-[#6X4:3]-[#8X2:4] t10 56 | [#6H2:1]~[#8:2]~[#15:3]~[*;!X1:4] 8301 57 | [*;!R:1](~[#6:2]~[#6:3]~[$ewg1&X2;!R:4])~[#6] 7607 58 | % [*:1]~[#6X3:2]-[#6X4:3]-[*:4] t18 59 | [*;!R:1]~[#6:2]~[#6:3]=[*;!R:4] 6470 60 | % [*:1]~[#6X3:2]-[#6X3:3]~[*:4] t44 61 | [*;X3:1](~[#6:2]~[#7X3:3]~[*:4])~[#6] 1918 62 | % [*:1]-[#7X3;r5:2]-@[#6X3;r5:3]~[*:4] t70 63 | [#7+0X3:1]~[#6:2]~[#8:3]~[*;!X3:4]~[#6] 9934 64 | % [#6X4:1]-[#8X2:2]-[#6X4:3]-[#7X3:4] t89 65 | [*;X3:1](~[#6:2]~[#7R0:3]~[*:4])~[#6] 1518 66 | % [*:1]~[#7X3,#7X2-1:2]-!@[#6X3:3]~[*:4] t68 67 | [*:1]~[#7:2]~[#7:3]~[*;r6:4] 7562 68 | % [*:1]~[#7X2:2]=,:[#7X2:3]~[*:4] t126 69 | [*;!R:1]~[#6:2]~[#6:3]#[*;!R:4]~[#1] 5488 70 | % [*:1]~[*:2]-[*:3]#[*:4] t149 71 | [*;a:1]~[#6:2]~[#16:3]~;!@[*:4] 4949 72 | % [#6:1]-[#16X4,#16X3+0:2]-[#6X3:3]~[*:4] t111 73 | [$ewg1&+0:1]~[#7:2]~[#16:3]~[*:4] 7297 74 | % [#8X1:1]~[#16X4,#16X3+0:2]-[#7X3:3]-[#7X2:4] t140 75 | [#7H1:1]~[#6:2]~[#6:3]~[#7A:4] 6854 76 | % 
[#7X3:1]-[#6X4:2]-[#6X3:3]-[#7X3:4] t23 77 | [#1!X4:1]~[#7:2]~[#16:3]~[*;R2:4] 4082 78 | % [#6X3:1]-[#16X4,#16X3+0:2]-[#7X4,#7X3:3]-[#1:4] t132 79 | [*;!R:1](~[#6:2]~[#6X4:3]~[*;!R:4]~[#1])~[$ewg2] 5403 80 | % [#6X4:1]-[#6X4:2]-[#6X4:3]-[#6X4:4] t3 81 | [*;X3:1](~[#6H2:2]~[#7R0:3]~[*:4])~[#6] 1131 82 | % [*:1]-[#6X4:2]-[#7X3:3]-[*:4] t51 83 | [*;!R:1](~[#6:2]~[#6X4:3]~[*;R:4]~[#1])~[$ewg2] 8238 84 | % [#6X4;r3:1]-[#6X4;r3:2]-[#6X4;r3:3]-[*:4] t17 85 | [#8!X1;!R:1]~[#6:2]~[#6:3]~[$ewg1&X2;!R:4] 1268 86 | % [#8X2:1]-[#6X4:2]-[#6X4:3]-[#8X2:4] t6 87 | [*:1]~[#6!X4:2]~[#8:3]~[*:4] 2683 88 | % [#1:1]-[#8X2:2]-[#6X3:3]=[#8X1:4] t99 89 | [#8!X4:1]~[#6X3:2]~[#7:3]~[#1:4] 6762 90 | % [#1:1]-[#7X3:2]-[#6X3:3]=[#8,#16,#7:4] t69 91 | [*:1]~[#6:2]~[#6:3]~[#35H0:4] 5347 92 | % [#1:1]-[#6X4:2]-[#6X4:3]-[#35:4] t13 93 | [*:1]-[#6:2]~[#7r6:3]:[*:4] 9273 94 | % [*:1]~[#7X2,#7X3$(*~[#8X1]):2]:[#6X3:3]~[*:4] t74 95 | [*;X3:1](~[#6:2]~[#7:3]~[*;H3:4]):[#6] 1413 96 | % [*:1]-[#7X4:2]-[#6X3:3]~[*:4] t58 97 | [#1:1]~[#6:2]~[#6:3]~[#7A:4] 9141 98 | % [*:1]-[#6X4:2]-[#6X4:3]-[*:4] t2 99 | [#1:1]~[#6:2]~[#6:3]~[#1:4] 5525 100 | % [#1:1]-[#6X4:2]-[#6X4:3]-[#1:4] t4 101 | [*;!R:1]~[#6:2]~[#6;a:3]~[*;!R:4] 4246 102 | % [*:1]-,:[#6X3:2]=[#6X3:3]-,:[*:4] t46 103 | [*:1]~[#6:2]~[#7:3]~[$ewg2&A;+1:4] 3498 104 | % [*:1]~[#7X2:2]-[#6X4:3]-[*:4] t64 105 | [#8a:1]~[#7:2]~[#7:3]~[*;r6:4] 7797 106 | % [*:1]~[#7X3+1:2]=,:[#7X2:3]~[*:4] t127 107 | [*a:1](~[#6:2]~[#6:3]~[#7A:4])~[$ewg1] 6650 108 | % [*:1]~[#6X3:2]-[#6X3$(*=[#8,#16,#7]):3]~[*:4] t48 109 | [*;X3:1](~[#6H2:2]~[#7R0:3]~[*;X3:4])~[#6] 7694 110 | % [#6X3:1]-[#7X3:2]-[#6X4:3]-[#6X3:4] t60 111 | [#1!X4:1]~[#7:2]~[#16:3]~[$ewg1&:4] 4831 112 | % [#8X1:1]~[#16X4,#16X3+0:2]-[#7X4,#7X3:3]-[#1:4] t135 113 | [#6:1]~[#6!X4:2]~[#8:3]~[*:4] 6039 114 | % [*:1]~[#6X3:2]-[#8X2:3]-[#1:4] t96 115 | [*;!R:1](~[#6:2]~[#6X4:3]~[*;!R:4]~[#1])(~[#8])(~[#9])~[$ewg2] 5351 116 | [#8!X4R2:1](~[#6:2]~[#7:3]~[*:4])~[#6] 3851 117 | % [#8X2H0:1]-[#6X4:2]-[#7X3:3]-[#6X3:4] t62 
118 | [#16:1](~[#6:2]~[#6:3]~[#7A:4]~[#6])~[*] 2631 119 | % [#16X2,#16X1-1,#16X3+1:1]-[#6X3:2]-[#6X4:3]-[#7X3$(*-[#6X3,#6X2]):4] t27 120 | [*:1]~[#6:2]~[#16:3]~[*;H2:4] 9632 121 | % [*:1]-[#16X2,#16X3+1:2]-[#6:3]-[#1:4] t105 122 | [#8!X2;!R:1]~[#6:2]~[#6:3]=[*;!R:4] 9260 123 | % [#6X3:1]=[#6X3:2]-[#6X3:3]=[#8X1:4] t49 124 | [*;X4:1]~[#6:2]~[#8:3]~[*;X4:4] 2174 125 | % [#6X4:1]-[#6X4:2]-[#8X2H0:3]-[#6X4:4] t86 126 | [#7+0;R:1](~[#6:2]~[#8:3]~[*!X4;!X3:4])~[#6] 6501 127 | % [*:1]-[#6X4:2]-[#8X2:3]-[#1:4] t83 128 | [*:1]~[#7:2]~[#16:3]~[#7X2:4] 1601 129 | % [*:1]-[#16X2,#16X3+1:2]-[!#6:3]~[*:4] t129 130 | [*;!R:1]~[#6:2]~[#6:3]~[#9H0;!R:4] 7827 131 | % [#9:1]-[#6X4:2]-[#6X4:3]-[#9:4] t7 132 | [*;X4:1]~[#7:2]~[#16:3](~[#8])~[*;X4:4] 4974 133 | % [#6X4:1]-[#16X4,#16X3+0:2]-[#7X4,#7X3:3]-[#6X4:4] t133 134 | [*:1]~[#6!X4:2]~[#8:3]~[#6!X3:4] 5843 135 | % [*:1]~[#6X3:2]-[#8X2:3]-[*:4] t95 136 | [*:1]~[#6:2]~[#16:3]~[$ewg2&A:4] 2986 137 | % [*:1]~[#16X4,#16X3+0:2]-[#6X3:3]~[*:4] t110 138 | [*;!R;+0:1]~[#6X4:2]~[#6:3]=[*;!R:4] 6290 139 | % [*:1]-[#6X4:2]-[#6X3:3]=[*:4] t21 140 | [*;!R:1]~[#6:2]~[#6:3]~[#17!X3;!R:4] 9724 141 | % [#1:1]-[#6X4:2]-[#6X4:3]-[#17:4] t12 142 | [*:1]~[#6X2:2]~[#7:3]~[*:4] 4773 143 | [#8!X2:1]~[#6!X4:2]~[#8:3]~[#6!X3:4] 2494 144 | % [#8,#16,#7:1]=[#6X3:2]-[#8X2H0:3]-[#6X4:4] t100 145 | [*;!R;+0:1]~[#6X4:2]~[#6:3]=[*;!R:4]~[#8] 9066 146 | [$halogen&;!R:1]~[#6:2]~[#6:3]~[#17!X3;!R:4] 8919 147 | % [#17:1]-[#6X4:2]-[#6X4:3]-[#17:4] t8 148 | [#6H0:1]~[#7:2](~[#7:3]~[*:4])~[$ewg2] 3774 149 | % [*:1]-[#7X3$(*-[#6X3,#6X2])r5:2]-@[#7X3$(*-[#6X3,#6X2])r5:3]~[*:4] t123 150 | [#7+0X3:1]~[#6:2]~[#8:3]~[*;!X3:4]~[#6X3]~[#6] 3876 151 | % [*:1]~[#6X3:2](=[#8,#16,#7])-[#8X2H0:3]-[*:4] t97 152 | [#6;R0:1]~[#6!X4:2]~[#8:3]~[*:4] 4545 153 | % [*:1]~[#6X3:2](=[#8,#16,#7])-[#8:3]-[#1:4] t98 154 | [$ewg1&+0;!X2:1]~[#7:2]~[#16:3]~[*:4] 6619 155 | % [*:1]~[#16X4,#16X3+0:2]-[#7:3]~[*:4] t130 156 | [#6;R0:1]~[#6!X4:2]~[#8:3]~[$ewg2&X3:4] 3713 157 | 
[#6H0;!R:1]~[#7:2](~[#7:3]~[*:4])~[$ewg2] 5329 158 | [*:1]-[#6:2]~[#16:3]~[$ewg2&A:4] 3291 159 | % [*:1]~[#16X4,#16X3+0:2]-[#6X4:3]-[*:4] t107 160 | [#7H2:1]~[#6:2]~[#6:3]~[#7A:4] 5987 161 | [*:1]=[#7:2]~[#16:3]~[*;!R:4] 6651 162 | % [#8X1:1]~[#16X4,#16X3+0:2]-[#7X2:3]~[#6X3:4] t143 163 | [#16:1]~[#6:2]~[#6:3]-[#7A;H2:4] 1329 164 | [*;!R:1](~[#6:2]~[#6:3]~[#17!X3;!R:4])~[#7] 3104 165 | [#8!X2;!R:1]~[#6:2]~[#6:3]=[*;!R:4]~[#7] 1340 166 | [*:1]~[#7:2]~[#8:3]-[*:4] 6442 167 | % [*:1]~[#8X2:2]-[#7:3]~[*:4] t114 168 | [*;X3:1](~[#16])(~[#6:2]~[#7:3]~[*;H3:4]):[#6] 6178 169 | % [*:1]-[#6X4:2]-[#7X4:3]-[*:4] t50 170 | [*H2;!R:1]~[#6:2]~[#6:3]~[#9H0;!R:4] 6291 171 | [*;X3:1](~[#1])(~[#6:2]~[#7X3:3]~[#8!X2:4])~[#6] 1521 172 | % [#8X1:1]~[#7X3:2]~[#6X3:3]~[*:4] t71 173 | [#8!X1;!R:1]~[#6:2]~[#6:3]~[$ewg1&X2;!R:4]~[#15] 3796 174 | [#7H1:1]~[#6:2]~[#6:3]~[#7A!X3:4] 7261 175 | [*A:1]~[#7:2]~[#7:3]:[*;r6:4]~[#8] 4404 176 | % [*:1]-[#7X3$(*-[#6X3,#6X2]):2]-[#7X3$(*-[#6X3,#6X2]):3]-[*:4] t122 177 | [*;X3:1](~[#6:2]~[#7!X3:3]~[*;a:4](~[#16])~[#7])~[$ewg1] 7356 178 | % [*:1]=[#7X2,#7X3+1:2]-[#6X3:3]-[*:4] t72 179 | [*;!R:1](~[#15])(~[#6:2]~[#6;a:3]~[*;!R:4])~[$halogen] 4636 180 | [*;X3:1](~[#6H2:2]~[#7R0:3]=[*;X3:4])~[#6] 8963 181 | % [#6X3:1]=[#7X2,#7X3+1:2]-[#6X4:3]-[#6X3,#6X4:4] t66 182 | [#17;!R:1]~[#6:2]~[#6:3]~[#9H0;!R:4] 7150 183 | [*:1]~[#6!X4:2]~[#15:3]~[*:4] 8692 184 | % [*:1]~[#15:2]-[#6X3:3]~[*:4] t113 185 | [*:1]=[#7:2]~[#16:3]~[*;!R:4]~[#1] 8573 186 | [#8+0;!R:1]~[#6:2]~[#6:3]~[#17!X3;!R:4] 5443 187 | [*;!R:1](~[#6:2]~[#6:3]~[$ewg1&X2;!R:4]~[$ewg2])~[#6] 5499 188 | % [*:1]-[#6X4;r3:2]-[#6X3:3]~[*:4] t28 189 | [*;!R:1](~[#6:2]~[#6:3]~[$ewg1&X2;!R:4]~[#15])~[#6] 3750 190 | [$ewg1&H0:1]~[#6:2]~[#8:3]~[$ewg1&H0:4] 2097 191 | [#16;H1:1](~[#6:2]~[#6:3]~[#7A:4]~[#6])~[*] 7633 192 | [#16H0:1]~[#6:2]~[#6:3]-[#7A;H2:4] 9637 193 | % [#16X2,#16X1-1,#16X3+1:1]-[#6X3:2]-[#6X4:3]-[#7X4,#7X3:4] t26 194 | [#6H2:1]~[#8:2]~[#15:3]~[$ewg1&!X4;!X1:4] 1009 195 | % 
[#8X2:1]-[#15:2]-[#8X2:3]-[#6X4:4] t147 196 | [*:1]~[#6!X4;!X3:2]~[#8:3]~[*:4] 5204 197 | [*;X3:1]~[#7:2]~[#16:3](~[#8])~[*;X4:4] 7900 198 | % [#6X4:1]-[#16X4,#16X3+0:2]-[#7X3:3]-[#6X3:4] t138 199 | [#8!X4R2:1](~[#6:2]~[#7:3]~[$ewg1&H1:4])~[#6] 6407 200 | [#7H2:1]~[#6:2]~[#6:3]-[#7A:4] 8242 201 | % [*:1]-[#6X4:2]-[#6X4;r3:3]-[*:4] t14 202 | [#7!X4:1]~[#7:2]~[#8:3]~[*:4] 3625 203 | % [*:1]-[#8X2r5:2]-;@[#7X2r5:3]~[*:4] t116 204 | [$ewg1&X4;!R:1]~[#6:2]~[#6:3]~[$ewg1&X2;!R:4] 1369 205 | [*;!R:1](~[#6:2]~[#6X4:3]~[*;R:4](~[#1])~[$ewg2])~[$ewg2&H2] 9719 206 | [*;!R:1](~[#6:2]~[#6X4:3]~[$ewg2&A;R:4]~[#1])~[$ewg2] 2659 207 | [#8!X1;!R:1]~[#6:2]~[#6:3]~[$ewg1&H2;!R:4] 4862 208 | -------------------------------------------------------------------------------- /utilities/test_smirks_or_environment_speed/testing_smirks_speed.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Does removing ChemicalEnvironments speed up sampling\n", 8 | "\n", 9 | "This ipython notebook is being used to determine if removing the list of chemicalenvironments would significantly increase the speed of smirky" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": { 16 | "collapsed": true 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "from openforcefield.typing.chemistry.environment import TorsionChemicalEnvironment\n", 21 | "import time\n", 22 | "import copy\n", 23 | "import numpy as np\n", 24 | "from numpy import random\n", 25 | "from smarty.atomtyper import AtomTyper" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 3, 31 | "metadata": { 32 | "collapsed": true 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "def smirks_sampling(smirks, iterations):\n", 37 | " \"\"\"\n", 38 | " This method takes in a list of smirks strings, then randomly picks one\n", 39 | " creates a chemical environment and 
writes back out the smirks string\n", 40 | " \n", 41 | " It only stores the smirks strings and only sometimes keeps the \"new one\" \n", 42 | " the new one is just a copy of the randomly chosen current one\n", 43 | " \"\"\"\n", 44 | " current = copy.deepcopy(smirks)\n", 45 | " for i in range(iterations):\n", 46 | " change = random.choice(current)\n", 47 | " \n", 48 | " env = TorsionChemicalEnvironment(smirks = change)\n", 49 | " new_smirks = env.asSMIRKS()\n", 50 | " \n", 51 | " # assume we accept a move 30% of the time and extend the list\n", 52 | " if random.rand() < 0.3: \n", 53 | " current.append(new_smirks)\n", 54 | "\n", 55 | " return current\n", 56 | "\n", 57 | "def environment_sampling(smirks, iterations):\n", 58 | " \"\"\"\n", 59 | " This method taks in a list of smirks, turns them into chemical environments\n", 60 | " and then iterates where some percentage of the time you keep the new environment\n", 61 | " \"\"\"\n", 62 | " current = [TorsionChemicalEnvironment(smirks = c) for c in smirks]\n", 63 | " \n", 64 | " for i in range(iterations):\n", 65 | " change = copy.deepcopy(random.choice(current))\n", 66 | " new_smirks = change.asSMIRKS\n", 67 | " \n", 68 | " # keep the new one 30% of the time\n", 69 | " if random.rand() < 0.3:\n", 70 | " current.append(change)\n", 71 | " \n", 72 | " return [e.asSMIRKS for e in current]\n", 73 | "\n", 74 | "def run_samplings(smirks, iterations):\n", 75 | " \"\"\"\n", 76 | " This method runs smirks_sampling and environment sampling and returns the time for each using \n", 77 | " the same input list and number of iterations\n", 78 | " \"\"\"\n", 79 | " \n", 80 | " # smirks first \n", 81 | " init_time = time.time()\n", 82 | " smirks = smirks_sampling(smirks, iterations)\n", 83 | " end_time = time.time()\n", 84 | " smirks_time = (end_time - init_time) / 60.0\n", 85 | " \n", 86 | " # environments\n", 87 | " init_time = time.time()\n", 88 | " env_smirks = environment_sampling(smirks, iterations)\n", 89 | " end_time = 
time.time()\n", 90 | " env_time = (end_time - init_time) / 60.0\n", 91 | " \n", 92 | " return smirks_time, env_time" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 6, 98 | "metadata": {}, 99 | "outputs": [ 100 | { 101 | "name": "stdout", 102 | "output_type": "stream", 103 | "text": [ 104 | "------------------------------ 2 Iterations ------------------------------\n", 105 | " short\t1.97e-05\t6.54e-05\t4.57e-05\n", 106 | " long\t1.93e-05\t4.58e-04\t4.39e-04\n", 107 | " generic\t1.34e-05\t1.82e-05\t4.84e-06\n", 108 | "\n", 109 | "\n", 110 | "------------------------------ 10 Iterations ------------------------------\n", 111 | " short\t7.12e-05\t1.16e-04\t4.53e-05\n", 112 | " long\t8.27e-05\t5.40e-04\t4.58e-04\n", 113 | " generic\t6.60e-05\t6.47e-05\t-1.23e-06\n", 114 | "\n", 115 | "\n", 116 | "------------------------------ 100 Iterations ------------------------------\n", 117 | " short\t6.19e-04\t7.01e-04\t8.20e-05\n", 118 | " long\t7.44e-04\t1.36e-03\t6.12e-04\n", 119 | " generic\t5.49e-04\t6.28e-04\t7.92e-05\n", 120 | "\n", 121 | "\n", 122 | "------------------------------ 1000 Iterations ------------------------------\n", 123 | " short\t7.59e-03\t1.73e-02\t9.76e-03\n", 124 | " long\t8.42e-03\t2.10e-02\t1.26e-02\n", 125 | " generic\t6.89e-03\t1.61e-02\t9.20e-03\n", 126 | "\n", 127 | "\n", 128 | "------------------------------ 10000 Iterations ------------------------------\n", 129 | " short\t8.89e-02\t1.09e+00\t9.98e-01\n", 130 | " long\t9.37e-02\t1.17e+00\t1.08e+00\n", 131 | " generic\t7.18e-02\t1.12e+00\t1.05e+00\n", 132 | "\n", 133 | "\n", 134 | "------------------------------ 30000 Iterations ------------------------------\n", 135 | " short\t3.61e-01\t1.04e+01\t1.00e+01\n", 136 | " long\t4.51e-01\t1.08e+01\t1.04e+01\n", 137 | " generic\t3.13e-01\t1.01e+01\t9.76e+00\n", 138 | "\n", 139 | "\n" 140 | ] 141 | } 142 | ], 143 | "source": [ 144 | "long = AtomTyper.read_typelist('Torsion_0_0.00e+00_results.smarts')\n", 145 | "long = [smirks 
for (smirks,name) in long if not '$' in smirks]\n", 146 | "smirks_lists = {\n", 147 | " 'generic':['[*:1]~[*:2]~[*:3]~[*:4]'],\n", 148 | " 'short':copy.deepcopy(long[:10]),\n", 149 | " 'long':copy.deepcopy(long)}\n", 150 | "\n", 151 | "iterations = [2, 10, 100, 1000, 10000, 30000]\n", 152 | "\n", 153 | "for its in iterations:\n", 154 | " print('%s %i Iterations %s' % ('-'*30, its, '-'*30))\n", 155 | " for title, smirks in smirks_lists.items():\n", 156 | " smirks_time, env_time = run_samplings(smirks, its)\n", 157 | " dif = env_time - smirks_time\n", 158 | " print(\"%20s\\t%.2e\\t%.2e\\t%.2e\" % (title, smirks_time, env_time,dif))\n", 159 | " print('\\n')" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": { 166 | "collapsed": true 167 | }, 168 | "outputs": [], 169 | "source": [] 170 | } 171 | ], 172 | "metadata": { 173 | "kernelspec": { 174 | "display_name": "Python 3", 175 | "language": "python", 176 | "name": "python3" 177 | }, 178 | "language_info": { 179 | "codemirror_mode": { 180 | "name": "ipython", 181 | "version": 3 182 | }, 183 | "file_extension": ".py", 184 | "mimetype": "text/x-python", 185 | "name": "python", 186 | "nbconvert_exporter": "python", 187 | "pygments_lexer": "ipython3", 188 | "version": "3.5.3" 189 | } 190 | }, 191 | "nbformat": 4, 192 | "nbformat_minor": 2 193 | } 194 | --------------------------------------------------------------------------------