├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── devtools
    ├── conda-recipe
    │   ├── README.md
    │   ├── build.sh
    │   └── meta.yaml
    └── travis-ci
    │   ├── after_success.sh
    │   ├── index.html
    │   ├── install.sh
    │   └── push-docs-to-s3.py
├── dist
    └── smarty-0.1.0-py2.7.egg
├── examples
    ├── README.md
    ├── parm@frosst
    │   ├── README.md
    │   ├── atomtypes
    │   │   ├── README.md
    │   │   ├── basetypes-elemental.smarts
    │   │   ├── basetypes.smarts
    │   │   ├── decorators-simple.smarts
    │   │   ├── decorators.smarts
    │   │   └── substitutions.smarts
    │   ├── make_subset.py
    │   ├── molecules
    │   │   ├── zinc-subset-500-parm@frosst.mol2.gz
    │   │   ├── zinc-subset-500-tripos.mol2.gz
    │   │   ├── zinc-subset-parm@frosst.mol2.gz
    │   │   └── zinc-subset-tripos.mol2.gz
    │   └── scripts
    │   │   ├── README.md
    │   │   └── convert-atom-names-to-tripos.py
    ├── smarty_simulations
    │   ├── AlkEthOH.csv
    │   ├── AlkEthOH.log
    │   ├── AlkEthOH.pdf
    │   ├── Hydrogen.csv
    │   ├── Hydrogen.log
    │   ├── Hydrogen.pdf
    │   ├── README.md
    │   ├── Simple-Decorators.csv
    │   ├── Simple-Decorators.log
    │   └── Simple-Decorators.pdf
    └── smirky
    │   ├── README.md
    │   ├── atom_AND_decorators.smarts
    │   ├── atom_OR_bases.smarts
    │   ├── atom_OR_decorators.smarts
    │   ├── atom_odds_forTorsions.smarts
    │   ├── bond_AND_decorators.smarts
    │   ├── bond_OR_bases.smarts
    │   ├── bond_odds_forTorsions.smarts
    │   ├── initial_Torsions.smarts
    │   ├── output.csv
    │   ├── output.log
    │   ├── output.pdf
    │   ├── output_results.smarts
    │   └── substitutions.smarts
├── oe_license.txt.enc
├── setup.py
├── smarty
    ├── __init__.py
    ├── atomtyper.py
    ├── cli_smarty.py
    ├── cli_smirky.py
    ├── data
    │   ├── README.md
    │   ├── __init__.py
    │   ├── atomtypes
    │   │   ├── README.md
    │   │   ├── basetypes.smarts
    │   │   ├── decorators-simple.smarts
    │   │   ├── decorators.smarts
    │   │   ├── initial_AlkEthOH.smarts
    │   │   ├── initialtypes.smarts
    │   │   ├── new-decorators.smarts
    │   │   └── replacements.smarts
    │   └── odds_files
    │   │   ├── atom_OR_bases.smarts
    │   │   ├── atom_decorators.smarts
    │   │   ├── atom_index_odds.smarts
    │   │   ├── bond_AND_decorators.smarts
    │   │   ├── bond_OR_bases.smarts
    │   │   ├── bond_index_odds.smarts
    │   │   └── substitutions.smarts
    ├── sampler.py
    ├── sampler_smirky.py
    ├── score_utils.py
    ├── tests
    │   ├── __init__.py
    │   ├── test_atomtyper.py
    │   ├── test_sampler.py
    │   ├── test_smirky_sampler.py
    │   └── test_utils.py
    └── utils.py
└── utilities
    ├── README.md
    └── test_smirks_or_environment_speed
        ├── README.md
        ├── Torsion_0_0.00e+00_results.smarts
        └── testing_smirks_speed.ipynb


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | 
 5 | # C extensions
 6 | *.so
 7 | 
 8 | # Distribution / packaging
 9 | .Python
10 | env/
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | *.egg-info/
23 | .installed.cfg
24 | *.egg
25 | 
26 | # PyInstaller
27 | #  Usually these files are written by a python script from a template
28 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
29 | *.manifest
30 | *.spec
31 | 
32 | # Extracted archive
33 | AlkEthOH_inputfiles/
34 | 
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 | 
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .coverage
43 | .coverage.*
44 | .cache
45 | nosetests.xml
46 | coverage.xml
47 | *,cover
48 | 
49 | # Translations
50 | *.mo
51 | *.pot
52 | 
53 | # Django stuff:
54 | *.log
55 | 
56 | # Sphinx documentation
57 | docs/_build/
58 | 
59 | # PyBuilder
60 | target/
61 | 
62 | # Ipython notebook checkpoints
63 | *.ipynb_checkpoints/
64 | 
65 | # ignore files created during tests
66 | smarty/tests/*.pdf
67 | smarty/tests/*.log
68 | smarty/tests/*.csv
69 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: c
 2 | sudo: false
 3 | 
 4 | branches:
 5 |   only:
 6 |     - master
 7 | 
 8 | install:
 9 |   - source devtools/travis-ci/install.sh
10 |   - export PYTHONUNBUFFERED=true
11 |   # Decrypt the OpenEye license to $OE_LICENSE; only done when Travis exposes secure variables (forks just print a notice)
12 |   - if [ "$TRAVIS_SECURE_ENV_VARS" == true ]; then openssl aes-256-cbc -K $encrypted_e60be1d1adc8_key -iv $encrypted_e60be1d1adc8_iv -in oe_license.txt.enc -out $OE_LICENSE -d; fi
13 |   - if [ "$TRAVIS_SECURE_ENV_VARS" == false ]; then echo "OpenEye license will not be installed in forks."; fi
14 | 
15 | script:
16 |   # Add the ${ORGNAME} conda channel (ORGNAME is set to "omnia" under env.global)
17 |   - conda config --add channels ${ORGNAME}
18 |   # Create and activate a test environment using the $python version chosen by the env matrix
19 |   - conda create --yes -n test python=$python
20 |   - source activate test
21 |   # Install OpenEye toolkit (the $OPENEYE_CHANNEL command on the next line is currently disabled)
22 |   #- pip install $OPENEYE_CHANNEL openeye-toolkits && python -c "import openeye; print(openeye.__version__)"
23 |   # Use beta version for partial bond orders; printing the version also checks that the import works
24 |   - pip install --pre -i https://pypi.anaconda.org/openeye/label/beta/simple openeye-toolkits && python -c "import openeye; print(openeye.__version__)"
25 |   # Install openforcefield tools directly from the GitHub repository
26 |   # TODO if changes to openforcefield become less dynamic switch to conda install?
27 |   - pip install git+https://github.com/openforcefield/openforcefield.git
28 |   # Build the conda package from the recipe in devtools/conda-recipe
29 |   - conda build devtools/conda-recipe
30 |   # Install the locally built smarty package
31 |   - conda install --yes --use-local smarty
32 |   # Install the test runner, then run the tests from devtools/ (NOTE(review): presumably so the installed package is tested, not the source tree - confirm)
33 |   - conda install --yes nose nose-timer
34 |   - cd devtools && nosetests -vv --nocapture --with-timer $PACKAGENAME && cd ..
35 | 
36 | env:
37 |   matrix:
38 |     - python=2.7  CONDA_PY=27
39 |     - python=3.4  CONDA_PY=34
40 |     - python=3.5  CONDA_PY=35
41 | 
42 |   global:
43 |     - ORGNAME="omnia"
44 |     - PACKAGENAME="smarty"
45 |     # OpenEye toolkit
46 |     - OE_LICENSE="$HOME/oe_license.txt"
47 |     - OPENEYE_CHANNEL="-i https://pypi.anaconda.org/openeye/channel/main/simple"
48 |     # encrypted BINSTAR_TOKEN for push of dev package to binstar
49 |     - secure: "Iw2yv40ElSbS/TstXS9YnsbJFbxsbFQ25fkWlq8H/O3SPJwpX2/PRoCo99R1Scc0mO9BiVMwGDJQeM9y1VoYo3ozv5SIhPvc+0cMOE3AzkRiFEpZeTtDUTxOWsb+k/x5dH5/AapXRtJeKhY3cWe3lhKdv9N+yWrhY29lawXgfU4WsOEl6ON9BPwwPzvKK1sP4z8kIMzDNjt6gJ3m1HzdEQe/ibrOJIEk6Z4kTLQo9z4F9dm73/L4scEgnW6SOACC39nuYCL8PK4zPNKTqpAoVkm18uyrRz62+qPYSl3RCBNOFtbAuz7fz+ShSMA6g//LpAobNptpQeQpWXkHhYk5ALc6xzH2zScVgrPytKAPwi8mYKq9gYZnUPYgpOdjK3bNyfkGjeV9I4sQwNCBYlKtGHoqZ1l+l6oYsbx+Ti+nIeK67ufGmAugH4GJ3dhZvP6ZR73/irOrvSWiJJgqI1/k4c9Ela4wDpQHDp9sRf03HgSrRTX2gQ3E/JmPx8s56tMdkmrIDIgy6Edc80AN6zEKX0+3YVGcH6ltUViDidRGDlZ7xbUUXYtjqMJXuJEh2SV/wbeVmrBM8Pn+IfsBzLKnd1jqe3pXfoCqbCtvNwW8Sr4qMgWBEHvEtB4C5KvO5CydmRx95q/0ziRGb/VEV6QOnGxT7EIJDfyQeUqNqJD7Bdo="
50 |     # encrypted AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
51 |     #- secure: "SrSd1JoI8dBXQxDAX0xBTYBinTusRBQoPETnxHrBAgKdoty1pkzaghTKNMsrGsk78iwkkj1hAyttIY9trdFQkmx+OTx0fLKFmDHsMkgko4RzTtrgLgoxuRIs/gruID2cN1XKEbxlhRmQF14+q8/X1q6iGGdYMrxo51JcYPuEOSo="
52 |     #- secure: "br6QRMYXhHltYTEh/d+zejxcunT3GsqwQvxxLmqnLxi+LIxX4j7eymR6p4fPBd5mCRxyvkQEjnSZxF6e7JlEKxWVcMG28I/dBWzVIRW3EKQQNRmyI+JL1dfNaqj68kHJD+FknBwHK9LD238JPcyqXPdVrm9iPkDijPczvPBxvDs="
53 | 
54 | #after_success:
55 | #  - echo "after_success"
56 | #  - if [ "$TRAVIS_SECURE_ENV_VARS" == true ]; then ./devtools/travis-ci/after_success.sh; fi
57 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2016, Open Forcefield Group
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | of this software and associated documentation files (the "Software"), to deal
 5 | in the Software without restriction, including without limitation the rights
 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | copies of the Software, and to permit persons to whom the Software is
 8 | furnished to do so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
15 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
16 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
17 | DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18 | OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
19 | OR OTHER DEALINGS IN THE SOFTWARE.
20 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | [![Build Status](https://travis-ci.org/openforcefield/smarty.svg?branch=master)](https://travis-ci.org/openforcefield/smarty?branch=master)
  2 | [![DOI](https://zenodo.org/badge/60921138.svg)](https://zenodo.org/badge/latestdoi/60921138)
  3 | 
  4 | # `smarty`: Exploring Bayesian atom type sampling
  5 | 
  6 | This is a simple example of how Bayesian atom type sampling using reversible-jump Markov chain Monte Carlo (RJMCMC) [1] over SMARTS types might work.
  7 | 
  8 | All tools for implementation of the SMIRNOFF in OpenMM have been moved to the [openforcefield repository](https://github.com/openforcefield/openforcefield)
  9 | 
 10 | ## Manifest
 11 | 
 12 | * `examples/` - some toy examples - look here to get started
 13 | * `smarty/` - simple toolkit illustrating the use of RJMCMC to sample over SMARTS-specified atom types and SMIRKS-specified bonded and non-bonded parameter types.
 14 | * `devtools/` - continuous integration and packaging scripts and utilities
 15 | * `oe_license.txt.enc` - encrypted OpenEye license for continuous integration testing
 16 | * `.travis.yml` - travis-ci continuous integration file
 17 | * `utilities/` - some utility functionality relating to the project, specifically testing the speed of ChemicalEnvironments for sampling in SMIRKY.
 18 | 
 19 | ## Prerequisites
 20 | 
 21 | Install [miniconda](http://conda.pydata.org/miniconda.html) first. On `osx` with `bash`, this is:
 22 | ```
 23 | wget https://repo.continuum.io/miniconda/Miniconda2-latest-MacOSX-x86_64.sh
 24 | bash Miniconda2-latest-MacOSX-x86_64.sh -b -p $HOME/miniconda
 25 | export PATH="$HOME/miniconda/bin:${PATH}"
 26 | ```
 27 | 
 28 | You must first install the OpenEye toolkit:
 29 | ```
 30 | pip install -i https://pypi.anaconda.org/OpenEye/simple OpenEye-toolkits
 31 | ```
 32 | 
 33 | You can then use conda to install smarty:
 34 | ```
 35 | conda config --add channels omnia
 36 | conda install -c omnia smarty
 37 | ```
 38 | 
 39 | ## Installation
 40 | 
 41 | Install `smarty` from the `smarty/` directory with:
 42 | ```bash
 43 | pip install .
 44 | ```
 45 | If you modify the `smarty` source code (rather than the examples), reinstall with
 46 | ```bash
 47 | pip install . --upgrade
 48 | ```
 49 | 
 50 | ## Documentation
 51 | 
 52 | 
 53 | ## SMARTY atom type sampler
 54 | 
 55 | Check out the example in `examples/smarty_simulations/`:
 56 | 
 57 | Atom types are specified by SMARTS matches with corresponding parameter names.
 58 | 
 59 | First, we start with a number of initial "base types" which are essentially indestructible (often generic) atom types, specified in `atomtypes/basetypes.smarts`:
 60 | ```
 61 | % atom types
 62 | [#1]    hydrogen
 63 | [#6]    carbon
 64 | [#7]    nitrogen
 65 | [#8]    oxygen
 66 | [#9]    fluorine
 67 | [#15]   phosphorous
 68 | [#16]   sulfur
 69 | [#17]   chlorine
 70 | [#35]   bromine
 71 | [#53]   iodine
 72 | ```
 73 | Note that lines beginning with `%` are comment lines.
 74 | 
 75 | We also specify a number of starting types, "initial types" which can be the same or different from the base types. These follow the same format, and `atomtypes/basetypes.smarts` can be reused unless alternate behavior is desired (such as starting from more sophisticated initial types).
 76 | 
 77 | We have two sampler options for SMARTY which differ in how focused the sampling is. The original sampler samples over all elements/patterns at once, whereas the elemental sampler focuses on sampling only one specific element. The principle of sampling is the same; the only change is in which elements we sample over. To sample only over a single element, such as oxygen, for example, we use the elemental sampler to focus on that element.
 78 | 
 79 | 
 80 | ### Generating New SMARTS patterns
 81 | 
 82 | There are two options for how to change SMARTS patterns when creating new atom types.
 83 | One is using combinatorial decorators (default) and the other is using simple decorators (`--decoratorbehavior=simple-decorators`). However, it should be noted that we have found the simple decorators insufficient at distinguishing atomtypes even for the most simple sets of molecules.
 84 | 
 85 | **Combinatorial Decorators**
 86 | 
 87 | The first option (combinatorial-decorator) attempts to create the new atom type by adding an Alpha or Beta substituent to a base type or an existing atom type.
 88 | These decorators are different from those of the simple-decorator option and do not carry atom type or bond information.
 89 | The new decorators are listed in `AlkEthOH/atomtypes/new-decorators.smarts` and `parm@frosst/atomtypes/new-decorators.smarts`:
 90 | 
 91 |  ```
 92 |  % total connectivity
 93 |  X1             connections-1
 94 |  X2             connections-2
 95 |  X3             connections-3
 96 |  X4             connections-4
 97 |  % total-h-count
 98 |  H0             total-h-count-0
 99 |  H1             total-h-count-1
100 |  H2             total-h-count-2
101 |  H3             total-h-count-3
102 |  % formal charge
103 |  +0             neutral
104 |  +1             cationic+1
105 |  -1             anionic-1
106 |  % aromatic/aliphatic
107 |  a              aromatic
108 |  A              aliphatic
109 |  ```
110 | Each decorator has a corresponding string token (no spaces allowed!) that is used to create human-readable versions of the corresponding atom types.
111 | 
112 | For example, we may find the atom type ```[#6]&H3``` which is `carbon total-h-count-3` for a C atom bonded to three hydrogens.
113 | 
114 | **Simple Decorators**
115 | The second option (simple-decorators) attempts to split off a new atom type from a parent atom type by combining (via an "and" operator, `&`) the parent atom type with a "decorator".
116 | The decorators are listed in `AlkEthOH/atomtypes/decorators.smarts` or `parm@frosst/atomtypes/decorators.smarts`:
117 | ```
118 | % bond order
119 | $([*]=[*])     double-bonded
120 | $([*]#[*])     triple-bonded
121 | $([*]:[*])     aromatic-bonded
122 | % bonded to atoms
123 | $(*~[#1])      hydrogen-adjacent
124 | $(*~[#6])      carbon-adjacent
125 | $(*~[#7])      nitrogen-adjacent
126 | $(*~[#8])      oxygen-adjacent
127 | $(*~[#9])      fluorine-adjacent
128 | $(*~[#15])     phosphorous-adjacent
129 | $(*~[#16])     sulfur-adjacent
130 | $(*~[#17])     chlorine-adjacent
131 | $(*~[#35])     bromine-adjacent
132 | $(*~[#53])     iodine-adjacent
133 | % degree
134 | D1             degree-1
135 | D2             degree-2
136 | D3             degree-3
137 | D4             degree-4
138 | D5             degree-5
139 | D6             degree-6
140 | % valence
141 | v1             valence-1
142 | v2             valence-2
143 | v3             valence-3
144 | v4             valence-4
145 | v5             valence-5
146 | v6             valence-6
147 | % total-h-count
148 | H1             total-h-count-1
149 | H2             total-h-count-2
150 | H3             total-h-count-3
151 | % aromatic/aliphatic
152 | a              aromatic
153 | A              aliphatic
154 | ```
155 | This option also has the corresponding string tokens.
156 | 
157 | Newly proposed atom types are added to the end of the list.
158 | After a new atom type is proposed, all molecules are reparameterized using the new set of atom types.
159 | Atom type matching proceeds by trying to see if each SMARTS match can be applied working from top to bottom of the list.
160 | This means the atom type list is hierarchical, with more general types appearing at the top of the list and more specific subtypes appearing at the bottom.
161 | 
162 | If a proposed type matches zero atoms, the RJMCMC move is rejected.
163 | 
164 | Currently, the acceptance criterion is not the full Metropolis-Hastings acceptance criterion, which would also include the reverse probability.  This still needs to be added.
165 | 
166 | ### Elemental Decomposition
167 | 
168 | The input option `--element` allows a user to specify which atom types to sample based on atomic number. The default input is 0 (corresponding to no specified atomic number) and will attempt to match all atom types. If an element number is given (e.g. `--element=1` for hydrogen), only atoms with that atomic number are considered. Specifying an element number does not affect any other smarty behavior.
169 | 
170 | Finally, here is a complete list of input options for smarty. Under `usage` all bracketed parameters are optional.
171 | ```
172 | Usage:     Sample over atom types, optionally attempting to match atom types in a reference typed set of molecules.
173 | 
174 |     usage: smarty --basetypes smartsfile --initialtypes smartsfile
175 |             --decorators smartsfile --molecules molfile
176 |             [--element atomicnumber --substitutions smartsfile --reference molfile
177 |             --decoratorbehavior combinatorial-decorators/simple-decorators
178 |             --iterations niterations --temperature temperature --trajectory trajectorfile
179 |             --plot plotfile]
180 | 
181 |     example:
182 |     python smarty --basetypes=atomtypes/basetypes.smarts --initialtypes=atomtypes/initialtypes.smarts \
183 |             --decorators=atomtypes/decorators.smarts --substitutions=atomtypes/substitutions.smarts \
184 |             --molecules=molecules/zinc-subset-tripos.mol2.gz --reference=molecules/zinc-subset-parm@frosst.mol2.gz \
185 |             --iterations 1000 --temperature=0.1
186 | 
187 | 
188 | Options:
189 |   --version             show program's version number and exit
190 |   -h, --help            show this help message and exit
191 |   -e ELEMENT, --element=ELEMENT
192 |                         By default the element value is 0 corresponding to
193 |                         sampling all atomtypes. If another atomic number is
194 |                         specified only atoms with that atomic number are
195 |                         sampled (i.e. --element=8 will only sample atomtypes
196 |                         for oxygen atoms).
197 |   -b BASETYPES, --basetypes=BASETYPES
198 |                         Filename defining base or generic atom types as SMARTS
199 |                         atom matches; these are indestructible and normally
200 |                         are elemental atom types.
201 |   -f BASETYPES, --initialtypes=BASETYPES
202 |                         Filename defining initial (first) atom types as SMARTS
203 |                         atom matches.
204 |   -d DECORATORS, --decorators=DECORATORS
205 |                         Filename defining decorator atom types as SMARTS atom
206 |                         matches.
207 |   -s SUBSTITUTIONS, --substitutions=SUBSTITUTIONS
208 |                         Filename defining substitution definitions for SMARTS
209 |                         atom matches (OPTIONAL).
210 |   -r REFMOL, --reference=REFMOL
211 |                         Reference typed molecules for computing likelihood
212 |                         (must match same molecule and atom ordering in
213 |                         molecules file) (OPTIONAL).
214 |   -m MOLECULES, --molecules=MOLECULES
215 |                         Small molecule set (in any OpenEye compatible file
216 |                         format) containing 'dG(exp)' fields with experimental
217 |                         hydration free energies.
218 |   -i ITERATIONS, --iterations=ITERATIONS
219 |                         MCMC iterations.
220 |   -t TEMPERATURE, --temperature=TEMPERATURE
221 |                         Effective temperature for Monte Carlo acceptance,
222 |                         indicating fractional tolerance of mismatched atoms
223 |                         (default: 0.1). If 0 is specified, will behave in a
224 |                         greedy manner.
225 |   -l TRAJECTORY_FILE, --trajectory=TRAJECTORY_FILE
226 |                         Name for trajectory file output, trajectory saves only
227 |                         changes to the list of 'atomtypes' for each iteration.
228 |                         If the file already exists, it is overwritten.
229 |   -p PLOT_FILE, --plot=PLOT_FILE
230 |                         Name for output file of a plot of the score versus
231 |                         time. If not specified, none will be written. If
232 |                         provided, needs to use a file extension suitable for
233 |                         matplotlib/pylab. Currently requires a trajectory file
234 |                         to be written using -l or --trajectory.
235 |   -x DECORATOR_BEHAVIOR, --decoratorbehavior=DECORATOR_BEHAVIOR
236 |                         Choose between simple-decorators or combinatorial-
237 |                         decorators (default = combinatorial-decorators).
238 | ```
239 | 
240 | ---
241 | 
242 | ## smirky
243 | 
244 | Check out examples in `examples/smirky/`:
245 | 
246 | This tool can sample any chemical environment type relevant to SMIRNOFFs, that is atoms, bonds, angles, and proper and improper torsions, one at a time.
247 | Scoring is analogous to smarty (explained above), but uses a SMIRNOFF with existing parameters as a reference instead of atom-typed molecules.
248 | 
249 | Input for this tool can require up to four different file types
250 | * MOLECULES - any file format that is readable by OpenEye: mol2, sdf, oeb, etc.
251 | * ODDSFILES - File with the form "smarts     odds" for the different decorator or bond options
252 | * SMARTS - .smarts file type with the form "smarts/smirks      label/typename"
253 | * REFERENCE - a SMIRNOFF file with reference atoms, bonds, angles, torsions, and impropers
254 | 
255 | ```
256 | Usage:     Sample over fragment types (atoms, bonds, angles, torsions, or impropers)
257 |     optionally attempting to match created types to an established SMIRNOFF.
258 |     For all files left blank, they will be taken from this module's
259 |     data/odds_files/ subdirectory.
260 | 
261 |     usage smirky --molecules molfile --typetag fragmentType
262 |             [--atomORbases AtomORbaseFile --atomORdecors AtomORdecorFile
263 |             --atomANDdecors AtomANDdecorFile --bondORbase BondORbaseFile
264 |             --bondANDdecors BondANDdecorFile --atomIndexOdds AtomIndexFile
265 |             --bondIndexOdds BondIndexFile --replacements substitutions
266 |             --initialFragments initialFragments --SMIRNOFF referenceSMIRNOFF
267 |             --temperature float --verbose verbose
268 |             --iterations iterations --output outputFile]
269 | 
270 |     example:
271 |     smirky --molecules AlkEthOH_test_filt1_ff.mol2 --typetag Angle
272 | 
273 | 
274 | 
275 | Options:
276 |   --version             show program's version number and exit
277 |   -h, --help            show this help message and exit
278 |   -m MOLECULES, --molecules=MOLECULES
279 |                         Small molecule set (in any OpenEye compatible file
280 |                         format) containing 'dG(exp)' fields with experimental
281 |                         hydration free energies. This filename can also be an
282 |                         option in this module's data/molecules sub-directory
283 |   -T TYPETAG, --typetag=TYPETAG
284 |                         type of fragment being sampled, options are 'VdW',
285 |                         'Bond', 'Angle', 'Torsion', 'Improper'
286 |   -e ODDFILES, --atomORbases=ODDFILES
287 |                         Filename defining atom OR bases and associated
288 |                         probabilities. These are combined with atom OR
289 |                         decorators in SMIRKS, for example in
290 |                         '[#6X4,#7X3;R2:2]' '#6' and '#7' are atom OR bases.
291 |                         (OPTIONAL)
292 |   -O ODDFILES, --atomORdecors=ODDFILES
293 |                         Filename defining atom OR decorators and associated
294 |                         probabilities. These are combined with atom bases in
295 |                         SMIRKS, for example in '[#6X4,#7X3;R2:2]' 'X4' and
296 |                         'X3' are ORdecorators. (OPTIONAL)
297 |   -A ODDFILES, --atomANDdecors=ODDFILES
298 |                         Filename defining atom AND decorators and associated
299 |                         probabilities. These are added to the end of an atom's
300 |                         SMIRKS, for example in '[#6X4,#7X3;R2:2]' 'R2' is an
301 |                         AND decorator. (OPTIONAL)
302 |   -o ODDFILES, --bondORbase=ODDFILES
303 |                         Filename defining bond OR bases and their associated
304 |                         probabilities. These are OR'd together to describe a
305 |                         bond, for example in '[#6]-,=;@[#6]' '-' and '=' are
306 |                         OR bases. (OPTIONAL)
307 |   -a ODDFILES, --bondANDdecors=ODDFILES
308 |                         Filename defining bond AND decorators and their
309 |                         associated probabilities. These are AND'd to the end
310 |                         of a bond, for example in '[#6]-,=;@[#7]' '@' is an
311 |                         AND decorator.(OPTIONAL)
312 |   -D ODDSFILE, --atomOddsFile=ODDSFILE
313 |                         Filename defining atom descriptors and probabilities
314 |                         with making changes to that kind of atom. Options for
315 |                         descriptors are integers corresponding to that indexed
316 |                         atom, 'Indexed', 'Unindexed', 'Alpha', 'Beta', 'All'.
317 |                         (OPTIONAL)
318 |   -d ODDSFILE, --bondOddsFile=ODDSFILE
319 |                         Filename defining bond descriptors and probabilities
320 |                         with making changes to that kind of bond. Options for
321 |                         descriptors are integers corresponding to that indexed
322 |                         bond, 'Indexed', 'Unindexed', 'Alpha', 'Beta', 'All'.
323 |                         (OPTIONAL)
324 |   -s SMARTS, --substitutions=SMARTS
325 |                         Filename defining substitution definitions for SMARTS
326 |                         atom matches. (OPTIONAL).
327 |   -f SMARTS, --initialtypes=SMARTS
328 |                         Filename defining initial (first) fragment types as
329 |                         'SMIRKS    typename'. If this is left blank the
330 |                         initial type will be a generic form of the given
331 |                         fragment, for example '[*:1]~[*:2]' for a bond
332 |                         (OPTIONAL)
333 |   -r REFERENCE, --smirff=REFERENCE
334 |                         Filename defining a SMIRNOFF force field used to
335 |                         determine reference fragment types in provided set of
336 |                         molecules. It may be an absolute file path, a path
337 |                         relative to the current working directory, or a path
338 |                         relative to this module's data subdirectory (for built
339 |                         in force fields). (OPTIONAL)
340 |   -i ITERATIONS, --iterations=ITERATIONS
341 |                         MCMC iterations.
342 |   -t TEMPERATURE, --temperature=TEMPERATURE
343 |                         Effective temperature for Monte Carlo acceptance,
344 |                         indicating fractional tolerance of mismatched atoms
345 |                         (default: 0.1). If 0 is specified, will behave in a
346 |                         greedy manner.
347 |   -p OUTPUT, --output=OUTPUT
348 |                         Filename base for output information. This same base
349 |                         will be used for all output files created. If None
350 |                         provided then it is set to 'typetag_temperature'
351 |                         (OPTIONAL).
352 |   -v VERBOSE, --verbose=VERBOSE
353 |                         If True prints minimal information to the commandline
354 |                         during iterations. (OPTIONAL)
355 | ```
356 | 
357 | ## The SMIRNOFF force field format
358 | 
359 | The SMIRNOFF force field format is documented [here](https://github.com/openforcefield/openforcefield/blob/master/The-SMIRNOFF-force-field-format.md).
360 | It was previously available in this repository, but has been moved.
361 | SMIRNOFF99Frosst, a version of SMIRNOFF mirroring the parameters found in the parm@Frosst force field, is now housed in its own [repository](https://github.com/openforcefield/smirnoff99Frosst).
362 | `forcefield.py` and other modules required to implement the SMIRNOFF format for simulations in OpenMM have also been moved. These scripts and examples on how to use them can be found at [openforcefield/openforcefield](https://github.com/openforcefield/openforcefield).
363 | 
364 | ## References
365 | 
366 | [1] Green PJ. Reversible jump Markov chain Monte Carlo computation and Bayesian model determination. Biometrika 82:711, 1995.
367 | http://dx.doi.org/10.1093/biomet/82.4.711
368 | 


--------------------------------------------------------------------------------
/devtools/conda-recipe/README.md:
--------------------------------------------------------------------------------
 1 | This is a recipe for building the current development package into a conda
 2 | binary.
 3 | 
 4 | The installation on travis-ci is done by building the conda package, installing
 5 | it, running the tests, and then if successful pushing the package to binstar
 6 | (and the docs to AWS S3). The binstar auth token is an encrypted environment
 7 | variable generated using:
 8 | 
 9 | binstar auth -n yank-travis -o omnia --max-age 22896000 -c --scopes api:write
10 | 
11 | and then saved in the environment variable BINSTAR_TOKEN.
12 | 
13 | You can set up travis to store an encrypted token via
14 | 
15 | gem install travis && travis encrypt BINSTAR_TOKEN=xx
16 | 
17 | where xx is the token output by binstar. The final command should print a line (containing 'secure') for inclusion in your .travis.yml file.
18 | 


--------------------------------------------------------------------------------
/devtools/conda-recipe/build.sh:
--------------------------------------------------------------------------------
1 | pip install .
2 | 


--------------------------------------------------------------------------------
/devtools/conda-recipe/meta.yaml:
--------------------------------------------------------------------------------
 1 | package:
 2 |   name: smarty
 3 |   version: 0.0.0
 4 | 
 5 | source:
 6 |   path: ../..
 7 | 
 8 | build:
 9 |   preserve_egg_dir: True
10 |   number: 0
11 | 
12 | requirements:
13 |   build:
14 |     - python
15 |     - setuptools
16 |     - pandas
17 | 
18 |   run:
19 |     - python
20 |     - numpy
21 |     - networkx
22 |     - lxml
23 |     - openmoltools >=0.7.3
24 |     - parmed
25 |     - matplotlib
26 |     - pandas
27 | 
28 | test:
29 |   requires:
30 |     - nose
31 |     - nose-timer
32 |   imports:
33 |     - smarty
34 | 
35 | about:
36 |   home: https://github.com/openforcefield/smarty
37 |   license: MIT
38 | 


--------------------------------------------------------------------------------
/devtools/travis-ci/after_success.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Must be invoked with $PACKAGENAME, $ORGNAME and $BINSTAR_TOKEN set.
 3 | 
 4 | echo $TRAVIS_PULL_REQUEST $TRAVIS_BRANCH
 5 | PUSH_DOCS_TO_S3=false
 6 | 
 7 | # Travis sets TRAVIS_PULL_REQUEST to the PR number on pull-request builds and to "false" otherwise.
 8 | if [ "$TRAVIS_PULL_REQUEST" != "false" ]; then
 9 |     echo "This is a pull request. No deployment will be done."; exit 0
10 | fi
11 | 
12 | if [ "$TRAVIS_BRANCH" != "master" ]; then
13 |     echo "No deployment on BRANCH='$TRAVIS_BRANCH'"; exit 0
14 | fi
15 | 
16 | 
17 | # Deploy to binstar
18 | conda install --yes anaconda-client jinja2
19 | pushd .
20 | cd $HOME/miniconda/conda-bld
21 | FILES=*/${PACKAGENAME}-dev-*.tar.bz2
22 | for filename in $FILES; do
23 |     anaconda -t $BINSTAR_TOKEN remove --force ${ORGNAME}/${PACKAGENAME}-dev/${filename}
24 |     anaconda -t $BINSTAR_TOKEN upload --force -u ${ORGNAME} -p ${PACKAGENAME}-dev ${filename}
25 | done
26 | popd
27 | 
28 | if [ "$PUSH_DOCS_TO_S3" = true ]; then
29 |    # Create the docs and push them to S3
30 |    # -----------------------------------
31 |     conda install --yes pip
32 |     conda config --add channels $ORGNAME
33 |     conda install --yes `conda build devtools/conda-recipe --output`
34 |     pip install numpydoc s3cmd msmb_theme
35 |     conda install --yes `cat docs/requirements.txt | xargs`
36 | 
37 |     conda list -e
38 | 
39 |     (cd docs && make html && cd -)
40 |     ls -lt docs/_build
41 |     pwd
42 |     python devtools/travis-ci/push-docs-to-s3.py
43 | fi
44 | 


--------------------------------------------------------------------------------
/devtools/travis-ci/index.html:
--------------------------------------------------------------------------------
1 | <html><head><meta http-equiv="refresh" content="0;URL='/latest'"/></head></html>
2 | 


--------------------------------------------------------------------------------
/devtools/travis-ci/install.sh:
--------------------------------------------------------------------------------
 1 | # Temporarily change directory to $HOME to install software
 2 | pushd .
 3 | cd $HOME
 4 | 
 5 | # Install Miniconda, checking the installer against the MD5 scraped from the download index page
 6 | MINICONDA=Miniconda2-latest-Linux-x86_64.sh
 7 | MINICONDA_HOME=$HOME/miniconda
 8 | MINICONDA_MD5=$(curl -s https://repo.continuum.io/miniconda/ | grep -A3 $MINICONDA | sed -n '4p' | sed -n 's/ *<td>\(.*\)<\/td> */\1/p')
 9 | wget -q https://repo.continuum.io/miniconda/$MINICONDA
10 | if [[ "$MINICONDA_MD5" != "$(md5sum $MINICONDA | cut -d ' ' -f 1)" ]]; then
11 |     echo "Miniconda MD5 mismatch"
12 |     exit 1
13 | fi
14 | bash $MINICONDA -b -p $MINICONDA_HOME
15 | 
16 | # Configure miniconda
17 | export PIP_ARGS="-U"
18 | export PATH=$MINICONDA_HOME/bin:$PATH
19 | conda update --yes conda
20 | conda install --yes conda-build jinja2 anaconda-client pip
21 | conda install --yes -c omnia openmoltools
22 | conda install --yes -c omnia parmed
23 | conda install --yes matplotlib
24 | conda install --yes pandas
25 | 
26 | # Restore original directory
27 | popd
28 | 


--------------------------------------------------------------------------------
/devtools/travis-ci/push-docs-to-s3.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | """
 4 | Must have the vollowing environment variables defined:
 5 | * BUCKET_NAME : AWS bucket name
 6 | * PREFIX : 'latest' or other version number
 7 | 
 8 | """
 9 | 
10 | import os
11 | import pip
12 | import tempfile
13 | import subprocess
14 | import thermopyl.version
15 | 
16 | 
17 | BUCKET_NAME = 'thermopyl.org'
18 | if not thermopyl.version.release:
19 |     PREFIX = 'latest'
20 | else:
21 |     PREFIX = thermopyl.version.short_version
22 | 
23 | if not any(d.project_name == 's3cmd' for d in pip.get_installed_distributions()):
24 |     raise ImportError('The s3cmd pacakge is required. try $ pip install s3cmd')
25 | # The secret key is available as a secure environment variable
26 | # on travis-ci to push the build documentation to Amazon S3.
27 | with tempfile.NamedTemporaryFile('w') as f:
28 |     f.write('''[default]
29 | access_key = {AWS_ACCESS_KEY_ID}
30 | secret_key = {AWS_SECRET_ACCESS_KEY}
31 | '''.format(**os.environ))
32 |     f.flush()
33 | 
34 |     template = ('s3cmd --guess-mime-type --config {config} '
35 |                 'sync docs/_build/ s3://{bucket}/{prefix}/')
36 |     cmd = template.format(
37 |             config=f.name,
38 |             bucket=BUCKET_NAME,
39 |             prefix=PREFIX)
40 |     return_val = subprocess.call(cmd.split())
41 | 
42 |     # Sync index file.
43 |     template = ('s3cmd --guess-mime-type --config {config} '
44 |                 'sync devtools/ci/index.html s3://{bucket}/')
45 |     cmd = template.format(
46 |             config=f.name,
47 |             bucket=BUCKET_NAME)
48 |     return_val = subprocess.call(cmd.split())
49 | 


--------------------------------------------------------------------------------
/dist/smarty-0.1.0-py2.7.egg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openforcefield/smarty/882d54b6d6d0fada748c71789964b07be2210a6a/dist/smarty-0.1.0-py2.7.egg


--------------------------------------------------------------------------------
/examples/README.md:
--------------------------------------------------------------------------------
1 | # Examples for Bayesian atomtype sampler
2 | 
3 | ## Manifest
4 | * `parm@frosst/` - example illustrating attempt to recover parm@frosst atom types
5 | * `smarty_simulations/` - examples to implement smarty, a tool to rediscover parm@frosst atomtypes on the AlkEthOH molecules set
6 | * `smirky/` - example usage of the smirky sampling tool to rediscover the SMIRNOFF99Frosst parameter types
7 | 
8 | **We have rearranged the Open Force Field group's repositories. If you are looking for an example that used to be here but no longer is, it can be found at [openforcefield/examples/](https://github.com/openforcefield/openforcefield/tree/master/examples).**
9 | 


--------------------------------------------------------------------------------
/examples/parm@frosst/README.md:
--------------------------------------------------------------------------------
 1 | # Example application of SMARTY atom type sampler to recover parm@frosst typing
 2 | 
 3 | In this example, the SMARTY `AtomTypeSampler` is used to attempt to recover SMARTS atom types that recapitulate the typing rules from a reference set of typed molecules.
 4 | 
 5 | ## Manifest
 6 | * `smarty.py` - example command-line driver
 7 | * `atomtypes/` - input atom type sample specification files
 8 | * `molecules/` - typed molecule datasets
 9 | * `scripts/` - useful conversion scripts
10 | 
11 | ## Usage
12 | 
13 | Usage
14 | 
15 | Example:
16 | ```
17 | smarty --basetypes=atomtypes/basetypes-elemental.smarts --initialtypes=atomtypes/basetypes-elemental.smarts --decorators=atomtypes/decorators.smarts --substitutions=atomtypes/substitutions.smarts \
18 |     --molecules=molecules/zinc-subset-tripos.mol2.gz --reference=molecules/zinc-subset-parm@frosst.mol2.gz --iterations 1000 --temperature=0.1
19 | ```
20 | 
21 | Initially, the base atom types are added to the pool of current atom types, and the number of atoms and molecules matched by each atom type are shown:
22 | ```
23 | INDEX        ATOMS  MOLECULES                                                          TYPE NAME                           SMARTS REF TYPE        FRACTION OF REF TYPED MOLECULES MATCHED
24 |     1 :      88148       7487 |                                                         hydrogen                             [#1]       HA            28720 /            28720 (100.000%)
25 |     2 :      90146       7505 |                                                           carbon                             [#6]       CA            37143 /            37143 (100.000%)
26 |     3 :      20838       6806 |                                                         nitrogen                             [#7]       NB             7612 /             7612 (100.000%)
27 |     4 :      12829       5946 |                                                           oxygen                             [#8]        O             4876 /             4876 (100.000%)
28 |     5 :       1001        444 |                                                         fluorine                             [#9]        F             1001 /             1001 (100.000%)
29 |     6 :          5          5 |                                                      phosphorous                            [#15]        P                5 /                5 (100.000%)
30 |     7 :       3171       2593 |                                                           sulfur                            [#16]        S             2544 /             2544 (100.000%)
31 |     8 :        574        463 |                                                         chlorine                            [#17]       CL              574 /              574 (100.000%)
32 |     9 :         84         73 |                                                          bromine                            [#35]       BR               84 /               84 (100.000%)
33 |    10 :          8          8 |                                                           iodine                            [#53]        I                8 /                8 (100.000%)
34 | TOTAL :     216804       7505 |                                                                                                       82567 /   216804 match (38.084 %)
35 | ```
36 | After a few iterations, the pool of current atom types will have diverged, with some children having been added to the set or atom types removed from the original set.
37 | ```
38 | INDEX        ATOMS  MOLECULES                                                          TYPE NAME                           SMARTS REF TYPE        FRACTION OF REF TYPED MOLECULES MATCHED
39 |     1 :      88148       7487 |                                                         hydrogen                             [#1]       HA            28720 /            28720 (100.000%)
40 |     2 :      90068       7505 |                                                           carbon                             [#6]       CA            37109 /            37143 ( 99.908%)
41 |     3 :         78         73 |                                          carbon bromine-adjacent                  [#6&$(*~[#35])]       CW               15 /             4850 (  0.309%)
42 |     4 :       9689       5835 |                                                         nitrogen                             [#7]        N             3161 /             3161 (100.000%)
43 |     5 :      11149       5300 |                                                nitrogen degree-2                          [#7&D2]       NB             7480 /             7612 ( 98.266%)
44 |     6 :      12829       5946 |                                                           oxygen                             [#8]        O             4876 /             4876 (100.000%)
45 |     7 :       1001        444 |                                                         fluorine                             [#9]        F             1001 /             1001 (100.000%)
46 |     8 :          5          5 |                                                      phosphorous                            [#15]        P                5 /                5 (100.000%)
47 |     9 :       3171       2593 |                                                           sulfur                            [#16]        S             2544 /             2544 (100.000%)
48 |    10 :        574        463 |                                                         chlorine                            [#17]       CL              574 /              574 (100.000%)
49 |    11 :         84         73 |                                                          bromine                            [#35]       BR               84 /               84 (100.000%)
50 |    12 :          8          8 |                                                           iodine                            [#53]        I                8 /                8 (100.000%)
51 | TOTAL :     216804       7505 |                                                                                                       85577 /   216804 match (39.472 %)
52 | ```
53 | or even
54 | ```
55 | Iteration 241 / 1000
56 | Attempting to destroy atom type [#9] : fluorine...
57 | Typing failed; rejecting.
58 | Rejected.
59 | INDEX        ATOMS  MOLECULES                                                          TYPE NAME                           SMARTS REF TYPE        FRACTION OF REF TYPED MOLECULES MATCHED
60 |     1 :      88148       7487 |                                                         hydrogen                             [#1]       HA            28720 /            28720 (100.000%)
61 |     2 :      63417       7402 |                                                           carbon                             [#6]       CA            36300 /            37143 ( 97.730%)
62 |     3 :       4293       2349 |                                           carbon sulfur-adjacent                  [#6&$(*~[#16])]       CW             1497 /             4850 ( 30.866%)
63 |     4 :      14861       5134 |                                                  carbon degree-4                          [#6&D4]       CT            14509 /            22084 ( 65.699%)
64 |     5 :       7575       4235 |                                           carbon total-h-count-3                          [#6&H3]
65 |     6 :      20253       6767 |                                                         nitrogen                             [#7]       NB             7612 /             7612 (100.000%)
66 |     7 :        585        504 |                                                nitrogen degree-1                          [#7&D1]       NL              585 /              585 (100.000%)
67 |     8 :      12829       5946 |                                                           oxygen                             [#8]        O             4876 /             4876 (100.000%)
68 |     9 :       1001        444 |                                                         fluorine                             [#9]        F             1001 /             1001 (100.000%)
69 |    10 :          5          5 |                                                      phosphorous                            [#15]        P                5 /                5 (100.000%)
70 |    11 :       2593       2144 |                                                           sulfur                            [#16]        S             2544 /             2544 (100.000%)
71 |    12 :        578        563 |                                                 sulfur valence-6                         [#16&v6]       SO              578 /              627 ( 92.185%)
72 |    13 :        574        463 |                                                         chlorine                            [#17]       CL              574 /              574 (100.000%)
73 |    14 :         84         73 |                                                          bromine                            [#35]       BR               84 /               84 (100.000%)
74 |    15 :          8          8 |                                                           iodine                            [#53]        I                8 /                8 (100.000%)
75 | TOTAL :     216804       7505 |                                                                                                       98893 /   216804 match (45.614 %)
76 | ```
77 | 


--------------------------------------------------------------------------------
/examples/parm@frosst/atomtypes/README.md:
--------------------------------------------------------------------------------
 1 | # Atom type SMARTS components
 2 | 
 3 | ## Formats
 4 | 
 5 | ### Initial types
 6 | 
 7 | A `basetypes` file specifies the initial atom types used to initialize the sampler.
 8 | 
 9 | Comments beginning with `%` are ignored throughout the file.
10 | Each line has the format
11 | ```
12 | <SMARTS> <typename>
13 | ```
14 | where `<SMARTS>` is an [OpenEye SMARTS string](https://docs.eyesopen.com/toolkits/cpp/oechemtk/SMARTS.html) and `<typename>` is a human-readable typename associated with that atom type.
15 | 
16 | Atom type definitions are hierarchical, with the last match in the file taking precedence over earlier matches.
17 | 
18 | For example, we could use the elemental base types:
19 | ```
20 | % atom types
21 | H    hydrogen
22 | C    carbon
23 | N    nitrogen
24 | O    oxygen
25 | F    fluorine
26 | P    phosphorous
27 | S    sulfur
28 | Cl   chlorine
29 | Br   bromine
30 | I    iodine
31 | ```
32 | 
33 | ### Decorators
34 | 
35 | A `decorators` file contains a list of SMARTS components that can be added to existing atom types to propose new ones.
36 | 
37 | Comments beginning with `%` are ignored throughout the file.
38 | Each line has the format
39 | ```
40 | <SMARTS> <decoratorname>
41 | ```
42 | where `<SMARTS>` is an [OpenEye SMARTS string](https://docs.eyesopen.com/toolkits/cpp/oechemtk/SMARTS.html) and `<decoratorname>` is a human-readable typename associated with that decorator.
43 | 
44 | The SMARTS component is ANDed together (using the `&` operator) with a parent atom type to create a new proposed child atom type.
45 | The human-readable `<decoratorname>` is appended (with a space) to the parent name to keep a human-readable annotation of the proposed child atom type.
46 | 
47 | ### Substitutions
48 | 
49 | It is often convenient to define various tokens that are substituted for more sophisticated SMARTS expressions.
50 | ```
51 | % Substitution definitions
52 | % Format:
53 | % <SMARTS> <replacement-string>
54 | ```
55 | Comments beginning with `%` are ignored throughout the file.
56 | Each line has the format
57 | ```
58 | <SMARTS> <substitution-name>
59 | ```
60 | where `<SMARTS>` is an [OpenEye SMARTS string](https://docs.eyesopen.com/toolkits/cpp/oechemtk/SMARTS.html) and `<substitution-name>` is the token that, when referenced as `$<substitution-name>` in another SMARTS string, is replaced by this SMARTS string.
61 | 
62 | For example, we could define some elemental substitutions along with some substitutions for halogens:
63 | ```
64 | % elements
65 | [#9]    fluorine
66 | [#17]   chlorine
67 | [#35]   bromine
68 | [#53]   iodine
69 | 
70 | % halogens
71 | [$smallhals,$largehals]     halogen
72 | [$fluorine,$chlorine]       smallhals
73 | [$bromine,$iodine]          largehals
74 | ```
75 | 
76 | The [`OESmartsLexReplace`](http://docs.eyesopen.com/toolkits/python/oechemtk/OEChemFunctions/OESmartsLexReplace.html) function is used to implement these replacements.
77 | 
78 | ## Manifest
79 | * `basetypes-elemental.smarts` - basetypes file with elemental atom types - this is a good choice to begin with
80 | * `basetypes.smarts` - basetypes file with more sophisticated atom types
81 | * `decorators.smarts` - `decorators` file with a variety of decorators
82 | * `decorators-simple.smarts` - minimal `decorators` file for testing
83 | * `substitutions.smarts` - minimal `substitutions` file
84 | 


--------------------------------------------------------------------------------
/examples/parm@frosst/atomtypes/basetypes-elemental.smarts:
--------------------------------------------------------------------------------
 1 | % atom types
 2 | [#1]    hydrogen
 3 | [#6]    carbon
 4 | [#7]    nitrogen
 5 | [#8]    oxygen
 6 | [#9]    fluorine
 7 | [#15]   phosphorous
 8 | [#16]   sulfur
 9 | [#17]   chlorine
10 | [#35]   bromine
11 | [#53]   iodine
12 | 


--------------------------------------------------------------------------------
/examples/parm@frosst/atomtypes/basetypes.smarts:
--------------------------------------------------------------------------------
 1 | % atom types
 2 | [#1]    hydrogen
 3 | [#6]    carbon
 4 | [#6&a]  carbon aromatic
 5 | [#7]    nitrogen
 6 | [#8]    oxygen
 7 | [#9]    fluorine
 8 | [#15]   phosphorous
 9 | [#16]   sulfur
10 | [#17]   chlorine
11 | [#35]   bromine
12 | [#53]   iodine
13 | 


--------------------------------------------------------------------------------
/examples/parm@frosst/atomtypes/decorators-simple.smarts:
--------------------------------------------------------------------------------
1 | % aromatic/aliphatic
2 | a              aromatic
3 | A              aliphatic
4 | % halogens
5 | $(*~[$halogen]) halogen-adjacent
6 | 


--------------------------------------------------------------------------------
/examples/parm@frosst/atomtypes/decorators.smarts:
--------------------------------------------------------------------------------
 1 | % bond order
 2 | $([*]=[*])     double-bonded
 3 | $([*]#[*])     triple-bonded
 4 | $([*]:[*])     aromatic-bonded
 5 | % bonded to atoms
 6 | $(*~[#1])      hydrogen-adjacent
 7 | $(*~[#6])      carbon-adjacent
 8 | $(*~[#7])      nitrogen-adjacent
 9 | $(*~[#8])      oxygen-adjacent
10 | $(*~[#9])      fluorine-adjacent
11 | $(*~[#15])     phosphorous-adjacent
12 | $(*~[#16])     sulfur-adjacent
13 | $(*~[#17])     chlorine-adjacent
14 | $(*~[#35])     bromine-adjacent
15 | $(*~[#53])     iodine-adjacent
16 | % degree
17 | D1             degree-1
18 | D2             degree-2
19 | D3             degree-3
20 | D4             degree-4
21 | D5             degree-5
22 | D6             degree-6
23 | % valence
24 | v1             valence-1
25 | v2             valence-2
26 | v3             valence-3
27 | v4             valence-4
28 | v5             valence-5
29 | v6             valence-6
30 | % total-h-count
31 | H1             total-h-count-1
32 | H2             total-h-count-2
33 | H3             total-h-count-3
34 | % aromatic/aliphatic
35 | a              aromatic
36 | A              aliphatic
37 | % halogens
38 | $(*~[$halogen]) halogen-adjacent
39 | $(*~[$smallhals]) small-halogen-adjacent
40 | $(*~[$largehals]) large-halogen-adjacent
41 | 


--------------------------------------------------------------------------------
/examples/parm@frosst/atomtypes/substitutions.smarts:
--------------------------------------------------------------------------------
 1 | % Substitution definitions
 2 | % Format:
 3 | % <SMARTS> <replacement-string>
 4 | 
 5 | % elements
 6 | [#1]    hydrogen
 7 | [#6]    carbon
 8 | [#7]    nitrogen
 9 | [#8]    oxygen
10 | [#9]    fluorine
11 | [#15]   phosphorous
12 | [#16]   sulfur
13 | [#17]   chlorine
14 | [#35]   bromine
15 | [#53]   iodine
16 | 
17 | % halogens
18 | [$smallhals,$largehals]     halogen
19 | [$fluorine,$chlorine]       smallhals
20 | [$bromine,$iodine]          largehals
21 | 


--------------------------------------------------------------------------------
/examples/parm@frosst/make_subset.py:
--------------------------------------------------------------------------------
 1 | #!/bin/env python
 2 | 
 3 | """Take the ZINC subset here and make a smaller subset of it for testing purposes."""
 4 | 
 5 | from openeye.oechem import *
 6 | 
 7 | nmols = 500 #Number of molecules to retain out of full ~7500
 8 | # Currently the above are taken as the first 500. We could also take randomly.
 9 | 
10 | 
11 | 
12 | # Read set with tripos types, write subset
13 | ifs = oemolistream( 'molecules/zinc-subset-tripos.mol2.gz')
14 | ofs = oemolostream( 'molecules/zinc-subset-%s-tripos.mol2.gz' % nmols )
15 | mol = OEMol()
16 | ct=0
17 | while OEReadMolecule(ifs, mol) and ct < nmols:
18 |     OEWriteConstMolecule(ofs, mol)
19 |     ct += 1
20 | 
21 | 
22 | # Read set with parm@frosst types, write subset
23 | # Use flavors here to ensure writing doesn't mangle atom types
24 | ifs = oemolistream( 'molecules/zinc-subset-parm@frosst.mol2.gz')
25 | flavor = OEIFlavor_Generic_Default | OEIFlavor_MOL2_Default | OEIFlavor_MOL2_Forcefield
26 | ifs.SetFlavor(OEFormat_MOL2, flavor)
27 | ofs = oemolostream( 'molecules/zinc-subset-%s-parm@frosst.mol2.gz' % nmols )
28 | ofs.SetFlavor(OEFormat_MOL2, flavor)
29 | mol = OEMol()
30 | ct=0
31 | while OEReadMolecule(ifs, mol) and ct < nmols:
32 |     OEWriteConstMolecule(ofs, mol)
33 |     ct+=1
34 | 
35 | 


--------------------------------------------------------------------------------
/examples/parm@frosst/molecules/zinc-subset-500-parm@frosst.mol2.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openforcefield/smarty/882d54b6d6d0fada748c71789964b07be2210a6a/examples/parm@frosst/molecules/zinc-subset-500-parm@frosst.mol2.gz


--------------------------------------------------------------------------------
/examples/parm@frosst/molecules/zinc-subset-500-tripos.mol2.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openforcefield/smarty/882d54b6d6d0fada748c71789964b07be2210a6a/examples/parm@frosst/molecules/zinc-subset-500-tripos.mol2.gz


--------------------------------------------------------------------------------
/examples/parm@frosst/molecules/zinc-subset-parm@frosst.mol2.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openforcefield/smarty/882d54b6d6d0fada748c71789964b07be2210a6a/examples/parm@frosst/molecules/zinc-subset-parm@frosst.mol2.gz


--------------------------------------------------------------------------------
/examples/parm@frosst/molecules/zinc-subset-tripos.mol2.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openforcefield/smarty/882d54b6d6d0fada748c71789964b07be2210a6a/examples/parm@frosst/molecules/zinc-subset-tripos.mol2.gz


--------------------------------------------------------------------------------
/examples/parm@frosst/scripts/README.md:
--------------------------------------------------------------------------------
1 | # Useful scripts for parm@frosst test
2 | 
3 | ## Manifest
4 | 
5 | * `convert-atom-names-to-tripos.py` - utility to convert atom names to Tripos in mol2 files
6 | 


--------------------------------------------------------------------------------
/examples/parm@frosst/scripts/convert-atom-names-to-tripos.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | """
 3 | Convert file of molecules from forcefield atom types to Tripos atom types.
 4 | 
 5 | Example:
 6 | 
 7 | > python ../convert-atom-names-to-tripos.py zinc-subset-parm@frosst.mol2.gz zinc-subset-tripos.mol2.gz
 8 | """
 9 | ################################################################
10 | #  Copyright (C) 2006-2015 OpenEye Scientific Software, Inc.
11 | ################################################################
12 | from __future__ import division
13 | from __future__ import print_function
14 | import os,sys
15 | import openeye.oechem as oechem
16 | 
17 | def main(argv=sys.argv):
18 |     if len(argv) != 3:
19 |         oechem.OEThrow.Usage("%s <infile (forcefield types)> <outfile (Tripos types)>" % argv[0])
20 | 
21 |     ifs = oechem.oemolistream()
22 |     flavor = oechem.OEIFlavor_Generic_Default | oechem.OEIFlavor_MOL2_Default | oechem.OEIFlavor_MOL2_Forcefield
23 |     ifs.SetFlavor(oechem.OEFormat_MOL2, flavor)
24 |     if not ifs.open(argv[1]):
25 |         oechem.OEThrow.Fatal("Unable to open %s for reading" % argv[1])
26 | 
27 |     ofs = oechem.oemolostream()
28 |     if not ofs.open(argv[2]):
29 |         oechem.OEThrow.Fatal("Unable to open %s for writing" % argv[2])
30 | 
31 |     for mol in ifs.GetOEMols():
32 |         oechem.OETriposAtomNames(mol)
33 |         oechem.OEWriteConstMolecule(ofs, mol)
34 | 
35 |     ifs.close()
36 |     ofs.close()
37 | 
38 | if __name__ == "__main__":
39 |     sys.exit(main(sys.argv))#!/usr/bin/env python
40 | 


--------------------------------------------------------------------------------
/examples/smarty_simulations/AlkEthOH.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openforcefield/smarty/882d54b6d6d0fada748c71789964b07be2210a6a/examples/smarty_simulations/AlkEthOH.pdf


--------------------------------------------------------------------------------
/examples/smarty_simulations/Hydrogen.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openforcefield/smarty/882d54b6d6d0fada748c71789964b07be2210a6a/examples/smarty_simulations/Hydrogen.pdf


--------------------------------------------------------------------------------
/examples/smarty_simulations/README.md:
--------------------------------------------------------------------------------
  1 | # Example application of SMARTY atom type sampler to recover parm99 typing of alkanes, ethers, and alcohols
  2 | 
  3 | These are example outputs for a variety of smarty uses. Each example is listed below with the associated command line call.
  4 | Each example has the three output files with the title of the example as the name:
  5 | * `*.csv` - example trajectory file, a csv file that is readable with the `score_util.py` methods
  6 | * `*.log` - stored commandline output for that simulation
  7 | * `*.pdf` - plot showing the score versus iteration for the simulation
  8 | 
  9 | These are only examples of how to use smarty. All input files are those included in the smarty package,
 10 | available at `smarty/data/`; the utility here allows those files to be used in simulations.
 11 | 
 12 | ## AlkEthOH
 13 | 
 14 | Typical smarty behavior with the AlkEthOH molecule set
 15 | with combinatorial decorators and sampling all atoms
 16 | 
 17 | ```
 18 | smarty --basetypes atomtypes/basetypes.smarts \
 19 |     --initialtypes atomtypes/basetypes.smarts \
 20 |     --decorators atomtypes/new-decorators.smarts \
 21 |     --molecules AlkEthOH_test_filt1_tripos.mol2 \
 22 |     --reference AlkEthOH_test_filt1_ff.mol2 \
 23 |     --iterations 1000 \
 24 |     --temperature 0.01 \
 25 |     --trajectory AlkEthOH.csv \
 26 |     --plot AlkEthOH.pdf >> AlkEthOH.log
 27 | ```
 28 | 
 29 | **Example Output** 
 30 | This output shows how smarty is used to sample atomtypes
 31 | and compare them to the parm@frosst typed reference molecules.
 32 | 
 33 | ##### Initializing smarty:
 34 | ```
 35 | Loading molecules from '/Users/bannanc/anaconda/lib/python2.7/site-packages/smarty/data/molecules/AlkEthOH_test_filt1_tripos.mol2'...
 36 | 42 molecules read
 37 | 0.006 s elapsed
 38 | Loading molecules from '/Users/bannanc/anaconda/lib/python2.7/site-packages/smarty/data/molecules/AlkEthOH_test_filt1_ff.mol2'...
 39 | 42 molecules read
 40 | 0.006 s elapsed
 41 | Sampling all atomtypes
 42 | ```
 43 | Store bond types that are used in these molecules
 44 | ```
 45 | USED BOND TYPES:
 46 | INDEX        ATOMS  MOLECULES                                                          TYPE NAME                           SMARTS
 47 |     1 :        803         42 |                                                           singly                                -
 48 |     2 :          0          0 |                                                           doubly                                =
 49 |     3 :          0          0 |                                                           triply                                #
 50 |     4 :          0          0 |                                                         aromatic                                :
 51 | TOTAL :        803         42
 52 | ```
 53 | Type molecules with base types and store those with matches
 54 | ```
 55 | MATCHED BASETYPES:
 56 | INDEX        ATOMS  MOLECULES                                                          TYPE NAME                           SMARTS
 57 |     1 :        464         42 |                                                       c_hydrogen                             [#1]
 58 |     2 :        232         42 |                                                         c_carbon                             [#6]
 59 |     3 :          0          0 |                                                       c_nitrogen                             [#7]
 60 |     4 :        107         42 |                                                         c_oxygen                             [#8]
 61 |     5 :          0          0 |                                                       c_fluorine                             [#9]
 62 |     6 :          0          0 |                                                    c_phosphorous                            [#15]
 63 |     7 :          0          0 |                                                         c_sulfur                            [#16]
 64 |     8 :          0          0 |                                                       c_chlorine                            [#17]
 65 |     9 :          0          0 |                                                       c_selenium                            [#34]
 66 |    10 :          0          0 |                                                        c_bromine                            [#35]
 67 |    11 :          0          0 |                                                         c_iodine                            [#53]
 68 | TOTAL :        803         42
 69 | Removing basetype '[#7]' ('c_nitrogen'), which is unused.
 70 | Removing basetype '[#9]' ('c_fluorine'), which is unused.
 71 | Removing basetype '[#15]' ('c_phosphorous'), which is unused.
 72 | Removing basetype '[#16]' ('c_sulfur'), which is unused.
 73 | Removing basetype '[#17]' ('c_chlorine'), which is unused.
 74 | Removing basetype '[#34]' ('c_selenium'), which is unused.
 75 | Removing basetype '[#35]' ('c_bromine'), which is unused.
 76 | Removing basetype '[#53]' ('c_iodine'), which is unused.
 77 | ```
 78 | Type molecules with initial types and store the ones that are used
 79 | ```
 80 | MATCHED INITIAL TYPES:
 81 | INDEX        ATOMS  MOLECULES                                                          TYPE NAME                           SMARTS
 82 |     1 :        464         42 |                                                       c_hydrogen                             [#1]
 83 |     2 :        232         42 |                                                         c_carbon                             [#6]
 84 |     3 :          0          0 |                                                       c_nitrogen                             [#7]
 85 |     4 :        107         42 |                                                         c_oxygen                             [#8]
 86 |     5 :          0          0 |                                                       c_fluorine                             [#9]
 87 |     6 :          0          0 |                                                    c_phosphorous                            [#15]
 88 |     7 :          0          0 |                                                         c_sulfur                            [#16]
 89 |     8 :          0          0 |                                                       c_chlorine                            [#17]
 90 |     9 :          0          0 |                                                       c_selenium                            [#34]
 91 |    10 :          0          0 |                                                        c_bromine                            [#35]
 92 |    11 :          0          0 |                                                         c_iodine                            [#53]
 93 | TOTAL :        803         42
 94 | Removing initial atom type '[#7]', as it matches no atoms
 95 | Removing initial atom type '[#9]', as it matches no atoms
 96 | Removing initial atom type '[#15]', as it matches no atoms
 97 | Removing initial atom type '[#16]', as it matches no atoms
 98 | Removing initial atom type '[#17]', as it matches no atoms
 99 | Removing initial atom type '[#34]', as it matches no atoms
100 | Removing initial atom type '[#35]', as it matches no atoms
101 | Removing initial atom type '[#53]', as it matches no atoms
102 | ```
103 | Use a bipartite scoring scheme to score current atomtypes against reference
104 | ```
105 | Creating graph matching current atom types with reference atom types...
106 | Graph creation took 0.008 s
107 | Computing maximum weight match...
108 | Maximum weight match took 0.001 s
109 | ```
110 | Initial types and which reference they are paired with and initial score (67.746 %)
111 | ```
112 | Atom type matches:
113 | c_hydrogen                                                       matches       HC :      244 atoms matched
114 | c_carbon                                                         matches       CT :      232 atoms matched
115 | c_oxygen                                                         matches       OH :       68 atoms matched
116 | 544 / 803 total atoms match (67.746 %)
117 | ```
118 | ##### Example move in chemical space
119 | ```
120 | Iteration 16 / 1000
121 | Attempting to create new subtype: '[#1]' (c_hydrogen) -> '[#1$(*~[#6])]' (c_hydrogen any c_carbon )
122 | Proposal is valid...
123 | ```
124 | Score proposed atomtypes against reference
125 | ```
126 | Creating graph matching current atom types with reference atom types...
127 | Graph creation took 0.007 s
128 | Computing maximum weight match...
129 | Maximum weight match took 0.001 s
130 | PROPOSED:
131 | Atom type matches:
132 | c_hydrogen                                                       matches       HO :       68 atoms matched
133 | c_carbon                                                         matches       CT :      232 atoms matched
134 | c_oxygen                                                         matches       OH :       68 atoms matched
135 | c_hydrogen any c_carbon                                          matches       HC :      244 atoms matched
136 | 612 / 803 total atoms match (76.214 %)
137 | ```
138 | ##### Accepting or Rejecting a Move
139 | A move that leads to an increased score will always be accepted.
140 | A move with a decrease has a probability of being accepted depending on the temperature.
141 | A 0.0 temperature will lead to a complete optimizer where only moves leading to an increased score are accepted,
142 | however these can get stuck in local optima. By using a non-zero temperature we allow more moves to be accepted
143 | and a larger chemical space to be explored.
144 | ```
145 | Proposal score: 544 >> 612 : log_P_accept = 8.46824e+00
146 | Accepted.
147 | ```
148 | Score by reference atomtype
149 | ```
150 | INDEX        ATOMS  MOLECULES                                                          TYPE NAME                           SMARTS REF TYPE        FRACTION OF REF TYPED MOLECULES MATCHED
151 |     1 :         68         42 |                                                       c_hydrogen                             [#1]       HO               68 /               68 (100.000%)
152 |     2 :        232         42 |                                                         c_carbon                             [#6]       CT              232 /              232 (100.000%)
153 |     3 :        107         42 |                                                         c_oxygen                             [#8]       OH               68 /               68 (100.000%)
154 |     4 :        396         42 |                                         c_hydrogen any c_carbon                     [#1$(*~[#6])]       HC              244 /              244 (100.000%)
155 | TOTAL :        803         42 |                                                                                                         612 /      803 match (76.214 %)
156 | ```
157 | Atomtype hierarchy shows which parent type a child descends from
158 | ```
159 | Atom type hierarchy:
160 |     [#6]
161 |     [#8]
162 |     [#1]
163 |         [#1$(*~[#6])]
164 | ```
165 | ##### Final iteration of this simulation
166 | ```
167 | Iteration 999 / 1000
168 | Attempting to destroy atom type [#6] : c_carbon...
169 | Destruction rejected for atom type [#6] because this is a generic type which was initially populated.
170 | Rejected.
171 | INDEX        ATOMS  MOLECULES                                                          TYPE NAME                           SMARTS REF TYPE        FRACTION OF REF TYPED MOLECULES MATCHED
172 |     1 :        291         42 |                                                       c_hydrogen                             [#1]       HC              244 /              244 (100.000%)
173 |     2 :        232         42 |                                                         c_carbon                             [#6]       CT              232 /              232 (100.000%)
174 |     3 :         39         30 |                                                         c_oxygen                             [#8]       OS               39 /               39 (100.000%)
175 |     4 :         68         42 |                                         c_hydrogen any c_oxygen                     [#1$(*~[#8])]       HO               68 /               68 (100.000%)
176 |     5 :         27         21 | c_hydrogen any c_carbon any c_carbon (any c_oxygen) (singly c_oxygen) [#1$(*~[#6](-[#8])(~[#8])~[#6])]       H2               27 /               33 ( 81.818%)
177 |     6 :         78         25 | c_hydrogen any c_carbon any c_carbon (any c_oxygen) (singly c_hydrogen) [#1$(*~[#6](-[#1])(~[#8])~[#6])]       H1               78 /              116 ( 67.241%)
178 |     7 :         68         42 |                                         c_oxygen any c_hydrogen                     [#8$(*~[#1])]       OH               68 /               68 (100.000%)
179 | TOTAL :        803         42 |                                                                                                         756 /      803 match (94.147 %)
180 | 
181 | Atom type hierarchy:
182 |     [#1]
183 |         [#1$(*~[#8])]
184 |         [#1$(*~[#6](-[#8])(~[#8])~[#6])]
185 |         [#1$(*~[#6](-[#1])(~[#8])~[#6])]
186 |     [#8]
187 |         [#8$(*~[#1])]
188 |     [#6]
189 | Maximum score achieved: 0.99
190 | ```
191 | 
192 | ## Hydrogen
193 | 
194 | This is an example of how to implement the elemental sampler for smarty;
195 | you only need to add the `--element` option. In this case, instead of considering
196 | all atoms, we only sample atom types for hydrogen. 
197 | This allows for more efficient testing of the smarty tool as we can 
198 | focus on the chemical perception sampling around one element. 
199 | In the AlkEthOH set, there is only 1 carbon type and 2 oxygen types, so the 5 hydrogen types
200 | are the best example of this behavior.  
201 | 
202 | ```
203 | smarty --element 1 \
204 |     --basetypes atomtypes/basetypes.smarts \
205 |     --initialtypes atomtypes/basetypes.smarts \
206 |     --decorators atomtypes/new-decorators.smarts \
207 |     --molecules AlkEthOH_test_filt1_tripos.mol2 \
208 |     --reference AlkEthOH_test_filt1_ff.mol2 \
209 |     --iterations 1000 \
210 |     --temperature 0.01 \
211 |     --trajectory Hydrogen.csv \
212 |     --plot Hydrogen.pdf >> Hydrogen.log
213 | ```
214 | 
215 | ## Simple-Decorators 
216 | 
217 | With the simple decorator option new atomtypes are generated by ANDing 
218 | decorator SMARTS patterns to the end of a parent atomtype.
219 | This method is not capable of even getting the complexity in the AlkEthOH
220 | molecule set as it does not allow for beta substitution from the primary atom.
221 |  
222 | ```
223 | smarty --basetypes atomtypes/basetypes.smarts \
224 |     --initialtypes atomtypes/basetypes.smarts \
225 |     --decorators atomtypes/decorators.smarts \
226 |     --substitutions atomtypes/replacements.smarts \
227 |     --molecules AlkEthOH_test_filt1_tripos.mol2 \
228 |     --reference AlkEthOH_test_filt1_ff.mol2 \
229 |     --iterations 1000 \
230 |     --temperature 0.01 \
231 |     --trajectory Simple-decorators.csv \
232 |     --plot Simple-decorators.pdf \
233 |     --decoratorbehavior simple-decorators >> Simple-decorators.log
234 | ```
235 | 
236 | ## More smarty tests
237 | We have done more extensive testing of this tool, but the results are 
238 | a bit bulky to keep on GitHub. We maintain a public [Google Drive Directory](https://drive.google.com/drive/folders/0BwF2-3puCvfEeWNuNnlsTm1CTlU?usp=sharing)
239 | with these results. Please note it is a work in progress, so documentation is ongoing.
240 | 


--------------------------------------------------------------------------------
/examples/smarty_simulations/Simple-Decorators.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openforcefield/smarty/882d54b6d6d0fada748c71789964b07be2210a6a/examples/smarty_simulations/Simple-Decorators.pdf


--------------------------------------------------------------------------------
/examples/smirky/README.md:
--------------------------------------------------------------------------------
  1 | # smirky sampling of Torsions
  2 | 
  3 | This is an example of how to use smirky, a command line tool for sampling chemical perception of Bonds, Angles, proper or improper Torsions, or van der Waals parameters. Default smirky behavior only requires two inputs; this is an example of all input options into smirky. 
  4 | 
  5 | ### Input files explained
  6 | 
  7 | * `atom_OR_bases.smarts` - element numbers that form the base of atoms, such as `"#6"` and their associated odds
  8 | * `atom_OR_decorators.smarts` - decorators and associated odds that are combined with element numbers such as `X3` in `"[#6X3,#7]"`
  9 | * `atom_AND_decorators.smarts` - decorators and associated odds for patterns that are "AND'd" to the end of an atom for example `r5` in `"[#6X4,#7X3;r5]"`
 10 | * `bond_OR_bases.smarts` - bond bases and their associated odds, that is '-', '=', ':', or '#' typically
 11 | * `bond_AND_decorators.smarts` - bond decorators that can be "AND'd" in a bond, such as '@' in `"[#6r6]-,:;@[#7r6]"`
 12 | * `atom_odds_forTorsions.smarts` - keywords or indices for atoms in torsions and odds of making changes to them
 13 | * `bond_odds_forTorsions.smarts` - keywords or indices for bonds in torsions and odds of making changes to them
 14 | * `initial_Torsions.smarts` - SMIRKS patterns for initial patterns
 15 | * `substitutions.smarts` - SMIRKS patterns and the short hand they can be replaced with
 16 | 
 17 | ### Command line call
 18 | 
 19 | ```
 20 | smirky --molecules AlkEthOH_test_filt1_ff.mol2 \
 21 |     --typetag Torsion \
 22 |     --atomORbases atom_OR_bases.smarts \
 23 |     --atomORdecors atom_OR_decorators.smarts \
 24 |     --atomANDdecors atom_AND_decorators.smarts \
 25 |     --bondORbase bond_OR_bases.smarts \
 26 |     --bondANDdecors bond_AND_decorators.smarts \
 27 |     --atomOddsFile atom_odds_forTorsions.smarts \
 28 |     --bondOddsFile bond_odds_forTorsions.smarts \
 29 |     --initialtypes initial_Torsions.smarts \
 30 |     --substitutions substitutions.smarts \
 31 |     --smirff forcefield/Frosst_AlkEthOH.ffxml \
 32 |     --iterations 1000 \
 33 |     --temperature 0.001 \
 34 |     --verbose True \
 35 |     --output output
 36 | ```
 37 | 
 38 | ### Output files created
 39 | * output.log - detailed log of each iteration, changes made and if it was accepted or rejected
 40 | * output.csv - a "trajectory" file that describes the torsions at each iteration
 41 | * output.pdf - plot showing the overall score vs iteration
 42 | * output_results.smarts - smarts file showing the final SMIRKS and their matched results
 43 | 
 44 | ### Detailed output explained
 45 | 
 46 | Here is a segment of output.log with an explanation of what happens in a smirky simulation
 47 | 
 48 | ##### Match initial input
 49 | 
 50 | Type initial parameters 
 51 | ```
 52 | INDEX      TORSIONS  MOLECULES   TYPE NAME: SMIRKS
 53 |     1 :          0          0 | 0: [*:1]~[*:2]~[*:3]~[*:4]
 54 |     2 :       1737         42 | C-C: [*:1]~[#6:2]~[#6:3]~[*:4]
 55 |     3 :        438         42 | C-O: [*:1]~[#6:2]~[#8:3]~[*:4]
 56 | TOTAL :       2175         42
 57 | ```
 58 | Remove elements that are not used in this molecule set (remember AlkEthOH only has carbon, oxygen, and hydrogen)
 59 | ```
 60 | removing unused element ([#5]) from list
 61 | removing unused element ([#7]) from list
 62 | removing unused element ([#9]) from list
 63 | removing unused element ([#14]) from list
 64 | removing unused element ([#15]) from list
 65 | removing unused element ([#16]) from list
 66 | removing unused element ([#17]) from list
 67 | removing unused element ([#35]) from list
 68 | removing unused element ([#53]) from list
 69 | ```
 70 | ##### Comparing to SMIRNOFF99Frosst
 71 | 
 72 | Use the forcefield tools to type all molecules with SMIRNOFF reference.
 73 | Compare reference types to initial parameter types
 74 | 
 75 | ```
 76 | Creating labeler from forcefield/Frosst_AlkEthOH.ffxml...
 77 | Creating graph matching current types with reference types...
 78 | Graph creation took 0.304 s
 79 | Computing maximum weight match...
 80 | Maximum weight match took 0.001 s
 81 | PROPOSED:
 82 | Torsion type matches:
 83 | 0: [*:1]~[*:2]~[*:3]~[*:4]                                       no match
 84 | C-C: [*:1]~[#6:2]~[#6:3]~[*:4]                                   matches                           t0004: [#1:1]-[#6X4:2]-[#6X4:3]-[#1:4]:      574 Torsion    types matched
 85 | C-O: [*:1]~[#6:2]~[#8:3]~[*:4]                                   matches                         t0003: [a,A:1]-[#6X4:2]-[#8X2:3]-[!#1:4]:      156 Torsion    types matched
 86 | 730 / 2175 total Torsions match (33.563 %)
 87 | ```
 88 | Show current statistics before sampling begins
 89 | ```
 90 | INDEX      TORSIONS  MOLECULES   TYPE NAME: SMIRKS                                  REF TYPE: SMIRKS                                   FRACTION OF REF TYPED MOLECULES MATCHED
 91 |     1 :          0          0 | 0: [*:1]~[*:2]~[*:3]~[*:4]
 92 |     2 :       1737         42 | C-C: [*:1]~[#6:2]~[#6:3]~[*:4]                     t0004: [#1:1]-[#6X4:2]-[#6X4:3]-[#1:4]                 574 /     574 (100.000%)
 93 |     3 :        438         42 | C-O: [*:1]~[#6:2]~[#8:3]~[*:4]                     t0003: [a,A:1]-[#6X4:2]-[#8X2:3]-[!#1:4]               156 /     156 (100.000%)
 94 | TOTAL :       2175         42 |                                                        730 /     2175 match (33.563 %)
 95 | ```
 96 | 
 97 | ##### Example move to generate a new Torsion
 98 | 
 99 | Create a new torsion, in this case by changing the 4th atom from generic (*) to an oxygen with at least one attached hydrogen (`#8!H0`)
100 | 
101 | ```
102 | Iteration 1 / 1000
103 | Attempting to create new subtype: '4778' ([*:1]~[#6:2]~[#6:3]~[#8!H0:4]) from parent type 'C-C' ([*:1]~[#6:2]~[#6:3]~[*:4])
104 |     Probability of making this environment is 0.004 %Proposal is valid...
105 | ```
106 | Compare proposed types to the SMIRNOFF reference types
107 | ```
108 | Creating graph matching current types with reference types...
109 | Graph creation took 0.176 s
110 | Computing maximum weight match...
111 | Maximum weight match took 0.001 s
112 | PROPOSED:
113 | Torsion type matches:
114 | C-C: [*:1]~[#6:2]~[#6:3]~[*:4]                                   matches                           t0004: [#1:1]-[#6X4:2]-[#6X4:3]-[#1:4]:      574 Torsion    types matched
115 | C-O: [*:1]~[#6:2]~[#8:3]~[*:4]                                   matches                         t0003: [a,A:1]-[#6X4:2]-[#8X2:3]-[!#1:4]:      156 Torsion    types matched
116 | 4778: [*:1]~[#6:2]~[#6:3]~[#8!H0:4]                              matches                         t0012: [#1:1]-[#6X4:2]-[#6X4:3]-[#8X2:4]:      190 Torsion    types matched
117 | 920 / 2175 total Torsions match (42.299 %)
118 | ```
119 | ##### Using temperature and score to accept or reject move
120 | Use change in score and temperature to calculate the probability of accepting the move.
121 | A move with an increased score will always be accepted; the higher the temperature, the
122 | more probable it is that a move with a decreased score will be accepted.
123 | ```
124 | Proposal score: 730 >> 920 : log_P_accept = 8.73563e+01
125 | Accepted.
126 | INDEX      TORSIONS  MOLECULES   TYPE NAME: SMIRKS                                  REF TYPE: SMIRKS                                   FRACTION OF REF TYPED MOLECULES MATCHED
127 |     1 :       1436         42 | C-C: [*:1]~[#6:2]~[#6:3]~[*:4]                     t0004: [#1:1]-[#6X4:2]-[#6X4:3]-[#1:4]                 574 /     574 (100.000%)
128 |     2 :        438         42 | C-O: [*:1]~[#6:2]~[#8:3]~[*:4]                     t0003: [a,A:1]-[#6X4:2]-[#8X2:3]-[!#1:4]               156 /     156 (100.000%)
129 |     3 :        301         42 | 4778: [*:1]~[#6:2]~[#6:3]~[#8!H0:4]                t0012: [#1:1]-[#6X4:2]-[#6X4:3]-[#8X2:4]               190 /     307 ( 61.889%)
130 | TOTAL :       2175         42 |                                                        920 /     2175 match (42.299 %)
131 | 
132 | ```
133 | Hierarchy shows which parent types lead to the generation of child types
134 | ```
135 | Torsion type hierarchy:
136 |     C-C ([*:1]~[#6:2]~[#6:3]~[*:4])
137 |         4778 ([*:1]~[#6:2]~[#6:3]~[#8!H0:4])
138 |     C-O ([*:1]~[#6:2]~[#8:3]~[*:4])
139 | ```
140 | ##### Final Iteration in this example
141 | ```
142 | Iteration 999 / 1000
143 | Attempting to destroy type 1876 : [#1:1]~[#6:2]~[#6:3]~[#1:4]...
144 | Proposal is valid...
145 | Creating graph matching current types with reference types...
146 | Graph creation took 0.249 s
147 | Computing maximum weight match...
148 | Maximum weight match took 0.004 s
149 | PROPOSED:
150 | Torsion type matches:
151 | C-C: [*:1]~[#6:2]~[#6:3]~[*:4]                                   matches                           t0004: [#1:1]-[#6X4:2]-[#6X4:3]-[#1:4]:      574 Torsion    types matched
152 | C-O: [*:1]~[#6:2]~[#8:3]~[*:4]                                   matches                         t0003: [a,A:1]-[#6X4:2]-[#8X2:3]-[!#1:4]:      156 Torsion    types matched
153 | 4808: [*:1]~[#6:2]~[#8:3]~[#1!X4:4]                              matches                          t0002: [a,A:1]-[#6X4:2]-[#8X2:3]-[#1:4]:      101 Torsion    types matched
154 | 8090: [#6!H1:1]~[#6:2]~[#8:3]~[#1!X4:4]                          matches                         t0006: [#6X4:1]-[#6X4:2]-[#8X2:3]-[#1:4]:       87 Torsion    types matched
155 | 7751: [*:1]~[#6:2]~[#6:3]~[#6:4]                                 matches                         t0001: [a,A:1]-[#6X4:2]-[#6X4:3]-[a,A:4]:      146 Torsion    types matched
156 | 1068: [#6!H3:1]~[#6:2]~[#6:3]~[#6:4]                             matches                       t0007: [#6X4:1]-[#6X4:2]-[#6X4:3]-[#6X4:4]:      131 Torsion    types matched
157 | 6774: [#1H0:1]~[#6:2]~[#6:3]~[#6:4]                              matches                         t0005: [#1:1]-[#6X4:2]-[#6X4:3]-[#6X4:4]:      552 Torsion    types matched
158 | 8025: [#6:1]~[#6:2]~[#8:3]~[#6!H3:4]                             matches                       t0008: [#6X4:1]-[#6X4:2]-[#8X2:3]-[#6X4:4]:       66 Torsion    types matched
159 | 1813 / 2175 total Torsions match (83.356 %)
160 | Proposal score: 2120 >> 1813 : log_P_accept = -1.41149e+02
161 | Rejected.
162 | INDEX      TORSIONS  MOLECULES   TYPE NAME: SMIRKS                                  REF TYPE: SMIRKS                                   FRACTION OF REF TYPED MOLECULES MATCHED
163 |     1 :        334         42 | C-C: [*:1]~[#6:2]~[#6:3]~[*:4]                     t0012: [#1:1]-[#6X4:2]-[#6X4:3]-[#8X2:4]               307 /     307 (100.000%)
164 |     2 :        168         30 | C-O: [*:1]~[#6:2]~[#8:3]~[*:4]                     t0003: [a,A:1]-[#6X4:2]-[#8X2:3]-[!#1:4]               156 /     156 (100.000%)
165 |     3 :        117         42 | 4808: [*:1]~[#6:2]~[#8:3]~[#1!X4:4]                t0002: [a,A:1]-[#6X4:2]-[#8X2:3]-[#1:4]                101 /     101 (100.000%)
166 |     4 :         87         42 | 8090: [#6!H1:1]~[#6:2]~[#8:3]~[#1!X4:4]            t0006: [#6X4:1]-[#6X4:2]-[#8X2:3]-[#1:4]                87 /     103 ( 84.466%)
167 |     5 :        146         40 | 7751: [*:1]~[#6:2]~[#6:3]~[#6:4]                   t0001: [a,A:1]-[#6X4:2]-[#6X4:3]-[a,A:4]               146 /     146 (100.000%)
168 |     6 :        131         37 | 1068: [#6!H3:1]~[#6:2]~[#6:3]~[#6:4]               t0007: [#6X4:1]-[#6X4:2]-[#6X4:3]-[#6X4:4]             131 /     131 (100.000%)
169 |     7 :        552         40 | 6774: [#1H0:1]~[#6:2]~[#6:3]~[#6:4]                t0005: [#1:1]-[#6X4:2]-[#6X4:3]-[#6X4:4]               552 /     552 (100.000%)
170 |     8 :         66         30 | 8025: [#6:1]~[#6:2]~[#8:3]~[#6!H3:4]               t0008: [#6X4:1]-[#6X4:2]-[#8X2:3]-[#6X4:4]              66 /      66 (100.000%)
171 |     9 :        574         42 | 1876: [#1:1]~[#6:2]~[#6:3]~[#1:4]                  t0004: [#1:1]-[#6X4:2]-[#6X4:3]-[#1:4]                 574 /     574 (100.000%)
172 | TOTAL :       2175         42 |                                                       2120 /     2175 match (97.471 %)
173 | 
174 | Torsion type hierarchy:
175 |     C-C ([*:1]~[#6:2]~[#6:3]~[*:4])
176 |         7751 ([*:1]~[#6:2]~[#6:3]~[#6:4])
177 |             1068 ([#6!H3:1]~[#6:2]~[#6:3]~[#6:4])
178 |             6774 ([#1H0:1]~[#6:2]~[#6:3]~[#6:4])
179 |         1876 ([#1:1]~[#6:2]~[#6:3]~[#1:4])
180 |     C-O ([*:1]~[#6:2]~[#8:3]~[*:4])
181 |         4808 ([*:1]~[#6:2]~[#8:3]~[#1!X4:4])
182 |             8090 ([#6!H1:1]~[#6:2]~[#8:3]~[#1!X4:4])
183 |         8025 ([#6:1]~[#6:2]~[#8:3]~[#6!H3:4])
184 | 
185 | ```
186 | 
187 | ## More smirky tests
188 | 
189 | The results from smirky tests get a bit bulky so we are not storing them on GitHub. 
190 | We maintain a public [Google Drive Directory](https://drive.google.com/drive/folders/0BwF2-3puCvfEeWNuNnlsTm1CTlU?usp=sharing)
191 | storing extensive tests on smirky and smarty. Please keep in mind these tests are ongoing, so documentation for the Google Drive is a work in progress.
192 | 


--------------------------------------------------------------------------------
/examples/smirky/atom_AND_decorators.smarts:
--------------------------------------------------------------------------------
 1 | % Decorator         Odds
 2 | % Size of smallest ring
 3 | r3                  0
 4 | r4                  0
 5 | r5                  0
 6 | r6                  0
 7 | % Number of rings
 8 | R0                  0
 9 | R1                  0
10 | R2                  0
11 | R3                  0
12 | R4                  0
13 | R                   0
14 | !R0                 0
15 | !R1                 0
16 | !R2                 0
17 | !R3                 0
18 | !R4                 0
19 | !R                  0
20 | % total connectivity
21 | X1                  0
22 | X2                  0
23 | X3                  0
24 | X4                  0
25 | !X1                 0
26 | !X2                 0
27 | !X3                 0
28 | !X4                 0
29 | % total hydrogen count
30 | H0                  0
31 | !H0                 0
32 | H1                  0
33 | !H1                 0
34 | H2                  0
35 | !H2                 0
36 | H3                  0
37 | !H3                 0
38 | % aromatic/aliphatic
39 | a                   0
40 | !a                  0
41 | A                   0
42 | !A                  0
43 | % charges
44 | -1                  0
45 | +0                  0
46 | +1                  0
47 | % no decorator
48 | ''                  1
49 | 


--------------------------------------------------------------------------------
/examples/smirky/atom_OR_bases.smarts:
--------------------------------------------------------------------------------
 1 | % Decorator     Odds
 2 | % elements  
 3 | [#1]		    1	
 4 | [#5]			0
 5 | [#6]			1
 6 | [#7]			0
 7 | [#8]			1
 8 | [#9]			0
 9 | [#14]			0
10 | [#15]			0
11 | [#16]			0
12 | [#17]			0
13 | [#35]			0
14 | [#53]			0
15 | % substitution groups
16 | $ewg1           0 
17 | $ewg2           0
18 | 


--------------------------------------------------------------------------------
/examples/smirky/atom_OR_decorators.smarts:
--------------------------------------------------------------------------------
 1 | % Decorator         Odds
 2 | % Size of smallest ring
 3 | r3                  0
 4 | r4                  0
 5 | r5                  0
 6 | r6                  0
 7 | % Number of rings
 8 | R0                  0
 9 | R1                  0
10 | R2                  0
11 | R3                  0
12 | R4                  0
13 | R                   0
14 | !R0                 0
15 | !R1                 0
16 | !R2                 0
17 | !R3                 0
18 | !R4                 0
19 | !R                  0
20 | % total connectivity
21 | X1                  0
22 | X2                  1
23 | X3                  0
24 | X4                  1
25 | !X1                 0
26 | !X2                 1
27 | !X3                 0
28 | !X4                 1
29 | % total hydrogen count
30 | H0                  1
31 | !H0                 1
32 | H1                  1
33 | !H1                 1
34 | H2                  1
35 | !H2                 1
36 | H3                  1
37 | !H3                 1
38 | % aromatic/aliphatic
39 | a                   0
40 | !a                  0
41 | A                   0
42 | !A                  0
43 | % charges
44 | -1                  0
45 | +0                  0
46 | +1                  0
47 | % OR base with no decorator
48 | ''                  10
49 | 


--------------------------------------------------------------------------------
/examples/smirky/atom_odds_forTorsions.smarts:
--------------------------------------------------------------------------------
 1 | % Descriptor        odds
 2 | % used in the default, all equally likely
 3 | all                 0
 4 | %
 5 | % Other options remember to use indices appropriately
 6 | 1                   10
 7 | 2                   1
 8 | 3                   1
 9 | 4                   10
10 | Indexed             0
11 | Unindexed           5
12 | Alpha               0
13 | Beta                0
14 | 


--------------------------------------------------------------------------------
/examples/smirky/bond_AND_decorators.smarts:
--------------------------------------------------------------------------------
1 | % Decorator         Odds
2 | @                   0
3 | !@                  0
4 | !#                  0
5 | ''                  1
6 | 


--------------------------------------------------------------------------------
/examples/smirky/bond_OR_bases.smarts:
--------------------------------------------------------------------------------
 1 | % Decorator         Odds
 2 | % bond types
 3 | -                   1
 4 | :                   0
 5 | =                   0
 6 | #                   0
 7 | % not bond types
 8 | !-                  0
 9 | !:                  0
10 | !=                  0
11 | !#                  0
12 | 


--------------------------------------------------------------------------------
/examples/smirky/bond_odds_forTorsions.smarts:
--------------------------------------------------------------------------------
 1 | % Descriptor        odds
 2 | % used in the default, all equally likely
 3 | all                 0
 4 | %
 5 | % Other options remember to use indices appropriately
 6 | 1                   10
 7 | 2                   1
 8 | 3                   10
 9 | Indexed             0
10 | Unindexed           20
11 | Alpha               0
12 | Beta                0
13 | 


--------------------------------------------------------------------------------
/examples/smirky/initial_Torsions.smarts:
--------------------------------------------------------------------------------
1 | % Torsion fragments to begin with
2 | [*:1]~[*:2]~[*:3]~[*:4]      0
3 | [*:1]~[#6:2]~[#6:3]~[*:4]      C-C
4 | [*:1]~[#6:2]~[#8:3]~[*:4]      C-O
5 | 


--------------------------------------------------------------------------------
/examples/smirky/output.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openforcefield/smarty/882d54b6d6d0fada748c71789964b07be2210a6a/examples/smirky/output.pdf


--------------------------------------------------------------------------------
/examples/smirky/output_results.smarts:
--------------------------------------------------------------------------------
 1 | % Results for sampling Torsions at 1.00e-03
 2 | %% SMIRKS patterns for final results are below
 3 | % followed by their matched reference SMIRKS from forcefield/Frosst_AlkEthOH.ffxml
 4 | %Final Score was 97.471 %
 5 | %%
 6 | [*:1]~[#6:2]~[#6:3]~[*:4]                          C-C
 7 | % [#1:1]-[#6X4:2]-[#6X4:3]-[#8X2:4]                t0012
 8 | [*:1]~[#6:2]~[#8:3]~[*:4]                          C-O
 9 | % [a,A:1]-[#6X4:2]-[#8X2:3]-[!#1:4]                t0003
10 | [*:1]~[#6:2]~[#8:3]~[#1!X4:4]                      4808
11 | % [a,A:1]-[#6X4:2]-[#8X2:3]-[#1:4]                 t0002
12 | [#6!H1:1]~[#6:2]~[#8:3]~[#1!X4:4]                  8090
13 | % [#6X4:1]-[#6X4:2]-[#8X2:3]-[#1:4]                t0006
14 | [*:1]~[#6:2]~[#6:3]~[#6:4]                         7751
15 | % [a,A:1]-[#6X4:2]-[#6X4:3]-[a,A:4]                t0001
16 | [#6!H3:1]~[#6:2]~[#6:3]~[#6:4]                     1068
17 | % [#6X4:1]-[#6X4:2]-[#6X4:3]-[#6X4:4]              t0007
18 | [#1H0:1]~[#6:2]~[#6:3]~[#6:4]                      6774
19 | % [#1:1]-[#6X4:2]-[#6X4:3]-[#6X4:4]                t0005
20 | [#6:1]~[#6:2]~[#8:3]~[#6!H3:4]                     8025
21 | % [#6X4:1]-[#6X4:2]-[#8X2:3]-[#6X4:4]              t0008
22 | [#1:1]~[#6:2]~[#6:3]~[#1:4]                        1876
23 | % [#1:1]-[#6X4:2]-[#6X4:3]-[#1:4]                  t0004
24 | 


--------------------------------------------------------------------------------
/examples/smirky/substitutions.smarts:
--------------------------------------------------------------------------------
 1 | % Substitution definitions
 2 | % Format:
 3 | % <SMARTS> <replacement-string>
 4 | % halogens
 5 | [#7!-1,#8,#16]              ewg2
 6 | [#7!-1,#8!-1,#16!-1,$halogen]     ewg1
 7 | [$smallhals,$largehals]     halogen
 8 | [#9,#17]                    smallhals
 9 | [#35,#53]                   largehals
10 | 


--------------------------------------------------------------------------------
/oe_license.txt.enc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openforcefield/smarty/882d54b6d6d0fada748c71789964b07be2210a6a/oe_license.txt.enc


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from os.path import relpath, join
 3 | from setuptools import setup
 4 | 
 5 | def read(fname):
 6 |     return open(os.path.join(os.path.dirname(__file__), fname)).read()
 7 | 
 8 | def find_package_data(data_root, package_root):
 9 |     files = []
10 |     for root, dirnames, filenames in os.walk(data_root):
11 |         for fn in filenames:
12 |             files.append(relpath(join(root, fn), package_root))
13 |     return files
14 | 
# Package metadata and installation configuration.
setup(
    name = "smarty",
    version = "0.1.6",
    author = "John Chodera, David Mobley, and others",
    author_email = "john.chodera@choderalab.org",
    description = ("Automated Bayesian atomtype sampling"),
    license = "MIT",
    keywords = "Bayesian atomtype sampling forcefield parameterization",
    url = "http://github.com/openforcefield/smarty",
    # Package names are dotted import names, not filesystem paths.
    packages=['smarty', 'smarty.tests', 'smarty.data'],
    long_description=read('README.md'),
    classifiers=[
        "Development Status :: 3 - Alpha",
        "Topic :: Utilities",
        # The registered trove classifier ends in "MIT License".
        "License :: OSI Approved :: MIT License",
    ],
    # Installs the `smarty` and `smirky` command-line drivers.
    entry_points={'console_scripts': ['smarty = smarty.cli_smarty:main', 'smirky = smarty.cli_smirky:main']},
    # Ship every file under smarty/data, with paths relative to the smarty package.
    package_data={'smarty': find_package_data('smarty/data', 'smarty')},
)
34 | 


--------------------------------------------------------------------------------
/smarty/__init__.py:
--------------------------------------------------------------------------------
# Best-effort import of the submodules listed inside the try block: if any of
# these imports raises, the error is printed and the package import continues.
try:
    import openeye
    # These can only be imported if openeye tools are available
    from smarty.atomtyper import *
    from smarty.sampler import *
    from smarty.utils import *
    from smarty.sampler_smirky import *

except Exception as e:
    # NOTE(review): this catches any Exception raised by the imports above,
    # so the warning below is printed even when the failure is not a missing
    # openeye toolkit. The original exception is printed first.
    print(e)
    print('Warning: Cannot import openeye toolkit; not all functionality will be available.')

# Imported outside the try block, so it is attempted whether or not the
# imports above succeeded.
from smarty.score_utils import *
14 | 


--------------------------------------------------------------------------------
/smarty/atomtyper.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | #=============================================================================================
  4 | # MODULE DOCSTRING
  5 | #=============================================================================================
  6 | 
  7 | """
  8 | atomtyper.py
  9 | 
 10 | Atom type assignment engine using SMARTS strings.
 11 | 
 12 | Authors
 13 | -------
 14 | John Chodera <john.chodera@choderalab.org>, Memorial Sloan Kettering Cancer Center and University of California, Berkeley
 15 | 
 16 | The AtomTyper class is based on 'patty' from Pat Walters, Vertex Pharmaceuticals.
 17 | 
 18 | """
 19 | #=============================================================================================
 20 | # GLOBAL IMPORTS
 21 | #=============================================================================================
 22 | 
 23 | import sys
 24 | import string
 25 | 
 26 | import os
 27 | import math
 28 | import copy
 29 | import re
 30 | import numpy
 31 | import random
 32 | from smarty import utils
 33 | 
 34 | import openeye.oechem
 35 | import openeye.oeomega
 36 | import openeye.oequacpac
 37 | 
 38 | from openeye.oechem import *
 39 | from openeye.oeomega import *
 40 | from openeye.oequacpac import *
 41 | 
 42 | #=============================================================================================
 43 | # ATOM TYPER
 44 | #=============================================================================================
 45 | 
 46 | class AtomTyper(object):
 47 |     """
 48 |     Atom typer based on SMARTS-defined atom types.
 49 | 
 50 |     Based on 'Patty' implementation by Pat Walters.
 51 | 
 52 |     """
 53 | 
 54 |     class TypingException(Exception):
 55 |         """
 56 |         Atom typing exception.
 57 | 
 58 |         """
 59 |         def __init__(self, molecule, atom):
 60 |             self.molecule = molecule
 61 |             self.atom = atom
 62 | 
 63 |         def __str__(self):
 64 |             msg = "Atom not assigned: molecule %s : atom index %6d name %8s element %8s" % (self.molecule.GetTitle(), self.atom.GetIdx(), self.atom.GetName(), OEGetAtomicSymbol(self.atom.GetAtomicNum()))
 65 |             msg += '\n'
 66 |             for atom in self.molecule.GetAtoms():
 67 |                 msg += 'atom %8d : name %8s element %8s' % (atom.GetIdx(), atom.GetName(), OEGetAtomicSymbol(self.atom.GetAtomicNum()))
 68 |                 if atom == self.atom:
 69 |                     msg += '  ***'
 70 |                 msg += '\n'
 71 | 
 72 |             return msg
 73 | 
 74 |     def __init__(self, typelist, tagname, replacements=None):
 75 |         """"
 76 |         Create an atom typer instance.
 77 | 
 78 |         ARGUMENTS
 79 | 
 80 |         typelist : str
 81 |             If specified, will read types from list with each element [smarts, typename]
 82 |         tagname : str
 83 |             Tag name
 84 |         replacements : list of [smarts, shortname]
 85 |             Substitution/replacement bindings.
 86 | 
 87 |         """
 88 | 
 89 |         self.pattyTag = OEGetTag(tagname)
 90 | 
 91 |         # Create bindings list.
 92 |         bindings = list()
 93 |         if replacements is not None:
 94 |             for [smarts,shortname] in replacements:
 95 |                 bindings.append( (shortname, smarts) )
 96 | 
 97 |         # Create table of search objects.
 98 |         self.smartsList = []
 99 |         for [smarts, typename] in typelist:
100 |             # Perform binding replacements
101 |             smarts = OESmartsLexReplace(smarts, bindings)
102 |             # Create SMARTS search
103 |             pat = OESubSearch()
104 |             pat.Init(smarts)
105 |             pat.SetMaxMatches(0)
106 |             self.smartsList.append([pat,typename,smarts])
107 | 
108 |         return
109 | 
110 |     def dump(self):
111 |         for pat,type,smarts in self.smartsList:
112 |             print(pat,type,smarts)
113 |         return
114 | 
115 |     def assignTypes(self,mol,element = 0):
116 |         # Assign null types.
117 |         for atom in mol.GetAtoms():
118 |             atom.SetStringData(self.pattyTag, "")
119 | 
120 |         # Assign atom types using rules.
121 |         OEAssignAromaticFlags(mol)
122 |         for pat,type,smarts in self.smartsList:
123 |             OEPrepareSearch(mol, pat)
124 |             for matchbase in pat.Match(mol):
125 |                 for matchpair in matchbase.GetAtoms():
126 |                     matchpair.target.SetStringData(self.pattyTag,type)
127 | 
128 |         # Check if any atoms remain unassigned.
129 |         if element > 0:
130 |             mol_atoms = mol.GetAtoms(OEHasAtomicNum(element))
131 |         else:
132 |             mol_atoms = mol.GetAtoms()
133 |         for atom in mol_atoms:
134 |             if atom.GetStringData(self.pattyTag)=="":
135 |                 raise AtomTyper.TypingException(mol, atom)
136 |         return
137 | 
138 |     def debugTypes(self,mol):
139 |         for atom in mol.GetAtoms():
140 |             print("%6d %8s %8s" % (atom.GetIdx(),OEGetAtomicSymbol(atom.GetAtomicNum()),atom.GetStringData(self.pattyTag)))
141 |         return
142 | 
143 |     def getTypeList(self,mol):
144 |         typeList = []
145 |         for atom in mol.GetAtoms():
146 |             typeList.append(atom.GetStringData(self.pattyTag))
147 |         return typeList
148 | 
149 |     @classmethod
150 |     def read_typelist(cls, filename):
151 |         """
152 |         Read an atomtype or decorator list from a file.
153 | 
154 |         Parameters
155 |         ----------
156 |         filename : str
157 |             The name of the file to be read
158 | 
159 |         Returns
160 |         -------
161 |         typelist : list of tuples
162 |             Typelist[i] is element i of the typelist in format [smarts, typename]
163 | 
164 |         """
165 |         if filename is None:
166 |             return None
167 | 
168 |         if not os.path.exists(filename):
169 |             built_in = utils.get_data_filename(filename)
170 |             if not os.path.exists(built_in):
171 |                 raise Exception("File '%s' not found." % filename)
172 |             filename = built_in
173 | 
174 |         typelist = list()
175 |         ifs = open(filename)
176 |         lines = ifs.readlines()
177 |         used_typenames = list()
178 |         for line in lines:
179 |             # Strip trailing comments
180 |             index = line.find('%')
181 |             if index != -1:
182 |                 line = line[0:index]
183 |             # Split into tokens.
184 |             tokens = line.split()
185 |             # Process if we have enough tokens
186 |             if len(tokens) >= 2:
187 |                 smarts = tokens[0]
188 |                 typename = ' '.join(tokens[1:])
189 |                 if typename not in used_typenames:
190 |                     typelist.append([smarts,typename])
191 |                     used_typenames.append(typename)
192 |                 else:
193 |                     raise Exception("Error in file '%s' -- each entry must "
194 |                          "have a unique name." % filename )
195 |         ifs.close()
196 | 
197 |         return typelist
198 | 


--------------------------------------------------------------------------------
/smarty/cli_smarty.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Command-line driver example for SMARTY.
  3 | 
  4 | """
  5 | 
  6 | import sys
  7 | import string
  8 | 
  9 | from optparse import OptionParser # For parsing of command line arguments
 10 | 
 11 | import os
 12 | import math
 13 | import copy
 14 | import re
 15 | import numpy
 16 | import random
 17 | 
 18 | import smarty
 19 | from openforcefield.utils import utils
 20 | 
 21 | def main():
 22 |     # Create command-line argument options.
 23 |     usage_string = """\
 24 |     Sample over atom types, optionally attempting to match atom types in a reference typed set of molecules.
 25 | 
 26 |     usage: %prog --basetypes smartsfile --initialtypes smartsfile --decorators smartsfile [--substitutions smartsfile] --molecules molfile [--reference molfile] --iterations niterations [--temperature temperature]
 27 | 
 28 |     example:
 29 | 
 30 |     python %prog --basetypes=atomtypes/basetypes.smarts --initialtypes=atomtypes/initialtypes.smarts --decorators=atomtypes/decorators.smarts --substitutions=atomtypes/substitutions.smarts \
 31 |         --molecules=molecules/zinc-subset-tripos.mol2.gz --reference=molecules/zinc-subset-parm@frosst.mol2.gz --iterations 1000 --temperature=0.1
 32 | 
 33 |     """
 34 |     version_string = "%prog %__version__"
 35 |     parser = OptionParser(usage=usage_string, version=version_string)
 36 | 
 37 |     parser.add_option("-e", "--element", metavar='ELEMENT',
 38 |                       action="store", type="int", dest='element', default=0,
 39 |                       help= "By default the element value is 0 corresponding to sampling all atomtypes. If another atomic number is specified only atoms with that atomic number are sampled (i.e. --element=8 will only sample atomtypes for oxygen atoms).")
 40 | 
 41 | 
 42 |     parser.add_option("-b", "--basetypes", metavar='BASETYPES',
 43 |                       action="store", type="string", dest='basetypes_filename', default=None,
 44 |                       help="Filename defining base or generic atom types as SMARTS atom matches; these are indestructible and normally are elemental atom types.")
 45 | 
 46 |     parser.add_option("-f", "--initialtypes", metavar='BASETYPES',
 47 |                       action="store", type="string", dest='initialtypes_filename', default=None,
 48 |                       help="Filename defining initial (first) atom types as SMARTS atom matches.")
 49 | 
 50 |     parser.add_option("-d", "--decorators", metavar='DECORATORS',
 51 |                       action="store", type="string", dest='decorators_filename', default=None,
 52 |                       help="Filename defining decorator atom types as SMARTS atom matches.")
 53 | 
 54 |     parser.add_option("-s", "--substitutions", metavar="SUBSTITUTIONS",
 55 |                       action="store", type="string", dest='substitutions_filename', default=None,
 56 |                       help="Filename defining substitution definitions for SMARTS atom matches (OPTIONAL).")
 57 | 
 58 |     parser.add_option("-r", "--reference", metavar="REFMOL",
 59 |                       action="store", type="string", dest='reference_molecules_filename', default=None,
 60 |                       help="Reference typed molecules for computing likelihood (must match same molecule and atom ordering in molecules file) (OPTIONAL).")
 61 | 
 62 |     parser.add_option("-m", "--molecules", metavar='MOLECULES',
 63 |                       action="store", type="string", dest='molecules_filename', default=None,
 64 |                       help="Small molecule set (in any OpenEye compatible file format) containing 'dG(exp)' fields with experimental hydration free energies.")
 65 | 
 66 |     parser.add_option("-i", "--iterations", metavar='ITERATIONS',
 67 |                       action="store", type="int", dest='iterations', default=150,
 68 |                       help="MCMC iterations.")
 69 | 
 70 |     parser.add_option("-t", "--temperature", metavar='TEMPERATURE',
 71 |                       action="store", type="float", dest='temperature', default=0.1,
 72 |                       help="Effective temperature for Monte Carlo acceptance, indicating fractional tolerance of mismatched atoms (default: 0.1). If 0 is specified, will behave in a greedy manner.")
 73 | 
 74 |     parser.add_option("-l", '--trajectory', metavar="TRAJECTORY_FILE",
 75 |             action = "store", dest = "traj_file", default = "trajectory.csv",
 76 |             help = "Name for trajectory file output, trajectory saves only changes to the list of 'atomtypes' for each iteration. If the file already exists, it is overwritten.")
 77 | 
 78 |     parser.add_option("-p", '--plot', metavar="PLOT_FILE",
 79 |             action = "store", dest = "plot_file", default = None,
 80 |             help = "Name for output file of a plot of the score versus time. If not specified, none will be written. If provided, needs to use a file extension suitable for matplotlib/pylab. Currently requires a trajectory file to be written using -l or --trajectory.")
 81 | 
 82 | 
 83 |     parser.add_option("-x", "--decoratorbehavior", metavar='DECORATOR_BEHAVIOR',
 84 |                       action="store", type="string", dest='decorator_behavior', default='combinatorial-decorators',
 85 |                       help="Choose between simple-decorators or combinatorial-decorators (default = combinatorial-decorators).")
 86 | 
 87 |     verbose = True
 88 | 
 89 |     # Parse command-line arguments.
 90 |     (options,args) = parser.parse_args()
 91 | 
 92 |     # Ensure all required options have been specified.
 93 |     if (options.basetypes_filename is None) or (options.decorators_filename is None) or (options.molecules_filename is None):
 94 |         parser.print_help()
 95 |         parser.error("All input files must be specified.")
 96 | 
 97 |     # Ensure the Decorator Behavior option has been specified right
 98 |     if not (options.decorator_behavior == 'simple-decorators' or options.decorator_behavior == 'combinatorial-decorators'):
 99 |         parser.print_help()
100 |         parser.error("Option not valid for decorator behavior.")
101 | 
102 |     # Load and type all molecules in the specified dataset.
103 |     molecules = utils.read_molecules(options.molecules_filename, verbose=True)
104 | 
105 |     # Read reference typed molecules, if specified.
106 |     reference_typed_molecules = None
107 |     if options.reference_molecules_filename is not None:
108 |         reference_typed_molecules = utils.read_molecules(options.reference_molecules_filename, verbose=True)
109 | 
110 |     # Construct atom type sampler.
111 |     if options.element == 0:
112 |         if verbose: print("Sampling all atomtypes")
113 |     elif options.element > 0:
114 |         if verbose: print("Sampling atoms with atomic number %i" % options.element)
115 |     else:
116 |         parser.print_help()
117 |         parser.error("Element number must be 0 for all atoms or an integer greater than 0 for an atomic number")
118 |     atomtype_sampler = smarty.AtomTypeSampler(molecules, options.basetypes_filename, options.initialtypes_filename, options.decorators_filename, replacements_filename=options.substitutions_filename, reference_typed_molecules=reference_typed_molecules, verbose=verbose, temperature=options.temperature, decorator_behavior=options.decorator_behavior, element = options.element)
119 | 
120 |     # Start sampling atom types.
121 |     atomtype_sampler.run(options.iterations, options.traj_file)
122 | 
123 |     if options.plot_file is not None:
124 |         if options.traj_file is None:
125 |             print("Cannot create plot file without a trajectory file")
126 |         else:
127 |             smarty.score_utils.create_plot_file(options.traj_file, options.plot_file, False, verbose)
128 | 


--------------------------------------------------------------------------------
/smarty/cli_smirky.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Command-line driver example for SMIRKY.
  3 | 
  4 | """
  5 | 
  6 | import sys
  7 | import string
  8 | import time
  9 | 
 10 | from optparse import OptionParser # For parsing of command line arguments
 11 | import smarty
 12 | from openforcefield.utils import utils
 13 | 
 14 | import os
 15 | import math
 16 | import copy
 17 | import re
 18 | import numpy
 19 | from numpy import random
 20 | 
 21 | def main():
 22 |     # Create command-line argument options.
 23 |     usage_string = """\
 24 |     Sample over fragment types (atoms, bonds, angles, torsions, or impropers)
 25 |     optionally attempting to match created types to an established SMIRFF.
 26 |     For all files left blank, they will be taken from this module's
 27 |     data/odds_files/ subdirectory.
 28 | 
 29 |     usage %prog --molecules molfile --typetag fragmentType
 30 |             [--atomORbases AtomORbaseFile --atomORdecors AtomORdecorFile
 31 |             --atomANDdecors AtomANDdecorFile --bondORbase BondORbaseFile
 32 |             --bondANDdecors BondANDdecorFile --atomIndexOdds AtomIndexFile
 33 |             --bondIndexOdds BondIndexFile --replacements substitutions
 34 |             --initialtypes initialFragmentsFile --SMIRFF referenceSMIRFF
 35 |             --temperature float --verbose verbose
 36 |             --iterations iterations --output outputFile]
 37 | 
 38 |     example:
 39 |     smirky --molecules AlkEthOH_test_filt1_ff.mol2 --typetag Angle
 40 | 
 41 |     """
 42 |     version_string = "%prog %__version__"
 43 |     parser = OptionParser(usage=usage_string, version=version_string)
 44 | 
 45 |     parser.add_option("-m", "--molecules", metavar='MOLECULES',
 46 |             action="store", type="string", dest='molecules_filename', default=None,
 47 |             help="Small molecule set (in any OpenEye compatible file format) containing 'dG(exp)' fields with experimental hydration free energies. This filename can also be an option in this module's data/molecules sub-directory")
 48 |     #TODO: ask about the the dG(exp) fields?
 49 | 
 50 |     parser.add_option("-T", "--typetag", metavar='TYPETAG',
 51 |             action = "store", type="choice", dest='typetag',
 52 |             default=None, choices = ['VdW', 'Bond', 'Angle', 'Torsion', 'Improper'],
 53 |             help="type of fragment being sampled, options are 'VdW', 'Bond', 'Angle', 'Torsion', 'Improper'")
 54 | 
 55 |     parser.add_option('-e', '--atomORbases', metavar="DECORATORS",
 56 |             action='store', type='string', dest='atom_OR_bases',
 57 |             default = 'odds_files/atom_OR_bases.smarts',
 58 |             help="Filename defining atom OR bases and associated probabilities. These are combined with atom OR decorators in SMIRKS, for example in '[#6X4,#7X3;R2:2]' '#6' and '#7' are atom OR bases. (OPTIONAL)")
 59 | 
 60 |     parser.add_option("-O", "--atomORdecors", metavar="DECORATORS",
 61 |             action='store', type='string', dest='atom_OR_decorators',
 62 |             default = 'odds_files/atom_decorators.smarts',
 63 |             help="Filename defining atom OR decorators and associated probabilities. These are combined with atom bases in SMIRKS, for example in '[#6X4,#7X3;R2:2]' 'X4' and 'X3' are ORdecorators. (OPTIONAL)")
 64 | 
 65 |     parser.add_option('-A', '--atomANDdecors', metavar="DECORATORS",
 66 |             action='store', type='string', dest='atom_AND_decorators',
 67 |             default='odds_files/atom_decorators.smarts',
 68 |             help="Filename defining atom AND decorators and associated probabilities. These are added to the end of an atom's SMIRKS, for example in '[#6X4,#7X3;R2:2]' 'R2' is an AND decorator. (OPTIONAL)")
 69 | 
 70 |     parser.add_option('-o', '--bondORbase', metavar="DECORATORS",
 71 |             action='store', type='string', dest='bond_OR_bases',
 72 |             default='odds_files/bond_OR_bases.smarts',
 73 |             help="Filename defining bond OR bases and their associated probabilities. These are OR'd together to describe a bond, for example in '[#6]-,=;@[#6]' '-' and '=' are OR bases. (OPTIONAL)")
 74 | 
 75 |     parser.add_option('-a', '--bondANDdecors', metavar="DECORATORS",
 76 |             action="store", type='string', dest='bond_AND_decorators',
 77 |             default='odds_files/bond_AND_decorators.smarts',
 78 |             help="Filename defining bond AND decorators and their associated probabilities. These are AND'd to the end of a bond, for example in '[#6]-,=;@[#7]' '@' is an AND decorator.(OPTIONAL)")
 79 | 
 80 |     parser.add_option('-D', '--atomOddsFile', metavar="ODDSFILE",
 81 |             action="store", type="string", dest="atom_odds",
 82 |             default='odds_files/atom_index_odds.smarts',
 83 |             help="Filename defining atom descriptors and probabilities with making changes to that kind of atom. Options for descriptors are integers corresponding to that indexed atom, 'Indexed', 'Unindexed', 'Alpha', 'Beta', 'All'. (OPTIONAL)")
 84 | 
 85 |     parser.add_option('-d', '--bondOddsFile', metavar="ODDSFILE",
 86 |             action="store", type="string", dest="bond_odds",
 87 |             default='odds_files/bond_index_odds.smarts',
 88 |             help="Filename defining bond descriptors and probabilities with making changes to that kind of bond. Options for descriptors are integers corresponding to that indexed bond, 'Indexed', 'Unindexed', 'Alpha', 'Beta', 'All'. (OPTIONAL)")
 89 | 
 90 |     parser.add_option("-s", "--substitutions", metavar="SUBSTITUTIONS",
 91 |             action="store", type="string", dest='substitutions_filename',
 92 |             default=None,
 93 |             help="Filename defining substitution definitions for SMARTS atom matches. (OPTIONAL).")
 94 | 
 95 |     parser.add_option("-f", "--initialtypes", metavar='INITIALTYPES',
 96 |             action="store", type="string", dest='initialtypes_filename',
 97 |             default=None,
 98 |             help="Filename defining initial fragment types. The file is formatted with two columns: 'SMIRKS    typename'. For the default the initial type will be a generic form of the given fragment, for example '[*:1]~[*:2]' for a bond (OPTIONAL)")
 99 | 
100 |     parser.add_option('-r', '--smirff', metavar='REFERENCE',
101 |             action='store', type='string', dest='SMIRFF',
102 |             default=None,
103 |             help="Filename defining a SMIRFF force fielce used to determine reference fragment types in provided set of molecules. It may be an absolute file path, a path relative to the current working directory, or a path relative to this module's data subdirectory (for built in force fields). (OPTIONAL)")
104 | 
105 |     parser.add_option("-i", "--iterations", metavar='ITERATIONS',
106 |             action="store", type="int", dest='iterations',
107 |             default=150,
108 |             help="MCMC iterations.")
109 | 
110 |     parser.add_option("-t", "--temperature", metavar='TEMPERATURE',
111 |             action="store", type="float", dest='temperature',
112 |             default=0.1,
113 |             help="Effective temperature for Monte Carlo acceptance, indicating fractional tolerance of mismatched atoms (default: 0.1). If 0 is specified, will behave in a greedy manner.")
114 | 
115 |     parser.add_option("-p", "--output", metavar='OUTPUT',
116 |             action="store", type="string", dest='outputfile',
117 |             default=None,
118 |             help="Filename base for output information. This same base will be used for all output files created. If None provided then it is set to 'typetag_temperature' (OPTIONAL).")
119 | 
120 |     parser.add_option('-v', '--verbose', metavar='VERBOSE',
121 |             action='store', type='choice', dest='verbose',
122 |             default=False, choices = ['True', 'False'],
123 |             help="If True prints minimal information to the commandline during iterations. (OPTIONAL)")
124 | 
125 |     # Parse command-line arguments.
126 |     (option,args) = parser.parse_args()
127 | 
128 |     # Molecules are required
129 |     if option.molecules_filename is None:
130 |         parser.print_help()
131 |         parser.error("Molecules input files must be specified.")
132 | 
133 |     verbose = option.verbose == 'True'
134 |     # Load and type all molecules in the specified dataset.
135 |     molecules = utils.read_molecules(option.molecules_filename, verbose=verbose)
136 | 
137 |     # Parse input odds files
138 |     atom_OR_bases = smarty.parse_odds_file(option.atom_OR_bases, verbose)
139 |     atom_OR_decorators = smarty.parse_odds_file(option.atom_OR_decorators, verbose)
140 |     atom_AND_decorators = smarty.parse_odds_file(option.atom_AND_decorators, verbose)
141 |     bond_OR_bases = smarty.parse_odds_file(option.bond_OR_bases, verbose)
142 |     bond_AND_decorators = smarty.parse_odds_file(option.bond_AND_decorators, verbose)
143 |     atom_odds = smarty.parse_odds_file(option.atom_odds, verbose)
144 |     bond_odds = smarty.parse_odds_file(option.bond_odds, verbose)
145 | 
146 |     # get initial types if provided, otherwise none
147 |     if option.initialtypes_filename is None:
148 |         initialtypes = None
149 |     else:
150 |         initialtypes = smarty.AtomTyper.read_typelist(option.initialtypes_filename)
151 | 
152 |     output = option.outputfile
153 |     if output is None:
154 |         output = "%s_%.2e" % ( option.typetag, option.temperature)
155 |     # get replacements
156 |     if option.substitutions_filename is None:
157 |         sub_file = smarty.get_data_filename('odds_files/substitutions.smarts')
158 |     else:
159 |         sub_file = option.substitutions_filename
160 |     replacements = smarty.AtomTyper.read_typelist(sub_file)
161 |     replacements = [ (short, smarts) for (smarts, short) in replacements]
162 | 
163 |     start_sampler = time.time()
164 |     fragment_sampler = smarty.FragmentSampler(
165 |             molecules, option.typetag, atom_OR_bases, atom_OR_decorators,
166 |             atom_AND_decorators, bond_OR_bases, bond_AND_decorators,
167 |             atom_odds, bond_odds, replacements, initialtypes,
168 |             option.SMIRFF, option.temperature, output)
169 |     # report time
170 |     finish_sampler = time.time()
171 |     elapsed = finish_sampler - start_sampler
172 |     if verbose: print("Creating %s sampler took %.3f s" % (option.typetag, elapsed))
173 | 
174 |     # Make iterations
175 |     frac_found = fragment_sampler.run(option.iterations, verbose)
176 |     results = fragment_sampler.write_results_smarts_file()
177 |     finished = time.time()
178 |     elapsed = finished - finish_sampler
179 |     per_it = elapsed / float(option.iterations)
180 |     if verbose: print("%i iterations took %.3f s (%.3f s / iteration)" % (option.iterations, elapsed, per_it))
181 |     if verbose: print("Final score was %.3f %%" % (frac_found*100.0))
182 | 
183 |     # plot results
184 |     plot_file = "%s.pdf" % output
185 |     traj = "%s.csv" % output
186 |     smarty.score_utils.create_plot_file(traj, plot_file, False, verbose)
187 | 


--------------------------------------------------------------------------------
/smarty/data/README.md:
--------------------------------------------------------------------------------
1 | # Data used by smarty
2 | 
3 | ## Manifest
4 | - `atomtypes` - contains files used by smarty to determine how it samples over atom types
5 | - `odds_files` - contains odds files used by smirky to influence sampling
6 | 


--------------------------------------------------------------------------------
/smarty/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openforcefield/smarty/882d54b6d6d0fada748c71789964b07be2210a6a/smarty/data/__init__.py


--------------------------------------------------------------------------------
/smarty/data/atomtypes/README.md:
--------------------------------------------------------------------------------
 1 | # Atom type SMARTS components
 2 | 
 3 | ## Formats
 4 | SMARTS files are used as input for the smarty sampler.
 5 | There are a variety of file types, detailed below. All follow
 6 | the same general format.
 7 | 
 8 | Comments beginning with `%` are ignored throughout the file.
 9 | Each line has the format
10 | ```
11 | <SMARTS> <typename>
12 | ```
13 | where `<SMARTS>` is an [OpenEye SMARTS string](https://docs.eyesopen.com/toolkits/cpp/oechemtk/SMARTS.html) and `<typename>` is a human-readable typename associated with that atom type.
14 | 
15 | Atom type definitions are hierarchical, with the last match in the file taking precedence over earlier matches.
16 | 
17 | ### Initial and Base types
18 | 
19 | These are both used to initialize the smarty sampler.
20 | `basetypes` are considered more generic. 
21 | These are the atomtypes used to create new atomtypes.
22 | See the file `basetypes.smarts`.
23 | 
24 | `initial` types can be more complex;
25 | for example, see the files
26 | `initialtypes.smarts` or `initial\_AlkEthOH.smarts`.
27 | 
28 | As a best practice, base and initial types should be listed from most to
29 | least general.
30 | 
31 | ### Simple and Combinatorial Decorators
32 | 
33 | A `decorators` file contains a list of SMARTS
34 | 
35 | In smarty, when using simple decorators, the new atomtypes are created only
36 | by ANDing the decorator SMARTS component to the parent atomtype (using the `&` operator).
37 | The human-readable `<decoratorname>` is appended (with a space) to the parent name to keep a human-readable annotation of the proposed child atom type.
38 | 
39 | 
40 | Example simple decorators are in *`decorators.smarts`* and are typically more complicated as they must include all 
41 | ways of generating new atomtypes
42 | 
43 | Combinatorial decorators use a more complex set of rules to generate new SMARTS strings. 
44 | In this case, bonded atoms are found in the basetypes, so only "non-bonding decorators" need to be 
45 | in the decorator file. 
46 | For example, see *`new-decorators.smarts`*.
47 | 
48 | ### Substitutions
49 | 
50 | It is often convenient to define various tokens that are substituted for more sophisticated SMARTS expressions.
51 | 
52 | For example, we could define some elemental substitutions along with some substitutions for halogens:
53 | ```
54 | % elements
55 | [#9]    fluorine
56 | [#17]   chlorine
57 | [#35]   bromine
58 | [#53]   iodine
59 | 
60 | % halogens
61 | [$smallhals,$largehals]     halogen
62 | [$fluorine,$chlorine]       smallhals
63 | [$bromine,$iodine]          largehals
64 | ```
65 | 
66 | The [`OESmartsLexReplace`](http://docs.eyesopen.com/toolkits/python/oechemtk/OEChemFunctions/OESmartsLexReplace.html) function is used to implement these replacements.
67 | 
68 | ## Manifest
69 | * `basetypes.smarts` - basetypes file with elemental atom types - this is a good choice to begin with
70 | * `initialtypes.smarts` - initial types file with more sophisticated atom types
71 | * `initial\_AlkEthOH.smarts` - the "answer" SMARTS strings for the AlkEthOH molecule set
72 | * `decorators.smarts` - `decorators` file with a variety of decorators
73 | * `decorators-simple.smarts` - minimal `decorators` file for testing
74 | * `new-decorators.smarts` - decorators file without bond information (new modular framework)
75 | * `replacements.smarts` - minimal `substitutions` file
76 | 


--------------------------------------------------------------------------------
/smarty/data/atomtypes/basetypes.smarts:
--------------------------------------------------------------------------------
 1 | % atom types
 2 | [*]     any_atom
 3 | [$ewg1] ewg1
 4 | [$ewg2] ewg2
 5 | [#1]    hydrogen
 6 | [#6]    carbon
 7 | [#7]    nitrogen
 8 | [#8]    oxygen
 9 | [#9]    fluorine
10 | [#15]   phosphorous
11 | [#16]   sulfur
12 | [#17]   chlorine
13 | [#34]   selenium
14 | [#35]   bromine
15 | [#53]   iodine
16 | 


--------------------------------------------------------------------------------
/smarty/data/atomtypes/decorators-simple.smarts:
--------------------------------------------------------------------------------
1 | % aromatic/aliphatic
2 | a              aromatic
3 | A              aliphatic
4 | % halogens
5 | $(*~[$halogen]) halogen-adjacent
6 | 


--------------------------------------------------------------------------------
/smarty/data/atomtypes/decorators.smarts:
--------------------------------------------------------------------------------
 1 | % bond order
 2 | $([*]=[*])     double-bonded
 3 | $([*]#[*])     triple-bonded
 4 | $([*]:[*])     aromatic-bonded
 5 | % bonded to atoms
 6 | $(*~[#1])      hydrogen-adjacent
 7 | $(*~[#6])      carbon-adjacent
 8 | $(*~[#7])      nitrogen-adjacent
 9 | $(*~[#8])      oxygen-adjacent
10 | $(*~[#9])      fluorine-adjacent
11 | $(*~[#15])     phosphorous-adjacent
12 | $(*~[#16])     sulfur-adjacent
13 | $(*~[#17])     chlorine-adjacent
14 | $(*~[#35])     bromine-adjacent
15 | $(*~[#53])     iodine-adjacent
16 | % degree
17 | D1             degree-1
18 | D2             degree-2
19 | D3             degree-3
20 | D4             degree-4
21 | D5             degree-5
22 | D6             degree-6
23 | % valence
24 | v1             valence-1
25 | v2             valence-2
26 | v3             valence-3
27 | v4             valence-4
28 | v5             valence-5
29 | v6             valence-6
30 | % total-h-count
31 | H1             total-h-count-1
32 | H2             total-h-count-2
33 | H3             total-h-count-3
34 | % aromatic/aliphatic
35 | a              aromatic
36 | A              aliphatic
37 | % halogens
38 | $(*~[$halogen]) halogen-adjacent
39 | $(*~[$smallhals]) small-halogen-adjacent
40 | $(*~[$largehals]) large-halogen-adjacent
41 | 


--------------------------------------------------------------------------------
/smarty/data/atomtypes/initial_AlkEthOH.smarts:
--------------------------------------------------------------------------------
 1 | % atom types
 2 | [$([#1]-C)] hydrogen-carbon
 3 | [$([#1]-C-[#7,#8,F,#16,Cl,Br])] hydrogen-carbon-ewd
 4 | [$([#1]-C(-[#7,#8,F,#16,Cl,Br])-[#7,#8,F,#16,Cl,Br])]   hydrogen-carbon-ewd2
 5 | [$([#1]-C(-[#7,#8,F,#16,Cl,Br])(-[#7,#8,F,#16,Cl,Br])-[#7,#8,F,#16,Cl,Br])] hydrogen-carbon-ewd3
 6 | [#1$(*-[#8])]   hydrogen-oxygen
 7 | [#6X4]  carbon-tet
 8 | [#8X2]  oxygen-dival
 9 | [#8X2+0$(*-[#1])]    oxygen-hydrogen
10 | 


--------------------------------------------------------------------------------
/smarty/data/atomtypes/initialtypes.smarts:
--------------------------------------------------------------------------------
 1 | % atom types
 2 | [#1]    hydrogen
 3 | [#6]    carbon
 4 | [#6&a]  carbon aromatic
 5 | [#7]    nitrogen
 6 | [#8]    oxygen
 7 | [#9]    fluorine
 8 | [#15]   phosphorous
 9 | [#16]   sulfur
10 | [#17]   chlorine
11 | [#35]   bromine
12 | [#53]   iodine
13 | 


--------------------------------------------------------------------------------
/smarty/data/atomtypes/new-decorators.smarts:
--------------------------------------------------------------------------------
 1 | % Size of smallest ring
 2 | r3
 3 | r4
 4 | r5
 5 | r6
 6 | % Number of ring bonds
 7 | R0
 8 | R2
 9 | R3
10 | R4
11 | R
12 | % total connectivity
13 | X1             connections-1
14 | X2             connections-2
15 | X3             connections-3
16 | X4             connections-4
17 | % total-h-count
18 | H0             total-h-count-0
19 | H1             total-h-count-1
20 | H2             total-h-count-2
21 | H3             total-h-count-3
22 | % formal charge
23 | +0             neutral
24 | +1             cationic+1
25 | -1             anionic-1
26 | % aromatic/aliphatic
27 | a              aromatic
28 | A              aliphatic
29 | 


--------------------------------------------------------------------------------
/smarty/data/atomtypes/replacements.smarts:
--------------------------------------------------------------------------------
 1 | % Substitution definitions
 2 | % Format:
 3 | % <SMARTS> <replacement-string>
 4 | 
 5 | % elements
 6 | [#1]    hydrogen
 7 | [#6]    carbon
 8 | [#7]    nitrogen
 9 | [#8]    oxygen
10 | [#9]    fluorine
11 | [#15]   phosphorous
12 | [#16]   sulfur
13 | [#17]   chlorine
14 | [#35]   bromine
15 | [#53]   iodine
16 | 
17 | % electron withdrawing groups
18 | [#7!-1,#8,#16]              ewg2
19 | [#7!-1,#8!-1,#16!-1,$halogen]     ewg1
20 | 
21 | % halogens
22 | [$smallhals,$largehals]     halogen
23 | [$fluorine,$chlorine]       smallhals
24 | [$bromine,$iodine]          largehals
25 | 


--------------------------------------------------------------------------------
/smarty/data/odds_files/atom_OR_bases.smarts:
--------------------------------------------------------------------------------
 1 | % Decorator     Odds
 2 | % elements  
 3 | [#1]			
 4 | [#5]			
 5 | [#6]			
 6 | [#7]			
 7 | [#8]			
 8 | [#9]			
 9 | [#14]			
10 | [#15]			
11 | [#16]			
12 | [#17]			
13 | [#35]			
14 | [#53]			
15 | % substitution groups
16 | $ewg1            
17 | $ewg2           
18 | 


--------------------------------------------------------------------------------
/smarty/data/odds_files/atom_decorators.smarts:
--------------------------------------------------------------------------------
 1 | % Decorator         Odds
 2 | % Size of smallest ring
 3 | r3                  
 4 | r4                  
 5 | r5                     
 6 | r6                  
 7 | % Number of ring bonds
 8 | R0                                
 9 | R2                  
10 | R3                  
11 | R4                  
12 | R
13 | !R0                                  
14 | !R2                  
15 | !R3                  
16 | !R4
17 | !R                  
18 | % total connectivity
19 | X1              
20 | X2              
21 | X3              
22 | X4              
23 | !X1              
24 | !X2              
25 | !X3              
26 | !X4              
27 | % total hydrogen count
28 | H0              
29 | !H0             
30 | H1       
31 | !H1       
32 | H2 
33 | !H2             
34 | H3 
35 | !H3             
36 | % aromatic/aliphatic
37 | a  
38 | !a             
39 | A 
40 | !A              
41 | % charges
42 | -1              
43 | +0              
44 | +1              
45 | % no decorator
46 | ''
47 | 


--------------------------------------------------------------------------------
/smarty/data/odds_files/atom_index_odds.smarts:
--------------------------------------------------------------------------------
 1 | % Descriptor        odds
 2 | % used in the default, all equally likely
 3 | all                 1
 4 | %
 5 | % Other options remember to use indices appropriately
 6 | 1                   0
 7 | 2                   0
 8 | 3                   0
 9 | 4                   0
10 | Indexed             0
11 | Unindexed           0
12 | Alpha               0
13 | Beta                0
14 | 


--------------------------------------------------------------------------------
/smarty/data/odds_files/bond_AND_decorators.smarts:
--------------------------------------------------------------------------------
1 | % Decorator         Odds
2 | @                   1
3 | !@                  1
4 | !#                  0
5 | 


--------------------------------------------------------------------------------
/smarty/data/odds_files/bond_OR_bases.smarts:
--------------------------------------------------------------------------------
 1 | % Decorator         Odds
 2 | % bond types
 3 | -                   
 4 | :                  
 5 | =                    
 6 | #                   
 7 | % not bond types
 8 | !-                  
 9 | !:                  
10 | !=                 
11 | !#                  
12 | 


--------------------------------------------------------------------------------
/smarty/data/odds_files/bond_index_odds.smarts:
--------------------------------------------------------------------------------
 1 | % Descriptor        odds
 2 | % used in the default, all equally likely
 3 | all                 1
 4 | %
 5 | % Other options remember to use indices appropriately
 6 | 1                   0
 7 | 2                   0
 8 | 3                   0
 9 | Indexed             0
10 | Unindexed           0
11 | Alpha               0
12 | Beta                0
13 | 


--------------------------------------------------------------------------------
/smarty/data/odds_files/substitutions.smarts:
--------------------------------------------------------------------------------
 1 | % Substitution definitions
 2 | % Format:
 3 | % <SMARTS> <replacement-string>
 4 | % electron withdrawing groups and halogens
 5 | [#7!-1,#8,#16]              ewg2
 6 | [#7!-1,#8!-1,#16!-1,$halogen]     ewg1
 7 | [$smallhals,$largehals]     halogen
 8 | [#9,#17]                    smallhals
 9 | [#35,#53]                   largehals
10 | 


--------------------------------------------------------------------------------
/smarty/sampler.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | #=============================================================================================
  4 | # MODULE DOCSTRING
  5 | #=============================================================================================
  6 | 
  7 | """
  8 | smarty.py
  9 | Example illustrating a scheme to create and destroy atom types automatically using SMARTS.
 10 | AUTHORS
 11 | John Chodera <john.chodera@choderalab.org>, Memorial Sloan Kettering Cancer Center.
 12 | Additional contributions from the Mobley lab, UC Irvine, including David Mobley, Caitlin Bannan, and Camila Zanette.
 13 | The AtomTyper class is based on 'patty' by Pat Walters, Vertex Pharmaceuticals.
 14 | """
 15 | #=============================================================================================
 16 | # GLOBAL IMPORTS
 17 | #=============================================================================================
 18 | 
 19 | import os
 20 | import copy
 21 | import re
 22 | import numpy
 23 | import random
 24 | 
 25 | import openeye.oechem
 26 | import openeye.oeomega
 27 | import openeye.oequacpac
 28 | 
 29 | from openeye.oechem import *
 30 | from openeye.oeomega import *
 31 | from openeye.oequacpac import *
 32 | 
 33 | import networkx as nx
 34 | 
 35 | import time
 36 | 
 37 | from smarty.atomtyper import AtomTyper
 38 | from smarty.score_utils import load_trajectory
 39 | from smarty.score_utils import scores_vs_time
 40 | 
 41 | #=============================================================================================
 42 | # ATOMTYPE SAMPLER
 43 | #=============================================================================================
 44 | 
 45 | class AtomTypeSampler(object):
 46 |     """
 47 |     Atom type sampler.
 48 |     """
 49 |     def __init__(self, molecules, basetypes_filename, initialtypes_filename, decorators_filename, replacements_filename=None, reference_typed_molecules=None, temperature=0.1, verbose=False, decorator_behavior='combinatorial-decorators', element = 0):
 50 |         """
 51 |         Initialize an atom type sampler.
 52 |         ARGUMENTS
 53 |         molecules : list of molecules for typing
 54 |             List of molecules for typing
 55 |         basetypes_filename : str
 56 |             File defining base/generic atom types (which cannot be destroyed); often these are elemental types
 57 |         initialtypes_filename :
 58 |             File defining initial atom types (which CAN be destroyed, except for those which occur in basetypes_filename
 59 |         decorators_filename : str
 60 |             File containing decorators that can be added to existing types to generate subtypes
 61 |         replacements_filename : str, optional, default=None
 62 |             If specified, SMARTS replacement definitions will be read from this file
 63 |         reference_typed_molecules : list of OEMol, optional, default=None
 64 |             List of molecules with reference types for use in Monte Carlo acceptance.
 65 |             If specified, the likelihood function will utilize the maximal number of matched atom types with these molecules.
 66 |             If not specified, no likelihood function will be employed.
 67 |         temperature : float, optional, default=0.1
 68 |             Temperature for Monte Carlo acceptance/rejection
 69 |         verbose : bool, optional, default=False
 70 |             If True, verbose output will be printed.
 71 |         decorator_behavior : string either "combinatorial-decorators" or "simple-decorators"
 72 |             simple decorators include bonded atoms as decorators
 73 |         element : integer >= 0
 74 |             If 0 all atomtypes sampled, otherwise only atomtypes of that atomic number are sampled
 75 |         Notes
 76 |         -----
 77 |         This is just a proof of concept for chemical perception sampling.
 78 |         Scoring for purposed atomtypes is based on reference atomtypes.
 79 |         No scoring of molecular properties is performed
 80 |         """
 81 |         # store simple input information
 82 |         self.verbose = verbose
 83 |         self.decorator_behavior = decorator_behavior
 84 |         self.typetag = 'atomtype' # internal tag
 85 |         self.temperature = temperature
 86 |         self.element = element
 87 | 
 88 |         # Read atomtypes (initial and base) and decorators.
 89 |         self.atomtypes = AtomTyper.read_typelist(initialtypes_filename)
 90 |         self.basetypes = AtomTyper.read_typelist(basetypes_filename)
 91 |         self.decorators = AtomTyper.read_typelist(decorators_filename)
 92 |         self.replacements = AtomTyper.read_typelist(replacements_filename)
 93 | 
 94 |         # Store a deep copy of the molecules since they will be annotated
 95 |         # loop through input molecules to remove repeats
 96 |         self.molecules = list()
 97 |         if reference_typed_molecules is not None:
 98 |             self.reference_typed_molecules = list()
 99 |         else: self.reference_typed_molecules = None
100 | 
101 |         smiles = set()
102 |         for idx, mol in enumerate(molecules):
103 |             smile = OECreateIsoSmiString(mol)
104 |             if not smile in smiles:
105 |                 self.molecules.append(OEMol(mol))
106 |                 smiles.add(smile)
107 |                 if reference_typed_molecules is not None:
108 |                     ref_mol = OEMol(reference_typed_molecules[idx])
109 |                     ref_smile = OECreateIsoSmiString(ref_mol)
110 |                     # TODO: add ref_smile == smile check?
111 |                     self.reference_typed_molecules.append(OEMol(ref_mol))
112 | 
113 |         # Save bond list to use throughout
114 |         bondset = [("-","singly"), ("=", "doubly"), ("#","triply"), (":", "aromatic")]
115 | 
116 |         used_basetypes = list()
117 |         self.atomtypes_with_no_matches = set()
118 |         # Check all SMART strings that are used as a base type
119 |         for (smarts, atom_type) in self.basetypes:
120 |             check_basetype = self.smarts_matches(smarts)
121 |             if check_basetype:
122 |                 # Keep used base types
123 |                 used_basetypes.append( ( smarts, atom_type) )
124 |             else:
125 |                 # Remove unused base types
126 |                 self.atomtypes_with_no_matches.add( smarts )
127 |         self.basetypes = copy.deepcopy(used_basetypes)
128 |         if verbose:
129 |             print("USED BASE TYPES:")
130 |             for (smarts, typename) in self.basetypes:
131 |                 print("%10s %25s" % (smarts, typename))
132 | 
133 |         # Calculate which bonds in set are used
134 |         bond_typelist = [("[*]%s[*]" %bond, name) for (bond, name) in bondset]
135 |         tmpmolecules = copy.deepcopy(self.molecules)
136 |         self.type_molecules(bond_typelist, tmpmolecules, 0)
137 |         [bond_typecounts, molecule_bond_typecounts] = self.compute_type_statistics( bondset, tmpmolecules, 0)
138 |         if self.verbose:
139 |             print("USED BOND TYPES:")
140 |             self.show_type_statistics(bondset, bond_typecounts, molecule_bond_typecounts)
141 | 
142 |         # only same bonds that are used
143 |         self.bondset = [ ('~', 'any') ]
144 |         for (bond, name) in bondset:
145 |             if bond_typecounts[name] > 0:
146 |                 self.bondset.append( (bond, name) )
147 | 
148 |         # Rename base/initial types to ensure their names are unique
149 |         # clashes between initial and target types will cause problems
150 |         for idx, [smarts, typename] in enumerate(self.atomtypes):
151 |             self.atomtypes[idx] = (smarts, 'c_'+typename)
152 |         for idx, [smarts, typename] in enumerate(self.basetypes):
153 |             self.basetypes[idx] = (smarts, 'c_'+typename)
154 | 
155 |         # Store smarts for basetypes
156 |         self.basetypes_smarts = [ smarts for (smarts, name) in self.basetypes ]
157 | 
158 |         # Add any base types not already there to the initial types
159 |         initial_smarts = [ smarts for (smarts, name) in self.atomtypes ]
160 |         missing_basetypes = list()
161 |         for [smarts, typename] in self.basetypes:
162 |             if smarts not in initial_smarts:
163 |                 missing_basetypes.append( (smarts, typename) )
164 |                 if self.verbose: print("Added base (generic) type `%s`, name %s, to initial types." % (smarts, typename))
165 | 
166 |         self.atomtypes = missing_basetypes + self.atomtypes
167 | 
168 |         # Type all molecules with current typelist to ensure that starting types are sufficient.
169 |         self.type_molecules(self.atomtypes, self.molecules, self.element)
170 | 
171 |         # Compute atomtype statistics on molecules for current atomtype set
172 |         [atom_typecounts, molecule_typecounts] = self.compute_type_statistics(self.atomtypes, self.molecules, self.element)
173 |         if self.verbose:
174 |             print("MATCHED INITIAL TYPES:")
175 |             self.show_type_statistics(self.atomtypes, atom_typecounts, molecule_typecounts)
176 | 
177 |         # Track only used atomtypes and add unused to atomtypes with no matches
178 |         used_initial_atomtypes = list()
179 |         for (smarts, atom_type) in self.atomtypes:
180 |             if atom_typecounts[atom_type] > 0:
181 |                 used_initial_atomtypes.append( (smarts, atom_type) )
182 |             else:
183 |                 self.atomtypes_with_no_matches.add( smarts )
184 |                 if self.verbose: print("Removing initial atom type '%s', as it matches no atoms" % smarts)
185 |         self.atomtypes = copy.deepcopy(used_initial_atomtypes)
186 |         self.initial_atomtypes = copy.deepcopy(used_initial_atomtypes)
187 | 
188 |         # Type molecules again with the updated atomtype list
189 |         self.type_molecules(self.atomtypes, self.molecules, self.element)
190 | 
191 |         # These are atomtypes where not all children have been matched
192 |         self.unmatched_atomtypes = copy.deepcopy(self.atomtypes)
193 | 
194 |         # Creat dictionary to store children of initial atom types
195 |         self.parents = dict()
196 |         for [smarts, typename] in self.atomtypes:
197 |             #store empty list of chlidren for each atomtype
198 |             self.parents[smarts] = []
199 |         # store reverse parent dictionary with child to parent
200 |         self.child_to_parent = self._switch_parent_dict()
201 | 
202 |         # Compute total atoms
203 |         self.total_atoms = 0.0
204 |         for molecule in self.molecules:
205 |             for atom in self._GetAtoms(molecule, self.element):
206 |                 self.total_atoms += 1.0
207 | 
208 |         # Store reference molecules
209 |         self.reference_atomtypes = set()
210 |         self.current_atom_matches = None
211 |         if self.reference_typed_molecules is not None:
212 |             # Extract list of reference atom types
213 |             for molecule in self.reference_typed_molecules:
214 |                 for atom in self._GetAtoms(molecule, self.element):
215 |                     atomtype = atom.GetType()
216 |                     self.reference_atomtypes.add(atomtype)
217 |             self.reference_atomtypes = list(self.reference_atomtypes)
218 |             # Compute current atom matches
219 |             [self.atom_type_matches, self.total_atom_type_matches] = self.best_match_reference_types(self.atomtypes, self.molecules)
220 |             # Count atom types.
221 |             self.reference_atomtypes_atomcount = { atomtype : 0 for atomtype in self.reference_atomtypes }
222 |             for molecule in self.reference_typed_molecules:
223 |                 for atom in self._GetAtoms(molecule, self.element):
224 |                     atomtype = atom.GetType()
225 |                     self.reference_atomtypes_atomcount[atomtype] += 1
226 |         return
227 | 
228 |     def smarts_matches(self, smarts):
229 |         """
230 |         This method returns true if the provided SMARTS pattern is in
231 |         at least one molecule
232 |         Parameters
233 |         ----------
234 |         smarts: str, SMARTS pattern
235 |         Returns
236 |         -------
237 |         matched: boolean, True=smarts matches a molecule, False has no matches
238 |         """
239 |         # Create bindings list for the replacements (uses the replacements
240 |         # file)
241 |         bindings = list()
242 |         if self.replacements is not None:
243 |             for [smarts_s,shortname] in self.replacements:
244 |                 bindings.append( (shortname, smarts_s) )
245 |         # Perform binding replacements
246 |         smarts = OESmartsLexReplace(smarts, bindings)
247 | 
248 |         # create query
249 |         qmol = OEQMol()
250 |         if not OEParseSmarts(qmol, smarts):
251 |             raise Exception("Error parsing SMARTS %s" % smarts)
252 |         ss = OESubSearch(qmol)
253 |         for mol in self.molecules:
254 |             if ss.SingleMatch(mol):
255 |                 return True
256 |         return False
257 | 
258 |     def _GetAtoms(self, molecule, element = 0):
259 |         """
260 |         Parameters
261 |         ----------
262 |         molecule : OEMol
263 |         element : integer
264 |             if 0 looks at all atoms, otherwise only those with the given atomic number
265 | 
266 |         Returns
267 |         -------
268 |         iterator over the atoms based on the molecule and element number
269 |         """
270 |         if element > 0:
271 |             return molecule.GetAtoms(OEHasAtomicNum(element))
272 |         else:
273 |             return molecule.GetAtoms()
274 | 
275 |     def best_match_reference_types(self, atomtypes, molecules):
276 |         """
277 |         Determine best match for each parameter with reference atom types
278 |         Parameters
279 |         ----------
280 |         atomtypes :
281 |             Current atom types
282 |         molecules : list of OEMol
283 |             Typed molecules, where types are stored in self.atomtypetag string data.
284 |         Returns
285 |         -------
286 |         atom_type_matches : list of tuples (current_atomtype, reference_atomtype, counts)
287 |             Best correspondence between current and reference atomtypes, along with number of atoms equivalently typed in reference molecule set.
288 |         total_atom_type_matches : int
289 |             The total number of correspondingly typed atoms in the reference molecule set.
290 |         * Currently, types for reference typed molecules are accessed via atom.GetType(), while types for current typed molecules are accessed via atom.GetStringData(self.typetag).
291 |           This should be homogenized.
292 |         Contributor:
293 |         * Josh Fass <josh.fass@choderalab.org> contributed this algorithm.
294 |         """
295 |         if self.reference_typed_molecules is None:
296 |             if self.verbose: print('No reference molecules specified, so skipping likelihood calculation.')
297 |             return None
298 | 
299 |         # Create bipartite graph (U,V,E) matching current atom types U with reference atom types V via edges E with weights equal to number of atoms typed in common.
300 |         if self.verbose: print('Creating graph matching current atom types with reference atom types...')
301 |         initial_time = time.time()
302 |         graph = nx.Graph()
303 | 
304 |         # Get current atomtypes and reference atom types
305 |         current_atomtypes = [ typename for (smarts, typename) in atomtypes ]
306 |         reference_atomtypes = [ typename for typename in self.reference_atomtypes ]
307 |         # check that current atom types are not in reference atom types
308 |         if set(current_atomtypes) & set(reference_atomtypes):
309 |             raise Exception("Current and reference atom types must be unique")
310 |         # Add current atom types
311 |         for atomtype in current_atomtypes:
312 |             graph.add_node(atomtype, bipartite=0)
313 |         # Add reference atom types
314 |         for atomtype in reference_atomtypes:
315 |             graph.add_node(atomtype, bipartite=1)
316 |         # Add edges.
317 |         atoms_in_common = dict()
318 |         # Make an entry in the dictionary for each pair of types
319 |         for current_atomtype in current_atomtypes:
320 |             for reference_atomtype in reference_atomtypes:
321 |                 atoms_in_common[(current_atomtype,reference_atomtype)] = 0
322 |         # Loop through all molecules
323 |         for (current_typed_molecule, reference_typed_molecule) in zip(molecules, self.reference_typed_molecules):
324 |             current_atoms = self._GetAtoms(current_typed_molecule, self.element)
325 |             reference_atoms = self._GetAtoms(reference_typed_molecule, self.element)
326 |             # For each atom add a count to the current/referance atomtype pair
327 |             for (current_typed_atom, reference_typed_atom) in zip(current_atoms, reference_atoms):
328 |                 current_atomtype = current_typed_atom.GetStringData(self.typetag)
329 |                 reference_atomtype = reference_typed_atom.GetType()
330 |                 atoms_in_common[(current_atomtype,reference_atomtype)] += 1
331 |         # Make weighted edges connecting the current and reference nodes
332 |         for current_atomtype in current_atomtypes:
333 |             for reference_atomtype in reference_atomtypes:
334 |                 weight = atoms_in_common[(current_atomtype,reference_atomtype)]
335 |                 graph.add_edge(current_atomtype, reference_atomtype, weight=weight)
336 |         elapsed_time = time.time() - initial_time
337 |         if self.verbose: print('Graph creation took %.3f s' % elapsed_time)
338 | 
339 |         # Compute maximum match using networkx algorithm
340 |         if self.verbose: print('Computing maximum weight match...')
341 |         initial_time = time.time()
342 |         mate = nx.algorithms.max_weight_matching(graph, maxcardinality=False)
343 |         elapsed_time = time.time() - initial_time
344 |         if self.verbose: print('Maximum weight match took %.3f s' % elapsed_time)
345 | 
346 |         # Compute match dictionary and total number of matches.
347 |         atom_type_matches = list()
348 |         total_atom_type_matches = 0
349 |         for current_atomtype in current_atomtypes:
350 |             if current_atomtype in mate:
351 |                 reference_atomtype = mate[current_atomtype]
352 |                 counts = graph[current_atomtype][reference_atomtype]['weight']
353 |                 total_atom_type_matches += counts
354 |                 atom_type_matches.append( (current_atomtype, reference_atomtype, counts) )
355 |             else:
356 |                 atom_type_matches.append( (current_atomtype, None, None) )
357 | 
358 |         # Report on matches
359 |         if self.verbose:
360 |             print("PROPOSED:")
361 |             self.show_type_matches(atom_type_matches)
362 | 
363 |         return (atom_type_matches, total_atom_type_matches)
364 | 
365 |     def show_type_matches(self, atom_type_matches):
366 |         """
367 |         Show pairing of current to reference atom types.
368 |         Parameters
369 |         ----------
370 |         atom_type_matches : list of (current_atomtype, reference_atomtype, counts)
371 | 
372 |         Returns
373 |         -------
374 |         fraction_matched_atoms : the fractional count of matched atoms
375 |         """
376 |         print('Atom type matches:')
377 |         total_atom_type_matches = 0
378 |         for (current_atomtype, reference_atomtype, counts) in atom_type_matches:
379 |             if reference_atomtype is not None:
380 |                 print('%-64s matches %8s : %8d atoms matched' % (current_atomtype, reference_atomtype, counts))
381 |                 total_atom_type_matches += counts
382 |             else:
383 |                 print('%-64s         no match' % (current_atomtype))
384 | 
385 |         fraction_matched_atoms = float(total_atom_type_matches) / float(self.total_atoms)
386 |         print('%d / %d total atoms match (%.3f %%)' % (total_atom_type_matches, self.total_atoms, fraction_matched_atoms * 100))
387 | 
388 |         return fraction_matched_atoms
389 | 
390 | 
391 |     def AtomDecorator(self, atom1type, decorator):
392 |         """
393 |         Given an atom and a decorator ammend the SMARTS string with that decorator
394 | 
395 |         Parameters
396 |         -----------
397 |         atom1type : atomtype tuple in form (smarts, typename)
398 |         decorator : decorator being added to current atom
399 | 
400 |         Returns
401 |         -------
402 |         decorated atomtype as a tuple (smarts, typename)
403 |         """
404 |         if self.HasAlpha(atom1type):
405 |             # decorators should go before the $ sign on the atom
406 |             dollar = atom1type[0].find('$')
407 |             proposed_atomtype = atom1type[0][:dollar] + decorator[0] + atom3[0][dollar:]
408 |             proposed_typename = atom1type[1] + ' ' + decorator[1]
409 |         else:
410 |             # No alpha atom so the decorator goes before the ']'
411 |             proposed_atomtype = atom1type[0][:-1] + decorator[0] + ']'
412 |             proposed_typename = atom1type[1] + ' '  + decorator[1]
413 | 
414 |         return (proposed_atomtype, proposed_typename)
415 | 
416 |     def PickAnAtom(self, atomList):
417 |         """
418 |         Parameters
419 |         ----------
420 |         atomList : any list of tuples in the form (smarts, typename)
421 |                    this could include decorator or bond lists
422 | 
423 |         Returns
424 |         -------
425 |         one random (smarts, typename) pair from given list
426 | 
427 |         This allows for continuity in the code, this method could be changed,
428 |         and all random choices would still be made in the same way.
429 |         It also allowed for testing which atomtypes to choose from while sampling.
430 |         """
431 |         return random.choice(atomList)
432 | 
433 |     def HasAlpha(self, atom1type):
434 |         """
435 |         Parameter
436 |         ---------
437 |         atom1type : an atomtype tuple (smarts, typename)
438 | 
439 |         Returns
440 |         -------
441 |         True if atomtype has at least 1 alpha substituent otherwise False
442 |         """
443 |         # Alpha atoms are connected in the form [#1] --> [#1$(*~[#6])]
444 |         # The new characters are '$(*'
445 |         if '$(*' in atom1type[0]:
446 |             return True
447 |         else:
448 |             return False
449 | 
450 |     def AddAlphaSubstituentAtom(self, atom1type, bondset, atom2type):
451 |         """
452 |         Adds an atom alpha to the primary atom. The new alpha substituent
453 |         always adds to the end of the sequence of alpha atom
454 |         so if you have '[#1$(*~[#6])]' the next alpha atom [#8] is added in
455 |         this way '[#1$(*~[#6])$(*~[#8])]'
456 | 
457 |         Parameters
458 |         ----------
459 |         atom1type : current atomtype (smarts, typename)
460 |         bondset : bondtype to connect two atoms (smarts, bondname)
461 |         atom2type : atom to be added (smarts, typename)
462 | 
463 |         Returns
464 |         -------
465 |         Atomtype with new alpha substituent (smarts, typename)
466 |         """
467 |         proposed_atomtype = atom1type[0][:len(atom1type[0])-1] + '$(*' + bondset[0] + atom2type[0] + ')]'
468 |         proposed_typename = atom1type[1] + ' ' + bondset[1] + ' ' + atom2type[1] + ' '
469 |         return (proposed_atomtype, proposed_typename)
470 | 
471 |     def AddBetaSubstituentAtom(self, atom1type, bondset, atom2type):
472 |         """
473 |         Adds atom2type as a beta substituent bonding it to the
474 |         first alpha atom in atom1type. If atom1type does not have
475 |         an alpha atom this metho will call addAlphaSubstituentAtom instead.
476 | 
477 |         Parameters
478 |         ----------
479 |         atom1type : parent atomtype (smarts, typename)
480 |         bondset : bond used to connect atoms (smarts, bondname)
481 |         atom2type : atomtype being bonded in beta position (smarts, typename)
482 | 
483 |         Returns
484 |         -------
485 |         child atomtype as tuple (smarts, typename)
486 | 
487 |         """
488 | 
489 |         # counting '[' tells us how many atoms are in the mix
490 |         count = atom1type[0].count('[')
491 |         proposed_atomtype = ""
492 |         number_brackets = 0
493 |         # find closed alpha atom
494 |         closeAlpha = atom1type[0].find(']')
495 |         # This has two atoms (already has an alpha atom)
496 |         if count == 2:
497 |             proposed_atomtype = atom1type[0][:closeAlpha+1]
498 |             proposed_atomtype += bondset[0] + atom2type[0] + ')]'
499 |             proposed_typename = atom1type[1] + bondset[1] + ' ' + atom2type[1]
500 |             if self.verbose: print("ADD FIRST BETA SUB: proposed --- %s %s" % ( str(proposed_atomtype), str(proposed_typename)))
501 |         elif count > 2:
502 |             # Has an alpha atom with at least 1 beta atom
503 |             proposed_atomtype = atom1type[0][:closeAlpha+1]
504 |             proposed_atomtype += '(' + bondset[0] + atom2type[0] + ')'
505 |             proposed_atomtype += atom1type[0][closeAlpha+1:]
506 |             proposed_typename = atom1type[1] + ' (' + bondset[1] + ' ' + atom2type[1] + ')'
507 |             if self.verbose: print("ADD MORE BETA SUB: proposed --- %s %s" % ( str(proposed_atomtype), str(proposed_typename)))
508 |         else:
509 |             # Has only 1 atom which means there isn't an alpha atom yet, add an alpha atom instead
510 |             proposed_atomtype, proposed_typename = self.AddAlphaSubstituentAtom(atom1type, bondset, atom2type)
511 |         return (proposed_atomtype, proposed_typename)
512 | 
513 | 
514 |     def sample_atomtypes(self):
515 |         """
516 |         Perform one step of atom type sampling.
517 |         This is done by either removing a current atomtype
518 |         or creating a child atom type. Then the proposed
519 |         atomtype list is scored and the move is accepted or rejected
520 |         """
521 |         # Copy current atomtypes for proposal.
522 |         proposed_atomtypes = copy.deepcopy(self.atomtypes)
523 |         proposed_molecules = copy.deepcopy(self.molecules)
524 |         proposed_parents = copy.deepcopy(self.parents)
525 | 
526 |         if random.random() < 0.5:
527 |             # Pick a random index and remove atomtype at that index
528 |             (atomtype, typename) = self.PickAnAtom(proposed_atomtypes)
529 |             if self.verbose: print("Attempting to destroy atom type %s : %s..." % (atomtype, typename))
530 | 
531 |             # Reject deletion of (populated) base types as we want to retain
532 |             # generics even if empty
533 |             if atomtype in self.basetypes_smarts:
534 |                 if self.verbose: print("Destruction rejected for atom type %s because this is a generic type which was initially populated." % atomtype )
535 |                 return False
536 | 
537 |             # remove chosen atomtype
538 |             proposed_atomtypes.remove( (atomtype, typename) )
539 |             # update proposed parent dictionary
540 |             for parent, children in proposed_parents.items():
541 |                 if atomtype in [at for (at, tn) in children]:
542 |                     children += proposed_parents[atomtype]
543 |                     children.remove( (atomtype, typename) )
544 | 
545 |             del proposed_parents[atomtype]
546 | 
547 |             # Try to type all molecules.
548 |             try:
549 |                 self.type_molecules(proposed_atomtypes, proposed_molecules, self.element)
550 |             except AtomTyper.TypingException as e:
551 |                 # Reject since typing failed.
552 |                 if self.verbose: print("Typing failed; rejecting.")
553 |                 return False
554 |         else:
555 |             if self.decorator_behavior == 'simple-decorators':
556 |                 # Pick an atomtype to subtype.
557 |                 atom1type = self.PickAnAtom(self.atomtypes)
558 |                 # Pick a decorator to add.
559 |                 (decorator, decorator_typename) = self.PickAnAtom(self.decorators)
560 | 
561 |                 # Create new atomtype to insert by appending decorator with 'and' operator.
562 |                 result = re.match('\[(.+)\]', atom1type[0])
563 |                 proposed_atomtype = '[' + result.groups(1)[0] + '&' + decorator + ']'
564 |                 proposed_typename = atom1type[1] + ' ' + decorator_typename
565 |                 if self.verbose: print("Attempting to create new subtype: '%s' (%s) + '%s' (%s) -> '%s' (%s)" % (atom1type[0], atom1type[1], decorator, decorator_typename, proposed_atomtype, proposed_typename))
566 | 
567 |             else: # combinatorial-decorators
568 |                 # Pick an atomtype
569 |                 atom1type = self.PickAnAtom(self.atomtypes)
570 |                 # Check if we need to add an alpha or beta substituent
571 |                 if self.HasAlpha(atom1type):
572 |                     # Has alpha
573 |                     bondtype = self.PickAnAtom(self.bondset)
574 |                     atom2type = self.PickAnAtom(self.basetypes)
575 |                     if random.random() < 0.5 or atom1type[0][2] == '1': # Add Beta Substituent Atom randomly or when it is Hydrogen
576 |                         proposed_atomtype, proposed_typename = self.AddBetaSubstituentAtom(atom1type, bondtype, atom2type)
577 |                     else: # Add another Alpha Substituent if it is not a Hydrogen
578 |                         proposed_atomtype, proposed_typename = self.AddAlphaSubstituentAtom(atom1type, bondtype, atom2type)
579 |                     if self.verbose: print("Attempting to create new subtype: -> '%s' (%s)" % (proposed_atomtype, proposed_typename))
580 |                 else:
581 |                     # Has no alpha
582 |                     if random.random() < 0.5: # add decorator to primary atom
583 |                         decorator = self.PickAnAtom(self.decorators)
584 |                         proposed_atomtype, proposed_typename = self.AtomDecorator(atom1type, decorator)
585 |                         if self.verbose: print("Attempting to create new subtype: '%s' (%s) + '%s' (%s) -> '%s' (%s)" % (atom1type[0], atom1type[1], decorator[0], decorator[1], proposed_atomtype, proposed_typename))
586 |                     else: # add Alpha substituent
587 |                         bondtype = self.PickAnAtom(self.bondset)
588 |                         atom2type = self.PickAnAtom(self.basetypes)
589 |                         proposed_atomtype, proposed_typename = self.AddAlphaSubstituentAtom(atom1type, bondtype, atom2type)
590 |                         if self.verbose: print("Attempting to create new subtype: '%s' (%s) -> '%s' (%s)" % (atom1type[0], atom1type[1], proposed_atomtype, proposed_typename))
591 | 
592 |             # Check that we haven't already determined this atom type isn't matched in the dataset.
593 |             if proposed_atomtype in self.atomtypes_with_no_matches:
594 |                 if self.verbose: print("Atom type '%s' (%s) unused in dataset; rejecting." % (proposed_atomtype, proposed_typename))
595 |                 return False
596 | 
597 |             # Check that it is a new SMARTS pattern
598 |             if proposed_atomtype in [smarts for (smarts, typename) in self.atomtypes]:
599 |                 if self.verbose: print("Atom type '%s' (%s) is in the existing atomtype list; rejecting." % (proposed_atomtype, proposed_typename))
600 |                 return False
601 | 
602 |             # Check the proposed type name is unique
603 |             current_typenames = [typename for (smarts, typename) in self.atomtypes]
604 |             while proposed_typename in current_typenames:
605 |                 proposed_typename += '%i' % random.randint(0,10)
606 | 
607 |             # for either decorator - update proposed parent dictionary
608 |             proposed_parents[atom1type[0]].append( (proposed_atomtype, proposed_typename) )
609 |             proposed_parents[proposed_atomtype] = []
610 | 
611 |             # Insert atomtype immediately after.
612 |             proposed_atomtypes.append( (proposed_atomtype, proposed_typename) )
613 |             # Try to type all molecules.
614 |             try:
615 |                 # Type molecules.
616 |                 self.type_molecules(proposed_atomtypes, proposed_molecules, self.element)
617 |                 # Compute updated statistics.
618 |                 [proposed_atom_typecounts, proposed_molecule_typecounts] = self.compute_type_statistics(proposed_atomtypes, proposed_molecules, self.element)
619 |             except AtomTyper.TypingException as e:
620 |                 print("Exception: %s" % str(e))
621 |                 # Reject since typing failed.
622 |                 if self.verbose: print("Typing failed for one or more molecules using proposed atomtypes; rejecting.")
623 |                 return False
624 | 
625 |             # Reject if new type is unused.
626 |             if (proposed_atom_typecounts[proposed_typename] == 0):
627 |                 # Reject because new type is unused in dataset.
628 |                 if self.verbose: print("Atom type '%s' (%s) unused in dataset; rejecting." % (proposed_atomtype, proposed_typename))
629 |                 # Store this atomtype to speed up future rejections
630 |                 self.atomtypes_with_no_matches.add(proposed_atomtype)
631 |                 return False
632 | 
633 |             # Reject if any type is emptied (UNLESS it is a basetype)
634 |             for (smarts, typename) in proposed_atomtypes:
635 |                 if not smarts in self.basetypes_smarts: # not a base type
636 |                     if proposed_atom_typecounts[typename] == 0: # no matches
637 |                         if self.verbose: print("Atomtype '%s' (%s) is now unused in dataset; rejecting." % (smarts, typename))
638 |                         return False
639 | 
640 |         if self.verbose: print('Proposal is valid...')
641 | 
642 |         # Accept automatically if no reference molecules
643 |         accept = False
644 |         if self.reference_typed_molecules is None:
645 |             accept = True
646 |         else:
647 |             # Find number of matches for current set
648 |             (proposed_atom_type_matches, proposed_total_atom_type_matches) = self.best_match_reference_types(proposed_atomtypes, proposed_molecules)
649 |             score_dif = (proposed_total_atom_type_matches - self.total_atom_type_matches)
650 |             # if temperature is zero only accept increased scores
651 |             if self.temperature == 0.0:
652 |                 print('Proposal score: %d >> %d' % (self.total_atom_type_matches, proposed_total_atom_type_matches))
653 |                 accept = score_dif > 0.0
654 | 
655 |             # If finite temperature compute effective temperature and log_P_accept
656 |             else:
657 |                 # Compute effective temperature
658 |                 effective_temperature = (self.total_atoms * self.temperature)
659 |                 # Compute likelihood for accept/reject
660 |                 log_P_accept = score_dif / effective_temperature
661 |                 print('Proposal score: %d >> %d : log_P_accept = %.5e' % (self.total_atom_type_matches, proposed_total_atom_type_matches, log_P_accept))
662 |                 accept = (log_P_accept > 0.0) or (numpy.random.uniform() < numpy.exp(log_P_accept))
663 | 
664 |         # Accept or reject
665 |         if accept:
666 |             self.atomtypes = proposed_atomtypes
667 |             self.molecules = proposed_molecules
668 |             self.parents = proposed_parents
669 |             self.child_to_parent = self._switch_parent_dict()
670 |             self.atom_type_matches = proposed_atom_type_matches
671 |             self.total_atom_type_matches = proposed_total_atom_type_matches
672 |             return True
673 |         else:
674 |             return False
675 | 
676 |     def type_molecules(self, typelist, molecules, element = 0):
677 |         """
678 |         Type all molecules with the specified typelist.
679 |         Parameters
680 |         ----------
681 |         typelist : list of atomtypes or tuples in the form (smarts, typename)
682 |         molecules : list of OEMols
683 |         element : integer 0 for all atoms or atomic number being sampled
684 | 
685 |         For every atom in each molecule the relevant typename is assigned
686 |         so it can be accessed at atom.GetStringData(self.typetag)
687 |         """
688 |         # Create an atom typer.
689 |         atomtyper = AtomTyper(typelist, self.typetag, replacements=self.replacements)
690 | 
691 |         # Type molecules.
692 |         for molecule in molecules:
693 |             atomtyper.assignTypes(molecule, element)
694 | 
695 |         return
696 | 
697 |     def compute_type_statistics(self, typelist, molecules, element = 0):
698 |         """
699 |         Compute statistics for numnber of molecules assigned each type.
700 |         Parameters
701 |         ----------
702 |         typelist : atomtype list of form (smarts, typename)
703 |         molecules : list of OEmols
704 |         element : 0 for all atoms or atomic number being sampled
705 |         Returns
706 |         -------
707 |         atom_typecounts (dict) : counts of number of atoms containing each atomtype
708 |         molecule_typecounds (dict) : counts of number of molecules containing each atom type
709 |         """
710 |         # Zero type counts by atom and molecule.
711 |         atom_typecounts = dict()
712 |         molecule_typecounts = dict()
713 |         for [smarts, typename] in typelist:
714 |             atom_typecounts[typename] = 0
715 |             molecule_typecounts[typename] = 0
716 | 
717 |         # Count number of atoms with each type.
718 |         for molecule in molecules:
719 |             types_in_this_molecule = set()
720 |             for atom in self._GetAtoms(molecule, element):
721 |                 atomtype = atom.GetStringData(self.typetag)
722 |                 types_in_this_molecule.add(atomtype)
723 |                 atom_typecounts[atomtype] += 1
724 |             for atomtype in types_in_this_molecule:
725 |                 molecule_typecounts[atomtype] += 1
726 | 
727 |         return (atom_typecounts, molecule_typecounts)
728 | 
729 |     def show_type_statistics(self, typelist, atom_typecounts, molecule_typecounts, atomtype_matches=None):
730 |         """
731 |         Print atom type statistics to the commandline
732 |         Parameters
733 |         ----------
734 |         typelist : atomtype list of form (smarts, typename)
735 |         atom_typecounts : dictionary result from compute_type_statistics
736 |         molecule_typecounts : dictionary result from compute_type_statistics
737 |         atomtype_matches : dictionary result from best_match_references_types
738 |                            if there are reference molecules
739 |         """
740 |         index = 1
741 |         natoms = 0
742 | 
743 |         if atomtype_matches is not None:
744 |             reference_type_info = dict()
745 |             for (typename, reference_atomtype, count) in atomtype_matches:
746 |                 reference_type_info[typename] = (reference_atomtype, count)
747 | 
748 |         # Print header
749 |         if atomtype_matches is not None:
750 |             print("%5s   %10s %10s   %64s %32s %8s %46s" % ('INDEX', 'ATOMS', 'MOLECULES', 'TYPE NAME', 'SMARTS', 'REF TYPE', 'FRACTION OF REF TYPED MOLECULES MATCHED'))
751 |         else:
752 |             print("%5s   %10s %10s   %64s %32s" % ('INDEX', 'ATOMS', 'MOLECULES', 'TYPE NAME', 'SMARTS'))
753 | 
754 |         # Print counts
755 |         for [smarts, typename] in typelist:
756 |             if atomtype_matches is not None:
757 |                 (reference_atomtype, reference_count) = reference_type_info[typename]
758 |                 if reference_atomtype is not None:
759 |                     reference_total = self.reference_atomtypes_atomcount[reference_atomtype]
760 |                     reference_fraction = float(reference_count) / float(reference_total)
761 |                     print("%5d : %10d %10d | %64s %32s %8s %16d / %16d (%7.3f%%)" % (index, atom_typecounts[typename], molecule_typecounts[typename], typename, smarts, reference_atomtype, reference_count, reference_total, reference_fraction*100))
762 |                 else:
763 |                     print("%5d : %10d %10d | %64s %32s" % (index, atom_typecounts[typename], molecule_typecounts[typename], typename, smarts))
764 |             else:
765 |                 print("%5d : %10d %10d | %64s %32s" % (index, atom_typecounts[typename], molecule_typecounts[typename], typename, smarts))
766 | 
767 |             natoms += atom_typecounts[typename]
768 |             index += 1
769 | 
770 |         nmolecules = len(self.molecules)
771 | 
772 |         if atomtype_matches is not None:
773 |             print("%5s : %10d %10d |  %64s %32s %8d / %8d match (%.3f %%)" % ('TOTAL', natoms, nmolecules, '', '', self.total_atom_type_matches, self.total_atoms, (float(self.total_atom_type_matches) / float(self.total_atoms)) * 100))
774 |         else:
775 |             print("%5s : %10d %10d" % ('TOTAL', natoms, nmolecules))
776 | 
777 |         return
778 | 
779 |     def save_type_statistics(self, typelist, atom_typecounts, molecule_typecounts, atomtype_matches=None):
780 |         """
781 |         Saves the match information in format for a trajectory file
782 |         Parameters
783 |         ----------
784 |         typelist : atomtype list of form (smarts, typename)
785 |         atom_typecounts : dictionary result from compute_type_statistics
786 |         molecule_typecounts : dictionary result from compute_type_statistics
787 |         atomtype_matches : dictionary result from best_match_references_types
788 |                            if there are reference molecules
789 |         Returns
790 |         -------
791 |         output : string line for trajectory file
792 |         """
793 |         if atomtype_matches is not None:
794 |             reference_type_info = dict()
795 |             for (typename, reference_atomtype, count) in atomtype_matches:
796 |                 reference_type_info[typename] = (reference_atomtype, count)
797 | 
798 |         index = 1
799 |         output = []
800 |         ntypes = 0
801 |         # Print counts
802 |         # INDEX, SMARTS, PARENT INDEX, REF TYPE, MATCHES, MOLECULES, FRACTION, OUT of, PERCENTAGE
803 |         for [smarts, typename] in typelist:
804 |             parent = str(self.child_to_parent[smarts])
805 |             if atomtype_matches is not None:
806 |                 (reference_atomtype, reference_count) = reference_type_info[typename]
807 |                 if reference_atomtype is not None:
808 |                     reference_total = self.reference_atomtypes_atomcount[reference_atomtype]
809 |                     reference_fraction = float(reference_count) / float(reference_total)
810 |                     # Save output
811 |                     output.append("%i,'%s','%s','%s','%s',%i,%i,%i,%i" % (index, smarts, typename, parent, reference_atomtype, atom_typecounts[typename], molecule_typecounts[typename], reference_count, reference_total))
812 |                 else:
813 |                     output.append("%i,'%s','%s','%s','%s',%i,%i,%i,%i" % (index, smarts, typename, parent, 'NONE', atom_typecounts[typename], molecule_typecounts[typename], 0, 0))
814 | 
815 |             else:
816 |                 output.append("%i,'%s',%i,%i,'%s',%i,%i,%i,%i" % (index, smarts, typename, parent, 'NONE', atom_typecounts[typename], molecule_typecounts[typename], 0, 0))
817 | 
818 |             ntypes += atom_typecounts[typename]
819 |             index += 1
820 | 
821 |         nmolecules = len(self.molecules)
822 |         if atomtype_matches is None:
823 |             output.append("-1,'total','all','None','all',%i,%i,0,0" % (ntypes, nmolecules))
824 |         else:
825 |             output.append("-1,'total','all','None','all',%i,%i,%i,%i" % (ntypes,nmolecules,self.total_atom_type_matches,self.total_atoms))
826 |         return output
827 | 
828 |     def _switch_parent_dict(self):
829 |         """
830 |         Takes the parent dictionary and returns a dictionary in the form
831 |         {child: parent}
832 |         """
833 |         child_to_parent = dict()
834 |         for smarts in self.parents.keys():
835 |             child_to_parent[smarts] = None
836 | 
837 |         for smarts, children in self.parents.items():
838 |             for [child_smarts, child_typename] in children:
839 |                 child_to_parent[child_smarts] = smarts
840 | 
841 |         return child_to_parent
842 | 
843 |     def print_parent_tree(self, roots, start=''):
844 |         """
845 |         Recursively prints the parent tree.
846 |         Parameters
847 |         ----------
848 |         roots = list of smarts strings to print
849 |         """
850 |         for r in roots:
851 |             print("%s%s" % (start, r))
852 |             if r in self.parents:
853 |                 new_roots = [smart for [smart, name] in self.parents[r]]
854 |                 self.print_parent_tree(new_roots, start+'\t')
855 | 
856 | 
857 |     def run(self, niterations, trajFile=None):
858 |         """
859 |         Run atomtype sampler for the specified number of iterations.
860 |         Parameters
861 |         ----------
862 |         niterations : int
863 |             The specified number of iterations
864 |         trajFile : str, optional, default=None
865 |             Output trajectory filename
866 |         Returns
867 |         ----------
868 |         fraction_matched_atoms : float
869 |             fraction of total atoms matched successfully at end of run
870 |         """
871 |         if trajFile is not None:
872 |             # make "trajectory" file
873 |             if os.path.isfile(trajFile):
874 |                 print("trajectory file already exists, it was overwritten")
875 |             self.traj = open(trajFile, 'w')
876 |             self.traj.write('Iteration,Index,Smarts,Typename,ParentSMARTS,RefType,Matches,Molecules,FractionMatched,Denominator\n')
877 | 
878 |         for iteration in range(niterations):
879 |             if self.verbose:
880 |                 print("Iteration %d / %d" % (iteration, niterations))
881 | 
882 |             accepted = self.sample_atomtypes()
883 |             [atom_typecounts, molecule_typecounts] = self.compute_type_statistics(self.atomtypes, self.molecules, self.element)
884 | 
885 |             if trajFile is not None:
886 |                 # Get data as list of csv strings
887 |                 lines = self.save_type_statistics(self.atomtypes, atom_typecounts, molecule_typecounts, atomtype_matches=self.atom_type_matches)
888 |                 # Add lines to trajectory with iteration number:
889 |                 for l in lines:
890 |                     self.traj.write('%i,%s \n' % (iteration, l))
891 | 
892 |             if self.verbose:
893 |                 if accepted:
894 |                     print('Accepted.')
895 |                 else:
896 |                     print('Rejected.')
897 | 
898 |                 # Compute atomtype statistics on molecules.
899 |                 self.show_type_statistics(self.atomtypes, atom_typecounts, molecule_typecounts, atomtype_matches=self.atom_type_matches)
900 |                 print('')
901 | 
902 |                 # Print parent tree as it is now.
903 |                 roots = [r for r in self.child_to_parent.keys() if self.child_to_parent[r] is None]
904 | 
905 |                 print("Atom type hierarchy:")
906 |                 self.print_parent_tree(roots, '\t')
907 | 
908 |         if trajFile is not None:
909 |             self.traj.close()
910 |             # Get/print some stats on trajectory
911 |             # Load timeseries
912 |             timeseries = load_trajectory( trajFile )
913 |             time_fractions = scores_vs_time( timeseries )
914 |             print("Maximum score achieved: %.2f" % max(time_fractions['all']))
915 | 
916 | 
917 |         #Compute final type stats
918 |         [atom_typecounts, molecule_typecounts] = self.compute_type_statistics(self.atomtypes, self.molecules, self.element)
919 |         fraction_matched_atoms = self.show_type_matches(self.atom_type_matches)
920 | 
921 |         # If verbose print parent tree:
922 |         if self.verbose:
923 |             roots = self.parents.keys()
924 |             # Remove keys from roots if they are children
925 |             for parent, children in self.parents.items():
926 |                 child_smarts = [smarts for [smarts, name] in children]
927 |                 for child in child_smarts:
928 |                     if child in roots:
929 |                         roots.remove(child)
930 | 
931 |             print("Atom type hierarchy:")
932 |             self.print_parent_tree(roots, '\t')
933 |         return fraction_matched_atoms
934 | 


--------------------------------------------------------------------------------
/smarty/score_utils.py:
--------------------------------------------------------------------------------
  1 | import numpy
  2 | import pandas as pd
  3 | import matplotlib
  4 | matplotlib.use('pdf')
  5 | import pylab as pl
  6 | 
  7 | def load_trajectory( trajFile):
  8 |     """Load data from a specified smarty trajectory .csv file and return a summary.
  9 | 
 10 |     Note that any SMARTS patterns which do not match anything are ignored in the resulting summary.
 11 | 
 12 |     Parameters
 13 |     ----------
 14 | 
 15 |         trajFile (str) : filename to read from
 16 | 
 17 |     Returns
 18 |     -------
 19 |         timeseries (dict) : status by iteration number
 20 |             Dictionary, keyed by iteration, storing the state at each iteration
 21 |             Subsequent keys are by reference types, (i.e. timeseries[1]['HO'])
 22 |             and an entry for total if included in the trajectory file at timeseries[1]['total']
 23 |             gives data at step 1 on what (if anything) matches 'HO'. Subsequent
 24 |             keys are 'smarts', 'matches', 'molecules', 'fractionmatched', 'index' (serial #
 25 |             of match), `ParNum` (parameter number/label), `ParentParNum` (parameter number/label of parent)
 26 |             `denominator` (number of possible matches of this type), `fraction`
 27 |             (fraction of this type matched).
 28 | 
 29 |     """
 30 |     data = pd.read_csv(trajFile, quotechar="'")
 31 |     data_dict = data.to_dict()
 32 |     # If the number if headers is not as expected, this is a different version and we can't parse
 33 |     if len(data.columns) != 10:
 34 |         raise Exception("Number of headers in trajectory not as expected; can't parse.")
 35 | 
 36 |     # Initialize storage
 37 |     timeseries = {}
 38 | 
 39 |     # Number of lines
 40 |     max_lines = data.index[-1]
 41 | 
 42 |     # How many iterations are we looking at?
 43 |     max_its = data.Iteration[max_lines]
 44 | 
 45 |     keys = list(data.columns)
 46 |     keys.remove('RefType')
 47 |     keys.remove('Iteration')
 48 | 
 49 |     numerator = data.columns[-2].lower()
 50 |     denominator = data.columns[-1].lower()
 51 |     # Process file
 52 |     for linenr in data.index:
 53 |         iteration = data.Iteration[linenr]
 54 | 
 55 |         # Pull elements from line and store
 56 |         if not iteration in timeseries: timeseries[iteration] = {}
 57 |         reftype = data.RefType[linenr]
 58 | 
 59 |         if not reftype=="'NONE'":
 60 |             timeseries[iteration][reftype]={}
 61 |             for k in keys:
 62 |                 if k in ['ParNum', 'ParentParNum']:
 63 |                     timeseries[iteration][reftype][k] = data_dict[k][linenr]
 64 |                 else:
 65 |                     timeseries[iteration][reftype][k.lower()] = data_dict[k][linenr]
 66 |             den = float(timeseries[iteration][reftype][denominator])
 67 |             if den == 0.0:
 68 |                 print("At iteration %s, found %s matched atoms and a denominator of %s for reftype %s..." % (iteration, timeseries[iteration][reftype][numerator], timeseries[iteration][reftype][denominator], reftype))
 69 |                 timeseries[iteration][reftype]['fraction'] = numpy.nan
 70 |             else:
 71 |                 timeseries[iteration][reftype]['fraction'] = timeseries[iteration][reftype][numerator]/den
 72 | 
 73 |     return timeseries
 74 | 
 75 | def scores_vs_time(timeseries, numerator = 'fractionmatched'):
 76 |     """Process a timeseries as read by load_trajectory and return the fraction of each reference atom type found at each time.
 77 | 
 78 | 
 79 |     Parameters
 80 |     ----------
 81 |     trajectory : dict
 82 |         Trajectory information as output by load_trajectory
 83 | 
 84 |     Returns
 85 |     -------
 86 |     time_fractions : dict
 87 |         Dictionary of NumPy arrays, keyed by reference type.
 88 |         The full score across all types is under `all`.
 89 |             'all' is from the total list if available or calculated from other references
 90 |     """
 91 | 
 92 |     # How many iterations are present?
 93 |     max_its = numpy.max([k for k in timeseries])
 94 | 
 95 |     # Retrieve keys of all reference types
 96 |     reftypes = set()
 97 |     for it in timeseries:
 98 |         for reftype in timeseries[it]:
 99 |             if reftype not in reftypes:
100 |                  reftypes.add(reftype)
101 | 
102 |     # Allocate storage
103 |     time_fractions = {}
104 |     time_fractions['all'] = numpy.zeros( max_its, float)
105 |     for reftype in reftypes:
106 |         time_fractions[reftype] = numpy.zeros( max_its, float)
107 | 
108 |     # Update with data
109 |     for it in range(max_its):
110 |         # Update reference types occuring at this iteration
111 |         denom = 0
112 |         numer = 0
113 |         for reftype in reftypes:
114 |             if reftype in timeseries[it]:
115 |                 try:
116 |                     time_fractions[reftype][it] = timeseries[it][reftype]['fraction']
117 |                 except KeyError:
118 |                     print("Can't find key set %s, %s, %s for timeseries." % (it, reftype, 'fraction'))
119 |                     print("Available keys:", timeseries[it][reftype])
120 |                 denom += timeseries[it][reftype]['denominator']
121 |                 numer += timeseries[it][reftype][numerator]
122 | 
123 |             # Any reference type which does not appear at this time point has zero matches so we just leave the value at zero
124 | 
125 |         # Handle 'all' case last
126 |         if time_fractions['all'][it] == 0:
127 |             time_fractions['all'][it] = numer/float(denom)
128 | 
129 |     return time_fractions
130 | 
def create_plot_file(trajFile, plot_filename, plot_others=False, verbose = False):
    """
    Creates plot to demonstrate performance of smarty or smirky

    The overall score is drawn as a thick black line against iteration number.
    The plot is drawn on its own figure, which is closed once it is saved, so
    repeated calls in one process do not draw on top of each other.

    trajFile - csv file generated by smarty, smarty_elemental, or smirky
    plot_filename - pdf to save plot file to
    plot_others - if True plots data for all reftypes separately, optional
    verbose - if True prints the maximum overall score as a percentage, optional
    """

    # Only the header is needed here: the second-to-last column names the
    # count that scores_vs_time sums to build the overall score.
    header = pd.read_csv(trajFile, quotechar="'", nrows=0)
    numerator = header.columns[-2].lower()

    timeseries = load_trajectory(trajFile)
    time_fractions = scores_vs_time(timeseries, numerator)

    max_score = max(time_fractions['all']) *100.0
    if verbose: print("Maximum score was %.1f %%" % max_score)

    fig = pl.figure()
    try:
        # plot overall score
        pl.plot( time_fractions['all'], 'k-', linewidth = 2.0)

        if plot_others:
            reftypes = [k for k in time_fractions]
            reftypes.remove('all')

            # Plot scores for individual types
            for reftype in reftypes:
                pl.plot(time_fractions[reftype])

            pl.legend(['all']+reftypes, loc='lower right')

        pl.xlabel('Iterations')
        pl.ylabel('Fraction of reference type found')
        pl.ylim(-0.1, 1.1)

        pl.savefig(plot_filename)
    finally:
        # Release the figure even if plotting or saving fails
        pl.close(fig)
166 | 
167 | 


--------------------------------------------------------------------------------
/smarty/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openforcefield/smarty/882d54b6d6d0fada748c71789964b07be2210a6a/smarty/tests/__init__.py


--------------------------------------------------------------------------------
/smarty/tests/test_atomtyper.py:
--------------------------------------------------------------------------------
 1 | from functools import partial
 2 | from smarty import AtomTyper
 3 | import smarty
 4 | from smarty.utils import get_data_filename
 5 | from openforcefield.utils import read_molecules
 6 | from unittest import TestCase
 7 | 
 8 | class TestAtomTyper(TestCase):
 9 |     def test_read_typelist(self):
10 |         atomtypes = AtomTyper.read_typelist(get_data_filename('atomtypes/basetypes.smarts'))
11 |         decorators = AtomTyper.read_typelist(get_data_filename('atomtypes/decorators.smarts'))
12 |         replacements = AtomTyper.read_typelist(get_data_filename('atomtypes/replacements.smarts'))
13 | 
14 |     def test_atomtyper(self):
15 |         typetag = 'atomtype'
16 |         atomtypes = AtomTyper.read_typelist(get_data_filename('atomtypes/basetypes.smarts'))
17 |         replacements = AtomTyper.read_typelist(get_data_filename('atomtypes/replacements.smarts'))
18 |         molecules = read_molecules('zinc-subset-tripos.mol2.gz', verbose=False)
19 | 
20 |         atomtyper = AtomTyper(atomtypes, typetag, replacements=replacements)
21 |         for molecule in molecules:
22 |             atomtyper.assignTypes(molecule)
23 | 


--------------------------------------------------------------------------------
/smarty/tests/test_sampler.py:
--------------------------------------------------------------------------------
  1 | from functools import partial
  2 | import smarty
  3 | from smarty import AtomTyper, AtomTypeSampler, score_utils
  4 | from smarty.utils import get_data_filename
  5 | from openforcefield.utils import read_molecules
  6 | from openforcefield import utils
  7 | import unittest
  8 | from unittest import TestCase
  9 | 
 10 | class TestAtomTypeSampler(TestCase):
 11 |     def __init__(self, *args, **kwargs):
 12 |         """
 13 |         Initialize TestCase including files used in all smarty tests
 14 |         """
 15 |         unittest.TestCase.__init__(self, *args, **kwargs)
 16 | 
 17 |         self.basetypes = get_data_filename('atomtypes/basetypes.smarts')
 18 |         self.alkethoh_answers = get_data_filename('atomtypes/initial_AlkEthOH.smarts')
 19 |         self.simple_decs = get_data_filename('atomtypes/decorators.smarts')
 20 |         self.combine_decs = get_data_filename('atomtypes/new-decorators.smarts')
 21 |         self.replacements = get_data_filename('atomtypes/replacements.smarts')
 22 | 
 23 |         # import molecules
 24 |         self.mols_zinc = read_molecules('zinc-subset-tripos.mol2.gz', verbose=False)
 25 |         self.mols_zinc_ref = read_molecules('zinc-subset-parm@frosst.mol2.gz', verbose=False)
 26 | 
 27 |         self.mols_alkethoh = read_molecules('AlkEthOH_test_filt1_tripos.mol2', verbose=False)
 28 |         self.mols_alkethoh_ref = read_molecules('AlkEthOH_test_filt1_ff.mol2', verbose=False)
 29 | 
 30 | 
 31 |     def test_atomtyper(self):
 32 |         """
 33 |         Test atomtype sampler with simple-decorators
 34 |         """
 35 |         atomtype_sampler = smarty.AtomTypeSampler(self.mols_zinc,
 36 |                 self.basetypes, self.basetypes, self.simple_decs,
 37 |                 replacements_filename = self.replacements,
 38 |                 reference_typed_molecules =self.mols_zinc_ref,
 39 |                 temperature = 0.1, verbose = False,
 40 |                 decorator_behavior = 'simple-decorators', element =0)
 41 |         atomtype_sampler.run(2)
 42 | 
 43 |     def test_atomtyper_combinatorial(self):
 44 |         """
 45 |         Test atomtype sampler with combinatorial-decorators and optional output files
 46 |         """
 47 |         atomtype_sampler = smarty.AtomTypeSampler(self.mols_zinc,
 48 |                 self.basetypes, self.basetypes, self.combine_decs,
 49 |                 replacements_filename = self.replacements,
 50 |                 reference_typed_molecules =self.mols_zinc_ref,
 51 |                 temperature = 0.1, verbose = False)
 52 | 
 53 |         # run sampler with optional outputs
 54 |         traj = 'test_smarty.csv'
 55 |         plot = 'test_smarty.pdf'
 56 |         atomtype_sampler.run(5, traj)
 57 |         # test trajectory analysis functions on smarty output
 58 |         timeseries = score_utils.load_trajectory(traj)
 59 |         scores_vs_time = score_utils.scores_vs_time(timeseries)
 60 |         score_utils.create_plot_file(traj, plot, True, False)
 61 | 
 62 |         # check if score is 100% at first iteration
 63 |         if scores_vs_time['all'][0] == 1.0:
 64 |             raise Exception("Scoring problem, 100% at first iteration for total")
 65 | 
 66 |     def test_atomtyper_elemental(self):
 67 |         """
 68 |         Test elemental atomtype sampler for hydrogen
 69 |         """
 70 |         atomtype_sampler = smarty.AtomTypeSampler(self.mols_alkethoh,
 71 |                 self.basetypes, self.basetypes, self.combine_decs,
 72 |                 replacements_filename = self.replacements,
 73 |                 reference_typed_molecules = self.mols_alkethoh_ref,
 74 |                 temperature = 0.1, verbose = False,
 75 |                 decorator_behavior = 'combinatorial-decorators', element=1)
 76 |         # run sampler with optional outputs
 77 |         traj = 'test_smarty.csv'
 78 |         plot = 'test_smarty.pdf'
 79 |         atomtype_sampler.run(5, traj)
 80 |         # test trajectory analysis functions on smarty output
 81 |         timeseries = score_utils.load_trajectory(traj)
 82 |         scores_vs_time = score_utils.scores_vs_time(timeseries)
 83 |         score_utils.create_plot_file(traj, plot, True, False)
 84 | 
 85 |         # check if score is 100% at first iteration
 86 |         if scores_vs_time['all'][0] == 1.0:
 87 |             raise Exception("Scoring problem, 100% at first iteration for total")
 88 | 
 89 | 
 90 |     def test_atomtyper_AlkEthOH(self):
 91 |         """
 92 |         Test atomtype sampler with correct "answers"
 93 |         """
 94 |         atomtype_sampler = smarty.AtomTypeSampler(self.mols_alkethoh,
 95 |                 self.basetypes, self.alkethoh_answers, self.combine_decs,
 96 |                 replacements_filename = self.replacements,
 97 |                 reference_typed_molecules = self.mols_alkethoh_ref,
 98 |                 temperature = 0, verbose = False)
 99 |         # Start sampling atom types.
100 |         fracfound = atomtype_sampler.run(2)
101 |         # Ensure fraction found is 1.0
102 |         if fracfound < 1.0:
103 |             raise Exception("Not finding 100% of AlkEthOH when starting from"
104 |                             " correct SMARTS.")
105 | 
106 |     def test_atomtyper_elemental_AlkEthOH(self):
107 |         """
108 |         Test elemental sampler with correct "answers"
109 |         """
110 |         atomtype_sampler = smarty.AtomTypeSampler(self.mols_alkethoh,
111 |                 self.basetypes, self.alkethoh_answers, self.combine_decs,
112 |                 replacements_filename = self.replacements,
113 |                 reference_typed_molecules = self.mols_alkethoh_ref,
114 |                 temperature = 0, verbose = False,
115 |                 decorator_behavior = 'combinatorial-decorators',element = 1)
116 |         # Start sampling atom types.
117 |         fracfound = atomtype_sampler.run(2)
118 | 
119 |         # Ensure fraction found is 1.0
120 |         if fracfound < 1.0:
121 |             raise Exception("Not finding 100% of Hydrogens of AlkEthOH when starting from"
122 |                             " correct SMARTS.")
123 | 
124 | 


--------------------------------------------------------------------------------
/smarty/tests/test_smirky_sampler.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | import smarty
  3 | from openforcefield.typing.chemistry.environment import *
  4 | from openforcefield.utils.utils import read_molecules
  5 | from smarty.sampler_smirky import *
  6 | from smarty import utils
  7 | from smarty import score_utils
  8 | from operator import itemgetter, attrgetter
  9 | import openeye.oechem
 10 | from openeye.oechem import *
 11 | import copy
 12 | import sys # used to exit while testing
 13 | 
 14 | class TestSmirkySampler(unittest.TestCase):
 15 |     def __init__(self, *args, **kwargs):
 16 |         """
 17 |         Initialize TestCase and then read in odds from files in smarty/data
 18 |         """
 19 |         unittest.TestCase.__init__(self,*args, **kwargs)
 20 | 
 21 |         self.atom_OR_bases = utils.parse_odds_file("odds_files/atom_OR_bases.smarts" , False)
 22 |         self.atom_OR_decors = utils.parse_odds_file("odds_files/atom_decorators.smarts", False)
 23 |         self.atom_AND_decors = utils.parse_odds_file("odds_files/atom_decorators.smarts", False)
 24 |         self.bond_OR_bases = utils.parse_odds_file("odds_files/bond_OR_bases.smarts", False)
 25 |         self.bond_AND_decors = utils.parse_odds_file("odds_files/bond_AND_decorators.smarts", False)
 26 |         self.atom_odds = utils.parse_odds_file("odds_files/atom_index_odds.smarts", False)
 27 |         self.bond_odds = utils.parse_odds_file("odds_files/bond_index_odds.smarts", False)
 28 |         self.molecules = read_molecules("test_filt1_tripos.mol2", False)
 29 |         self.SMIRFF = "forcefield/Frosst_AlkEthOH.ffxml"
 30 |         self.outputFile = 'test_smirky'
 31 |         replacement_file = utils.get_data_filename("odds_files/substitutions.smarts")
 32 |         self.replacements = smarty.AtomTyper.read_typelist(replacement_file)
 33 |         self.replacements = [ [short, smarts] for [smarts, short] in self.replacements]
 34 | 
 35 |         self.correctDict = {'VdW': [ ["[#1:1]-[#6]", 'HC'], [ "[#1:1]-[#6]-[#7,#8,F,#16,Cl,Br]", 'H1'], [ "[#1:1]-[#6](-[#7,#8,F,#16,Cl,Br])-[#7,#8,F,#16,Cl,Br]", 'H2'], [ "[#1:1]-[#6](-[#7,#8,F,#16,Cl,Br])(-[#7,#8,F,#16,Cl,Br])-[#7,#8,F,#16,Cl,Br]", 'H3'], [ "[#1:1]-[#8]", 'HO'], [ "[#6X4:1]", 'CT'], [ "[#8X2:1]", 'OS'], [ "[#8X2+0:1]-[#1]", 'OH'] ],
 36 |                 'Bond': [ ["[#6X4:1]-[#6X4:2]", 'CT-CT'], [ "[#6X4:1]-[#1:2]", 'CT-H'], [ "[#8:1]~[#1:2]", 'O~H'], [ "[#6X4:1]-[#8;X2;H1:2]", "CT-OH"], [ "[#6X4:1]-[#8;X2;H0:2]", "CT-OS"] ],
 37 |                 'Angle': [ [ "[a,A:1]-[#6&X4:2]-[a,A:3]", 'any-CT-any'], [ "[#1:1]-[#6&X4:2]-[#1:3]", "H-CT-H"], [ "[#6&X4:1]-[#6&X4:2]-[#6&X4:3]", 'CT-CT-CT'], [ "[#8&X2:1]-[#6&X4:2]-[#8&X2:3]", 'O-CT-O'], [ "[#6&X4:1]-[#8&X2:2]-[#1:3]", 'CT-OH-HO'], [ "[#6X4:1]-[#8X2:2]-[#6X4:3]", 'CT-OS-CT'] ],
 38 |                 'Torsion': [["[a,A:1]-[#6&X4:2]-[#6&X4:3]-[a,A:4]", 'any-CT-CT-any'], [ "[a,A:1]-[#6&X4:2]-[#8&X2:3]-[#1:4]", 'any-CT-OH-HO'], [ "[a,A:1]-[#6&X4:2]-[#8&X2:3]-[!#1:4]", 'any-CT-OS-!H'], [ "[#1:1]-[#6&X4:2]-[#6&X4:3]-[#1:4]", 'H-CT-CT-H'], [ "[#1:1]-[#6&X4:2]-[#6&X4:3]-[#6&X4:4]", 'H-CT-CT-CT'], [ "[#6&X4:1]-[#6&X4:2]-[#8&X2:3]-[#1:4]", 'CT-CT-OH-HO'], [ "[#6&X4:1]-[#6&X4:2]-[#6&X4:3]-[#6&X4:4]", 'CT-CT-CT-CT'], [ "[#6&X4:1]-[#6&X4:2]-[#8&X2:3]-[#6&X4:4]", 'CT-CT-OS-CT'], [ "[#6&X4:1]-[#8&X2:2]-[#6&X4:3]-[O&X2&H0:4]", 'CT-OS-CT-OS'], [ "[#8&X2:1]-[#6&X4:2]-[#6&X4:3]-[#8&X2:4]", 'O-CT-CT-O'], [ "[#8&X2:1]-[#6&X4:2]-[#6&X4:3]-[#1:4]", 'O-CT-CT-H'], [ "[#1:1]-[#6&X4:2]-[#6&X4:3]-[O&X2:4]", 'H-CT-CT-O'] ]}
 39 | 
 40 |     def test_correct_fragments(self):
 41 |         """
 42 |         Test score is 100% if correct VdW, Bond, Angles, or Torsions
 43 |         from AlkEthOH are used as input to the FragmentSampler
 44 |         """
 45 | 
 46 |         for typetag, initialtypes in self.correctDict.items():
 47 |             sampler = FragmentSampler(self.molecules, typetag,
 48 |                     self.atom_OR_bases, self.atom_OR_decors, self.atom_AND_decors,
 49 |                     self.bond_OR_bases, self.bond_AND_decors,
 50 |                     AtomIndexOdds = self.atom_odds, BondIndexOdds = self.bond_odds,
 51 |                     replacements = self.replacements, initialtypes = initialtypes,
 52 |                     SMIRFF = self.SMIRFF, temperature = 0.0, outputFile =self.outputFile)
 53 | 
 54 |             fracfound = sampler.run(1)
 55 |             self.assertAlmostEqual(fracfound, 1.0, msg = "Not finding 100%% of AlkEthOH when starting from correct %s SMIRKS." % typetag)
 56 | 
 57 |     def test_random_sampler(self):
 58 |         """
 59 |         Test FragmentSampler runs for 10 iterations with no failures
 60 |         Test score_utils functions with the outputFile
 61 |         """
 62 |         typetag = 'Torsion'
 63 |         sampler = FragmentSampler(self.molecules, typetag, self.atom_OR_bases,
 64 |                 self.atom_OR_decors, self.atom_AND_decors, self.bond_OR_bases,
 65 |                 self.bond_AND_decors,
 66 |                 AtomIndexOdds = self.atom_odds, BondIndexOdds = self.bond_odds,
 67 |                 replacements = self.replacements, initialtypes = None,
 68 |                 SMIRFF = self.SMIRFF, temperature = 0.0, outputFile = self.outputFile)
 69 |         fracfound = sampler.run(10)
 70 |         # load_trajectory converts csv file to dictionary
 71 |         timeseries = score_utils.load_trajectory('%s.csv' % self.outputFile)
 72 |         # scores_vs_time converts num/den entries to fractional scores
 73 |         scores_vs_time = score_utils.scores_vs_time(timeseries)
 74 |         # test plotting function
 75 |         score_utils.create_plot_file('%s.csv' % self.outputFile, '%s.pdf' % self.outputFile)
 76 | 
 77 | 
 78 |     def test_sampler_functions(self):
 79 |         """
 80 |         Test fragment sampler functions are working
 81 |         """
 82 |         typetag = 'Angle'
 83 |         sampler = FragmentSampler(self.molecules, typetag, self.atom_OR_bases,
 84 |                 self.atom_OR_decors, self.atom_AND_decors, self.bond_OR_bases,
 85 |                 self.bond_AND_decors,
 86 |                 AtomIndexOdds = self.atom_odds, BondIndexOdds = self.bond_odds,
 87 |                 replacements = self.replacements, initialtypes = None,
 88 |                 SMIRFF = self.SMIRFF, temperature = 0.0, outputFile = self.outputFile)
 89 | 
 90 |         typetags = [ ('VdW', 'NonbondedGenerator'),
 91 |                 ('Bond', 'HarmonicBondGenerator'),
 92 |                 ('Angle', 'HarmonicAngleGenerator'),
 93 |                 ('Torsion', 'PeriodicTorsionGenerator'),
 94 |                 ('Improper','PeriodicTorsionGenerator'),
 95 |                 ('None', None)]
 96 | 
 97 |         for (tag, expected) in typetags:
 98 |             sample_tag, edges, sym_odds = sampler.get_type_info(tag)
 99 |             self.assertEqual(sample_tag, expected, msg = "get_force_type(%s) should return %s, but %s was returned instead" % (tag, expected, sample_tag))
100 | 
101 |         # Running each method just to make sure they work
102 |         # get environment
103 |         env = sampler.envList[0]
104 |         new_env, prob = sampler.create_new_environment(env)
105 |         # check atom methods
106 |         atom,prob = sampler.pick_an_atom(new_env)
107 |         removeable = sampler.isremoveable(new_env,atom)
108 |         prob = sampler.add_atom(new_env,atom)
109 |         prob = sampler.change_atom(new_env, atom)
110 |         atom.addORtype('#6', ['X4'])
111 |         prob = sampler.change_ORdecorator(atom, self.atom_OR_decors)
112 |         prob = sampler.change_ORbase(atom, self.atom_OR_bases, self.atom_OR_decors)
113 |         prob = sampler.change_ANDdecorators(atom, self.atom_AND_decors)
114 | 
115 |         # check bond methods
116 |         bond,prob = sampler.pick_a_bond(new_env)
117 |         prob = sampler.change_bond(new_env, bond)
118 |         prob = sampler.change_ORbase(bond, self.bond_OR_bases, sampler.BondORdecorators)
119 |         prob = sampler.change_ANDdecorators(bond, self.bond_AND_decors)
120 | 
121 |     def test_no_reference_smirff(self):
122 |         """
123 |         Test that sampling still works with no reference SMIRFF provided
124 |         """
125 |         typetag = 'Bond'
126 |         sampler = FragmentSampler(self.molecules, typetag, self.atom_OR_bases,
127 |                 self.atom_OR_decors, self.atom_AND_decors, self.bond_OR_bases,
128 |                 self.bond_AND_decors,
129 |                 AtomIndexOdds = self.atom_odds, BondIndexOdds = self.bond_odds,
130 |                 replacements = self.replacements, initialtypes = None,
131 |                 SMIRFF = None, temperature = 0.0, outputFile = self.outputFile)
132 |         fracfound = sampler.run(10)
133 | 
134 | 


--------------------------------------------------------------------------------
/smarty/tests/test_utils.py:
--------------------------------------------------------------------------------
 1 | from functools import partial
 2 | from smarty.utils import get_data_filename
 3 | from unittest import TestCase
 4 | 
 5 | import smarty
 6 | 
 7 | class TestUtils(TestCase):
 8 |     def test_parse_odds_file(self):
 9 |         """
10 |         Testing parse_odds_file and get_data_filename
11 |         """
12 |         # parse_odds_file uses get_data_filename so this run checks both
13 |         odds = smarty.utils.parse_odds_file('odds_files/atom_index_odds.smarts', verbose = True)
14 |         odds = smarty.utils.parse_odds_file('odds_files/bond_OR_bases.smarts')
15 |         self.assertIsNone(odds[1], msg = "Parsing odds file with no odds should give None as the second entry")
16 | 


--------------------------------------------------------------------------------
/smarty/utils.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | """
  4 | Utility subroutines for SMARTY atom type sampling
  5 | 
  6 | """
  7 | #=============================================================================================
  8 | # GLOBAL IMPORTS
  9 | #=============================================================================================
 10 | 
 11 | import os
 12 | 
 13 | #=============================================================================================
 14 | # UTILITY ROUTINES
 15 | #=============================================================================================
 16 | 
 17 | def get_data_filename(relative_path):
 18 |     """Get the full path to one of the reference files in testsystems.
 19 | 
 20 |     In the source distribution, these files are in ``smarty/data/``,
 21 |     but on installation, they're moved to somewhere in the user's python
 22 |     site-packages directory.
 23 | 
 24 |     Parameters
 25 |     ----------
 26 |     name : str
 27 |         Name of the file to load (with respect to the repex folder).
 28 | 
 29 |     """
 30 | 
 31 |     from pkg_resources import resource_filename
 32 |     fn = resource_filename('smarty', os.path.join('data', relative_path))
 33 | 
 34 |     if not os.path.exists(fn):
 35 |         raise ValueError("Sorry! %s does not exist. If you just added it, you'll have to re-install" % fn)
 36 | 
 37 |     return fn
 38 | 
 39 | 
 40 | def parse_odds_file(filename, verbose = False):
 41 |     """
 42 |     parses files that have the form
 43 |     decorator       odds
 44 |     if only one column odds will be assumed equally probable
 45 | 
 46 |     Parameters
 47 |     -----------
 48 |     filename: string or file object
 49 |     may be an absolute file path, a path relative to the current working directory, a path relative to this module's data subdirectory (for built in decorator files), or an opten file-like object with a readlines() method.
 50 | 
 51 |     Returns
 52 |     --------
 53 |     choices: 2-tuple of the form ( [decorators], [odds] )
 54 |     """
 55 |     # if no file return None
 56 |     if filename is None:
 57 |         if verbose: print("No filename provided, returning None")
 58 |         return None
 59 | 
 60 |     # if input is a file object
 61 |     try:
 62 |         input_lines = filename.readlines()
 63 |         if verbose: print("Attempting to parse file '%s'" % filename.name)
 64 |     except AttributeError:
 65 |         if verbose: print("Attempting to parse file '%s'" % filename)
 66 |         try:
 67 |             ifs = open(filename, 'r')
 68 |             input_lines = ifs.readlines()
 69 |         except IOError:
 70 |             ifs = get_data_filename(filename)
 71 |             ifs = open(ifs, 'r')
 72 |             input_lines = ifs.readlines()
 73 |         except Exception as e:
 74 |             raise Exception("%s\nProvided file (%s) could not be parsed" % (str(e), filename))
 75 |     except Exception as e:
 76 |         msg = str(e) + '\n'
 77 |         msg += "Could not read data from file %s" % filename
 78 |         raise Exception(msg)
 79 | 
 80 |     # close file
 81 |     ifs.close()
 82 | 
 83 |     decorators = []
 84 |     odds = []
 85 |     noOdds = False
 86 |     for l in input_lines:
 87 |         # skip empty lines
 88 |         if len(l) == 0:
 89 |             continue
 90 |         # check for and remove comments
 91 |         comment = l.find('%')
 92 |         if comment == -1: # no comment
 93 |             entry = l.split()
 94 |         elif comment > 0: # remove trailing comment
 95 |             entry = l[:comment].split()
 96 |         else: # whole line is a comment skip
 97 |             continue
 98 | 
 99 |         # add decorator
100 |         if entry[0] == "''" or entry[0] == '""':
101 |             decorators.append('')
102 |         else:
103 |             decorators.append(entry[0])
104 | 
105 |         if len(entry) == 2:
106 |             odds.append(float(entry[1]))
107 |         elif len(entry) == 1:
108 |             noOdds = True
109 |         else:
110 |             raise Exception("Error entry (%s) in decorator file '%s' is invalid" % (l, filename))
111 | 
112 |     if (odds.count(0) == len(odds)) or noOdds:
113 |         odds = None
114 |         #TODO: handle case where 1 line is missing odds entry
115 | 
116 |     return (decorators, odds)
117 | 
118 | 


--------------------------------------------------------------------------------
/utilities/README.md:
--------------------------------------------------------------------------------
1 | # Utilities related to SMARTY and SMIRKY
2 | 
3 | * `test_smirks_or_environment_speed/` - compares the computing time cost of storing SMIRKS strings with that of storing Chemical Environments
4 | 


--------------------------------------------------------------------------------
/utilities/test_smirks_or_environment_speed/README.md:
--------------------------------------------------------------------------------
 1 | # Testing slow down when storing ChemicalEnvironments
 2 | 
 3 | We were concerned that storing chemical environments would be slower than storing strings. 
 4 | Since ChemicalEnvironments can easily be converted to and from SMIRKS strings you could store a list of SMIRKS instead of a list of chemical environments when sampling parameter types (such as smirky). 
 5 | The notebook `testing_smirks_speed.ipynb` logs the time to store a list of SMIRKS or environments for a number of steps. It uses `Torsion_0_0.00e+00_results.smarts` as an example of the complex SMIRKS patterns that can be generated during a smirky simulation. 
 6 | 
 7 | Below are the results for this test. For each test, data are reported in this order:
 8 | * Parameter type list
 9 |     - generic: starts with only `"[*:1]~[*:2]~[*:3]~[*:4]"`
10 |     - short: starts with first 10 SMIRKS in `*_results.smarts`
11 |     - long: starts with all 82 SMIRKS in `*_results.smarts`
12 | * Time in minutes to do X iterations storing SMIRKS strings
13 | * Time in minutes to do X iterations storing Chemical Environments for each input SMIRKS
14 | * Difference in Chemical Environment and SMIRKS time in minutes 
15 | 
16 | ```
17 | ------------------------------  2 Iterations  ------------------------------
18 |                short    1.97e-05    6.54e-05    4.57e-05
19 |                 long    1.93e-05    4.58e-04    4.39e-04
20 |              generic    1.34e-05    1.82e-05    4.84e-06
21 | 
22 | 
23 | ------------------------------  10 Iterations  ------------------------------
24 |                short    7.12e-05    1.16e-04    4.53e-05
25 |                 long    8.27e-05    5.40e-04    4.58e-04
26 |              generic    6.60e-05    6.47e-05    -1.23e-06
27 | 
28 | 
29 | ------------------------------  100 Iterations  ------------------------------
30 |                short    6.19e-04    7.01e-04    8.20e-05
31 |                 long    7.44e-04    1.36e-03    6.12e-04
32 |              generic    5.49e-04    6.28e-04    7.92e-05
33 | 
34 | 
35 | ------------------------------  1000 Iterations  ------------------------------
36 |                short    7.59e-03    1.73e-02    9.76e-03
37 |                 long    8.42e-03    2.10e-02    1.26e-02
38 |              generic    6.89e-03    1.61e-02    9.20e-03
39 | 
40 | 
41 | ------------------------------  10000 Iterations  ------------------------------
42 |                short    8.89e-02    1.09e+00    9.98e-01
43 |                 long    9.37e-02    1.17e+00    1.08e+00
44 |              generic    7.18e-02    1.12e+00    1.05e+00
45 | 
46 | 
47 | ------------------------------  30000 Iterations  ------------------------------
48 |                short    3.61e-01    1.04e+01    1.00e+01
49 |                 long    4.51e-01    1.08e+01    1.04e+01
50 |              generic    3.13e-01    1.01e+01    9.76e+00
51 | ```
52 | 
53 | We concluded from this that while the timing difference isn't very significant for the number of iterations typically run with smirky, future move proposal engines would probably benefit from storing SMIRKS patterns rather than Chemical Environments. 
54 | 


--------------------------------------------------------------------------------
/utilities/test_smirks_or_environment_speed/Torsion_0_0.00e+00_results.smarts:
--------------------------------------------------------------------------------
  1 | % Results for sampling Torsions at 0.00e+00
  2 | %% SMIRKS patterns for final results are below
  3 | % followed by a their matched reference SMIRKS from /beegfs/DATA/mobley/bannanc/smirky_testing/SMIRKY/inputFiles//smirff99Frosst.ffxml
  4 | %Final Score was 51.963 %
  5 | %%
  6 | [*:1]~[#6:2]~[#6:3]~[*:4]                          C~C                 
  7 | % [*:1]~[#6X3:2]:[#6X3:3]~[*:4]                    t45                 
  8 | [*:1]~[#6:2]~[#7:3]~[*:4]                          C~N                 
  9 | % [*:1]-[#6X4:2]-[#7X3$(*~[#6X3,#6X2]):3]~[*:4]    t59                 
 10 | [*:1]~[#6:2]~[#8:3]~[*:4]                          C~O                 
 11 | % [*:1]-[#6X4:2]-[#8X2H0:3]-[*:4]                  t85                 
 12 | [*:1]~[#6:2]~[#15:3]~[*:4]                         C~P                 
 13 | % [*:1]~[#15:2]-[#6:3]-[*:4]                       t112                
 14 | [*:1]~[#6:2]~[#16:3]~[*:4]                         C~S                 
 15 | % [*:1]-[#16X2,#16X3+1:2]-[#6:3]~[*:4]             t104                
 16 | [*:1]~[#7:2]~[#7:3]~[*:4]                          N~N                 
 17 | % [*:1]~[#7X2:2]-[#7X3:3]~[*:4]                    t124                
 18 | [*:1]~[#7:2]~[#8:3]~[*:4]                          N~O                 
 19 | % [*:1]-[#8X2r5:2]-;@[#7X3r5:3]~[*:4]              t115                
 20 | [*:1]~[#7:2]~[#16:3]~[*:4]                         N~S                 
 21 | % [#8X1:1]~[#16X4,#16X3+0:2]-[#7X3:3]-[#6X3:4]     t139                
 22 | [*:1]~[#8:2]~[#15:3]~[*:4]                         O~P                 
 23 | % [*:1]-[#8X2:2]-[#15:3]~[*:4]                     t146                
 24 | [*:1]~[#8:2]~[#16:3]~[*:4]                         O~S                 
 25 | % [*:1]~[#16X4,#16X3+0:2]-[#8X2:3]-[*:4]           t144                
 26 | [*:1]~[#16:2]~[#16:3]~[*:4]                        S~S                 
 27 | % [*:1]-[#16X2,#16X3+1:2]-[#16X2,#16X3+1:3]-[*:4]  t145                
 28 | [*;X3:1](~[#6:2]~[#7:3]~[*:4])~[#6]                2936                
 29 | % [*:1]-,:[#6X3:2]=[#7X2:3]-[*:4]                  t76                 
 30 | [*;X3:1](~[#6:2]~[#7:3]~[*;a:4])~[$ewg1]           8087                
 31 | % [*:1]=[#7X2,#7X3+1:2]-[#6X3:3]=,:[*:4]           t73                 
 32 | [*;X4:1]~[#7:2]~[#16:3](~[#8])~[*:4]               7890                
 33 | % [#6X3:1]-[#16X4,#16X3+0:2]-[#7X4,#7X3:3]-[#6X4:4] t134                
 34 | [#7!R:1]~[#7:2]~[#7:3]~[*:4]                       2323                
 35 | % [*:1]~[*:2]=[#6,#7,#16,#15;X2:3]=[*:4]           t150                
 36 | [*;X3:1]~[#6:2]~[#7:3]~[*;a:4]~[#1]                2632                
 37 | % [#6X3:1]:[#7X2:2]:[#6X3:3]:[#6X3:4]              t75                 
 38 | [#1:1]~[#7:2]~[#7:3]~[#1:4]                        2525                
 39 | % [*:1]-[#7X4,#7X3:2]-[#7X3$(*~[#6X3,#6X2]):3]~[*:4] t121                
 40 | [*;X4:1]~[#7:2]~[#16:3](~[#8])~[$ewg2&:4]          4181                
 41 | % [#8X1:1]~[#16X4,#16X3+0:2]-[#7X4,#7X3:3]-[#6X4:4] t136                
 42 | [*;!X4:1]~[#7:2]~[#16:3]~[#6H2:4]                  3345                
 43 | % [#6X4:1]-[#16X4,#16X3+0:2]-[#7X4,#7X3:3]-[#1:4]  t131                
 44 | [*;!R:1]~[#6:2]~[#6:3]~[*;!R:4]                    8243                
 45 | % [*:1]-[#6X4;r3:2]-@[#6X4;r3:3]-[*:4]             t16                 
 46 | [*;!R:1]~[#6:2]~[#6:3]~[*;!R:4]~[#1]               5859                
 47 | % [#1:1]-[#6X4:2]-[#6X4:3]-[#6X4:4]                t5                  
 48 | [*;a:1]~[#6:2]~[#16:3]~[*:4]                       1983                
 49 | % [#6X3:1]-@[#16X2,#16X1-1,#16X3+1:2]-@[#6X3,#7X2;r5:3]=@[#6,#7;r5:4] t106                
 50 | [#8!X4:1]~[#6:2]~[#7:3]~[*:4]                      5660                
 51 | % [*:1]~[#7X3,#7X2-1:2]-[#6X3:3]~[*:4]             t67                 
 52 | [*;!R:1](~[#6:2]~[#6:3]~[*;!R:4]~[#1])~[$ewg2]     9500                
 53 | % [#6X4:1]-[#6X4:2]-[#6X3:3]-[#7X3:4]              t24                 
 54 | [*;!R:1]~[#6:2]~[#6:3]~[$ewg1&X2;!R:4]             1122                
 55 | % [#1:1]-[#6X4:2]-[#6X4:3]-[#8X2:4]                t10                 
 56 | [#6H2:1]~[#8:2]~[#15:3]~[*;!X1:4]                  8301                
 57 | [*;!R:1](~[#6:2]~[#6:3]~[$ewg1&X2;!R:4])~[#6]      7607                
 58 | % [*:1]~[#6X3:2]-[#6X4:3]-[*:4]                    t18                 
 59 | [*;!R:1]~[#6:2]~[#6:3]=[*;!R:4]                    6470                
 60 | % [*:1]~[#6X3:2]-[#6X3:3]~[*:4]                    t44                 
 61 | [*;X3:1](~[#6:2]~[#7X3:3]~[*:4])~[#6]              1918                
 62 | % [*:1]-[#7X3;r5:2]-@[#6X3;r5:3]~[*:4]             t70                 
 63 | [#7+0X3:1]~[#6:2]~[#8:3]~[*;!X3:4]~[#6]            9934                
 64 | % [#6X4:1]-[#8X2:2]-[#6X4:3]-[#7X3:4]              t89                 
 65 | [*;X3:1](~[#6:2]~[#7R0:3]~[*:4])~[#6]              1518                
 66 | % [*:1]~[#7X3,#7X2-1:2]-!@[#6X3:3]~[*:4]           t68                 
 67 | [*:1]~[#7:2]~[#7:3]~[*;r6:4]                       7562                
 68 | % [*:1]~[#7X2:2]=,:[#7X2:3]~[*:4]                  t126                
 69 | [*;!R:1]~[#6:2]~[#6:3]#[*;!R:4]~[#1]               5488                
 70 | % [*:1]~[*:2]-[*:3]#[*:4]                          t149                
 71 | [*;a:1]~[#6:2]~[#16:3]~;!@[*:4]                    4949                
 72 | % [#6:1]-[#16X4,#16X3+0:2]-[#6X3:3]~[*:4]          t111                
 73 | [$ewg1&+0:1]~[#7:2]~[#16:3]~[*:4]                  7297                
 74 | % [#8X1:1]~[#16X4,#16X3+0:2]-[#7X3:3]-[#7X2:4]     t140                
 75 | [#7H1:1]~[#6:2]~[#6:3]~[#7A:4]                     6854                
 76 | % [#7X3:1]-[#6X4:2]-[#6X3:3]-[#7X3:4]              t23                 
 77 | [#1!X4:1]~[#7:2]~[#16:3]~[*;R2:4]                  4082                
 78 | % [#6X3:1]-[#16X4,#16X3+0:2]-[#7X4,#7X3:3]-[#1:4]  t132                
 79 | [*;!R:1](~[#6:2]~[#6X4:3]~[*;!R:4]~[#1])~[$ewg2]   5403                
 80 | % [#6X4:1]-[#6X4:2]-[#6X4:3]-[#6X4:4]              t3                  
 81 | [*;X3:1](~[#6H2:2]~[#7R0:3]~[*:4])~[#6]            1131                
 82 | % [*:1]-[#6X4:2]-[#7X3:3]-[*:4]                    t51                 
 83 | [*;!R:1](~[#6:2]~[#6X4:3]~[*;R:4]~[#1])~[$ewg2]    8238                
 84 | % [#6X4;r3:1]-[#6X4;r3:2]-[#6X4;r3:3]-[*:4]        t17                 
 85 | [#8!X1;!R:1]~[#6:2]~[#6:3]~[$ewg1&X2;!R:4]         1268                
 86 | % [#8X2:1]-[#6X4:2]-[#6X4:3]-[#8X2:4]              t6                  
 87 | [*:1]~[#6!X4:2]~[#8:3]~[*:4]                       2683                
 88 | % [#1:1]-[#8X2:2]-[#6X3:3]=[#8X1:4]                t99                 
 89 | [#8!X4:1]~[#6X3:2]~[#7:3]~[#1:4]                   6762                
 90 | % [#1:1]-[#7X3:2]-[#6X3:3]=[#8,#16,#7:4]           t69                 
 91 | [*:1]~[#6:2]~[#6:3]~[#35H0:4]                      5347                
 92 | % [#1:1]-[#6X4:2]-[#6X4:3]-[#35:4]                 t13                 
 93 | [*:1]-[#6:2]~[#7r6:3]:[*:4]                        9273                
 94 | % [*:1]~[#7X2,#7X3$(*~[#8X1]):2]:[#6X3:3]~[*:4]    t74                 
 95 | [*;X3:1](~[#6:2]~[#7:3]~[*;H3:4]):[#6]             1413                
 96 | % [*:1]-[#7X4:2]-[#6X3:3]~[*:4]                    t58                 
 97 | [#1:1]~[#6:2]~[#6:3]~[#7A:4]                       9141                
 98 | % [*:1]-[#6X4:2]-[#6X4:3]-[*:4]                    t2                  
 99 | [#1:1]~[#6:2]~[#6:3]~[#1:4]                        5525                
100 | % [#1:1]-[#6X4:2]-[#6X4:3]-[#1:4]                  t4                  
101 | [*;!R:1]~[#6:2]~[#6;a:3]~[*;!R:4]                  4246                
102 | % [*:1]-,:[#6X3:2]=[#6X3:3]-,:[*:4]                t46                 
103 | [*:1]~[#6:2]~[#7:3]~[$ewg2&A;+1:4]                 3498                
104 | % [*:1]~[#7X2:2]-[#6X4:3]-[*:4]                    t64                 
105 | [#8a:1]~[#7:2]~[#7:3]~[*;r6:4]                     7797                
106 | % [*:1]~[#7X3+1:2]=,:[#7X2:3]~[*:4]                t127                
107 | [*a:1](~[#6:2]~[#6:3]~[#7A:4])~[$ewg1]             6650                
108 | % [*:1]~[#6X3:2]-[#6X3$(*=[#8,#16,#7]):3]~[*:4]    t48                 
109 | [*;X3:1](~[#6H2:2]~[#7R0:3]~[*;X3:4])~[#6]         7694                
110 | % [#6X3:1]-[#7X3:2]-[#6X4:3]-[#6X3:4]              t60                 
111 | [#1!X4:1]~[#7:2]~[#16:3]~[$ewg1&:4]                4831                
112 | % [#8X1:1]~[#16X4,#16X3+0:2]-[#7X4,#7X3:3]-[#1:4]  t135                
113 | [#6:1]~[#6!X4:2]~[#8:3]~[*:4]                      6039                
114 | % [*:1]~[#6X3:2]-[#8X2:3]-[#1:4]                   t96                 
115 | [*;!R:1](~[#6:2]~[#6X4:3]~[*;!R:4]~[#1])(~[#8])(~[#9])~[$ewg2] 5351                
116 | [#8!X4R2:1](~[#6:2]~[#7:3]~[*:4])~[#6]             3851                
117 | % [#8X2H0:1]-[#6X4:2]-[#7X3:3]-[#6X3:4]            t62                 
118 | [#16:1](~[#6:2]~[#6:3]~[#7A:4]~[#6])~[*]           2631                
119 | % [#16X2,#16X1-1,#16X3+1:1]-[#6X3:2]-[#6X4:3]-[#7X3$(*-[#6X3,#6X2]):4] t27                 
120 | [*:1]~[#6:2]~[#16:3]~[*;H2:4]                      9632                
121 | % [*:1]-[#16X2,#16X3+1:2]-[#6:3]-[#1:4]            t105                
122 | [#8!X2;!R:1]~[#6:2]~[#6:3]=[*;!R:4]                9260                
123 | % [#6X3:1]=[#6X3:2]-[#6X3:3]=[#8X1:4]              t49                 
124 | [*;X4:1]~[#6:2]~[#8:3]~[*;X4:4]                    2174                
125 | % [#6X4:1]-[#6X4:2]-[#8X2H0:3]-[#6X4:4]            t86                 
126 | [#7+0;R:1](~[#6:2]~[#8:3]~[*!X4;!X3:4])~[#6]       6501                
127 | % [*:1]-[#6X4:2]-[#8X2:3]-[#1:4]                   t83                 
128 | [*:1]~[#7:2]~[#16:3]~[#7X2:4]                      1601                
129 | % [*:1]-[#16X2,#16X3+1:2]-[!#6:3]~[*:4]            t129                
130 | [*;!R:1]~[#6:2]~[#6:3]~[#9H0;!R:4]                 7827                
131 | % [#9:1]-[#6X4:2]-[#6X4:3]-[#9:4]                  t7                  
132 | [*;X4:1]~[#7:2]~[#16:3](~[#8])~[*;X4:4]            4974                
133 | % [#6X4:1]-[#16X4,#16X3+0:2]-[#7X4,#7X3:3]-[#6X4:4] t133                
134 | [*:1]~[#6!X4:2]~[#8:3]~[#6!X3:4]                   5843                
135 | % [*:1]~[#6X3:2]-[#8X2:3]-[*:4]                    t95                 
136 | [*:1]~[#6:2]~[#16:3]~[$ewg2&A:4]                   2986                
137 | % [*:1]~[#16X4,#16X3+0:2]-[#6X3:3]~[*:4]           t110                
138 | [*;!R;+0:1]~[#6X4:2]~[#6:3]=[*;!R:4]               6290                
139 | % [*:1]-[#6X4:2]-[#6X3:3]=[*:4]                    t21                 
140 | [*;!R:1]~[#6:2]~[#6:3]~[#17!X3;!R:4]               9724                
141 | % [#1:1]-[#6X4:2]-[#6X4:3]-[#17:4]                 t12                 
142 | [*:1]~[#6X2:2]~[#7:3]~[*:4]                        4773                
143 | [#8!X2:1]~[#6!X4:2]~[#8:3]~[#6!X3:4]               2494                
144 | % [#8,#16,#7:1]=[#6X3:2]-[#8X2H0:3]-[#6X4:4]       t100                
145 | [*;!R;+0:1]~[#6X4:2]~[#6:3]=[*;!R:4]~[#8]          9066                
146 | [$halogen&;!R:1]~[#6:2]~[#6:3]~[#17!X3;!R:4]       8919                
147 | % [#17:1]-[#6X4:2]-[#6X4:3]-[#17:4]                t8                  
148 | [#6H0:1]~[#7:2](~[#7:3]~[*:4])~[$ewg2]             3774                
149 | % [*:1]-[#7X3$(*-[#6X3,#6X2])r5:2]-@[#7X3$(*-[#6X3,#6X2])r5:3]~[*:4] t123                
150 | [#7+0X3:1]~[#6:2]~[#8:3]~[*;!X3:4]~[#6X3]~[#6]     3876                
151 | % [*:1]~[#6X3:2](=[#8,#16,#7])-[#8X2H0:3]-[*:4]    t97                 
152 | [#6;R0:1]~[#6!X4:2]~[#8:3]~[*:4]                   4545                
153 | % [*:1]~[#6X3:2](=[#8,#16,#7])-[#8:3]-[#1:4]       t98                 
154 | [$ewg1&+0;!X2:1]~[#7:2]~[#16:3]~[*:4]              6619                
155 | % [*:1]~[#16X4,#16X3+0:2]-[#7:3]~[*:4]             t130                
156 | [#6;R0:1]~[#6!X4:2]~[#8:3]~[$ewg2&X3:4]            3713                
157 | [#6H0;!R:1]~[#7:2](~[#7:3]~[*:4])~[$ewg2]          5329                
158 | [*:1]-[#6:2]~[#16:3]~[$ewg2&A:4]                   3291                
159 | % [*:1]~[#16X4,#16X3+0:2]-[#6X4:3]-[*:4]           t107                
160 | [#7H2:1]~[#6:2]~[#6:3]~[#7A:4]                     5987                
161 | [*:1]=[#7:2]~[#16:3]~[*;!R:4]                      6651                
162 | % [#8X1:1]~[#16X4,#16X3+0:2]-[#7X2:3]~[#6X3:4]     t143                
163 | [#16:1]~[#6:2]~[#6:3]-[#7A;H2:4]                   1329                
164 | [*;!R:1](~[#6:2]~[#6:3]~[#17!X3;!R:4])~[#7]        3104                
165 | [#8!X2;!R:1]~[#6:2]~[#6:3]=[*;!R:4]~[#7]           1340                
166 | [*:1]~[#7:2]~[#8:3]-[*:4]                          6442                
167 | % [*:1]~[#8X2:2]-[#7:3]~[*:4]                      t114                
168 | [*;X3:1](~[#16])(~[#6:2]~[#7:3]~[*;H3:4]):[#6]     6178                
169 | % [*:1]-[#6X4:2]-[#7X4:3]-[*:4]                    t50                 
170 | [*H2;!R:1]~[#6:2]~[#6:3]~[#9H0;!R:4]               6291                
171 | [*;X3:1](~[#1])(~[#6:2]~[#7X3:3]~[#8!X2:4])~[#6]   1521                
172 | % [#8X1:1]~[#7X3:2]~[#6X3:3]~[*:4]                 t71                 
173 | [#8!X1;!R:1]~[#6:2]~[#6:3]~[$ewg1&X2;!R:4]~[#15]   3796                
174 | [#7H1:1]~[#6:2]~[#6:3]~[#7A!X3:4]                  7261                
175 | [*A:1]~[#7:2]~[#7:3]:[*;r6:4]~[#8]                 4404                
176 | % [*:1]-[#7X3$(*-[#6X3,#6X2]):2]-[#7X3$(*-[#6X3,#6X2]):3]-[*:4] t122                
177 | [*;X3:1](~[#6:2]~[#7!X3:3]~[*;a:4](~[#16])~[#7])~[$ewg1] 7356                
178 | % [*:1]=[#7X2,#7X3+1:2]-[#6X3:3]-[*:4]             t72                 
179 | [*;!R:1](~[#15])(~[#6:2]~[#6;a:3]~[*;!R:4])~[$halogen] 4636                
180 | [*;X3:1](~[#6H2:2]~[#7R0:3]=[*;X3:4])~[#6]         8963                
181 | % [#6X3:1]=[#7X2,#7X3+1:2]-[#6X4:3]-[#6X3,#6X4:4]  t66                 
182 | [#17;!R:1]~[#6:2]~[#6:3]~[#9H0;!R:4]               7150                
183 | [*:1]~[#6!X4:2]~[#15:3]~[*:4]                      8692                
184 | % [*:1]~[#15:2]-[#6X3:3]~[*:4]                     t113                
185 | [*:1]=[#7:2]~[#16:3]~[*;!R:4]~[#1]                 8573                
186 | [#8+0;!R:1]~[#6:2]~[#6:3]~[#17!X3;!R:4]            5443                
187 | [*;!R:1](~[#6:2]~[#6:3]~[$ewg1&X2;!R:4]~[$ewg2])~[#6] 5499                
188 | % [*:1]-[#6X4;r3:2]-[#6X3:3]~[*:4]                 t28                 
189 | [*;!R:1](~[#6:2]~[#6:3]~[$ewg1&X2;!R:4]~[#15])~[#6] 3750                
190 | [$ewg1&H0:1]~[#6:2]~[#8:3]~[$ewg1&H0:4]            2097                
191 | [#16;H1:1](~[#6:2]~[#6:3]~[#7A:4]~[#6])~[*]        7633                
192 | [#16H0:1]~[#6:2]~[#6:3]-[#7A;H2:4]                 9637                
193 | % [#16X2,#16X1-1,#16X3+1:1]-[#6X3:2]-[#6X4:3]-[#7X4,#7X3:4] t26                 
194 | [#6H2:1]~[#8:2]~[#15:3]~[$ewg1&!X4;!X1:4]          1009                
195 | % [#8X2:1]-[#15:2]-[#8X2:3]-[#6X4:4]               t147                
196 | [*:1]~[#6!X4;!X3:2]~[#8:3]~[*:4]                   5204                
197 | [*;X3:1]~[#7:2]~[#16:3](~[#8])~[*;X4:4]            7900                
198 | % [#6X4:1]-[#16X4,#16X3+0:2]-[#7X3:3]-[#6X3:4]     t138                
199 | [#8!X4R2:1](~[#6:2]~[#7:3]~[$ewg1&H1:4])~[#6]      6407                
200 | [#7H2:1]~[#6:2]~[#6:3]-[#7A:4]                     8242                
201 | % [*:1]-[#6X4:2]-[#6X4;r3:3]-[*:4]                 t14                 
202 | [#7!X4:1]~[#7:2]~[#8:3]~[*:4]                      3625                
203 | % [*:1]-[#8X2r5:2]-;@[#7X2r5:3]~[*:4]              t116                
204 | [$ewg1&X4;!R:1]~[#6:2]~[#6:3]~[$ewg1&X2;!R:4]      1369                
205 | [*;!R:1](~[#6:2]~[#6X4:3]~[*;R:4](~[#1])~[$ewg2])~[$ewg2&H2] 9719                
206 | [*;!R:1](~[#6:2]~[#6X4:3]~[$ewg2&A;R:4]~[#1])~[$ewg2] 2659                
207 | [#8!X1;!R:1]~[#6:2]~[#6:3]~[$ewg1&H2;!R:4]         4862                
208 | 


--------------------------------------------------------------------------------
/utilities/test_smirks_or_environment_speed/testing_smirks_speed.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Does removing ChemicalEnvironments speed up sampling?\n",
  8 |     "\n",
  9 |     "This IPython notebook is used to determine whether removing the list of ChemicalEnvironments would significantly increase the speed of smirky."
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "code",
 14 |    "execution_count": 2,
 15 |    "metadata": {
 16 |     "collapsed": true
 17 |    },
 18 |    "outputs": [],
 19 |    "source": [
 20 |     "from openforcefield.typing.chemistry.environment import TorsionChemicalEnvironment\n",
 21 |     "import time\n",
 22 |     "import copy\n",
 23 |     "import numpy as np\n",
 24 |     "from numpy import random\n",
 25 |     "from smarty.atomtyper import AtomTyper"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "code",
 30 |    "execution_count": 3,
 31 |    "metadata": {
 32 |     "collapsed": true
 33 |    },
 34 |    "outputs": [],
 35 |    "source": [
 36 |     "def smirks_sampling(smirks, iterations):\n",
 37 |     "    \"\"\"\n",
 38 |     "    This method takes in a list of smirks strings, then randomly picks one,\n",
 39 |     "    creates a chemical environment, and writes back out the smirks string.\n",
 40 |     "    \n",
 41 |     "    It only stores the smirks strings and only sometimes keeps the \"new one\";\n",
 42 |     "    the new one is just a copy of the randomly chosen current one.\n",
 43 |     "    \"\"\"\n",
 44 |     "    current = copy.deepcopy(smirks)\n",
 45 |     "    for i in range(iterations):\n",
 46 |     "        change = random.choice(current)\n",
 47 |     "        \n",
 48 |     "        env = TorsionChemicalEnvironment(smirks = change)\n",
 49 |     "        new_smirks = env.asSMIRKS()\n",
 50 |     "        \n",
 51 |     "        # assume we accept a move 30% of the time and extend the list\n",
 52 |     "        if random.rand() < 0.3: \n",
 53 |     "            current.append(new_smirks)\n",
 54 |     "\n",
 55 |     "    return current\n",
 56 |     "\n",
 57 |     "def environment_sampling(smirks, iterations):\n",
 58 |     "    \"\"\"\n",
 59 |     "    This method takes in a list of smirks, turns them into chemical environments,\n",
 60 |     "    and then iterates, where some percentage of the time you keep the new environment\n",
 61 |     "    \"\"\"\n",
 62 |     "    current = [TorsionChemicalEnvironment(smirks = c) for c in smirks]\n",
 63 |     "    \n",
 64 |     "    for i in range(iterations):\n",
 65 |     "        change = copy.deepcopy(random.choice(current))\n",
 66 |     "        new_smirks = change.asSMIRKS\n",
 67 |     "        \n",
 68 |     "        # keep the new one 30% of the time\n",
 69 |     "        if random.rand() < 0.3:\n",
 70 |     "            current.append(change)\n",
 71 |     "            \n",
 72 |     "    return [e.asSMIRKS for e in current]\n",
 73 |     "\n",
 74 |     "def run_samplings(smirks, iterations):\n",
 75 |     "    \"\"\"\n",
 76 |     "    This method runs smirks_sampling and environment sampling and returns the time for each using \n",
 77 |     "    the same input list and number of iterations\n",
 78 |     "    \"\"\"\n",
 79 |     "    \n",
 80 |     "    # smirks first \n",
 81 |     "    init_time = time.time()\n",
 82 |     "    smirks = smirks_sampling(smirks, iterations)\n",
 83 |     "    end_time = time.time()\n",
 84 |     "    smirks_time = (end_time - init_time) / 60.0\n",
 85 |     "    \n",
 86 |     "    # environments\n",
 87 |     "    init_time = time.time()\n",
 88 |     "    env_smirks = environment_sampling(smirks, iterations)\n",
 89 |     "    end_time = time.time()\n",
 90 |     "    env_time = (end_time - init_time) / 60.0\n",
 91 |     "    \n",
 92 |     "    return smirks_time, env_time"
 93 |    ]
 94 |   },
 95 |   {
 96 |    "cell_type": "code",
 97 |    "execution_count": 6,
 98 |    "metadata": {},
 99 |    "outputs": [
100 |     {
101 |      "name": "stdout",
102 |      "output_type": "stream",
103 |      "text": [
104 |       "------------------------------  2 Iterations  ------------------------------\n",
105 |       "               short\t1.97e-05\t6.54e-05\t4.57e-05\n",
106 |       "                long\t1.93e-05\t4.58e-04\t4.39e-04\n",
107 |       "             generic\t1.34e-05\t1.82e-05\t4.84e-06\n",
108 |       "\n",
109 |       "\n",
110 |       "------------------------------  10 Iterations  ------------------------------\n",
111 |       "               short\t7.12e-05\t1.16e-04\t4.53e-05\n",
112 |       "                long\t8.27e-05\t5.40e-04\t4.58e-04\n",
113 |       "             generic\t6.60e-05\t6.47e-05\t-1.23e-06\n",
114 |       "\n",
115 |       "\n",
116 |       "------------------------------  100 Iterations  ------------------------------\n",
117 |       "               short\t6.19e-04\t7.01e-04\t8.20e-05\n",
118 |       "                long\t7.44e-04\t1.36e-03\t6.12e-04\n",
119 |       "             generic\t5.49e-04\t6.28e-04\t7.92e-05\n",
120 |       "\n",
121 |       "\n",
122 |       "------------------------------  1000 Iterations  ------------------------------\n",
123 |       "               short\t7.59e-03\t1.73e-02\t9.76e-03\n",
124 |       "                long\t8.42e-03\t2.10e-02\t1.26e-02\n",
125 |       "             generic\t6.89e-03\t1.61e-02\t9.20e-03\n",
126 |       "\n",
127 |       "\n",
128 |       "------------------------------  10000 Iterations  ------------------------------\n",
129 |       "               short\t8.89e-02\t1.09e+00\t9.98e-01\n",
130 |       "                long\t9.37e-02\t1.17e+00\t1.08e+00\n",
131 |       "             generic\t7.18e-02\t1.12e+00\t1.05e+00\n",
132 |       "\n",
133 |       "\n",
134 |       "------------------------------  30000 Iterations  ------------------------------\n",
135 |       "               short\t3.61e-01\t1.04e+01\t1.00e+01\n",
136 |       "                long\t4.51e-01\t1.08e+01\t1.04e+01\n",
137 |       "             generic\t3.13e-01\t1.01e+01\t9.76e+00\n",
138 |       "\n",
139 |       "\n"
140 |      ]
141 |     }
142 |    ],
143 |    "source": [
144 |     "long = AtomTyper.read_typelist('Torsion_0_0.00e+00_results.smarts')\n",
145 |     "long = [smirks for (smirks,name) in long if not '$' in smirks]\n",
146 |     "smirks_lists = {\n",
147 |     "    'generic':['[*:1]~[*:2]~[*:3]~[*:4]'],\n",
148 |     "    'short':copy.deepcopy(long[:10]),\n",
149 |     "    'long':copy.deepcopy(long)}\n",
150 |     "\n",
151 |     "iterations = [2, 10, 100, 1000, 10000, 30000]\n",
152 |     "\n",
153 |     "for its in iterations:\n",
154 |     "    print('%s  %i Iterations  %s' % ('-'*30, its, '-'*30))\n",
155 |     "    for title, smirks in smirks_lists.items():\n",
156 |     "        smirks_time, env_time = run_samplings(smirks, its)\n",
157 |     "        dif = env_time - smirks_time\n",
158 |     "        print(\"%20s\\t%.2e\\t%.2e\\t%.2e\" % (title, smirks_time, env_time,dif))\n",
159 |     "    print('\\n')"
160 |    ]
161 |   },
162 |   {
163 |    "cell_type": "code",
164 |    "execution_count": null,
165 |    "metadata": {
166 |     "collapsed": true
167 |    },
168 |    "outputs": [],
169 |    "source": []
170 |   }
171 |  ],
172 |  "metadata": {
173 |   "kernelspec": {
174 |    "display_name": "Python 3",
175 |    "language": "python",
176 |    "name": "python3"
177 |   },
178 |   "language_info": {
179 |    "codemirror_mode": {
180 |     "name": "ipython",
181 |     "version": 3
182 |    },
183 |    "file_extension": ".py",
184 |    "mimetype": "text/x-python",
185 |    "name": "python",
186 |    "nbconvert_exporter": "python",
187 |    "pygments_lexer": "ipython3",
188 |    "version": "3.5.3"
189 |   }
190 |  },
191 |  "nbformat": 4,
192 |  "nbformat_minor": 2
193 | }
194 | 


--------------------------------------------------------------------------------