├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── devtools
├── conda-recipe
│ ├── README.md
│ ├── build.sh
│ └── meta.yaml
└── travis-ci
│ ├── after_success.sh
│ ├── index.html
│ ├── install.sh
│ └── push-docs-to-s3.py
├── dist
└── smarty-0.1.0-py2.7.egg
├── examples
├── README.md
├── parm@frosst
│ ├── README.md
│ ├── atomtypes
│ │ ├── README.md
│ │ ├── basetypes-elemental.smarts
│ │ ├── basetypes.smarts
│ │ ├── decorators-simple.smarts
│ │ ├── decorators.smarts
│ │ └── substitutions.smarts
│ ├── make_subset.py
│ ├── molecules
│ │ ├── zinc-subset-500-parm@frosst.mol2.gz
│ │ ├── zinc-subset-500-tripos.mol2.gz
│ │ ├── zinc-subset-parm@frosst.mol2.gz
│ │ └── zinc-subset-tripos.mol2.gz
│ └── scripts
│ │ ├── README.md
│ │ └── convert-atom-names-to-tripos.py
├── smarty_simulations
│ ├── AlkEthOH.csv
│ ├── AlkEthOH.log
│ ├── AlkEthOH.pdf
│ ├── Hydrogen.csv
│ ├── Hydrogen.log
│ ├── Hydrogen.pdf
│ ├── README.md
│ ├── Simple-Decorators.csv
│ ├── Simple-Decorators.log
│ └── Simple-Decorators.pdf
└── smirky
│ ├── README.md
│ ├── atom_AND_decorators.smarts
│ ├── atom_OR_bases.smarts
│ ├── atom_OR_decorators.smarts
│ ├── atom_odds_forTorsions.smarts
│ ├── bond_AND_decorators.smarts
│ ├── bond_OR_bases.smarts
│ ├── bond_odds_forTorsions.smarts
│ ├── initial_Torsions.smarts
│ ├── output.csv
│ ├── output.log
│ ├── output.pdf
│ ├── output_results.smarts
│ └── substitutions.smarts
├── oe_license.txt.enc
├── setup.py
├── smarty
├── __init__.py
├── atomtyper.py
├── cli_smarty.py
├── cli_smirky.py
├── data
│ ├── README.md
│ ├── __init__.py
│ ├── atomtypes
│ │ ├── README.md
│ │ ├── basetypes.smarts
│ │ ├── decorators-simple.smarts
│ │ ├── decorators.smarts
│ │ ├── initial_AlkEthOH.smarts
│ │ ├── initialtypes.smarts
│ │ ├── new-decorators.smarts
│ │ └── replacements.smarts
│ └── odds_files
│ │ ├── atom_OR_bases.smarts
│ │ ├── atom_decorators.smarts
│ │ ├── atom_index_odds.smarts
│ │ ├── bond_AND_decorators.smarts
│ │ ├── bond_OR_bases.smarts
│ │ ├── bond_index_odds.smarts
│ │ └── substitutions.smarts
├── sampler.py
├── sampler_smirky.py
├── score_utils.py
├── tests
│ ├── __init__.py
│ ├── test_atomtyper.py
│ ├── test_sampler.py
│ ├── test_smirky_sampler.py
│ └── test_utils.py
└── utils.py
└── utilities
├── README.md
└── test_smirks_or_environment_speed
├── README.md
├── Torsion_0_0.00e+00_results.smarts
└── testing_smirks_speed.ipynb
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 |
5 | # C extensions
6 | *.so
7 |
8 | # Distribution / packaging
9 | .Python
10 | env/
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | *.egg-info/
23 | .installed.cfg
24 | *.egg
25 |
26 | # PyInstaller
27 | # Usually these files are written by a python script from a template
28 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
29 | *.manifest
30 | *.spec
31 |
32 | # Extracted archive
33 | AlkEthOH_inputfiles/
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .coverage
43 | .coverage.*
44 | .cache
45 | nosetests.xml
46 | coverage.xml
47 | *,cover
48 |
49 | # Translations
50 | *.mo
51 | *.pot
52 |
53 | # Django stuff:
54 | *.log
55 |
56 | # Sphinx documentation
57 | docs/_build/
58 |
59 | # PyBuilder
60 | target/
61 |
62 | # Ipython notebook checkpoints
63 | *.ipynb_checkpoints/
64 |
65 | # ignore files created during tests
66 | smarty/tests/*.pdf
67 | smarty/tests/*.log
68 | smarty/tests/*.csv
69 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: c
2 | sudo: false
3 |
4 | branches:
5 | only:
6 | - master
7 |
8 | install:
9 | - source devtools/travis-ci/install.sh
10 | - export PYTHONUNBUFFERED=true
11 | # Unpack encrypted OpenEye license file
12 | - if [ "$TRAVIS_SECURE_ENV_VARS" == true ]; then openssl aes-256-cbc -K $encrypted_e60be1d1adc8_key -iv $encrypted_e60be1d1adc8_iv -in oe_license.txt.enc -out $OE_LICENSE -d; fi
13 | - if [ "$TRAVIS_SECURE_ENV_VARS" == false ]; then echo "OpenEye license will not be installed in forks."; fi
14 |
15 | script:
16 | # Add omnia channel
17 | - conda config --add channels ${ORGNAME}
18 | # Create and activate test environment
19 | - conda create --yes -n test python=$python
20 | - source activate test
21 | # Install OpenEye toolkit
22 | #- pip install $OPENEYE_CHANNEL openeye-toolkits && python -c "import openeye; print(openeye.__version__)"
23 | # Use beta version for partial bond orders
24 | - pip install --pre -i https://pypi.anaconda.org/openeye/label/beta/simple openeye-toolkits && python -c "import openeye; print(openeye.__version__)"
25 | # Install openforcefield tools
26 | # TODO if changes to openforcefield become less dynamic switch to conda install?
27 | - pip install git+https://github.com/openforcefield/openforcefield.git
28 | # Build the recipe
29 | - conda build devtools/conda-recipe
30 | # Install
31 | - conda install --yes --use-local smarty
32 | # Run tests
33 | - conda install --yes nose nose-timer
34 | - cd devtools && nosetests -vv --nocapture --with-timer $PACKAGENAME && cd ..
35 |
36 | env:
37 | matrix:
38 | - python=2.7 CONDA_PY=27
39 | - python=3.4 CONDA_PY=34
40 | - python=3.5 CONDA_PY=35
41 |
42 | global:
43 | - ORGNAME="omnia"
44 | - PACKAGENAME="smarty"
45 | # OpenEye toolkit
46 | - OE_LICENSE="$HOME/oe_license.txt"
47 | - OPENEYE_CHANNEL="-i https://pypi.anaconda.org/openeye/channel/main/simple"
48 | # encrypted BINSTAR_TOKEN for push of dev package to binstar
49 | - secure: "Iw2yv40ElSbS/TstXS9YnsbJFbxsbFQ25fkWlq8H/O3SPJwpX2/PRoCo99R1Scc0mO9BiVMwGDJQeM9y1VoYo3ozv5SIhPvc+0cMOE3AzkRiFEpZeTtDUTxOWsb+k/x5dH5/AapXRtJeKhY3cWe3lhKdv9N+yWrhY29lawXgfU4WsOEl6ON9BPwwPzvKK1sP4z8kIMzDNjt6gJ3m1HzdEQe/ibrOJIEk6Z4kTLQo9z4F9dm73/L4scEgnW6SOACC39nuYCL8PK4zPNKTqpAoVkm18uyrRz62+qPYSl3RCBNOFtbAuz7fz+ShSMA6g//LpAobNptpQeQpWXkHhYk5ALc6xzH2zScVgrPytKAPwi8mYKq9gYZnUPYgpOdjK3bNyfkGjeV9I4sQwNCBYlKtGHoqZ1l+l6oYsbx+Ti+nIeK67ufGmAugH4GJ3dhZvP6ZR73/irOrvSWiJJgqI1/k4c9Ela4wDpQHDp9sRf03HgSrRTX2gQ3E/JmPx8s56tMdkmrIDIgy6Edc80AN6zEKX0+3YVGcH6ltUViDidRGDlZ7xbUUXYtjqMJXuJEh2SV/wbeVmrBM8Pn+IfsBzLKnd1jqe3pXfoCqbCtvNwW8Sr4qMgWBEHvEtB4C5KvO5CydmRx95q/0ziRGb/VEV6QOnGxT7EIJDfyQeUqNqJD7Bdo="
50 | # encrypted AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
51 | #- secure: "SrSd1JoI8dBXQxDAX0xBTYBinTusRBQoPETnxHrBAgKdoty1pkzaghTKNMsrGsk78iwkkj1hAyttIY9trdFQkmx+OTx0fLKFmDHsMkgko4RzTtrgLgoxuRIs/gruID2cN1XKEbxlhRmQF14+q8/X1q6iGGdYMrxo51JcYPuEOSo="
52 | #- secure: "br6QRMYXhHltYTEh/d+zejxcunT3GsqwQvxxLmqnLxi+LIxX4j7eymR6p4fPBd5mCRxyvkQEjnSZxF6e7JlEKxWVcMG28I/dBWzVIRW3EKQQNRmyI+JL1dfNaqj68kHJD+FknBwHK9LD238JPcyqXPdVrm9iPkDijPczvPBxvDs="
53 |
54 | #after_success:
55 | # - echo "after_success"
56 | # - if [ "$TRAVIS_SECURE_ENV_VARS" == true ]; then ./devtools/travis-ci/after_success.sh; fi
57 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2016, Open Forcefield Group
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy
4 | of this software and associated documentation files (the "Software"), to deal
5 | in the Software without restriction, including without limitation the rights
6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
15 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
16 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
17 | DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18 | OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
19 | OR OTHER DEALINGS IN THE SOFTWARE.
20 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
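1 | [![Build Status](https://travis-ci.org/openforcefield/smarty.svg?branch=master)](https://travis-ci.org/openforcefield/smarty?branch=master)
2 | [![DOI](https://zenodo.org/badge/60921138.svg)](https://zenodo.org/badge/latestdoi/60921138)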
3 |
4 | # `smarty`: Exploring Bayesian atom type sampling
5 |
6 | This is a simple example of how Bayesian atom type sampling using reversible-jump Markov chain Monte Carlo (RJMCMC) [1] over SMARTS types might work.
7 |
8 | All tools for implementation of the SMIRNOFF in OpenMM have been moved to the [openforcefield repository](https://github.com/openforcefield/openforcefield)
9 |
10 | ## Manifest
11 |
12 | * `examples/` - some toy examples - look here to get started
13 | * `smarty/` - simple toolkit illustrating the use of RJMCMC to sample over SMARTS-specified atom types and SMIRKS-specified bonded and non-bonded parameter types.
14 | * `devtools/` - continuous integration and packaging scripts and utilities
15 | * `oe_license.txt.enc` - encrypted OpenEye license for continuous integration testing
16 | * `.travis.yml` - travis-ci continuous integration file
17 | * `utilities/` - some utility functionality relating to the project, specifically testing the speed of ChemicalEnvironments for sampling in SMIRKY.
18 |
19 | ## Prerequisites
20 |
21 | Install [miniconda](http://conda.pydata.org/miniconda.html) first. On `osx` with `bash`, this is:
22 | ```
23 | wget https://repo.continuum.io/miniconda/Miniconda2-latest-MacOSX-x86_64.sh
24 | bash Miniconda2-latest-MacOSX-x86_64.sh -b -p $HOME/miniconda
25 | export PATH="$HOME/miniconda/bin:${PATH}"
26 | ```
27 |
28 | You must first install the OpenEye toolkit:
29 | ```
30 | pip install -i https://pypi.anaconda.org/OpenEye/simple OpenEye-toolkits
31 | ```
32 |
33 | You can then use conda to install smarty:
34 | ```
35 | conda config --add channels omnia
36 | conda install -c omnia smarty
37 | ```
38 |
39 | ## Installation
40 |
41 | Install `smarty` from the `smarty/` directory with:
42 | ```bash
43 | pip install .
44 | ```
45 | If you modify the `smarty` source code (rather than the examples), reinstall with
46 | ```bash
47 | pip install . --upgrade
48 | ```
49 |
50 | ## Documentation
51 |
52 |
53 | ## SMARTY atom type sampler
54 |
55 | Check out the examples in `examples/smarty_simulations/`:
56 |
57 | Atom types are specified by SMARTS matches with corresponding parameter names.
58 |
59 | First, we start with a number of initial "base types" which are essentially indestructible (often generic) atom types, specified in `atomtypes/basetypes.smarts`:
60 | ```
61 | % atom types
62 | [#1] hydrogen
63 | [#6] carbon
64 | [#7] nitrogen
65 | [#8] oxygen
66 | [#9] fluorine
67 | [#15] phosphorous
68 | [#16] sulfur
69 | [#17] chlorine
70 | [#35] bromine
71 | [#53] iodine
72 | ```
73 | Note that lines beginning with `%` are comment lines.
74 |
75 | We also specify a number of starting types, "initial types" which can be the same or different from the base types. These follow the same format, and `atomtypes/basetypes.smarts` can be reused unless alternate behavior is desired (such as starting from more sophisticated initial types).
76 |
77 | We have two sampler options for SMARTY which differ in how focused the sampling is. The original sampler samples over all elements/patterns at once, whereas the elemental sampler focuses on sampling only one specific element. The principle of sampling is the same; the only change is in which elements we sample over. To sample only over a single element, such as oxygen, for example, we use the elemental sampler to focus on that element.
78 |
79 |
80 | ### Generating New SMARTS patterns
81 |
82 | There are two options for how to change SMARTS patterns when creating new atom types.
83 | One is using combinatorial decorators (default) and the other is using simple decorators (`--decoratorbehavior=simple-decorators`). However, it should be noted that we have found the simple decorators insufficient at distinguishing atomtypes even for the most simple sets of molecules.
84 |
85 | **Combinatorial Decorators**
86 |
87 | The first option (combinatorial-decorators) attempts to create a new atom type by adding an alpha or beta substituent to a base type or an existing atom type.
88 | These decorators differ from those of the simple-decorators option: they do not carry atom type or bond information.
89 | The new decorators are listed in `AlkEthOH/atomtypes/new-decorators.smarts` and `parm@frosst/atomtypes/new-decorators.smarts`:
90 |
91 | ```
92 | % total connectivity
93 | X1 connections-1
94 | X2 connections-2
95 | X3 connections-3
96 | X4 connections-4
97 | % total-h-count
98 | H0 total-h-count-0
99 | H1 total-h-count-1
100 | H2 total-h-count-2
101 | H3 total-h-count-3
102 | % formal charge
103 | +0 neutral
104 | +1 cationic+1
105 | -1 anionic-1
106 | % aromatic/aliphatic
107 | a aromatic
108 | A aliphatic
109 | ```
110 | Each decorator has a corresponding string token (no spaces allowed!) that is used to create human-readable versions of the corresponding atom types.
111 |
112 | For example, we may find the atom type ```[#6]&H3``` which is `carbon total-h-count-3` for a C atom bonded to three hydrogens.
113 |
114 | **Simple Decorators**
115 | The second option (simple-decorators) attempts to split off a new atom type from a parent atom type by combining (via an "and" operator, `&`) the parent atom type with a "decorator".
116 | The decorators are listed in `AlkEthOH/atomtypes/decorators.smarts` or `parm@frosst/atomtypes/decorators.smarts`:
117 | ```
118 | % bond order
119 | $([*]=[*]) double-bonded
120 | $([*]#[*]) triple-bonded
121 | $([*]:[*]) aromatic-bonded
122 | % bonded to atoms
123 | $(*~[#1]) hydrogen-adjacent
124 | $(*~[#6]) carbon-adjacent
125 | $(*~[#7]) nitrogen-adjacent
126 | $(*~[#8]) oxygen-adjacent
127 | $(*~[#9]) fluorine-adjacent
128 | $(*~[#15]) phosphorous-adjacent
129 | $(*~[#16]) sulfur-adjacent
130 | $(*~[#17]) chlorine-adjacent
131 | $(*~[#35]) bromine-adjacent
132 | $(*~[#53]) iodine-adjacent
133 | % degree
134 | D1 degree-1
135 | D2 degree-2
136 | D3 degree-3
137 | D4 degree-4
138 | D5 degree-5
139 | D6 degree-6
140 | % valence
141 | v1 valence-1
142 | v2 valence-2
143 | v3 valence-3
144 | v4 valence-4
145 | v5 valence-5
146 | v6 valence-6
147 | % total-h-count
148 | H1 total-h-count-1
149 | H2 total-h-count-2
150 | H3 total-h-count-3
151 | % aromatic/aliphatic
152 | a aromatic
153 | A aliphatic
154 | ```
155 | This option also has the corresponding string tokens.
156 |
157 | Newly proposed atom types are added to the end of the list.
158 | After a new atom type is proposed, all molecules are reparameterized using the new set of atom types.
159 | Atom type matching proceeds by trying to see if each SMARTS match can be applied working from top to bottom of the list.
160 | This means the atom type list is hierarchical, with more general types appearing at the top of the list and more specific subtypes appearing at the bottom.
161 |
162 | If a proposed type matches zero atoms, the RJMCMC move is rejected.
163 |
164 | Currently, the acceptance criterion is not the full Metropolis-Hastings criterion: it omits the reverse proposal probability. This needs to be added.
165 |
166 | ### Elemental Decomposition
167 |
168 | The input option `--element` allows a user to specify which atom types to sample based on atomic number. The default input is 0 (corresponding to no specified atomic number) and will attempt to match all atom types. If an element number is given (e.g. `--element=1` for hydrogen), only atoms with that atomic number are considered. Specifying an element number does not affect any other smarty behavior.
169 |
170 | Finally, here is a complete list of input options for smarty. Under `usage` all bracketed parameters are optional.
171 | ```
172 | Usage: Sample over atom types, optionally attempting to match atom types in a reference typed set of molecules.
173 |
174 | usage: smarty --basetypes smartsfile --initialtypes smartsfile
175 | --decorators smartsfile --molecules molfile
176 | [--element atomicnumber --substitutions smartsfile --reference molfile
177 | --decoratorbehavior combinatorial-decorators/simple-decorators
178 | --iterations niterations --temperature temperature --trajectory trajectorfile
179 | --plot plotfile]
180 |
181 | example:
182 | python smarty --basetypes=atomtypes/basetypes.smarts --initialtypes=atomtypes/initialtypes.smarts \
183 | --decorators=atomtypes/decorators.smarts --substitutions=atomtypes/substitutions.smarts \
184 | --molecules=molecules/zinc-subset-tripos.mol2.gz --reference=molecules/zinc-subset-parm@frosst.mol2.gz \
185 | --iterations 1000 --temperature=0.1
186 |
187 |
188 | Options:
189 | --version show program's version number and exit
190 | -h, --help show this help message and exit
191 | -e ELEMENT, --element=ELEMENT
192 | By default the element value is 0 corresponding to
193 | sampling all atomtypes. If another atomic number is
194 | specified only atoms with that atomic number are
195 | sampled (i.e. --element=8 will only sample atomtypes
196 | for oxygen atoms).
197 | -b BASETYPES, --basetypes=BASETYPES
198 | Filename defining base or generic atom types as SMARTS
199 | atom matches; these are indestructible and normally
200 | are elemental atom types.
201 | -f BASETYPES, --initialtypes=BASETYPES
202 | Filename defining initial (first) atom types as SMARTS
203 | atom matches.
204 | -d DECORATORS, --decorators=DECORATORS
205 | Filename defining decorator atom types as SMARTS atom
206 | matches.
207 | -s SUBSTITUTIONS, --substitutions=SUBSTITUTIONS
208 | Filename defining substitution definitions for SMARTS
209 | atom matches (OPTIONAL).
210 | -r REFMOL, --reference=REFMOL
211 | Reference typed molecules for computing likelihood
212 | (must match same molecule and atom ordering in
213 | molecules file) (OPTIONAL).
214 | -m MOLECULES, --molecules=MOLECULES
215 | Small molecule set (in any OpenEye compatible file
216 | format) containing 'dG(exp)' fields with experimental
217 | hydration free energies.
218 | -i ITERATIONS, --iterations=ITERATIONS
219 | MCMC iterations.
220 | -t TEMPERATURE, --temperature=TEMPERATURE
221 | Effective temperature for Monte Carlo acceptance,
222 | indicating fractional tolerance of mismatched atoms
223 | (default: 0.1). If 0 is specified, will behave in a
224 | greedy manner.
225 | -l TRAJECTORY_FILE, --trajectory=TRAJECTORY_FILE
226 | Name for trajectory file output, trajectory saves only
227 | changes to the list of 'atomtypes' for each iteration.
228 | If the file already exists, it is overwritten.
229 | -p PLOT_FILE, --plot=PLOT_FILE
230 | Name for output file of a plot of the score versus
231 | time. If not specified, none will be written. If
232 | provided, needs to use a file extension suitable for
233 | matplotlib/pylab. Currently requires a trajectory file
234 | to be written using -l or --trajectory.
235 | -x DECORATOR_BEHAVIOR, --decoratorbehavior=DECORATOR_BEHAVIOR
236 | Choose between simple-decorators or combinatorial-
237 | decorators (default = combinatorial-decorators).
238 | ```
239 |
240 | ---
241 |
242 | ## smirky
243 |
244 | Check out examples in `examples/smirky/`:
245 |
246 | This tool can sample any chemical environment type relevant to SMIRNOFFs, that is atoms, bonds, angles, and proper and improper torsions, one at a time.
247 | Scoring is analogous to smarty (explained above), but uses a SMIRNOFF with existing parameters as a reference instead of atom-typed molecules.
248 |
249 | Input for this tool can require up to four different file types
250 | * MOLECULES - any file format readable by OpenEye (mol2, sdf, oeb, etc.)
251 | * ODDSFILES - File with the form "smarts odds" for the different decorator or bond options
252 | * SMARTS - .smarts file type with the form "smarts/smirks label/typename"
253 | * REFERENCE - a SMIRNOFF file with reference atoms, bonds, angles, torsions, and impropers
254 |
255 | ```
256 | Usage: Sample over fragment types (atoms, bonds, angles, torsions, or impropers)
257 | optionally attempting to match created types to an established SMIRNOFF.
258 | For all files left blank, they will be taken from this module's
259 | data/odds_files/ subdirectory.
260 |
261 | usage smirky --molecules molfile --typetag fragmentType
262 | [--atomORbases AtomORbaseFile --atomORdecors AtomORdecorFile
263 | --atomANDdecors AtomANDdecorFile --bondORbase BondORbaseFile
264 | --bondANDdecors BondANDdecorFile --atomIndexOdds AtomIndexFile
265 | --bondIndexOdds BondIndexFile --replacements substitutions
266 | --initialFragments initialFragments --SMIRNOFF referenceSMIRNOFF
267 | --temperature float --verbose verbose
268 | --iterations iterations --output outputFile]
269 |
270 | example:
271 | smirky --molecules AlkEthOH_test_filt1_ff.mol2 --typetag Angle
272 |
273 |
274 |
275 | Options:
276 | --version show program's version number and exit
277 | -h, --help show this help message and exit
278 | -m MOLECULES, --molecules=MOLECULES
279 | Small molecule set (in any OpenEye compatible file
280 | format) containing 'dG(exp)' fields with experimental
281 | hydration free energies. This filename can also be an
282 | option in this module's data/molecules sub-directory
283 | -T TYPETAG, --typetag=TYPETAG
284 | type of fragment being sampled, options are 'VdW',
285 | 'Bond', 'Angle', 'Torsion', 'Improper'
286 | -e ODDFILES, --atomORbases=ODDFILES
287 | Filename defining atom OR bases and associated
288 | probabilities. These are combined with atom OR
289 | decorators in SMIRKS, for example in
290 | '[#6X4,#7X3;R2:2]' '#6' and '#7' are atom OR bases.
291 | (OPTIONAL)
292 | -O ODDFILES, --atomORdecors=ODDFILES
293 | Filename defining atom OR decorators and associated
294 | probabilities. These are combined with atom bases in
295 | SMIRKS, for example in '[#6X4,#7X3;R2:2]' 'X4' and
296 | 'X3' are ORdecorators. (OPTIONAL)
297 | -A ODDFILES, --atomANDdecors=ODDFILES
298 | Filename defining atom AND decorators and associated
299 | probabilities. These are added to the end of an atom's
300 | SMIRKS, for example in '[#6X4,#7X3;R2:2]' 'R2' is an
301 | AND decorator. (OPTIONAL)
302 | -o ODDFILES, --bondORbase=ODDFILES
303 | Filename defining bond OR bases and their associated
304 | probabilities. These are OR'd together to describe a
305 | bond, for example in '[#6]-,=;@[#6]' '-' and '=' are
306 | OR bases. (OPTIONAL)
307 | -a ODDFILES, --bondANDdecors=ODDFILES
308 | Filename defining bond AND decorators and their
309 | associated probabilities. These are AND'd to the end
310 | of a bond, for example in '[#6]-,=;@[#7]' '@' is an
311 | AND decorator.(OPTIONAL)
312 | -D ODDSFILE, --atomOddsFile=ODDSFILE
313 | Filename defining atom descriptors and probabilities
314 | with making changes to that kind of atom. Options for
315 | descriptors are integers corresponding to that indexed
316 | atom, 'Indexed', 'Unindexed', 'Alpha', 'Beta', 'All'.
317 | (OPTIONAL)
318 | -d ODDSFILE, --bondOddsFile=ODDSFILE
319 | Filename defining bond descriptors and probabilities
320 | with making changes to that kind of bond. Options for
321 | descriptors are integers corresponding to that indexed
322 | bond, 'Indexed', 'Unindexed', 'Alpha', 'Beta', 'All'.
323 | (OPTIONAL)
324 | -s SMARTS, --substitutions=SMARTS
325 | Filename defining substitution definitions for SMARTS
326 | atom matches. (OPTIONAL).
327 | -f SMARTS, --initialtypes=SMARTS
328 | Filename defining initial (first) fragment types as
329 | 'SMIRKS typename'. If this is left blank the
330 | initial type will be a generic form of the given
331 | fragment, for example '[*:1]~[*:2]' for a bond
332 | (OPTIONAL)
333 | -r REFERENCE, --smirff=REFERENCE
334 | Filename defining a SMIRNOFF force field used to
335 | determine reference fragment types in provided set of
336 | molecules. It may be an absolute file path, a path
337 | relative to the current working directory, or a path
338 | relative to this module's data subdirectory (for built
339 | in force fields). (OPTIONAL)
340 | -i ITERATIONS, --iterations=ITERATIONS
341 | MCMC iterations.
342 | -t TEMPERATURE, --temperature=TEMPERATURE
343 | Effective temperature for Monte Carlo acceptance,
344 | indicating fractional tolerance of mismatched atoms
345 | (default: 0.1). If 0 is specified, will behave in a
346 | greedy manner.
347 | -p OUTPUT, --output=OUTPUT
348 | Filename base for output information. This same base
349 | will be used for all output files created. If None
350 | provided then it is set to 'typetag_temperature'
351 | (OPTIONAL).
352 | -v VERBOSE, --verbose=VERBOSE
353 | If True prints minimal information to the commandline
354 | during iterations. (OPTIONAL)
355 | ```
356 |
357 | ## The SMIRNOFF force field format
358 |
359 | The SMIRNOFF force field format is documented [here](https://github.com/openforcefield/openforcefield/blob/master/The-SMIRNOFF-force-field-format.md).
360 | It was previously available in this repository, but has been moved.
361 | SMIRNOFF99Frosst, a version of SMIRNOFF mirroring the parameters found in the parm@Frosst force field, is now housed in its own [repository](https://github.com/openforcefield/smirnoff99Frosst).
362 | `forcefield.py` and other modules required to implement the SMIRNOFF format for simulations in OpenMM have also been moved. These scripts and examples on how to use them can be found at [openforcefield/openforcefield](https://github.com/openforcefield/openforcefield).
363 |
364 | ## References
365 |
366 | [1] Green PJ. Reversible jump Markov chain Monte Carlo computation and Bayesian model determination. Biometrika 82:711, 1995.
367 | http://dx.doi.org/10.1093/biomet/82.4.711
368 |
--------------------------------------------------------------------------------
/devtools/conda-recipe/README.md:
--------------------------------------------------------------------------------
1 | This is a recipe for building the current development package into a conda
2 | binary.
3 |
4 | The installation on travis-ci is done by building the conda package, installing
5 | it, running the tests, and then if successful pushing the package to binstar
6 | (and the docs to AWS S3). The binstar auth token is an encrypted environment
7 | variable generated using:
8 |
9 | binstar auth -n yank-travis -o omnia --max-age 22896000 -c --scopes api:write
10 |
11 | and then saved in the environment variable BINSTAR_TOKEN.
12 |
13 | You can set up travis to store an encrypted token via
14 |
15 | gem install travis && travis encrypt BINSTAR_TOKEN=xx
16 |
17 | where xx is the token output by binstar. The final command should print a line (containing 'secure') for inclusion in your .travis.yml file.
18 |
--------------------------------------------------------------------------------
/devtools/conda-recipe/build.sh:
--------------------------------------------------------------------------------
1 | pip install .  # install the package found in the current working directory with pip
2 |
--------------------------------------------------------------------------------
/devtools/conda-recipe/meta.yaml:
--------------------------------------------------------------------------------
1 | package:
2 | name: smarty
3 | version: 0.0.0
4 |
5 | source:
6 | path: ../..
7 |
8 | build:
9 | preserve_egg_dir: True
10 | number: 0
11 |
12 | requirements:
13 | build:
14 | - python
15 | - setuptools
16 | - pandas
17 |
18 | run:
19 | - python
20 | - numpy
21 | - networkx
22 | - lxml
23 | - openmoltools >=0.7.3
24 | - parmed
25 | - matplotlib
26 | - pandas
27 |
28 | test:
29 | requires:
30 | - nose
31 | - nose-timer
32 | imports:
33 | - smarty
34 |
35 | about:
36 | home: https://github.com/openforcefield/smarty
37 | license: MIT
38 |
--------------------------------------------------------------------------------
/devtools/travis-ci/after_success.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Travis-CI after_success hook: uploads the ${PACKAGENAME}-dev package archives
3 | # found under $HOME/miniconda/conda-bld to binstar and, when PUSH_DOCS_TO_S3 is
4 | # true, builds the docs and pushes them to S3.
5 | # Must be invoked with $PACKAGENAME (also reads $ORGNAME and $BINSTAR_TOKEN).
6 | 
7 | echo $TRAVIS_PULL_REQUEST $TRAVIS_BRANCH
8 | PUSH_DOCS_TO_S3=false
9 | 
10 | # Travis sets TRAVIS_PULL_REQUEST to the pull request number on PR builds and
11 | # to "false" otherwise, so the previous comparison against "true" never matched.
12 | # An unset variable (run outside Travis) is treated as "not a pull request".
13 | if [ "${TRAVIS_PULL_REQUEST:-false}" != "false" ]; then
14 |     echo "This is a pull request. No deployment will be done."; exit 0
15 | fi
16 | 
17 | 
18 | # Only builds of the master branch are deployed.
19 | if [ "$TRAVIS_BRANCH" != "master" ]; then
20 |     echo "No deployment on BRANCH='$TRAVIS_BRANCH'"; exit 0
21 | fi
22 | 
23 | 
24 | # Deploy to binstar
25 | conda install --yes anaconda-client jinja2
26 | pushd .
27 | cd $HOME/miniconda/conda-bld
28 | FILES=*/${PACKAGENAME}-dev-*.tar.bz2
29 | for filename in $FILES; do
30 |     # Remove any previously uploaded file of the same name before re-uploading it.
31 |     anaconda -t $BINSTAR_TOKEN remove --force ${ORGNAME}/${PACKAGENAME}-dev/${filename}
32 |     anaconda -t $BINSTAR_TOKEN upload --force -u ${ORGNAME} -p ${PACKAGENAME}-dev ${filename}
33 | done
34 | popd
35 | 
36 | if [ $PUSH_DOCS_TO_S3 = true ]; then
37 |     # Create the docs and push them to S3
38 |     # -----------------------------------
39 |     conda install --yes pip
40 |     conda config --add channels $ORGNAME
41 |     conda install --yes `conda build devtools/conda-recipe --output`
42 |     pip install numpydoc s3cmd msmb_theme
43 |     conda install --yes `cat docs/requirements.txt | xargs`
44 | 
45 |     conda list -e
46 | 
47 |     (cd docs && make html && cd -)
48 |     ls -lt docs/_build
49 |     pwd
50 |     # The push script lives in devtools/travis-ci/, not devtools/ci/.
51 |     python devtools/travis-ci/push-docs-to-s3.py
52 | fi
53 | 
--------------------------------------------------------------------------------
/devtools/travis-ci/index.html:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/devtools/travis-ci/install.sh:
--------------------------------------------------------------------------------
1 | # Installs Miniconda into $HOME/miniconda and the conda packages needed to
2 | # build and test the project. .travis.yml runs this file with `source`, so the
3 | # exported PATH and PIP_ARGS remain set for the later build steps.
4 | 
5 | # Temporarily change directory to $HOME to install software
6 | pushd .
7 | cd $HOME
8 | 
9 | # Install Miniconda
10 | MINICONDA=Miniconda2-latest-Linux-x86_64.sh
11 | MINICONDA_HOME=$HOME/miniconda
12 | # Scrape the expected MD5 for the installer out of the download index page.
13 | MINICONDA_MD5=$(curl -s https://repo.continuum.io/miniconda/ | grep -A3 $MINICONDA | sed -n '4p' | sed -n 's/ *\(.*\)<\/td> */\1/p')
14 | # Download over https, matching the URL the checksum is read from.
15 | wget -q https://repo.continuum.io/miniconda/$MINICONDA
16 | if [[ $MINICONDA_MD5 != $(md5sum $MINICONDA | cut -d ' ' -f 1) ]]; then
17 |     echo "Miniconda MD5 mismatch"
18 |     exit 1
19 | fi
20 | bash $MINICONDA -b -p $MINICONDA_HOME
21 | 
22 | # Configure miniconda
23 | export PIP_ARGS="-U"
24 | export PATH=$MINICONDA_HOME/bin:$PATH
25 | conda update --yes conda
26 | conda install --yes conda-build jinja2 anaconda-client pip
27 | conda install --yes -c omnia openmoltools
28 | conda install --yes -c omnia parmed
29 | # matplotlib is the package to install; the previous "-c matplotlib" passed it
30 | # as a channel name and named no package at all.
31 | conda install --yes matplotlib
32 | conda install --yes pandas
33 | 
34 | # Restore original directory
35 | popd
36 | 
--------------------------------------------------------------------------------
/devtools/travis-ci/push-docs-to-s3.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | """
4 | Must have the vollowing environment variables defined:
5 | * BUCKET_NAME : AWS bucket name
6 | * PREFIX : 'latest' or other version number
7 |
8 | """
9 |
10 | import os
11 | import pip
12 | import tempfile
13 | import subprocess
14 | import thermopyl.version
15 |
16 |
17 | BUCKET_NAME = 'thermopyl.org'
18 | if not thermopyl.version.release:
19 | PREFIX = 'latest'
20 | else:
21 | PREFIX = thermopyl.version.short_version
22 |
23 | if not any(d.project_name == 's3cmd' for d in pip.get_installed_distributions()):
24 | raise ImportError('The s3cmd pacakge is required. try $ pip install s3cmd')
25 | # The secret key is available as a secure environment variable
26 | # on travis-ci to push the build documentation to Amazon S3.
27 | with tempfile.NamedTemporaryFile('w') as f:
28 | f.write('''[default]
29 | access_key = {AWS_ACCESS_KEY_ID}
30 | secret_key = {AWS_SECRET_ACCESS_KEY}
31 | '''.format(**os.environ))
32 | f.flush()
33 |
34 | template = ('s3cmd --guess-mime-type --config {config} '
35 | 'sync docs/_build/ s3://{bucket}/{prefix}/')
36 | cmd = template.format(
37 | config=f.name,
38 | bucket=BUCKET_NAME,
39 | prefix=PREFIX)
40 | return_val = subprocess.call(cmd.split())
41 |
42 | # Sync index file.
43 | template = ('s3cmd --guess-mime-type --config {config} '
44 | 'sync devtools/ci/index.html s3://{bucket}/')
45 | cmd = template.format(
46 | config=f.name,
47 | bucket=BUCKET_NAME)
48 | return_val = subprocess.call(cmd.split())
49 |
--------------------------------------------------------------------------------
/dist/smarty-0.1.0-py2.7.egg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openforcefield/smarty/882d54b6d6d0fada748c71789964b07be2210a6a/dist/smarty-0.1.0-py2.7.egg
--------------------------------------------------------------------------------
/examples/README.md:
--------------------------------------------------------------------------------
1 | # Examples for Bayesian atomtype sampler
2 |
3 | ## Manifest
4 | * `parm@frosst/` - example illustrating attempt to recover parm@frosst atom types
5 | * `smarty_simulations/` - examples to implement smarty, a tool to rediscover parm@frosst atomtypes on the AlkEthOH molecules set
6 | * `smirky/` - example usage of the smirky sampling tool to rediscover the SMIRNOFF99Frosst parameter types
7 |
8 | **We have rearranged the Open Force Field group. If you are looking for an example that used to be here but no longer is, it can be found at [openforcefield/examples/](https://github.com/openforcefield/openforcefield/tree/master/examples).**
9 |
--------------------------------------------------------------------------------
/examples/parm@frosst/README.md:
--------------------------------------------------------------------------------
1 | # Example application of SMARTY atom type sampler to recover parm@frosst typing
2 |
3 | In this example, the SMARTY `AtomTypeSampler` is used to attempt to recover SMARTS atom types that recapitulate the typing rules from a referenced set of typed molecules.
4 |
5 | ## Manifest
6 | * `smarty.py` - example command-line driver
7 | * `atomtypes/` - input atom type sample specification files
8 | * `molecules/` - typed molecule datasets
9 | * `scripts/` - useful conversion scripts
10 |
11 | ## Usage
12 |
13 | Usage
14 |
15 | Example:
16 | ```
17 | smarty --basetypes=atomtypes/basetypes-elemental.smarts --initialtypes=atomtypes/basetypes-elemental.smarts --decorators=atomtypes/decorators.smarts --substitutions=atomtypes/substitutions.smarts \
18 | --molecules=molecules/zinc-subset-tripos.mol2.gz --reference=molecules/zinc-subset-parm@frosst.mol2.gz --iterations 1000 --temperature=0.1
19 | ```
20 |
21 | Initially, the base atom types are added to the pool of current atom types, and the number of atoms and molecules matched by each atom type are shown:
22 | ```
23 | INDEX ATOMS MOLECULES TYPE NAME SMARTS REF TYPE FRACTION OF REF TYPED MOLECULES MATCHED
24 | 1 : 88148 7487 | hydrogen [#1] HA 28720 / 28720 (100.000%)
25 | 2 : 90146 7505 | carbon [#6] CA 37143 / 37143 (100.000%)
26 | 3 : 20838 6806 | nitrogen [#7] NB 7612 / 7612 (100.000%)
27 | 4 : 12829 5946 | oxygen [#8] O 4876 / 4876 (100.000%)
28 | 5 : 1001 444 | fluorine [#9] F 1001 / 1001 (100.000%)
29 | 6 : 5 5 | phosphorous [#15] P 5 / 5 (100.000%)
30 | 7 : 3171 2593 | sulfur [#16] S 2544 / 2544 (100.000%)
31 | 8 : 574 463 | chlorine [#17] CL 574 / 574 (100.000%)
32 | 9 : 84 73 | bromine [#35] BR 84 / 84 (100.000%)
33 | 10 : 8 8 | iodine [#53] I 8 / 8 (100.000%)
34 | TOTAL : 216804 7505 | 82567 / 216804 match (38.084 %)
35 | ```
36 | After a few iterations, the pool of current atom types will have diverged, with some children having been added to the set or atom types removed from the original set.
37 | ```
38 | INDEX ATOMS MOLECULES TYPE NAME SMARTS REF TYPE FRACTION OF REF TYPED MOLECULES MATCHED
39 | 1 : 88148 7487 | hydrogen [#1] HA 28720 / 28720 (100.000%)
40 | 2 : 90068 7505 | carbon [#6] CA 37109 / 37143 ( 99.908%)
41 | 3 : 78 73 | carbon bromine-adjacent [#6&$(*~[#35])] CW 15 / 4850 ( 0.309%)
42 | 4 : 9689 5835 | nitrogen [#7] N 3161 / 3161 (100.000%)
43 | 5 : 11149 5300 | nitrogen degree-2 [#7&D2] NB 7480 / 7612 ( 98.266%)
44 | 6 : 12829 5946 | oxygen [#8] O 4876 / 4876 (100.000%)
45 | 7 : 1001 444 | fluorine [#9] F 1001 / 1001 (100.000%)
46 | 8 : 5 5 | phosphorous [#15] P 5 / 5 (100.000%)
47 | 9 : 3171 2593 | sulfur [#16] S 2544 / 2544 (100.000%)
48 | 10 : 574 463 | chlorine [#17] CL 574 / 574 (100.000%)
49 | 11 : 84 73 | bromine [#35] BR 84 / 84 (100.000%)
50 | 12 : 8 8 | iodine [#53] I 8 / 8 (100.000%)
51 | TOTAL : 216804 7505 | 85577 / 216804 match (39.472 %)
52 | ```
53 | or even
54 | ```
55 | Iteration 241 / 1000
56 | Attempting to destroy atom type [#9] : fluorine...
57 | Typing failed; rejecting.
58 | Rejected.
59 | INDEX ATOMS MOLECULES TYPE NAME SMARTS REF TYPE FRACTION OF REF TYPED MOLECULES MATCHED
60 | 1 : 88148 7487 | hydrogen [#1] HA 28720 / 28720 (100.000%)
61 | 2 : 63417 7402 | carbon [#6] CA 36300 / 37143 ( 97.730%)
62 | 3 : 4293 2349 | carbon sulfur-adjacent [#6&$(*~[#16])] CW 1497 / 4850 ( 30.866%)
63 | 4 : 14861 5134 | carbon degree-4 [#6&D4] CT 14509 / 22084 ( 65.699%)
64 | 5 : 7575 4235 | carbon total-h-count-3 [#6&H3]
65 | 6 : 20253 6767 | nitrogen [#7] NB 7612 / 7612 (100.000%)
66 | 7 : 585 504 | nitrogen degree-1 [#7&D1] NL 585 / 585 (100.000%)
67 | 8 : 12829 5946 | oxygen [#8] O 4876 / 4876 (100.000%)
68 | 9 : 1001 444 | fluorine [#9] F 1001 / 1001 (100.000%)
69 | 10 : 5 5 | phosphorous [#15] P 5 / 5 (100.000%)
70 | 11 : 2593 2144 | sulfur [#16] S 2544 / 2544 (100.000%)
71 | 12 : 578 563 | sulfur valence-6 [#16&v6] SO 578 / 627 ( 92.185%)
72 | 13 : 574 463 | chlorine [#17] CL 574 / 574 (100.000%)
73 | 14 : 84 73 | bromine [#35] BR 84 / 84 (100.000%)
74 | 15 : 8 8 | iodine [#53] I 8 / 8 (100.000%)
75 | TOTAL : 216804 7505 | 98893 / 216804 match (45.614 %)
76 | ```
77 |
--------------------------------------------------------------------------------
/examples/parm@frosst/atomtypes/README.md:
--------------------------------------------------------------------------------
1 | # Atom type SMARTS components
2 |
3 | ## Formats
4 |
5 | ### Initial types
6 |
7 | A `basetypes` file specifies the initial atom types used to initialize the sampler.
8 |
9 | Comments beginning with `%` are ignored throughout the file.
10 | Each line has the format
11 | ```
12 | <SMARTS> <typename>
13 | ```
14 | where `<SMARTS>` is an [OpenEye SMARTS string](https://docs.eyesopen.com/toolkits/cpp/oechemtk/SMARTS.html) and `<typename>` is a human-readable typename associated with that atom type.
15 |
16 | Atom type definitions are hierarchical, with the last match in the file taking precedence over earlier matches.
17 |
18 | For example, we could use the elemental base types:
19 | ```
20 | % atom types
21 | H hydrogen
22 | C carbon
23 | N nitrogen
24 | O oxygen
25 | F fluorine
26 | P phosphorous
27 | S sulfur
28 | Cl chlorine
29 | Br bromine
30 | I iodine
31 | ```
32 |
33 | ### Decorators
34 |
35 | A `decorators` file contains a list of SMARTS decorators.
36 |
37 | Comments beginning with `%` are ignored throughout the file.
38 | Each line has the format
39 | ```
40 | <SMARTS> <decoratorname>
41 | ```
42 | where `<SMARTS>` is an [OpenEye SMARTS string](https://docs.eyesopen.com/toolkits/cpp/oechemtk/SMARTS.html) and `<decoratorname>` is a human-readable typename associated with that decorator.
43 |
44 | The SMARTS component is ANDed together (using the `&` operator) with a parent atom type to create a new proposed child atom type.
45 | The human-readable `<decoratorname>` is appended (with a space) to the parent name to keep a human-readable annotation of the proposed child atom type.
46 |
47 | ### Substitutions
48 |
49 | It is often convenient to define various tokens that are substituted for more sophisticated SMARTS expressions.
50 |
51 | % Substitution definitions
52 | % Format:
53 | % <SMARTS> <substitution-name>
54 |
55 | Comments beginning with `%` are ignored throughout the file.
56 | Each line has the format
57 | ```
58 | <SMARTS> <substitution-name>
59 | ```
60 | where `<SMARTS>` is an [OpenEye SMARTS string](https://docs.eyesopen.com/toolkits/cpp/oechemtk/SMARTS.html) and `<substitution-name>` is the token that will be substituted for this.
61 |
62 | For example, we could define some elemental substitutions along with some substitutions for halogens:
63 | ```
64 | % elements
65 | [#9] fluorine
66 | [#17] chlorine
67 | [#35] bromine
68 | [#53] iodine
69 |
70 | % halogens
71 | [$smallhals,$largehals] halogen
72 | [$fluorine,$chlorine] smallhals
73 | [$bromine,$iodine] largehals
74 | ```
75 |
76 | The [`OESmartsLexReplace`](http://docs.eyesopen.com/toolkits/python/oechemtk/OEChemFunctions/OESmartsLexReplace.html) function is used to implement these replacements.
77 |
78 | ## Manifest
79 | * `basetypes-elemental.smarts` - basetypes file with elemental atom types - this is a good choice to begin with
80 | * `basetypes.smarts` - basetypes file with more sophisticated atom types
81 | * `decorators.smarts` - `decorators` file with a variety of decorators
82 | * `decorators-simple.smarts` - minimal `decorators` file for testing
83 | * `substitutions.smarts` - minimal `substitutions` file
84 |
--------------------------------------------------------------------------------
/examples/parm@frosst/atomtypes/basetypes-elemental.smarts:
--------------------------------------------------------------------------------
1 | % atom types
2 | [#1] hydrogen
3 | [#6] carbon
4 | [#7] nitrogen
5 | [#8] oxygen
6 | [#9] fluorine
7 | [#15] phosphorous
8 | [#16] sulfur
9 | [#17] chlorine
10 | [#35] bromine
11 | [#53] iodine
12 |
--------------------------------------------------------------------------------
/examples/parm@frosst/atomtypes/basetypes.smarts:
--------------------------------------------------------------------------------
1 | % atom types
2 | [#1] hydrogen
3 | [#6] carbon
4 | [#6&a] carbon aromatic
5 | [#7] nitrogen
6 | [#8] oxygen
7 | [#9] fluorine
8 | [#15] phosphorous
9 | [#16] sulfur
10 | [#17] chlorine
11 | [#35] bromine
12 | [#53] iodine
13 |
--------------------------------------------------------------------------------
/examples/parm@frosst/atomtypes/decorators-simple.smarts:
--------------------------------------------------------------------------------
1 | % aromatic/aliphatic
2 | a aromatic
3 | A aliphatic
4 | % halogens
5 | $(*~[$halogen]) halogen-adjacent
6 |
--------------------------------------------------------------------------------
/examples/parm@frosst/atomtypes/decorators.smarts:
--------------------------------------------------------------------------------
1 | % bond order
2 | $([*]=[*]) double-bonded
3 | $([*]#[*]) triple-bonded
4 | $([*]:[*]) aromatic-bonded
5 | % bonded to atoms
6 | $(*~[#1]) hydrogen-adjacent
7 | $(*~[#6]) carbon-adjacent
8 | $(*~[#7]) nitrogen-adjacent
9 | $(*~[#8]) oxygen-adjacent
10 | $(*~[#9]) fluorine-adjacent
11 | $(*~[#15]) phosphorous-adjacent
12 | $(*~[#16]) sulfur-adjacent
13 | $(*~[#17]) chlorine-adjacent
14 | $(*~[#35]) bromine-adjacent
15 | $(*~[#53]) iodine-adjacent
16 | % degree
17 | D1 degree-1
18 | D2 degree-2
19 | D3 degree-3
20 | D4 degree-4
21 | D5 degree-5
22 | D6 degree-6
23 | % valence
24 | v1 valence-1
25 | v2 valence-2
26 | v3 valence-3
27 | v4 valence-4
28 | v5 valence-5
29 | v6 valence-6
30 | % total-h-count
31 | H1 total-h-count-1
32 | H2 total-h-count-2
33 | H3 total-h-count-3
34 | % aromatic/aliphatic
35 | a aromatic
36 | A aliphatic
37 | % halogens
38 | $(*~[$halogen]) halogen-adjacent
39 | $(*~[$smallhals]) small-halogen-adjacent
40 | $(*~[$largehals]) large-halogen-adjacent
41 |
--------------------------------------------------------------------------------
/examples/parm@frosst/atomtypes/substitutions.smarts:
--------------------------------------------------------------------------------
1 | % Substitution definitions
2 | % Format:
3 | % <SMARTS> <substitution-name>
4 |
5 | % elements
6 | [#1] hydrogen
7 | [#6] carbon
8 | [#7] nitrogen
9 | [#8] oxygen
10 | [#9] fluorine
11 | [#15] phosphorous
12 | [#16] sulfur
13 | [#17] chlorine
14 | [#35] bromine
15 | [#53] iodine
16 |
17 | % halogens
18 | [$smallhals,$largehals] halogen
19 | [$fluorine,$chlorine] smallhals
20 | [$bromine,$iodine] largehals
21 |
--------------------------------------------------------------------------------
/examples/parm@frosst/make_subset.py:
--------------------------------------------------------------------------------
1 | #!/bin/env python
2 |
3 | """Take the ZINC subset here and make a smaller subset of it for testing purposes."""
4 |
5 | from openeye.oechem import *
6 |
7 | nmols = 500 #Number of molecules to retain out of full ~7500
8 | # Currently the above are taken as the first 500. We could also take randomly.
9 |
10 |
11 |
12 | # Read set with tripos types, write subset
13 | ifs = oemolistream( 'molecules/zinc-subset-tripos.mol2.gz')
14 | ofs = oemolostream( 'molecules/zinc-subset-%s-tripos.mol2.gz' % nmols )
15 | mol = OEMol()
16 | ct=0
17 | while OEReadMolecule(ifs, mol) and ct < nmols:
18 | OEWriteConstMolecule(ofs, mol)
19 | ct += 1
20 |
21 |
22 | # Read set with parm@frosst types, write subset
23 | # Use flavors here to ensure writing doesn't mangle atom types
24 | ifs = oemolistream( 'molecules/zinc-subset-parm@frosst.mol2.gz')
25 | flavor = OEIFlavor_Generic_Default | OEIFlavor_MOL2_Default | OEIFlavor_MOL2_Forcefield
26 | ifs.SetFlavor(OEFormat_MOL2, flavor)
27 | ofs = oemolostream( 'molecules/zinc-subset-%s-parm@frosst.mol2.gz' % nmols )
28 | ofs.SetFlavor(OEFormat_MOL2, flavor)
29 | mol = OEMol()
30 | ct=0
31 | while OEReadMolecule(ifs, mol) and ct < nmols:
32 | OEWriteConstMolecule(ofs, mol)
33 | ct+=1
34 |
35 |
--------------------------------------------------------------------------------
/examples/parm@frosst/molecules/zinc-subset-500-parm@frosst.mol2.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openforcefield/smarty/882d54b6d6d0fada748c71789964b07be2210a6a/examples/parm@frosst/molecules/zinc-subset-500-parm@frosst.mol2.gz
--------------------------------------------------------------------------------
/examples/parm@frosst/molecules/zinc-subset-500-tripos.mol2.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openforcefield/smarty/882d54b6d6d0fada748c71789964b07be2210a6a/examples/parm@frosst/molecules/zinc-subset-500-tripos.mol2.gz
--------------------------------------------------------------------------------
/examples/parm@frosst/molecules/zinc-subset-parm@frosst.mol2.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openforcefield/smarty/882d54b6d6d0fada748c71789964b07be2210a6a/examples/parm@frosst/molecules/zinc-subset-parm@frosst.mol2.gz
--------------------------------------------------------------------------------
/examples/parm@frosst/molecules/zinc-subset-tripos.mol2.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openforcefield/smarty/882d54b6d6d0fada748c71789964b07be2210a6a/examples/parm@frosst/molecules/zinc-subset-tripos.mol2.gz
--------------------------------------------------------------------------------
/examples/parm@frosst/scripts/README.md:
--------------------------------------------------------------------------------
1 | # Useful scripts for parm@frosst test
2 |
3 | ## Manifest
4 |
5 | * `convert-atom-names-to-tripos.py` - utility to convert atom names to Tripos in mol2 files
6 |
--------------------------------------------------------------------------------
/examples/parm@frosst/scripts/convert-atom-names-to-tripos.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """
3 | Convert file of molecules from forcefield atom types to Tripos atom types.
4 |
5 | Example:
6 |
7 | > python ../convert-atom-names-to-tripos.py zinc-subset-parm@frosst.mol2.gz zinc-subset-tripos.mol2.gz
8 | """
9 | ################################################################
10 | # Copyright (C) 2006-2015 OpenEye Scientific Software, Inc.
11 | ################################################################
12 | from __future__ import division
13 | from __future__ import print_function
14 | import os,sys
15 | import openeye.oechem as oechem
16 |
17 | def main(argv=sys.argv):
18 | if len(argv) != 3:
19 | oechem.OEThrow.Usage("%s " % argv[0])
20 |
21 | ifs = oechem.oemolistream()
22 | flavor = oechem.OEIFlavor_Generic_Default | oechem.OEIFlavor_MOL2_Default | oechem.OEIFlavor_MOL2_Forcefield
23 | ifs.SetFlavor(oechem.OEFormat_MOL2, flavor)
24 | if not ifs.open(argv[1]):
25 | oechem.OEThrow.Fatal("Unable to open %s for reading" % argv[1])
26 |
27 | ofs = oechem.oemolostream()
28 | if not ofs.open(argv[2]):
29 | oechem.OEThrow.Fatal("Unable to open %s for writing" % argv[2])
30 |
31 | for mol in ifs.GetOEMols():
32 | oechem.OETriposAtomNames(mol)
33 | oechem.OEWriteConstMolecule(ofs, mol)
34 |
35 | ifs.close()
36 | ofs.close()
37 |
38 | if __name__ == "__main__":
39 | sys.exit(main(sys.argv))#!/usr/bin/env python
40 |
--------------------------------------------------------------------------------
/examples/smarty_simulations/AlkEthOH.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openforcefield/smarty/882d54b6d6d0fada748c71789964b07be2210a6a/examples/smarty_simulations/AlkEthOH.pdf
--------------------------------------------------------------------------------
/examples/smarty_simulations/Hydrogen.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openforcefield/smarty/882d54b6d6d0fada748c71789964b07be2210a6a/examples/smarty_simulations/Hydrogen.pdf
--------------------------------------------------------------------------------
/examples/smarty_simulations/README.md:
--------------------------------------------------------------------------------
1 | # Example application of SMARTY atom type sampler to recover parm99 typing of alkanes, ethers, and alcohols
2 |
3 | These are example outputs for a variety of smarty uses. Each example is listed below with the associated command line call.
4 | Each example has the three output files with the title of the example as the name:
5 | * `*.csv` - example trajectory file, a csv file that is readable with the `score_util.py` methods
6 | * `*.log` - stored command-line output for that simulation
7 | * `*.pdf` - plot showing the score versus iteration for the simulation
8 |
9 | These are only examples of how to use smarty. All input files are those included in the smarty package
10 | available at `smarty/data/`; the utility here allows those files to be used in simulations.
11 |
12 | ## AlkEthOH
13 |
14 | Typical smarty behavior with the AlkEthOH molecule set
15 | with combinatorial decorators and sampling all atoms
16 |
17 | ```
18 | smarty --basetypes atomtypes/basetypes.smarts \
19 | --initialtypes atomtypes/basetypes.smarts \
20 | --decorators atomtypes/new-decorators.smarts \
21 | --molecules AlkEthOH_test_filt1_tripos.mol2 \
22 | --reference AlkEthOH_test_filt1_ff.mol2 \
23 | --iterations 1000 \
24 | --temperature 0.01 \
25 | --trajectory AlkEthOH.csv \
26 | --plot AlkEthOH.pdf >> AlkEthOH.log
27 | ```
28 |
29 | **Example Output**
30 | this output shows how smarty is used to sample atomtypes
31 | and compared to the parm@frosst typed reference molecules
32 |
33 | ##### Initializing smarty:
34 | ```
35 | Loading molecules from '/Users/bannanc/anaconda/lib/python2.7/site-packages/smarty/data/molecules/AlkEthOH_test_filt1_tripos.mol2'...
36 | 42 molecules read
37 | 0.006 s elapsed
38 | Loading molecules from '/Users/bannanc/anaconda/lib/python2.7/site-packages/smarty/data/molecules/AlkEthOH_test_filt1_ff.mol2'...
39 | 42 molecules read
40 | 0.006 s elapsed
41 | Sampling all atomtypes
42 | ```
43 | Store bond types that are used in these molecules
44 | ```
45 | USED BOND TYPES:
46 | INDEX ATOMS MOLECULES TYPE NAME SMARTS
47 | 1 : 803 42 | singly -
48 | 2 : 0 0 | doubly =
49 | 3 : 0 0 | triply #
50 | 4 : 0 0 | aromatic :
51 | TOTAL : 803 42
52 | ```
53 | Type molecules with base types and store those with matches
54 | ```
55 | MATCHED BASETYPES:
56 | INDEX ATOMS MOLECULES TYPE NAME SMARTS
57 | 1 : 464 42 | c_hydrogen [#1]
58 | 2 : 232 42 | c_carbon [#6]
59 | 3 : 0 0 | c_nitrogen [#7]
60 | 4 : 107 42 | c_oxygen [#8]
61 | 5 : 0 0 | c_fluorine [#9]
62 | 6 : 0 0 | c_phosphorous [#15]
63 | 7 : 0 0 | c_sulfur [#16]
64 | 8 : 0 0 | c_chlorine [#17]
65 | 9 : 0 0 | c_selenium [#34]
66 | 10 : 0 0 | c_bromine [#35]
67 | 11 : 0 0 | c_iodine [#53]
68 | TOTAL : 803 42
69 | Removing basetype '[#7]' ('c_nitrogen'), which is unused.
70 | Removing basetype '[#9]' ('c_fluorine'), which is unused.
71 | Removing basetype '[#15]' ('c_phosphorous'), which is unused.
72 | Removing basetype '[#16]' ('c_sulfur'), which is unused.
73 | Removing basetype '[#17]' ('c_chlorine'), which is unused.
74 | Removing basetype '[#34]' ('c_selenium'), which is unused.
75 | Removing basetype '[#35]' ('c_bromine'), which is unused.
76 | Removing basetype '[#53]' ('c_iodine'), which is unused.
77 | ```
78 | Type molecules with initial types and store the ones that are used
79 | ```
80 | MATCHED INITIAL TYPES:
81 | INDEX ATOMS MOLECULES TYPE NAME SMARTS
82 | 1 : 464 42 | c_hydrogen [#1]
83 | 2 : 232 42 | c_carbon [#6]
84 | 3 : 0 0 | c_nitrogen [#7]
85 | 4 : 107 42 | c_oxygen [#8]
86 | 5 : 0 0 | c_fluorine [#9]
87 | 6 : 0 0 | c_phosphorous [#15]
88 | 7 : 0 0 | c_sulfur [#16]
89 | 8 : 0 0 | c_chlorine [#17]
90 | 9 : 0 0 | c_selenium [#34]
91 | 10 : 0 0 | c_bromine [#35]
92 | 11 : 0 0 | c_iodine [#53]
93 | TOTAL : 803 42
94 | Removing initial atom type '[#7]', as it matches no atoms
95 | Removing initial atom type '[#9]', as it matches no atoms
96 | Removing initial atom type '[#15]', as it matches no atoms
97 | Removing initial atom type '[#16]', as it matches no atoms
98 | Removing initial atom type '[#17]', as it matches no atoms
99 | Removing initial atom type '[#34]', as it matches no atoms
100 | Removing initial atom type '[#35]', as it matches no atoms
101 | Removing initial atom type '[#53]', as it matches no atoms
102 | ```
103 | Use bipartite scoring scheme to score current atomtypes against reference
104 | ```
105 | Creating graph matching current atom types with reference atom types...
106 | Graph creation took 0.008 s
107 | Computing maximum weight match...
108 | Maximum weight match took 0.001 s
109 | ```
110 | Initial types and which reference they are paired with and initial score (67.746 %)
111 | ```
112 | Atom type matches:
113 | c_hydrogen matches HC : 244 atoms matched
114 | c_carbon matches CT : 232 atoms matched
115 | c_oxygen matches OH : 68 atoms matched
116 | 544 / 803 total atoms match (67.746 %)
117 | ```
118 | ##### Example move in chemical space
119 | ```
120 | Iteration 16 / 1000
121 | Attempting to create new subtype: '[#1]' (c_hydrogen) -> '[#1$(*~[#6])]' (c_hydrogen any c_carbon )
122 | Proposal is valid...
123 | ```
124 | Score proposed atomtypes against reference
125 | ```
126 | Creating graph matching current atom types with reference atom types...
127 | Graph creation took 0.007 s
128 | Computing maximum weight match...
129 | Maximum weight match took 0.001 s
130 | PROPOSED:
131 | Atom type matches:
132 | c_hydrogen matches HO : 68 atoms matched
133 | c_carbon matches CT : 232 atoms matched
134 | c_oxygen matches OH : 68 atoms matched
135 | c_hydrogen any c_carbon matches HC : 244 atoms matched
136 | 612 / 803 total atoms match (76.214 %)
137 | ```
138 | ##### Accepting or Rejecting a Move
139 | A move that leads to an increased score will always be accepted.
140 | A move with a decrease has a probability of being accepted depending on the temperature.
141 | A 0.0 temperature will lead to a complete optimizer where only moves leading to an increased score are accepted,
142 | however these can get stuck in local optima. By using a non-zero temperature we allow more moves to be accepted
143 | and a larger chemical space to be explored.
144 | ```
145 | Proposal score: 544 >> 612 : log_P_accept = 8.46824e+00
146 | Accepted.
147 | ```
148 | Score by reference atomtype
149 | ```
150 | INDEX ATOMS MOLECULES TYPE NAME SMARTS REF TYPE FRACTION OF REF TYPED MOLECULES MATCHED
151 | 1 : 68 42 | c_hydrogen [#1] HO 68 / 68 (100.000%)
152 | 2 : 232 42 | c_carbon [#6] CT 232 / 232 (100.000%)
153 | 3 : 107 42 | c_oxygen [#8] OH 68 / 68 (100.000%)
154 | 4 : 396 42 | c_hydrogen any c_carbon [#1$(*~[#6])] HC 244 / 244 (100.000%)
155 | TOTAL : 803 42 | 612 / 803 match (76.214 %)
156 | ```
157 | Atomtype hierarchy shows which parent type a child descends from
158 | ```
159 | Atom type hierarchy:
160 | [#6]
161 | [#8]
162 | [#1]
163 | [#1$(*~[#6])]
164 | ```
165 | ##### Final iteration of this simulation
166 | ```
167 | Iteration 999 / 1000
168 | Attempting to destroy atom type [#6] : c_carbon...
169 | Destruction rejected for atom type [#6] because this is a generic type which was initially populated.
170 | Rejected.
171 | INDEX ATOMS MOLECULES TYPE NAME SMARTS REF TYPE FRACTION OF REF TYPED MOLECULES MATCHED
172 | 1 : 291 42 | c_hydrogen [#1] HC 244 / 244 (100.000%)
173 | 2 : 232 42 | c_carbon [#6] CT 232 / 232 (100.000%)
174 | 3 : 39 30 | c_oxygen [#8] OS 39 / 39 (100.000%)
175 | 4 : 68 42 | c_hydrogen any c_oxygen [#1$(*~[#8])] HO 68 / 68 (100.000%)
176 | 5 : 27 21 | c_hydrogen any c_carbon any c_carbon (any c_oxygen) (singly c_oxygen) [#1$(*~[#6](-[#8])(~[#8])~[#6])] H2 27 / 33 ( 81.818%)
177 | 6 : 78 25 | c_hydrogen any c_carbon any c_carbon (any c_oxygen) (singly c_hydrogen) [#1$(*~[#6](-[#1])(~[#8])~[#6])] H1 78 / 116 ( 67.241%)
178 | 7 : 68 42 | c_oxygen any c_hydrogen [#8$(*~[#1])] OH 68 / 68 (100.000%)
179 | TOTAL : 803 42 | 756 / 803 match (94.147 %)
180 |
181 | Atom type hierarchy:
182 | [#1]
183 | [#1$(*~[#8])]
184 | [#1$(*~[#6](-[#8])(~[#8])~[#6])]
185 | [#1$(*~[#6](-[#1])(~[#8])~[#6])]
186 | [#8]
187 | [#8$(*~[#1])]
188 | [#6]
189 | Maximum score achieved: 0.99
190 | ```
191 |
192 | ## Hydrogen
193 |
194 | This is an example of how to use the elemental sampler for smarty;
195 | you only need to add the `--element` option. In this case, instead of considering
196 | all atoms, we only sample atom types for hydrogen.
197 | This allows for more efficient testing of the smarty tool as we can
198 | focus on the chemical perception sampling around one element.
199 | In the AlkEthOH set, there is only 1 carbon type and 2 oxygen types, so the 5 hydrogen types
200 | are the best example of this behavior.
201 |
202 | ```
203 | smarty --element 1 \
204 | --basetypes atomtypes/basetypes.smarts \
205 | --initialtypes atomtypes/basetypes.smarts \
206 | --decorators atomtypes/new-decorators.smarts \
207 | --molecules AlkEthOH_test_filt1_tripos.mol2 \
208 | --reference AlkEthOH_test_filt1_ff.mol2 \
209 | --iterations 1000 \
210 | --temperature 0.01 \
211 | --trajectory Hydrogen.csv \
212 | --plot Hydrogen.pdf >> Hydrogen.log
213 | ```
214 |
215 | ## Simple-Decorators
216 |
217 | With the simple decorator option new atomtypes are generated by ANDing
218 | decorator SMARTS patterns to the end of a parent atomtype.
219 | This method is not capable of even getting the complexity in the AlkEthOH
220 | molecule set as it does not allow for beta substitution from the primary atom.
221 |
222 | ```
223 | smarty --basetypes atomtypes/basetypes.smarts \
224 | --initialtypes atomtypes/basetypes.smarts \
225 | --decorators atomtypes/decorators.smarts \
226 | --substitutions atomtypes/replacements.smarts \
227 | --molecules AlkEthOH_test_filt1_tripos.mol2 \
228 | --reference AlkEthOH_test_filt1_ff.mol2 \
229 | --iterations 1000 \
230 | --temperature 0.01 \
231 | --trajectory Simple-decorators.csv \
232 | --plot Simple-decorators.pdf \
233 | --decoratorbehavior simple-decorators >> Simple-decorators.log
234 | ```
235 |
236 | ## More smarty tests
237 | We have done more extensive testing of this tool, but the results are
238 | a bit bulky to keep on GitHub. We maintain a public [Google Drive Directory](https://drive.google.com/drive/folders/0BwF2-3puCvfEeWNuNnlsTm1CTlU?usp=sharing)
239 | with these results. Please note it is a work in progress, so documentation is ongoing.
240 |
--------------------------------------------------------------------------------
/examples/smarty_simulations/Simple-Decorators.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openforcefield/smarty/882d54b6d6d0fada748c71789964b07be2210a6a/examples/smarty_simulations/Simple-Decorators.pdf
--------------------------------------------------------------------------------
/examples/smirky/README.md:
--------------------------------------------------------------------------------
1 | # smirky sampling of Torsions
2 |
3 | This is an example of how to use smirky, a command line tool for sampling chemical perception of Bonds, Angles, proper or improper Torsions, or van der Waals parameters. Default smirky behavior only requires two inputs; this is an example of all input options into smirky.
4 |
5 | ### Input files explained
6 |
7 | * `atom_OR_bases.smarts` - element numbers that form the base of atoms, such as `"#6"` and their associated odds
8 | * `atom_OR_decorators.smarts` - decorators and associated odds that are combined with element numbers such as `X4` in `"[#6X3,#7]"`
9 | * `atom_AND_decorators.smarts` - decorators and associated odds for patterns that are "AND'd" to the end of an atom for example `r5` in `"[#6X4,#7X3;r5]"`
10 | * `bond_OR_bases.smarts` - bond bases and their associated odds, that is '-', '=', ':', or '#' typically
11 | * `bond_AND_decorators.smarts` - bond decorators that can be "AND'd" in a bond, such as '@' in `"[#6r6]-,:;@[#7r6]"`
12 | * `atom_odds_forTorsions.smarts` - keywords or indices for atoms in torsions and odds of making changes to them
13 | * `bond_odds_forTorsions.smarts` - keywords or indices for bonds in torsions and odds of making changes to them
14 | * `initial_Torsions.smarts` - SMIRKS patterns for initial patterns
15 | * `substitutions.smarts` - SMIRKS patterns and the short hand they can be replaced with
16 |
17 | ### Command line call
18 |
19 | ```
20 | smirky --molecules AlkEthOH_test_filt1_ff.mol2 \
21 | --typetag Torsion \
22 | --atomORbases atom_OR_bases.smarts \
23 | --atomORdecors atom_OR_decorators.smarts \
24 | --atomANDdecors atom_AND_decorators.smarts \
25 | --bondORbase bond_OR_bases.smarts \
26 | --bondANDdecors bond_AND_decorators.smarts \
27 | --atomOddsFile atom_odds_forTorsions.smarts \
28 | --bondOddsFile bond_odds_forTorsions.smarts \
29 | --initialtypes initial_Torsions.smarts \
30 | --substitutions substitutions.smarts \
31 | --smirff forcefield/Frosst_AlkEthOH.ffxml \
32 | --iterations 1000 \
33 | --temperature 0.001 \
34 | --verbose True \
35 | --output output
36 | ```
37 |
38 | ### Output files created
39 | * output.log - detailed log of each iteration, changes made and if it was accepted or rejected
40 | * output.csv - a "trajectory" file that describes the torsions at each iteration
41 | * output.pdf - plot showing the overall score vs iteration
42 | * output_results.smarts - smarts file showing the final SMIRKS and their matched results
43 |
44 | ### Detailed output explained
45 |
46 | Here is a segment of output.log with an explanation of what happens in a smirky simulation
47 |
48 | ##### Match initial input
49 |
50 | Type initial parameters
51 | ```
52 | INDEX TORSIONS MOLECULES TYPE NAME: SMIRKS
53 | 1 : 0 0 | 0: [*:1]~[*:2]~[*:3]~[*:4]
54 | 2 : 1737 42 | C-C: [*:1]~[#6:2]~[#6:3]~[*:4]
55 | 3 : 438 42 | C-O: [*:1]~[#6:2]~[#8:3]~[*:4]
56 | TOTAL : 2175 42
57 | ```
58 | Remove elements that are not used in this molecule set (remember AlkEthOH only has carbon, oxygen, and hydrogen)
59 | ```
60 | removing unused element ([#5]) from list
61 | removing unused element ([#7]) from list
62 | removing unused element ([#9]) from list
63 | removing unused element ([#14]) from list
64 | removing unused element ([#15]) from list
65 | removing unused element ([#16]) from list
66 | removing unused element ([#17]) from list
67 | removing unused element ([#35]) from list
68 | removing unused element ([#53]) from list
69 | ```
70 | ##### Comparing to SMIRNOFF99Frosst
71 |
72 | Use the forcefield tools to type all molecules with SMIRNOFF reference.
73 | Compare reference types to initial parameter types
74 |
75 | ```
76 | Creating labeler from forcefield/Frosst_AlkEthOH.ffxml...
77 | Creating graph matching current types with reference types...
78 | Graph creation took 0.304 s
79 | Computing maximum weight match...
80 | Maximum weight match took 0.001 s
81 | PROPOSED:
82 | Torsion type matches:
83 | 0: [*:1]~[*:2]~[*:3]~[*:4] no match
84 | C-C: [*:1]~[#6:2]~[#6:3]~[*:4] matches t0004: [#1:1]-[#6X4:2]-[#6X4:3]-[#1:4]: 574 Torsion types matched
85 | C-O: [*:1]~[#6:2]~[#8:3]~[*:4] matches t0003: [a,A:1]-[#6X4:2]-[#8X2:3]-[!#1:4]: 156 Torsion types matched
86 | 730 / 2175 total Torsions match (33.563 %)
87 | ```
88 | Show current statistics before sampling begins
89 | ```
90 | INDEX TORSIONS MOLECULES TYPE NAME: SMIRKS REF TYPE: SMIRKS FRACTION OF REF TYPED MOLECULES MATCHED
91 | 1 : 0 0 | 0: [*:1]~[*:2]~[*:3]~[*:4]
92 | 2 : 1737 42 | C-C: [*:1]~[#6:2]~[#6:3]~[*:4] t0004: [#1:1]-[#6X4:2]-[#6X4:3]-[#1:4] 574 / 574 (100.000%)
93 | 3 : 438 42 | C-O: [*:1]~[#6:2]~[#8:3]~[*:4] t0003: [a,A:1]-[#6X4:2]-[#8X2:3]-[!#1:4] 156 / 156 (100.000%)
94 | TOTAL : 2175 42 | 730 / 2175 match (33.563 %)
95 | ```
96 |
97 | ##### Example move to generate a new Torsion
98 |
99 | Create a new torsion, in this case by changing the 4th atom from generic (*) to an oxygen with at least one attached hydrogen (`#8!H0`)
100 |
101 | ```
102 | Iteration 1 / 1000
103 | Attempting to create new subtype: '4778' ([*:1]~[#6:2]~[#6:3]~[#8!H0:4]) from parent type 'C-C' ([*:1]~[#6:2]~[#6:3]~[*:4])
104 | Probability of making this environment is 0.004 %Proposal is valid...
105 | ```
106 | Compare proposed types to the SMIRNOFF reference types
107 | ```
108 | Creating graph matching current types with reference types...
109 | Graph creation took 0.176 s
110 | Computing maximum weight match...
111 | Maximum weight match took 0.001 s
112 | PROPOSED:
113 | Torsion type matches:
114 | C-C: [*:1]~[#6:2]~[#6:3]~[*:4] matches t0004: [#1:1]-[#6X4:2]-[#6X4:3]-[#1:4]: 574 Torsion types matched
115 | C-O: [*:1]~[#6:2]~[#8:3]~[*:4] matches t0003: [a,A:1]-[#6X4:2]-[#8X2:3]-[!#1:4]: 156 Torsion types matched
116 | 4778: [*:1]~[#6:2]~[#6:3]~[#8!H0:4] matches t0012: [#1:1]-[#6X4:2]-[#6X4:3]-[#8X2:4]: 190 Torsion types matched
117 | 920 / 2175 total Torsions match (42.299 %)
118 | ```
119 | ##### Using temperature and score to accept or reject move
120 | Use the change in score and the temperature to calculate the probability of accepting the move.
121 | A move that increases the score is always accepted. The higher the temperature, the
122 | more probable it is that a move that decreases the score will be accepted.
123 | ```
124 | Proposal score: 730 >> 920 : log_P_accept = 8.73563e+01
125 | Accepted.
126 | INDEX TORSIONS MOLECULES TYPE NAME: SMIRKS REF TYPE: SMIRKS FRACTION OF REF TYPED MOLECULES MATCHED
127 | 1 : 1436 42 | C-C: [*:1]~[#6:2]~[#6:3]~[*:4] t0004: [#1:1]-[#6X4:2]-[#6X4:3]-[#1:4] 574 / 574 (100.000%)
128 | 2 : 438 42 | C-O: [*:1]~[#6:2]~[#8:3]~[*:4] t0003: [a,A:1]-[#6X4:2]-[#8X2:3]-[!#1:4] 156 / 156 (100.000%)
129 | 3 : 301 42 | 4778: [*:1]~[#6:2]~[#6:3]~[#8!H0:4] t0012: [#1:1]-[#6X4:2]-[#6X4:3]-[#8X2:4] 190 / 307 ( 61.889%)
130 | TOTAL : 2175 42 | 920 / 2175 match (42.299 %)
131 |
132 | ```
133 | Hierarchy shows which parent types lead to the generation of child types
134 | ```
135 | Torsion type hierarchy:
136 | C-C ([*:1]~[#6:2]~[#6:3]~[*:4])
137 | 4778 ([*:1]~[#6:2]~[#6:3]~[#8!H0:4])
138 | C-O ([*:1]~[#6:2]~[#8:3]~[*:4])
139 | ```
140 | ##### Final Iteration in this example
141 | ```
142 | Iteration 999 / 1000
143 | Attempting to destroy type 1876 : [#1:1]~[#6:2]~[#6:3]~[#1:4]...
144 | Proposal is valid...
145 | Creating graph matching current types with reference types...
146 | Graph creation took 0.249 s
147 | Computing maximum weight match...
148 | Maximum weight match took 0.004 s
149 | PROPOSED:
150 | Torsion type matches:
151 | C-C: [*:1]~[#6:2]~[#6:3]~[*:4] matches t0004: [#1:1]-[#6X4:2]-[#6X4:3]-[#1:4]: 574 Torsion types matched
152 | C-O: [*:1]~[#6:2]~[#8:3]~[*:4] matches t0003: [a,A:1]-[#6X4:2]-[#8X2:3]-[!#1:4]: 156 Torsion types matched
153 | 4808: [*:1]~[#6:2]~[#8:3]~[#1!X4:4] matches t0002: [a,A:1]-[#6X4:2]-[#8X2:3]-[#1:4]: 101 Torsion types matched
154 | 8090: [#6!H1:1]~[#6:2]~[#8:3]~[#1!X4:4] matches t0006: [#6X4:1]-[#6X4:2]-[#8X2:3]-[#1:4]: 87 Torsion types matched
155 | 7751: [*:1]~[#6:2]~[#6:3]~[#6:4] matches t0001: [a,A:1]-[#6X4:2]-[#6X4:3]-[a,A:4]: 146 Torsion types matched
156 | 1068: [#6!H3:1]~[#6:2]~[#6:3]~[#6:4] matches t0007: [#6X4:1]-[#6X4:2]-[#6X4:3]-[#6X4:4]: 131 Torsion types matched
157 | 6774: [#1H0:1]~[#6:2]~[#6:3]~[#6:4] matches t0005: [#1:1]-[#6X4:2]-[#6X4:3]-[#6X4:4]: 552 Torsion types matched
158 | 8025: [#6:1]~[#6:2]~[#8:3]~[#6!H3:4] matches t0008: [#6X4:1]-[#6X4:2]-[#8X2:3]-[#6X4:4]: 66 Torsion types matched
159 | 1813 / 2175 total Torsions match (83.356 %)
160 | Proposal score: 2120 >> 1813 : log_P_accept = -1.41149e+02
161 | Rejected.
162 | INDEX TORSIONS MOLECULES TYPE NAME: SMIRKS REF TYPE: SMIRKS FRACTION OF REF TYPED MOLECULES MATCHED
163 | 1 : 334 42 | C-C: [*:1]~[#6:2]~[#6:3]~[*:4] t0012: [#1:1]-[#6X4:2]-[#6X4:3]-[#8X2:4] 307 / 307 (100.000%)
164 | 2 : 168 30 | C-O: [*:1]~[#6:2]~[#8:3]~[*:4] t0003: [a,A:1]-[#6X4:2]-[#8X2:3]-[!#1:4] 156 / 156 (100.000%)
165 | 3 : 117 42 | 4808: [*:1]~[#6:2]~[#8:3]~[#1!X4:4] t0002: [a,A:1]-[#6X4:2]-[#8X2:3]-[#1:4] 101 / 101 (100.000%)
166 | 4 : 87 42 | 8090: [#6!H1:1]~[#6:2]~[#8:3]~[#1!X4:4] t0006: [#6X4:1]-[#6X4:2]-[#8X2:3]-[#1:4] 87 / 103 ( 84.466%)
167 | 5 : 146 40 | 7751: [*:1]~[#6:2]~[#6:3]~[#6:4] t0001: [a,A:1]-[#6X4:2]-[#6X4:3]-[a,A:4] 146 / 146 (100.000%)
168 | 6 : 131 37 | 1068: [#6!H3:1]~[#6:2]~[#6:3]~[#6:4] t0007: [#6X4:1]-[#6X4:2]-[#6X4:3]-[#6X4:4] 131 / 131 (100.000%)
169 | 7 : 552 40 | 6774: [#1H0:1]~[#6:2]~[#6:3]~[#6:4] t0005: [#1:1]-[#6X4:2]-[#6X4:3]-[#6X4:4] 552 / 552 (100.000%)
170 | 8 : 66 30 | 8025: [#6:1]~[#6:2]~[#8:3]~[#6!H3:4] t0008: [#6X4:1]-[#6X4:2]-[#8X2:3]-[#6X4:4] 66 / 66 (100.000%)
171 | 9 : 574 42 | 1876: [#1:1]~[#6:2]~[#6:3]~[#1:4] t0004: [#1:1]-[#6X4:2]-[#6X4:3]-[#1:4] 574 / 574 (100.000%)
172 | TOTAL : 2175 42 | 2120 / 2175 match (97.471 %)
173 |
174 | Torsion type hierarchy:
175 | C-C ([*:1]~[#6:2]~[#6:3]~[*:4])
176 | 7751 ([*:1]~[#6:2]~[#6:3]~[#6:4])
177 | 1068 ([#6!H3:1]~[#6:2]~[#6:3]~[#6:4])
178 | 6774 ([#1H0:1]~[#6:2]~[#6:3]~[#6:4])
179 | 1876 ([#1:1]~[#6:2]~[#6:3]~[#1:4])
180 | C-O ([*:1]~[#6:2]~[#8:3]~[*:4])
181 | 4808 ([*:1]~[#6:2]~[#8:3]~[#1!X4:4])
182 | 8090 ([#6!H1:1]~[#6:2]~[#8:3]~[#1!X4:4])
183 | 8025 ([#6:1]~[#6:2]~[#8:3]~[#6!H3:4])
184 |
185 | ```
186 |
187 | ## More smirky tests
188 |
189 | The results from smirky tests get a bit bulky so we are not storing them on github.
190 | We maintain a public [Google Drive Directory](https://drive.google.com/drive/folders/0BwF2-3puCvfEeWNuNnlsTm1CTlU?usp=sharing)
191 | storing extensive tests on smirky and smarty. Please keep in mind these tests are ongoing, so documentation for the Google Drive is a work in progress.
192 |
--------------------------------------------------------------------------------
/examples/smirky/atom_AND_decorators.smarts:
--------------------------------------------------------------------------------
1 | % Decorator Odds
2 | % Size of smallest ring
3 | r3 0
4 | r4 0
5 | r5 0
6 | r6 0
7 | % Number of rings
8 | R0 0
9 | R1 0
10 | R2 0
11 | R3 0
12 | R4 0
13 | R 0
14 | !R0 0
15 | !R1 0
16 | !R2 0
17 | !R3 0
18 | !R4 0
19 | !R 0
20 | % total connectivity
21 | X1 0
22 | X2 0
23 | X3 0
24 | X4 0
25 | !X1 0
26 | !X2 0
27 | !X3 0
28 | !X4 0
29 | % total hydrogen count
30 | H0 0
31 | !H0 0
32 | H1 0
33 | !H1 0
34 | H2 0
35 | !H2 0
36 | H3 0
37 | !H3 0
38 | % aromatic/aliphatic
39 | a 0
40 | !a 0
41 | A 0
42 | !A 0
43 | % charges
44 | -1 0
45 | +0 0
46 | +1 0
47 | % no decorator
48 | '' 1
49 |
--------------------------------------------------------------------------------
/examples/smirky/atom_OR_bases.smarts:
--------------------------------------------------------------------------------
1 | % Decorator Odds
2 | % elements
3 | [#1] 1
4 | [#5] 0
5 | [#6] 1
6 | [#7] 0
7 | [#8] 1
8 | [#9] 0
9 | [#14] 0
10 | [#15] 0
11 | [#16] 0
12 | [#17] 0
13 | [#35] 0
14 | [#53] 0
15 | % substitution groups
16 | $ewg1 0
17 | $ewg2 0
18 |
--------------------------------------------------------------------------------
/examples/smirky/atom_OR_decorators.smarts:
--------------------------------------------------------------------------------
1 | % Decorator Odds
2 | % Size of smallest ring
3 | r3 0
4 | r4 0
5 | r5 0
6 | r6 0
7 | % Number of rings
8 | R0 0
9 | R1 0
10 | R2 0
11 | R3 0
12 | R4 0
13 | R 0
14 | !R0 0
15 | !R1 0
16 | !R2 0
17 | !R3 0
18 | !R4 0
19 | !R 0
20 | % total connectivity
21 | X1 0
22 | X2 1
23 | X3 0
24 | X4 1
25 | !X1 0
26 | !X2 1
27 | !X3 0
28 | !X4 1
29 | % total hydrogen count
30 | H0 1
31 | !H0 1
32 | H1 1
33 | !H1 1
34 | H2 1
35 | !H2 1
36 | H3 1
37 | !H3 1
38 | % aromatic/aliphatic
39 | a 0
40 | !a 0
41 | A 0
42 | !A 0
43 | % charges
44 | -1 0
45 | +0 0
46 | +1 0
47 | % OR base with no decorator
48 | '' 10
49 |
--------------------------------------------------------------------------------
/examples/smirky/atom_odds_forTorsions.smarts:
--------------------------------------------------------------------------------
1 | % Descriptor odds
2 | % used in the default, all equally likely
3 | all 0
4 | %
5 | % Other options remember to use indices appropriately
6 | 1 10
7 | 2 1
8 | 3 1
9 | 4 10
10 | Indexed 0
11 | Unindexed 5
12 | Alpha 0
13 | Beta 0
14 |
--------------------------------------------------------------------------------
/examples/smirky/bond_AND_decorators.smarts:
--------------------------------------------------------------------------------
1 | % Decorator Odds
2 | @ 0
3 | !@ 0
4 | !# 0
5 | '' 1
6 |
--------------------------------------------------------------------------------
/examples/smirky/bond_OR_bases.smarts:
--------------------------------------------------------------------------------
1 | % Decorator Odds
2 | % bond types
3 | - 1
4 | : 0
5 | = 0
6 | # 0
7 | % not bond types
8 | !- 0
9 | !: 0
10 | != 0
11 | !# 0
12 |
--------------------------------------------------------------------------------
/examples/smirky/bond_odds_forTorsions.smarts:
--------------------------------------------------------------------------------
1 | % Descriptor odds
2 | % used in the default, all equally likely
3 | all 0
4 | %
5 | % Other options remember to use indices appropriately
6 | 1 10
7 | 2 1
8 | 3 10
9 | Indexed 0
10 | Unindexed 20
11 | Alpha 0
12 | Beta 0
13 |
--------------------------------------------------------------------------------
/examples/smirky/initial_Torsions.smarts:
--------------------------------------------------------------------------------
1 | % Torsion fragments to begin with
2 | [*:1]~[*:2]~[*:3]~[*:4] 0
3 | [*:1]~[#6:2]~[#6:3]~[*:4] C-C
4 | [*:1]~[#6:2]~[#8:3]~[*:4] C-O
5 |
--------------------------------------------------------------------------------
/examples/smirky/output.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openforcefield/smarty/882d54b6d6d0fada748c71789964b07be2210a6a/examples/smirky/output.pdf
--------------------------------------------------------------------------------
/examples/smirky/output_results.smarts:
--------------------------------------------------------------------------------
1 | % Results for sampling Torsions at 1.00e-03
2 | %% SMIRKS patterns for final results are below
3 | % followed by a their matched reference SMIRKS from forcefield/Frosst_AlkEthOH.ffxml
4 | %Final Score was 97.471 %
5 | %%
6 | [*:1]~[#6:2]~[#6:3]~[*:4] C-C
7 | % [#1:1]-[#6X4:2]-[#6X4:3]-[#8X2:4] t0012
8 | [*:1]~[#6:2]~[#8:3]~[*:4] C-O
9 | % [a,A:1]-[#6X4:2]-[#8X2:3]-[!#1:4] t0003
10 | [*:1]~[#6:2]~[#8:3]~[#1!X4:4] 4808
11 | % [a,A:1]-[#6X4:2]-[#8X2:3]-[#1:4] t0002
12 | [#6!H1:1]~[#6:2]~[#8:3]~[#1!X4:4] 8090
13 | % [#6X4:1]-[#6X4:2]-[#8X2:3]-[#1:4] t0006
14 | [*:1]~[#6:2]~[#6:3]~[#6:4] 7751
15 | % [a,A:1]-[#6X4:2]-[#6X4:3]-[a,A:4] t0001
16 | [#6!H3:1]~[#6:2]~[#6:3]~[#6:4] 1068
17 | % [#6X4:1]-[#6X4:2]-[#6X4:3]-[#6X4:4] t0007
18 | [#1H0:1]~[#6:2]~[#6:3]~[#6:4] 6774
19 | % [#1:1]-[#6X4:2]-[#6X4:3]-[#6X4:4] t0005
20 | [#6:1]~[#6:2]~[#8:3]~[#6!H3:4] 8025
21 | % [#6X4:1]-[#6X4:2]-[#8X2:3]-[#6X4:4] t0008
22 | [#1:1]~[#6:2]~[#6:3]~[#1:4] 1876
23 | % [#1:1]-[#6X4:2]-[#6X4:3]-[#1:4] t0004
24 |
--------------------------------------------------------------------------------
/examples/smirky/substitutions.smarts:
--------------------------------------------------------------------------------
1 | % Substitution definitions
2 | % Format:
3 | %
4 | % halogens
5 | [#7!-1,#8,#16] ewg2
6 | [#7!-1,#8!-1,#16!-1,$halogen] ewg1
7 | [$smallhals,$largehals] halogen
8 | [#9,#17] smallhals
9 | [#35,#53] largehals
10 |
--------------------------------------------------------------------------------
/oe_license.txt.enc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openforcefield/smarty/882d54b6d6d0fada748c71789964b07be2210a6a/oe_license.txt.enc
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 | from os.path import relpath, join
3 | from setuptools import setup
4 |
def read(fname):
    """
    Return the text of a file located next to this setup script.

    Parameters
    ----------
    fname : str
        File name, joined onto the directory containing this script.

    Returns
    -------
    str
        The full contents of the file.

    """
    path = os.path.join(os.path.dirname(__file__), fname)
    # Use a context manager so the handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(path) as handle:
        return handle.read()
7 |
def find_package_data(data_root, package_root):
    """
    Collect every file found under ``data_root``.

    Parameters
    ----------
    data_root : str
        Directory that is walked recursively.
    package_root : str
        Directory the returned paths are expressed relative to.

    Returns
    -------
    list of str
        One path per file found, relative to ``package_root``.

    """
    return [
        relpath(join(folder, name), package_root)
        for folder, _subdirs, names in os.walk(data_root)
        for name in names
    ]
14 |
# Package metadata and install configuration for smarty.
setup(
    name = "smarty",
    version = "0.1.6",
    author = "John Chodera, David Mobley, and others",
    author_email = "john.chodera@choderalab.org",
    description = ("Automated Bayesian atomtype sampling"),
    license = "MIT",
    keywords = "Bayesian atomtype sampling forcefield parameterization",
    url = "http://github.com/openforcefield/smarty",
    # Package names are dotted module paths, not filesystem paths.
    packages=['smarty', 'smarty.tests', 'smarty.data'],
    long_description=read('README.md'),
    classifiers=[
        "Development Status :: 3 - Alpha",
        "Topic :: Utilities",
        # Trove classifiers must match the official list exactly.
        "License :: OSI Approved :: MIT License",
    ],
    # Installs the `smarty` and `smirky` command-line tools.
    entry_points={'console_scripts': ['smarty = smarty.cli_smarty:main', 'smirky = smarty.cli_smirky:main']},
    # Ship every file under smarty/data with the package.
    package_data={'smarty': find_package_data('smarty/data', 'smarty')},
)
34 |
--------------------------------------------------------------------------------
/smarty/__init__.py:
--------------------------------------------------------------------------------
# Package initializer: re-export the contents of the smarty submodules.
try:
    import openeye
    # These can only be imported if openeye tools are available
    from smarty.atomtyper import *
    from smarty.sampler import *
    from smarty.utils import *
    from smarty.sampler_smirky import *

except Exception as e:
    # Any exception raised while importing the modules above lands here, so
    # the warning below can also appear for failures unrelated to openeye;
    # the printed exception text shows the actual cause.
    print(e)
    print('Warning: Cannot import openeye toolkit; not all functionality will be available.')

# Imported outside the try block, so it is attempted regardless of the outcome above.
from smarty.score_utils import *
14 |
--------------------------------------------------------------------------------
/smarty/atomtyper.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | #=============================================================================================
4 | # MODULE DOCSTRING
5 | #=============================================================================================
6 |
7 | """
8 | atomtyper.py
9 |
10 | Atom type assignment engine using SMARTS strings.
11 |
12 | Authors
13 | -------
14 | John Chodera , Memorial Sloan Kettering Cancer Center and University of California, Berkeley
15 |
16 | The AtomTyper class is based on 'patty' from Pat Walters, Vertex Pharmaceuticals.
17 |
18 | """
19 | #=============================================================================================
20 | # GLOBAL IMPORTS
21 | #=============================================================================================
22 |
23 | import sys
24 | import string
25 |
26 | import os
27 | import math
28 | import copy
29 | import re
30 | import numpy
31 | import random
32 | from smarty import utils
33 |
34 | import openeye.oechem
35 | import openeye.oeomega
36 | import openeye.oequacpac
37 |
38 | from openeye.oechem import *
39 | from openeye.oeomega import *
40 | from openeye.oequacpac import *
41 |
42 | #=============================================================================================
43 | # ATOM TYPER
44 | #=============================================================================================
45 |
class AtomTyper(object):
    """
    Atom typer based on SMARTS-defined atom types.

    Based on 'Patty' implementation by Pat Walters.

    """

    class TypingException(Exception):
        """
        Atom typing exception.

        Carries the molecule and the atom that was left without a type.

        """
        def __init__(self, molecule, atom):
            self.molecule = molecule
            self.atom = atom

        def __str__(self):
            msg = "Atom not assigned: molecule %s : atom index %6d name %8s element %8s" % (self.molecule.GetTitle(), self.atom.GetIdx(), self.atom.GetName(), OEGetAtomicSymbol(self.atom.GetAtomicNum()))
            msg += '\n'
            for atom in self.molecule.GetAtoms():
                # Report each atom's own element; the unassigned atom is
                # flagged separately with '***' below.
                msg += 'atom %8d : name %8s element %8s' % (atom.GetIdx(), atom.GetName(), OEGetAtomicSymbol(atom.GetAtomicNum()))
                if atom == self.atom:
                    msg += ' ***'
                msg += '\n'

            return msg

    def __init__(self, typelist, tagname, replacements=None):
        """
        Create an atom typer instance.

        ARGUMENTS

        typelist : list of [smarts, typename]
            Atom types to assign; one substructure search is built per entry,
            in the order given.
        tagname : str
            Tag name under which the assigned type is stored on each atom.
        replacements : list of [smarts, shortname]
            Substitution/replacement bindings.

        """

        self.pattyTag = OEGetTag(tagname)

        # Create bindings list; each binding is stored as (shortname, smarts).
        bindings = list()
        if replacements is not None:
            for [smarts, shortname] in replacements:
                bindings.append( (shortname, smarts) )

        # Create table of search objects.
        self.smartsList = []
        for [smarts, typename] in typelist:
            # Perform binding replacements
            smarts = OESmartsLexReplace(smarts, bindings)
            # Create SMARTS search
            pat = OESubSearch()
            # NOTE(review): the return value of Init is not checked here --
            # confirm how an unparsable SMARTS string should be handled.
            pat.Init(smarts)
            pat.SetMaxMatches(0)
            self.smartsList.append([pat, typename, smarts])

    def dump(self):
        """Print every stored (search object, type name, SMARTS) entry."""
        for pat, typename, smarts in self.smartsList:
            print(pat, typename, smarts)

    def assignTypes(self, mol, element = 0):
        """
        Assign a type to the atoms of `mol` and store it under this typer's tag.

        Patterns are applied in list order, so a later matching pattern
        overwrites the type written by an earlier one.

        ARGUMENTS

        mol : molecule to type (modified in place)
        element : int
            If greater than 0, only atoms with this atomic number are checked
            for a missing type; otherwise every atom is checked.

        RAISES

        AtomTyper.TypingException if a checked atom has no type assigned.

        """
        # Assign null types.
        for atom in mol.GetAtoms():
            atom.SetStringData(self.pattyTag, "")

        # Assign atom types using rules.
        OEAssignAromaticFlags(mol)
        for pat, typename, smarts in self.smartsList:
            OEPrepareSearch(mol, pat)
            for matchbase in pat.Match(mol):
                for matchpair in matchbase.GetAtoms():
                    matchpair.target.SetStringData(self.pattyTag, typename)

        # Check if any atoms remain unassigned.
        if element > 0:
            mol_atoms = mol.GetAtoms(OEHasAtomicNum(element))
        else:
            mol_atoms = mol.GetAtoms()
        for atom in mol_atoms:
            if atom.GetStringData(self.pattyTag) == "":
                raise AtomTyper.TypingException(mol, atom)

    def debugTypes(self, mol):
        """Print index, element symbol and assigned type for each atom of `mol`."""
        for atom in mol.GetAtoms():
            print("%6d %8s %8s" % (atom.GetIdx(), OEGetAtomicSymbol(atom.GetAtomicNum()), atom.GetStringData(self.pattyTag)))

    def getTypeList(self, mol):
        """Return the assigned type of every atom of `mol`, in atom iteration order."""
        return [atom.GetStringData(self.pattyTag) for atom in mol.GetAtoms()]

    @classmethod
    def read_typelist(cls, filename):
        """
        Read an atomtype or decorator list from a file.

        Everything from the first '%' on a line is treated as a comment.
        The first whitespace-separated token is the SMARTS string and the
        remaining tokens, joined by single spaces, form the type name.
        Lines with fewer than two tokens are ignored.

        Parameters
        ----------
        filename : str
            The name of the file to be read. If it does not exist as given,
            it is looked up through ``utils.get_data_filename``.

        Returns
        -------
        typelist : list of lists
            Typelist[i] is element i of the typelist in format [smarts, typename].
            None is returned when `filename` is None.

        Raises
        ------
        Exception
            If the file cannot be found, or if a type name appears twice.

        """
        if filename is None:
            return None

        if not os.path.exists(filename):
            built_in = utils.get_data_filename(filename)
            if not os.path.exists(built_in):
                raise Exception("File '%s' not found." % filename)
            filename = built_in

        typelist = list()
        used_typenames = set()
        # The context manager closes the file even when a duplicate name
        # raises part-way through.
        with open(filename) as ifs:
            for line in ifs:
                # Strip trailing comments
                index = line.find('%')
                if index != -1:
                    line = line[0:index]
                # Split into tokens.
                tokens = line.split()
                # Process if we have enough tokens
                if len(tokens) >= 2:
                    smarts = tokens[0]
                    typename = ' '.join(tokens[1:])
                    if typename in used_typenames:
                        raise Exception("Error in file '%s' -- each entry must "
                                        "have a unique name." % filename )
                    typelist.append([smarts, typename])
                    used_typenames.add(typename)

        return typelist
198 |
--------------------------------------------------------------------------------
/smarty/cli_smarty.py:
--------------------------------------------------------------------------------
1 | """
2 | Command-line driver example for SMARTY.
3 |
4 | """
5 |
6 | import sys
7 | import string
8 |
9 | from optparse import OptionParser # For parsing of command line arguments
10 |
11 | import os
12 | import math
13 | import copy
14 | import re
15 | import numpy
16 | import random
17 |
18 | import smarty
19 | from openforcefield.utils import utils
20 |
def main():
    """
    Command-line entry point for smarty.

    Parses the command-line options, loads the molecule set (and the optional
    reference-typed set), builds a ``smarty.AtomTypeSampler`` and runs it for
    the requested number of iterations. When a plot file is requested, a plot
    is created from the trajectory file afterwards.
    """
    # Create command-line argument options.
    usage_string = """\
Sample over atom types, optionally attempting to match atom types in a reference typed set of molecules.

usage: %prog --basetypes smartsfile --initialtypes smartsfile --decorators smartsfile [--substitutions smartsfile] --molecules molfile [--reference molfile] --iterations niterations [--temperature temperature]

example:

python %prog --basetypes=atomtypes/basetypes.smarts --initialtypes=atomtypes/initialtypes.smarts --decorators=atomtypes/decorators.smarts --substitutions=atomtypes/substitutions.smarts \
--molecules=molecules/zinc-subset-tripos.mol2.gz --reference=molecules/zinc-subset-parm@frosst.mol2.gz --iterations 1000 --temperature=0.1

"""
    # optparse only expands "%prog"; the version text has to be supplied
    # explicitly. Fall back to "unknown" when the package exposes no version.
    version_string = "%prog " + str(getattr(smarty, "__version__", "unknown"))
    parser = OptionParser(usage=usage_string, version=version_string)

    parser.add_option("-e", "--element", metavar='ELEMENT',
            action="store", type="int", dest='element', default=0,
            help= "By default the element value is 0 corresponding to sampling all atomtypes. If another atomic number is specified only atoms with that atomic number are sampled (i.e. --element=8 will only sample atomtypes for oxygen atoms).")


    parser.add_option("-b", "--basetypes", metavar='BASETYPES',
            action="store", type="string", dest='basetypes_filename', default=None,
            help="Filename defining base or generic atom types as SMARTS atom matches; these are indestructible and normally are elemental atom types.")

    # The metavar names this option's own argument in the help output.
    parser.add_option("-f", "--initialtypes", metavar='INITIALTYPES',
            action="store", type="string", dest='initialtypes_filename', default=None,
            help="Filename defining initial (first) atom types as SMARTS atom matches.")

    parser.add_option("-d", "--decorators", metavar='DECORATORS',
            action="store", type="string", dest='decorators_filename', default=None,
            help="Filename defining decorator atom types as SMARTS atom matches.")

    parser.add_option("-s", "--substitutions", metavar="SUBSTITUTIONS",
            action="store", type="string", dest='substitutions_filename', default=None,
            help="Filename defining substitution definitions for SMARTS atom matches (OPTIONAL).")

    parser.add_option("-r", "--reference", metavar="REFMOL",
            action="store", type="string", dest='reference_molecules_filename', default=None,
            help="Reference typed molecules for computing likelihood (must match same molecule and atom ordering in molecules file) (OPTIONAL).")

    parser.add_option("-m", "--molecules", metavar='MOLECULES',
            action="store", type="string", dest='molecules_filename', default=None,
            help="Small molecule set (in any OpenEye compatible file format) containing 'dG(exp)' fields with experimental hydration free energies.")

    parser.add_option("-i", "--iterations", metavar='ITERATIONS',
            action="store", type="int", dest='iterations', default=150,
            help="MCMC iterations.")

    parser.add_option("-t", "--temperature", metavar='TEMPERATURE',
            action="store", type="float", dest='temperature', default=0.1,
            help="Effective temperature for Monte Carlo acceptance, indicating fractional tolerance of mismatched atoms (default: 0.1). If 0 is specified, will behave in a greedy manner.")

    parser.add_option("-l", '--trajectory', metavar="TRAJECTORY_FILE",
            action = "store", dest = "traj_file", default = "trajectory.csv",
            help = "Name for trajectory file output, trajectory saves only changes to the list of 'atomtypes' for each iteration. If the file already exists, it is overwritten.")

    parser.add_option("-p", '--plot', metavar="PLOT_FILE",
            action = "store", dest = "plot_file", default = None,
            help = "Name for output file of a plot of the score versus time. If not specified, none will be written. If provided, needs to use a file extension suitable for matplotlib/pylab. Currently requires a trajectory file to be written using -l or --trajectory.")


    parser.add_option("-x", "--decoratorbehavior", metavar='DECORATOR_BEHAVIOR',
            action="store", type="string", dest='decorator_behavior', default='combinatorial-decorators',
            help="Choose between simple-decorators or combinatorial-decorators (default = combinatorial-decorators).")

    verbose = True

    # Parse command-line arguments.
    (options,args) = parser.parse_args()

    # Ensure all required options have been specified.
    if (options.basetypes_filename is None) or (options.decorators_filename is None) or (options.molecules_filename is None):
        parser.print_help()
        parser.error("All input files must be specified.")

    # Ensure the Decorator Behavior option has been specified right
    if options.decorator_behavior not in ('simple-decorators', 'combinatorial-decorators'):
        parser.print_help()
        parser.error("Option not valid for decorator behavior.")

    # Reject an invalid element before any molecule file is read, so the
    # user gets the error without waiting for the input to load.
    if options.element < 0:
        parser.print_help()
        parser.error("Element number must be 0 for all atoms or an integer greater than 0 for an atomic number")

    # Load and type all molecules in the specified dataset.
    molecules = utils.read_molecules(options.molecules_filename, verbose=True)

    # Read reference typed molecules, if specified.
    reference_typed_molecules = None
    if options.reference_molecules_filename is not None:
        reference_typed_molecules = utils.read_molecules(options.reference_molecules_filename, verbose=True)

    # Construct atom type sampler.
    if options.element == 0:
        if verbose: print("Sampling all atomtypes")
    else:
        if verbose: print("Sampling atoms with atomic number %i" % options.element)
    atomtype_sampler = smarty.AtomTypeSampler(molecules, options.basetypes_filename, options.initialtypes_filename, options.decorators_filename, replacements_filename=options.substitutions_filename, reference_typed_molecules=reference_typed_molecules, verbose=verbose, temperature=options.temperature, decorator_behavior=options.decorator_behavior, element = options.element)

    # Start sampling atom types.
    atomtype_sampler.run(options.iterations, options.traj_file)

    # The plot is built from the trajectory file written by the run above.
    if options.plot_file is not None:
        if options.traj_file is None:
            print("Cannot create plot file without a trajectory file")
        else:
            smarty.score_utils.create_plot_file(options.traj_file, options.plot_file, False, verbose)
128 |
--------------------------------------------------------------------------------
/smarty/cli_smirky.py:
--------------------------------------------------------------------------------
1 | """
2 | Command-line driver example for SMIRKY.
3 |
4 | """
5 |
6 | import sys
7 | import string
8 | import time
9 |
10 | from optparse import OptionParser # For parsing of command line arguments
11 | import smarty
12 | from openforcefield.utils import utils
13 |
14 | import os
15 | import math
16 | import copy
17 | import re
18 | import numpy
19 | from numpy import random
20 |
21 | def main():
22 | # Create command-line argument options.
23 | usage_string = """\
24 | Sample over fragment types (atoms, bonds, angles, torsions, or impropers)
25 | optionally attempting to match created types to an established SMIRFF.
26 | For all files left blank, they will be taken from this module's
27 | data/odds_files/ subdirectory.
28 |
29 | usage %prog --molecules molfile --typetag fragmentType
30 | [--atomORbases AtomORbaseFile --atomORdecors AtomORdecorFile
31 | --atomANDdecors AtomANDdecorFile --bondORbase BondORbaseFile
32 | --bondANDdecors BondANDdecorFile --atomIndexOdds AtomIndexFile
33 | --bondIndexOdds BondIndexFile --replacements substitutions
34 | --initialtypes initialFragmentsFile --SMIRFF referenceSMIRFF
35 | --temperature float --verbose verbose
36 | --iterations iterations --output outputFile]
37 |
38 | example:
39 | smirky --molecules AlkEthOH_test_filt1_ff.mol2 --typetag Angle
40 |
41 | """
42 | version_string = "%prog %__version__"
43 | parser = OptionParser(usage=usage_string, version=version_string)
44 |
45 | parser.add_option("-m", "--molecules", metavar='MOLECULES',
46 | action="store", type="string", dest='molecules_filename', default=None,
47 | help="Small molecule set (in any OpenEye compatible file format) containing 'dG(exp)' fields with experimental hydration free energies. This filename can also be an option in this module's data/molecules sub-directory")
48 | #TODO: ask about the the dG(exp) fields?
49 |
50 | parser.add_option("-T", "--typetag", metavar='TYPETAG',
51 | action = "store", type="choice", dest='typetag',
52 | default=None, choices = ['VdW', 'Bond', 'Angle', 'Torsion', 'Improper'],
53 | help="type of fragment being sampled, options are 'VdW', 'Bond', 'Angle', 'Torsion', 'Improper'")
54 |
55 | parser.add_option('-e', '--atomORbases', metavar="DECORATORS",
56 | action='store', type='string', dest='atom_OR_bases',
57 | default = 'odds_files/atom_OR_bases.smarts',
58 | help="Filename defining atom OR bases and associated probabilities. These are combined with atom OR decorators in SMIRKS, for example in '[#6X4,#7X3;R2:2]' '#6' and '#7' are atom OR bases. (OPTIONAL)")
59 |
60 | parser.add_option("-O", "--atomORdecors", metavar="DECORATORS",
61 | action='store', type='string', dest='atom_OR_decorators',
62 | default = 'odds_files/atom_decorators.smarts',
63 | help="Filename defining atom OR decorators and associated probabilities. These are combined with atom bases in SMIRKS, for example in '[#6X4,#7X3;R2:2]' 'X4' and 'X3' are ORdecorators. (OPTIONAL)")
64 |
65 | parser.add_option('-A', '--atomANDdecors', metavar="DECORATORS",
66 | action='store', type='string', dest='atom_AND_decorators',
67 | default='odds_files/atom_decorators.smarts',
68 | help="Filename defining atom AND decorators and associated probabilities. These are added to the end of an atom's SMIRKS, for example in '[#6X4,#7X3;R2:2]' 'R2' is an AND decorator. (OPTIONAL)")
69 |
70 | parser.add_option('-o', '--bondORbase', metavar="DECORATORS",
71 | action='store', type='string', dest='bond_OR_bases',
72 | default='odds_files/bond_OR_bases.smarts',
73 | help="Filename defining bond OR bases and their associated probabilities. These are OR'd together to describe a bond, for example in '[#6]-,=;@[#6]' '-' and '=' are OR bases. (OPTIONAL)")
74 |
75 | parser.add_option('-a', '--bondANDdecors', metavar="DECORATORS",
76 | action="store", type='string', dest='bond_AND_decorators',
77 | default='odds_files/bond_AND_decorators.smarts',
78 | help="Filename defining bond AND decorators and their associated probabilities. These are AND'd to the end of a bond, for example in '[#6]-,=;@[#7]' '@' is an AND decorator.(OPTIONAL)")
79 |
80 | parser.add_option('-D', '--atomOddsFile', metavar="ODDSFILE",
81 | action="store", type="string", dest="atom_odds",
82 | default='odds_files/atom_index_odds.smarts',
83 | help="Filename defining atom descriptors and probabilities with making changes to that kind of atom. Options for descriptors are integers corresponding to that indexed atom, 'Indexed', 'Unindexed', 'Alpha', 'Beta', 'All'. (OPTIONAL)")
84 |
85 | parser.add_option('-d', '--bondOddsFile', metavar="ODDSFILE",
86 | action="store", type="string", dest="bond_odds",
87 | default='odds_files/bond_index_odds.smarts',
88 | help="Filename defining bond descriptors and probabilities with making changes to that kind of bond. Options for descriptors are integers corresponding to that indexed bond, 'Indexed', 'Unindexed', 'Alpha', 'Beta', 'All'. (OPTIONAL)")
89 |
90 | parser.add_option("-s", "--substitutions", metavar="SUBSTITUTIONS",
91 | action="store", type="string", dest='substitutions_filename',
92 | default=None,
93 | help="Filename defining substitution definitions for SMARTS atom matches. (OPTIONAL).")
94 |
95 | parser.add_option("-f", "--initialtypes", metavar='INITIALTYPES',
96 | action="store", type="string", dest='initialtypes_filename',
97 | default=None,
98 | help="Filename defining initial fragment types. The file is formatted with two columns: 'SMIRKS typename'. For the default the initial type will be a generic form of the given fragment, for example '[*:1]~[*:2]' for a bond (OPTIONAL)")
99 |
100 | parser.add_option('-r', '--smirff', metavar='REFERENCE',
101 | action='store', type='string', dest='SMIRFF',
102 | default=None,
103 | help="Filename defining a SMIRFF force fielce used to determine reference fragment types in provided set of molecules. It may be an absolute file path, a path relative to the current working directory, or a path relative to this module's data subdirectory (for built in force fields). (OPTIONAL)")
104 |
105 | parser.add_option("-i", "--iterations", metavar='ITERATIONS',
106 | action="store", type="int", dest='iterations',
107 | default=150,
108 | help="MCMC iterations.")
109 |
110 | parser.add_option("-t", "--temperature", metavar='TEMPERATURE',
111 | action="store", type="float", dest='temperature',
112 | default=0.1,
113 | help="Effective temperature for Monte Carlo acceptance, indicating fractional tolerance of mismatched atoms (default: 0.1). If 0 is specified, will behave in a greedy manner.")
114 |
115 | parser.add_option("-p", "--output", metavar='OUTPUT',
116 | action="store", type="string", dest='outputfile',
117 | default=None,
118 | help="Filename base for output information. This same base will be used for all output files created. If None provided then it is set to 'typetag_temperature' (OPTIONAL).")
119 |
120 | parser.add_option('-v', '--verbose', metavar='VERBOSE',
121 | action='store', type='choice', dest='verbose',
122 | default=False, choices = ['True', 'False'],
123 | help="If True prints minimal information to the commandline during iterations. (OPTIONAL)")
124 |
125 | # Parse command-line arguments.
126 | (option,args) = parser.parse_args()
127 |
128 | # Molecules are required
129 | if option.molecules_filename is None:
130 | parser.print_help()
131 | parser.error("Molecules input files must be specified.")
132 |
133 | verbose = option.verbose == 'True'
134 | # Load and type all molecules in the specified dataset.
135 | molecules = utils.read_molecules(option.molecules_filename, verbose=verbose)
136 |
137 | # Parse input odds files
138 | atom_OR_bases = smarty.parse_odds_file(option.atom_OR_bases, verbose)
139 | atom_OR_decorators = smarty.parse_odds_file(option.atom_OR_decorators, verbose)
140 | atom_AND_decorators = smarty.parse_odds_file(option.atom_AND_decorators, verbose)
141 | bond_OR_bases = smarty.parse_odds_file(option.bond_OR_bases, verbose)
142 | bond_AND_decorators = smarty.parse_odds_file(option.bond_AND_decorators, verbose)
143 | atom_odds = smarty.parse_odds_file(option.atom_odds, verbose)
144 | bond_odds = smarty.parse_odds_file(option.bond_odds, verbose)
145 |
146 | # get initial types if provided, otherwise none
147 | if option.initialtypes_filename is None:
148 | initialtypes = None
149 | else:
150 | initialtypes = smarty.AtomTyper.read_typelist(option.initialtypes_filename)
151 |
152 | output = option.outputfile
153 | if output is None:
154 | output = "%s_%.2e" % ( option.typetag, option.temperature)
155 | # get replacements
156 | if option.substitutions_filename is None:
157 | sub_file = smarty.get_data_filename('odds_files/substitutions.smarts')
158 | else:
159 | sub_file = option.substitutions_filename
160 | replacements = smarty.AtomTyper.read_typelist(sub_file)
161 | replacements = [ (short, smarts) for (smarts, short) in replacements]
162 |
163 | start_sampler = time.time()
164 | fragment_sampler = smarty.FragmentSampler(
165 | molecules, option.typetag, atom_OR_bases, atom_OR_decorators,
166 | atom_AND_decorators, bond_OR_bases, bond_AND_decorators,
167 | atom_odds, bond_odds, replacements, initialtypes,
168 | option.SMIRFF, option.temperature, output)
169 | # report time
170 | finish_sampler = time.time()
171 | elapsed = finish_sampler - start_sampler
172 | if verbose: print("Creating %s sampler took %.3f s" % (option.typetag, elapsed))
173 |
174 | # Make iterations
175 | frac_found = fragment_sampler.run(option.iterations, verbose)
176 | results = fragment_sampler.write_results_smarts_file()
177 | finished = time.time()
178 | elapsed = finished - finish_sampler
179 | per_it = elapsed / float(option.iterations)
180 | if verbose: print("%i iterations took %.3f s (%.3f s / iteration)" % (option.iterations, elapsed, per_it))
181 | if verbose: print("Final score was %.3f %%" % (frac_found*100.0))
182 |
183 | # plot results
184 | plot_file = "%s.pdf" % output
185 | traj = "%s.csv" % output
186 | smarty.score_utils.create_plot_file(traj, plot_file, False, verbose)
187 |
--------------------------------------------------------------------------------
/smarty/data/README.md:
--------------------------------------------------------------------------------
1 | # Data used by smarty
2 |
3 | ## Manifest
4 | - `atomtypes` - contains files used by smarty to determine how it samples over atom types
5 | - `odds_files` - contains odds files used by smirky to influence sampling
6 |
--------------------------------------------------------------------------------
/smarty/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openforcefield/smarty/882d54b6d6d0fada748c71789964b07be2210a6a/smarty/data/__init__.py
--------------------------------------------------------------------------------
/smarty/data/atomtypes/README.md:
--------------------------------------------------------------------------------
1 | # Atom type SMARTS components
2 |
3 | ## Formats
4 | SMARTS files are used as input for the smarty sampler.
5 | There are a variety of types, detailed below. All follow
6 | the same general format.
7 |
8 | Comments beginning with `%` are ignored throughout the file.
9 | Each line has the format
10 | ```
11 | <SMARTS> <typename>
12 | ```
13 | where `<SMARTS>` is an [OpenEye SMARTS string](https://docs.eyesopen.com/toolkits/cpp/oechemtk/SMARTS.html) and `<typename>` is a human-readable typename associated with that atom type.
14 |
15 | Atom type definitions are hierarchical, with the last match in the file taking precedence over earlier matches.
16 |
17 | ### Initial and Base types
18 |
19 | These are both used to initialize the smarty sampler.
20 | `basetypes` are considered more generic.
21 | These are the atomtypes used to create new atomtypes.
22 | See the file `basetypes.smarts`.
23 |
24 | `initial` types can be more complex,
25 | for example the files
26 | `initialtypes.smarts` or `initial\_AlkEthOH.smarts`.
27 |
28 | As a best practice, base and initial types should be listed from most to
29 | least general.
30 |
31 | ### Simple and Combinatorial Decorators
32 |
33 | A `decorators` file contains a list of SMARTS decorators, each followed by a human-readable `<decoratorname>`.
34 |
35 | In smarty, when using simple decorators, the new atomtypes are created only
36 | by ANDing the decorator SMARTS component to the parent atomtype (using the `&` operator).
37 | The human-readable `<decoratorname>` is appended (with a space) to the parent name to keep a human-readable annotation of the proposed child atom type.
38 |
39 |
40 | Example simple decorators are in *`decorators.smarts`* and are typically more complicated as they must include all
41 | ways of generating new atomtypes
42 |
43 | Combinatorial decorators use a more complex set of rules to generate new SMARTS strings.
44 | In this case, bonded atoms are found in the basetypes, so only "non-bonding decorators" need to be
45 | in the decorator file.
46 | For example, see *`new-decorators.smarts`*.
47 |
48 | ### Substitutions
49 |
50 | It is often convenient to define various tokens that are substituted for more sophisticated SMARTS expressions.
51 |
52 | For example, we could define some elemental substitutions along with some substitutions for halogens:
53 | ```
54 | % elements
55 | [#9] fluorine
56 | [#17] chlorine
57 | [#35] bromine
58 | [#53] iodine
59 |
60 | % halogens
61 | [$smallhals,$largehals] halogen
62 | [$fluorine,$chlorine] smallhals
63 | [$bromine,$iodine] largehals
64 | ```
65 |
66 | The [`OESmartsLexReplace`](http://docs.eyesopen.com/toolkits/python/oechemtk/OEChemFunctions/OESmartsLexReplace.html) function is used to implement these replacements.
67 |
68 | ## Manifest
69 | * `basetypes.smarts` - basetypes file with elemental atom types - this is a good choice to begin with
70 | * `initialtypes.smarts` - initial types file with more sophisticated atom types
71 | * `initial\_AlkEthOH.smarts` - the "answer" SMARTS strings for the AlkEthOH molecule set
72 | * `decorators.smarts` - `decorators` file with a variety of decorators
73 | * `decorators-simple.smarts` - minimal `decorators` file for testing
74 | * `new-decorators.smarts` - decorators file without bond information (new modular framework)
75 | * `replacements.smarts` - minimal `substitutions` (replacements) file
76 |
--------------------------------------------------------------------------------
/smarty/data/atomtypes/basetypes.smarts:
--------------------------------------------------------------------------------
1 | % atom types
2 | [*] any_atom
3 | [$ewg1] ewg1
4 | [$ewg2] ewg2
5 | [#1] hydrogen
6 | [#6] carbon
7 | [#7] nitrogen
8 | [#8] oxygen
9 | [#9] fluorine
10 | [#15] phosphorous
11 | [#16] sulfur
12 | [#17] chlorine
13 | [#34] selenium
14 | [#35] bromine
15 | [#53] iodine
16 |
--------------------------------------------------------------------------------
/smarty/data/atomtypes/decorators-simple.smarts:
--------------------------------------------------------------------------------
1 | % aromatic/aliphatic
2 | a aromatic
3 | A aliphatic
4 | % halogens
5 | $(*~[$halogen]) halogen-adjacent
6 |
--------------------------------------------------------------------------------
/smarty/data/atomtypes/decorators.smarts:
--------------------------------------------------------------------------------
1 | % bond order
2 | $([*]=[*]) double-bonded
3 | $([*]#[*]) triple-bonded
4 | $([*]:[*]) aromatic-bonded
5 | % bonded to atoms
6 | $(*~[#1]) hydrogen-adjacent
7 | $(*~[#6]) carbon-adjacent
8 | $(*~[#7]) nitrogen-adjacent
9 | $(*~[#8]) oxygen-adjacent
10 | $(*~[#9]) fluorine-adjacent
11 | $(*~[#15]) phosphorous-adjacent
12 | $(*~[#16]) sulfur-adjacent
13 | $(*~[#17]) chlorine-adjacent
14 | $(*~[#35]) bromine-adjacent
15 | $(*~[#53]) iodine-adjacent
16 | % degree
17 | D1 degree-1
18 | D2 degree-2
19 | D3 degree-3
20 | D4 degree-4
21 | D5 degree-5
22 | D6 degree-6
23 | % valence
24 | v1 valence-1
25 | v2 valence-2
26 | v3 valence-3
27 | v4 valence-4
28 | v5 valence-5
29 | v6 valence-6
30 | % total-h-count
31 | H1 total-h-count-1
32 | H2 total-h-count-2
33 | H3 total-h-count-3
34 | % aromatic/aliphatic
35 | a aromatic
36 | A aliphatic
37 | % halogens
38 | $(*~[$halogen]) halogen-adjacent
39 | $(*~[$smallhals]) small-halogen-adjacent
40 | $(*~[$largehals]) large-halogen-adjacent
41 |
--------------------------------------------------------------------------------
/smarty/data/atomtypes/initial_AlkEthOH.smarts:
--------------------------------------------------------------------------------
1 | % atom types
2 | [$([#1]-C)] hydrogen-carbon
3 | [$([#1]-C-[#7,#8,F,#16,Cl,Br])] hydrogen-carbon-ewd
4 | [$([#1]-C(-[#7,#8,F,#16,Cl,Br])-[#7,#8,F,#16,Cl,Br])] hydrogen-carbon-ewd2
5 | [$([#1]-C(-[#7,#8,F,#16,Cl,Br])(-[#7,#8,F,#16,Cl,Br])-[#7,#8,F,#16,Cl,Br])] hydrogen-carbon-ewd3
6 | [#1$(*-[#8])] hydrogen-oxygen
7 | [#6X4] carbon-tet
8 | [#8X2] oxygen-dival
9 | [#8X2+0$(*-[#1])] oxygen-hydrogen
10 |
--------------------------------------------------------------------------------
/smarty/data/atomtypes/initialtypes.smarts:
--------------------------------------------------------------------------------
1 | % atom types
2 | [#1] hydrogen
3 | [#6] carbon
4 | [#6&a] carbon aromatic
5 | [#7] nitrogen
6 | [#8] oxygen
7 | [#9] fluorine
8 | [#15] phosphorous
9 | [#16] sulfur
10 | [#17] chlorine
11 | [#35] bromine
12 | [#53] iodine
13 |
--------------------------------------------------------------------------------
/smarty/data/atomtypes/new-decorators.smarts:
--------------------------------------------------------------------------------
1 | % Size of smallest ring
2 | r3
3 | r4
4 | r5
5 | r6
6 | % Number of ring bonds
7 | R0
8 | R2
9 | R3
10 | R4
11 | R
12 | % total connectivity
13 | X1 connections-1
14 | X2 connections-2
15 | X3 connections-3
16 | X4 connections-4
17 | % total-h-count
18 | H0 total-h-count-0
19 | H1 total-h-count-1
20 | H2 total-h-count-2
21 | H3 total-h-count-3
22 | % formal charge
23 | +0 neutral
24 | +1 cationic+1
25 | -1 anionic-1
26 | % aromatic/aliphatic
27 | a aromatic
28 | A aliphatic
29 |
--------------------------------------------------------------------------------
/smarty/data/atomtypes/replacements.smarts:
--------------------------------------------------------------------------------
1 | % Substitution definitions
2 | % Format:
3 | % <SMARTS> <substitution_name>
4 |
5 | % elements
6 | [#1] hydrogen
7 | [#6] carbon
8 | [#7] nitrogen
9 | [#8] oxygen
10 | [#9] fluorine
11 | [#15] phosphorous
12 | [#16] sulfur
13 | [#17] chlorine
14 | [#35] bromine
15 | [#53] iodine
16 |
17 | % electron withdrawing groups
18 | [#7!-1,#8,#16] ewg2
19 | [#7!-1,#8!-1,#16!-1,$halogen] ewg1
20 |
21 | % halogens
22 | [$smallhals,$largehals] halogen
23 | [$fluorine,$chlorine] smallhals
24 | [$bromine,$iodine] largehals
25 |
--------------------------------------------------------------------------------
/smarty/data/odds_files/atom_OR_bases.smarts:
--------------------------------------------------------------------------------
1 | % Decorator Odds
2 | % elements
3 | [#1]
4 | [#5]
5 | [#6]
6 | [#7]
7 | [#8]
8 | [#9]
9 | [#14]
10 | [#15]
11 | [#16]
12 | [#17]
13 | [#35]
14 | [#53]
15 | % substitution groups
16 | $ewg1
17 | $ewg2
18 |
--------------------------------------------------------------------------------
/smarty/data/odds_files/atom_decorators.smarts:
--------------------------------------------------------------------------------
1 | % Decorator Odds
2 | % Size of smallest ring
3 | r3
4 | r4
5 | r5
6 | r6
7 | % Number of ring bonds
8 | R0
9 | R2
10 | R3
11 | R4
12 | R
13 | !R0
14 | !R2
15 | !R3
16 | !R4
17 | !R
18 | % total connectivity
19 | X1
20 | X2
21 | X3
22 | X4
23 | !X1
24 | !X2
25 | !X3
26 | !X4
27 | % total hydrogen count
28 | H0
29 | !H0
30 | H1
31 | !H1
32 | H2
33 | !H2
34 | H3
35 | !H3
36 | % aromatic/aliphatic
37 | a
38 | !a
39 | A
40 | !A
41 | % charges
42 | -1
43 | +0
44 | +1
45 | % no decorator
46 | ''
47 |
--------------------------------------------------------------------------------
/smarty/data/odds_files/atom_index_odds.smarts:
--------------------------------------------------------------------------------
1 | % Descriptor odds
2 | % used in the default, all equally likely
3 | all 1
4 | %
5 | % Other options remember to use indices appropriately
6 | 1 0
7 | 2 0
8 | 3 0
9 | 4 0
10 | Indexed 0
11 | Unindexed 0
12 | Alpha 0
13 | Beta 0
14 |
--------------------------------------------------------------------------------
/smarty/data/odds_files/bond_AND_decorators.smarts:
--------------------------------------------------------------------------------
1 | % Decorator Odds
2 | @ 1
3 | !@ 1
4 | !# 0
5 |
--------------------------------------------------------------------------------
/smarty/data/odds_files/bond_OR_bases.smarts:
--------------------------------------------------------------------------------
1 | % Decorator Odds
2 | % bond types
3 | -
4 | :
5 | =
6 | #
7 | % not bond types
8 | !-
9 | !:
10 | !=
11 | !#
12 |
--------------------------------------------------------------------------------
/smarty/data/odds_files/bond_index_odds.smarts:
--------------------------------------------------------------------------------
1 | % Descriptor odds
2 | % used in the default, all equally likely
3 | all 1
4 | %
5 | % Other options remember to use indices appropriately
6 | 1 0
7 | 2 0
8 | 3 0
9 | Indexed 0
10 | Unindexed 0
11 | Alpha 0
12 | Beta 0
13 |
--------------------------------------------------------------------------------
/smarty/data/odds_files/substitutions.smarts:
--------------------------------------------------------------------------------
1 | % Substitution definitions
2 | % Format:
3 | % <SMARTS> <substitution_name>
4 | % halogens
5 | [#7!-1,#8,#16] ewg2
6 | [#7!-1,#8!-1,#16!-1,$halogen] ewg1
7 | [$smallhals,$largehals] halogen
8 | [#9,#17] smallhals
9 | [#35,#53] largehals
10 |
--------------------------------------------------------------------------------
/smarty/sampler.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | #=============================================================================================
4 | # MODULE DOCSTRING
5 | #=============================================================================================
6 |
7 | """
8 | smarty.py
9 | Example illustrating a scheme to create and destroy atom types automatically using SMARTS.
10 | AUTHORS
11 | John Chodera , Memorial Sloan Kettering Cancer Center.
12 | Additional contributions from the Mobley lab, UC Irvine, including David Mobley, Caitlin Bannan, and Camila Zanette.
13 | The AtomTyper class is based on 'patty' by Pat Walters, Vertex Pharmaceuticals.
14 | """
15 | #=============================================================================================
16 | # GLOBAL IMPORTS
17 | #=============================================================================================
18 |
19 | import os
20 | import copy
21 | import re
22 | import numpy
23 | import random
24 |
25 | import openeye.oechem
26 | import openeye.oeomega
27 | import openeye.oequacpac
28 |
29 | from openeye.oechem import *
30 | from openeye.oeomega import *
31 | from openeye.oequacpac import *
32 |
33 | import networkx as nx
34 |
35 | import time
36 |
37 | from smarty.atomtyper import AtomTyper
38 | from smarty.score_utils import load_trajectory
39 | from smarty.score_utils import scores_vs_time
40 |
41 | #=============================================================================================
42 | # ATOMTYPE SAMPLER
43 | #=============================================================================================
44 |
45 | class AtomTypeSampler(object):
46 | """
47 | Atom type sampler.
48 | """
def __init__(self, molecules, basetypes_filename, initialtypes_filename, decorators_filename, replacements_filename=None, reference_typed_molecules=None, temperature=0.1, verbose=False, decorator_behavior='combinatorial-decorators', element = 0):
    """
    Initialize an atom type sampler.

    ARGUMENTS

    molecules : list of molecules for typing
        List of molecules for typing
    basetypes_filename : str
        File defining base/generic atom types (which cannot be destroyed); often these are elemental types
    initialtypes_filename : str
        File defining initial atom types (which CAN be destroyed, except for those which occur in basetypes_filename)
    decorators_filename : str
        File containing decorators that can be added to existing types to generate subtypes
    replacements_filename : str, optional, default=None
        If specified, SMARTS replacement definitions will be read from this file
    reference_typed_molecules : list of OEMol, optional, default=None
        List of molecules with reference types for use in Monte Carlo acceptance.
        Entries are looked up by the same index as `molecules`, so the two lists must be parallel.
        If specified, the likelihood function will utilize the maximal number of matched atom types with these molecules.
        If not specified, no likelihood function will be employed.
    temperature : float, optional, default=0.1
        Temperature for Monte Carlo acceptance/rejection
    verbose : bool, optional, default=False
        If True, verbose output will be printed.
    decorator_behavior : string either "combinatorial-decorators" or "simple-decorators"
        simple decorators include bonded atoms as decorators
    element : integer >= 0
        If 0 all atomtypes sampled, otherwise only atomtypes of that atomic number are sampled

    Notes
    -----
    This is just a proof of concept for chemical perception sampling.
    Scoring for proposed atomtypes is based on reference atomtypes.
    No scoring of molecular properties is performed
    """
    # store simple input information
    self.verbose = verbose
    self.decorator_behavior = decorator_behavior
    self.typetag = 'atomtype' # internal tag
    self.temperature = temperature
    self.element = element

    # Read atomtypes (initial and base) and decorators.
    self.atomtypes = AtomTyper.read_typelist(initialtypes_filename)
    self.basetypes = AtomTyper.read_typelist(basetypes_filename)
    self.decorators = AtomTyper.read_typelist(decorators_filename)
    self.replacements = AtomTyper.read_typelist(replacements_filename)

    # Store copies of the molecules since they will be annotated;
    # loop through input molecules to remove repeats (same isomeric SMILES)
    self.molecules = list()
    if reference_typed_molecules is not None:
        self.reference_typed_molecules = list()
    else:
        self.reference_typed_molecules = None

    smiles = set()
    for idx, mol in enumerate(molecules):
        smile = OECreateIsoSmiString(mol)
        if smile not in smiles:
            self.molecules.append(OEMol(mol))
            smiles.add(smile)
            if reference_typed_molecules is not None:
                # Keep the reference list parallel to the de-duplicated molecule list.
                # TODO: add a check that the reference SMILES equals `smile`?
                self.reference_typed_molecules.append(OEMol(reference_typed_molecules[idx]))

    # Save bond list to use throughout
    bondset = [("-","singly"), ("=", "doubly"), ("#","triply"), (":", "aromatic")]

    used_basetypes = list()
    self.atomtypes_with_no_matches = set()
    # Check all SMARTS strings that are used as a base type
    for (smarts, atom_type) in self.basetypes:
        if self.smarts_matches(smarts):
            # Keep used base types
            used_basetypes.append( (smarts, atom_type) )
        else:
            # Remember unused base types so they are not proposed again
            self.atomtypes_with_no_matches.add( smarts )
    self.basetypes = copy.deepcopy(used_basetypes)
    if verbose:
        print("USED BASE TYPES:")
        for (smarts, typename) in self.basetypes:
            print("%10s %25s" % (smarts, typename))

    # Calculate which bonds in set are used (typing is done on a scratch copy
    # so the stored molecules are not annotated with bond names)
    bond_typelist = [("[*]%s[*]" %bond, name) for (bond, name) in bondset]
    tmpmolecules = copy.deepcopy(self.molecules)
    self.type_molecules(bond_typelist, tmpmolecules, 0)
    [bond_typecounts, molecule_bond_typecounts] = self.compute_type_statistics( bondset, tmpmolecules, 0)
    if self.verbose:
        print("USED BOND TYPES:")
        self.show_type_statistics(bondset, bond_typecounts, molecule_bond_typecounts)

    # only save bonds that are used (the generic '~' bond is always available)
    self.bondset = [ ('~', 'any') ]
    for (bond, name) in bondset:
        if bond_typecounts[name] > 0:
            self.bondset.append( (bond, name) )

    # Rename base/initial types to ensure their names are unique
    # clashes between initial and target types will cause problems
    self.atomtypes = [ (smarts, 'c_'+typename) for (smarts, typename) in self.atomtypes ]
    self.basetypes = [ (smarts, 'c_'+typename) for (smarts, typename) in self.basetypes ]

    # Store smarts for basetypes
    self.basetypes_smarts = [ smarts for (smarts, name) in self.basetypes ]

    # Add any base types not already there to the initial types
    initial_smarts = [ smarts for (smarts, name) in self.atomtypes ]
    missing_basetypes = list()
    for (smarts, typename) in self.basetypes:
        if smarts not in initial_smarts:
            missing_basetypes.append( (smarts, typename) )
            if self.verbose: print("Added base (generic) type `%s`, name %s, to initial types." % (smarts, typename))

    # Base types go first so that more specific initial types take precedence
    self.atomtypes = missing_basetypes + self.atomtypes

    # Type all molecules with current typelist to ensure that starting types are sufficient.
    self.type_molecules(self.atomtypes, self.molecules, self.element)

    # Compute atomtype statistics on molecules for current atomtype set
    [atom_typecounts, molecule_typecounts] = self.compute_type_statistics(self.atomtypes, self.molecules, self.element)
    if self.verbose:
        print("MATCHED INITIAL TYPES:")
        self.show_type_statistics(self.atomtypes, atom_typecounts, molecule_typecounts)

    # Track only used atomtypes and add unused to atomtypes with no matches
    used_initial_atomtypes = list()
    for (smarts, atom_type) in self.atomtypes:
        if atom_typecounts[atom_type] > 0:
            used_initial_atomtypes.append( (smarts, atom_type) )
        else:
            self.atomtypes_with_no_matches.add( smarts )
            if self.verbose: print("Removing initial atom type '%s', as it matches no atoms" % smarts)
    self.atomtypes = copy.deepcopy(used_initial_atomtypes)
    self.initial_atomtypes = copy.deepcopy(used_initial_atomtypes)

    # Type molecules again with the updated atomtype list
    self.type_molecules(self.atomtypes, self.molecules, self.element)

    # These are atomtypes where not all children have been matched
    self.unmatched_atomtypes = copy.deepcopy(self.atomtypes)

    # Create dictionary to store children of initial atom types
    # (an empty list of children for each atomtype)
    self.parents = dict()
    for (smarts, typename) in self.atomtypes:
        self.parents[smarts] = []
    # store reverse parent dictionary with child to parent
    self.child_to_parent = self._switch_parent_dict()

    # Compute total atoms (kept as a float, as in the original implementation)
    self.total_atoms = float(sum(
        1
        for molecule in self.molecules
        for atom in self._GetAtoms(molecule, self.element)))

    # Store reference molecules
    self.reference_atomtypes = set()
    self.current_atom_matches = None
    # Always define the reference-matching attributes so that they can be
    # inspected safely even when no reference molecules were supplied.
    self.atom_type_matches = None
    self.total_atom_type_matches = None
    self.reference_atomtypes_atomcount = dict()
    if self.reference_typed_molecules is not None:
        # Extract list of reference atom types
        for molecule in self.reference_typed_molecules:
            for atom in self._GetAtoms(molecule, self.element):
                self.reference_atomtypes.add(atom.GetType())
        self.reference_atomtypes = list(self.reference_atomtypes)
        # Compute current atom matches
        [self.atom_type_matches, self.total_atom_type_matches] = self.best_match_reference_types(self.atomtypes, self.molecules)
        # Count atom types.
        self.reference_atomtypes_atomcount = { atomtype : 0 for atomtype in self.reference_atomtypes }
        for molecule in self.reference_typed_molecules:
            for atom in self._GetAtoms(molecule, self.element):
                self.reference_atomtypes_atomcount[atom.GetType()] += 1
227 |
def smarts_matches(self, smarts):
    """
    Report whether a SMARTS pattern occurs in at least one stored molecule.

    Parameters
    ----------
    smarts : str
        SMARTS pattern; replacement tokens are expanded with the
        sampler's replacements list before parsing.

    Returns
    -------
    matched : bool
        True if the pattern matches any molecule in self.molecules,
        False if it matches none of them.

    Raises
    ------
    Exception
        If the (expanded) SMARTS string cannot be parsed.
    """
    # Replacements are stored as (smarts, shortname); the lexical
    # replacement call wants (shortname, smarts) bindings.
    substitutions = self.replacements
    if substitutions is None:
        bindings = []
    else:
        bindings = [ (shortname, pattern) for (pattern, shortname) in substitutions ]

    # Expand the replacement tokens in the query
    expanded = OESmartsLexReplace(smarts, bindings)

    # Build the substructure query from the expanded pattern
    query = OEQMol()
    if not OEParseSmarts(query, expanded):
        raise Exception("Error parsing SMARTS %s" % expanded)
    searcher = OESubSearch(query)

    # Stop at the first molecule containing the pattern
    return any(searcher.SingleMatch(candidate) for candidate in self.molecules)
258 | def _GetAtoms(self, molecule, element = 0):
259 | """
260 | Parameters
261 | ----------
262 | molecule : OEMol
263 | element : integer
264 | if 0 looks at all atoms, otherwise only those with the given atomic number
265 |
266 | Returns
267 | -------
268 | iterator over the atoms based on the molecule and element number
269 | """
270 | if element > 0:
271 | return molecule.GetAtoms(OEHasAtomicNum(element))
272 | else:
273 | return molecule.GetAtoms()
274 |
def best_match_reference_types(self, atomtypes, molecules):
    """
    Determine best match for each parameter with reference atom types

    A bipartite graph is built between current and reference atom type
    names, with edge weights equal to the number of atoms carrying both
    types, and a maximum weight matching is computed on it.

    Parameters
    ----------
    atomtypes : list of tuples (smarts, typename)
        Current atom types
    molecules : list of OEMol
        Typed molecules, where types are stored as string data under self.typetag.

    Returns
    -------
    None if self.reference_typed_molecules is None, otherwise a tuple of
    atom_type_matches : list of tuples (current_atomtype, reference_atomtype, counts)
        Best correspondence between current and reference atomtypes, along with
        number of atoms equivalently typed in reference molecule set.
        Unmatched current types appear as (current_atomtype, None, None).
    total_atom_type_matches : int
        The total number of correspondingly typed atoms in the reference molecule set.

    Raises
    ------
    Exception if a type name is used by both the current and reference sets.

    * Currently, types for reference typed molecules are accessed via atom.GetType(),
      while types for current typed molecules are accessed via
      atom.GetStringData(self.typetag). This should be homogenized.

    Contributor:
    * Josh Fass contributed this algorithm.
    """
    if self.reference_typed_molecules is None:
        if self.verbose: print('No reference molecules specified, so skipping likelihood calculation.')
        return None

    # Create bipartite graph (U,V,E) matching current atom types U with reference atom types V via edges E with weights equal to number of atoms typed in common.
    if self.verbose: print('Creating graph matching current atom types with reference atom types...')
    initial_time = time.time()
    graph = nx.Graph()

    # Get current atomtypes and reference atom types
    current_atomtypes = [ typename for (smarts, typename) in atomtypes ]
    reference_atomtypes = [ typename for typename in self.reference_atomtypes ]
    # check that current atom types are not in reference atom types
    # (both sets share one graph, so a shared name would merge two nodes)
    if set(current_atomtypes) & set(reference_atomtypes):
        raise Exception("Current and reference atom types must be unique")
    # Add current atom types
    for atomtype in current_atomtypes:
        graph.add_node(atomtype, bipartite=0)
    # Add reference atom types
    for atomtype in reference_atomtypes:
        graph.add_node(atomtype, bipartite=1)
    # Make an entry in the dictionary for each pair of types
    atoms_in_common = dict()
    for current_atomtype in current_atomtypes:
        for reference_atomtype in reference_atomtypes:
            atoms_in_common[(current_atomtype,reference_atomtype)] = 0
    # Loop through all molecules
    for (current_typed_molecule, reference_typed_molecule) in zip(molecules, self.reference_typed_molecules):
        current_atoms = self._GetAtoms(current_typed_molecule, self.element)
        reference_atoms = self._GetAtoms(reference_typed_molecule, self.element)
        # For each atom add a count to the current/reference atomtype pair
        for (current_typed_atom, reference_typed_atom) in zip(current_atoms, reference_atoms):
            current_atomtype = current_typed_atom.GetStringData(self.typetag)
            reference_atomtype = reference_typed_atom.GetType()
            atoms_in_common[(current_atomtype,reference_atomtype)] += 1
    # Make weighted edges connecting the current and reference nodes
    for current_atomtype in current_atomtypes:
        for reference_atomtype in reference_atomtypes:
            weight = atoms_in_common[(current_atomtype,reference_atomtype)]
            graph.add_edge(current_atomtype, reference_atomtype, weight=weight)
    elapsed_time = time.time() - initial_time
    if self.verbose: print('Graph creation took %.3f s' % elapsed_time)

    # Compute maximum match using networkx algorithm
    if self.verbose: print('Computing maximum weight match...')
    initial_time = time.time()
    matching = nx.algorithms.max_weight_matching(graph, maxcardinality=False)
    # Older networkx releases return a symmetric dict {node: partner};
    # newer releases return a set of (node, node) pairs. Normalise to the dict form.
    if isinstance(matching, dict):
        mate = matching
    else:
        mate = dict()
        for (node_a, node_b) in matching:
            mate[node_a] = node_b
            mate[node_b] = node_a
    elapsed_time = time.time() - initial_time
    if self.verbose: print('Maximum weight match took %.3f s' % elapsed_time)

    # Compute match dictionary and total number of matches.
    atom_type_matches = list()
    total_atom_type_matches = 0
    for current_atomtype in current_atomtypes:
        if current_atomtype in mate:
            reference_atomtype = mate[current_atomtype]
            counts = graph[current_atomtype][reference_atomtype]['weight']
            total_atom_type_matches += counts
            atom_type_matches.append( (current_atomtype, reference_atomtype, counts) )
        else:
            atom_type_matches.append( (current_atomtype, None, None) )

    # Report on matches
    if self.verbose:
        print("PROPOSED:")
        self.show_type_matches(atom_type_matches)

    return (atom_type_matches, total_atom_type_matches)
364 |
def show_type_matches(self, atom_type_matches):
    """
    Print the pairing of current to reference atom types.

    Parameters
    ----------
    atom_type_matches : list of (current_atomtype, reference_atomtype, counts)
        entries whose reference_atomtype is None are reported as unmatched

    Returns
    -------
    fraction_matched_atoms : matched atom count divided by self.total_atoms
    """
    print('Atom type matches:')
    matched_total = 0
    for (current_name, reference_name, n_atoms) in atom_type_matches:
        if reference_name is None:
            print('%-64s no match' % (current_name))
            continue
        print('%-64s matches %8s : %8d atoms matched' % (current_name, reference_name, n_atoms))
        matched_total += n_atoms

    fraction_matched_atoms = float(matched_total) / float(self.total_atoms)
    summary = (matched_total, self.total_atoms, fraction_matched_atoms * 100)
    print('%d / %d total atoms match (%.3f %%)' % summary)

    return fraction_matched_atoms
389 |
390 |
def AtomDecorator(self, atom1type, decorator):
    """
    Given an atom and a decorator amend the SMARTS string with that decorator

    Parameters
    -----------
    atom1type : atomtype tuple in form (smarts, typename)
    decorator : decorator being added to current atom, in form (smarts, typename)

    Returns
    -------
    decorated atomtype as a tuple (smarts, typename)
    """
    smarts = atom1type[0]
    if self.HasAlpha(atom1type):
        # decorators should go before the $ sign on the atom
        dollar = smarts.find('$')
        proposed_atomtype = smarts[:dollar] + decorator[0] + smarts[dollar:]
    else:
        # No alpha atom so the decorator goes before the ']'
        proposed_atomtype = smarts[:-1] + decorator[0] + ']'
    proposed_typename = atom1type[1] + ' ' + decorator[1]

    return (proposed_atomtype, proposed_typename)
415 |
def PickAnAtom(self, atomList):
    """
    Draw one entry at random from a list.

    Parameters
    ----------
    atomList : any list of tuples in the form (smarts, typename)
        this could include decorator or bond lists

    Returns
    -------
    one random (smarts, typename) pair from given list

    Every random selection in the sampler goes through this method, so the
    selection strategy can be changed in a single place (it was also used
    for testing which atomtypes to choose from while sampling).
    """
    chosen = random.choice(atomList)
    return chosen
432 |
def HasAlpha(self, atom1type):
    """
    Report whether an atomtype already carries an alpha substituent.

    Parameter
    ---------
    atom1type : an atomtype tuple (smarts, typename)

    Returns
    -------
    True if atomtype has at least 1 alpha substituent otherwise False
    """
    # Alpha atoms are attached in the form [#1] --> [#1$(*~[#6])],
    # so the marker '$(*' appears in the SMARTS exactly when one is present.
    alpha_marker = '$(*'
    return alpha_marker in atom1type[0]
449 |
def AddAlphaSubstituentAtom(self, atom1type, bondset, atom2type):
    """
    Attach a new atom alpha to the primary atom.

    The new substituent is always appended after any existing alpha atoms:
    starting from '[#1$(*~[#6])]', adding [#8] gives '[#1$(*~[#6])$(*~[#8])]'.

    Parameters
    ----------
    atom1type : current atomtype (smarts, typename)
    bondset : bondtype to connect two atoms (smarts, bondname)
    atom2type : atom to be added (smarts, typename)

    Returns
    -------
    Atomtype with new alpha substituent (smarts, typename)
    """
    # Drop the closing ']' of the primary atom, then re-close after the new group
    opened = atom1type[0][:-1]
    new_smarts = ''.join([opened, '$(*', bondset[0], atom2type[0], ')]'])
    # The typename keeps a trailing space separator
    new_name = ''.join([atom1type[1], ' ', bondset[1], ' ', atom2type[1], ' '])
    return (new_smarts, new_name)
470 |
def AddBetaSubstituentAtom(self, atom1type, bondset, atom2type):
    """
    Bond atom2type, as a beta substituent, to the first alpha atom of
    atom1type. When atom1type has no alpha atom yet, this method defers to
    AddAlphaSubstituentAtom instead.

    Parameters
    ----------
    atom1type : parent atomtype (smarts, typename)
    bondset : bond used to connect atoms (smarts, bondname)
    atom2type : atomtype being bonded in beta position (smarts, typename)

    Returns
    -------
    child atomtype as tuple (smarts, typename)
    """
    parent_smarts = atom1type[0]
    # every '[' opens one atom, so this is the number of atoms in the pattern
    n_atoms = parent_smarts.count('[')
    # the first ']' closes the first alpha atom; split just after it
    split_at = parent_smarts.find(']') + 1

    if n_atoms < 2:
        # Only the primary atom is present: add an alpha atom instead
        new_smarts, new_name = self.AddAlphaSubstituentAtom(atom1type, bondset, atom2type)
        return (new_smarts, new_name)

    head = parent_smarts[:split_at]
    if n_atoms == 2:
        # Exactly one alpha atom and no beta atoms yet
        new_smarts = head + bondset[0] + atom2type[0] + ')]'
        new_name = atom1type[1] + bondset[1] + ' ' + atom2type[1]
        if self.verbose: print("ADD FIRST BETA SUB: proposed --- %s %s" % ( str(new_smarts), str(new_name)))
    else:
        # Alpha atom already has at least one beta atom: add a branch
        tail = parent_smarts[split_at:]
        new_smarts = head + '(' + bondset[0] + atom2type[0] + ')' + tail
        new_name = atom1type[1] + ' (' + bondset[1] + ' ' + atom2type[1] + ')'
        if self.verbose: print("ADD MORE BETA SUB: proposed --- %s %s" % ( str(new_smarts), str(new_name)))
    return (new_smarts, new_name)
512 |
513 |
def sample_atomtypes(self):
    """
    Perform one step of atom type sampling.

    With equal probability the move either removes a current atomtype or
    creates a child atom type. Copies of the molecules are retyped with the
    proposed atomtype list, the proposal is scored against the reference
    types (when reference molecules exist), and the move is accepted or
    rejected. When there are no reference molecules every valid proposal is
    accepted and the stored match information is left untouched.

    Returns
    -------
    True if the proposal was accepted (sampler state was updated), otherwise False
    """
    # Copy current atomtypes for proposal.
    proposed_atomtypes = copy.deepcopy(self.atomtypes)
    proposed_molecules = copy.deepcopy(self.molecules)
    proposed_parents = copy.deepcopy(self.parents)

    if random.random() < 0.5:
        # Pick a random index and remove atomtype at that index
        (atomtype, typename) = self.PickAnAtom(proposed_atomtypes)
        if self.verbose: print("Attempting to destroy atom type %s : %s..." % (atomtype, typename))

        # Reject deletion of (populated) base types as we want to retain
        # generics even if empty
        if atomtype in self.basetypes_smarts:
            if self.verbose: print("Destruction rejected for atom type %s because this is a generic type which was initially populated." % atomtype )
            return False

        # remove chosen atomtype
        proposed_atomtypes.remove( (atomtype, typename) )
        # update proposed parent dictionary: the removed type's children are
        # handed over to its parent
        for parent, children in proposed_parents.items():
            if atomtype in [at for (at, tn) in children]:
                children += proposed_parents[atomtype]
                children.remove( (atomtype, typename) )

        del proposed_parents[atomtype]

        # Try to type all molecules.
        try:
            self.type_molecules(proposed_atomtypes, proposed_molecules, self.element)
        except AtomTyper.TypingException as e:
            # Reject since typing failed.
            if self.verbose: print("Typing failed; rejecting.")
            return False
    else:
        if self.decorator_behavior == 'simple-decorators':
            # Pick an atomtype to subtype.
            atom1type = self.PickAnAtom(self.atomtypes)
            # Pick a decorator to add.
            (decorator, decorator_typename) = self.PickAnAtom(self.decorators)

            # Create new atomtype to insert by appending decorator with 'and' operator.
            result = re.match(r'\[(.+)\]', atom1type[0])
            proposed_atomtype = '[' + result.groups(1)[0] + '&' + decorator + ']'
            proposed_typename = atom1type[1] + ' ' + decorator_typename
            if self.verbose: print("Attempting to create new subtype: '%s' (%s) + '%s' (%s) -> '%s' (%s)" % (atom1type[0], atom1type[1], decorator, decorator_typename, proposed_atomtype, proposed_typename))

        else: # combinatorial-decorators
            # Pick an atomtype
            atom1type = self.PickAnAtom(self.atomtypes)
            # Check if we need to add an alpha or beta substituent
            if self.HasAlpha(atom1type):
                # Has alpha
                bondtype = self.PickAnAtom(self.bondset)
                atom2type = self.PickAnAtom(self.basetypes)
                if random.random() < 0.5 or atom1type[0][2] == '1': # Add Beta Substituent Atom randomly or when it is Hydrogen
                    proposed_atomtype, proposed_typename = self.AddBetaSubstituentAtom(atom1type, bondtype, atom2type)
                else: # Add another Alpha Substituent if it is not a Hydrogen
                    proposed_atomtype, proposed_typename = self.AddAlphaSubstituentAtom(atom1type, bondtype, atom2type)
                if self.verbose: print("Attempting to create new subtype: -> '%s' (%s)" % (proposed_atomtype, proposed_typename))
            else:
                # Has no alpha
                if random.random() < 0.5: # add decorator to primary atom
                    decorator = self.PickAnAtom(self.decorators)
                    proposed_atomtype, proposed_typename = self.AtomDecorator(atom1type, decorator)
                    if self.verbose: print("Attempting to create new subtype: '%s' (%s) + '%s' (%s) -> '%s' (%s)" % (atom1type[0], atom1type[1], decorator[0], decorator[1], proposed_atomtype, proposed_typename))
                else: # add Alpha substituent
                    bondtype = self.PickAnAtom(self.bondset)
                    atom2type = self.PickAnAtom(self.basetypes)
                    proposed_atomtype, proposed_typename = self.AddAlphaSubstituentAtom(atom1type, bondtype, atom2type)
                    if self.verbose: print("Attempting to create new subtype: '%s' (%s) -> '%s' (%s)" % (atom1type[0], atom1type[1], proposed_atomtype, proposed_typename))

        # Check that we haven't already determined this atom type isn't matched in the dataset.
        if proposed_atomtype in self.atomtypes_with_no_matches:
            if self.verbose: print("Atom type '%s' (%s) unused in dataset; rejecting." % (proposed_atomtype, proposed_typename))
            return False

        # Check that it is a new SMARTS pattern
        if proposed_atomtype in [smarts for (smarts, typename) in self.atomtypes]:
            if self.verbose: print("Atom type '%s' (%s) is in the existing atomtype list; rejecting." % (proposed_atomtype, proposed_typename))
            return False

        # Check the proposed type name is unique
        current_typenames = [typename for (smarts, typename) in self.atomtypes]
        while proposed_typename in current_typenames:
            proposed_typename += '%i' % random.randint(0,10)

        # for either decorator - update proposed parent dictionary
        proposed_parents[atom1type[0]].append( (proposed_atomtype, proposed_typename) )
        proposed_parents[proposed_atomtype] = []

        # Append the new atomtype at the end of the list.
        proposed_atomtypes.append( (proposed_atomtype, proposed_typename) )
        # Try to type all molecules.
        try:
            # Type molecules.
            self.type_molecules(proposed_atomtypes, proposed_molecules, self.element)
            # Compute updated statistics.
            [proposed_atom_typecounts, proposed_molecule_typecounts] = self.compute_type_statistics(proposed_atomtypes, proposed_molecules, self.element)
        except AtomTyper.TypingException as e:
            print("Exception: %s" % str(e))
            # Reject since typing failed.
            if self.verbose: print("Typing failed for one or more molecules using proposed atomtypes; rejecting.")
            return False

        # Reject if new type is unused.
        if (proposed_atom_typecounts[proposed_typename] == 0):
            # Reject because new type is unused in dataset.
            if self.verbose: print("Atom type '%s' (%s) unused in dataset; rejecting." % (proposed_atomtype, proposed_typename))
            # Store this atomtype to speed up future rejections
            self.atomtypes_with_no_matches.add(proposed_atomtype)
            return False

        # Reject if any type is emptied (UNLESS it is a basetype)
        for (smarts, typename) in proposed_atomtypes:
            if not smarts in self.basetypes_smarts: # not a base type
                if proposed_atom_typecounts[typename] == 0: # no matches
                    if self.verbose: print("Atomtype '%s' (%s) is now unused in dataset; rejecting." % (smarts, typename))
                    return False

    if self.verbose: print('Proposal is valid...')

    # Accept automatically if no reference molecules
    has_reference = self.reference_typed_molecules is not None
    accept = False
    if not has_reference:
        accept = True
    else:
        # Find number of matches for current set
        (proposed_atom_type_matches, proposed_total_atom_type_matches) = self.best_match_reference_types(proposed_atomtypes, proposed_molecules)
        score_dif = (proposed_total_atom_type_matches - self.total_atom_type_matches)
        # if temperature is zero only accept increased scores
        if self.temperature == 0.0:
            print('Proposal score: %d >> %d' % (self.total_atom_type_matches, proposed_total_atom_type_matches))
            accept = score_dif > 0.0

        # If finite temperature compute effective temperature and log_P_accept
        else:
            # Compute effective temperature
            effective_temperature = (self.total_atoms * self.temperature)
            # Compute likelihood for accept/reject
            log_P_accept = score_dif / effective_temperature
            print('Proposal score: %d >> %d : log_P_accept = %.5e' % (self.total_atom_type_matches, proposed_total_atom_type_matches, log_P_accept))
            accept = (log_P_accept > 0.0) or (numpy.random.uniform() < numpy.exp(log_P_accept))

    # Accept or reject
    if accept:
        self.atomtypes = proposed_atomtypes
        self.molecules = proposed_molecules
        self.parents = proposed_parents
        self.child_to_parent = self._switch_parent_dict()
        # Match data only exists when the proposal was scored against references
        if has_reference:
            self.atom_type_matches = proposed_atom_type_matches
            self.total_atom_type_matches = proposed_total_atom_type_matches
        return True
    else:
        return False
675 |
def type_molecules(self, typelist, molecules, element = 0):
    """
    Assign types from typelist to every molecule in molecules.

    Parameters
    ----------
    typelist : list of atomtypes or tuples in the form (smarts, typename)
    molecules : list of OEMols
    element : integer 0 for all atoms or atomic number being sampled

    A single AtomTyper is built from typelist and applied to each molecule,
    after which the assigned typename can be read back with
    atom.GetStringData(self.typetag)
    """
    # One typer is shared by every molecule in the list
    typer = AtomTyper(typelist, self.typetag, replacements=self.replacements)

    for current_molecule in molecules:
        typer.assignTypes(current_molecule, element)

    return
696 |
def compute_type_statistics(self, typelist, molecules, element = 0):
    """
    Count how many atoms and how many molecules are assigned each type.

    Parameters
    ----------
    typelist : atomtype list of form (smarts, typename)
    molecules : list of OEmols
    element : 0 for all atoms or atomic number being sampled

    Returns
    -------
    atom_typecounts (dict) : counts of number of atoms containing each atomtype
    molecule_typecounts (dict) : counts of number of molecules containing each atom type
    """
    # Every known typename starts at zero in both tallies.
    typenames = [typename for [smarts, typename] in typelist]
    atom_typecounts = dict.fromkeys(typenames, 0)
    molecule_typecounts = dict.fromkeys(typenames, 0)

    for molecule in molecules:
        seen_here = set()
        for atom in self._GetAtoms(molecule, element):
            assigned = atom.GetStringData(self.typetag)
            seen_here.add(assigned)
            atom_typecounts[assigned] += 1
        # Each type present in this molecule counts once for the molecule tally
        for assigned in seen_here:
            molecule_typecounts[assigned] += 1

    return (atom_typecounts, molecule_typecounts)
728 |
def show_type_statistics(self, typelist, atom_typecounts, molecule_typecounts, atomtype_matches=None):
    """
    Print a table of atom type statistics to the commandline.

    Parameters
    ----------
    typelist : atomtype list of form (smarts, typename)
    atom_typecounts : dictionary result from compute_type_statistics
    molecule_typecounts : dictionary result from compute_type_statistics
    atomtype_matches : list of (typename, reference_atomtype, count) tuples
        from best_match_reference_types if there are reference molecules
    """
    have_reference = atomtype_matches is not None
    plain_row = "%5d : %10d %10d | %64s %32s"

    # Look-up of reference information by typename, then the header
    if have_reference:
        reference_type_info = dict(
            (typename, (reference_atomtype, count))
            for (typename, reference_atomtype, count) in atomtype_matches)
        print("%5s %10s %10s %64s %32s %8s %46s" % ('INDEX', 'ATOMS', 'MOLECULES', 'TYPE NAME', 'SMARTS', 'REF TYPE', 'FRACTION OF REF TYPED MOLECULES MATCHED'))
    else:
        print("%5s %10s %10s %64s %32s" % ('INDEX', 'ATOMS', 'MOLECULES', 'TYPE NAME', 'SMARTS'))

    # One row per atomtype
    natoms = 0
    for index, [smarts, typename] in enumerate(typelist, 1):
        reference_atomtype = None
        if have_reference:
            (reference_atomtype, reference_count) = reference_type_info[typename]

        if reference_atomtype is None:
            print(plain_row % (index, atom_typecounts[typename], molecule_typecounts[typename], typename, smarts))
        else:
            reference_total = self.reference_atomtypes_atomcount[reference_atomtype]
            reference_fraction = float(reference_count) / float(reference_total)
            print("%5d : %10d %10d | %64s %32s %8s %16d / %16d (%7.3f%%)" % (index, atom_typecounts[typename], molecule_typecounts[typename], typename, smarts, reference_atomtype, reference_count, reference_total, reference_fraction*100))

        natoms += atom_typecounts[typename]

    # Totals row
    nmolecules = len(self.molecules)
    if have_reference:
        print("%5s : %10d %10d | %64s %32s %8d / %8d match (%.3f %%)" % ('TOTAL', natoms, nmolecules, '', '', self.total_atom_type_matches, self.total_atoms, (float(self.total_atom_type_matches) / float(self.total_atoms)) * 100))
    else:
        print("%5s : %10d %10d" % ('TOTAL', natoms, nmolecules))

    return
778 |
def save_type_statistics(self, typelist, atom_typecounts, molecule_typecounts, atomtype_matches=None):
    """
    Saves the match information in format for a trajectory file

    Parameters
    ----------
    typelist : atomtype list of form (smarts, typename)
    atom_typecounts : dictionary result from compute_type_statistics
    molecule_typecounts : dictionary result from compute_type_statistics
    atomtype_matches : list of (typename, reference_atomtype, count) tuples
        from best_match_reference_types if there are reference molecules

    Returns
    -------
    output : list of csv strings, one per atomtype followed by a totals line.
        Each atomtype line holds
        index, smarts, typename, parent smarts, reference type,
        atoms typed, molecules typed, reference atoms matched, reference atoms total.
        Types without a reference match use 'NONE' and zeros for the last two fields.
    """
    if atomtype_matches is not None:
        reference_type_info = dict()
        for (typename, reference_atomtype, count) in atomtype_matches:
            reference_type_info[typename] = (reference_atomtype, count)

    # All rows share one layout so the trajectory columns always line up
    row_format = "%i,'%s','%s','%s','%s',%i,%i,%i,%i"
    index = 1
    output = []
    natoms = 0
    for [smarts, typename] in typelist:
        parent = str(self.child_to_parent[smarts])
        # Default: no reference type matched
        reference_label, reference_count, reference_total = 'NONE', 0, 0
        if atomtype_matches is not None:
            (reference_atomtype, matched_count) = reference_type_info[typename]
            if reference_atomtype is not None:
                reference_label = reference_atomtype
                reference_count = matched_count
                reference_total = self.reference_atomtypes_atomcount[reference_atomtype]

        output.append(row_format % (index, smarts, typename, parent, reference_label, atom_typecounts[typename], molecule_typecounts[typename], reference_count, reference_total))

        natoms += atom_typecounts[typename]
        index += 1

    nmolecules = len(self.molecules)
    if atomtype_matches is None:
        output.append("-1,'total','all','None','all',%i,%i,0,0" % (natoms, nmolecules))
    else:
        output.append("-1,'total','all','None','all',%i,%i,%i,%i" % (natoms,nmolecules,self.total_atom_type_matches,self.total_atoms))
    return output
827 |
828 | def _switch_parent_dict(self):
829 | """
830 | Takes the parent dictionary and returns a dictionary in the form
831 | {child: parent}
832 | """
833 | child_to_parent = dict()
834 | for smarts in self.parents.keys():
835 | child_to_parent[smarts] = None
836 |
837 | for smarts, children in self.parents.items():
838 | for [child_smarts, child_typename] in children:
839 | child_to_parent[child_smarts] = smarts
840 |
841 | return child_to_parent
842 |
def print_parent_tree(self, roots, start=''):
    """
    Recursively prints the parent tree, one smarts per line, with the
    children of each entry printed one tab deeper.

    Parameters
    ----------
    roots : list of smarts strings to print
    start : string printed in front of each entry at this level
    """
    for node in roots:
        print("%s%s" % (start, node))
        if node not in self.parents:
            continue
        # Descend into the children of this node
        child_smarts = [smart for [smart, name] in self.parents[node]]
        self.print_parent_tree(child_smarts, start+'\t')
856 |
def run(self, niterations, trajFile=None):
    """
    Run atomtype sampler for the specified number of iterations.

    Parameters
    ----------
    niterations : int
        The specified number of iterations
    trajFile : str, optional, default=None
        Output trajectory filename; an existing file is overwritten

    Returns
    ----------
    fraction_matched_atoms : float
        fraction of total atoms matched successfully at end of run
    """
    write_traj = trajFile is not None
    if write_traj:
        # make "trajectory" file
        if os.path.isfile(trajFile):
            print("trajectory file already exists, it was overwritten")
        self.traj = open(trajFile, 'w')

    try:
        if write_traj:
            self.traj.write('Iteration,Index,Smarts,Typename,ParentSMARTS,RefType,Matches,Molecules,FractionMatched,Denominator\n')

        for iteration in range(niterations):
            if self.verbose:
                print("Iteration %d / %d" % (iteration, niterations))

            accepted = self.sample_atomtypes()
            [atom_typecounts, molecule_typecounts] = self.compute_type_statistics(self.atomtypes, self.molecules, self.element)

            if write_traj:
                # Get data as list of csv strings
                lines = self.save_type_statistics(self.atomtypes, atom_typecounts, molecule_typecounts, atomtype_matches=self.atom_type_matches)
                # Add lines to trajectory with iteration number:
                for l in lines:
                    self.traj.write('%i,%s \n' % (iteration, l))

            if self.verbose:
                if accepted:
                    print('Accepted.')
                else:
                    print('Rejected.')

                # Compute atomtype statistics on molecules.
                self.show_type_statistics(self.atomtypes, atom_typecounts, molecule_typecounts, atomtype_matches=self.atom_type_matches)
                print('')

                # Print parent tree as it is now.
                roots = [r for r in self.child_to_parent.keys() if self.child_to_parent[r] is None]

                print("Atom type hierarchy:")
                self.print_parent_tree(roots, '\t')
    finally:
        # Close the trajectory even when an iteration raises
        if write_traj:
            self.traj.close()

    if write_traj:
        # Get/print some stats on trajectory
        # Load timeseries
        timeseries = load_trajectory( trajFile )
        time_fractions = scores_vs_time( timeseries )
        print("Maximum score achieved: %.2f" % max(time_fractions['all']))

    #Compute final type stats
    [atom_typecounts, molecule_typecounts] = self.compute_type_statistics(self.atomtypes, self.molecules, self.element)
    fraction_matched_atoms = self.show_type_matches(self.atom_type_matches)

    # If verbose print parent tree:
    if self.verbose:
        # Copy the keys into a list: dict views cannot be mutated with remove()
        roots = list(self.parents.keys())
        # Remove keys from roots if they are children
        for parent, children in self.parents.items():
            child_smarts = [smarts for [smarts, name] in children]
            for child in child_smarts:
                if child in roots:
                    roots.remove(child)

        print("Atom type hierarchy:")
        self.print_parent_tree(roots, '\t')
    return fraction_matched_atoms
934 |
--------------------------------------------------------------------------------
/smarty/score_utils.py:
--------------------------------------------------------------------------------
1 | import numpy
2 | import pandas as pd
3 | import matplotlib
4 | matplotlib.use('pdf')
5 | import pylab as pl
6 |
def load_trajectory( trajFile):
    """Load data from a specified smarty trajectory .csv file and return a summary.

    Note that any SMARTS patterns which do not match anything (reference type
    NONE) are ignored in the resulting summary.

    Parameters
    ----------
    trajFile (str) : filename to read from

    Returns
    -------
    timeseries (dict) : status by iteration number
        Dictionary, keyed by iteration, storing the state at each iteration
        Subsequent keys are by reference types, (i.e. timeseries[1]['HO'])
        and an entry for total if included in the trajectory file at timeseries[1]['all'].
        Each entry is a dictionary holding the remaining columns of that row
        keyed by lower-cased column name (`ParNum` and `ParentParNum` keep
        their case), plus `fraction`: the second-to-last column divided by
        the last column, or NaN when the last column is zero.

    Raises
    ------
    Exception if the file does not have exactly 10 columns.
    """
    data = pd.read_csv(trajFile, quotechar="'")
    # If the number if headers is not as expected, this is a different version and we can't parse
    if len(data.columns) != 10:
        raise Exception("Number of headers in trajectory not as expected; can't parse.")
    data_dict = data.to_dict()

    # Initialize storage
    timeseries = {}

    keys = list(data.columns)
    keys.remove('RefType')
    keys.remove('Iteration')

    numerator = data.columns[-2].lower()
    denominator = data.columns[-1].lower()
    # Process file
    for linenr in data.index:
        iteration = data.Iteration[linenr]

        # Pull elements from line and store
        if not iteration in timeseries: timeseries[iteration] = {}
        reftype = data.RefType[linenr]

        # The csv is read with quotechar="'", so the quotes around NONE are
        # normally stripped; accept both spellings to be safe.
        if reftype in ("NONE", "'NONE'"):
            continue

        entry = {}
        for k in keys:
            if k in ['ParNum', 'ParentParNum']:
                entry[k] = data_dict[k][linenr]
            else:
                entry[k.lower()] = data_dict[k][linenr]
        timeseries[iteration][reftype] = entry

        den = float(entry[denominator])
        if den == 0.0:
            print("At iteration %s, found %s matched atoms and a denominator of %s for reftype %s..." % (iteration, entry[numerator], entry[denominator], reftype))
            entry['fraction'] = numpy.nan
        else:
            entry['fraction'] = entry[numerator]/den

    return timeseries
74 |
def scores_vs_time(timeseries, numerator = 'fractionmatched'):
    """Process a timeseries as read by load_trajectory and return the fraction of each reference atom type found at each time.

    Parameters
    ----------
    timeseries : dict
        Trajectory information as output by load_trajectory, keyed by
        iteration number starting at 0
    numerator : str, optional, default='fractionmatched'
        Key of the per-type entry holding the number of matched atoms

    Returns
    -------
    time_fractions : dict
        Dictionary of NumPy arrays, keyed by reference type, with one element
        per iteration (0 through the largest iteration present).
        The full score across all types is under `all`.
        'all' is from the total list if available or calculated from other references
    """
    if not timeseries:
        # Nothing recorded: return an empty series rather than failing
        return {'all': numpy.zeros(0, float)}

    # How many iterations are present? Iterations are numbered from zero, so
    # the count is one more than the largest iteration number.
    n_its = int(numpy.max([k for k in timeseries])) + 1

    # Retrieve keys of all reference types
    reftypes = set()
    for it in timeseries:
        reftypes.update(timeseries[it])

    # Allocate storage
    time_fractions = {}
    time_fractions['all'] = numpy.zeros( n_its, float)
    for reftype in reftypes:
        time_fractions[reftype] = numpy.zeros( n_its, float)

    # Update with data
    for it in range(n_its):
        # An iteration missing from the trajectory contributes nothing
        state = timeseries.get(it, {})
        # Update reference types occuring at this iteration
        denom = 0
        numer = 0
        for reftype in reftypes:
            if reftype in state:
                try:
                    time_fractions[reftype][it] = state[reftype]['fraction']
                except KeyError:
                    print("Can't find key set %s, %s, %s for timeseries." % (it, reftype, 'fraction'))
                    print("Available keys:", state[reftype])
                denom += state[reftype]['denominator']
                numer += state[reftype][numerator]

        # Any reference type which does not appear at this time point has zero matches so we just leave the value at zero

        # Handle 'all' case last: only fill it in from the other types when
        # no total was recorded and there is something to divide by
        if time_fractions['all'][it] == 0 and denom != 0:
            time_fractions['all'][it] = numer/float(denom)

    return time_fractions
130 |
def create_plot_file(trajFile, plot_filename, plot_others=False, verbose = False):
    """
    Creates plot to demonstrate performance of smarty or smirky

    Parameters
    ----------
    trajFile : str
        csv file generated by smarty, smarty_elemental, or smirky
    plot_filename : str
        pdf to save plot file to
    plot_others : bool, optional
        if True plots data for all reftypes separately
    verbose : bool, optional
        if True prints the maximum overall score
    """

    # Only the column names are needed here; the name of the second-to-last
    # column is used as the numerator key (load_trajectory reads the data itself)
    header = pd.read_csv(trajFile, quotechar="'", nrows=0)
    numerator = header.columns[-2].lower()

    timeseries = load_trajectory(trajFile)
    time_fractions = scores_vs_time(timeseries, numerator)

    if verbose:
        # nanmax: undefined (NaN) time points must not hide the best score
        max_score = numpy.nanmax(time_fractions['all']) * 100.0
        print("Maximum score was %.1f %%" % max_score)

    # Draw on a fresh figure and close it afterwards so that repeated calls in
    # the same process do not pile their curves onto one shared figure
    fig = pl.figure()
    try:
        # plot overall score
        pl.plot( time_fractions['all'], 'k-', linewidth = 2.0)

        if plot_others:
            reftypes = [k for k in time_fractions]
            reftypes.remove('all')

            # Plot scores for individual types
            for reftype in reftypes:
                pl.plot(time_fractions[reftype])

            pl.legend(['all']+reftypes, loc='lower right')

        pl.xlabel('Iterations')
        pl.ylabel('Fraction of reference type found')
        pl.ylim(-0.1, 1.1)

        pl.savefig(plot_filename)
    finally:
        pl.close(fig)
166 |
167 |
--------------------------------------------------------------------------------
/smarty/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openforcefield/smarty/882d54b6d6d0fada748c71789964b07be2210a6a/smarty/tests/__init__.py
--------------------------------------------------------------------------------
/smarty/tests/test_atomtyper.py:
--------------------------------------------------------------------------------
1 | from functools import partial
2 | from smarty import AtomTyper
3 | import smarty
4 | from smarty.utils import get_data_filename
5 | from openforcefield.utils import read_molecules
6 | from unittest import TestCase
7 |
class TestAtomTyper(TestCase):
    """Smoke tests: the bundled type lists load and an AtomTyper can type molecules."""

    def test_read_typelist(self):
        """Read each bundled type-list file; the test passes if nothing raises."""
        for relative_path in ('atomtypes/basetypes.smarts',
                              'atomtypes/decorators.smarts',
                              'atomtypes/replacements.smarts'):
            AtomTyper.read_typelist(get_data_filename(relative_path))

    def test_atomtyper(self):
        """Build an AtomTyper from the base types and assign types to every molecule."""
        type_list = AtomTyper.read_typelist(get_data_filename('atomtypes/basetypes.smarts'))
        replacement_list = AtomTyper.read_typelist(get_data_filename('atomtypes/replacements.smarts'))
        mols = read_molecules('zinc-subset-tripos.mol2.gz', verbose=False)

        typer = AtomTyper(type_list, 'atomtype', replacements=replacement_list)
        for mol in mols:
            typer.assignTypes(mol)
23 |
--------------------------------------------------------------------------------
/smarty/tests/test_sampler.py:
--------------------------------------------------------------------------------
1 | from functools import partial
2 | import smarty
3 | from smarty import AtomTyper, AtomTypeSampler, score_utils
4 | from smarty.utils import get_data_filename
5 | from openforcefield.utils import read_molecules
6 | from openforcefield import utils
7 | import unittest
8 | from unittest import TestCase
9 |
class TestAtomTypeSampler(TestCase):
    """Tests that run smarty.AtomTypeSampler on the zinc and AlkEthOH molecule sets."""

    def __init__(self, *args, **kwargs):
        """
        Initialize TestCase including files used in all smarty tests
        """
        unittest.TestCase.__init__(self, *args, **kwargs)

        # SMARTS input files
        self.basetypes = get_data_filename('atomtypes/basetypes.smarts')
        self.alkethoh_answers = get_data_filename('atomtypes/initial_AlkEthOH.smarts')
        self.simple_decs = get_data_filename('atomtypes/decorators.smarts')
        self.combine_decs = get_data_filename('atomtypes/new-decorators.smarts')
        self.replacements = get_data_filename('atomtypes/replacements.smarts')

        # molecule sets and their reference-typed counterparts
        self.mols_zinc = read_molecules('zinc-subset-tripos.mol2.gz', verbose=False)
        self.mols_zinc_ref = read_molecules('zinc-subset-parm@frosst.mol2.gz', verbose=False)

        self.mols_alkethoh = read_molecules('AlkEthOH_test_filt1_tripos.mol2', verbose=False)
        self.mols_alkethoh_ref = read_molecules('AlkEthOH_test_filt1_ff.mol2', verbose=False)

    def _run_with_outputs(self, sampler):
        """
        Run the sampler for 5 iterations writing a trajectory file, push that
        file through the score_utils functions, and raise if the total score
        at the first iteration is already 100%
        """
        traj = 'test_smarty.csv'
        plot = 'test_smarty.pdf'
        sampler.run(5, traj)

        # test trajectory analysis functions on smarty output
        series = score_utils.load_trajectory(traj)
        fractions = score_utils.scores_vs_time(series)
        score_utils.create_plot_file(traj, plot, True, False)

        # check if score is 100% at first iteration
        if fractions['all'][0] == 1.0:
            raise Exception("Scoring problem, 100% at first iteration for total")

    def test_atomtyper(self):
        """
        Test atomtype sampler with simple-decorators
        """
        options = dict(replacements_filename = self.replacements,
                       reference_typed_molecules = self.mols_zinc_ref,
                       temperature = 0.1,
                       verbose = False,
                       decorator_behavior = 'simple-decorators',
                       element = 0)
        sampler = smarty.AtomTypeSampler(self.mols_zinc,
                self.basetypes, self.basetypes, self.simple_decs, **options)
        sampler.run(2)

    def test_atomtyper_combinatorial(self):
        """
        Test atomtype sampler with combinatorial-decorators and optional output files
        """
        options = dict(replacements_filename = self.replacements,
                       reference_typed_molecules = self.mols_zinc_ref,
                       temperature = 0.1,
                       verbose = False)
        sampler = smarty.AtomTypeSampler(self.mols_zinc,
                self.basetypes, self.basetypes, self.combine_decs, **options)

        # run sampler with optional outputs and analyze them
        self._run_with_outputs(sampler)

    def test_atomtyper_elemental(self):
        """
        Test elemental atomtype sampler for hydrogen
        """
        options = dict(replacements_filename = self.replacements,
                       reference_typed_molecules = self.mols_alkethoh_ref,
                       temperature = 0.1,
                       verbose = False,
                       decorator_behavior = 'combinatorial-decorators',
                       element = 1)
        sampler = smarty.AtomTypeSampler(self.mols_alkethoh,
                self.basetypes, self.basetypes, self.combine_decs, **options)

        # run sampler with optional outputs and analyze them
        self._run_with_outputs(sampler)

    def test_atomtyper_AlkEthOH(self):
        """
        Test atomtype sampler with correct "answers"
        """
        options = dict(replacements_filename = self.replacements,
                       reference_typed_molecules = self.mols_alkethoh_ref,
                       temperature = 0,
                       verbose = False)
        sampler = smarty.AtomTypeSampler(self.mols_alkethoh,
                self.basetypes, self.alkethoh_answers, self.combine_decs, **options)

        # Start sampling atom types and ensure fraction found is 1.0
        fraction_found = sampler.run(2)
        if fraction_found < 1.0:
            raise Exception("Not finding 100% of AlkEthOH when starting from"
                            " correct SMARTS.")

    def test_atomtyper_elemental_AlkEthOH(self):
        """
        Test elemental sampler with correct "answers"
        """
        options = dict(replacements_filename = self.replacements,
                       reference_typed_molecules = self.mols_alkethoh_ref,
                       temperature = 0,
                       verbose = False,
                       decorator_behavior = 'combinatorial-decorators',
                       element = 1)
        sampler = smarty.AtomTypeSampler(self.mols_alkethoh,
                self.basetypes, self.alkethoh_answers, self.combine_decs, **options)

        # Start sampling atom types and ensure fraction found is 1.0
        fraction_found = sampler.run(2)
        if fraction_found < 1.0:
            raise Exception("Not finding 100% of Hydrogens of AlkEthOH when starting from"
                            " correct SMARTS.")
123 |
124 |
--------------------------------------------------------------------------------
/smarty/tests/test_smirky_sampler.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import smarty
3 | from openforcefield.typing.chemistry.environment import *
4 | from openforcefield.utils.utils import read_molecules
5 | from smarty.sampler_smirky import *
6 | from smarty import utils
7 | from smarty import score_utils
8 | from operator import itemgetter, attrgetter
9 | import openeye.oechem
10 | from openeye.oechem import *
11 | import copy
12 | import sys # used to exit while testing
13 |
class TestSmirkySampler(unittest.TestCase):
    """Tests that run the smirky FragmentSampler on the AlkEthOH test molecules."""

    def __init__(self, *args, **kwargs):
        """
        Initialize TestCase and then read in odds from files in smarty/data
        """
        unittest.TestCase.__init__(self,*args, **kwargs)

        # decorator/odds inputs
        self.atom_OR_bases = utils.parse_odds_file("odds_files/atom_OR_bases.smarts" , False)
        self.atom_OR_decors = utils.parse_odds_file("odds_files/atom_decorators.smarts", False)
        self.atom_AND_decors = utils.parse_odds_file("odds_files/atom_decorators.smarts", False)
        self.bond_OR_bases = utils.parse_odds_file("odds_files/bond_OR_bases.smarts", False)
        self.bond_AND_decors = utils.parse_odds_file("odds_files/bond_AND_decorators.smarts", False)
        self.atom_odds = utils.parse_odds_file("odds_files/atom_index_odds.smarts", False)
        self.bond_odds = utils.parse_odds_file("odds_files/bond_index_odds.smarts", False)

        # molecules, reference force field and output prefix
        self.molecules = read_molecules("test_filt1_tripos.mol2", False)
        self.SMIRFF = "forcefield/Frosst_AlkEthOH.ffxml"
        self.outputFile = 'test_smirky'

        # the type list is read as [smarts, short] pairs; swap each to [short, smarts]
        replacement_file = utils.get_data_filename("odds_files/substitutions.smarts")
        self.replacements = smarty.AtomTyper.read_typelist(replacement_file)
        self.replacements = [ [short, smarts] for [smarts, short] in self.replacements]

        # initial [SMIRKS, name] lists used as the "correct" input for each type tag
        self.correctDict = {
            'VdW': [
                ["[#1:1]-[#6]", 'HC'],
                ["[#1:1]-[#6]-[#7,#8,F,#16,Cl,Br]", 'H1'],
                ["[#1:1]-[#6](-[#7,#8,F,#16,Cl,Br])-[#7,#8,F,#16,Cl,Br]", 'H2'],
                ["[#1:1]-[#6](-[#7,#8,F,#16,Cl,Br])(-[#7,#8,F,#16,Cl,Br])-[#7,#8,F,#16,Cl,Br]", 'H3'],
                ["[#1:1]-[#8]", 'HO'],
                ["[#6X4:1]", 'CT'],
                ["[#8X2:1]", 'OS'],
                ["[#8X2+0:1]-[#1]", 'OH'],
            ],
            'Bond': [
                ["[#6X4:1]-[#6X4:2]", 'CT-CT'],
                ["[#6X4:1]-[#1:2]", 'CT-H'],
                ["[#8:1]~[#1:2]", 'O~H'],
                ["[#6X4:1]-[#8;X2;H1:2]", "CT-OH"],
                ["[#6X4:1]-[#8;X2;H0:2]", "CT-OS"],
            ],
            'Angle': [
                ["[a,A:1]-[#6&X4:2]-[a,A:3]", 'any-CT-any'],
                ["[#1:1]-[#6&X4:2]-[#1:3]", "H-CT-H"],
                ["[#6&X4:1]-[#6&X4:2]-[#6&X4:3]", 'CT-CT-CT'],
                ["[#8&X2:1]-[#6&X4:2]-[#8&X2:3]", 'O-CT-O'],
                ["[#6&X4:1]-[#8&X2:2]-[#1:3]", 'CT-OH-HO'],
                ["[#6X4:1]-[#8X2:2]-[#6X4:3]", 'CT-OS-CT'],
            ],
            'Torsion': [
                ["[a,A:1]-[#6&X4:2]-[#6&X4:3]-[a,A:4]", 'any-CT-CT-any'],
                ["[a,A:1]-[#6&X4:2]-[#8&X2:3]-[#1:4]", 'any-CT-OH-HO'],
                ["[a,A:1]-[#6&X4:2]-[#8&X2:3]-[!#1:4]", 'any-CT-OS-!H'],
                ["[#1:1]-[#6&X4:2]-[#6&X4:3]-[#1:4]", 'H-CT-CT-H'],
                ["[#1:1]-[#6&X4:2]-[#6&X4:3]-[#6&X4:4]", 'H-CT-CT-CT'],
                ["[#6&X4:1]-[#6&X4:2]-[#8&X2:3]-[#1:4]", 'CT-CT-OH-HO'],
                ["[#6&X4:1]-[#6&X4:2]-[#6&X4:3]-[#6&X4:4]", 'CT-CT-CT-CT'],
                ["[#6&X4:1]-[#6&X4:2]-[#8&X2:3]-[#6&X4:4]", 'CT-CT-OS-CT'],
                ["[#6&X4:1]-[#8&X2:2]-[#6&X4:3]-[O&X2&H0:4]", 'CT-OS-CT-OS'],
                ["[#8&X2:1]-[#6&X4:2]-[#6&X4:3]-[#8&X2:4]", 'O-CT-CT-O'],
                ["[#8&X2:1]-[#6&X4:2]-[#6&X4:3]-[#1:4]", 'O-CT-CT-H'],
                ["[#1:1]-[#6&X4:2]-[#6&X4:3]-[O&X2:4]", 'H-CT-CT-O'],
            ],
        }

    def _make_sampler(self, typetag, initialtypes, SMIRFF):
        """
        Build a FragmentSampler from the shared odds, replacements and output
        prefix, varying only the type tag, initial types and reference SMIRFF
        """
        return FragmentSampler(self.molecules, typetag,
                self.atom_OR_bases, self.atom_OR_decors, self.atom_AND_decors,
                self.bond_OR_bases, self.bond_AND_decors,
                AtomIndexOdds = self.atom_odds, BondIndexOdds = self.bond_odds,
                replacements = self.replacements, initialtypes = initialtypes,
                SMIRFF = SMIRFF, temperature = 0.0, outputFile = self.outputFile)

    def test_correct_fragments(self):
        """
        Test score is 100% if correct VdW, Bond, Angles, or Torsions
        from AlkEthOH are used as input to the FragmentSampler
        """
        for typetag, initialtypes in self.correctDict.items():
            sampler = self._make_sampler(typetag, initialtypes, self.SMIRFF)

            fraction_found = sampler.run(1)
            self.assertAlmostEqual(fraction_found, 1.0, msg = "Not finding 100%% of AlkEthOH when starting from correct %s SMIRKS." % typetag)

    def test_random_sampler(self):
        """
        Test FragmentSampler runs for 10 iterations with no failures
        Test score_utils functions with the outputFile
        """
        sampler = self._make_sampler('Torsion', None, self.SMIRFF)
        sampler.run(10)

        csv_name = '%s.csv' % self.outputFile
        # load_trajectory converts csv file to dictionary
        series = score_utils.load_trajectory(csv_name)
        # scores_vs_time converts num/den entries to fractional scores
        score_utils.scores_vs_time(series)
        # test plotting function
        score_utils.create_plot_file('%s.csv' % self.outputFile, '%s.pdf' % self.outputFile)

    def test_sampler_functions(self):
        """
        Test fragment sampler functions are working
        """
        sampler = self._make_sampler('Angle', None, self.SMIRFF)

        # type tag -> expected first element returned by get_type_info
        expected_by_tag = [ ('VdW', 'NonbondedGenerator'),
                            ('Bond', 'HarmonicBondGenerator'),
                            ('Angle', 'HarmonicAngleGenerator'),
                            ('Torsion', 'PeriodicTorsionGenerator'),
                            ('Improper','PeriodicTorsionGenerator'),
                            ('None', None)]

        for (tag, expected) in expected_by_tag:
            info = sampler.get_type_info(tag)
            sample_tag = info[0]
            self.assertEqual(sample_tag, expected, msg = "get_force_type(%s) should return %s, but %s was returned instead" % (tag, expected, sample_tag))

        # Running each method just to make sure they work; return values are
        # not checked, and the call order is kept as-is.
        # get environment
        first_env = sampler.envList[0]
        new_env, _ = sampler.create_new_environment(first_env)

        # check atom methods
        atom, _ = sampler.pick_an_atom(new_env)
        sampler.isremoveable(new_env,atom)
        sampler.add_atom(new_env,atom)
        sampler.change_atom(new_env, atom)
        atom.addORtype('#6', ['X4'])
        sampler.change_ORdecorator(atom, self.atom_OR_decors)
        sampler.change_ORbase(atom, self.atom_OR_bases, self.atom_OR_decors)
        sampler.change_ANDdecorators(atom, self.atom_AND_decors)

        # check bond methods
        bond, _ = sampler.pick_a_bond(new_env)
        sampler.change_bond(new_env, bond)
        sampler.change_ORbase(bond, self.bond_OR_bases, sampler.BondORdecorators)
        sampler.change_ANDdecorators(bond, self.bond_AND_decors)

    def test_no_reference_smirff(self):
        """
        Test that sampling still works with no reference SMIRFF provided
        """
        sampler = self._make_sampler('Bond', None, None)
        sampler.run(10)
133 |
134 |
--------------------------------------------------------------------------------
/smarty/tests/test_utils.py:
--------------------------------------------------------------------------------
1 | from functools import partial
2 | from smarty.utils import get_data_filename
3 | from unittest import TestCase
4 |
5 | import smarty
6 |
class TestUtils(TestCase):
    """Tests for the helpers in smarty.utils."""

    def test_parse_odds_file(self):
        """
        Testing parse_odds_file and get_data_filename
        """
        # parse_odds_file uses get_data_filename so this run checks both
        smarty.utils.parse_odds_file('odds_files/atom_index_odds.smarts', verbose = True)

        parsed = smarty.utils.parse_odds_file('odds_files/bond_OR_bases.smarts')
        second_entry = parsed[1]
        self.assertIsNone(second_entry, msg = "Parsing odds file with no odds should give None as the second entry")
16 |
--------------------------------------------------------------------------------
/smarty/utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | """
4 | Utility subroutines for SMARTY atom type sampling
5 |
6 | """
7 | #=============================================================================================
8 | # GLOBAL IMPORTS
9 | #=============================================================================================
10 |
11 | import os
12 |
13 | #=============================================================================================
14 | # UTILITY ROUTINES
15 | #=============================================================================================
16 |
def get_data_filename(relative_path):
    """Get the full path to one of the data files shipped with smarty.

    In the source distribution, these files are in ``smarty/data/``,
    but on installation, they're moved to somewhere in the user's python
    site-packages directory.

    Parameters
    ----------
    relative_path : str
        Path of the file to locate, relative to the ``data`` folder of the
        ``smarty`` package.

    Returns
    -------
    str
        Path of the file as resolved by ``pkg_resources.resource_filename``.

    Raises
    ------
    ValueError
        If the resolved path does not exist.
    """

    from pkg_resources import resource_filename

    inside_package = os.path.join('data', relative_path)
    full_path = resource_filename('smarty', inside_package)

    if os.path.exists(full_path):
        return full_path

    raise ValueError("Sorry! %s does not exist. If you just added it, you'll have to re-install" % full_path)
38 |
39 |
def parse_odds_file(filename, verbose = False):
    """
    parses files that have the form
    decorator       odds
    if only one column odds will be assumed equally probable

    Everything from a '%' to the end of a line is a comment. Blank lines and
    lines holding only a comment are skipped. A decorator written as '' or ""
    is stored as the empty string.

    Parameters
    -----------
    filename: string or file object
        may be an absolute file path, a path relative to the current working directory, a path relative to this module's data subdirectory (for built in decorator files), or an open file-like object with a readlines() method.
        A file-like object is only read from; it is left open for the caller.
    verbose: boolean, optional
        if True, prints which file is being parsed

    Returns
    --------
    choices: 2-tuple of the form ( [decorators], [odds] )
        odds is None when any entry has no odds column, or when no entry has
        non-zero odds.
        None is returned instead of a tuple when filename is None.

    Raises
    ------
    ValueError
        from get_data_filename, when the path is neither openable directly nor
        present in the data subdirectory
    Exception
        when the input cannot be read, or when an entry has more than two columns
    """
    # if no file return None
    if filename is None:
        if verbose: print("No filename provided, returning None")
        return None

    if hasattr(filename, 'readlines'):
        # input is a file object (it may have no .name, e.g. an in-memory buffer)
        if verbose: print("Attempting to parse file '%s'" % getattr(filename, 'name', filename))
        try:
            input_lines = filename.readlines()
        except Exception as e:
            msg = str(e) + '\n'
            msg += "Could not read data from file %s" % filename
            raise Exception(msg)
    else:
        if verbose: print("Attempting to parse file '%s'" % filename)
        try:
            ifs = open(filename, 'r')
        except IOError:
            # not found as given: fall back on the built in data files
            ifs = open(get_data_filename(filename), 'r')
        except Exception as e:
            raise Exception("%s\nProvided file (%s) could not be parsed" % (str(e), filename))
        # only files opened here are closed here, even if reading fails
        with ifs:
            input_lines = ifs.readlines()

    decorators = []
    odds = []
    noOdds = False
    for l in input_lines:
        # remove comments, then split into columns
        entry = l.split('%', 1)[0].split()

        # skip empty lines and lines that only hold a comment
        if len(entry) == 0:
            continue

        # add decorator
        if entry[0] == "''" or entry[0] == '""':
            decorators.append('')
        else:
            decorators.append(entry[0])

        if len(entry) == 2:
            odds.append(float(entry[1]))
        elif len(entry) == 1:
            noOdds = True
        else:
            raise Exception("Error entry (%s) in decorator file '%s' is invalid" % (l, filename))

    if (odds.count(0) == len(odds)) or noOdds:
        odds = None
        #TODO: handle case where 1 line is missing odds entry

    return (decorators, odds)
117 |
118 |
--------------------------------------------------------------------------------
/utilities/README.md:
--------------------------------------------------------------------------------
1 | # Utilities related to SMARTY and SMIRKY
2 |
3 | * `test_smirks_or_environment_speed/` - compares computing time cost of storing SMIRKS strings compared to storing Chemical Environments
4 |
--------------------------------------------------------------------------------
/utilities/test_smirks_or_environment_speed/README.md:
--------------------------------------------------------------------------------
1 | # Testing slow down when storing ChemicalEnvironments
2 |
3 | We were concerned that storing chemical environments would be slower than storing strings.
4 | Since ChemicalEnvironments can easily be converted to and from SMIRKS strings you could store a list of SMIRKS instead of a list of chemical environments when sampling parameter types (such as smirky).
5 | The notebook `testing_smirks_speed.ipynb` logs the time to store a list of SMIRKS or environments for a number of steps. It uses `Torsion_0_0.00e+00_results.smarts` as an example of the complex SMIRKS patterns that can be generated during a smirky simulation.
6 |
7 | Below are the results for this test. For each test data is reported in this order:
8 | * Parameter type list
9 | - generic: starts with only `"[*:1]~[*:2]~[*:3]~[*:4]"`
10 | - short: starts with first 10 SMIRKS in `*_results.smarts`
11 | - long: starts with all 82 SMIRKS in `*_results.smarts`
12 | * Time in minutes to do X iterations storing SMIRKS strings
13 | * Time in minutes to do X iterations storing Chemical Environments for each input SMIRKS
14 | * Difference in Chemical Environment and SMIRKS time in minutes
15 |
16 | ```
17 | ------------------------------ 2 Iterations ------------------------------
18 | short 1.97e-05 6.54e-05 4.57e-05
19 | long 1.93e-05 4.58e-04 4.39e-04
20 | generic 1.34e-05 1.82e-05 4.84e-06
21 |
22 |
23 | ------------------------------ 10 Iterations ------------------------------
24 | short 7.12e-05 1.16e-04 4.53e-05
25 | long 8.27e-05 5.40e-04 4.58e-04
26 | generic 6.60e-05 6.47e-05 -1.23e-06
27 |
28 |
29 | ------------------------------ 100 Iterations ------------------------------
30 | short 6.19e-04 7.01e-04 8.20e-05
31 | long 7.44e-04 1.36e-03 6.12e-04
32 | generic 5.49e-04 6.28e-04 7.92e-05
33 |
34 |
35 | ------------------------------ 1000 Iterations ------------------------------
36 | short 7.59e-03 1.73e-02 9.76e-03
37 | long 8.42e-03 2.10e-02 1.26e-02
38 | generic 6.89e-03 1.61e-02 9.20e-03
39 |
40 |
41 | ------------------------------ 10000 Iterations ------------------------------
42 | short 8.89e-02 1.09e+00 9.98e-01
43 | long 9.37e-02 1.17e+00 1.08e+00
44 | generic 7.18e-02 1.12e+00 1.05e+00
45 |
46 |
47 | ------------------------------ 30000 Iterations ------------------------------
48 | short 3.61e-01 1.04e+01 1.00e+01
49 | long 4.51e-01 1.08e+01 1.04e+01
50 | generic 3.13e-01 1.01e+01 9.76e+00
51 | ```
52 |
53 | We concluded from this that while the timing difference isn't significant for the number of iterations typically run with smirky, future move proposal engines would probably benefit from storing SMIRKS patterns rather than Chemical Environments.
54 |
--------------------------------------------------------------------------------
/utilities/test_smirks_or_environment_speed/Torsion_0_0.00e+00_results.smarts:
--------------------------------------------------------------------------------
1 | % Results for sampling Torsions at 0.00e+00
2 | %% SMIRKS patterns for final results are below
3 | % followed by a their matched reference SMIRKS from /beegfs/DATA/mobley/bannanc/smirky_testing/SMIRKY/inputFiles//smirff99Frosst.ffxml
4 | %Final Score was 51.963 %
5 | %%
6 | [*:1]~[#6:2]~[#6:3]~[*:4] C~C
7 | % [*:1]~[#6X3:2]:[#6X3:3]~[*:4] t45
8 | [*:1]~[#6:2]~[#7:3]~[*:4] C~N
9 | % [*:1]-[#6X4:2]-[#7X3$(*~[#6X3,#6X2]):3]~[*:4] t59
10 | [*:1]~[#6:2]~[#8:3]~[*:4] C~O
11 | % [*:1]-[#6X4:2]-[#8X2H0:3]-[*:4] t85
12 | [*:1]~[#6:2]~[#15:3]~[*:4] C~P
13 | % [*:1]~[#15:2]-[#6:3]-[*:4] t112
14 | [*:1]~[#6:2]~[#16:3]~[*:4] C~S
15 | % [*:1]-[#16X2,#16X3+1:2]-[#6:3]~[*:4] t104
16 | [*:1]~[#7:2]~[#7:3]~[*:4] N~N
17 | % [*:1]~[#7X2:2]-[#7X3:3]~[*:4] t124
18 | [*:1]~[#7:2]~[#8:3]~[*:4] N~O
19 | % [*:1]-[#8X2r5:2]-;@[#7X3r5:3]~[*:4] t115
20 | [*:1]~[#7:2]~[#16:3]~[*:4] N~S
21 | % [#8X1:1]~[#16X4,#16X3+0:2]-[#7X3:3]-[#6X3:4] t139
22 | [*:1]~[#8:2]~[#15:3]~[*:4] O~P
23 | % [*:1]-[#8X2:2]-[#15:3]~[*:4] t146
24 | [*:1]~[#8:2]~[#16:3]~[*:4] O~S
25 | % [*:1]~[#16X4,#16X3+0:2]-[#8X2:3]-[*:4] t144
26 | [*:1]~[#16:2]~[#16:3]~[*:4] S~S
27 | % [*:1]-[#16X2,#16X3+1:2]-[#16X2,#16X3+1:3]-[*:4] t145
28 | [*;X3:1](~[#6:2]~[#7:3]~[*:4])~[#6] 2936
29 | % [*:1]-,:[#6X3:2]=[#7X2:3]-[*:4] t76
30 | [*;X3:1](~[#6:2]~[#7:3]~[*;a:4])~[$ewg1] 8087
31 | % [*:1]=[#7X2,#7X3+1:2]-[#6X3:3]=,:[*:4] t73
32 | [*;X4:1]~[#7:2]~[#16:3](~[#8])~[*:4] 7890
33 | % [#6X3:1]-[#16X4,#16X3+0:2]-[#7X4,#7X3:3]-[#6X4:4] t134
34 | [#7!R:1]~[#7:2]~[#7:3]~[*:4] 2323
35 | % [*:1]~[*:2]=[#6,#7,#16,#15;X2:3]=[*:4] t150
36 | [*;X3:1]~[#6:2]~[#7:3]~[*;a:4]~[#1] 2632
37 | % [#6X3:1]:[#7X2:2]:[#6X3:3]:[#6X3:4] t75
38 | [#1:1]~[#7:2]~[#7:3]~[#1:4] 2525
39 | % [*:1]-[#7X4,#7X3:2]-[#7X3$(*~[#6X3,#6X2]):3]~[*:4] t121
40 | [*;X4:1]~[#7:2]~[#16:3](~[#8])~[$ewg2&:4] 4181
41 | % [#8X1:1]~[#16X4,#16X3+0:2]-[#7X4,#7X3:3]-[#6X4:4] t136
42 | [*;!X4:1]~[#7:2]~[#16:3]~[#6H2:4] 3345
43 | % [#6X4:1]-[#16X4,#16X3+0:2]-[#7X4,#7X3:3]-[#1:4] t131
44 | [*;!R:1]~[#6:2]~[#6:3]~[*;!R:4] 8243
45 | % [*:1]-[#6X4;r3:2]-@[#6X4;r3:3]-[*:4] t16
46 | [*;!R:1]~[#6:2]~[#6:3]~[*;!R:4]~[#1] 5859
47 | % [#1:1]-[#6X4:2]-[#6X4:3]-[#6X4:4] t5
48 | [*;a:1]~[#6:2]~[#16:3]~[*:4] 1983
49 | % [#6X3:1]-@[#16X2,#16X1-1,#16X3+1:2]-@[#6X3,#7X2;r5:3]=@[#6,#7;r5:4] t106
50 | [#8!X4:1]~[#6:2]~[#7:3]~[*:4] 5660
51 | % [*:1]~[#7X3,#7X2-1:2]-[#6X3:3]~[*:4] t67
52 | [*;!R:1](~[#6:2]~[#6:3]~[*;!R:4]~[#1])~[$ewg2] 9500
53 | % [#6X4:1]-[#6X4:2]-[#6X3:3]-[#7X3:4] t24
54 | [*;!R:1]~[#6:2]~[#6:3]~[$ewg1&X2;!R:4] 1122
55 | % [#1:1]-[#6X4:2]-[#6X4:3]-[#8X2:4] t10
56 | [#6H2:1]~[#8:2]~[#15:3]~[*;!X1:4] 8301
57 | [*;!R:1](~[#6:2]~[#6:3]~[$ewg1&X2;!R:4])~[#6] 7607
58 | % [*:1]~[#6X3:2]-[#6X4:3]-[*:4] t18
59 | [*;!R:1]~[#6:2]~[#6:3]=[*;!R:4] 6470
60 | % [*:1]~[#6X3:2]-[#6X3:3]~[*:4] t44
61 | [*;X3:1](~[#6:2]~[#7X3:3]~[*:4])~[#6] 1918
62 | % [*:1]-[#7X3;r5:2]-@[#6X3;r5:3]~[*:4] t70
63 | [#7+0X3:1]~[#6:2]~[#8:3]~[*;!X3:4]~[#6] 9934
64 | % [#6X4:1]-[#8X2:2]-[#6X4:3]-[#7X3:4] t89
65 | [*;X3:1](~[#6:2]~[#7R0:3]~[*:4])~[#6] 1518
66 | % [*:1]~[#7X3,#7X2-1:2]-!@[#6X3:3]~[*:4] t68
67 | [*:1]~[#7:2]~[#7:3]~[*;r6:4] 7562
68 | % [*:1]~[#7X2:2]=,:[#7X2:3]~[*:4] t126
69 | [*;!R:1]~[#6:2]~[#6:3]#[*;!R:4]~[#1] 5488
70 | % [*:1]~[*:2]-[*:3]#[*:4] t149
71 | [*;a:1]~[#6:2]~[#16:3]~;!@[*:4] 4949
72 | % [#6:1]-[#16X4,#16X3+0:2]-[#6X3:3]~[*:4] t111
73 | [$ewg1&+0:1]~[#7:2]~[#16:3]~[*:4] 7297
74 | % [#8X1:1]~[#16X4,#16X3+0:2]-[#7X3:3]-[#7X2:4] t140
75 | [#7H1:1]~[#6:2]~[#6:3]~[#7A:4] 6854
76 | % [#7X3:1]-[#6X4:2]-[#6X3:3]-[#7X3:4] t23
77 | [#1!X4:1]~[#7:2]~[#16:3]~[*;R2:4] 4082
78 | % [#6X3:1]-[#16X4,#16X3+0:2]-[#7X4,#7X3:3]-[#1:4] t132
79 | [*;!R:1](~[#6:2]~[#6X4:3]~[*;!R:4]~[#1])~[$ewg2] 5403
80 | % [#6X4:1]-[#6X4:2]-[#6X4:3]-[#6X4:4] t3
81 | [*;X3:1](~[#6H2:2]~[#7R0:3]~[*:4])~[#6] 1131
82 | % [*:1]-[#6X4:2]-[#7X3:3]-[*:4] t51
83 | [*;!R:1](~[#6:2]~[#6X4:3]~[*;R:4]~[#1])~[$ewg2] 8238
84 | % [#6X4;r3:1]-[#6X4;r3:2]-[#6X4;r3:3]-[*:4] t17
85 | [#8!X1;!R:1]~[#6:2]~[#6:3]~[$ewg1&X2;!R:4] 1268
86 | % [#8X2:1]-[#6X4:2]-[#6X4:3]-[#8X2:4] t6
87 | [*:1]~[#6!X4:2]~[#8:3]~[*:4] 2683
88 | % [#1:1]-[#8X2:2]-[#6X3:3]=[#8X1:4] t99
89 | [#8!X4:1]~[#6X3:2]~[#7:3]~[#1:4] 6762
90 | % [#1:1]-[#7X3:2]-[#6X3:3]=[#8,#16,#7:4] t69
91 | [*:1]~[#6:2]~[#6:3]~[#35H0:4] 5347
92 | % [#1:1]-[#6X4:2]-[#6X4:3]-[#35:4] t13
93 | [*:1]-[#6:2]~[#7r6:3]:[*:4] 9273
94 | % [*:1]~[#7X2,#7X3$(*~[#8X1]):2]:[#6X3:3]~[*:4] t74
95 | [*;X3:1](~[#6:2]~[#7:3]~[*;H3:4]):[#6] 1413
96 | % [*:1]-[#7X4:2]-[#6X3:3]~[*:4] t58
97 | [#1:1]~[#6:2]~[#6:3]~[#7A:4] 9141
98 | % [*:1]-[#6X4:2]-[#6X4:3]-[*:4] t2
99 | [#1:1]~[#6:2]~[#6:3]~[#1:4] 5525
100 | % [#1:1]-[#6X4:2]-[#6X4:3]-[#1:4] t4
101 | [*;!R:1]~[#6:2]~[#6;a:3]~[*;!R:4] 4246
102 | % [*:1]-,:[#6X3:2]=[#6X3:3]-,:[*:4] t46
103 | [*:1]~[#6:2]~[#7:3]~[$ewg2&A;+1:4] 3498
104 | % [*:1]~[#7X2:2]-[#6X4:3]-[*:4] t64
105 | [#8a:1]~[#7:2]~[#7:3]~[*;r6:4] 7797
106 | % [*:1]~[#7X3+1:2]=,:[#7X2:3]~[*:4] t127
107 | [*a:1](~[#6:2]~[#6:3]~[#7A:4])~[$ewg1] 6650
108 | % [*:1]~[#6X3:2]-[#6X3$(*=[#8,#16,#7]):3]~[*:4] t48
109 | [*;X3:1](~[#6H2:2]~[#7R0:3]~[*;X3:4])~[#6] 7694
110 | % [#6X3:1]-[#7X3:2]-[#6X4:3]-[#6X3:4] t60
111 | [#1!X4:1]~[#7:2]~[#16:3]~[$ewg1&:4] 4831
112 | % [#8X1:1]~[#16X4,#16X3+0:2]-[#7X4,#7X3:3]-[#1:4] t135
113 | [#6:1]~[#6!X4:2]~[#8:3]~[*:4] 6039
114 | % [*:1]~[#6X3:2]-[#8X2:3]-[#1:4] t96
115 | [*;!R:1](~[#6:2]~[#6X4:3]~[*;!R:4]~[#1])(~[#8])(~[#9])~[$ewg2] 5351
116 | [#8!X4R2:1](~[#6:2]~[#7:3]~[*:4])~[#6] 3851
117 | % [#8X2H0:1]-[#6X4:2]-[#7X3:3]-[#6X3:4] t62
118 | [#16:1](~[#6:2]~[#6:3]~[#7A:4]~[#6])~[*] 2631
119 | % [#16X2,#16X1-1,#16X3+1:1]-[#6X3:2]-[#6X4:3]-[#7X3$(*-[#6X3,#6X2]):4] t27
120 | [*:1]~[#6:2]~[#16:3]~[*;H2:4] 9632
121 | % [*:1]-[#16X2,#16X3+1:2]-[#6:3]-[#1:4] t105
122 | [#8!X2;!R:1]~[#6:2]~[#6:3]=[*;!R:4] 9260
123 | % [#6X3:1]=[#6X3:2]-[#6X3:3]=[#8X1:4] t49
124 | [*;X4:1]~[#6:2]~[#8:3]~[*;X4:4] 2174
125 | % [#6X4:1]-[#6X4:2]-[#8X2H0:3]-[#6X4:4] t86
126 | [#7+0;R:1](~[#6:2]~[#8:3]~[*!X4;!X3:4])~[#6] 6501
127 | % [*:1]-[#6X4:2]-[#8X2:3]-[#1:4] t83
128 | [*:1]~[#7:2]~[#16:3]~[#7X2:4] 1601
129 | % [*:1]-[#16X2,#16X3+1:2]-[!#6:3]~[*:4] t129
130 | [*;!R:1]~[#6:2]~[#6:3]~[#9H0;!R:4] 7827
131 | % [#9:1]-[#6X4:2]-[#6X4:3]-[#9:4] t7
132 | [*;X4:1]~[#7:2]~[#16:3](~[#8])~[*;X4:4] 4974
133 | % [#6X4:1]-[#16X4,#16X3+0:2]-[#7X4,#7X3:3]-[#6X4:4] t133
134 | [*:1]~[#6!X4:2]~[#8:3]~[#6!X3:4] 5843
135 | % [*:1]~[#6X3:2]-[#8X2:3]-[*:4] t95
136 | [*:1]~[#6:2]~[#16:3]~[$ewg2&A:4] 2986
137 | % [*:1]~[#16X4,#16X3+0:2]-[#6X3:3]~[*:4] t110
138 | [*;!R;+0:1]~[#6X4:2]~[#6:3]=[*;!R:4] 6290
139 | % [*:1]-[#6X4:2]-[#6X3:3]=[*:4] t21
140 | [*;!R:1]~[#6:2]~[#6:3]~[#17!X3;!R:4] 9724
141 | % [#1:1]-[#6X4:2]-[#6X4:3]-[#17:4] t12
142 | [*:1]~[#6X2:2]~[#7:3]~[*:4] 4773
143 | [#8!X2:1]~[#6!X4:2]~[#8:3]~[#6!X3:4] 2494
144 | % [#8,#16,#7:1]=[#6X3:2]-[#8X2H0:3]-[#6X4:4] t100
145 | [*;!R;+0:1]~[#6X4:2]~[#6:3]=[*;!R:4]~[#8] 9066
146 | [$halogen&;!R:1]~[#6:2]~[#6:3]~[#17!X3;!R:4] 8919
147 | % [#17:1]-[#6X4:2]-[#6X4:3]-[#17:4] t8
148 | [#6H0:1]~[#7:2](~[#7:3]~[*:4])~[$ewg2] 3774
149 | % [*:1]-[#7X3$(*-[#6X3,#6X2])r5:2]-@[#7X3$(*-[#6X3,#6X2])r5:3]~[*:4] t123
150 | [#7+0X3:1]~[#6:2]~[#8:3]~[*;!X3:4]~[#6X3]~[#6] 3876
151 | % [*:1]~[#6X3:2](=[#8,#16,#7])-[#8X2H0:3]-[*:4] t97
152 | [#6;R0:1]~[#6!X4:2]~[#8:3]~[*:4] 4545
153 | % [*:1]~[#6X3:2](=[#8,#16,#7])-[#8:3]-[#1:4] t98
154 | [$ewg1&+0;!X2:1]~[#7:2]~[#16:3]~[*:4] 6619
155 | % [*:1]~[#16X4,#16X3+0:2]-[#7:3]~[*:4] t130
156 | [#6;R0:1]~[#6!X4:2]~[#8:3]~[$ewg2&X3:4] 3713
157 | [#6H0;!R:1]~[#7:2](~[#7:3]~[*:4])~[$ewg2] 5329
158 | [*:1]-[#6:2]~[#16:3]~[$ewg2&A:4] 3291
159 | % [*:1]~[#16X4,#16X3+0:2]-[#6X4:3]-[*:4] t107
160 | [#7H2:1]~[#6:2]~[#6:3]~[#7A:4] 5987
161 | [*:1]=[#7:2]~[#16:3]~[*;!R:4] 6651
162 | % [#8X1:1]~[#16X4,#16X3+0:2]-[#7X2:3]~[#6X3:4] t143
163 | [#16:1]~[#6:2]~[#6:3]-[#7A;H2:4] 1329
164 | [*;!R:1](~[#6:2]~[#6:3]~[#17!X3;!R:4])~[#7] 3104
165 | [#8!X2;!R:1]~[#6:2]~[#6:3]=[*;!R:4]~[#7] 1340
166 | [*:1]~[#7:2]~[#8:3]-[*:4] 6442
167 | % [*:1]~[#8X2:2]-[#7:3]~[*:4] t114
168 | [*;X3:1](~[#16])(~[#6:2]~[#7:3]~[*;H3:4]):[#6] 6178
169 | % [*:1]-[#6X4:2]-[#7X4:3]-[*:4] t50
170 | [*H2;!R:1]~[#6:2]~[#6:3]~[#9H0;!R:4] 6291
171 | [*;X3:1](~[#1])(~[#6:2]~[#7X3:3]~[#8!X2:4])~[#6] 1521
172 | % [#8X1:1]~[#7X3:2]~[#6X3:3]~[*:4] t71
173 | [#8!X1;!R:1]~[#6:2]~[#6:3]~[$ewg1&X2;!R:4]~[#15] 3796
174 | [#7H1:1]~[#6:2]~[#6:3]~[#7A!X3:4] 7261
175 | [*A:1]~[#7:2]~[#7:3]:[*;r6:4]~[#8] 4404
176 | % [*:1]-[#7X3$(*-[#6X3,#6X2]):2]-[#7X3$(*-[#6X3,#6X2]):3]-[*:4] t122
177 | [*;X3:1](~[#6:2]~[#7!X3:3]~[*;a:4](~[#16])~[#7])~[$ewg1] 7356
178 | % [*:1]=[#7X2,#7X3+1:2]-[#6X3:3]-[*:4] t72
179 | [*;!R:1](~[#15])(~[#6:2]~[#6;a:3]~[*;!R:4])~[$halogen] 4636
180 | [*;X3:1](~[#6H2:2]~[#7R0:3]=[*;X3:4])~[#6] 8963
181 | % [#6X3:1]=[#7X2,#7X3+1:2]-[#6X4:3]-[#6X3,#6X4:4] t66
182 | [#17;!R:1]~[#6:2]~[#6:3]~[#9H0;!R:4] 7150
183 | [*:1]~[#6!X4:2]~[#15:3]~[*:4] 8692
184 | % [*:1]~[#15:2]-[#6X3:3]~[*:4] t113
185 | [*:1]=[#7:2]~[#16:3]~[*;!R:4]~[#1] 8573
186 | [#8+0;!R:1]~[#6:2]~[#6:3]~[#17!X3;!R:4] 5443
187 | [*;!R:1](~[#6:2]~[#6:3]~[$ewg1&X2;!R:4]~[$ewg2])~[#6] 5499
188 | % [*:1]-[#6X4;r3:2]-[#6X3:3]~[*:4] t28
189 | [*;!R:1](~[#6:2]~[#6:3]~[$ewg1&X2;!R:4]~[#15])~[#6] 3750
190 | [$ewg1&H0:1]~[#6:2]~[#8:3]~[$ewg1&H0:4] 2097
191 | [#16;H1:1](~[#6:2]~[#6:3]~[#7A:4]~[#6])~[*] 7633
192 | [#16H0:1]~[#6:2]~[#6:3]-[#7A;H2:4] 9637
193 | % [#16X2,#16X1-1,#16X3+1:1]-[#6X3:2]-[#6X4:3]-[#7X4,#7X3:4] t26
194 | [#6H2:1]~[#8:2]~[#15:3]~[$ewg1&!X4;!X1:4] 1009
195 | % [#8X2:1]-[#15:2]-[#8X2:3]-[#6X4:4] t147
196 | [*:1]~[#6!X4;!X3:2]~[#8:3]~[*:4] 5204
197 | [*;X3:1]~[#7:2]~[#16:3](~[#8])~[*;X4:4] 7900
198 | % [#6X4:1]-[#16X4,#16X3+0:2]-[#7X3:3]-[#6X3:4] t138
199 | [#8!X4R2:1](~[#6:2]~[#7:3]~[$ewg1&H1:4])~[#6] 6407
200 | [#7H2:1]~[#6:2]~[#6:3]-[#7A:4] 8242
201 | % [*:1]-[#6X4:2]-[#6X4;r3:3]-[*:4] t14
202 | [#7!X4:1]~[#7:2]~[#8:3]~[*:4] 3625
203 | % [*:1]-[#8X2r5:2]-;@[#7X2r5:3]~[*:4] t116
204 | [$ewg1&X4;!R:1]~[#6:2]~[#6:3]~[$ewg1&X2;!R:4] 1369
205 | [*;!R:1](~[#6:2]~[#6X4:3]~[*;R:4](~[#1])~[$ewg2])~[$ewg2&H2] 9719
206 | [*;!R:1](~[#6:2]~[#6X4:3]~[$ewg2&A;R:4]~[#1])~[$ewg2] 2659
207 | [#8!X1;!R:1]~[#6:2]~[#6:3]~[$ewg1&H2;!R:4] 4862
208 |
--------------------------------------------------------------------------------
/utilities/test_smirks_or_environment_speed/testing_smirks_speed.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Does removing ChemicalEnvironments speed up sampling?\n",
8 | "\n",
9 | "This IPython notebook is used to determine whether removing the list of ChemicalEnvironments would significantly increase the speed of smirky."
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 2,
15 | "metadata": {
16 | "collapsed": true
17 | },
18 | "outputs": [],
19 | "source": [
20 | "from openforcefield.typing.chemistry.environment import TorsionChemicalEnvironment\n",
21 | "import time\n",
22 | "import copy\n",
23 | "import numpy as np\n",
24 | "from numpy import random\n",
25 | "from smarty.atomtyper import AtomTyper"
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": 3,
31 | "metadata": {
32 | "collapsed": true
33 | },
34 | "outputs": [],
35 | "source": [
36 | "def smirks_sampling(smirks, iterations):\n",
37 | "    \"\"\"\n",
38 | "    Benchmark the strings-only strategy: the working pool holds SMIRKS text.\n",
39 | "\n",
40 | "    Every iteration draws one pool entry at random, parses it into a\n",
41 | "    TorsionChemicalEnvironment and serializes it back with asSMIRKS().\n",
42 | "    Returns the pool (a deep copy of `smirks` plus any accepted strings).\n",
43 | "    \"\"\"\n",
44 | "    pool = copy.deepcopy(smirks)\n",
45 | "    for _ in range(iterations):\n",
46 | "        picked = random.choice(pool)\n",
47 | "        round_trip = TorsionChemicalEnvironment(smirks = picked).asSMIRKS()\n",
48 | "\n",
49 | "        # mimic a sampler that accepts a proposed move 30% of the time\n",
50 | "        accepted = random.rand() < 0.3\n",
51 | "        if accepted:\n",
52 | "            pool.append(round_trip)\n",
53 | "\n",
54 | "    # the caller's list is untouched; only the deep copy grew\n",
55 | "    return pool\n",
56 | "\n",
57 | "def environment_sampling(smirks, iterations):\n",
58 | "    \"\"\"\n",
59 | "    This method takes in a list of smirks, turns them into chemical environments once,\n",
60 | "    then on each iteration deep-copies a random environment, serializes the copy and\n",
61 | "    keeps it 30% of the time. Returns the final environments as a list of SMIRKS strings.\n",
62 | "    \"\"\"\n",
63 | "    current = [TorsionChemicalEnvironment(smirks = c) for c in smirks]\n",
64 | "    for i in range(iterations):\n",
65 | "        change = copy.deepcopy(random.choice(current))\n",
66 | "        # asSMIRKS is a method: without the call nothing is serialized (or timed)\n",
67 | "        new_smirks = change.asSMIRKS()\n",
68 | "        # keep the new one 30% of the time\n",
69 | "        if random.rand() < 0.3:\n",
70 | "            current.append(change)\n",
71 | "\n",
72 | "    return [e.asSMIRKS() for e in current]\n",
73 | "\n",
74 | "def run_samplings(smirks, iterations):\n",
75 | "    \"\"\"\n",
76 | "    This method runs smirks_sampling and environment_sampling and returns the elapsed time\n",
77 | "    for each (in minutes) using the same input list and number of iterations\n",
78 | "    \"\"\"\n",
79 | "\n",
80 | "    # smirks first; the result gets its own name so `smirks` stays the original input\n",
81 | "    init_time = time.time()\n",
82 | "    sampled_smirks = smirks_sampling(smirks, iterations)\n",
83 | "    end_time = time.time()\n",
84 | "    smirks_time = (end_time - init_time) / 60.0\n",
85 | "\n",
86 | "    # environments, started from the same input list as the smirks run\n",
87 | "    init_time = time.time()\n",
88 | "    env_smirks = environment_sampling(smirks, iterations)\n",
89 | "    end_time = time.time()\n",
90 | "    env_time = (end_time - init_time) / 60.0\n",
91 | "\n",
92 | "    return smirks_time, env_time"
93 | ]
94 | },
95 | {
96 | "cell_type": "code",
97 | "execution_count": 6,
98 | "metadata": {},
99 | "outputs": [
100 | {
101 | "name": "stdout",
102 | "output_type": "stream",
103 | "text": [
104 | "------------------------------ 2 Iterations ------------------------------\n",
105 | " short\t1.97e-05\t6.54e-05\t4.57e-05\n",
106 | " long\t1.93e-05\t4.58e-04\t4.39e-04\n",
107 | " generic\t1.34e-05\t1.82e-05\t4.84e-06\n",
108 | "\n",
109 | "\n",
110 | "------------------------------ 10 Iterations ------------------------------\n",
111 | " short\t7.12e-05\t1.16e-04\t4.53e-05\n",
112 | " long\t8.27e-05\t5.40e-04\t4.58e-04\n",
113 | " generic\t6.60e-05\t6.47e-05\t-1.23e-06\n",
114 | "\n",
115 | "\n",
116 | "------------------------------ 100 Iterations ------------------------------\n",
117 | " short\t6.19e-04\t7.01e-04\t8.20e-05\n",
118 | " long\t7.44e-04\t1.36e-03\t6.12e-04\n",
119 | " generic\t5.49e-04\t6.28e-04\t7.92e-05\n",
120 | "\n",
121 | "\n",
122 | "------------------------------ 1000 Iterations ------------------------------\n",
123 | " short\t7.59e-03\t1.73e-02\t9.76e-03\n",
124 | " long\t8.42e-03\t2.10e-02\t1.26e-02\n",
125 | " generic\t6.89e-03\t1.61e-02\t9.20e-03\n",
126 | "\n",
127 | "\n",
128 | "------------------------------ 10000 Iterations ------------------------------\n",
129 | " short\t8.89e-02\t1.09e+00\t9.98e-01\n",
130 | " long\t9.37e-02\t1.17e+00\t1.08e+00\n",
131 | " generic\t7.18e-02\t1.12e+00\t1.05e+00\n",
132 | "\n",
133 | "\n",
134 | "------------------------------ 30000 Iterations ------------------------------\n",
135 | " short\t3.61e-01\t1.04e+01\t1.00e+01\n",
136 | " long\t4.51e-01\t1.08e+01\t1.04e+01\n",
137 | " generic\t3.13e-01\t1.01e+01\t9.76e+00\n",
138 | "\n",
139 | "\n"
140 | ]
141 | }
142 | ],
143 | "source": [
144 | "# keep only the SMIRKS strings from the typelist that contain no '$'\n",
145 | "long = AtomTyper.read_typelist('Torsion_0_0.00e+00_results.smarts')\n",
146 | "long = [pattern for (pattern, label) in long if '$' not in pattern]\n",
147 | "smirks_lists = {'generic': ['[*:1]~[*:2]~[*:3]~[*:4]'],\n",
148 | "                'short': copy.deepcopy(long[:10]),\n",
149 | "                'long': copy.deepcopy(long)}\n",
150 | "iterations = [2, 10, 100, 1000, 10000, 30000]\n",
151 | "\n",
152 | "# each row: list name, smirks_sampling time, environment_sampling time, their difference\n",
153 | "for its in iterations:\n",
154 | "    print('%s %i Iterations %s' % ('-'*30, its, '-'*30))\n",
155 | "    for title, smirks in smirks_lists.items():\n",
156 | "        smirks_time, env_time = run_samplings(smirks, its)\n",
157 | "        dif = env_time - smirks_time\n",
158 | "        print(\"%20s\\t%.2e\\t%.2e\\t%.2e\" % (title, smirks_time, env_time, dif))\n",
159 | "    print('\\n')"
160 | ]
161 | },
162 | {
163 | "cell_type": "code",
164 | "execution_count": null,
165 | "metadata": {
166 | "collapsed": true
167 | },
168 | "outputs": [],
169 | "source": []
170 | }
171 | ],
172 | "metadata": {
173 | "kernelspec": {
174 | "display_name": "Python 3",
175 | "language": "python",
176 | "name": "python3"
177 | },
178 | "language_info": {
179 | "codemirror_mode": {
180 | "name": "ipython",
181 | "version": 3
182 | },
183 | "file_extension": ".py",
184 | "mimetype": "text/x-python",
185 | "name": "python",
186 | "nbconvert_exporter": "python",
187 | "pygments_lexer": "ipython3",
188 | "version": "3.5.3"
189 | }
190 | },
191 | "nbformat": 4,
192 | "nbformat_minor": 2
193 | }
194 |
--------------------------------------------------------------------------------
|