├── .gitattributes
├── .github
└── workflows
│ └── main.yml
├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── docs
├── Makefile
└── source
│ ├── about.rst
│ ├── conf.py
│ ├── documentation.rst
│ ├── img
│ └── mastermsm.png
│ ├── index.rst
│ ├── installation.rst
│ ├── mastermsm.fewsm.rst
│ ├── mastermsm.msm.rst
│ ├── mastermsm.rst
│ ├── mastermsm.trajectory.rst
│ ├── modules.rst
│ └── support.rst
├── examples
├── README.md
├── alanine_dipeptide
│ ├── ala_dipeptide.ipynb
│ ├── ala_dipeptide_discretize.ipynb
│ ├── ala_dipeptide_dpca.ipynb
│ ├── ala_dipeptide_maxlike.ipynb
│ └── ala_dipeptide_multi.ipynb
├── alanine_pentapeptide
│ ├── ala_pentapeptide.ipynb
│ ├── ala_pentapeptide_contacts.ipynb
│ └── ala_pentapeptide_dpca.ipynb
├── bistable_potential
│ ├── 1D_smFS_MSM.ipynb
│ └── 2D_smFS_MSM.ipynb
├── mueller_potential
│ ├── .ipynb_checkpoints
│ │ └── mueller_potential-checkpoint.ipynb
│ ├── mueller.py
│ ├── mueller_potential.ipynb
│ └── mueller_potential_openmm.ipynb
└── schutte_potential
│ └── schute_mastermsm.ipynb
├── mastermsm
├── __init__.py
├── fewsm
│ ├── __init__.py
│ ├── fewsm.py
│ └── fewsm_lib.py
├── msm
│ ├── __init__.py
│ ├── msm.py
│ └── msm_lib.py
├── test
│ ├── README.md
│ ├── __init__.py
│ ├── download_data.py
│ ├── test_fewsm.py
│ ├── test_msm.py
│ └── test_trajectory.py
└── trajectory
│ ├── __init__.py
│ ├── traj.py
│ └── traj_lib.py
├── mkdocs.yml
├── requirements.txt
└── setup.py
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.xtc filter=lfs diff=lfs merge=lfs -text
2 |
--------------------------------------------------------------------------------
/.github/workflows/main.yml:
--------------------------------------------------------------------------------
1 | # This is a basic workflow to help you get started with Actions
2 |
3 | name: unittests
4 |
5 | ## Controls when the workflow will run
6 | on:
7 | # # Triggers the workflow on push or pull request events but only for the master branch
8 | push:
9 | branches: [ master ]
10 | pull_request:
11 | branches: [ master ]
12 | #
13 | # # Allows you to run this workflow manually from the Actions tab
14 | # workflow_dispatch:
15 |
16 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel
17 | jobs:
18 | # This workflow contains a single job called "build"
19 | build:
20 | # The type of runner that the job will run on
21 | runs-on: ubuntu-latest
22 |
23 | # Steps represent a sequence of tasks that will be executed as part of the job
24 | steps:
25 | - uses: actions/checkout@v3
26 | - name: Set up Python
27 | uses: actions/setup-python@v3
28 | with:
29 | python-version: '3.x'
30 | - name: Download repository
31 | run: |
32 | python --version
33 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
34 | python setup.py install
35 |
36 | - name: Run tests
37 | run: |
38 | cd $HOME/mastermsm
39 | python -m unittest
40 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | site/
2 | docs/build
3 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: python
2 | python:
3 | - 3.6
4 |
5 | travis:
6 | - develop
7 |
8 | install:
9 | - python setup.py install
10 |
11 | script:
12 | # avoid running in the checkout directory so nose finds built modules..
13 | - rundir=$HOME
14 | - cd $rundir
15 | - nosetests -v --with-coverage
16 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 2, June 1991
3 |
4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
6 | Everyone is permitted to copy and distribute verbatim copies
7 | of this license document, but changing it is not allowed.
8 |
9 | Preamble
10 |
11 | The licenses for most software are designed to take away your
12 | freedom to share and change it. By contrast, the GNU General Public
13 | License is intended to guarantee your freedom to share and change free
14 | software--to make sure the software is free for all its users. This
15 | General Public License applies to most of the Free Software
16 | Foundation's software and to any other program whose authors commit to
17 | using it. (Some other Free Software Foundation software is covered by
18 | the GNU Lesser General Public License instead.) You can apply it to
19 | your programs, too.
20 |
21 | When we speak of free software, we are referring to freedom, not
22 | price. Our General Public Licenses are designed to make sure that you
23 | have the freedom to distribute copies of free software (and charge for
24 | this service if you wish), that you receive source code or can get it
25 | if you want it, that you can change the software or use pieces of it
26 | in new free programs; and that you know you can do these things.
27 |
28 | To protect your rights, we need to make restrictions that forbid
29 | anyone to deny you these rights or to ask you to surrender the rights.
30 | These restrictions translate to certain responsibilities for you if you
31 | distribute copies of the software, or if you modify it.
32 |
33 | For example, if you distribute copies of such a program, whether
34 | gratis or for a fee, you must give the recipients all the rights that
35 | you have. You must make sure that they, too, receive or can get the
36 | source code. And you must show them these terms so they know their
37 | rights.
38 |
39 | We protect your rights with two steps: (1) copyright the software, and
40 | (2) offer you this license which gives you legal permission to copy,
41 | distribute and/or modify the software.
42 |
43 | Also, for each author's protection and ours, we want to make certain
44 | that everyone understands that there is no warranty for this free
45 | software. If the software is modified by someone else and passed on, we
46 | want its recipients to know that what they have is not the original, so
47 | that any problems introduced by others will not reflect on the original
48 | authors' reputations.
49 |
50 | Finally, any free program is threatened constantly by software
51 | patents. We wish to avoid the danger that redistributors of a free
52 | program will individually obtain patent licenses, in effect making the
53 | program proprietary. To prevent this, we have made it clear that any
54 | patent must be licensed for everyone's free use or not licensed at all.
55 |
56 | The precise terms and conditions for copying, distribution and
57 | modification follow.
58 |
59 | GNU GENERAL PUBLIC LICENSE
60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
61 |
62 | 0. This License applies to any program or other work which contains
63 | a notice placed by the copyright holder saying it may be distributed
64 | under the terms of this General Public License. The "Program", below,
65 | refers to any such program or work, and a "work based on the Program"
66 | means either the Program or any derivative work under copyright law:
67 | that is to say, a work containing the Program or a portion of it,
68 | either verbatim or with modifications and/or translated into another
69 | language. (Hereinafter, translation is included without limitation in
70 | the term "modification".) Each licensee is addressed as "you".
71 |
72 | Activities other than copying, distribution and modification are not
73 | covered by this License; they are outside its scope. The act of
74 | running the Program is not restricted, and the output from the Program
75 | is covered only if its contents constitute a work based on the
76 | Program (independent of having been made by running the Program).
77 | Whether that is true depends on what the Program does.
78 |
79 | 1. You may copy and distribute verbatim copies of the Program's
80 | source code as you receive it, in any medium, provided that you
81 | conspicuously and appropriately publish on each copy an appropriate
82 | copyright notice and disclaimer of warranty; keep intact all the
83 | notices that refer to this License and to the absence of any warranty;
84 | and give any other recipients of the Program a copy of this License
85 | along with the Program.
86 |
87 | You may charge a fee for the physical act of transferring a copy, and
88 | you may at your option offer warranty protection in exchange for a fee.
89 |
90 | 2. You may modify your copy or copies of the Program or any portion
91 | of it, thus forming a work based on the Program, and copy and
92 | distribute such modifications or work under the terms of Section 1
93 | above, provided that you also meet all of these conditions:
94 |
95 | a) You must cause the modified files to carry prominent notices
96 | stating that you changed the files and the date of any change.
97 |
98 | b) You must cause any work that you distribute or publish, that in
99 | whole or in part contains or is derived from the Program or any
100 | part thereof, to be licensed as a whole at no charge to all third
101 | parties under the terms of this License.
102 |
103 | c) If the modified program normally reads commands interactively
104 | when run, you must cause it, when started running for such
105 | interactive use in the most ordinary way, to print or display an
106 | announcement including an appropriate copyright notice and a
107 | notice that there is no warranty (or else, saying that you provide
108 | a warranty) and that users may redistribute the program under
109 | these conditions, and telling the user how to view a copy of this
110 | License. (Exception: if the Program itself is interactive but
111 | does not normally print such an announcement, your work based on
112 | the Program is not required to print an announcement.)
113 |
114 | These requirements apply to the modified work as a whole. If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works. But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 |
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 |
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 |
134 | 3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 |
138 | a) Accompany it with the complete corresponding machine-readable
139 | source code, which must be distributed under the terms of Sections
140 | 1 and 2 above on a medium customarily used for software interchange; or,
141 |
142 | b) Accompany it with a written offer, valid for at least three
143 | years, to give any third party, for a charge no more than your
144 | cost of physically performing source distribution, a complete
145 | machine-readable copy of the corresponding source code, to be
146 | distributed under the terms of Sections 1 and 2 above on a medium
147 | customarily used for software interchange; or,
148 |
149 | c) Accompany it with the information you received as to the offer
150 | to distribute corresponding source code. (This alternative is
151 | allowed only for noncommercial distribution and only if you
152 | received the program in object code or executable form with such
153 | an offer, in accord with Subsection b above.)
154 |
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it. For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable. However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 |
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 |
172 | 4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License. Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 |
180 | 5. You are not required to accept this License, since you have not
181 | signed it. However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works. These actions are
183 | prohibited by law if you do not accept this License. Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 |
189 | 6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions. You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 |
197 | 7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License. If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all. For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 |
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 |
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices. Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 |
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 |
229 | 8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded. In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 |
237 | 9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time. Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 |
242 | Each version is given a distinguishing version number. If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation. If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 |
250 | 10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission. For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this. Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 |
258 | NO WARRANTY
259 |
260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 |
270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 |
280 | END OF TERMS AND CONDITIONS
281 |
282 | How to Apply These Terms to Your New Programs
283 |
284 | If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 |
288 | To do so, attach the following notices to the program. It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 |
293 | {description}
294 | Copyright (C) {year} {fullname}
295 |
296 | This program is free software; you can redistribute it and/or modify
297 | it under the terms of the GNU General Public License as published by
298 | the Free Software Foundation; either version 2 of the License, or
299 | (at your option) any later version.
300 |
301 | This program is distributed in the hope that it will be useful,
302 | but WITHOUT ANY WARRANTY; without even the implied warranty of
303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
304 | GNU General Public License for more details.
305 |
306 | You should have received a copy of the GNU General Public License along
307 | with this program; if not, write to the Free Software Foundation, Inc.,
308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 |
310 | Also add information on how to contact you by electronic and paper mail.
311 |
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 |
315 | Gnomovision version 69, Copyright (C) year name of author
316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 | This is free software, and you are welcome to redistribute it
318 | under certain conditions; type `show c' for details.
319 |
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License. Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 |
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary. Here is a sample; alter the names:
328 |
329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 | `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 |
332 | {signature of Ty Coon}, 1 April 1989
333 | Ty Coon, President of Vice
334 |
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs. If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library. If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
340 |
341 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [![Documentation Status](https://readthedocs.org/projects/mastermsm/badge/?version=develop)](https://mastermsm.readthedocs.io/en/develop/?badge=develop)
2 | [](https://www.codacy.com/gh/BioKT/MasterMSM/dashboard?utm_source=github.com&utm_medium=referral&utm_content=BioKT/MasterMSM&utm_campaign=Badge_Grade)
3 |
4 | MasterMSM
5 | =========
6 | MasterMSM is a Python package for generating Markov state models (MSMs)
7 | from molecular dynamics trajectories. We use a formulation based on
8 | the chemical master equation. This package will allow you to:
9 |
10 | * Create Markov state / master equation models from biomolecular simulations.
11 |
12 | * Discretize trajectory data using dihedral angle based methods useful
13 | for small peptides.
14 |
15 | * Calculate rate matrices using a variety of methods.
16 |
17 | * Obtain committors and reactive fluxes.
18 |
19 | * Carry out sensitivity analysis of networks.
20 |
21 | You can read the documentation [here](https://mastermsm.readthedocs.io).
22 |
23 | Contributors
24 | ------------
25 | This code has been written by David De Sancho with help from Anne Aguirre.
26 |
27 | Installation
28 | ------------
29 | git clone https://github.com/daviddesancho/MasterMSM destination/MasterMSM
30 | cd destination/MasterMSM
31 | python setup.py install --user
32 |
33 | External libraries
34 | ------------------
35 | mdtraj : https://mdtraj.org
36 |
37 | Citation
38 | --------
39 | @article{mastermsm,
40 | author = "David De Sancho and Anne Aguirre",
41 | title = "{MasterMSM: A Package for Constructing Master Equation Models of Molecular Dynamics}",
42 | year = "2019",
43 | month = "6",
44 | journal = "J. Chem. Inf. Model.",
45 | url = "https://doi.org/10.1021/acs.jcim.9b00468",
46 | doi = "10.1021/acs.jcim.9b00468"
47 | }
48 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | SPHINXPROJ = MasterMSM
8 | SOURCEDIR = source
9 | BUILDDIR = build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--------------------------------------------------------------------------------
/docs/source/about.rst:
--------------------------------------------------------------------------------
1 | About
2 | ============
3 | In the last decade, Master equation / Markov state models (usually termed MSMs)
4 | have become one of the key methodologies to analyze data from molecular dynamics
5 | (MD) simulations. You can find information about MSMs in general in the following
6 | volume
7 |
8 | * `An Introduction to Markov State Models and Their Application to Long Timescale Molecular Simulation <https://link.springer.com/book/10.1007/978-94-007-7606-7>`_, edited by Pande, Bowman and Noe (Springer, 2014).
9 |
10 | The MasterMSM library brings a different flavour of MSMs, based on the methods
11 | introduced by N. V. Buchete and G. Hummer
12 | (`J. Phys. Chem. B, 2008 <https://doi.org/10.1021/jp0761665>`_).
13 | The central difference relies in that instead of using transition matrices we focus
14 | in rate matrices, which determine the time evolution of the system as described
15 | by the chemical master equation.
16 |
--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | #
3 | # Configuration file for the Sphinx documentation builder.
4 | #
5 | # This file does only contain a selection of the most common options. For a
6 | # full list see the documentation:
7 | # http://www.sphinx-doc.org/en/master/config
8 |
9 | # -- Path setup --------------------------------------------------------------
10 |
11 | # If extensions (or modules to document with autodoc) are in another directory,
12 | # add these directories to sys.path here. If the directory is relative to the
13 | # documentation root, use os.path.abspath to make it absolute, like shown here.
14 | #
15 | import os
16 | import sys
17 |
18 | sys.path.insert(0, os.path.abspath('../..'))
19 | import mastermsm
20 | #sys.path.append(os.path.join(os.path.abspath(os.pardir)))
21 |
22 |
23 |
24 | # -- Project information -----------------------------------------------------
25 |
26 | project = 'MasterMSM'
27 | copyright = '2019, David De Sancho'
28 | author = 'David De Sancho'
29 |
30 | # The short X.Y version
31 | version = ''
32 | # The full version, including alpha/beta/rc tags
33 | release = '1.1.1'
34 |
35 |
36 | # -- General configuration ---------------------------------------------------
37 |
38 | # If your documentation needs a minimal Sphinx version, state it here.
39 | #
40 | # needs_sphinx = '1.0'
41 |
42 | # Add any Sphinx extension module names here, as strings. They can be
43 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
44 | # ones.
45 | extensions = [
46 | 'sphinx.ext.autodoc',
47 | 'sphinx.ext.coverage',
48 | 'sphinx.ext.githubpages',
49 | 'sphinx.ext.mathjax',
50 | 'sphinx.ext.viewcode',
51 | 'sphinx.ext.napoleon',
52 | 'sphinx.ext.autosummary',
53 | 'sphinx.ext.doctest',
54 | 'sphinx.ext.inheritance_diagram']
55 |
56 | # Add any paths that contain templates here, relative to this directory.
57 | templates_path = ['_templates']
58 |
59 | # The suffix(es) of source filenames.
60 | # You can specify multiple suffix as a list of string:
61 | #
62 | source_suffix = ['.rst', '.md']
63 | #source_suffix = '.rst'
64 |
65 | # The master toctree document.
66 | master_doc = 'index'
67 |
68 | # The language for content autogenerated by Sphinx. Refer to documentation
69 | # for a list of supported languages.
70 | #
71 | # This is also used if you do content translation via gettext catalogs.
72 | # Usually you set "language" from the command line for these cases.
73 | language = None
74 |
75 | # List of patterns, relative to source directory, that match files and
76 | # directories to ignore when looking for source files.
77 | # This pattern also affects html_static_path and html_extra_path .
78 | exclude_patterns = []
79 |
80 | # The name of the Pygments (syntax highlighting) style to use.
81 | pygments_style = 'sphinx'
82 |
83 |
84 | # -- Options for HTML output -------------------------------------------------
85 |
86 | # The theme to use for HTML and HTML Help pages. See the documentation for
87 | # a list of builtin themes.
88 | #
89 |
90 | html_theme = 'alabaster'
91 | #html_theme = 'default'
92 | #html_theme = 'sphinx_rtd_theme'
93 | html_logo = "img/mastermsm.png"
94 |
95 | # Theme options are theme-specific and customize the look and feel of a theme
96 | # further. For a list of options available for each theme, see the
97 | # documentation.
98 | #
99 | # html_theme_options = {}
100 |
101 | # Add any paths that contain custom static files (such as style sheets) here,
102 | # relative to this directory. They are copied after the builtin static files,
103 | # so a file named "default.css" will overwrite the builtin "default.css".
104 | html_static_path = ['_static']
105 |
106 | # Custom sidebar templates, must be a dictionary that maps document names
107 | # to template names.
108 | #
109 | # The default sidebars (for documents that don't match any pattern) are
110 | # defined by theme itself. Builtin themes are using these templates by
111 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
112 | # 'searchbox.html']``.
113 | #
114 | # html_sidebars = {}
115 |
116 |
117 | # -- Options for HTMLHelp output ---------------------------------------------
118 |
119 | # Output file base name for HTML help builder.
120 | htmlhelp_basename = 'MasterMSMdoc'
121 |
122 |
123 | # -- Options for LaTeX output ------------------------------------------------
124 |
125 | latex_elements = {
126 | # The paper size ('letterpaper' or 'a4paper').
127 | #
128 | # 'papersize': 'letterpaper',
129 |
130 | # The font size ('10pt', '11pt' or '12pt').
131 | #
132 | # 'pointsize': '10pt',
133 |
134 | # Additional stuff for the LaTeX preamble.
135 | #
136 | # 'preamble': '',
137 |
138 | # Latex figure (float) alignment
139 | #
140 | # 'figure_align': 'htbp',
141 | }
142 |
143 | # Grouping the document tree into LaTeX files. List of tuples
144 | # (source start file, target name, title,
145 | # author, documentclass [howto, manual, or own class]).
146 | latex_documents = [
147 | (master_doc, 'MasterMSM.tex', 'MasterMSM Documentation',
148 | 'David De Sancho', 'manual'),
149 | ]
150 |
151 | # -- Options for manual page output ------------------------------------------
152 |
153 | # One entry per manual page. List of tuples
154 | # (source start file, name, description, authors, manual section).
155 | man_pages = [
156 | (master_doc, 'mastermsm', 'MasterMSM Documentation',
157 | [author], 1)
158 | ]
159 |
160 |
161 | # -- Options for Texinfo output ----------------------------------------------
162 |
163 | # Grouping the document tree into Texinfo files. List of tuples
164 | # (source start file, target name, title, author,
165 | # dir menu entry, description, category)
166 | texinfo_documents = [
167 | (master_doc, 'MasterMSM', 'MasterMSM Documentation',
168 | author, 'MasterMSM', 'One line description of project.',
169 | 'Miscellaneous'),
170 | ]
171 |
172 |
173 | # -- Extension configuration -------------------------------------------------
174 |
175 | # Napoleon settings
176 | napoleon_google_docstring = True
177 | napoleon_numpy_docstring = True
178 | napoleon_include_private_with_doc = False
179 | napoleon_include_special_with_doc = False
180 | napoleon_use_admonition_for_examples = False
181 | napoleon_use_admonition_for_notes = False
182 | napoleon_use_admonition_for_references = False
183 | napoleon_use_ivar = True
184 | napoleon_use_param = True
185 | napoleon_use_rtype = True
186 |
--------------------------------------------------------------------------------
/docs/source/documentation.rst:
--------------------------------------------------------------------------------
1 | .. _documentation:
2 |
3 | Modules
4 | =============
5 | MasterMSM is a Python package that is divided in three main subpackages.
6 | This way of structuring the code derives from the three main types of
7 | objects that are constructed. First, there are trajectories, which
8 | result in objects of the ``TimeSeries`` class; second, there are dynamical
9 | models, which come in the form of instances of the ``MSM`` class; finally,
10 | dynamical models can be postprocessed into simple, few-state models, which
11 | we generate as ``FEWSM`` class objects.
12 |
13 | Trajectory module
14 | -----------------
15 | This module contains everything necessary to get your time series data
16 | into MasterMSM. The main class object within this module is the TimeSeries
17 | object.
18 |
19 | .. currentmodule:: mastermsm
20 |
21 | .. autosummary::
22 | :toctree:
23 |
24 | trajectory
25 |
26 |
27 | MSM module
28 | ----------
29 | .. currentmodule:: mastermsm
30 |
31 | .. autosummary::
32 | :toctree:
33 |
34 | msm
35 |
36 |
37 | FEWSM module
 38 | ------------
39 | .. currentmodule:: mastermsm
40 |
41 | .. autosummary::
42 | :toctree:
43 |
44 | fewsm
45 |
46 | Examples
47 | --------
48 | We have put together a few simple Python notebooks to help you learn the basics
49 | of the MasterMSM package. They are based on data derived from either model systems
50 | or from molecular dynamics simulations of some simple (albeit realistic) biomolecules.
51 | You can find the notebooks in the following
 52 | `link <https://github.com/daviddesancho/MasterMSM/tree/master/examples>`_.
53 |
--------------------------------------------------------------------------------
/docs/source/img/mastermsm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BioKT/MasterMSM/7e71b0fcf42cc7d840e58a6ca18450d710fbdbb4/docs/source/img/mastermsm.png
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | .. MasterMSM documentation master file, created by
2 | sphinx-quickstart on Mon Mar 25 23:47:22 2019.
3 | You can adapt this file completely to your liking, but it should at least
4 | contain the root `toctree` directive.
5 |
6 | ==========================================
7 | Welcome to MasterMSM's documentation!
8 | ==========================================
9 |
10 | MasterMSM is a Python package for generating Markov state models (MSMs)
11 | from molecular dynamics trajectories. We use a formulation based on
12 | the chemical master equation. This package will allow you to:
13 |
14 | * Create Markov state / master equation models from biomolecular simulations.
15 | * Discretize trajectory data using dihedral angle based methods useful
16 | for small peptides.
17 | * Calculate rate matrices using a variety of methods.
18 | * Obtain committors and reactive fluxes.
19 | * Carry out sensitivity analysis of networks.
20 |
21 | We have written a `paper `_
22 | on MasterMSM that briefly describes some of the code capabilities.
23 | The MasterMSM code is hosted in `Github <https://github.com/daviddesancho/MasterMSM>`_.
24 | Active development of the MasterMSM code takes place using the git version
25 | control system.
26 |
27 | .. toctree::
28 | :maxdepth: 3
29 | :caption: Contents:
30 |
31 | about
32 | installation
33 | documentation
34 | support
35 |
36 |
37 |
38 | Indices and tables
39 | ==================
40 |
41 | * :ref:`genindex`
42 | * :ref:`modindex`
43 | * :ref:`search`
44 |
--------------------------------------------------------------------------------
/docs/source/installation.rst:
--------------------------------------------------------------------------------
1 | Installation
2 | ============
3 | You can install MasterMSM by simply downloading the package from the
 4 | `GitHub repository <https://github.com/daviddesancho/MasterMSM>`_
5 | and using the standard installation instructions for packages built
6 | using `Distutils `_.
7 |
8 | .. code-block:: bash
9 |
10 | git clone http://github.com/daviddesancho/mastermsm destination/mastermsm
11 | cd destination/mastermsm
12 | python setup.py install --user
13 |
14 | Parallel processing in Python and MasterMSM
15 | -------------------------------------------
16 | In MasterMSM we make ample use of the ``multiprocessing`` library, which
17 | for MacOS X can conflict with non-Python libraries. In the past we have
18 | found this to be a problem that can result in segmentation faults.
19 | Digging in the internet I found a workaround for this problem, by setting
20 | the following environment variable
21 |
22 | .. code-block:: bash
23 |
24 | export VECLIB_MAXIMUM_THREADS=1
25 |
26 | This should be set in the terminal before you start your Python session
27 | in case you meet this problem.
28 |
29 |
--------------------------------------------------------------------------------
/docs/source/mastermsm.fewsm.rst:
--------------------------------------------------------------------------------
1 | mastermsm.fewsm package
2 | ============================
3 |
4 | Submodules
5 | ----------
6 |
 7 | mastermsm.fewsm.fewsm module
 8 | --------------------------------
9 |
10 | .. automodule:: mastermsm.fewsm.fewsm
11 | :members:
12 | :undoc-members:
13 | :show-inheritance:
14 |
15 | mastermsm.fewsm.fewsm\_lib module
16 | -------------------------------------
17 |
18 | .. automodule:: mastermsm.fewsm.fewsm_lib
19 | :members:
20 | :undoc-members:
21 | :show-inheritance:
22 |
23 |
24 | Module contents
25 | ---------------
26 |
27 | .. automodule:: mastermsm.fewsm
28 | :members:
29 | :undoc-members:
30 | :show-inheritance:
31 |
--------------------------------------------------------------------------------
/docs/source/mastermsm.msm.rst:
--------------------------------------------------------------------------------
1 | mastermsm.msm package
2 | =====================
3 |
4 | Submodules
5 | ----------
6 |
7 | mastermsm.msm.msm module
8 | ------------------------
9 |
10 | .. automodule:: mastermsm.msm.msm
11 | :members:
12 | :undoc-members:
13 | :show-inheritance:
14 |
15 | mastermsm.msm.msm\_lib module
16 | -----------------------------
17 |
18 | .. automodule:: mastermsm.msm.msm_lib
19 | :members:
20 | :undoc-members:
21 | :show-inheritance:
22 |
23 |
24 | Module contents
25 | ---------------
26 |
27 | .. automodule:: mastermsm.msm
28 | :members:
29 | :undoc-members:
30 | :show-inheritance:
31 |
--------------------------------------------------------------------------------
/docs/source/mastermsm.rst:
--------------------------------------------------------------------------------
1 | mastermsm package
2 | =================
3 |
4 | Subpackages
5 | -----------
6 |
7 | .. toctree::
8 |
9 | mastermsm.msm
10 | mastermsm.trajectory
11 | mastermsm.fewsm
12 |
13 | Module contents
14 | ---------------
15 |
16 | .. automodule:: mastermsm
17 | :members:
18 | :undoc-members:
19 | :show-inheritance:
20 |
--------------------------------------------------------------------------------
/docs/source/mastermsm.trajectory.rst:
--------------------------------------------------------------------------------
1 | mastermsm.trajectory package
2 | ============================
3 |
4 | Submodules
5 | ----------
6 |
7 | mastermsm.trajectory.traj module
8 | --------------------------------
9 |
10 | .. automodule:: mastermsm.trajectory.traj
11 | :members:
12 | :undoc-members:
13 | :show-inheritance:
14 |
15 | mastermsm.trajectory.traj\_lib module
16 | -------------------------------------
17 |
18 | .. automodule:: mastermsm.trajectory.traj_lib
19 | :members:
20 | :undoc-members:
21 | :show-inheritance:
22 |
23 |
24 | Module contents
25 | ---------------
26 |
27 | .. automodule:: mastermsm.trajectory
28 | :members:
29 | :undoc-members:
30 | :show-inheritance:
31 |
--------------------------------------------------------------------------------
/docs/source/modules.rst:
--------------------------------------------------------------------------------
1 | mastermsm
2 | =========
3 |
4 | .. toctree::
5 | :maxdepth: 4
6 |
7 | mastermsm
8 |
--------------------------------------------------------------------------------
/docs/source/support.rst:
--------------------------------------------------------------------------------
1 | =======
2 | Support
3 | =======
4 |
5 | Development of MasterMSM is based on GitHub. You can get help by opening an
6 | issue on Github_.
7 |
8 | .. _Github: https://github.com/daviddesancho/MasterMSM
9 |
--------------------------------------------------------------------------------
/examples/README.md:
--------------------------------------------------------------------------------
1 | MasterMSM examples
2 | ==================
3 | Here are a set of examples where you can learn some of the fundamentals of
4 | the MasterMSM package. They correspond to either model systems (dynamics
5 | on one or two dimensional potentials) or molecular dynamics simulations
6 | on simple biomolecules.
7 |
8 | Contents
9 | --------
10 | * bistable_potential (1D_smFS_MSM): example corresponding to a one-dimensional two-state model.
11 | * bistable_potential (2D_smFS_MSM): analogous case but now in two dimensions.
12 | * alanine_dipeptide: example with true MD simulation data for the simplest peptide model, generated with the Gromacs package.
13 | * alanine_pentapeptide: example with true MD simulation data, generated with the Gromacs package.
14 |
--------------------------------------------------------------------------------
/examples/alanine_dipeptide/ala_dipeptide.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## MSM of the alanine dipeptide\n",
8 | "Here we run through most of the things that can be done with this package using a simple two-state model. There are more sophisticated examples that enable for further possibilities.\n",
9 | "\n",
10 | "The first thing one must do is download the data from [OSF](https://osf.io/a2vc7) and then import a number of libraries we will need as we run this example."
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": null,
16 | "metadata": {},
17 | "outputs": [],
18 | "source": [
19 | "%load_ext autoreload\n",
20 | "%autoreload 2\n",
21 | "%matplotlib inline\n",
22 | "import math\n",
23 | "import numpy as np"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": null,
29 | "metadata": {},
30 | "outputs": [],
31 | "source": [
32 | "import matplotlib.pyplot as plt\n",
33 | "import seaborn as sns\n",
34 | "sns.set(style=\"ticks\", color_codes=True, font_scale=1.25)\n",
35 | "sns.set_style({\"xtick.direction\": \"in\", \"ytick.direction\": \"in\"})"
36 | ]
37 | },
38 | {
39 | "cell_type": "markdown",
40 | "metadata": {},
41 | "source": [
42 | "### Discretizing the trajectory\n",
 43 |     "We start loading the simulation data using the `trajectory` module. For this we use the external library [`MDtraj`](http://mdtraj.org), which contains all sorts of methods for parsing and calculating interesting properties of our time-series data."
44 | ]
45 | },
46 | {
47 | "cell_type": "code",
48 | "execution_count": null,
49 | "metadata": {},
50 | "outputs": [],
51 | "source": [
52 | "import mdtraj as md\n",
53 | "from mastermsm.trajectory import traj"
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": null,
59 | "metadata": {},
60 | "outputs": [],
61 | "source": [
62 | "tr = traj.TimeSeries(top='data/alaTB.gro', traj=['data/alatb_n1_ppn24.xtc'])\n",
63 | "print (tr.mdt)"
64 | ]
65 | },
66 | {
67 | "cell_type": "markdown",
68 | "metadata": {},
69 | "source": [
70 | "So does what we have calculated look somewhat like a Ramachandran map?"
71 | ]
72 | },
73 | {
74 | "cell_type": "code",
75 | "execution_count": null,
76 | "metadata": {},
77 | "outputs": [],
78 | "source": [
79 | "phi = md.compute_phi(tr.mdt)\n",
80 | "psi = md.compute_psi(tr.mdt)\n",
81 | "res = [x for x in tr.mdt.topology.residues]"
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "execution_count": null,
87 | "metadata": {},
88 | "outputs": [],
89 | "source": [
90 | "fig,ax = plt.subplots(figsize=(3.5,3.5))\n",
91 | "ax.plot(180./math.pi*phi[1],180./math.pi*psi[1],'o', markersize=1)\n",
92 | "ax.set_xlim(-180,180)\n",
93 | "ax.set_ylim(-180,180)\n",
94 | "ax.xaxis.set_ticks(range(-180,181,90))\n",
95 | "ax.yaxis.set_ticks(range(-180,181,90))\n",
96 | "\n",
97 | "ax.set_xlabel(r'$\\phi$', fontsize=18)\n",
98 | "ax.set_ylabel(r'$\\psi$', fontsize=18)"
99 | ]
100 | },
101 | {
102 | "cell_type": "markdown",
103 | "metadata": {},
104 | "source": [
105 | "Next we proceed to discretize the trajectory based on the Ramachandran angles."
106 | ]
107 | },
108 | {
109 | "cell_type": "code",
110 | "execution_count": null,
111 | "metadata": {},
112 | "outputs": [],
113 | "source": [
114 | "tr.discretize(states=['A', 'E', 'L'])"
115 | ]
116 | },
117 | {
118 | "cell_type": "markdown",
119 | "metadata": {},
120 | "source": [
121 |     "For plotting we convert helical configurations to 1 and beta to 0."
122 | ]
123 | },
124 | {
125 | "cell_type": "code",
126 | "execution_count": null,
127 | "metadata": {},
128 | "outputs": [],
129 | "source": [
130 | "fig, (ax1, ax2, ax3) = plt.subplots(3, 1, sharex=True)\n",
131 | "\n",
132 | "ax1.plot(tr.mdt.time, psi[1]*180/math.pi,'o', ms=0.1)\n",
133 | "ax1.set_ylabel(r'$\\psi$', fontsize=14)\n",
134 | "ax1.set_ylim(-180,180)\n",
135 | "ax1.yaxis.set_ticks(range(-180,181,90))\n",
136 | "\n",
137 | "ax2.plot(tr.mdt.time, phi[1]*180/math.pi,'o', ms=0.1)\n",
138 | "ax2.set_ylabel(r'$\\phi$', fontsize=14)\n",
139 | "ax1.set_ylim(-180,180)\n",
140 | "ax1.yaxis.set_ticks(range(-180,181,90))\n",
141 | "\n",
142 | "ax3.set_ylabel('State')\n",
143 | "ax3.set_ylim(-0.2,2.2)\n",
144 | "ax3.yaxis.set_ticks(range(3))\n",
145 | "labels = [item.get_text() for item in ax2.get_xticklabels()]\n",
146 | "labels = ['A', 'E', 'L']\n",
147 | "y = [labels.index(x) if x in labels else 0 for x in tr.distraj ]\n",
148 | "ax3.plot(tr.mdt.time, y, lw=1)\n",
149 | "ax3.set_yticklabels(labels)\n",
150 | "ax3.set_xlabel('Time [ps]')\n",
151 | "\n",
152 | "ax1.set_xlim(0, 2.0e5)\n",
153 | "plt.tight_layout(h_pad=0)"
154 | ]
155 | },
156 | {
157 | "cell_type": "markdown",
158 | "metadata": {
159 | "collapsed": true
160 | },
161 | "source": [
162 |     "In the plot we see how the time series of continuous torsion angles converts into a time series of discrete states. We can obtain a list of states in the following way."
163 | ]
164 | },
165 | {
166 | "cell_type": "code",
167 | "execution_count": null,
168 | "metadata": {},
169 | "outputs": [],
170 | "source": [
171 | "tr.find_keys()\n",
172 | "tr.keys"
173 | ]
174 | },
175 | {
176 | "cell_type": "markdown",
177 | "metadata": {
178 | "collapsed": true
179 | },
180 | "source": [
181 | "### Building the master equation model\n",
182 | "After having loaded our trajectory using the functionalities from the `trajectory` module we start building the master equation model. For this, we make use of the `msm` module. There are two steps corresponding to the two main classes within that module. First we create an instance of the `SuperMSM`, which can be used to direct the whole process of constructing and validating the MSM."
183 | ]
184 | },
185 | {
186 | "cell_type": "code",
187 | "execution_count": null,
188 | "metadata": {},
189 | "outputs": [],
190 | "source": [
191 | "from mastermsm.msm import msm\n",
192 | "msm_alaTB = msm.SuperMSM([tr])"
193 | ]
194 | },
195 | {
196 | "cell_type": "markdown",
197 | "metadata": {},
198 | "source": [
199 | "Then, using the `do_msm` method, we produce instances of the `MSM` class at a desired lag time, $\\Delta t$. Each of these contains an MSM built at a specific lag time. These are stored as a dictionary in the `msms` attribute of the `SuperMSM` class. "
200 | ]
201 | },
202 | {
203 | "cell_type": "code",
204 | "execution_count": null,
205 | "metadata": {},
206 | "outputs": [],
207 | "source": [
208 | "lagt = 1\n",
209 | "msm_alaTB.do_msm(lagt)\n",
210 | "msm_alaTB.msms[lagt].do_trans()\n",
211 | "msm_alaTB.msms[lagt].boots()"
212 | ]
213 | },
214 | {
215 | "cell_type": "markdown",
216 | "metadata": {},
217 | "source": [
218 | "The resulting model has a number of things we may be interested in, like its eigenvalue spectrum (in this case limited to two relaxation times, corresponding to the exchange of helix, coil and $\\alpha_L$ states) or the equilibrium probabilities of the microstates."
219 | ]
220 | },
221 | {
222 | "cell_type": "code",
223 | "execution_count": null,
224 | "metadata": {},
225 | "outputs": [],
226 | "source": [
227 | "fig, ax = plt.subplots(1, 2, figsize=(6,3))\n",
228 | "\n",
229 | "ax[0].errorbar([1, 2], msm_alaTB.msms[lagt].tau_ave, msm_alaTB.msms[lagt].tau_std ,fmt='o-', markersize=5)\n",
230 | "ax[1].errorbar([1,2,3], msm_alaTB.msms[lagt].peq_ave, msm_alaTB.msms[lagt].peq_std ,fmt='o-', markersize=5)\n",
231 | "\n",
232 | "ax[0].set_xlim(0.5, 2.5)\n",
233 | "ax[0].set_ylim(10,2e3)\n",
234 | "ax[0].set_yscale('log')\n",
235 | "ax[0].set_ylabel(r'$\\tau$ [ps]', fontsize=18)\n",
236 | "ax[0].set_xlabel(r'$\\lambda_1$', fontsize=18)\n",
237 | "\n",
238 | "ax[1].set_ylabel(r'$P_{eq}$', fontsize=18)\n",
239 | "ax[1].set_xlabel(r'state', fontsize=18)\n",
240 | "ax[1].set_yscale('log')\n",
241 | "ax[1].set_ylim(1e-2, 1)\n",
242 | "ax[1].set_xticks([1, 2, 3])\n",
243 | "ax[1].set_xticklabels(labels[:3])\n",
244 | "ax[1].set_xlim(0.5,3.5)\n",
245 | "\n",
246 | "plt.tight_layout(w_pad=1)"
247 | ]
248 | },
249 | {
250 | "cell_type": "markdown",
251 | "metadata": {},
252 | "source": [
253 | "### Validation\n",
254 | "However, from simply calculating these quantities we do not know how informative they really are. In order to understand whether the values we calculate are really reflective of the properties of the underlying system we resort to validation of the MSM. The two-level structure that we have described, consisting of the `SuperMSM` and `MSM` classes, allows for the user to test some global convergence properties first (at the level of the `SuperMSM`). "
255 | ]
256 | },
257 | {
258 | "cell_type": "markdown",
259 | "metadata": {},
260 | "source": [
261 | "#### Convergence tests\n",
262 | "For validating the model we first see at which point the relaxation times are sufficiently well converged."
263 | ]
264 | },
265 | {
266 | "cell_type": "code",
267 | "execution_count": null,
268 | "metadata": {},
269 | "outputs": [],
270 | "source": [
271 | "msm_alaTB.convergence_test(time=[1, 2, 5, 7, 10, 20, 50, 100], error=True)"
272 | ]
273 | },
274 | {
275 | "cell_type": "code",
276 | "execution_count": null,
277 | "metadata": {},
278 | "outputs": [],
279 | "source": [
280 | "fig, ax = plt.subplots()\n",
281 | "\n",
282 | "tau_vs_lagt = np.array([[x,msm_alaTB.msms[x].tauT[0],msm_alaTB.msms[x].tau_std[0]] \\\n",
283 | " for x in sorted(msm_alaTB.msms.keys())])\n",
284 | "ax.errorbar(tau_vs_lagt[:,0],tau_vs_lagt[:,1],fmt='o-', yerr=tau_vs_lagt[:,2], markersize=5)\n",
285 | "\n",
286 | "tau_vs_lagt = np.array([[x,msm_alaTB.msms[x].tauT[1],msm_alaTB.msms[x].tau_std[1]] \\\n",
287 | " for x in sorted(msm_alaTB.msms.keys())])\n",
288 | "ax.errorbar(tau_vs_lagt[:,0],tau_vs_lagt[:,1],fmt='o-', yerr=tau_vs_lagt[:,2], markersize=5)\n",
289 | "\n",
290 | "ax.fill_between(10**np.arange(-0.2,3,0.2), 1e-1, 10**np.arange(-0.2,3,0.2), facecolor='lightgray', alpha=0.5)\n",
291 | "ax.set_xlabel(r'$\\Delta$t [ps]', fontsize=16)\n",
292 | "ax.set_ylabel(r'$\\tau_i$ [ps]', fontsize=16)\n",
293 | "ax.set_xlim(0.8,200)\n",
294 | "ax.set_ylim(10,2000)\n",
295 | "_ = ax.set_xscale('log')\n",
296 | "ax.set_yscale('log')\n",
297 | "plt.tight_layout()"
298 | ]
299 | },
300 | {
301 | "cell_type": "markdown",
302 | "metadata": {},
303 | "source": [
304 | "Here we see that from the very beginning the relaxation times are independent of the lag time ($\\Delta$t) used in the construction of the model. This convergence is a good indicator of the Markovianity of the model and is a result of the use of transition based assignment. The shaded area corresponds to the range of lag times where the information we obtain is largely unreliable, because the lag time itself is longer than the relaxation time."
305 | ]
306 | },
307 | {
308 | "cell_type": "markdown",
309 | "metadata": {},
310 | "source": [
311 | "#### Chapman-Kolmogorov test\n",
312 | "Another important step in the validation is to carry out is the so-called Chapman-Kolmogorov test. In this case, the predictions from the MSM are validated against the simulation data used for its construction. "
313 | ]
314 | },
315 | {
316 | "cell_type": "code",
317 | "execution_count": null,
318 | "metadata": {},
319 | "outputs": [],
320 | "source": [
321 | "pMSM_E, pMD_E, epMD_E = msm_alaTB.ck_test(time=[1, 2, 5, 7, 10, 20, 50, 100], init=['E'])\n",
322 | "pMSM_A, pMD_A, epMD_A = msm_alaTB.ck_test(time=[1, 2, 5, 7, 10, 20, 50, 100], init=['A'])\n",
323 | "pMSM_L, pMD_L, epMD_L = msm_alaTB.ck_test(time=[1, 2, 5, 7, 10, 20, 50, 100], init=['L'])"
324 | ]
325 | },
326 | {
327 | "cell_type": "code",
328 | "execution_count": null,
329 | "metadata": {},
330 | "outputs": [],
331 | "source": [
332 | "fig, ax = plt.subplots(1,3, figsize=(8,3.25), sharex=True, sharey=True)\n",
333 | "ax[0].errorbar(pMD_E[:,0], pMD_E[:,1], epMD_E, fmt='o')\n",
334 | "for p in pMSM_E:\n",
335 | " ax[0].plot(p[0], p[1], label=\"$\\Delta t$=%g\"%p[0][0])\n",
336 | "ax[0].legend(fontsize=10, ncol=2)\n",
337 | "\n",
338 | "ax[1].errorbar(pMD_A[:,0], pMD_A[:,1], epMD_A, fmt='o')\n",
339 | "for p in pMSM_A:\n",
340 | " ax[1].plot(p[0], p[1])\n",
341 | "\n",
342 | "ax[2].errorbar(pMD_L[:,0], pMD_L[:,1], epMD_L, fmt='o')\n",
343 | "for p in pMSM_L:\n",
344 | " ax[2].plot(p[0], p[1])\n",
345 | "\n",
346 | "#ax[0].set_xscale('log')\n",
347 | "ax[0].set_ylabel('P(t)')\n",
348 | "ax[0].set_xlabel('Time (ps)')\n",
349 | "ax[1].set_xlabel('Time (ps)')\n",
350 | "ax[2].set_xlabel('Time (ps)')\n",
351 | "plt.tight_layout(w_pad=0)"
352 | ]
353 | },
354 | {
355 | "cell_type": "markdown",
356 | "metadata": {},
357 | "source": [
358 |     "These plots show the decay of the population from a given initial condition. In this case, the left, middle and right plots correspond to starting in the `E`, `A` and `L` basins respectively. In all cases we compare the calculation from the simulation data (as circles) and the propagation from MSMs calculated at different lag times (lines). The agreement between the simulation data and the model predictions confirms the result from the convergence analysis."
359 | ]
360 | },
361 | {
362 | "cell_type": "markdown",
363 | "metadata": {},
364 | "source": [
365 | "#### Autocorrelation functions\n",
366 | "The MSM can also be validated against the autocorrelation function (ACF) of the eigenmodes. If the simulation data is projected in the eigenmodes, then the ACF for mode $n$ should decay with a timescale equal to $-1/\\lambda_n$."
367 | ]
368 | },
369 | {
370 | "cell_type": "code",
371 | "execution_count": null,
372 | "metadata": {},
373 | "outputs": [],
374 | "source": [
375 | "msm_alaTB.msms[2].do_trans(evecs=True)\n",
376 | "acf = msm_alaTB.msms[2].acf_mode()"
377 | ]
378 | },
379 | {
380 | "cell_type": "code",
381 | "execution_count": null,
382 | "metadata": {},
383 | "outputs": [],
384 | "source": [
385 | "len(tr.mdt.time[1:])"
386 | ]
387 | },
388 | {
389 | "cell_type": "code",
390 | "execution_count": null,
391 | "metadata": {},
392 | "outputs": [],
393 | "source": [
394 | "fig, ax = plt.subplots()\n",
395 | "ax.plot(tr.mdt.time[1:], acf[1], 's', label='$i$=1', color='tab:blue', alpha=0.1)\n",
396 | "ax.plot(tr.mdt.time[1:],np.exp(-tr.mdt.time[1:]*1./msm_alaTB.msms[2].tauT[0]), color='tab:blue')\n",
397 | "\n",
398 | "ax.plot(tr.mdt.time[1:], acf[2], 'o', label='$i$=2', color='tab:orange', alpha=0.2)\n",
399 | "ax.plot(tr.mdt.time[1:],np.exp(-tr.mdt.time[1:]*1./msm_alaTB.msms[2].tauT[1]), color='tab:orange')\n",
400 | "\n",
401 | "ax.set_xlim(2,3000)\n",
402 | "ax.set_ylim(0,1)\n",
403 | "\n",
404 | "ax.set_xlabel('Time [ps]')\n",
405 | "ax.set_ylabel('C$_{ii}$(t)')\n",
406 | "ax.set_xscale('log')\n",
407 | "plt.legend()\n",
408 | "plt.tight_layout()"
409 | ]
410 | },
411 | {
412 | "cell_type": "markdown",
413 | "metadata": {},
414 | "source": [
415 | "This result is particularly interesting. While the fast mode ($\\lambda_2$) is very well determined because there are many transitions, for the slowest mode the agreement is notably worse."
416 | ]
417 | },
418 | {
419 | "cell_type": "markdown",
420 | "metadata": {},
421 | "source": [
422 | "### Calculation of the rate matrix\n",
423 | "From the transition matrix we can calculate the rate matrix. One possibility is to use an approximate method based simply on a Taylor expansion ([De Sancho, Mittal and Best, *JCTC*, 2013](http://dx.doi.org/10.1021/ct301033r)). We can check whether our approximate method gives a good result. We use short times since we have checked that short times are sufficient in this case for obtaining converged relaxation times."
424 | ]
425 | },
426 | {
427 | "cell_type": "code",
428 | "execution_count": null,
429 | "metadata": {},
430 | "outputs": [],
431 | "source": [
432 | "fig, ax = plt.subplots(1,2, figsize=(7.5,3.5))\n",
433 | "for i in [1, 2, 5, 7, 10, 20]:\n",
434 | " msm_alaTB.msms[i].do_rate()\n",
435 | " ax[0].errorbar(msm_alaTB.msms[i].tauT, msm_alaTB.msms[i].tauK, fmt='o', xerr=msm_alaTB.msms[i].tau_std, markersize=10, label=str(i))\n",
436 | " ax[1].errorbar(msm_alaTB.msms[i].peqT, msm_alaTB.msms[i].peqK, fmt='o', xerr=msm_alaTB.msms[i].peq_std, markersize=10, label=str(i))\n",
437 | "\n",
438 | "ax[0].plot([0,1000],[0,1000],'--', color='lightgray')\n",
439 | "ax[0].set_xlabel(r'$\\tau_T$ [ps]', fontsize=20)\n",
440 | "ax[0].set_ylabel(r'$\\tau_K$ [ps]', fontsize=20)\n",
441 | "ax[0].set_xscale('log')\n",
442 | "ax[0].set_yscale('log')\n",
443 | "\n",
444 | "ax[1].plot([0,1],[0,1],'--', color='lightgray')\n",
445 | "ax[1].set_xlabel(r'$p_T$', fontsize=20)\n",
446 | "ax[1].set_ylabel(r'$p_K$', fontsize=20)\n",
447 | "ax[1].set_xscale('log')\n",
448 | "ax[1].set_yscale('log')\n",
449 | "\n",
450 | "\n",
451 | "ax[0].legend(fontsize=9, bbox_to_anchor=(1.0, 0.65))\n",
452 | "plt.tight_layout(pad=0.4, w_pad=3)"
453 | ]
454 | },
455 | {
456 | "cell_type": "markdown",
457 | "metadata": {},
458 | "source": [
459 | "The method produces acceptable solutions for short lag times (up to 5-10 ps) although the result rapidly diverges from the transition matrix relaxation time at long lag times. Equilibrium probabilities are recovered correctly at all lag times from the rate matrices."
460 | ]
461 | }
462 | ],
463 | "metadata": {
464 | "kernelspec": {
465 | "display_name": "Python 3",
466 | "language": "python",
467 | "name": "python3"
468 | },
469 | "language_info": {
470 | "codemirror_mode": {
471 | "name": "ipython",
472 | "version": 3
473 | },
474 | "file_extension": ".py",
475 | "mimetype": "text/x-python",
476 | "name": "python",
477 | "nbconvert_exporter": "python",
478 | "pygments_lexer": "ipython3",
479 | "version": "3.7.4"
480 | }
481 | },
482 | "nbformat": 4,
483 | "nbformat_minor": 1
484 | }
485 |
--------------------------------------------------------------------------------
/examples/alanine_dipeptide/ala_dipeptide_discretize.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Discretizations\n",
8 | "Here we show how different discretizations work within MasterMSM. An important note is that not all discretizations will be sensible for all systems, but as usual the alanine dipeptide is a good testbed.\n",
9 | "\n",
10 | "We start downloading the data from the following [link](https://osf.io/a2vc7) and importing a number of libraries for plotting and analysis that will be useful for our work."
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": null,
16 | "metadata": {},
17 | "outputs": [],
18 | "source": [
19 | "%load_ext autoreload\n",
20 | "%matplotlib inline\n",
21 | "import math\n",
22 | "import numpy as np\n",
23 | "import matplotlib.pyplot as plt\n",
24 | "import seaborn as sns\n",
25 | "sns.set(style=\"ticks\", color_codes=True, font_scale=1.5)\n",
26 | "sns.set_style({\"xtick.direction\": \"in\", \"ytick.direction\": \"in\"})"
27 | ]
28 | },
29 | {
30 | "cell_type": "markdown",
31 | "metadata": {},
32 | "source": [
33 | "Next we import the ```traj``` module and read the molecular simulation trajectory in the ```xtc``` compressed format from Gromacs."
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": null,
39 | "metadata": {},
40 | "outputs": [],
41 | "source": [
42 | "from mastermsm.trajectory import traj\n",
43 | "tr = traj.TimeSeries(top='data/alaTB.gro', traj=['data/alatb_n1_ppn24.xtc'])\n",
44 | "print (tr.mdt)"
45 | ]
46 | },
47 | {
48 | "cell_type": "markdown",
49 | "metadata": {},
50 | "source": [
51 | "### Core Ramachandran angle regions\n",
52 | "Following previous work we can use core regions in the Ramachandran map to define our states. We use utilities from the [MDtraj](http://mdtraj.org) package to compute the Phi and Psi dihedrals."
53 | ]
54 | },
55 | {
56 | "cell_type": "code",
57 | "execution_count": null,
58 | "metadata": {},
59 | "outputs": [],
60 | "source": [
61 | "import mdtraj as md\n",
62 | "phi = md.compute_phi(tr.mdt)\n",
63 | "psi = md.compute_psi(tr.mdt)\n",
64 | "res = [x for x in tr.mdt.topology.residues]"
65 | ]
66 | },
67 | {
68 | "cell_type": "markdown",
69 | "metadata": {},
70 | "source": [
71 | "Then we run the actual discretization, using only two states for the alpha and extended conformations."
72 | ]
73 | },
74 | {
75 | "cell_type": "code",
76 | "execution_count": null,
77 | "metadata": {},
78 | "outputs": [],
79 | "source": [
80 | "tr.discretize(states=['A', 'E', 'L'])\n",
81 | "tr.find_keys()"
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "execution_count": null,
87 | "metadata": {},
88 | "outputs": [],
89 | "source": [
90 | "fig, ax = plt.subplots(figsize=(10,3))\n",
91 | "ax.plot(tr.mdt.time, [tr.keys.index(x) if (x in tr.keys) else 0 for x in tr.distraj ], lw=1)\n",
92 | "ax.set_xlim(0, 1.5e5)\n",
93 | "ax.set_ylim(-0.5, 2.5)\n",
94 | "ax.set_yticks(range(3))\n",
95 | "ax.set_yticklabels(['A', 'E', 'L'])\n",
96 | "ax.set_xlabel('Time (ps)', fontsize=20)\n",
97 | "ax.set_ylabel('state', fontsize=20)"
98 | ]
99 | },
100 | {
101 | "cell_type": "markdown",
102 | "metadata": {},
103 | "source": [
104 | "Finally we derive the MSM using the tools from the ```msm``` module. In particular, we use the ```SuperMSM``` class that will help build MSMs at various lag times."
105 | ]
106 | },
107 | {
108 | "cell_type": "code",
109 | "execution_count": null,
110 | "metadata": {},
111 | "outputs": [],
112 | "source": [
113 | "from mastermsm.msm import msm\n",
114 | "msm_alaTB = msm.SuperMSM([tr])\n",
115 | "for i in [1, 2, 5, 10, 20, 50, 100]:\n",
116 | " msm_alaTB.do_msm(i)\n",
117 | " msm_alaTB.msms[i].do_trans()\n",
118 | " msm_alaTB.msms[i].boots()"
119 | ]
120 | },
121 | {
122 | "cell_type": "markdown",
123 | "metadata": {},
124 | "source": [
125 | "Next we gather results from all these MSMs and plot the relaxation time corresponding to the two slow transitions."
126 | ]
127 | },
128 | {
129 | "cell_type": "code",
130 | "execution_count": null,
131 | "metadata": {},
132 | "outputs": [],
133 | "source": [
134 | "fig, ax = plt.subplots()\n",
135 | "tau_vs_lagt = np.array([[x,msm_alaTB.msms[x].tauT[0],msm_alaTB.msms[x].tau_std[0]] \\\n",
136 | " for x in sorted(msm_alaTB.msms.keys())])\n",
137 | "ax.errorbar(tau_vs_lagt[:,0],tau_vs_lagt[:,1],fmt='o-', yerr=tau_vs_lagt[:,2], markersize=10)\n",
138 | "tau_vs_lagt = np.array([[x,msm_alaTB.msms[x].tauT[1],msm_alaTB.msms[x].tau_std[1]] \\\n",
139 | " for x in sorted(msm_alaTB.msms.keys())])\n",
140 | "ax.errorbar(tau_vs_lagt[:,0],tau_vs_lagt[:,1],fmt='o-', yerr=tau_vs_lagt[:,2], markersize=10)\n",
141 | "ax.fill_between(10**np.arange(-0.2,3,0.2), 1e-1, 10**np.arange(-0.2,3,0.2), facecolor='lightgray')\n",
142 | "ax.set_xlabel(r'$\\Delta$t [ps]', fontsize=16)\n",
143 | "ax.set_ylabel(r'$\\tau$ [ps]', fontsize=16)\n",
144 | "ax.set_xlim(0.8,150)\n",
145 | "ax.set_ylim(10,3000)\n",
146 | "ax.set_yscale('log')\n",
147 | "_ = ax.set_xscale('log')"
148 | ]
149 | },
150 | {
151 | "cell_type": "markdown",
152 | "metadata": {},
153 | "source": [
154 | "### Fine grid on the Ramachandran map\n",
155 | "Alternatively we can make a grid on the Ramachandran map with many more states."
156 | ]
157 | },
158 | {
159 | "cell_type": "code",
160 | "execution_count": null,
161 | "metadata": {},
162 | "outputs": [],
163 | "source": [
164 | "tr.discretize(method=\"ramagrid\", nbins=30)\n",
165 | "tr.find_keys()"
166 | ]
167 | },
168 | {
169 | "cell_type": "code",
170 | "execution_count": null,
171 | "metadata": {},
172 | "outputs": [],
173 | "source": [
174 | "fig, ax = plt.subplots(figsize=(10,3))\n",
175 | "ax.plot(tr.mdt.time, [x for x in tr.distraj], '.', ms=1)\n",
176 | "ax.set_xlim(0, 1.5e5)\n",
177 | "ax.set_ylim(-1, 900)\n",
178 | "ax.set_xlabel('Time (ps)', fontsize=20)\n",
179 | "ax.set_ylabel('state', fontsize=20)"
180 | ]
181 | },
182 | {
183 | "cell_type": "markdown",
184 | "metadata": {},
185 | "source": [
186 | "Then we repeat the same steps as before, but with this fine grained MSM."
187 | ]
188 | },
189 | {
190 | "cell_type": "code",
191 | "execution_count": null,
192 | "metadata": {
193 | "scrolled": false
194 | },
195 | "outputs": [],
196 | "source": [
197 | "from mastermsm.msm import msm\n",
198 | "msm_alaTB_grid = msm.SuperMSM([tr])\n",
199 | "for i in [1, 2, 5, 10, 20, 50, 100]:\n",
200 | " msm_alaTB_grid.do_msm(i)\n",
201 | " msm_alaTB_grid.msms[i].do_trans()\n",
202 | " msm_alaTB_grid.msms[i].boots()"
203 | ]
204 | },
205 | {
206 | "cell_type": "markdown",
207 | "metadata": {},
208 | "source": [
209 | "First we take a look at the dependence of the slowest relaxation time with the lag time, $\\Delta t$ for the construction of the Markov model as a minimal quality control."
210 | ]
211 | },
212 | {
213 | "cell_type": "code",
214 | "execution_count": null,
215 | "metadata": {},
216 | "outputs": [],
217 | "source": [
218 | "tau1_vs_lagt = np.array([[x, msm_alaTB_grid.msms[x].tauT[0], \\\n",
219 | " msm_alaTB_grid.msms[x].tau_std[0]] \\\n",
220 | " for x in sorted(msm_alaTB_grid.msms.keys())])\n",
221 | "tau2_vs_lagt = np.array([[x, msm_alaTB_grid.msms[x].tauT[1], \\\n",
222 | " msm_alaTB_grid.msms[x].tau_std[1]] \\\n",
223 | " for x in sorted(msm_alaTB_grid.msms.keys())])\n",
224 | "tau3_vs_lagt = np.array([[x,msm_alaTB_grid.msms[x].tauT[2], \\\n",
225 | " msm_alaTB_grid.msms[x].tau_std[2]] \\\n",
226 | " for x in sorted(msm_alaTB_grid.msms.keys())])\n",
227 | "tau4_vs_lagt = np.array([[x,msm_alaTB_grid.msms[x].tauT[3], \\\n",
228 | " msm_alaTB_grid.msms[x].tau_std[3]] \\\n",
229 | " for x in sorted(msm_alaTB_grid.msms.keys())])\n",
230 | "\n",
231 | "fig, ax = plt.subplots()\n",
232 | "ax.errorbar(tau1_vs_lagt[:,0],tau1_vs_lagt[:,1], tau1_vs_lagt[:,2], fmt='o-', markersize=10)\n",
233 | "ax.errorbar(tau2_vs_lagt[:,0],tau2_vs_lagt[:,1], tau2_vs_lagt[:,2], fmt='o-', markersize=10)\n",
234 | "ax.errorbar(tau3_vs_lagt[:,0],tau3_vs_lagt[:,1], tau3_vs_lagt[:,2], fmt='o-', markersize=10)\n",
235 | "ax.errorbar(tau4_vs_lagt[:,0],tau4_vs_lagt[:,1], tau4_vs_lagt[:,2], fmt='o-', markersize=10)\n",
236 | "ax.fill_between(10**np.arange(-0.2,3,0.2), 1e-1, 10**np.arange(-0.2,3,0.2), facecolor='lightgray', alpha=0.5)\n",
237 | "ax.set_xlabel(r'$\\Delta$t [ps]', fontsize=16)\n",
238 | "ax.set_ylabel(r'$\\tau_i$ [ps]', fontsize=16)\n",
239 | "ax.set_xlim(0.8,200)\n",
240 | "ax.set_ylim(1,3000)\n",
241 | "_ = ax.set_xscale('log')\n",
242 | "_ = ax.set_yscale('log')\n",
243 | "plt.tight_layout()"
244 | ]
245 | },
246 | {
247 | "cell_type": "markdown",
248 | "metadata": {},
249 | "source": [
250 | "The slowest relaxation times from the fine-grained MSM agree with those of the core regions, although in this case there is an additional slow mode."
251 | ]
252 | },
253 | {
254 | "cell_type": "code",
255 | "execution_count": null,
256 | "metadata": {},
257 | "outputs": [],
258 | "source": [
259 | "fig, ax = plt.subplots()\n",
260 | "ax.errorbar(range(1,16),msm_alaTB_grid.msms[10].tauT[0:15], fmt='o-', \\\n",
261 | " yerr= msm_alaTB_grid.msms[10].tau_std[0:15], ms=10)\n",
262 | "ax.set_xlabel('Eigenvalue index')\n",
263 | "ax.set_ylabel(r'$\\tau_i$ (ns)')\n",
264 | "ax.set_yscale('log')\n",
265 | "plt.tight_layout()"
266 | ]
267 | },
268 | {
269 | "cell_type": "markdown",
270 | "metadata": {},
271 | "source": [
272 | "We can understand which dynamical processes the eigenvectors are associated to by looking at the corresponding eigenvectors. For this we recalculate the transition matrix but now recovering the eigenvectors. "
273 | ]
274 | },
275 | {
276 | "cell_type": "code",
277 | "execution_count": null,
278 | "metadata": {},
279 | "outputs": [],
280 | "source": [
281 | "msm_alaTB_grid.msms[10].do_trans(evecs=True)"
282 | ]
283 | },
284 | {
285 | "cell_type": "code",
286 | "execution_count": null,
287 | "metadata": {},
288 | "outputs": [],
289 | "source": [
290 | "fig, ax = plt.subplots(1,4, figsize=(12,3), sharex=True, sharey=True)\n",
291 | "mat = np.zeros((30,30), float)\n",
292 | "for i in [x for x in zip(msm_alaTB_grid.msms[10].keep_keys, \\\n",
293 | " msm_alaTB_grid.msms[10].rvecsT[:,0])]:\n",
294 | " #print i, i[0]%20, int(i[0]/20), -i[1]\n",
295 | "\n",
296 | " mat[i[0]%30, int(i[0]/30)] = i[1]\n",
297 | "ax[0].imshow(mat.transpose(), interpolation=\"none\", origin='lower', \\\n",
298 | " cmap='Blues')\n",
299 | "ax[0].set_title(r\"$\\psi_1$\")\n",
300 | "\n",
301 | "mat = np.zeros((30,30), float)\n",
302 | "for i in [x for x in zip(msm_alaTB_grid.msms[10].keep_keys, \\\n",
303 | " msm_alaTB_grid.msms[10].rvecsT[:,1])]:\n",
304 | " #print i, i[0]%20, int(i[0]/20), -i[1]\n",
305 | " mat[i[0]%30, int(i[0]/30)] = -i[1]\n",
306 | "ax[1].imshow(mat.transpose(), interpolation=\"none\", origin='lower', \\\n",
307 | " cmap='RdBu')\n",
308 | "ax[1].set_title(r\"$\\psi_2$\")\n",
309 | "\n",
310 | "mat = np.zeros((30,30), float)\n",
311 | "for i in [x for x in zip(msm_alaTB_grid.msms[10].keep_keys, \\\n",
312 | " msm_alaTB_grid.msms[10].rvecsT[:,2])]:\n",
313 | " #print i, i[0]%20, int(i[0]/20), -i[1]\n",
314 | " mat[i[0]%30, int(i[0]/30)] = -i[1]\n",
315 | "ax[2].imshow(mat.transpose(), interpolation=\"none\", origin='lower', \\\n",
316 | " cmap='RdBu')\n",
317 | "ax[2].set_title(r\"$\\psi_3$\")\n",
318 | "\n",
319 | "mat = np.zeros((30,30), float)\n",
320 | "for i in [x for x in zip(msm_alaTB_grid.msms[10].keep_keys, \\\n",
321 | " msm_alaTB_grid.msms[10].rvecsT[:,3])]:\n",
322 | " #print i, i[0]%20, int(i[0]/20), -i[1]\n",
323 | " mat[i[0]%30, int(i[0]/30)] = -i[1]\n",
324 | "ax[3].imshow(mat.transpose(), interpolation=\"none\", origin='lower', \\\n",
325 | " cmap='RdBu')\n",
326 | "ax[3].set_title(r\"$\\psi_4$\")"
327 | ]
328 | },
329 | {
330 | "cell_type": "markdown",
331 | "metadata": {},
332 | "source": [
333 | "Here we are plotting the values of the eigenvectors so that the state indexes match the positions in the Ramachandran map. On the left, we show the stationary eigenvector, $\\psi_1$, which is proportional to the equilibrium population. The other three plots correspond to the slowest dynamical modes. From $\\psi_2$, we find that the slowest transition is the interconversion between the $\\alpha_L$ and the $\\alpha_R/\\beta$ states. These, equilibrate more rapidly, as indicated by $\\psi_3$. Finally, on the right, we find the additional mode that corresponds to a yet faster transition between the $\\alpha_L$ basin and a fourth Ramachandran region."
334 | ]
335 | },
336 | {
337 | "cell_type": "markdown",
338 | "metadata": {},
339 | "source": [
340 | "### Clustering\n",
341 | "So it seems three states only may not be a very good clustering for this particular system. Maybe we need one more. In order to do the clustering systematically we use the ```fewsm``` module from ```MasterMSM```. From the eigenvectors we are immediately able to produce a sensible, albeit still imperfect, partitioning in four states."
342 | ]
343 | },
344 | {
345 | "cell_type": "code",
346 | "execution_count": null,
347 | "metadata": {},
348 | "outputs": [],
349 | "source": [
350 | "from mastermsm.fewsm import fewsm"
351 | ]
352 | },
353 | {
354 | "cell_type": "code",
355 | "execution_count": null,
356 | "metadata": {},
357 | "outputs": [],
358 | "source": [
359 | "fewsm4 = fewsm.FEWSM(msm_alaTB_grid.msms[2], N=4)"
360 | ]
361 | },
362 | {
363 | "cell_type": "code",
364 | "execution_count": null,
365 | "metadata": {},
366 | "outputs": [],
367 | "source": [
368 | "import matplotlib.cm as cm\n",
369 | "fig, ax = plt.subplots(figsize=(5,5))\n",
370 | "mat = np.zeros((30,30), float)\n",
371 | "for i in msm_alaTB_grid.msms[2].keep_keys:\n",
372 | " j = msm_alaTB_grid.msms[2].keep_keys.index(i)\n",
373 | " if j in fewsm4.macros[0]:\n",
374 | " mat[i%30, int(i/30)] = 1\n",
375 | " elif j in fewsm4.macros[1]:\n",
376 | " mat[i%30, int(i/30)] = 2\n",
377 | " elif j in fewsm4.macros[2]:\n",
378 | " mat[i%30, int(i/30)] = 3\n",
379 | " else:\n",
380 | " mat[i%30, int(i/30)] = 4\n",
381 | " #print i, i[0]%20, int(i[0]/20), -i[1]\n",
382 | "my_cmap = cm.get_cmap('viridis')\n",
383 | "my_cmap.set_under('w')\n",
384 | "ax.imshow(mat.transpose(), interpolation=\"none\", origin='lower', \\\n",
385 | " cmap=my_cmap, vmin = 0.5)"
386 | ]
387 | },
388 | {
389 | "cell_type": "markdown",
390 | "metadata": {},
391 | "source": [
392 | "Note how the partitioning based on eigenvectors captures the three important regions in the Ramachandran map."
393 | ]
394 | }
395 | ],
396 | "metadata": {
397 | "kernelspec": {
398 | "display_name": "Python 3",
399 | "language": "python",
400 | "name": "python3"
401 | },
402 | "language_info": {
403 | "codemirror_mode": {
404 | "name": "ipython",
405 | "version": 3
406 | },
407 | "file_extension": ".py",
408 | "mimetype": "text/x-python",
409 | "name": "python",
410 | "nbconvert_exporter": "python",
411 | "pygments_lexer": "ipython3",
412 | "version": "3.8.8"
413 | }
414 | },
415 | "nbformat": 4,
416 | "nbformat_minor": 1
417 | }
418 |
--------------------------------------------------------------------------------
/examples/bistable_potential/2D_smFS_MSM.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "\n",
8 | "## MSM of Brownian dynamics simulations of diffusion on a 2D surface\n",
9 | "Here we analyze simulations on another simple model system, but one that goes beyond one dimension. Specifically, we use the model by [Berezhkovskii et al, *JCP* (2014)](http://dx.doi.org/10.1063/1.4902243). We run Brownian dynamics simulations on this surface and build a simple Markov state model from it. The data can be downloaded from [OSF](https://osf.io/a2vc7/).\n",
10 | "\n",
11 | "As always we start by importing some relevant libraries."
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": null,
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
20 | "%matplotlib inline\n",
21 | "%load_ext autoreload\n",
22 | "%autoreload 2\n",
23 | "import h5py\n",
24 | "import numpy as np"
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "execution_count": null,
30 | "metadata": {},
31 | "outputs": [],
32 | "source": [
33 | "import matplotlib.pyplot as plt\n",
34 | "import matplotlib.cm as cm\n",
35 | "import seaborn as sns\n",
36 | "sns.set(style=\"ticks\", color_codes=True, font_scale=1.25)\n",
37 | "sns.set_style({\"xtick.direction\": \"in\", \"ytick.direction\": \"in\"})"
38 | ]
39 | },
40 | {
41 | "cell_type": "markdown",
42 | "metadata": {},
43 | "source": [
44 | "#### Discretization"
45 | ]
46 | },
47 | {
48 | "cell_type": "markdown",
49 | "metadata": {},
50 | "source": [
51 | "Here we load the data obtained from Brownian Dynamics simulations of isotropic diffusion on a 2D potential."
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": null,
57 | "metadata": {},
58 | "outputs": [],
59 | "source": [
60 | "h5file = \"../datafiles/brownian_dynamics/cossio_kl1.3_Dx1_Dq1.h5\"\n",
61 | "f = h5py.File(h5file, 'r')\n",
62 | "data = np.array(f['data'])\n",
63 | "f.close()"
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": null,
69 | "metadata": {},
70 | "outputs": [],
71 | "source": [
72 | "fig, ax = plt.subplots(2,1,figsize=(10,3), sharex=True,sharey=False)\n",
73 | "ax[0].plot(data[:,0],data[:,1],'.', markersize=1)\n",
74 | "ax[1].plot(data[:,0],data[:,2],'g.', markersize=1)\n",
75 | "ax[0].set_ylim(-10,10)\n",
76 | "ax[1].set_xlim(0,25000)\n",
77 | "ax[0].set_ylabel('x')\n",
78 | "ax[1].set_ylabel('y')\n",
79 | "ax[1].set_xlabel('Time')\n",
80 | "plt.tight_layout(h_pad=0)"
81 | ]
82 | },
83 | {
84 | "cell_type": "markdown",
85 | "metadata": {},
86 | "source": [
87 | "Clearly the system interconverts between two states. Both coordinates, x and y, are highly correlated, although the free energy landscape, which we can estimate from a Boltzmann inversion, varies a bit depending on the projection we use."
88 | ]
89 | },
90 | {
91 | "cell_type": "code",
92 | "execution_count": null,
93 | "metadata": {},
94 | "outputs": [],
95 | "source": [
96 | "fig, ax = plt.subplots(figsize=(6,4))\n",
97 | "hist, bin_edges = np.histogram(data[:,1], bins=np.linspace(-9,9,25), \\\n",
98 | " density=True)\n",
99 | "bin_centers = [0.5*(bin_edges[i]+bin_edges[i+1]) \\\n",
100 | " for i in range(len(bin_edges)-1)]\n",
101 | "ax.plot(bin_centers, -np.log(hist), lw=3, label=\"x\")\n",
102 | "hist, bin_edges = np.histogram(data[:,2], bins=np.linspace(-9,9,25), \\\n",
103 | " density=True)\n",
104 | "bin_centers = [0.5*(bin_edges[i]+bin_edges[i+1]) \\\n",
105 | " for i in range(len(bin_edges)-1)]\n",
106 | "ax.plot(bin_centers, -np.log(hist), lw=3, label=\"y\")\n",
107 | "ax.set_xlim(-7,7)\n",
108 | "ax.set_ylim(1,9)\n",
109 | "ax.set_xlabel('coordinate')\n",
110 | "ax.set_ylabel('PMF ($k_BT$)')\n",
111 | "ax.legend()"
112 | ]
113 | },
114 | {
115 | "cell_type": "markdown",
116 | "metadata": {},
117 | "source": [
118 | "We can also represent the energy landscape in two dimensions:"
119 | ]
120 | },
121 | {
122 | "cell_type": "code",
123 | "execution_count": null,
124 | "metadata": {},
125 | "outputs": [],
126 | "source": [
127 | "H, x_edges, y_edges = np.histogram2d(data[:,1],data[:,2], \\\n",
128 | " bins=[np.linspace(-9,9,25), np.linspace(-9,9,25)])\n",
129 | "\n",
130 | "fig, ax = plt.subplots(figsize=(5,4.5))\n",
131 | "pmf = -np.log(H.transpose())\n",
132 | "pmf -= np.min(pmf)\n",
133 | "cs = ax.contourf(pmf, extent=[x_edges.min(), x_edges.max(), \\\n",
134 | " y_edges.min(), y_edges.max()], \\\n",
135 | " levels=np.arange(0, 6.5,0.5), alpha=0.75)\n",
136 | "cbar = plt.colorbar(cs)\n",
137 | "ax.set_xlim(-7,7)\n",
138 | "ax.set_ylim(-7,7)\n",
139 | "ax.set_yticks(range(-5,6,5))\n",
140 | "ax.set_xlabel('$x$', fontsize=18)\n",
141 | "ax.set_ylabel('$y$', fontsize=18)\n",
142 | "plt.tight_layout()"
143 | ]
144 | },
145 | {
146 | "cell_type": "markdown",
147 | "metadata": {},
148 | "source": [
149 | "To construct the MSM, we assign frames to microstates. We first need to import the function that makes the grid."
150 | ]
151 | },
152 | {
153 | "cell_type": "code",
154 | "execution_count": null,
155 | "metadata": {},
156 | "outputs": [],
157 | "source": [
158 | "from scipy.stats import binned_statistic_2d"
159 | ]
160 | },
161 | {
162 | "cell_type": "code",
163 | "execution_count": null,
164 | "metadata": {},
165 | "outputs": [],
166 | "source": [
167 | "statistic, x_edge, y_edge, binnumber = \\\n",
168 | " binned_statistic_2d(data[:,1],data[:,2],None,'count', \\\n",
169 | " bins=[np.linspace(-9,9,25), np.linspace(-9,9,25)])"
170 | ]
171 | },
172 | {
173 | "cell_type": "code",
174 | "execution_count": null,
175 | "metadata": {},
176 | "outputs": [],
177 | "source": [
178 | "fig, ax = plt.subplots(figsize=(6,5))\n",
179 | "\n",
180 | "grid = ax.imshow(-np.log(statistic.transpose()),origin=\"lower\",cmap=plt.cm.rainbow)\n",
181 | "\n",
182 | "cbar = plt.colorbar(grid)\n",
183 | "ax.set_yticks(range(0,20,5))\n",
184 | "ax.set_xticks(range(0,20,5))\n",
185 | "ax.set_xlabel('$x_{bin}$', fontsize=20)\n",
186 | "ax.set_ylabel('$y_{bin}$', fontsize=20)\n",
187 | "plt.tight_layout()"
188 | ]
189 | },
190 | {
191 | "cell_type": "markdown",
192 | "metadata": {},
193 | "source": [
194 | "In this way, the continuous coordinates x and y are mapped onto a discrete microstate space."
195 | ]
196 | },
197 | {
198 | "cell_type": "code",
199 | "execution_count": null,
200 | "metadata": {},
201 | "outputs": [],
202 | "source": [
203 | "fig,ax=plt.subplots(3,1,figsize=(10,6),sharex=True)\n",
204 | "plt.subplots_adjust(wspace=0, hspace=0)\n",
205 | "ax[0].plot(range(0,len(data[:,1])),data[:,1])\n",
206 | "ax[1].plot(range(0,len(data[:,2])),data[:,2],color=\"g\")\n",
207 | "ax[2].plot(binnumber)\n",
208 | "ax[0].set_ylabel('x')\n",
209 | "ax[1].set_ylabel('y')\n",
210 | "ax[2].set_ylabel(\"s\")\n",
211 | "ax[2].set_xlabel(\"time (ps)\")\n",
212 | "ax[2].set_xlim(0, 1500)"
213 | ]
214 | },
215 | {
216 | "cell_type": "code",
217 | "execution_count": null,
218 | "metadata": {},
219 | "outputs": [],
220 | "source": [
221 | "from mastermsm.trajectory import traj"
222 | ]
223 | },
224 | {
225 | "cell_type": "markdown",
226 | "metadata": {},
227 | "source": [
228 | "We then pass the discrete trajectory to the ``traj`` module to generate an instance of the ``TimeSeries`` class. Using some of its methods, we are able to generate and sort the names of the microstates in the trajectory, which will be useful later."
229 | ]
230 | },
231 | {
232 | "cell_type": "code",
233 | "execution_count": null,
234 | "metadata": {},
235 | "outputs": [],
236 | "source": [
237 | "distraj = traj.TimeSeries(distraj=list(binnumber), dt=1)\n",
238 | "distraj.find_keys()\n",
239 | "distraj.keys.sort()"
240 | ]
241 | },
242 | {
243 | "cell_type": "markdown",
244 | "metadata": {},
245 | "source": [
246 | "### Master Equation Model \n",
247 | "After generating the discrete trajectory, we can build the master equation model, for which we use the ``msm`` module."
248 | ]
249 | },
250 | {
251 | "cell_type": "code",
252 | "execution_count": null,
253 | "metadata": {},
254 | "outputs": [],
255 | "source": [
256 | "from mastermsm.msm import msm"
257 | ]
258 | },
259 | {
260 | "cell_type": "markdown",
261 | "metadata": {},
262 | "source": [
263 | "First of all, we will create an instance of the SuperMSM class, which will be useful to produce and validate dynamical models. We pass two arguments: the \"discrete trajectory\" that we have generated above and a value for the boolean sym. This only tells the program that it can symmetrize the data, as we are assuming our trajectory is long enough as to consider it equilibrium sampling."
264 | ]
265 | },
266 | {
267 | "cell_type": "code",
268 | "execution_count": null,
269 | "metadata": {},
270 | "outputs": [],
271 | "source": [
272 | "msm_2D = msm.SuperMSM([distraj], sym=True)"
273 | ]
274 | },
275 | {
276 | "cell_type": "markdown",
277 | "metadata": {},
278 | "source": [
279 | "We then check the dependence of the slowest relaxation times of the system, $\\tau$ with respect to the choice of lag time $\\Delta t$. These can be accessed as the `tauT` corresponding to the `MSM` instance. We find that they are very well converged even from the shortest value of $\\Delta t$."
280 | ]
281 | },
282 | {
283 | "cell_type": "code",
284 | "execution_count": null,
285 | "metadata": {},
286 | "outputs": [],
287 | "source": [
288 | "for i in [1, 2, 5, 10, 20, 50, 100]:\n",
289 | " msm_2D.do_msm(i)\n",
290 | " msm_2D.msms[i].do_trans(evecs=True)\n",
291 | " msm_2D.msms[i].boots()"
292 | ]
293 | },
294 | {
295 | "cell_type": "code",
296 | "execution_count": null,
297 | "metadata": {},
298 | "outputs": [],
299 | "source": [
300 | "tau_vs_lagt = np.array([[x,msm_2D.msms[x].tauT[0], \\\n",
301 | " msm_2D.msms[x].tau_std[0]] \\\n",
302 | " for x in sorted(msm_2D.msms.keys())])"
303 | ]
304 | },
305 | {
306 | "cell_type": "code",
307 | "execution_count": null,
308 | "metadata": {},
309 | "outputs": [],
310 | "source": [
311 | "fig, ax = plt.subplots()\n",
312 | "ax.errorbar(tau_vs_lagt[:,0],tau_vs_lagt[:,1],fmt='o-', \\\n",
313 | " yerr=tau_vs_lagt[:,2], markersize=10)\n",
314 | "ax.fill_between(tau_vs_lagt[:,0],tau_vs_lagt[:,1]+tau_vs_lagt[:,2], \\\n",
315 | " tau_vs_lagt[:,1]-tau_vs_lagt[:,2], alpha=0.1)\n",
316 | "ax.set_xlabel(r'$\\Delta$t', fontsize=16)\n",
317 | "ax.set_ylabel(r'$\\tau$', fontsize=16)\n",
318 | "ax.set_xlim(0.8,120)\n",
319 | "ax.set_ylim(50,1000)\n",
320 | "ax.set_yscale('log')\n",
321 | "ax.set_xscale('log')\n",
322 | "plt.tight_layout()"
323 | ]
324 | },
325 | {
326 | "cell_type": "markdown",
327 | "metadata": {},
328 | "source": [
329 | "Clearly, there is no dependence of the relaxation times $\\tau$ on the lag time $\\Delta$t.\n"
330 | ]
331 | },
332 | {
333 | "cell_type": "markdown",
334 | "metadata": {},
335 | "source": [
336 | "#### Estimation"
337 | ]
338 | },
339 | {
340 | "cell_type": "code",
341 | "execution_count": null,
342 | "metadata": {},
343 | "outputs": [],
344 | "source": [
345 | "lt=2\n",
346 | "plt.figure()\n",
347 | "plt.imshow(msm_2D.msms[lt].trans, interpolation='none', \\\n",
348 | " origin=\"lower\")\n",
349 | "plt.ylabel('$\\it{i}$')\n",
350 | "plt.xlabel('$\\it{j}$')\n",
351 | "plt.colorbar()\n",
352 | "plt.figure()\n",
353 | "plt.imshow(np.log(msm_2D.msms[lt].trans), interpolation='none', \\\n",
354 | " origin=\"lower\")\n",
355 | "plt.ylabel('$\\it{i}$')\n",
356 | "plt.xlabel('$\\it{j}$')\n",
357 | "plt.colorbar()"
358 | ]
359 | },
360 | {
361 | "cell_type": "code",
362 | "execution_count": null,
363 | "metadata": {},
364 | "outputs": [],
365 | "source": [
366 | "fig, ax = plt.subplots()\n",
367 | "ax.errorbar(range(1,12),msm_2D.msms[lt].tauT[0:11], fmt='o-', \\\n",
368 | " yerr= msm_2D.msms[lt].tau_std[0:11], ms=10)\n",
369 | "ax.set_xlabel('Eigenvalue')\n",
370 | "ax.set_ylabel(r'$\\tau_i$ [ns]') "
371 | ]
372 | },
373 | {
374 | "cell_type": "markdown",
375 | "metadata": {},
376 | "source": [
377 | "The first mode, captured by $\\lambda_1$, is significantly slower than the others. That mode, described by the right eigenvector $\\psi^R_1$, corresponds to the transition of the protein between the folded and unfolded states."
378 | ]
379 | },
380 | {
381 | "cell_type": "code",
382 | "execution_count": null,
383 | "metadata": {},
384 | "outputs": [],
385 | "source": [
386 | "fig, ax = plt.subplots(figsize=(10,4))\n",
387 | "ax.plot(msm_2D.msms[2].rvecsT[:,1])\n",
388 | "ax.fill_between(range(len(msm_2D.msms[lt].rvecsT[:,1])), 0, \\\n",
389 | " msm_2D.msms[lt].rvecsT[:,1], \\\n",
390 | " where=msm_2D.msms[lt].rvecsT[:,1]>0,\\\n",
391 | " facecolor='c', interpolate=True,alpha=.4)\n",
392 | "ax.fill_between(range(len(msm_2D.msms[lt].rvecsT[:,1])), 0, \\\n",
393 | " msm_2D.msms[lt].rvecsT[:,1], \\\n",
394 | " where=msm_2D.msms[lt].rvecsT[:,1]<0,\\\n",
395 | " facecolor='g', interpolate=True,alpha=.4)\n",
396 | "ax.set_ylabel(\"$\\Psi^R_1$\")\n",
397 | "plt.show()"
398 | ]
399 | },
400 | {
401 | "cell_type": "markdown",
402 | "metadata": {},
403 | "source": [
404 | "The projection of $\\psi^R_1$ on the 2D grid shows the transitions between the two conformational states (red and blue)."
405 | ]
406 | },
407 | {
408 | "cell_type": "code",
409 | "execution_count": null,
410 | "metadata": {},
411 | "outputs": [],
412 | "source": [
413 | "fig,ax = plt.subplots(1,2,figsize=(10,5),sharey=True,sharex=True)\n",
414 | "rv_mat = np.zeros((25,25), float)\n",
415 | "for i in [x for x in zip(msm_2D.msms[lt].keep_keys, \\\n",
416 | " msm_2D.msms[lt].rvecsT[:,1])]:\n",
417 | " unr_ind=np.unravel_index(i[0],(26,26)) \n",
418 | " rv_mat[unr_ind[0]-1,unr_ind[1]-1] = -i[1]\n",
419 | "ax[0].imshow(rv_mat.transpose(), interpolation=\"none\", \\\n",
420 | " cmap='bwr',origin=\"lower\")\n",
421 | "ax[1].imshow(-np.log(statistic.transpose()), \\\n",
422 | " cmap=plt.cm.rainbow,origin=\"lower\")\n",
423 | "ax[1].set_yticks(range(0,26,5))\n",
424 | "ax[1].set_xticks(range(0,26,5))\n",
425 | "plt.tight_layout()"
426 | ]
427 | },
428 | {
429 | "cell_type": "code",
430 | "execution_count": null,
431 | "metadata": {},
432 | "outputs": [],
433 | "source": []
434 | }
435 | ],
436 | "metadata": {
437 | "kernelspec": {
438 | "display_name": "Python 3",
439 | "language": "python",
440 | "name": "python3"
441 | },
442 | "language_info": {
443 | "codemirror_mode": {
444 | "name": "ipython",
445 | "version": 3
446 | },
447 | "file_extension": ".py",
448 | "mimetype": "text/x-python",
449 | "name": "python",
450 | "nbconvert_exporter": "python",
451 | "pygments_lexer": "ipython3",
452 | "version": "3.8.8"
453 | }
454 | },
455 | "nbformat": 4,
456 | "nbformat_minor": 2
457 | }
458 |
--------------------------------------------------------------------------------
/examples/mueller_potential/mueller.py:
--------------------------------------------------------------------------------
1 | #!/bin/env python
2 |
3 | #Copyright 2020 Robert T. McGibbon
4 |
5 | # Permission is hereby granted, free of charge, to any person
6 | # obtaining a copy of this software and associated documentation
7 | # files (the "Software"), to deal in the Software without restriction,
8 | # including without limitation the rights to use, copy, modify,
9 | # merge, publish, distribute, sublicense, and/or sell copies of the
10 | # Software, and to permit persons to whom the Software is furnished
11 | # to do so, subject to the following conditions:
12 |
13 | # The above copyright notice and this permission notice shall be
14 | # included in all copies or substantial portions of the Software.
15 |
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
18 | # OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
20 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
21 | # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23 | # OTHER DEALINGS IN THE SOFTWARE.
24 |
25 | from simtk.unit import kelvin, picosecond, femtosecond, nanometer, dalton
26 | import simtk.openmm as mm
27 | import matplotlib.pyplot as plt
28 | import numpy as np
29 |
class MullerForce(mm.CustomExternalForce):
    """
    OpenMM custom force for propagation on the Muller Potential. Also
    includes pure python evaluation of the potential energy surface so that
    you can do some plotting.

    The potential is a sum of four anisotropic Gaussian-like terms; the
    class attributes below hold the standard Muller-Brown parameters.
    """
    aa = [-1, -1, -6.5, 0.7]
    bb = [0, 0, 11, 0.6]
    cc = [-10, -10, -6.5, 0.7]
    AA = [-200, -100, -170, 15]
    XX = [1, 0, -0.5, -1]
    YY = [0, 0.5, 1.5, 1]

    def __init__(self):
        # start with a harmonic restraint on the Z coordinate, so the
        # particle effectively moves in the z=0 plane
        expression = '1000.0 * z^2'
        for j in range(4):
            # add the muller terms for the X and Y
            fmt = dict(aa=self.aa[j], bb=self.bb[j], cc=self.cc[j], AA=self.AA[j], XX=self.XX[j], YY=self.YY[j])
            expression += '''+ {AA}*exp({aa}*(x - {XX})^2 + {bb}*(x - {XX})
            *(y - {YY}) + {cc}*(y - {YY})^2)'''.format(**fmt)
        super(MullerForce, self).__init__(expression)

    @classmethod
    def potential(cls, x, y):
        """Compute the potential at a given point x,y.

        Accepts scalars or numpy arrays (broadcasting applies) and
        returns the summed four-term Muller potential.
        """
        value = 0
        for j in range(4):
            value += cls.AA[j]*np.exp(cls.aa[j]*(x - cls.XX[j])**2 + \
                    cls.bb[j]*(x - cls.XX[j])*(y - cls.YY[j]) \
                    + cls.cc[j]*(y - cls.YY[j])**2)
        return value

    @classmethod
    def plot(cls, ax=None, minx=-1.5, maxx=1.2, miny=-0.2, maxy=2, **kwargs):
        """Plot the Muller potential as a filled contour map.

        Parameters
        ----------
        ax : matplotlib axes, optional
            Axes to draw on; falls back to the pyplot state machine
            when not given.
        minx, maxx, miny, maxy : float
            Bounds of the evaluation grid.
        """
        grid_width = max(maxx-minx, maxy-miny) / 200.0
        # BUG FIX: the original did `ax = kwargs.pop('ax', None)`
        # unconditionally, which discarded an axes object passed through
        # the named `ax` parameter (e.g. `MullerForce.plot(ax=ax)`).
        # Only fall back to kwargs when no axes was given explicitly.
        if ax is None:
            ax = kwargs.pop('ax', None)
        xx, yy = np.mgrid[minx : maxx : grid_width, miny : maxy : grid_width]
        V = cls.potential(xx, yy)
        # clip off any values greater than 200, since they mess up
        # the color scheme
        if ax is None:
            ax = plt
        ax.contourf(xx, yy, V.clip(max=200), 40, alpha=0.4, **kwargs)
77 |
if __name__ == "__main__":
    ##############################################################################
    # Global parameters
    ##############################################################################

    # Each particle is totally independent and propagates under the same
    # potential, via Langevin dynamics at high friction.
    particle_mass = 1.0*dalton
    temperature = 750*kelvin
    friction = 100/picosecond
    timestep = 10.0*femtosecond

    # Draw the starting conformation uniformly from the rectangle spanning
    # (-1.5, -0.2) to (1.2, 2); z starts in [0, 1) but is restrained to ~0.
    start_positions = (np.random.rand(1, 3)*np.array([2.7, 1.8, 1])) \
        + np.array([-1.5, -0.2, 0])

    # One-particle OpenMM system with the Muller potential attached.
    system = mm.System()
    muller_force = MullerForce()
    system.addParticle(particle_mass)
    muller_force.addParticle(0, [])
    system.addForce(muller_force)

    integrator = mm.LangevinIntegrator(temperature, friction, timestep)
    context = mm.Context(system, integrator)
    context.setPositions(start_positions)
    context.setVelocitiesToTemperature(temperature)

    # Record the particle position every 200 integrator steps.
    frames = []
    for _ in range(int(1e6)):
        state = context.getState(getPositions=True)
        frames.append(state.getPositions(asNumpy=True).value_in_unit(nanometer)[0])
        integrator.step(200)
    frames = np.vstack(frames)

    # Overlay the sampled trajectory on the potential energy surface.
    fig, ax = plt.subplots(figsize=(4,4))
    MullerForce.plot(ax=ax)
    ax.plot(frames[:,0], frames[:,1], c='k', lw=0.1)
114 |
--------------------------------------------------------------------------------
/mastermsm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BioKT/MasterMSM/7e71b0fcf42cc7d840e58a6ca18450d710fbdbb4/mastermsm/__init__.py
--------------------------------------------------------------------------------
/mastermsm/fewsm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BioKT/MasterMSM/7e71b0fcf42cc7d840e58a6ca18450d710fbdbb4/mastermsm/fewsm/__init__.py
--------------------------------------------------------------------------------
/mastermsm/fewsm/fewsm.py:
--------------------------------------------------------------------------------
1 | """
2 | This file is part of the MasterMSM package.
3 |
4 | """
5 |
6 | import copy
7 | #import random
8 | from ..msm import msm
9 | from ..trajectory import traj
10 | #import msm_lib
11 | from ..fewsm import fewsm_lib
12 |
class FEWSM(msm.MSM):
    """
    A class for doing clustering of MSMs into few-state models

    Attributes
    ----------
    keys : dict
        A dictionary containing the clusters formed.
    parent : class
        Instance of the MSM class that we aim to cluster.

    """
    def __init__(self, parent, N=2, method="robust"):
        """

        Parameters
        ----------
        parent : class
            Instance of the MSM class that we aim to cluster.
        N : int
            The desired number of clusters.
        method : str
            Splitting criterion passed to eigen_group: "robust"
            (spread-based) or "sign" (sign-structure based).

        """
        self.parent = parent
        self.N = N
        self.macros = self.eigen_group(N=self.N, method=method)

    def eigen_group(self, N=2, method="robust"):
        """ Splits microstates into macrostates

        Parameters
        ----------
        N : int
            Number of clusters.
        method : str
            The method used for clustering: "robust" or "sign".

        Returns
        -------
        macros : dict
            A dictionary with the membership to macrostates.

        Raises
        ------
        ValueError
            If `method` is neither "robust" nor "sign".

        """

        # generate eigenvectors in case the MSM does not have them
        if not hasattr(self.parent, 'lvecsT'):
            self.parent.tauT, self.parent.peqT, self.parent.rvecsT, self.parent.lvecsT = \
                    self.parent.calc_eigsT(evecs=True)
        lvecs = self.parent.lvecsT

        # split in desired number of macrostates, one eigenvector at a time
        macros = {}
        keep_states = self.parent.keep_states
        macros[0] = list(range(len(keep_states)))
        for n in range(1, N):
            # FIX: the original compared strings with `is` (identity), which
            # is unreliable, and fell through to a NameError for any other
            # value of `method`.
            if method == "robust":
                macro_new, _ = fewsm_lib.split_sigma(macros, lvecs[:,n])
            elif method == "sign":
                macro_new, _ = fewsm_lib.split_sign(macros, lvecs[:,n])
            else:
                raise ValueError("unknown clustering method: %r" % method)
            macros = copy.deepcopy(macro_new)
        print ("\n Initial membership of microstates to macrostates:")
        if len(self.parent.keep_keys) < 100:
            for k,v in macros.items():
                print (k, [self.parent.keep_keys[x] for x in v])
        else:
            for k,v in macros.items():
                print (k,":", len(v))
        return macros

    def map_trajectory(self):
        """ Maps trajectory onto the PCCA clusters

        Returns
        -------
        mappedtraj : str
            The mapped trajectory.

        """
        print ("\n Mapping trajectory onto macrostates...")
        mappedtraj = []
        keep_keys = self.parent.keep_keys
        # NOTE(review): mt_states accumulates across all parent trajectories,
        # so every TimeSeries below shares the growing concatenated list —
        # confirm this is intended rather than resetting per trajectory.
        mt_states = []
        for data in self.parent.data:
            for s in data.distraj:
                try:
                    mt_states.append([k for k, v in self.macros.items() \
                            if keep_keys.index(s) in v][0])
                except ValueError:
                    print (" not in keep_keys")
            mt = traj.TimeSeries(distraj=mt_states, dt=data.dt)
            mappedtraj.append(mt)
        self.mappedtraj = mappedtraj
        #super().__init__(mappedtraj, keys=range(self.N), lagt=self.parent.lagt)

    def metastability(self):
        """ Calculate metastability according to the definition
        in Chodera et al, J Chem Phys, (2007)

        Returns
        -------
        float
            Metastability

        """
        return fewsm_lib.metastability(self.trans)
118 |
119 | # def optim(self, nsteps=1, nwrite=None, fout="mc.dat"):
120 | # """ MC optimization using the metastability Q as energy.
121 | #
122 | # Parameters
123 | # ----------
124 | # nsteps : int
125 | # Number of steps per round of MC and per microstate.
126 | # nwrite : int
127 | # Frequency of writing MC output.
128 | # fout : string
129 | # File for output of MC progress.
130 | #
131 | # Returns
132 | # -------
133 | # macro_opt : dict
134 | # Dictionary with the membership to macrostates.
135 | #
136 | # """
137 | # print "\n Optimizing the lumped MSM\n"
138 | # out = open(fout, "w")
139 | # out.write("# iter q \n")
140 | #
141 | # nmac = self.N
142 | # nmic = len(self.parent.keep_keys)
143 | # mcsteps = len(self.count)*nsteps*nmic # mc steps per block
144 | # mcsteps_max = nmic*20000 # maximum number of mc steps
145 | # print self.count
146 | # print self.trans
147 | # q = self.metastability()
148 | # print " initial:", q
149 | # q_opt = q
150 | #
151 | # macro = copy.deepcopy(self.macros)
152 | # cont = True
153 | # nmc = 0 # number of mc blocks
154 | # reject = 0
155 | # while cont:
156 | # imc = 0
157 | # out.write ("%6i %12.10f %10.6e\n"%(imc + nmc*mcsteps,q,1))
158 | # while imc < mcsteps:
159 | # # try ramdom insertion of a microstate in a macrostate
160 | # imac = 0
161 | # jmac = 0
162 | # while imc < mcsteps:
163 | # imc +=1
164 | # while True:
165 | # # choose microstate to move around
166 | # imic = random.choice(range(nmic))
167 | # imac = int([x for x in range(nmac) if imic in macro[x]][0])
168 | # if len(macro[imac]) > 1:
169 | # # choose destination macrostate
170 | # jmac = random.choice([x for x in range(nmac) if x is not imac])
171 | # break
172 | # # move microstate from i to j
173 | # macro_new = copy.deepcopy(macro)
174 | # macro_new[imac].remove(imic)
175 | # macro_new[jmac].append(imic)
176 | # # calculate transition count matrix for new mapping
177 | # count_mac_new = fewsm_lib.map_micro2macro(self.parent.count, macro_new, self.parent.keep_states)
178 | # Tmacro_new = msm_lib.calc_trans(nmac, range(nmac), count_mac_new)
179 | # # calculate metastability
180 | # q_new = fewsm_lib.metastability(Tmacro_new)
181 | # delta = fewsm_lib.beta(imc,mcsteps)*(q - q_new) # calculate increment (Q is a -Energy)
182 | # if fewsm_lib.metropolis(delta):
183 | # #print "ACCEPT"
184 | # macro = copy.deepcopy(macro_new)
185 | # count_mac = count_mac_new
186 | # q = q_new
187 | # if q > q_opt:
188 | # q_opt = q
189 | # macro_opt = copy.deepcopy(macro)
190 | # Tmacro_opt = Tmacro_new
191 | # self.macro = copy.deepcopy(macro_opt)
192 | # else:
193 | # reject+=1
194 | # #print " REJECT"
195 | #
196 | # out.write ("%6i %12.10e %10.6e\n"%(imc + nmc*mcsteps,q,1./fewsm_lib.beta(imc,mcsteps)))
197 | # imc +=1
198 | # cont = False
199 | # print " final :", q
200 | # print " best :", q_opt
201 | # print " acceptance:",1.-float(reject)/mcsteps
202 | #
203 | # self.map_trajectory()
204 | # self.do_count()
205 | # self.do_trans()
206 | #
207 | # def write_mapping(self):
208 | # """
209 | # Prints files with the mapping between states and clusters
210 | #
211 | # """
212 | # for mtraj in self.mappedtraj:
213 | # try:
214 | # idf = mtraj.filename.rfind(".dat")
215 | # filename = mtraj.filename[:idf] + "_mapped_pcca%g.dat"%self.N
216 | # except ValueError:
217 | # filename = mtraj.filename + "_mapped_pcca%g.dat"%self.N
218 | # print " ...writing mapped trajectory at %s"%filename
219 | # fout = open(filename, "w")
220 | # micro_data = [x for x in self.parent.data if x.filename == mtraj.filename][0]
221 | # for x in zip(micro_data.time, micro_data.states, self.data[0].states):
222 | # fout.write("%10.3f %s %8i\n"%(x[0], x[1], x[2]))
223 | # fout.close()
224 |
--------------------------------------------------------------------------------
/mastermsm/fewsm/fewsm_lib.py:
--------------------------------------------------------------------------------
1 | """
2 | This file is part of the MasterMSM package.
3 |
4 | """
5 | import copy, itertools
6 | import numpy as np
7 |
def map_micro2macro(cmic, mac, states):
    """ Map a microstate count matrix onto macrostates.

    Parameters
    ----------
    cmic : np.array
        Microstate count matrix.
    mac : dict
        Membership of microstate indices to each macrostate.
    states : list
        Mapping from membership indices to rows/columns of cmic.

    Returns
    -------
    cmac : np.array
        Macrostate count matrix; cmac[j, i] aggregates all counts from
        members of macrostate i to members of macrostate j.

    """
    m = len(mac)
    cmac = np.zeros((m, m), int)
    # FIX: the original called `reduce` without importing it (NameError on
    # Python 3) and duplicated the same expression in both if/else branches.
    for i in range(m):
        for j in range(m):
            cmac[j, i] = sum(cmic[states[x], states[y]]
                             for (x, y) in itertools.product(mac[j], mac[i]))
    return cmac
23 |
24 | def test_sign(v):
25 | """check whether positive and negative signs are present in vector"""
26 | test = False
27 | if any(v > 0.) and any(v<0):
28 | test = True
29 | return test
30 |
def split_sign(macro, lvec):
    """ Split one macrostate based on the sign structure of an eigenvector.

    The macrostate with the largest mean squared eigenvector amplitude —
    among those whose members carry both positive and negative entries —
    is split; members with negative values form a new macrostate.

    Parameters
    ----------
    macro : dict
        Current membership of microstates to macrostates.
    lvec : np.array
        Eigenvector used for the split.

    Returns
    -------
    macro_new : dict
        Updated membership with one additional macrostate.
    vals : np.array
        The eigenvector used for the split.

    """
    nt = len(macro)
    vals = lvec
    # spread of the eigenvector inside each macrostate; zero when the
    # eigenvector does not change sign within it
    spread = []
    for _, members in macro.items():
        if test_sign(vals[members]):
            spread.append(np.mean(vals[members]**2))
        else:
            spread.append(0.)
    isplit = np.argsort(-np.array(spread))[0]
    # members with negative eigenvector values move to the new macrostate
    members = macro[isplit]
    lvec_split = lvec[members]
    elems = [members[k] for k in range(len(members)) if lvec_split[k] < 0.]
    macro_new = copy.deepcopy(macro)
    macro_new[nt] = elems
    for m in elems:
        macro_new[isplit].remove(m)
    return macro_new, vals
59 |
def split_sigma(macro, lvec):
    """ Split the macrostate with the broadest eigenvector distribution.

    The macrostate whose members have the largest standard deviation in
    lvec is split at the midpoint of its (min-max normalized) values.

    Parameters
    ----------
    macro : dict
        Current membership of microstates to macrostates.
    lvec : np.array
        Eigenvector used for the split.

    Returns
    -------
    macro : dict
        Updated membership with one additional macrostate.
    vals : np.array
        Normalized eigenvector values of the split macrostate.

    """
    nt = len(macro)

    # pick the macrostate with maximum spread in the eigenvector
    spread = [np.std(lvec[members]) for members in macro.values()]
    isplit = np.argsort(-np.array(spread))[0]
    # normalize the member values to [0, 1] and cut at 0.5
    members = macro[isplit]
    vmin = np.min(lvec[members])
    vmax = np.max(lvec[members])
    vals = (lvec[members] - vmin)/(vmax - vmin)
    elems = [m for m, v in zip(members, vals) if v < 0.5]
    keep = [m for m, v in zip(members, vals) if v >= 0.5]
    macro_new = copy.deepcopy(macro)
    macro_new[nt] = elems
    for m in elems:
        macro_new[isplit].remove(m)
    return macro_new, vals
88 |
def metastability(T):
    """ Metastability of a transition matrix: the trace of T. """
    return np.trace(T)
91 |
def beta(imc, mcsteps):
    """ Inverse temperature schedule for Monte Carlo simulated annealing.

    The temperature decays exponentially from 1 toward 0 across the
    `mcsteps` steps of a cycle.

    Parameters
    ----------
    imc : int
        Current MC step (1-based; the schedule starts at imc == 1).
    mcsteps : int
        Total number of MC steps in the schedule.

    Returns
    -------
    float
        The inverse temperature 1/T (1e20 if the division raises
        ZeroDivisionError).

    """
    step = imc - 1
    decay = 4./mcsteps
    # exponential cooling, normalized so that T == 1 at step 0
    temp = (1 + (np.exp(-decay*step) - 1.)/(1. - np.exp(-decay*mcsteps)))
    try:
        return 1./temp
    except ZeroDivisionError:
        return 1e20
102 |
def metropolis(delta):
    """ Metropolis acceptance test for an energy increment delta.

    Always accepts downhill moves (delta < 0); uphill moves are accepted
    with probability min(1, exp(-delta)).

    """
    if delta < 0:
        return True
    p = min(1.0, np.exp(-delta))
    return bool(np.random.random() < p)
113 |
--------------------------------------------------------------------------------
/mastermsm/msm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BioKT/MasterMSM/7e71b0fcf42cc7d840e58a6ca18450d710fbdbb4/mastermsm/msm/__init__.py
--------------------------------------------------------------------------------
/mastermsm/msm/msm_lib.py:
--------------------------------------------------------------------------------
1 | """
2 | This file is part of the MasterMSM package.
3 |
4 | """
5 | import copy
6 | import numpy as np
7 | import networkx as nx
8 | import os #, math
9 | import tempfile
10 | from functools import reduce, cmp_to_key
11 | #import operator
12 | from scipy import linalg as spla
13 | #import multiprocessing as mp
14 | import pickle
15 |
16 | # thermal energy (kJ/mol)
17 | beta = 1./(8.314e-3*300)
18 |
19 | #def difference(k1, k2):
20 | # l = len(k1)
21 | # diff = 0
22 | # for i in range(l):
23 | # if k1[i] != k2[i]:
24 | # diff+=1
25 | # return diff
26 |
def calc_eigsK(rate, evecs=False):
    """
    Calculate eigenvalues and eigenvectors of rate matrix K

    Parameters
    -----------
    rate : array
        The rate matrix to use.
    evecs : bool
        Whether we want the eigenvectors of the rate matrix.

    Returns:
    -------
    tauK : numpy array
        Relaxation times from K.
    peqK : numpy array
        Equilibrium probabilities from K.
    rvecsK : numpy array, optional
        Right eigenvectors of K, sorted.
    lvecsK : numpy array, optional
        Left eigenvectors of K, sorted.

    """
    evalsK, lvecsK, rvecsK = \
            spla.eig(rate, left=True)

    # sort modes by decreasing real part of the eigenvalue (esort is a
    # cmp-style comparator defined below in this module)
    nkeys = len(rate)
    elistK = []
    for i in range(nkeys):
        elistK.append([i,np.real(evalsK[i])])
    elistK.sort(key=cmp_to_key(esort))

    # calculate relaxation times from K and T; eigenvalues within 1e-10 of
    # zero are treated as the stationary mode and skipped
    tauK = []
    for i in range(nkeys):
        if np.abs(elistK[i][1]) > 1e-10:
            iiK, lamK = elistK[i]
            tauK.append(-1./lamK)
        # NOTE(review): this assignment is overwritten unconditionally a few
        # lines below, so it appears to be dead code — confirm before removal
        if len(tauK) == 1:
            ieqK = iiK
86 |
def esort(ei, ej):
    """ cmp-style comparator ordering (index, eigenvalue) pairs by
    decreasing real part of the eigenvalue.

    Parameters
    ----------
    ei : sequence
        Pair (index, eigenvalue) i.
    ej : sequence
        Pair (index, eigenvalue) j.

    Returns
    -------
    int :
        1 when ej's real part is larger, -1 when smaller, 0 when equal.

    """
    _, eval_i = ei
    _, eval_j = ej
    # bool arithmetic yields exactly -1, 0 or 1
    return (eval_j.real > eval_i.real) - (eval_j.real < eval_i.real)
112 |
113 | #def find_keys(state_keys, trans, manually_remove):
114 | # """ eliminate dead ends """
115 | # keep_states = []
116 | # keep_keys = []
117 | # # eliminate dead ends
118 | # nstate = len(state_keys)
119 | # for i in range(nstate):
120 | # key = state_keys[i]
121 | # summ = 0
122 | # sumx = 0
123 | # for j in range(nstate):
124 | # if j!=i:
125 | # summ += trans[j][i] # sources
126 | # sumx += trans[i][j] # sinks
127 | # if summ > 0 and sumx > 0 and trans[i][i] > 0 and key not in manually_remove:
128 | # keep_states.append(i)
129 | # keep_keys.append(state_keys[i])
130 | # return keep_states,keep_keys
131 | #
132 | #def connect_groups(keep_states, trans):
133 | # """ check for connected groups """
134 | # connected_groups = []
135 | # leftover = copy.deepcopy(keep_states)
136 | # while len(leftover) > 0:
137 | # #print leftover
138 | # leftover_new = []
139 | # n_old_new_net = 0
140 | # new_net = [ leftover[0] ]
141 | # n_new_net = len(new_net)
142 | # while n_new_net != n_old_new_net:
143 | # for i in range(len(leftover)):
144 | # l = leftover[i]
145 | # if l in new_net:
146 | # continue
147 | # summ = 0
148 | # for g in new_net:
149 | # summ += trans[l][g]+trans[g][l]
150 | # if summ > 0:
151 | # new_net.append(l)
152 | # n_old_new_net = n_new_net
153 | # n_new_net = len(new_net)
154 | # #print " added %i new members" % (n_new_net-n_old_new_net)
155 | # leftover_new = filter(lambda x: x not in new_net, leftover)
156 | # connected_groups.append(new_net)
157 | # leftover = copy.deepcopy(leftover_new)
158 | # return connected_groups
159 | #
160 | #def isnative(native_string, string):
161 | # s = ""
162 | # for i in range(len(string)):
163 | # if string[i]==native_string[i]:
164 | # s+="1"
165 | # else:
166 | # s+="0"
167 | # return s
168 |
def mat_mul_v(m, v):
    """ Multiplies matrix and vector

    Parameters
    ----------
    m : np.array
        The matrix.
    v : np.array
        The vector.

    Returns
    -------
    w : list
        The result, one dot product per row of m.

    """
    # idiomatic replacement of the manual accumulate-and-reset loop
    n = len(v)
    return [sum(row[i]*v[i] for i in range(n)) for row in m]
195 |
196 | #def dotproduct(v1, v2, sum=sum, imap=itertools.imap, mul=operator.mul):
197 | # return sum(imap(mul,v1,v2))
198 | #
199 | ##def rate_analyze(rate):
200 | ## # calculates eigenvalues and eigenvectors from rate matrix
201 | ## # calculate symmetrized matrix
202 | ## kjisym = kji*(kji.transpose())
203 | ## kjisym = sqrt(kjisym)
204 | ## for j in arange(nstates):
205 | ## kjisym[j,j] = -kjisym[j,j]
206 | ## # calculate eigenvalues and eigenvectors
207 | ## eigvalsym,eigvectsym = linalg.eig(kjisym)
208 | ## # index the solutions
209 | ## index = argsort(-eigvalsym)
210 | ## ieq = index[0]
211 | ## # equilibrium population
212 | ## peq = eigvectsym[:,ieq]**2
213 | ## # order eigenvalues and calculate left and right eigenvectors
214 | ## eigval = zeros((nstates),float)
215 | ## PsiR = zeros((nstates,nstates),float)
216 | ## PsiL = zeros((nstates,nstates),float)
217 | ## for i in arange(nstates):
218 | ## eigval[i] = eigvalsym[index[i]]
219 | ## PsiR[:,i] = eigvectsym[:,index[i]]*eigvectsym[:,ieq]
220 | ## PsiL[:,i] = eigvectsym[:,index[i]]/eigvectsym[:,ieq]
221 | ## return eigval,PsiR,PsiL,eigvectsym,peq
222 | #
223 | #def propagate(rate, t, pini):
224 | # # propagate dynamics using rate matrix exponential
225 | # expkt = spla.expm2(rate*t)
226 | # return mat_mul_v(expkt,pini)
227 | #
228 | #def propagate_eig(elist, rvecs, lvecs, t, pini):
229 | # # propagate dynamics using rate matrix exponential using eigenvalues and eigenvectors
230 | # nstates = len(pini)
231 | # p = np.zeros((nstates),float)
232 | # for n in range(nstates):
233 | # #print np.exp(-elist[n][1]*t)
234 | # i,e = elist[n]
235 | # p = p + rvecs[:,i]*(np.dot(lvecs[:,i],pini)*\
236 | # np.exp(-abs(e*t)))
237 | # return p
238 | #
239 | #def bootsfiles(traj_list_dt):
240 | # n = len(traj_list_dt)
241 | # traj_list_dt_new = []
242 | # i = 0
243 | # while i < n:
244 | # k = int(np.random.random()*n)
245 | # traj_list_dt_new.append(traj_list_dt[k])
246 | # i += 1
247 | # return traj_list_dt_new
248 | #
249 | #def boots_pick(filename, blocksize):
250 | # raw = open(filename).readlines()
251 | # lraw = len(raw)
252 | # nblocks = int(lraw/blocksize)
253 | # lblock = int(lraw/nblocks)
254 | # try:
255 | # ib = np.random.randint(nblocks-1)
256 | # except ValueError:
257 | # ib = 0
258 | # return raw[ib*lblock:(ib+1)*lblock]
259 | #
260 | #def onrate(states, target, K, peq):
261 | # # steady state rate
262 | # kon = 0.
263 | # for i in states:
264 | # if i != target:
265 | # if K[target,i] > 0:
266 | # kon += K[target,i]*peq[i]
267 | # return kon
268 | #
def run_commit(states, K, peq, FF, UU):
    """ Calculate committors and reactive flux

    Parameters
    ----------
    states : list
        States in the MSM.
    K : np.array
        Rate matrix.
    peq : np.array
        Equilibrium distribution.
    FF : list
        Definitely folded states.
    UU : list
        Definitely unfolded states.

    Returns
    -------
    J : np.array
        Reactive flux matrix.
    pfold : np.array
        Values of the committor.
    sum_flux : float
        Sum of reactive fluxes.
    kf : float
        Folding rate from flux over population relationship.

    """
    nstates = len(states)
    # define end-states
    UUFF = UU + FF
    print (" definitely FF and UU states", UUFF)
    # intermediate states: neither definitely folded nor unfolded
    I = list(filter(lambda x: x not in UU+FF, states))
    NI = len(I)

    # calculate committors by solving the linear system A x = b over the
    # intermediate states; K is indexed here as K[destination][origin] —
    # NOTE(review): confirm against the rate-matrix convention of callers
    b = np.zeros([NI], float)
    A = np.zeros([NI,NI], float)
    for j_ind in range(NI):
        j = I[j_ind]
        summ = 0.
        for i in FF:
            summ += K[i][j]
        b[j_ind] = -summ
        for i_ind in range(NI):
            i = I[i_ind]
            A[j_ind][i_ind] = K[i][j]
    # solve Ax=b
    Ainv = np.linalg.inv(A)
    x = np.dot(Ainv,b)
    #XX = np.dot(Ainv,A)

    # committor: 0 in the unfolded set, 1 in the folded set, solved values
    # for the intermediates
    pfold = np.zeros(nstates,float)
    for i in range(nstates):
        if i in UU:
            pfold[i] = 0.0
        elif i in FF:
            pfold[i] = 1.0
        else:
            ii = I.index(i)
            pfold[i] = x[ii]

    # stationary distribution
    # NOTE(review): pss is computed but never used or returned
    pss = np.zeros(nstates,float)
    for i in range(nstates):
        pss[i] = (1-pfold[i])*peq[i]

    # flux matrix and reactive flux
    J = np.zeros([nstates,nstates],float)
    for i in range(nstates):
        for j in range(nstates):
            J[j][i] = K[j][i]*peq[i]*(pfold[j]-pfold[i])

    # dividing line is committor = 0.5; states exactly at 0.5 belong to
    # neither side
    sum_flux = 0
    left = [x for x in range(nstates) if pfold[x] < 0.5]
    right = [x for x in range(nstates) if pfold[x] > 0.5]
    for i in left:
        for j in right:
            sum_flux += J[j][i]

    #sum of populations for all reactant states
    pU = np.sum([peq[x] for x in range(nstates) if pfold[x] < 0.5])
#    pU = np.sum(peq[filter(lambda x: x in UU, range(nstates))])
    kf = sum_flux/pU
    return J, pfold, sum_flux, kf
355 |
def calc_count_worker(x):
    """ mp worker that calculates the count matrix from a trajectory

    Parameters
    ----------
    x : list
        List containing input for each mp worker. Includes:
        distraj :the time series of states
        dt : the timestep for that trajectory
        keys : the keys used in the assignment
        lagt : the lag time for construction
        sliding : whether a sliding window is used

    Returns
    -------
    count : array
        Transition count matrix; count[j][i] is the number of observed
        transitions from keys[i] to keys[j] at the given lag time.

    """
    # parse input from multiprocessing
    distraj = x[0]
    dt = x[1]
    keys = x[2]
    nkeys = len(keys)
    lagt = x[3]
    sliding = x[4]

    ltraj = len(distraj)
    lag = int(lagt/dt) # number of frames per lag time
    if sliding:
        slider = 1 # every state is initial state
    else:
        slider = lag

    count = np.zeros([nkeys,nkeys], np.int32)
    for i in range(0, ltraj-lag, slider):
        state_i = distraj[i]
        state_j = distraj[i + lag]
        # FIX: count a transition only when BOTH endpoints are assigned
        # states. The previous version kept idx_i/idx_j from an earlier
        # iteration when a state was missing from keys, so the
        # try/except UnboundLocalError silently incremented a stale cell.
        if state_i in keys and state_j in keys:
            count[keys.index(state_j)][keys.index(state_i)] += 1
    return count
402 |
def calc_lifetime(x):
    """ mp worker that computes state lifetimes from a discrete trajectory

    Parameters
    ----------
    x : list
        List containing input for each mp worker. Includes:
        distraj :the time series of states
        dt : the timestep for that trajectory
        keys : the keys used in the assignment

    Returns
    -------
    life : dict
        Maps each state to the list of dwell times observed before a
        transition into another assigned state.

    """
    # parse input from multiprocessing
    distraj, dt, keys = x[0], x[1], x[2]
    nframes = len(distraj)

    life = {}
    dwell = 0
    for j in range(1, nframes):
        prev = distraj[j - 1]
        curr = distraj[j]
        # a visit continues while the state repeats, or while passing
        # through frames whose state is not in keys
        if prev == curr or curr not in keys:
            dwell += 1
        else:
            life.setdefault(prev, []).append(dwell*dt)
            dwell = 1
    return life
446 |
def traj_split(data=None, lagt=None, fdboots=None):
    """ Splits trajectories into fragments for bootstrapping

    Parameters
    ----------
    data : list
        Set of trajectories used for building the MSM.
    lagt : float
        Lag time for building the MSM.
    fdboots :
        Unused; kept for interface compatibility.

    Returns:
    -------
    filetmp : str
        Name of a temporary file holding the pickled fragments.

    """
    trajs = [[d.distraj, d.dt] for d in data]
    ltraj = [len(t[0])*t[1] for t in trajs]
    timetot = np.sum(ltraj) # total simulation time
    # keep halving fragments while the median fragment remains long
    # compared with both total sampling and the lag time
    while np.median(ltraj) > timetot/20. and np.median(ltraj) > 10.*lagt:
        chunks = []
        for t in trajs:
            half = int(len(t[0])/2)
            chunks.append([t[0][:half], t[1]])
            chunks.append([t[0][half:], t[1]])
        trajs = chunks
        ltraj = [len(t[0])*t[1] for t in trajs]
    # persist fragments so bootstrap workers can load them independently
    fd, filetmp = tempfile.mkstemp()
    with os.fdopen(fd, 'wb') as fh:
        pickle.dump(trajs, fh, protocol=pickle.HIGHEST_PROTOCOL)
    return filetmp
483 |
def do_boots_worker(x):
    """ Worker function for parallel bootstrapping.

    Parameters
    ----------
    x : list
        A list containing the trajectory filename, the states, the lag time
        and the total number of transitions.

    Returns
    -------
    tauT : list
        Relaxation times of the bootstrapped transition matrix.
    peqT : numpy array
        Equilibrium probabilities of the bootstrapped transition matrix.
    trans : numpy array
        Bootstrapped transition matrix over the largest connected set.
    keep_keys : list
        Keys of the states retained in the largest connected set.

    """

    #print "# Process %s running on input %s"%(mp.current_process(), x[0])
    filetmp, keys, lagt, ncount, slider = x
    nkeys = len(keys)
    finp = open(filetmp, 'rb')
    trans = pickle.load(finp)
    finp.close()
    ltrans = len(trans)
    # reseed so every worker process resamples independently
    np.random.seed()
    ncount_boots = 0
    count = np.zeros([nkeys, nkeys], np.int32)
    # resample fragments with replacement until the bootstrap sample has
    # at least as many transition counts as the original data
    while ncount_boots < ncount:
        itrans = np.random.randint(ltrans)
        count_inp = [trans[itrans][0], trans[itrans][1], keys, lagt, slider]
        c = calc_count_worker(count_inp)
        # NOTE(review): np.matrix is deprecated in modern NumPy — confirm
        # a plain array addition is equivalent here
        count += np.matrix(c)
        ncount_boots += np.sum(c)
        #print ncount_boots, "< %g"%ncount
    # keep only the largest strongly connected set of states
    D = nx.DiGraph(count)
    #keep_states = sorted(nx.strongly_connected_components(D)[0])
    keep_states = list(sorted(list(nx.strongly_connected_components(D)),
        key = len, reverse=True)[0])
    keep_keys = list(map(lambda x: keys[x], keep_states))
    nkeep = len(keep_keys)
    # column-normalized transition matrix restricted to the connected set
    trans = np.zeros([nkeep, nkeep], float)
    for i in range(nkeep):
        ni = reduce(lambda x, y: x + y, map(lambda x:
            count[keep_states[x]][keep_states[i]], range(nkeep)))
        for j in range(nkeep):
            trans[j][i] = float(count[keep_states[j]][keep_states[i]])/float(ni)
    # relaxation times and equilibrium distribution from the eigenspectrum
    evalsT, rvecsT = spla.eig(trans, left=False)
    elistT = []
    for i in range(nkeep):
        elistT.append([i,np.real(evalsT[i])])
    elistT.sort(key=cmp_to_key(esort))
    tauT = []
    for i in range(1,nkeep):
        _, lamT = elistT[i]
        tauT.append(-lagt/np.log(lamT))
    ieqT, _ = elistT[0]
    peqT_sum = reduce(lambda x,y: x + y, map(lambda x: rvecsT[x,ieqT],
        range(nkeep)))
    peqT = rvecsT[:,ieqT]/peqT_sum
    return tauT, peqT, trans, keep_keys
538 |
def calc_trans(nkeep=None, keep_states=None, count=None):
    """ Calculates transition matrix.

    Uses the maximum likelihood expression by Prinz et al.[1]_

    Parameters
    ----------
    lagt : float
        Lag time for construction of MSM.

    Returns
    -------
    trans : array
        The transition probability matrix.

    Notes
    -----
    ..[1] J. H. Prinz, H. Wu, M. Sarich, B. Keller, M. Senne, M. Held,
        J. D. Chodera, C. Schutte and F. Noe, "Markov state models:
        Generation and validation", J. Chem. Phys. (2011).
    """
    trans = np.zeros([nkeep, nkeep], float)
    for i in range(nkeep):
        # total counts out of state i (column normalization)
        ni = sum(count[keep_states[k]][keep_states[i]] for k in range(nkeep))
        for j in range(nkeep):
            trans[j][i] = float(count[keep_states[j]][keep_states[i]])/float(ni)
    return trans
567 |
def calc_rate(nkeep, trans, lagt):
    """ Calculate rate matrix from transition matrix.

    We use a method based on a Taylor expansion.[1]_

    Parameters
    ----------
    nkeep : int
        Number of states in transition matrix.
    trans: np.array
        Transition matrix.
    lagt : float
        The lag time.

    Returns
    -------
    rate : np.array
        The rate matrix.

    Notes
    -----
    ..[1] D. De Sancho, J. Mittal and R. B. Best, "Folding kinetics
    and unfolded state dynamics of the GB1 hairpin from molecular
    simulation", J. Chem. Theory Comput. (2013).

    """
    # first-order approximation: K ~ T/lagt off the diagonal
    rate = trans/lagt

    # enforce mass conservation: each diagonal entry closes its column
    for i in range(nkeep):
        column = rate[:, i]
        rate[i][i] = -(np.sum(column[:i]) + np.sum(column[i+1:]))
    return rate
600 |
def rand_rate(nkeep, count):
    """ Randomly generate initial matrix.

    Off-diagonal rates are drawn only between states with observed
    counts in both directions; diagonal entries make every column sum
    to zero.

    Parameters
    ----------
    nkeep : int
        Number of states in transition matrix.

    count : np.array
        Transition matrix.

    Returns
    -------
    rand_rate : np.array
        The random rate matrix.

    """
    nkeys = len(count)

    random_rate = np.zeros((nkeys, nkeys), float)
    for col in range(nkeys):
        for row in range(nkeys):
            # only connect pairs with counts observed in both directions
            if col != row and (count[col, row] != 0) and (count[row, col] != 0):
                random_rate[row, col] = np.exp(np.random.randn()*-3)
        # mass conservation: column sums to zero
        random_rate[col, col] = -np.sum(random_rate[:, col])
    return random_rate
628 |
def calc_mlrate(nkeep, count, lagt, rate_init):
    """ Calculate rate matrix using maximum likelihood Bayesian method.

    We use a the MLPB method described by Buchete and Hummer.[1]_

    Parameters
    ----------
    nkeep : int
        Number of states in transition matrix.
    count : np.array
        Transition matrix.
    lagt : float
        The lag time.
    rate_init : np.array
        Initial guess for the rate matrix.

    Returns
    -------
    rate_best : np.array
        The rate matrix with the best likelihood score found.
    ml_cum : list
        Likelihood values accumulated along the optimization.
    temp_cum : list
        MC temperatures accumulated along the optimization.

    Notes
    -----
    ..[1] N.-V. Buchete and G. Hummer, "Coarse master equations for
        peptide folding dynamics", J. Phys. Chem. B (2008).

    """
    # initialize rate matrix and equilibrium distribution enforcing detailed balance
    # FIX: np.float was removed in NumPy 1.24; use the builtin float
    p_prev = np.sum(count, axis=0)/float(np.sum(count))
    rate_prev = detailed_balance(nkeep, rate_init, p_prev)
    ml_prev = likelihood(nkeep, rate_prev, count, lagt)

    # initialize MC sampling
    print ("MLPB optimization of rate matrix:\n START")
    #print rate_prev,"\n", p_prev, ml_prev
    ml_ref = ml_prev
    ml_cum = [ml_prev]
    temp_cum = [1.]
    nstep = 0
    nsteps = 1000*nkeep**2
    k = -3./nsteps
    nfreq = 10
    ncycle = 0
    accept = 0
    rate_best = rate_prev
    ml_best = ml_prev
    while True:
        # random choice of MC move
        rate, p = mc_move(nkeep, rate_prev, p_prev)
        rate = detailed_balance(nkeep, rate, p)

        # calculate likelihood (lower score is better)
        ml = likelihood(nkeep, rate, count, lagt)

        # Boltzmann acceptance / rejection
        if ml < ml_prev:
            #print " ACCEPT\n"
            rate_prev = rate
            p_prev = p
            ml_prev = ml
            accept +=1
            if ml < ml_best:
                ml_best = ml
                rate_best = rate
        else:
            delta_ml = ml - ml_prev
            # annealing schedule: beta ramps up within each cycle after the first
            beta = (1 - np.exp(k*nsteps))/(np.exp(k*nstep) - np.exp(k*nsteps)) if ncycle > 0 else 1
            weight = np.exp(-beta*delta_ml)
            if np.random.random() < weight:
                #print " ACCEPT BOLTZMANN\n"
                rate_prev = rate
                p_prev = p
                ml_prev = ml
                accept +=1
        nstep +=1

        if nstep > nsteps:
            ncycle +=1
            ml_cum.append(ml_prev)
            temp_cum.append(1./beta)
            print ("\n END of cycle %g"%ncycle)
            # FIX: np.float removed in NumPy 1.24; builtin float is equivalent
            print (" acceptance :%g"%(float(accept)/nsteps))
            accept = 0
            print (rate_prev)
            print (" L old =", ml_ref,"; L new:", ml_prev)
            improvement = (ml_ref - ml_cum[-1])/ml_ref
            print (" improvement :%g"%improvement)
            # keep annealing while improving significantly or for at least 3 cycles
            if improvement > 0.001 or ncycle < 3:
                nstep = 0
                ml_ref = np.mean(ml_cum[-nsteps:])
            else:
                break
        elif nstep % nfreq == 0:
            # NOTE(review): until the rejection branch has run, `beta` here
            # resolves to the module-level thermal constant — confirm intended
            ml_cum.append(ml_prev)
            temp_cum.append(1./beta)

    return rate_best, ml_cum, temp_cum
724 |
def mc_move(nkeep, rate, peq):
    """ Make MC move in either rate or equilibrium probability.

    Changes in equilibrium probabilities are introduced so that the new value
    is drawn from a normal distribution centered at the current value.

    Parameters
    ----------
    nkeep : int
        The number of states.
    rate : array
        The rate matrix obeying detailed balance.
    peq : array
        The equilibrium probability

    Returns
    -------
    rate_new : array
        Copy of `rate` with at most one off-diagonal element perturbed.
    peq_new : array
        Copy of `peq`, renormalized when a probability was perturbed.

    """
    # free parameters: nkeep*(nkeep-1)/2 independent off-diagonal rates
    # (detailed balance fixes the other triangle) plus nkeep-1 probabilities.
    # NOTE(review): true division makes nparam a float; np.random.randint
    # tolerates it, but integer division (//) would be cleaner.
    nparam = nkeep*(nkeep - 1)/2 + nkeep - 1
    npeq = nkeep - 1

    # retry until a proposal keeps probabilities/rates in the valid region
    while True:
        i = np.random.randint(0, nparam)
        #print i
        rate_new = copy.deepcopy(rate)
        peq_new = copy.deepcopy(peq)
        if i < npeq:
            #print " Peq"
            # perturb one equilibrium probability, then renormalize
            scale = np.mean(peq)*0.1
#            peq_new[i] = np.random.normal(loc=peq[i], scale=scale)
            peq_new[i] = peq[i] + (np.random.random() - 0.5)*scale
            peq_new = peq_new/np.sum(peq_new)
            if np.all(peq_new > 0):
                break
        else:
            #print " Rate"
            # pick a random element (i < j) of the lower triangle to perturb
            i = np.random.randint(0, nkeep - 1)
            try:
                j = np.random.randint(i + 1, nkeep - 1)
            except ValueError:
                j = nkeep - 1
            try:
                # NOTE(review): np.abs(rate>0.) averages a boolean mask, not
                # the rate magnitudes; possibly intended as
                # np.mean(np.abs(rate[rate > 0.])) — confirm before changing.
                scale = np.mean(np.abs(rate>0.))*0.1
                #rate_new[j,i] = np.random.normal(loc=rate[j,i], scale=scale)
                rate_new[j,i] = rate[j,i] + (np.random.random() - 0.5)*scale
                if np.all((rate_new - np.diag(np.diag(rate_new))) >= 0):
                    break
            except ValueError:
                pass
        #else:
        #    print rate_new - np.diag(np.diag(rate_new))

    return rate_new, peq_new
776 |
777 |
def detailed_balance(nkeep, rate, peq):
    """ Enforce detailed balance in rate matrix.

    The lower triangle of `rate` is taken as reference; the upper triangle
    is overwritten so that rate[j,i]*peq[i] == rate[i,j]*peq[j], and the
    diagonal is reset so every column sums to zero. `rate` is modified in
    place and also returned.

    Parameters
    ----------
    nkeep : int
        The number of states.
    rate : array
        The rate matrix obeying detailed balance.
    peq : array
        The equilibrium probability

    """
    # mirror the lower triangle into the upper one with the detailed
    # balance weighting
    low_r, low_c = np.tril_indices(nkeep, k=-1)
    rate[low_c, low_r] = rate[low_r, low_c]*peq[low_c]/peq[low_r]
    # columns of a rate matrix must sum to zero
    np.fill_diagonal(rate, 0.)
    np.fill_diagonal(rate, -rate.sum(axis=0))
    return rate
797 |
798 | def likelihood(nkeep, rate, count, lagt):
799 | """ Likelihood of a rate matrix given a count matrix
800 |
801 | We use the procedure described by Buchete and Hummer.[1]_
802 |
803 | Parameters
804 | ----------
805 | nkeep : int
806 | Number of states in transition matrix.
807 | count : np.array
808 | Transition matrix.
809 | lagt : float
810 | The lag time.
811 |
812 | Returns
813 | -------
814 | mlog_like : float
815 | The log likelihood
816 |
817 | Notes
818 | -----
819 | ..[1] N.-V. Buchete and G. Hummer, "Coarse master equations for
820 | peptide folding dynamics", J. Phys. Chem. B (2008).
821 |
822 | """
823 | # calculate symmetrized rate matrix
824 | ratesym = np.multiply(rate,rate.transpose())
825 | ratesym = np.sqrt(ratesym)
826 | for i in range(nkeep):
827 | ratesym[i,i] = -ratesym[i,i]
828 |
829 | # calculate eigenvalues and eigenvectors
830 | evalsym, evectsym = np.linalg.eig(ratesym)
831 |
832 | # index the solutions
833 | indx_eig = np.argsort(-evalsym)
834 |
835 | # equilibrium population
836 | ieq = indx_eig[0]
837 |
838 | # calculate left and right eigenvectors
839 | phiR = np.zeros((nkeep, nkeep))
840 | phiL = np.zeros((nkeep, nkeep))
841 | for i in range(nkeep):
842 | phiR[:,i] = evectsym[:,i]*evectsym[:,ieq]
843 | phiL[:,i] = evectsym[:,i]/evectsym[:,ieq]
844 |
845 | # calculate propagators
846 | prop = np.zeros((nkeep, nkeep), float)
847 | for i in range(nkeep):
848 | for j in range(nkeep):
849 | for n in range(nkeep):
850 | prop[j,i] = prop[j,i] + \
851 | phiR[j,n]*phiL[i,n]*np.exp(-abs(evalsym[n])*lagt)
852 |
853 | # calculate likelihood using matrix of transitions
854 | log_like = 0.
855 | for i in range(nkeep):
856 | for j in range(nkeep):
857 | if count[j,i] > 0:
858 | log_like = log_like + float(count[j,i])*np.log(prop[j,i])
859 |
860 | return -log_like
861 |
def partial_rate(K, elem):
    """ Calculates the derivative of the rate matrix

    Parameters
    ----------
    K : np.array
        The rate matrix.
    elem : int
        Integer corresponding to which we calculate the
        partial derivative.

    Returns
    -------
    d_K : np.array
        Partial derivative of rate matrix.

    """
    # 'beta' is a module-level inverse temperature defined elsewhere in
    # this file
    nstates = len(K[0])
    d_K = np.zeros((nstates, nstates), float)
    for row in range(nstates):
        if row == elem:
            continue
        d_K[row, elem] = beta/2.*K[row, elem]
        d_K[elem, row] = -beta/2.*K[elem, row]
    # diagonal closes each column so that columns sum to zero
    for col in range(nstates):
        d_K[col, col] = -np.sum(d_K[:, col])
    return d_K
888 |
def partial_peq(peq, elem):
    """ Calculates derivative of equilibrium distribution

    Parameters
    ----------
    peq : np.array
        Equilibrium probabilities.
    elem : int
        State index for the partial derivative.

    Returns
    -------
    d_peq : list
        Derivative of each equilibrium probability.

    """
    # 'beta' is a module-level inverse temperature defined elsewhere in
    # this file
    return [beta*peq[i]*peq[elem] if i != elem
            else -beta*peq[i]*(1. - peq[i])
            for i in range(len(peq))]
906 |
def partial_pfold(states, K, d_K, FF, UU, elem):
    """ Calculates derivative of pfold

    With pfold solving A * pfold = b on the intermediate states, its
    derivative x solves A * x = db - dA * pfold.

    Parameters
    ----------
    states : list
        All state indices; only its length is used.
    K : array
        Rate matrix.
    d_K : array
        Derivative of the rate matrix.
    FF : list
        Folded end-states.
    UU : list
        Unfolded end-states.
    elem : int
        NOTE(review): unused in this function — kept for signature
        compatibility; confirm with callers before removing.

    Returns
    -------
    dpfold : np.array
        Derivative of the committor for every state (zero on end-states).

    """
    nstates = len(states)
    # intermediate states: everything that is not an end-state
    I = list(filter(lambda x: x not in UU+FF, range(nstates)))
    NI = len(I)
    # build the committor linear system and its derivative
    b = np.zeros([NI], float)
    A = np.zeros([NI,NI], float)
    db = np.zeros([NI], float)
    dA = np.zeros([NI,NI], float)
    for j_ind in range(NI):
        j = I[j_ind]
        summ = 0.
        sumd = 0.
        # source terms from transitions into the folded set
        for i in FF:
            summ += K[i][j]
            sumd += d_K[i][j]
        b[j_ind] = -summ
        db[j_ind] = -sumd
        for i_ind in range(NI):
            i = I[i_ind]
            A[j_ind][i_ind] = K[i][j]
            dA[j_ind][i_ind] = d_K[i][j]

    # pfold = A^-1 b; derivative x = A^-1 (db - dA * pfold)
    Ainv = np.linalg.inv(A)
    pfold = np.dot(Ainv,b)
    x = np.dot(Ainv,db - np.dot(dA,pfold))

    # scatter intermediate-state derivatives back to full state indexing;
    # end-states have a fixed committor, so their derivative is zero
    dpfold = np.zeros(nstates,float)
    for i in range(nstates):
        if i in UU:
            dpfold[i] = 0.0
        elif i in FF:
            dpfold[i] = 0.0
        else:
            ii = I.index(i)
            dpfold[i] = x[ii]
    return dpfold
947 |
def partial_flux(states, peq, K, pfold, d_peq, d_K, d_pfold, target):
    """ Calculates derivative of reactive flux

    Accumulates the derivative of the flux matrix over transitions into
    the `target` states (the dividing line for I -> F transitions).

    Parameters
    ----------
    states : list
        All state indices; only its length is used.
    peq, d_peq : array
        Equilibrium probabilities and their derivative.
    K, d_K : array
        Rate matrix and its derivative.
    pfold, d_pfold : array
        Committors and their derivative.
    target : list
        States whose incoming flux derivative is summed.

    """
    nstates = len(states)
    d_J = np.zeros((nstates, nstates), float)
    total = 0
    for src in range(nstates):
        for dst in range(nstates):
            # product rule on J = K * peq * (pfold difference)
            term = d_K[dst][src]*peq[src]*(pfold[dst] - pfold[src]) + \
                   K[dst][src]*d_peq[src]*(pfold[dst] - pfold[src]) + \
                   K[dst][src]*peq[src]*(d_pfold[dst] - d_pfold[src])
            d_J[dst][src] = term
            if dst in target and K[dst][src] > 0:
                total += term
    return total
962 |
def propagate_worker(x):
    """ Propagate dynamics using rate matrix exponential

    Parameters
    ----------
    x : list
        Contains the rate matrix K, the time and the initial population

    Returns
    -------
    popul : list
        The propagated population (as returned by mat_mul_v)

    """
    matrix, t_final, p0 = x
    # exp(K*t) is the propagator of the master equation
    return mat_mul_v(spla.expm(matrix*t_final), p0)
981 |
def propagateT_worker(x):
    """ Propagate dynamics using power of transition matrix

    Parameters
    ----------
    x : list
        Contains the transition matrix, the power and the initial
        population

    Returns
    -------
    popul : list
        The propagated population (as returned by mat_mul_v)

    """
    tmat, npow, p0 = x
    # T^n advances the population by n lag times
    return mat_mul_v(np.linalg.matrix_power(tmat, npow), p0)
1001 |
1002 | #def gen_path_lengths(keys, J, pfold, flux, FF, UU):
1003 | # """ use BHS prescription for defining path lenghts """
1004 | # nkeys = len(keys)
1005 | # I = [x for x in range(nkeys) if x not in FF+UU]
1006 | # Jnode = []
1007 | # # calculate flux going through nodes
1008 | # for i in range(nkeys):
1009 | # Jnode.append(np.sum([J[i,x] for x in range(nkeys) \
1010 | # if pfold[x] < pfold[i]]))
1011 | # # define matrix with edge lengths
1012 | # Jpath = np.zeros((nkeys, nkeys), float)
1013 | # for i in UU:
1014 | # for j in I + FF:
1015 | # if J[j,i] > 0:
1016 | # Jpath[j,i] = np.log(flux/J[j,i]) + 1
1017 | # for i in I:
1018 | # for j in [x for x in FF+I if pfold[x] > pfold[i]]:
1019 | # if J[j,i] > 0:
1020 | # Jpath[j,i] = np.log(Jnode[j]/J[j,i]) + 1
1021 | # return Jnode, Jpath
1022 |
1023 | #def calc_acf(x):
1024 | # """ mp worker that calculates the ACF for a given mode
1025 | #
1026 | # Parameters
1027 | # ----------
1028 | # x : list
1029 | # List containing input for each mp worker. Includes:
1030 | # distraj :the time series of states
1031 | # dt : the timestep for that trajectory
1032 | # keys : the keys used in the assignment
1033 | # lagt : the lag time for construction
1034 | #
1035 | # Returns
1036 | # -------
1037 | # acf : array
1038 | # The autocorrelation function from that trajectory.
1039 | #
1040 | # """
1041 | # # parse input from multiprocessing
1042 | # distraj = x[0]
1043 | # dt = x[1]
1044 | # keys = x[2]
1045 | # nkeys = len(keys)
1046 | # lagt = x[3]
1047 | ## time =
1048 | ## sliding = x[4]
1049 | #
1050 | ## ltraj = len(distraj)
1051 | ## lag = int(lagt/dt) # number of frames per lag time
1052 | ## if sliding:
1053 | ## slider = 1 # every state is initial state
1054 | ## else:
1055 | ## slider = lag
1056 | ##
1057 | ## count = np.zeros([nkeys,nkeys], np.int32)
1058 | ## for i in range(0, ltraj-lag, slider):
1059 | ## j = i + lag
1060 | ## state_i = distraj[i]
1061 | ## state_j = distraj[j]
1062 | ## if state_i in keys:
1063 | ## idx_i = keys.index(state_i)
1064 | ## if state_j in keys:
1065 | ## idx_j = keys.index(state_j)
1066 | ## try:
1067 | ## count[idx_j][idx_i] += 1
1068 | ## except UnboundLocalError:
1069 | ## pass
1070 | # return acf
1071 |
1072 | #def project_worker(x):
1073 | # """ project simulation trajectories on eigenmodes"""
1074 | # trans, power, pini = x
1075 | # trans_pow = np.linalg.matrix_power(trans,power)
1076 | # popul = mat_mul_v(trans_pow, pini)
1077 | # return popul
1078 | #
1079 |
def peq_averages(peq_boots, keep_keys_boots, keys):
    """ Return averages from bootstrap results

    Parameters
    ----------
    peq_boots : list
        List of Peq arrays
    keep_keys_boots : list
        List of key lists
    keys : list
        List of keys

    Returns:
    -------
    peq_ave : array
        Peq averages
    peq_std : array
        Peq std

    """
    nboots = len(peq_boots)
    # position of each key in every bootstrap key list (None if absent)
    peq_indexes = [[kk.index(k) if k in kk else None for kk in keep_keys_boots]
                   for k in keys]
    peq_ave = []
    peq_std = []
    for l, k in enumerate(keys):
        data = [peq_boots[n][peq_indexes[l][n]] for n in range(nboots)
                if peq_indexes[l][n] is not None]
        if data:
            peq_ave.append(np.mean(data))
            peq_std.append(np.std(data))
        else:
            # key never observed in any bootstrap sample; the previous
            # 'except RuntimeWarning' never fired (np.mean of [] only warns),
            # so this case used to yield NaN instead of the intended 0
            peq_ave.append(0.)
            peq_std.append(0.)
    return peq_ave, peq_std
1121 |
def tau_averages(tau_boots, keys):
    """ Return averages from bootstrap results

    NaN entries are excluded from the statistics for each mode.

    Parameters
    ----------
    tau_boots : list
        List of Tau arrays
    keys : list
        List of keys; len(keys) - 1 relaxation times are expected

    Returns:
    -------
    tau_ave : array
        Tau averages
    tau_std : array
        Tau std

    """
    tau_ave = []
    tau_std = []
    for n in range(len(keys) - 1):
        try:
            # only the indexing can raise IndexError (fewer modes than
            # keys-1); keep the try scoped to it
            data = [x[n] for x in tau_boots if not np.isnan(x[n])]
        except IndexError:
            continue
        tau_ave.append(np.mean(data))
        tau_std.append(np.std(data))
    return tau_ave, tau_std
1150 |
1151 |
def matrix_ave(mat_boots, keep_keys_boots, keys):
    """ Return averages from bootstrap results

    Bootstrap samples that do not contain a key contribute 0 for the
    corresponding matrix element.

    Parameters
    ----------
    mat_boots : list
        List of matrix arrays
    keep_keys_boots : list
        List of key lists
    keys : list
        List of keys

    Returns:
    -------
    mat_ave : array
        Matrix averages
    mat_std : array
        Matrix std

    """
    mat_ave = []
    mat_std = []
    nboots = len(keep_keys_boots)
    for k in keys:
        row_ave = []
        row_std = []
        for kk in keys:
            data = []
            for n in range(nboots):
                try:
                    l = keep_keys_boots[n].index(k)
                    ll = keep_keys_boots[n].index(kk)
                    data.append(mat_boots[n][l, ll])
                except (ValueError, IndexError):
                    # ValueError: key absent from this bootstrap sample
                    # (list.index raises ValueError, so the previous
                    # 'except IndexError' let this case crash);
                    # IndexError: matrix smaller than the key list
                    data.append(0.)
            row_ave.append(np.mean(data))
            row_std.append(np.std(data))
        mat_ave.append(row_ave)
        mat_std.append(row_std)
    return mat_ave, mat_std
1196 |
--------------------------------------------------------------------------------
/mastermsm/test/README.md:
--------------------------------------------------------------------------------
1 | # Testing
2 |
3 | Testing of the modules of MasterMSM is available through Python's `unittest` library. For some of the test cases, MD data will be downloaded into a folder inside `test`. To run the test suite, do:
4 |
5 | ```
6 | cd mastermsm
7 | python -m unittest
8 | ```
9 |
--------------------------------------------------------------------------------
/mastermsm/test/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BioKT/MasterMSM/7e71b0fcf42cc7d840e58a6ca18450d710fbdbb4/mastermsm/test/__init__.py
--------------------------------------------------------------------------------
/mastermsm/test/download_data.py:
--------------------------------------------------------------------------------
1 | import os
2 | from urllib.request import urlretrieve
3 |
def download_test_data():
    """ Fetch the alanine test topology and trajectory into ./test/data.

    Files already present on disk are not downloaded again. The data
    directory (including parents) is created if missing.
    """
    base_url = "https://mastermsm.s3.eu-west-2.amazonaws.com/"
    gro = "test/data/alaTB.gro"
    xtc = "test/data/protein_only.xtc"
    cpath = os.getcwd()
    # makedirs with exist_ok avoids the check-then-create race and also
    # creates the intermediate 'test' directory if it does not exist
    os.makedirs(os.path.join(cpath, "test", "data"), exist_ok=True)
    for fname in [gro, xtc]:
        # fname is relative to the working directory, matching the URL path
        if not os.path.isfile(os.path.join(cpath, fname)):
            urlretrieve(base_url + fname, fname)
14 |
--------------------------------------------------------------------------------
/mastermsm/test/test_fewsm.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import mdtraj as md
3 | import numpy as np
4 | from mastermsm.trajectory import traj_lib, traj
5 | from mastermsm.msm import msm, msm_lib
6 | from mastermsm.fewsm import fewsm, fewsm_lib
7 | from test.download_data import download_test_data
8 | import os, pickle
9 |
class TestFewSM_Lib(unittest.TestCase):
    """Unit tests for the helper routines in fewsm_lib."""

    def setUp(self):
        pass

    def test_sign(self):
        # an all-zero vector reports no sign change
        vec = np.array([0] * 3)
        self.assertEqual(fewsm_lib.test_sign(vec), False)
        # a mixed-sign vector does
        vec = np.array([-1, 0, 1])
        self.assertEqual(fewsm_lib.test_sign(vec), True)

    def test_metastability(self):
        # metastability of a transition matrix equals its diagonal sum
        tmat = np.random.rand(10, 10)
        meta = fewsm_lib.metastability(tmat)
        self.assertIsInstance(meta, float)
        self.assertEqual(meta, np.sum(np.diag(tmat)))

    def test_metropolis(self):
        # any delta yields a boolean decision
        self.assertIsInstance(fewsm_lib.metropolis(np.random.random()), bool)
        # a negative delta is always accepted
        self.assertTrue(fewsm_lib.metropolis(-1.))

    def test_beta(self):
        # beta should be a float for both mid-run and degenerate schedules
        for imc, mcsasteps in [(2, 10), (1, 1)]:
            self.assertIsInstance(fewsm_lib.beta(imc, mcsasteps), float)

    def test_split_sign(self):
        # ten macrostates of ten consecutive microstates each
        macro = {i: list(range(10 * i, 10 * i + 10)) for i in range(10)}
        lvec = np.random.rand(100)
        new_macro, vals = fewsm_lib.split_sign(macro, lvec)
        self.assertIsInstance(new_macro, dict)
        # splitting can only keep or increase the number of macrostates
        self.assertGreaterEqual(len(new_macro.keys()), len(macro.keys()))

    def test_split_sigma(self):
        macro = {i: list(range(10 * i, 10 * i + 10)) for i in range(10)}
        lvec = np.random.rand(100)
        new_macro, vals = fewsm_lib.split_sigma(macro, lvec)
        self.assertIsInstance(new_macro, dict)
        self.assertGreaterEqual(len(new_macro.keys()), len(macro.keys()))
70 |
class TestFewSM(unittest.TestCase):
    """Integration tests for fewsm.FEWSM built on a two-state alanine MSM."""

    def setUp(self):
        # download MD test data, discretize on the Ramachandran map with
        # states A and E, and build an MSM at lag time 10; do_trans is run
        # last so the transition matrix exists before FEWSM uses it
        download_test_data()
        self.tr = traj.TimeSeries(top='test/data/alaTB.gro', \
                traj=['test/data/protein_only.xtc'])
        self.tr.discretize('rama', states=['A', 'E'])
        self.tr.find_keys()
        self.msm = msm.SuperMSM([self.tr])
        self.msm.do_msm(10)
        self.msm.msms[10].do_trans()

    def test_attributes(self):
        # coarse-graining the A/E model should produce two macrostates
        self.fewsm = fewsm.FEWSM(parent=self.msm.msms[10])
        self.assertIsNotNone(self.fewsm.macros)
        self.assertEqual(len(self.fewsm.macros), 2)

    def test_map_trajectory(self):
        # mapping the microstate trajectory must give a TimeSeries whose
        # discrete states are exactly {0, 1}
        self.fewsm = fewsm.FEWSM(parent=self.msm.msms[10])
        self.fewsm.map_trajectory()
        self.mapped = self.fewsm.mappedtraj[0]
        self.assertIsNotNone(self.mapped)
        self.assertIsInstance(self.mapped, traj.TimeSeries)
        self.assertTrue(hasattr(self.mapped, 'dt'))
        self.assertTrue(hasattr(self.mapped, 'distraj'))
        self.assertEqual(len(set(self.mapped.distraj)), 2)
        self.assertEqual(sorted(set(self.mapped.distraj)), [0, 1])

    def test_eigen_group(self):
        # eigen_group returns the macrostate assignment as a dict
        self.fewsm = fewsm.FEWSM(parent=self.msm.msms[10])
        macros = self.fewsm.eigen_group()
        print("MACROS! ", macros)
        self.assertIsInstance(macros, dict)
104 |
--------------------------------------------------------------------------------
/mastermsm/test/test_msm.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import mdtraj as md
3 | import numpy as np
4 | from mastermsm.trajectory import traj_lib, traj
5 | from mastermsm.msm import msm, msm_lib
6 | from test.download_data import download_test_data
7 | import os, pickle
8 |
# inverse thermal energy, beta = 1/(RT), at T = 300 K
# (R = 8.314e-3 kJ/(mol K), so beta is in mol/kJ)
beta = 1./(8.314e-3*300)
11 |
class TestMSMLib(unittest.TestCase):
    """Unit tests for the standalone helper functions in msm_lib."""

    def test_esort(self):
        # comparator on (index, eigenvalue) pairs
        self.assertTrue(hasattr(msm_lib, 'esort'))
        self.assertTrue(callable(msm_lib.esort))
        self.esort = msm_lib.esort([0,float(1)], [1,float(2)])
        self.assertEqual(self.esort, 1)
        self.esort = msm_lib.esort([0,float(100)], [1,float(2)])
        self.assertEqual(self.esort, -1)
        self.esort = msm_lib.esort([100,float(1)], [1,float(1)])
        self.assertEqual(self.esort, 0)

    def test_mat_mul_v(self):
        self.assertTrue(hasattr(msm_lib,'mat_mul_v'))
        self.assertTrue(callable(msm_lib.mat_mul_v))
        self.matrix = np.array([
            [1, 2, 3],
            [4, 5, 6]
        ])
        self.vector = np.array(
            [1, 0, 1]
        )
        self.assertEqual(msm_lib.mat_mul_v(self.matrix, self.vector), [4, 10])
        self.matrix = np.array([
            [-5, -4, 2],
            [1, 6, -3],
            [3, 5.5, -4]
        ])
        self.vector = np.array(
            [1, 2, -3]
        )
        self.assertEqual(msm_lib.mat_mul_v(self.matrix, self.vector), [-19, 22, 26])

    def test_rand_rate(self):
        # two independent random draws should differ
        testT = np.array([
            [10, 2, 1],
            [1, 1, 1],
            [0, 1, 0]
        ])
        self.random1 = msm_lib.rand_rate(nkeep= 3, count= testT)
        self.random2 = msm_lib.rand_rate(nkeep= 3, count= testT)
        self.assertEqual(self.random1.shape, (3, 3))
        self.assertFalse((self.random1 == self.random2).all())

    def test_traj_split(self):
        traj1 = traj.TimeSeries(distraj=[1, 2, 3], dt=1.)
        traj2 = traj.TimeSeries(distraj=[3, 2, 1], dt=2.)
        trajs = [traj1, traj2]
        self.filepath = msm_lib.traj_split(data=trajs, lagt=10)
        self.assertIsInstance(self.filepath, str)
        self.assertTrue(os.path.exists(self.filepath))
        os.remove(self.filepath) # clean temp file

    # NOTE(review): missing the 'test_' prefix, so unittest never runs this
    # method; confirm the msm_lib.calc_trans call signature before renaming
    def calc_trans(self):
        self.testT = msm_lib.calc_trans(nkeep=10)
        self.assertIsInstance(self.testT, np.ndarray)
        self.assertEqual(self.testT.shape, (10,10))

    def test_calc_rate(self):
        self.testT = np.array([
            [1, 2, 3],
            [0, 0, 0],
            [10, 10, 10]
        ])
        self.rate = msm_lib.calc_rate(nkeep=3, trans=self.testT, lagt=10)
        self.assertIsInstance(self.rate, np.ndarray)
        self.assertEqual(self.rate.shape, (3, 3))

    def test_calc_lifetime(self):
        distraj = [1, 1, 1, 2]
        dt = 1.
        keys = [1, 2]
        data = [distraj, dt, keys]
        self.life = msm_lib.calc_lifetime(data)
        self.assertIsInstance(self.life, dict)

    def test_partial_rate(self):
        test_nstates = 3
        test_K = np.random.rand(test_nstates,test_nstates)
        d_K_1 = msm_lib.partial_rate(test_K, 1)
        for i in range(test_nstates):
            if i != 1:
                self.assertAlmostEqual(d_K_1[i,1] / test_K[i,1], beta/2)
                self.assertAlmostEqual(d_K_1[1, i] / test_K[1, i], -beta / 2)
        self.assertEqual(d_K_1.shape, (test_nstates, test_nstates))

    def test_partial_peq(self):
        test_nstates = 3
        test_peq = np.random.rand(3)
        d_peq_1 = msm_lib.partial_peq(test_peq,1)
        self.assertEqual(len(d_peq_1), test_nstates)
        for elem in range(test_nstates):
            d_peq_elem = msm_lib.partial_peq(test_peq, elem)
            for i in range(test_nstates):
                if i != elem:
                    self.assertAlmostEqual(d_peq_elem[i] / (test_peq[elem] * test_peq[i]), beta)
                else:
                    self.assertAlmostEqual(d_peq_elem[i] / (test_peq[i] * (1. - test_peq[i])), -beta)

    def test_partial_pfold(self):
        states = range(3)
        K = np.random.rand(2, 2)
        d_K = np.random.rand(2, 2)
        FF = [0]
        UU = [2]
        res_dpfold = msm_lib.partial_pfold(states, K, d_K, FF, UU,
                                           np.random.randint(0, 2))  # the last int parameter is not used
        self.assertEqual(len(res_dpfold), len(states))
        self.assertIsInstance(res_dpfold, np.ndarray)
        self.assertIsInstance(res_dpfold[0], float)

    def test_partial_flux(self):
        nstates = np.random.randint(2,50)
        states = range(nstates)
        peq = np.random.rand(nstates)
        K = np.random.rand(nstates,nstates)
        pfold = np.random.rand(nstates)
        d_peq = np.random.rand(nstates)
        d_K = np.random.rand(nstates,nstates)
        d_pfold = np.random.rand(nstates)
        target = [0]

        # independent reference computation of the flux derivative
        sum_d_flux = 0
        d_J = np.zeros((nstates, nstates), float)
        for i in range(nstates):
            for j in range(nstates):
                d_J[j][i] = d_K[j][i] * peq[i] * (pfold[j] - pfold[i]) + \
                            K[j][i] * d_peq[i] * (pfold[j] - pfold[i]) + \
                            K[j][i] * peq[i] * (d_pfold[j] - d_pfold[i])
                if j in target and K[j][i] > 0: # dividing line corresponds to I to F transitions
                    sum_d_flux += d_J[j][i]
        res_sum_d_flux = msm_lib.partial_flux(states, peq, K, pfold,d_peq, d_K, d_pfold, target)

        self.assertIsNotNone(res_sum_d_flux)
        self.assertIsInstance(res_sum_d_flux, float)
        # previously the reference value was computed but never compared
        self.assertAlmostEqual(res_sum_d_flux, sum_d_flux)

    def test_tau_averages(self):
        tau_boots_test = np.random.rand(2, 2)
        keys_test = range(3)
        res_tau_ave, res_tau_std = msm_lib.tau_averages(tau_boots_test, keys_test)
        self.assertEqual(len(res_tau_ave),len(keys_test)-1)
        self.assertEqual(len(res_tau_std),len(keys_test)-1)
        self.assertIsInstance(res_tau_std, list)
        self.assertIsInstance(res_tau_ave, list)
        self.assertIsInstance(res_tau_ave[0],float)
        self.assertIsInstance(res_tau_std[0], float)

    def test_peq_averages(self):
        peq_boots_test = np.random.rand(2,3)
        keep_keys_boots_test = [['A','E','O'],['A','E','O']]
        keys = ['A','E','O']
        res_peq_ave, res_peq_std = msm_lib.peq_averages(peq_boots_test, keep_keys_boots_test, keys)
        self.assertEqual(len(res_peq_ave),len(keys))
        self.assertEqual(len(res_peq_std),len(keys))
        self.assertIsInstance(res_peq_ave, list)
        self.assertIsInstance(res_peq_std, list)
        self.assertIsInstance(res_peq_ave[0], float)
        self.assertIsInstance(res_peq_std[0], float)

    def test_propagate_worker(self):
        t = 0
        rate = np.random.rand(2,2)
        pini = np.random.rand(2,2)
        x_test = [rate, t, pini]
        res_popul = msm_lib.propagate_worker(x_test)
        self.assertIsInstance(res_popul, list)
        self.assertIsInstance(res_popul[0], np.ndarray)
        self.assertIsInstance(res_popul[0][0], float)

    def test_propagateT_worker(self):
        t = 0
        rate = np.random.rand(2,2)
        pini = np.random.rand(2,2)
        x_test = [rate, t, pini]
        res_popul = msm_lib.propagateT_worker(x_test)
        self.assertIsInstance(res_popul, list)
        self.assertIsInstance(res_popul[0], np.ndarray)
        self.assertIsInstance(res_popul[0][0], float)

    def test_detailed_balance(self):
        nkeep_test = 2
        rate = np.array(np.random.rand(nkeep_test,nkeep_test))
        peq = np.random.rand(nkeep_test)
        res_rate = msm_lib.detailed_balance(nkeep_test, rate, peq)
        self.assertEqual(res_rate.shape, (nkeep_test,nkeep_test))
        self.assertIsInstance(res_rate,np.ndarray)
        self.assertIsInstance(res_rate[0][0],float)

    def test_likelihood(self):
        nkeep_test = 2
        rate = np.array(np.random.rand(nkeep_test,nkeep_test))
        count = np.array(np.random.randint(0, 10**5, size=(nkeep_test,nkeep_test)))
        lagt = np.random.randint(1,1000)
        res_mlog_like = msm_lib.likelihood(nkeep_test,rate,count,lagt)
        self.assertIsInstance(res_mlog_like, float)
        self.assertIsNotNone(res_mlog_like)
        self.assertGreater(res_mlog_like, 0)

    def test_calc_mlrate(self):
        nkeep_test = 2
        rate_init = np.array(np.random.rand(nkeep_test, nkeep_test))
        count = np.array(np.random.randint(0, 10 ** 5, size=(nkeep_test, nkeep_test)))
        lagt = np.random.randint(1, 1000)
        res_rate, res_ml, res_beta = msm_lib.calc_mlrate(nkeep_test, count, lagt, rate_init)
        self.assertIsInstance(res_rate, np.ndarray)
        self.assertIsNotNone(res_rate)
        self.assertIsNotNone(res_ml)
        self.assertIsNotNone(res_beta)

    def test_mc_move(self):
        nkeep_test = np.random.randint(2,100)
        rate = np.random.rand(nkeep_test,nkeep_test)
        peq_test = np.random.rand(nkeep_test)
        db_rate = msm_lib.detailed_balance(nkeep_test,rate,peq_test)
        new_rate, new_peq = msm_lib.mc_move(nkeep_test, db_rate, peq_test)
        self.assertFalse(np.array_equal(db_rate, new_rate))
        self.assertEqual(db_rate.shape, new_rate.shape)
        self.assertEqual(peq_test.shape, new_peq.shape)

    def test_calc_eigsK(self):
        nstates = np.random.randint(2,100)
        rate_test = np.random.rand(nstates,nstates)
        res_tauK,res_peqK = msm_lib.calc_eigsK(rate_test)
        self.assertIsInstance(res_tauK, list)
        self.assertEqual(len(res_tauK), nstates)
        self.assertEqual(len(res_peqK), nstates)
        # np.float / np.complex were removed from NumPy; the builtins behave
        # identically here because numpy scalar types subclass them
        self.assertIsInstance(res_tauK[0], float)
        self.assertIsInstance(res_peqK[0], complex)

        res_tauK, res_peqK, res_rvecsK, res_lvecsK = msm_lib.calc_eigsK(rate_test, evecs=True)
        self.assertIsNotNone(res_rvecsK)
        self.assertIsNotNone(res_lvecsK)
        self.assertIsInstance(res_lvecsK, np.ndarray)
        self.assertIsInstance(res_rvecsK, np.ndarray)

    def test_run_commits(self):
        # at least 3 states so that UU=[2] is always a valid state index
        nstates = np.random.randint(3,100)
        states = range(nstates)
        K = np.random.rand(nstates, nstates)
        peq = np.random.rand(nstates)
        FF = [0]
        UU = [2]
        J, pfold, sum_flux, kf = msm_lib.run_commit(states, K, peq, FF, UU)
        self.assertIsNotNone(J)
        self.assertIsNotNone(pfold)
        self.assertIsNotNone(sum_flux)
        self.assertIsNotNone(kf)
        self.assertIsInstance(kf, float)
        self.assertGreater(kf, 0)
        self.assertEqual(J.shape, K.shape)
        self.assertEqual(len(pfold), nstates)
        self.assertIsInstance(pfold[0], float)
        self.assertIsInstance(J[0][0], float)

    def test_do_boots_worker(self):
        filetmp = "test_msm_temp.pickle"
        keys = ['A', 'E']
        lagt = np.random.randint(1,100)
        slider = 1
        ncount = 10
        x = [filetmp, keys, lagt, ncount, slider]
        # result = msm_lib.do_boots_worker(x)
        # tauT, peqT, trans, keep_keys = result
        # print(tauT, peqT, trans, keep_keys)
280 |
281 |
282 |
283 |
284 |
285 |
286 |
287 |
288 |
289 |
290 |
291 |
292 |
293 |
class TestSuperMSM(unittest.TestCase):
    """Integration tests for msm.SuperMSM built from alanine MD test data."""

    def setUp(self):
        # three-state (A/E/O) Ramachandran discretization of the downloaded
        # trajectory, wrapped in a SuperMSM
        download_test_data()
        self.tr = traj.TimeSeries(top='test/data/alaTB.gro', \
                traj=['test/data/protein_only.xtc'])
        self.tr.discretize('rama', states=['A', 'E', 'O'])
        self.tr.find_keys()
        self.msm = msm.SuperMSM([self.tr])

    def test_init(self):
        self.assertIsNotNone(self.msm)
        self.assertTrue( hasattr(self.msm, 'data'))
        self.assertEqual(self.msm.data, [self.tr])
        self.assertEqual(self.msm.dt, 1.0)
        # testing with more than one trajectory
        self.msm = msm.SuperMSM([self.tr, self.tr])
        self.assertEqual(len(self.msm.data), 2)

    def test_merge_trajs(self):
        # create fake trajectory to merge; only 'L' is new relative to the
        # parent's keys
        traj2 = traj.TimeSeries(distraj=['L', 'L', 'L', 'A'], dt = 2.0)
        traj2.keys = ['L','A']
        old_keys = self.msm.keys
        self.msm.data = [self.tr, traj2]
        new_keys = self.msm._merge_trajs()
        self.assertEqual(len(new_keys), len(old_keys) + 1)
        self.assertEqual(sorted(new_keys), ['A', 'E', 'L'])

    def test_max_dt(self):
        # the coarser of the two timesteps should win
        traj2 = traj.TimeSeries(distraj=['L', 'L', 'L', 'A'], dt=2.0)
        old_dt = self.msm.dt
        self.msm.data = [self.tr, traj2]
        new_dt = self.msm._max_dt()
        self.assertEqual(new_dt, 2.0)

    def test_do_msm(self):
        # an MSM built at lag time 1 is registered under msms[1]
        self.msm.do_msm(lagt=1)
        self.assertIsInstance(self.msm.msms[1], msm.MSM)
        self.assertEqual(self.msm.msms[1].lagt, 1)

    def test_convergence(self):
        lagtimes = np.array(range(10,100,10))
        self.msm.convergence_test(time=lagtimes)
        for lagt in lagtimes:
            # bootstrap statistics must be attached at every lag time
            self.assertTrue(hasattr(self.msm.msms[lagt], 'tau_ave'))
            self.assertTrue(hasattr(self.msm.msms[lagt], 'tau_std'))
            self.assertTrue(hasattr(self.msm.msms[lagt], 'peq_ave'))
            self.assertTrue(hasattr(self.msm.msms[lagt], 'peq_std'))

    def test_do_boots(self):
        self.msm.do_msm(10)
        self.msm.msms[10].boots()

        self.assertTrue(hasattr(self.msm.msms[10], 'tau_ave'))
        self.assertTrue(hasattr(self.msm.msms[10], 'tau_std'))
        self.assertTrue(hasattr(self.msm.msms[10], 'peq_ave'))
        self.assertTrue(hasattr(self.msm.msms[10], 'peq_std'))

    def test_ck_test(self):
        # Chapman-Kolmogorov test starting from state A
        init = ['A']
        time = np.array(range(50,210,25))
        pMSM, pMD, epMD = self.msm.ck_test(init=init, time=time)
        self.assertIsNotNone(pMSM)
        self.assertIsNotNone(pMD)
        self.assertIsNotNone(epMD)
        self.assertEqual(len(pMSM), len(time))
        self.assertEqual(len(epMD), 10)

        self.assertIsInstance(pMSM, list)
        self.assertIsInstance(pMSM[0], tuple)
        self.assertIsInstance(pMD, np.ndarray)
        self.assertIsInstance(epMD, np.ndarray)

    def test_do_pfold(self):
        states = [
            ['A'],
            ['E']
        ]
        for lagt in [1,10,100]:
            # pfold needs transition and rate matrices in place first
            self.msm.do_msm(lagt)
            self.msm.msms[lagt].boots()
            self.msm.msms[lagt].do_trans()
            self.msm.msms[lagt].do_rate()

            self.msm.msms[lagt].do_pfold(FF=states[0], UU=states[1])
            self.assertTrue(hasattr(self.msm.msms[lagt], 'pfold'))
            self.assertTrue(hasattr(self.msm.msms[lagt], 'J'))
            self.assertTrue(hasattr(self.msm.msms[lagt], 'sum_flux'))
            self.assertTrue(hasattr(self.msm.msms[lagt], 'kf'))
            self.assertIsInstance(self.msm.msms[lagt].kf, np.float64)
            self.assertEqual(len(self.msm.msms[lagt].J), len(states))

    def test_lb_rate(self):
        # lifetime-based rate estimation attaches spectral attributes
        self.msm.do_lbrate()
        self.assertIsNotNone(self.msm.tauK)
        self.assertIsNotNone(self.msm.peqK)
        self.assertIsNotNone(self.msm.rvecsK)
        self.assertIsNotNone(self.msm.lvecsK)
        self.assertEqual(len(self.msm.tauK), len(self.msm.keys) - 1)
        self.assertEqual(self.msm.rvecsK.shape, (len(self.msm.keys), len(self.msm.keys)))
396 |
397 |
398 |
class TestMSM(unittest.TestCase):
    """Unit tests for msm.MSM built from random discrete trajectories."""

    def setUp(self):
        download_test_data()
        # At least 3 states so eigendecompositions are non-trivial.
        self.nstates = np.random.randint(3, 100)
        distraj_1 = np.random.randint(1, self.nstates + 1, size=1000).tolist()
        traj_1 = traj.TimeSeries(distraj=distraj_1, dt=1.)
        distraj_2 = np.random.randint(1, self.nstates + 1, size=1000).tolist()
        traj_2 = traj.TimeSeries(distraj=distraj_2, dt=2.)
        self.data = np.array([traj_1, traj_2])
        self.lagt = 10
        self.keys = [i for i in range(1, self.nstates + 1)]
        self.msm = msm.MSM(data=self.data, lagt=self.lagt, keys=self.keys, sym=True)

    def test_init(self):
        """An empty MSM holds no data; a populated one keeps its inputs."""
        self.msm_empty = msm.MSM()
        self.assertIsNotNone(self.msm_empty)
        self.assertIsNone(self.msm_empty.data)
        self.assertIsNone(self.msm_empty.lagt)
        self.assertIsNone(self.msm_empty.keys)
        self.assertFalse(self.msm_empty.sym)

        self.assertIsNotNone(self.msm)
        self.assertIsNotNone(self.msm.data)
        self.assertIsNotNone(self.msm.keys)
        self.assertIsNotNone(self.msm.lagt)
        self.assertTrue(self.msm.sym)
        self.assertTrue(np.array_equal(self.data, self.msm.data))
        self.assertEqual(self.msm.lagt, self.lagt)
        self.assertTrue(np.array_equal(self.keys, self.msm.keys))

    def test_do_count(self):
        """Counting transitions records which states/keys are kept."""
        self.msm.do_count()
        self.assertIsNotNone(self.msm.keep_states)
        self.assertIsNotNone(self.msm.keep_keys)

    def test_calc_count_multi(self):
        """Multi-trajectory count matrix is square over the full state space."""
        count = self.msm.calc_count_multi()
        self.assertIsNotNone(count)
        self.assertIsInstance(count, np.ndarray)
        self.assertEqual(count.shape, (self.nstates, self.nstates))

    def test_check_connect(self):
        """With dense random data every state is in the connected set."""
        self.msm.do_count()
        keep_states, keep_keys = self.msm.check_connect()
        self.assertEqual(len(keep_keys), len(keep_states))
        self.assertEqual(self.msm.keep_keys, self.keys)

    def test_do_trans(self):
        """Transition matrix build, with and without eigenvectors."""
        self.msm.do_count()
        self.msm.do_trans(evecs=False)
        self.assertIsNotNone(self.msm.tauT)
        self.assertIsNotNone(self.msm.trans)
        self.assertIsNotNone(self.msm.peqT)
        # Eigenvectors must not be attached unless explicitly requested.
        self.assertFalse(hasattr(self.msm, "rvecsT"))
        self.assertFalse(hasattr(self.msm, "lvecsT"))
        self.assertEqual(len(self.msm.tauT), self.nstates - 1)
        self.assertEqual(len(self.msm.peqT), self.nstates)
        self.assertEqual(self.msm.trans.shape, (self.nstates, self.nstates))
        self.msm.do_trans(evecs=True)
        self.assertTrue(hasattr(self.msm, "rvecsT"))
        self.assertTrue(hasattr(self.msm, "lvecsT"))
        self.assertEqual(len(self.msm.rvecsT), self.nstates)
        self.assertEqual(len(self.msm.lvecsT), self.nstates)

    def test_do_rate(self):
        """Rate matrix build, with and without eigenvectors."""
        self.msm.do_count()
        self.msm.do_trans()
        self.msm.do_rate(evecs=False)
        self.assertIsNotNone(self.msm.rate)
        self.assertIsNotNone(self.msm.tauK)
        self.assertIsNotNone(self.msm.peqK)
        self.assertEqual(len(self.msm.tauK), self.nstates - 1)
        self.assertEqual(len(self.msm.peqK), self.nstates)
        self.msm.do_rate(evecs=True)
        self.assertIsNotNone(self.msm.rvecsK)
        self.assertIsNotNone(self.msm.lvecsK)

    def test_calc_eigsT(self):
        """Eigendecomposition of the transition matrix."""
        self.msm.do_count()
        self.msm.do_trans()
        tauT, peqT, rvecsT_sorted, lvecsT_sorted = self.msm.calc_eigsT(evecs=True)
        self.assertIsNotNone(tauT)
        self.assertIsNotNone(peqT)
        self.assertEqual(len(tauT), self.nstates - 1)
        self.assertEqual(len(peqT), self.nstates)
        self.assertIsNotNone(rvecsT_sorted)
        self.assertIsNotNone(lvecsT_sorted)

    def test_calc_eigsK(self):
        """Eigendecomposition of the rate matrix.

        Bug fix: this test previously called calc_eigsT, so calc_eigsK was
        never exercised. calc_eigsK operates on the rate matrix, so do_rate()
        is run first.
        """
        self.msm.do_count()
        self.msm.do_trans()
        self.msm.do_rate()
        tauK, peqK, rvecsK_sorted, lvecsK_sorted = self.msm.calc_eigsK(evecs=True)
        self.assertIsNotNone(tauK)
        self.assertIsNotNone(peqK)
        self.assertEqual(len(tauK), self.nstates - 1)
        self.assertEqual(len(peqK), self.nstates)
        self.assertIsNotNone(rvecsK_sorted)
        self.assertIsNotNone(lvecsK_sorted)

    def test_boots(self):
        """Bootstrap errors have one entry per state (or per relaxation time)."""
        self.msm.do_count()
        self.msm.do_trans()
        self.msm.boots()
        self.assertIsNotNone(self.msm.tau_ave)
        self.assertIsNotNone(self.msm.tau_std)
        self.assertIsNotNone(self.msm.peq_ave)
        self.assertIsNotNone(self.msm.peq_std)
        self.assertEqual(len(self.msm.tau_ave), self.nstates - 1)
        self.assertEqual(len(self.msm.tau_std), self.nstates - 1)
        self.assertEqual(len(self.msm.peq_std), self.nstates)
        self.assertEqual(len(self.msm.peq_ave), self.nstates)

    def test_sensitivity(self):
        """Sensitivity analysis of the folding rate w.r.t. each state."""
        self.msm.do_count()
        self.msm.do_trans()
        self.msm.do_rate()
        FF = [np.random.randint(1, self.nstates + 1)]
        UU = [np.random.randint(1, self.nstates + 1)]
        self.msm.sensitivity(FF=FF, UU=UU)
        self.assertIsNotNone(self.msm.kf)
        self.assertIsNotNone(self.msm.d_pu)
        self.assertIsNotNone(self.msm.d_lnkf)
        self.assertIsNotNone(self.msm.dJ)
        self.assertIsInstance(self.msm.kf, float)
        self.assertEqual(len(self.msm.d_pu), self.nstates)
        self.assertEqual(len(self.msm.d_lnkf), self.nstates)
        self.assertEqual(len(self.msm.dJ), self.nstates)
        self.assertIsInstance(self.msm.d_pu[0], float)
        self.assertIsInstance(self.msm.dJ[0], float)
        self.assertIsInstance(self.msm.d_lnkf[0], float)

    def test_propagateK(self):
        """Propagation with the rate matrix yields 20 increasing time points."""
        random_p0 = np.random.rand(self.nstates)
        self.msm.do_count()
        self.msm.do_trans()
        self.msm.do_rate()
        time, popul = self.msm.propagateK(p0=random_p0)
        self.assertIsNotNone(time)
        self.assertIsInstance(time, np.ndarray)
        self.assertIsInstance(popul, list)
        self.assertEqual(len(time), 20)
        self.assertEqual(len(popul), 20)
        self.assertEqual(len(popul[0]), self.nstates)
        # Time axis must be strictly increasing.
        for ind, t in enumerate(time):
            if ind != 0:
                self.assertGreater(t, time[ind - 1])

    def test_propagateT(self):
        """Propagation with the transition matrix yields 20 population points."""
        random_p0 = np.random.rand(self.nstates)
        self.msm.do_count()
        self.msm.do_trans()
        self.msm.do_rate()
        tcum, popul = self.msm.propagateT(p0=random_p0)
        self.assertIsNotNone(tcum)
        self.assertIsInstance(tcum, list)
        self.assertIsInstance(popul, list)
        self.assertEqual(len(tcum), 20)
        self.assertEqual(len(popul), 20)
        self.assertEqual(len(popul[0]), self.nstates)

    def test_acf_mode(self):
        """Mode autocorrelation returns one entry per kept state minus one."""
        self.msm.do_count()
        self.msm.do_trans(evecs=True)
        self.msm.do_rate()
        acf_ave = self.msm.acf_mode()
        self.assertIsInstance(acf_ave, dict)
        self.assertEqual(len(acf_ave.keys()), len(self.msm.keep_keys) - 1)
        modes = [key for key in acf_ave.keys()]
        self.assertIsInstance(acf_ave[modes[0]][0], float)
581 |
582 |
583 |
584 |
585 |
586 |
--------------------------------------------------------------------------------
/mastermsm/test/test_trajectory.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import mdtraj as md
3 | import numpy as np
4 | from mastermsm.trajectory import traj_lib, traj
5 | from mastermsm.msm import msm, msm_lib
6 | from test.download_data import download_test_data
7 | import os
8 |
9 |
class TestMDTrajLib(unittest.TestCase):
    """Unit tests for the discretization helpers in trajectory.traj_lib."""

    def setUp(self):
        download_test_data()
        self.tr = traj.TimeSeries(top='test/data/alaTB.gro', \
                traj=['test/data/protein_only.xtc'])

    def test_inrange(self):
        """_inrange is a strict open-interval membership test."""
        self.inrange = traj_lib._inrange(2, 1, 3)
        self.assertEqual(self.inrange, 1)
        self.inrange = traj_lib._inrange(0, 1, 2)
        self.assertEqual(self.inrange, 0)
        # Boundary values are excluded.
        self.inrange = traj_lib._inrange(1, 1, 2)
        self.assertEqual(self.inrange, 0)

    def test_inbounds(self):
        """_inbounds matches (phi, psi) pairs against rectangular regions."""
        TBA_bounds = {}
        TBA_bounds['A'] = [-100., -40., -50., -10.]
        TBA_bounds['E'] = [-180., -40., 125., 165.]
        TBA_bounds['L'] = [50., 100., -40., 70.0]

        # test in alpha helix
        self.inbounds = traj_lib._inbounds(TBA_bounds['A'], -90, -40)
        self.assertEqual(self.inbounds, 1)
        # test in beta-sheet
        self.inbounds = traj_lib._inbounds(TBA_bounds['E'], -90, 140)
        self.assertEqual(self.inbounds, 1)
        # test in left-handed alpha helix
        self.inbounds = traj_lib._inbounds(TBA_bounds['L'], 70, 30)
        self.assertEqual(self.inbounds, 1)
        # test when no conformation
        self.inbounds = traj_lib._inbounds(TBA_bounds['A'], 0, 0)
        self.assertEqual(self.inbounds, 0)

    def test_state(self):
        """_state assigns each (phi, psi) pair to the expected coarse state."""
        psi = [-30, 0, -40, 90, 140, 180]
        phi = [60., 0, -90, -90, -90, -180]
        states_test = ['L', 'O', 'A', 'O', 'E', 'O']
        bounds = {}
        bounds['A'] = [-100., -40., -50., -10.]
        bounds['E'] = [-180., -40., 125., 165.]
        bounds['L'] = [50., 100., -40., 70.0]

        for ind in range(len(phi)):
            result = traj_lib._state(phi[ind], psi[ind], bounds)
            state = result[0]
            # Bug fix: expected/actual were swapped in the failure message.
            self.assertEqual(state, states_test[ind],
                             'expected state %s but got %s' % (states_test[ind], state))

    def test_stategrid(self):
        """_stategrid maps dihedral pairs onto grid-bin indices."""
        self.assertIsNotNone(traj_lib._stategrid(-180, -180, 20))
        self.assertLess(traj_lib._stategrid(-180, 0, 20), 400)
        self.assertEqual(traj_lib._stategrid(0, 0, 20), 210)
        self.assertEqual(traj_lib._stategrid(-180, 0, 100), 2186)

    def test_discreterama(self):
        """discrete_rama only emits states from the requested set plus 'O'."""
        mdt_test = self.tr.mdt
        phi = md.compute_phi(mdt_test)
        psi = md.compute_psi(mdt_test)
        states = ['L', 'A', 'E']
        discrete = traj_lib.discrete_rama(phi, psi, states=states)
        unique_st = set(discrete)
        for state in unique_st:
            self.assertIn(state, ['O', 'A', 'E', 'L'])

    def test_discreteramagrid(self):
        """Grid discretization yields bin indices inside [0, nbins**2)."""
        mdt_test = self.tr.mdt
        phi = md.compute_phi(mdt_test)
        psi = md.compute_psi(mdt_test)
        discrete = traj_lib.discrete_ramagrid(phi, psi, nbins=20)
        min_ibin = min(discrete)
        max_ibin = max(discrete)
        self.assertLess(max_ibin, 400)
        self.assertGreaterEqual(min_ibin, 0)
88 |
class TestMDtraj(unittest.TestCase):
    """Tests for TimeSeries construction and discrete-trajectory parsing."""

    def setUp(self):
        download_test_data()
        self.traj = md.load('test/data/protein_only.xtc', \
                top='test/data/alaTB.gro')
        self.topfn = 'test/data/alaTB.gro'
        self.trajfn = 'test/data/protein_only.xtc'
        self.tr = traj.TimeSeries(top='test/data/alaTB.gro', \
                traj=['test/data/protein_only.xtc'])

    def test_traj(self):
        """The raw mdtraj trajectory has the expected dimensions."""
        self.assertIsNotNone(self.traj)
        self.assertEqual(self.traj.n_atoms, 19)
        self.assertEqual(self.traj.timestep, 1.)
        self.assertEqual(self.traj.n_residues, 3)
        self.assertEqual(self.traj.n_frames, 10003)

    def test_load_mdtraj(self):
        """_load_mdtraj returns an mdtraj Trajectory object."""
        mdtraj = traj._load_mdtraj(top=self.topfn, traj=self.trajfn)
        self.assertIsNotNone(mdtraj)
        self.assertEqual(mdtraj.__module__, 'mdtraj.core.trajectory')
        self.assertTrue(hasattr(mdtraj, '__class__'))

    def test_read_distraj(self):
        """_read_distraj parses file-based and list-based discrete trajectories."""
        self.assertIsNotNone(self.tr._read_distraj)
        self.assertTrue(callable(self.tr._read_distraj))
        # read distraj from a temporary file
        content = "0.0 A\n" \
                  "1.0 E\n" \
                  "2.0 L\n" \
                  "3.0 O"
        fn = 'temp.txt'
        try:
            # The context manager guarantees the data is flushed and the
            # handle closed before _read_distraj reopens the file by name
            # (previously the file was read while the write handle was open).
            with open(fn, 'w') as fd:
                fd.write(content)
            cstates, dt = self.tr._read_distraj(distraj=fn)
            self.assertIsInstance(cstates, list)
            self.assertEqual(len(cstates), len(content.split('\n')))
            self.assertEqual(dt, 1.0)
        finally:
            os.remove(fn)
        # read distraj from array and custom timestep
        distraj_arr = content.split('\n')
        cstates, dt = self.tr._read_distraj(distraj=distraj_arr, dt=2.0)
        self.assertIsInstance(cstates, list)
        self.assertEqual(len(cstates), len(content.split('\n')))
        self.assertEqual(dt, 2.0)
        # read empty 'discrete' trajectory
        cstates, dt = self.tr._read_distraj(distraj=[])
        self.assertEqual(len(cstates), 0)
        self.assertEqual(dt, 1.0)

    def test_timeseries_init(self):
        """A TimeSeries built from an MD trajectory wraps an mdtraj object."""
        self.assertIsNotNone(self.tr)
        self.assertIsNotNone(self.tr.mdt)
        self.assertTrue(hasattr(self.tr.mdt, '__class__'))
        self.assertEqual(self.tr.mdt.__module__, 'mdtraj.core.trajectory')
        self.assertIsNotNone(self.tr.discretize)

    def test_ts_discretize(self):
        """Ramachandran discretization visits exactly A, E, L and O."""
        self.tr.discretize('rama', states=['A', 'E', 'L'])
        self.assertIsNotNone(self.tr.distraj)
        unique_states = sorted(set(self.tr.distraj))
        self.assertListEqual(unique_states, ['A', 'E', 'L', 'O'])

    def test_ts_find_keys(self):
        """find_keys collects unique states, honouring the exclusion list."""
        self.assertIsNotNone(self.tr.find_keys)
        # test excluding state O (unassigned)
        self.tr.distraj = ['O'] * 50000
        for i in range(len(self.tr.distraj)):
            self.tr.distraj[i] = np.random.choice(['A', 'E', 'L', 'O'])

        self.tr.find_keys()
        keys = self.tr.keys
        self.assertEqual(len(set(keys)), len(keys))
        self.assertEqual(len(keys), 3)
        for key in keys:
            self.assertIn(key, ['A', 'E', 'L'])

        del self.tr.distraj
        # test excluding the alpha-helix state instead
        self.tr.distraj = ['O'] * 50000
        for i in range(len(self.tr.distraj)):
            self.tr.distraj[i] = np.random.choice(['A', 'E', 'L', 'O'])

        self.tr.find_keys(exclude=['A'])
        keys = self.tr.keys
        self.assertEqual(len(set(keys)), len(keys))
        self.assertEqual(len(keys), 3)
        for key in keys:
            self.assertIn(key, ['O', 'E', 'L'])

    def test_gc(self):
        """gc() removes the (memory-heavy) mdtraj attribute."""
        self.tr.gc()
        self.assertFalse(hasattr(self.tr, 'mdt'))
188 |
189 |
class UseMDtraj(unittest.TestCase):
    """Smoke checks on a TimeSeries wrapping the test mdtraj trajectory."""

    def setUp(self):
        download_test_data()
        self.tr = traj.TimeSeries(top='test/data/alaTB.gro', \
                traj=['test/data/protein_only.xtc'])

    # NOTE(review): method name keeps the original spelling ("atributes")
    # so the discovered test identity is unchanged.
    def test_atributes(self):
        self.assertIsNotNone(self.tr.mdt)
        expected = {'n_atoms': 19, 'n_frames': 10003, 'n_residues': 3}
        for attr, value in expected.items():
            self.assertEqual(getattr(self.tr.mdt, attr), value)
        self.assertIsNotNone(self.tr.discretize)
        self.assertIs(callable(self.tr.discretize), True)
203 |
204 |
class TestMSMLib(unittest.TestCase):
    """Unit tests for the helper functions in msm.msm_lib."""

    def test_esort(self):
        """esort orders (index, eigenvalue) pairs by eigenvalue."""
        self.assertTrue(hasattr(msm_lib, 'esort'))
        self.assertTrue(callable(msm_lib.esort))
        self.esort = msm_lib.esort([0, float(1)], [1, float(2)])
        self.assertEqual(self.esort, 1)
        self.esort = msm_lib.esort([0, float(100)], [1, float(2)])
        self.assertEqual(self.esort, -1)
        # Equal eigenvalues compare as 0 regardless of index.
        self.esort = msm_lib.esort([100, float(1)], [1, float(1)])
        self.assertEqual(self.esort, 0)

    def test_mat_mul_v(self):
        """mat_mul_v computes a matrix-vector product as a plain list."""
        self.assertTrue(hasattr(msm_lib, 'mat_mul_v'))
        self.assertTrue(callable(msm_lib.mat_mul_v))
        self.matrix = np.array([
            [1, 2, 3],
            [4, 5, 6]
        ])
        self.vector = np.array(
            [1, 0, 1]
        )
        self.assertEqual(msm_lib.mat_mul_v(self.matrix, self.vector), [4, 10])
        self.matrix = np.array([
            [-5, -4, 2],
            [1, 6, -3],
            [3, 5.5, -4]
        ])
        self.vector = np.array(
            [1, 2, -3]
        )
        self.assertEqual(msm_lib.mat_mul_v(self.matrix, self.vector), [-19, 22, 26])

    def test_rand_rate(self):
        """rand_rate produces a square matrix that varies between calls."""
        testT = np.array([
            [10, 2, 1],
            [1, 1, 1],
            [0, 1, 0]
        ])
        self.random1 = msm_lib.rand_rate(nkeep=3, count=testT)
        self.random2 = msm_lib.rand_rate(nkeep=3, count=testT)
        self.assertEqual(self.random1.shape, (3, 3))
        self.assertFalse((self.random1 == self.random2).all())

    def test_traj_split(self):
        """traj_split writes the split trajectories to a temp file on disk."""
        traj1 = traj.TimeSeries(distraj=[1, 2, 3], dt=1.)
        traj2 = traj.TimeSeries(distraj=[3, 2, 1], dt=2.)
        trajs = [traj1, traj2]
        self.filepath = msm_lib.traj_split(data=trajs, lagt=10)
        self.assertIsInstance(self.filepath, str)
        self.assertTrue(os.path.exists(self.filepath))
        os.remove(self.filepath)  # clean temp file

    def test_calc_trans(self):
        """calc_trans returns a square transition matrix.

        Bug fix: this method was named 'calc_trans' (no 'test_' prefix), so
        unittest never collected or ran it.
        """
        self.testT = msm_lib.calc_trans(nkeep=10)
        self.assertIsInstance(self.testT, np.ndarray)
        self.assertEqual(self.testT.shape, (10, 10))

    def test_calc_rate(self):
        """calc_rate converts a transition matrix into a square rate matrix."""
        self.testT = np.array([
            [1, 2, 3],
            [0, 0, 0],
            [10, 10, 10]
        ])
        self.rate = msm_lib.calc_rate(nkeep=3, trans=self.testT, lagt=10)
        self.assertIsInstance(self.rate, np.ndarray)
        self.assertEqual(self.rate.shape, (3, 3))

    def test_calc_lifetime(self):
        """calc_lifetime returns a dict of per-state lifetimes."""
        distraj = [1, 1, 1, 2]
        dt = 1.
        keys = [1, 2]
        data = [distraj, dt, keys]
        self.life = msm_lib.calc_lifetime(data)
        self.assertIsInstance(self.life, dict)
280 |
--------------------------------------------------------------------------------
/mastermsm/trajectory/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BioKT/MasterMSM/7e71b0fcf42cc7d840e58a6ca18450d710fbdbb4/mastermsm/trajectory/__init__.py
--------------------------------------------------------------------------------
/mastermsm/trajectory/traj.py:
--------------------------------------------------------------------------------
1 | """
2 | This file is part of the MasterMSM package.
3 |
4 | """
5 | import os
6 | import numpy as np
7 | import mdtraj as md
8 | from ..trajectory import traj_lib
9 |
def _load_mdtraj(top=None, traj=None, stride=None):
    """ Loads trajectories using mdtraj.

    Parameters
    ----------
    top: str
        The topology file, may be a PDB or GRO file.
    traj : str
        A list with the trajectory filenames to be read.
    stride : int
        Only read every stride-th frame.

    Returns
    -------
    mdtraj.Trajectory
        The trajectory object returned by ``md.load`` (a single
        Trajectory, not a list: md.load concatenates multiple files).

    """
    return md.load(traj, top=top, stride=stride)
27 |
class MultiTimeSeries(object):
    """ A class for generating multiple TimeSeries objects in
    a consistent way. In principle this is only needed when
    the clustering is not established a priori.

    """
    def __init__(self, top=None, trajs=None, dt=None, stride=None):
        """
        Parameters
        ----------
        dt : float
            The time step.
        top : string
            The topology file, may be a PDB or GRO file.
        trajs : list
            A list of trajectory filenames to be read.
        stride : int
            Only read every stride-th frame.

        """
        self.file_list = trajs
        self.traj_list = []
        for traj in self.file_list:
            tr = TimeSeries(top=top, traj=traj, stride=stride)
            self.traj_list.append(tr)

    def joint_discretize(self, method='backbone_torsions', mcs=None, ms=None, dPCA=False):
        """
        Discretize simultaneously all trajectories with HDBSCAN.

        Parameters
        ----------
        method : str
            The method of choice for the discretization. Options are 'backbone_torsions'
            and 'contacts'.
        mcs : int
            Minimum cluster size for HDBSCAN clustering.
        ms : int
            Minsamples parameter for HDBSCAN clustering.
        dPCA : bool
            Whether we are using the dihedral PCA method.

        Raises
        ------
        ValueError
            If `method` is not one of the supported options (previously
            this fell through to an unbound-variable NameError).

        """
        if method == 'backbone_torsions':
            labels = self.joint_discretize_backbone_torsions(mcs=mcs, ms=ms, dPCA=dPCA)
        elif method == 'contacts':
            labels = self.joint_discretize_contacts(mcs=mcs, ms=ms)
        else:
            raise ValueError("unknown discretization method: %s" % method)

        # Slice the concatenated label array back into per-trajectory pieces.
        i = 0
        for tr in self.traj_list:
            ltraj = tr.mdt.n_frames
            tr.distraj = list(labels[i:i + ltraj])
            i += ltraj

    def joint_discretize_backbone_torsions(self, mcs=None, ms=None, dPCA=False):
        """
        Analyze jointly torsion angles from multiple trajectories.

        Parameters
        ----------
        mcs : int
            Minimum cluster size for HDBSCAN clustering.
        ms : int
            Minsamples parameter for HDBSCAN clustering.
        dPCA : bool
            Whether we are using the dihedral PCA method.

        Returns
        -------
        labels : array
            Cluster labels for the concatenated frames of all trajectories.

        """
        # First we build the fake trajectory combining data
        phi_cum = []
        psi_cum = []
        for tr in self.traj_list:
            phi = md.compute_phi(tr.mdt)
            psi = md.compute_psi(tr.mdt)
            phi_cum.append(phi[1])
            psi_cum.append(psi[1])
        phi_cum = np.vstack(phi_cum)
        psi_cum = np.vstack(psi_cum)

        # Then we generate the consistent set of clusters
        if dPCA is True:
            angles = np.column_stack((phi_cum, psi_cum))
            v = traj_lib.dPCA(angles)
            labels = traj_lib.discrete_backbone_torsion(mcs, ms, pcs=v, dPCA=True)
        else:
            # NOTE(review): phi/psi here are the index arrays from the *last*
            # trajectory in the loop, paired with the stacked angle values —
            # presumably all trajectories share the same topology; confirm.
            phi_fake = [phi[0], phi_cum]
            psi_fake = [psi[0], psi_cum]
            labels = traj_lib.discrete_backbone_torsion(mcs, ms, phi=phi_fake, psi=psi_fake)
        return labels

    def joint_discretize_contacts(self, mcs=None, ms=None):
        """
        Analyze jointly pairwise contacts from all trajectories.

        Produces a fake trajectory comprising a concatenated set
        to recover the labels from HDBSCAN.

        Returns
        -------
        labels : array
            Cluster labels for the concatenated frames of all trajectories.

        """
        mdt_cum = []
        for tr in self.traj_list:
            mdt_cum.append(tr.mdt)

        labels = traj_lib.discrete_contacts_hdbscan(mcs, ms, mdt_cum)

        return labels
131 |
class TimeSeries(object):
    """ A class to read and discretize simulation trajectories.
    When simulation trajectories are provided, frames are read
    and discretized using mdtraj [1]_. Alternatively, a discrete
    trajectory can be provided.

    Attributes
    ----------
    mdt :
        An mdtraj Trajectory object.
    file_name : str
        The name of the trajectory file.
    distraj : list
        The assigned trajectory.
    dt : float
        The time step

    References
    ----------
    .. [1] McGibbon, RT., Beauchamp, KA., Harrigan, MP., Klein, C.,
        Swails, JM., Hernandez, CX., Schwantes, CR., Wang, LP., Lane,
        TJ. and Pande, VS." MDTraj: A Modern Open Library for the Analysis
        of Molecular Dynamics Trajectories", Biophys. J. (2015).

    """
    def __init__(self, top=None, traj=None, dt=None, \
            distraj=None, stride=None):
        """
        Parameters
        ----------
        distraj : string
            The discrete state trajectory file.
        dt : float
            The time step.
        top : string
            The topology file, may be a PDB or GRO file.
        traj : string
            The trajectory filenames to be read.
        stride : int
            Only read every stride-th frame

        """
        if distraj is not None:
            # A discrete trajectory is provided
            self.distraj, self.dt = self._read_distraj(distraj=distraj, dt=dt)
        else:
            # An MD trajectory is provided
            self.file_name = traj
            mdt = _load_mdtraj(top=top, traj=traj, stride=stride)
            self.mdt = mdt
            self.dt = self.mdt.timestep

    def _read_distraj(self, distraj=None, dt=None):
        """ Loads discrete trajectories directly.

        Parameters
        ----------
        distraj : str, list
            File or list with discrete trajectory.
        dt : float
            Time step; defaults to 1 when not derivable from the input.

        Returns
        -------
        cstates : list
            The discrete states (converted to int when possible).
        dt : float
            The time step, taken from the file timestamps when present.

        """
        if isinstance(distraj, list):
            cstates = distraj
            if dt is None:
                dt = 1.
            return cstates, dt

        elif os.path.isfile(distraj):
            # Fix: the file handle was previously never closed.
            with open(distraj, "r") as fin:
                raw = fin.readlines()
            try:
                # Two-column format: "<time> <state>"; dt from timestamps.
                cstates = [x.split()[1] for x in raw]
                dt = float(raw[2].split()[0]) - float(raw[1].split()[0])
                try:  # make them integers if you can
                    cstates = [int(x) for x in cstates]
                except ValueError:
                    pass
                return cstates, dt
            except IndexError:
                # Single-column format: one state per line, unit time step.
                cstates = [x.split()[0] for x in raw]
                return cstates, 1.
        # NOTE(review): any other input falls through and returns None, which
        # makes the caller's tuple-unpacking raise TypeError — confirm intended.

    def discretize(self, method="rama", states=None, nbins=20,\
            mcs=100, ms=50):
        """ Discretize the simulation data.

        Parameters
        ----------
        method : str
            A method for doing the clustering. Options are
            "rama", "ramagrid", "rama_hdb", "contacts_hdb";
            where the latter two use HDBSCAN.
        states : list
            A list of states to be considered in the discretization.
            Only for method "rama".
        nbins : int
            Number of bins in the grid. Only for "ramagrid".
        mcs : int
            min_cluster_size for HDBSCAN
        ms : int
            min_samples for HDBSCAN

        Raises
        ------
        ValueError
            If `method` is not one of the supported options (previously
            an unknown method silently left `distraj` unset).

        """
        if method == "rama":
            phi = md.compute_phi(self.mdt)
            psi = md.compute_psi(self.mdt)
            self.distraj = traj_lib.discrete_rama(phi, psi, states=states)
        elif method == "ramagrid":
            phi = md.compute_phi(self.mdt)
            psi = md.compute_psi(self.mdt)
            self.distraj = traj_lib.discrete_ramagrid(phi, psi, nbins)
        elif method == "rama_hdb":
            phi = md.compute_phi(self.mdt)
            psi = md.compute_psi(self.mdt)
            self.distraj = traj_lib.discrete_backbone_torsion(mcs, ms, phi=phi, psi=psi)
        elif method == "contacts_hdb":
            self.distraj = traj_lib.discrete_contacts_hdbscan(mcs, ms, self.mdt)
        else:
            raise ValueError("unknown discretization method: %s" % method)

    def find_keys(self, exclude=['O']):
        """ Finds out the discrete states in the trajectory

        Parameters
        ----------
        exclude : list
            A list of strings with states to exclude.

        """
        keys = []
        for s in self.distraj:
            if s not in keys and s not in exclude:
                keys.append(s)
        self.keys = keys

    def gc(self):
        """
        Gets rid of the mdtraj attribute

        """
        delattr(self, "mdt")
281 |
282 | # def discrete_rama(self, A=[-100, -40, -60, 0], \
283 | # L=[-180, -40, 120., 180.], \
284 | # E=[50., 100., -40., 70.]):
285 | # """ Discretize based on Ramachandran angles.
286 | #
287 | # """
288 | # for t in self.mdtrajs:
289 | # phi,psi = zip(mdtraj.compute_phi(traj), mdtraj.compute_psi(traj))
290 | #
291 |
--------------------------------------------------------------------------------
/mastermsm/trajectory/traj_lib.py:
--------------------------------------------------------------------------------
1 | """
2 | This file is part of the MasterMSM package.
3 |
4 | """
5 | #import h5py
6 | import copy
7 | import sys
8 | import math
9 | import hdbscan
10 | import numpy as np
11 | from sklearn.preprocessing import StandardScaler
12 | from sklearn.decomposition import PCA
13 | import mdtraj as md
14 | import matplotlib.pyplot as plt
15 |
def discrete_rama(phi, psi, seq=None, bounds=None, states=['A', 'E', 'L']):
    """ Assign a set of phi, psi angles to coarse states.

    Parameters
    ----------
    phi : list
        A list of Phi Ramachandran angles.
    psi : list
        A list of Psi Ramachandran angles.
    seq : list
        Sequence of states.
    bounds : dict
        Alternative bounds for transition based assignment, mapping
        state labels to [phi_lo, phi_hi, psi_lo, psi_hi] lists.
    states : list
        The states that will be used in the assignment.

    Returns
    -------
    cstates : list
        The sequence of coarse states.

    Notes
    -----
    Here we follow Buchete and Hummer for the assignment procedure [1]_ .

    .. [1] N. V. Buchete and G. Hummer, "Coarse master equations for peptide folding dynamics", J. Phys. Chem. B. (2008).

    """
    if bounds is None:
        TBA_bounds = {}
        if 'A' in states:
            TBA_bounds['A'] = [ -100., -40., -50., -10. ]
        if 'E' in states:
            TBA_bounds['E'] = [ -180., -40., 125.,165. ]
        if 'L' in states:
            TBA_bounds['L'] = [ 50., 100., -40.,70.0 ]
    else:
        # Fix: custom bounds were previously ignored, raising NameError below.
        TBA_bounds = bounds

    if len(phi[0]) != len(psi[0]):
        print (" Different number of phi and psi dihedrals")
        print (" STOPPING HERE")
        sys.exit()

    cstates = []
    prev_s_string = ""
    ndih = len(phi[0])
    for f, y in zip(phi[1], psi[1]):
        s_string = []
        for n in range(ndih):
            # Angles come from mdtraj in radians; bounds are in degrees.
            s, _ = _state(f[n]*180/math.pi, y[n]*180/math.pi, TBA_bounds)
            if s == "O":
                # Transition-based assignment: an unassigned frame inherits
                # the previous state of this dihedral when one exists.
                try:
                    s_string += prev_s_string[n]
                except IndexError:
                    s_string += "O"
            else:
                s_string += s
        cstates.append(''.join(s_string))
        prev_s_string = s_string
    return cstates
78 |
def discrete_ramagrid(phi, psi, nbins):
    """ Finely partition the Ramachandran map into a grid of states.

    Parameters
    ----------
    phi : list
        A list of Phi Ramachandran angles.
    psi : list
        A list of Psi Ramachandran angles.
    nbins : int
        The number of bins in the grid in each dimension.

    Returns
    -------
    cstates : list
        The sequence of coarse states (one grid-bin index per frame).

    """
    return [_stategrid(f, y, nbins) for f, y in zip(phi[1], psi[1])]
102 |
103 | #stats_out = open(stats_file,"w")
104 | #cum = 0
105 | #for s in stats_list:
106 | # cum+=s[1]
107 | # #stats_out.write("%s %8i %8i %12.6f\n"%\
108 | # # (s[0],s[1],cum,qave[s[0]]/float(s[1])))
109 | # stats_out.write("%s %8i %8i\n"%\
110 | # (s[0],s[1],cum))
111 | #
112 | #stats_out.close()
113 | #state_out.close()
114 | #
115 | #def isnative(native_string, string):
116 | # s = ""
117 | # for i in range(len(string)):
118 | # if string[i]==native_string[i]:
119 | # s+="1"
120 | # else:
121 | # s+="0"
122 | # return s
123 | #
124 | def _inrange( x, lo, hi ):
125 | if x > lo and x < hi:
126 | return 1
127 | else:
128 | return 0
129 |
130 | def _inbounds(bounds,phi, psi):
131 | if _inrange( phi,bounds[0],bounds[1]) and _inrange( psi,bounds[2],bounds[3]):
132 | return 1
133 | if len(bounds) > 4:
134 | if _inrange( phi,bounds[4],bounds[5]) and _inrange( psi,bounds[6],bounds[7]):
135 | return 1
136 | if len(bounds) > 8:
137 | if _inrange( phi,bounds[8],bounds[9]) and _inrange( psi,bounds[10],bounds[11]):
138 | return 1
139 | if len(bounds) > 12:
140 | if _inrange( phi,bounds[12],bounds[13]) and _inrange( psi,bounds[14],bounds[15]):
141 | return 1
142 | return 0
143 |
def _state(phi, psi, bounds):
    """ Finds coarse state for a pair of phi-psi dihedrals

    Parameters
    ----------
    phi : float
        Phi dihedral angle
    psi : float
        Psi dihedral angle
    bounds : dict
        Dictionary containing list of states and their respective bounds

    Returns
    -------
    k : string
        Key for assigned state ('O' when no region matches)
    list
        Empty on a match; otherwise the unassigned (phi, psi) pair

    """
    for key, region in bounds.items():
        if _inbounds(region, phi, psi):
            return key, []
    # No region matched: unassigned ("O") state.
    return 'O', [(phi, psi)]
180 |
181 | #def stats_sort(x,y):
182 | # xx = x[1]
183 | # yy = y[1]
184 | # return yy-xx
185 | #
186 | ##if len(sys.argv)<5:
187 | ## sys.stdout.write(Usage)
188 | ## sys.exit(0)
189 | #
190 | #torsion_file = sys.argv[1]
191 | ##q_file = sys.argv[2]
192 | #state_file = sys.argv[2]
193 | #stats_file = sys.argv[3]
194 |
195 | def _stategrid(phi, psi, nbins):
196 | """ Finds coarse state for a pair of phi-psi dihedrals
197 |
198 | Parameters
199 | ----------
200 | phi : float
201 | Phi dihedral angle
202 | psi : float
203 | Psi dihedral angle
204 | nbins : int
205 | Number of bins in each dimension of the grid
206 |
207 | Returns
208 | -------
209 | k : int
210 | Index of bin
211 |
212 | """
213 | #print phi, psi
214 | #print "column :", int(0.5*(phi + math.pi)/math.pi*nbins)
215 | #print "row :", int(0.5*(psi + math.pi)/math.pi*nbins)
216 | ibin = int(0.5*nbins*(phi/math.pi + 1.)) + int(0.5*nbins*(psi/math.pi + 1))*nbins
217 | return ibin
218 |
def discrete_backbone_torsion(mcs, ms, phi=None, psi=None, \
        pcs=None, dPCA=False):
    """
    Discretize backbone torsion angles

    Assign a set of phi, psi angles (or their corresponding
    dPCA variables if dPCA=True) to coarse states
    by using the HDBSCAN algorithm.

    Parameters
    ----------
    mcs : int
        min_cluster_size for HDBSCAN; when None, sqrt(#samples) is used
    ms : int
        min_samples for HDBSCAN; when None, defaults to mcs
    phi : list
        Phi Ramachandran angles; phi[1] holds the per-frame angle
        arrays (phi[0] presumably carries time stamps -- confirm
        against the caller)
    psi : list
        Psi Ramachandran angles, same layout as phi
    pcs : matrix
        Matrix containing principal components obtained
        from PCA of dihedral angles (used when dPCA=True)
    dPCA : bool
        Cluster principal components instead of raw angles

    Returns
    -------
    labels : array
        HDBSCAN cluster label per sample (-1 marks noise)

    """
    if dPCA:
        X = pcs
    else:
        if len(phi[0]) != len(psi[0]):
            raise ValueError("Inconsistent dimensions for angles")

        # flatten the per-frame angle arrays into two parallel 1-D lists
        ndih = len(phi[0])
        flat_phi, flat_psi = [], []
        for frame_phi, frame_psi in zip(phi[1], psi[1]):
            for n in range(ndih):
                flat_phi.append(frame_phi[n])
                flat_psi.append(frame_psi[n])
        np.savetxt("phi_psi.dat", np.column_stack((flat_phi, flat_psi)))
        # shift periodic angles so clusters are not split at +/- pi
        flat_psi, flat_phi = _shift(flat_psi, flat_phi)
        data = np.column_stack((flat_phi, flat_psi))
        np.savetxt("phi_psi_shifted.dat", data)
        X = StandardScaler().fit_transform(data)

    # fall back to heuristic clustering parameters when not provided
    if mcs is None:
        mcs = int(np.sqrt(len(X)))
        print("Setting minimum cluster size to: %g" % mcs)
    if ms is None:
        ms = mcs
        print("Setting min samples to: %g" % ms)

    hdb = hdbscan.HDBSCAN(min_cluster_size=mcs, min_samples=ms).fit(X)
    hdb.condensed_tree_.plot(select_clusters=True)
    return hdb.labels_
305 |
def dPCA(angles):
    """
    Compute PCA of dihedral angles

    We follow the methods described in A. Altis et al.
    *J. Chem. Phys.* 244111 (2007)

    Parameters
    ----------
    angles : angles ordered by columns

    Returns
    -------
    X_transf : dPCA components to retrieve 80%
        of variance ordered by columns

    """
    nframes, ndih = np.shape(angles)
    # encode every angle as a (cos, sin) pair so the periodicity of the
    # dihedrals does not distort the PCA (the "dPCA" trick of Altis et al.)
    X = np.zeros((nframes, 2*ndih))
    for i, frame in enumerate(angles):
        col = 0
        for ang in frame:
            X[i][col] = np.cos(ang)
            X[i][col+1] = np.sin(ang)
            col += 2

    X_std = StandardScaler().fit_transform(X)
    pca = PCA(n_components=2*ndih)
    X_transf = pca.fit_transform(X_std)
    expl = pca.explained_variance_ratio_
    print("Ratio of variance retrieved by each component:", expl)

    # keep the leading components that together explain >= 80% of variance
    ncomp = 0
    cum_var = 0.0
    while cum_var < 0.8:
        cum_var += expl[ncomp]
        ncomp += 1

    return X_transf[:, :ncomp]
370 |
def discrete_contacts_hdbscan(mcs, ms, mdt_all):
    """
    HDBSCAN discretization based on contacts

    Parameters
    ----------
    mcs : int
        min_cluster_size for HDBSCAN; when None, sqrt(#samples) is used
    ms : int
        min_samples for HDBSCAN; when None, 100 is used
    mdt_all : list
        List of mdtraj trajectory objects

    Returns
    -------
    labels : list
        Indexes corresponding to the clustering

    """
    # gather per-frame contact distance vectors from every trajectory
    frames = []
    for traj in mdt_all:
        contact_data = md.compute_contacts(traj, contacts='all', periodic=True)
        for row in contact_data[0]:
            frames.append(row)

    X = StandardScaler().fit_transform(frames)
    if mcs is None:
        mcs = int(np.sqrt(len(X)))
    if ms is None:
        ms = 100
    clusterer = hdbscan.HDBSCAN(min_cluster_size=mcs, min_samples=ms)
    clusterer.fit(X)
    clusterer.condensed_tree_.plot(select_clusters=True)
    plt.savefig("hdbscan-tree.png",dpi=300,transparent=True)

    # with only noise plus a single cluster there is nothing to model
    if (len(np.unique(clusterer.labels_))<=2):
        raise Exception("Cannot generate clusters from contacts")

    # forward-fill noise frames to produce a continuous discrete trajectory
    return _filter_states(clusterer.labels_)
411 |
412 | def _filter_states(states):
413 | """
414 | Filters to remove not-assigned frames when using dbscan or hdbscan
415 |
416 | """
417 | fs = []
418 | for s in states:
419 | if s >= 0:
420 | fs.append(s)
421 | else:
422 | try:
423 | fs.append(fs[-1])
424 | except IndexError:
425 | pass
426 | return fs
427 |
428 | def _shift(psi, phi):
429 | psi_s, phi_s = copy.deepcopy(phi), copy.deepcopy(psi)
430 | for i in range(len(phi_s)):
431 | if phi_s[i] < -2:
432 | phi_s[i] += 2*np.pi
433 | for i in range(len(psi_s)):
434 | if psi_s[i] > 2:
435 | psi_s[i] -= 2*np.pi
436 | return phi_s, psi_s
437 |
--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
---
# MkDocs configuration for the MasterMSM documentation site.
site_name: MasterMSM
theme: readthedocs
repo_url: https://github.com/daviddesancho/MasterMSM
site_author: David De Sancho
# NOTE(review): "pages" was deprecated in favour of "nav" in MkDocs 1.0;
# kept here for compatibility with the MkDocs version this repo pins —
# confirm before migrating.
pages:
  - ['about.md', 'About']
  - ['index.md', 'Introduction']
  - ['installation.md', 'Installation']
  - ['discretize.md', 'User guide', 'Discretizing the data']
  - ['trajectory.md', 'User guide', 'Parsing trajectories']
  - ['msm.md', 'User guide', 'Constructing the MSM']
  - ['fewsm.md', 'User guide', 'Clustering the MSM']
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | scipy
3 | matplotlib
4 | networkx
5 | mdtraj
6 | hdbscan
7 | scikit-learn
8 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | # Setup script for bestmsm package
4 |
5 | import os
6 | from setuptools import setup, find_packages
7 |
def read(fname):
    """Return the contents of fname, resolved relative to this script's
    directory.

    Uses a context manager so the file handle is closed deterministically
    (the original left it open until garbage collection).
    """
    with open(os.path.join(os.path.dirname(__file__), fname)) as fh:
        return fh.read()
10 |
# Package metadata: registers MasterMSM with setuptools, pulling the long
# description from README.md and discovering subpackages automatically.
setup(
    name='MasterMSM',
    version='0.1dev',
    description='Algorithms to construct master equation / Markov state models',
    url='http://github.com/daviddesancho/MasterMSM',
    author='David De Sancho',
    author_email='daviddesancho.at.gmail.com',
    license='GPL',
    # picks up mastermsm and its subpackages (msm, fewsm, trajectory, test)
    packages=find_packages(),
    keywords= "markov state model",
    long_description=read('README.md'),
    # trove classifiers; note the declared Python 2.7 target
    classifiers = ["""\
Development Status :: 1 - Planning
Operating System :: POSIX :: Linux
Operating System :: MacOS
Programming Language :: Python :: 2.7
Topic :: Scientific/Engineering :: Bio-Informatics
Topic :: Scientific/Engineering :: Chemistry
"""]
)
31 |
--------------------------------------------------------------------------------