├── .gitattributes
├── .github
    └── workflows
    │   └── main.yml
├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── docs
    ├── Makefile
    └── source
    │   ├── about.rst
    │   ├── conf.py
    │   ├── documentation.rst
    │   ├── img
    │       └── mastermsm.png
    │   ├── index.rst
    │   ├── installation.rst
    │   ├── mastermsm.fewsm.rst
    │   ├── mastermsm.msm.rst
    │   ├── mastermsm.rst
    │   ├── mastermsm.trajectory.rst
    │   ├── modules.rst
    │   └── support.rst
├── examples
    ├── README.md
    ├── alanine_dipeptide
    │   ├── ala_dipeptide.ipynb
    │   ├── ala_dipeptide_discretize.ipynb
    │   ├── ala_dipeptide_dpca.ipynb
    │   ├── ala_dipeptide_maxlike.ipynb
    │   └── ala_dipeptide_multi.ipynb
    ├── alanine_pentapeptide
    │   ├── ala_pentapeptide.ipynb
    │   ├── ala_pentapeptide_contacts.ipynb
    │   └── ala_pentapeptide_dpca.ipynb
    ├── bistable_potential
    │   ├── 1D_smFS_MSM.ipynb
    │   └── 2D_smFS_MSM.ipynb
    ├── mueller_potential
    │   ├── .ipynb_checkpoints
    │   │   └── mueller_potential-checkpoint.ipynb
    │   ├── mueller.py
    │   ├── mueller_potential.ipynb
    │   └── mueller_potential_openmm.ipynb
    └── schutte_potential
    │   └── schute_mastermsm.ipynb
├── mastermsm
    ├── __init__.py
    ├── fewsm
    │   ├── __init__.py
    │   ├── fewsm.py
    │   └── fewsm_lib.py
    ├── msm
    │   ├── __init__.py
    │   ├── msm.py
    │   └── msm_lib.py
    ├── test
    │   ├── README.md
    │   ├── __init__.py
    │   ├── download_data.py
    │   ├── test_fewsm.py
    │   ├── test_msm.py
    │   └── test_trajectory.py
    └── trajectory
    │   ├── __init__.py
    │   ├── traj.py
    │   └── traj_lib.py
├── mkdocs.yml
├── requirements.txt
└── setup.py


/.gitattributes:
--------------------------------------------------------------------------------
1 | *.xtc filter=lfs diff=lfs merge=lfs -text
2 | 


--------------------------------------------------------------------------------
/.github/workflows/main.yml:
--------------------------------------------------------------------------------
 1 | # This is a basic workflow to help you get started with Actions
 2 | 
 3 | name: unittests
 4 | 
 5 | ## Controls when the workflow will run
 6 | on:
 7 | #  # Triggers the workflow on push or pull request events but only for the master branch
 8 |   push:
 9 |     branches: [ master ]
10 |   pull_request:
11 |     branches: [ master ]
12 | #
13 | #  # Allows you to run this workflow manually from the Actions tab
14 | #  workflow_dispatch:
15 | 
16 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel
17 | jobs:
18 |   # This workflow contains a single job called "build"
19 |   build:
20 |     # The type of runner that the job will run on
21 |     runs-on: ubuntu-latest
22 | 
23 |     # Steps represent a sequence of tasks that will be executed as part of the job
24 |     steps:
25 |       - uses: actions/checkout@v3
26 |       - name: Set up Python
27 |         uses: actions/setup-python@v3
28 |         with:
29 |           python-version: '3.x' 
30 |       - name: Download repository
31 |         run: |
32 |            python --version
33 |            if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
34 |            python setup.py install
35 | 
36 |       - name: Run tests
37 |         run: |
38 |            cd "$GITHUB_WORKSPACE"  # checkout root; no step creates $HOME/mastermsm, so the old cd failed
39 |            python -m unittest  # default discovery from the repo root reaches mastermsm/test
40 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | site/
2 | docs/build
3 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | python:
 3 |     - 3.6 
 4 | 
 5 | travis: 
 6 |     - develop
 7 | 
 8 | install:
 9 |     - python setup.py install 
10 | 
11 | script:
12 |   # avoid running in the checkout directory so nose finds built modules..
13 |     - rundir=$HOME
14 |     - cd $rundir
15 |     - nosetests -v --with-coverage
16 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                     GNU GENERAL PUBLIC LICENSE
  2 |                        Version 2, June 1991
  3 | 
  4 |  Copyright (C) 1989, 1991 Free Software Foundation, Inc., <http://fsf.org/>
  5 |  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  6 |  Everyone is permitted to copy and distribute verbatim copies
  7 |  of this license document, but changing it is not allowed.
  8 | 
  9 |                             Preamble
 10 | 
 11 |   The licenses for most software are designed to take away your
 12 | freedom to share and change it.  By contrast, the GNU General Public
 13 | License is intended to guarantee your freedom to share and change free
 14 | software--to make sure the software is free for all its users.  This
 15 | General Public License applies to most of the Free Software
 16 | Foundation's software and to any other program whose authors commit to
 17 | using it.  (Some other Free Software Foundation software is covered by
 18 | the GNU Lesser General Public License instead.)  You can apply it to
 19 | your programs, too.
 20 | 
 21 |   When we speak of free software, we are referring to freedom, not
 22 | price.  Our General Public Licenses are designed to make sure that you
 23 | have the freedom to distribute copies of free software (and charge for
 24 | this service if you wish), that you receive source code or can get it
 25 | if you want it, that you can change the software or use pieces of it
 26 | in new free programs; and that you know you can do these things.
 27 | 
 28 |   To protect your rights, we need to make restrictions that forbid
 29 | anyone to deny you these rights or to ask you to surrender the rights.
 30 | These restrictions translate to certain responsibilities for you if you
 31 | distribute copies of the software, or if you modify it.
 32 | 
 33 |   For example, if you distribute copies of such a program, whether
 34 | gratis or for a fee, you must give the recipients all the rights that
 35 | you have.  You must make sure that they, too, receive or can get the
 36 | source code.  And you must show them these terms so they know their
 37 | rights.
 38 | 
 39 |   We protect your rights with two steps: (1) copyright the software, and
 40 | (2) offer you this license which gives you legal permission to copy,
 41 | distribute and/or modify the software.
 42 | 
 43 |   Also, for each author's protection and ours, we want to make certain
 44 | that everyone understands that there is no warranty for this free
 45 | software.  If the software is modified by someone else and passed on, we
 46 | want its recipients to know that what they have is not the original, so
 47 | that any problems introduced by others will not reflect on the original
 48 | authors' reputations.
 49 | 
 50 |   Finally, any free program is threatened constantly by software
 51 | patents.  We wish to avoid the danger that redistributors of a free
 52 | program will individually obtain patent licenses, in effect making the
 53 | program proprietary.  To prevent this, we have made it clear that any
 54 | patent must be licensed for everyone's free use or not licensed at all.
 55 | 
 56 |   The precise terms and conditions for copying, distribution and
 57 | modification follow.
 58 | 
 59 |                     GNU GENERAL PUBLIC LICENSE
 60 |    TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
 61 | 
 62 |   0. This License applies to any program or other work which contains
 63 | a notice placed by the copyright holder saying it may be distributed
 64 | under the terms of this General Public License.  The "Program", below,
 65 | refers to any such program or work, and a "work based on the Program"
 66 | means either the Program or any derivative work under copyright law:
 67 | that is to say, a work containing the Program or a portion of it,
 68 | either verbatim or with modifications and/or translated into another
 69 | language.  (Hereinafter, translation is included without limitation in
 70 | the term "modification".)  Each licensee is addressed as "you".
 71 | 
 72 | Activities other than copying, distribution and modification are not
 73 | covered by this License; they are outside its scope.  The act of
 74 | running the Program is not restricted, and the output from the Program
 75 | is covered only if its contents constitute a work based on the
 76 | Program (independent of having been made by running the Program).
 77 | Whether that is true depends on what the Program does.
 78 | 
 79 |   1. You may copy and distribute verbatim copies of the Program's
 80 | source code as you receive it, in any medium, provided that you
 81 | conspicuously and appropriately publish on each copy an appropriate
 82 | copyright notice and disclaimer of warranty; keep intact all the
 83 | notices that refer to this License and to the absence of any warranty;
 84 | and give any other recipients of the Program a copy of this License
 85 | along with the Program.
 86 | 
 87 | You may charge a fee for the physical act of transferring a copy, and
 88 | you may at your option offer warranty protection in exchange for a fee.
 89 | 
 90 |   2. You may modify your copy or copies of the Program or any portion
 91 | of it, thus forming a work based on the Program, and copy and
 92 | distribute such modifications or work under the terms of Section 1
 93 | above, provided that you also meet all of these conditions:
 94 | 
 95 |     a) You must cause the modified files to carry prominent notices
 96 |     stating that you changed the files and the date of any change.
 97 | 
 98 |     b) You must cause any work that you distribute or publish, that in
 99 |     whole or in part contains or is derived from the Program or any
100 |     part thereof, to be licensed as a whole at no charge to all third
101 |     parties under the terms of this License.
102 | 
103 |     c) If the modified program normally reads commands interactively
104 |     when run, you must cause it, when started running for such
105 |     interactive use in the most ordinary way, to print or display an
106 |     announcement including an appropriate copyright notice and a
107 |     notice that there is no warranty (or else, saying that you provide
108 |     a warranty) and that users may redistribute the program under
109 |     these conditions, and telling the user how to view a copy of this
110 |     License.  (Exception: if the Program itself is interactive but
111 |     does not normally print such an announcement, your work based on
112 |     the Program is not required to print an announcement.)
113 | 
114 | These requirements apply to the modified work as a whole.  If
115 | identifiable sections of that work are not derived from the Program,
116 | and can be reasonably considered independent and separate works in
117 | themselves, then this License, and its terms, do not apply to those
118 | sections when you distribute them as separate works.  But when you
119 | distribute the same sections as part of a whole which is a work based
120 | on the Program, the distribution of the whole must be on the terms of
121 | this License, whose permissions for other licensees extend to the
122 | entire whole, and thus to each and every part regardless of who wrote it.
123 | 
124 | Thus, it is not the intent of this section to claim rights or contest
125 | your rights to work written entirely by you; rather, the intent is to
126 | exercise the right to control the distribution of derivative or
127 | collective works based on the Program.
128 | 
129 | In addition, mere aggregation of another work not based on the Program
130 | with the Program (or with a work based on the Program) on a volume of
131 | a storage or distribution medium does not bring the other work under
132 | the scope of this License.
133 | 
134 |   3. You may copy and distribute the Program (or a work based on it,
135 | under Section 2) in object code or executable form under the terms of
136 | Sections 1 and 2 above provided that you also do one of the following:
137 | 
138 |     a) Accompany it with the complete corresponding machine-readable
139 |     source code, which must be distributed under the terms of Sections
140 |     1 and 2 above on a medium customarily used for software interchange; or,
141 | 
142 |     b) Accompany it with a written offer, valid for at least three
143 |     years, to give any third party, for a charge no more than your
144 |     cost of physically performing source distribution, a complete
145 |     machine-readable copy of the corresponding source code, to be
146 |     distributed under the terms of Sections 1 and 2 above on a medium
147 |     customarily used for software interchange; or,
148 | 
149 |     c) Accompany it with the information you received as to the offer
150 |     to distribute corresponding source code.  (This alternative is
151 |     allowed only for noncommercial distribution and only if you
152 |     received the program in object code or executable form with such
153 |     an offer, in accord with Subsection b above.)
154 | 
155 | The source code for a work means the preferred form of the work for
156 | making modifications to it.  For an executable work, complete source
157 | code means all the source code for all modules it contains, plus any
158 | associated interface definition files, plus the scripts used to
159 | control compilation and installation of the executable.  However, as a
160 | special exception, the source code distributed need not include
161 | anything that is normally distributed (in either source or binary
162 | form) with the major components (compiler, kernel, and so on) of the
163 | operating system on which the executable runs, unless that component
164 | itself accompanies the executable.
165 | 
166 | If distribution of executable or object code is made by offering
167 | access to copy from a designated place, then offering equivalent
168 | access to copy the source code from the same place counts as
169 | distribution of the source code, even though third parties are not
170 | compelled to copy the source along with the object code.
171 | 
172 |   4. You may not copy, modify, sublicense, or distribute the Program
173 | except as expressly provided under this License.  Any attempt
174 | otherwise to copy, modify, sublicense or distribute the Program is
175 | void, and will automatically terminate your rights under this License.
176 | However, parties who have received copies, or rights, from you under
177 | this License will not have their licenses terminated so long as such
178 | parties remain in full compliance.
179 | 
180 |   5. You are not required to accept this License, since you have not
181 | signed it.  However, nothing else grants you permission to modify or
182 | distribute the Program or its derivative works.  These actions are
183 | prohibited by law if you do not accept this License.  Therefore, by
184 | modifying or distributing the Program (or any work based on the
185 | Program), you indicate your acceptance of this License to do so, and
186 | all its terms and conditions for copying, distributing or modifying
187 | the Program or works based on it.
188 | 
189 |   6. Each time you redistribute the Program (or any work based on the
190 | Program), the recipient automatically receives a license from the
191 | original licensor to copy, distribute or modify the Program subject to
192 | these terms and conditions.  You may not impose any further
193 | restrictions on the recipients' exercise of the rights granted herein.
194 | You are not responsible for enforcing compliance by third parties to
195 | this License.
196 | 
197 |   7. If, as a consequence of a court judgment or allegation of patent
198 | infringement or for any other reason (not limited to patent issues),
199 | conditions are imposed on you (whether by court order, agreement or
200 | otherwise) that contradict the conditions of this License, they do not
201 | excuse you from the conditions of this License.  If you cannot
202 | distribute so as to satisfy simultaneously your obligations under this
203 | License and any other pertinent obligations, then as a consequence you
204 | may not distribute the Program at all.  For example, if a patent
205 | license would not permit royalty-free redistribution of the Program by
206 | all those who receive copies directly or indirectly through you, then
207 | the only way you could satisfy both it and this License would be to
208 | refrain entirely from distribution of the Program.
209 | 
210 | If any portion of this section is held invalid or unenforceable under
211 | any particular circumstance, the balance of the section is intended to
212 | apply and the section as a whole is intended to apply in other
213 | circumstances.
214 | 
215 | It is not the purpose of this section to induce you to infringe any
216 | patents or other property right claims or to contest validity of any
217 | such claims; this section has the sole purpose of protecting the
218 | integrity of the free software distribution system, which is
219 | implemented by public license practices.  Many people have made
220 | generous contributions to the wide range of software distributed
221 | through that system in reliance on consistent application of that
222 | system; it is up to the author/donor to decide if he or she is willing
223 | to distribute software through any other system and a licensee cannot
224 | impose that choice.
225 | 
226 | This section is intended to make thoroughly clear what is believed to
227 | be a consequence of the rest of this License.
228 | 
229 |   8. If the distribution and/or use of the Program is restricted in
230 | certain countries either by patents or by copyrighted interfaces, the
231 | original copyright holder who places the Program under this License
232 | may add an explicit geographical distribution limitation excluding
233 | those countries, so that distribution is permitted only in or among
234 | countries not thus excluded.  In such case, this License incorporates
235 | the limitation as if written in the body of this License.
236 | 
237 |   9. The Free Software Foundation may publish revised and/or new versions
238 | of the General Public License from time to time.  Such new versions will
239 | be similar in spirit to the present version, but may differ in detail to
240 | address new problems or concerns.
241 | 
242 | Each version is given a distinguishing version number.  If the Program
243 | specifies a version number of this License which applies to it and "any
244 | later version", you have the option of following the terms and conditions
245 | either of that version or of any later version published by the Free
246 | Software Foundation.  If the Program does not specify a version number of
247 | this License, you may choose any version ever published by the Free Software
248 | Foundation.
249 | 
250 |   10. If you wish to incorporate parts of the Program into other free
251 | programs whose distribution conditions are different, write to the author
252 | to ask for permission.  For software which is copyrighted by the Free
253 | Software Foundation, write to the Free Software Foundation; we sometimes
254 | make exceptions for this.  Our decision will be guided by the two goals
255 | of preserving the free status of all derivatives of our free software and
256 | of promoting the sharing and reuse of software generally.
257 | 
258 |                             NO WARRANTY
259 | 
260 |   11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
268 | REPAIR OR CORRECTION.
269 | 
270 |   12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
278 | POSSIBILITY OF SUCH DAMAGES.
279 | 
280 |                      END OF TERMS AND CONDITIONS
281 | 
282 |             How to Apply These Terms to Your New Programs
283 | 
284 |   If you develop a new program, and you want it to be of the greatest
285 | possible use to the public, the best way to achieve this is to make it
286 | free software which everyone can redistribute and change under these terms.
287 | 
288 |   To do so, attach the following notices to the program.  It is safest
289 | to attach them to the start of each source file to most effectively
290 | convey the exclusion of warranty; and each file should have at least
291 | the "copyright" line and a pointer to where the full notice is found.
292 | 
293 |     {description}
294 |     Copyright (C) {year}  {fullname}
295 | 
296 |     This program is free software; you can redistribute it and/or modify
297 |     it under the terms of the GNU General Public License as published by
298 |     the Free Software Foundation; either version 2 of the License, or
299 |     (at your option) any later version.
300 | 
301 |     This program is distributed in the hope that it will be useful,
302 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
303 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
304 |     GNU General Public License for more details.
305 | 
306 |     You should have received a copy of the GNU General Public License along
307 |     with this program; if not, write to the Free Software Foundation, Inc.,
308 |     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
309 | 
310 | Also add information on how to contact you by electronic and paper mail.
311 | 
312 | If the program is interactive, make it output a short notice like this
313 | when it starts in an interactive mode:
314 | 
315 |     Gnomovision version 69, Copyright (C) year name of author
316 |     Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
317 |     This is free software, and you are welcome to redistribute it
318 |     under certain conditions; type `show c' for details.
319 | 
320 | The hypothetical commands `show w' and `show c' should show the appropriate
321 | parts of the General Public License.  Of course, the commands you use may
322 | be called something other than `show w' and `show c'; they could even be
323 | mouse-clicks or menu items--whatever suits your program.
324 | 
325 | You should also get your employer (if you work as a programmer) or your
326 | school, if any, to sign a "copyright disclaimer" for the program, if
327 | necessary.  Here is a sample; alter the names:
328 | 
329 |   Yoyodyne, Inc., hereby disclaims all copyright interest in the program
330 |   `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 | 
332 |   {signature of Ty Coon}, 1 April 1989
333 |   Ty Coon, President of Vice
334 | 
335 | This General Public License does not permit incorporating your program into
336 | proprietary programs.  If your program is a subroutine library, you may
337 | consider it more useful to permit linking proprietary applications with the
338 | library.  If this is what you want to do, use the GNU Lesser General
339 | Public License instead of this License.
340 | 
341 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | [![Documentation Status](https://readthedocs.org/projects/mastermsm/badge/?version=develop)](https://mastermsm.readthedocs.io/en/develop/?badge=develop)
 2 | [![Codacy Badge](https://app.codacy.com/project/badge/Grade/90d86f571f5c416b910a9dc4d1d8c569)](https://www.codacy.com/gh/BioKT/MasterMSM/dashboard?utm_source=github.com&amp;utm_medium=referral&amp;utm_content=BioKT/MasterMSM&amp;utm_campaign=Badge_Grade)
 3 | 
 4 | MasterMSM
 5 | =========
 6 | MasterMSM is a Python package for generating Markov state models (MSMs)
 7 | from molecular dynamics trajectories. We use a formulation based on 
 8 | the chemical master equation. This package will allow you to:
 9 | 
10 | *   Create Markov state / master equation models from biomolecular simulations.
11 | 
12 | *   Discretize trajectory data using dihedral angle based methods useful 
13 | for small peptides.
14 | 
15 | *   Calculate rate matrices using a variety of methods.
16 | 
17 | *   Obtain committors and reactive fluxes.
18 | 
19 | *   Carry out sensitivity analysis of networks.
20 | 
21 | You can read the documentation [here](https://mastermsm.readthedocs.io).
22 | 
23 | Contributors
24 | ------------
25 | This code has been written by David De Sancho with help from Anne Aguirre.
26 | 
27 | Installation
28 | ------------
29 |     git clone http://github.com/daviddesancho/MasterMSM destination/MasterMSM
30 |     cd destination/MasterMSM
31 |     python setup.py install --user
32 | 
33 | External libraries
34 | ------------------
35 |     mdtraj : https://mdtraj.org
36 | 
37 | Citation
38 | --------
39 |     @article{mastermsm,
40 |     author = "David De Sancho and Anne Aguirre",
41 |     title = "{MasterMSM: A Package for Constructing Master Equation Models of Molecular Dynamics}",
42 |     year = "2019",
43 |     month = "6",
44 |     journal = "J. Chem. Inf. Model.",
45 |     url = "https://doi.org/10.1021/acs.jcim.9b00468",
46 |     doi = "10.1021/acs.jcim.9b00468"
47 |     }
48 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line.
 5 | SPHINXOPTS    =
 6 | SPHINXBUILD   = sphinx-build
 7 | SPHINXPROJ    = MasterMSM
 8 | SOURCEDIR     = source
 9 | BUILDDIR      = build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)


--------------------------------------------------------------------------------
/docs/source/about.rst:
--------------------------------------------------------------------------------
 1 | About
 2 | ============
 3 | In the last decade, Master equation / Markov state models (usually termed MSMs) 
 4 | have become one of the key methodologies to analyze data from molecular dynamics
 5 | (MD) simulations. You can find information about MSMs in general in the following
 6 | volume:
 7 | 
 8 | * `An Introduction to Markov State Models and Their Application to Long Timescale Molecular Simulation <http://dx.doi.org/10.1007/978-94-007-7606-7>`_, edited by Pande, Bowman and Noe (Springer, 2014). 
 9 | 
10 | The MasterMSM library brings a different flavour of MSMs, based on the methods 
11 | introduced by N. V. Buchete and G. Hummer 
12 | (`J. Phys. Chem. B, 2008 <http://dx.doi.org/10.1021/jp0761665>`_).
13 | The central difference lies in that, instead of using transition matrices, we focus
14 | on rate matrices, which determine the time evolution of the system as described 
15 | by the chemical master equation.
16 | 


--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | #
  3 | # Configuration file for the Sphinx documentation builder.
  4 | #
  5 | # This file does only contain a selection of the most common options. For a
  6 | # full list see the documentation:
  7 | # http://www.sphinx-doc.org/en/master/config
  8 | 
  9 | # -- Path setup --------------------------------------------------------------
 10 | 
 11 | # If extensions (or modules to document with autodoc) are in another directory,
 12 | # add these directories to sys.path here. If the directory is relative to the
 13 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 14 | #
 15 | import os
 16 | import sys
 17 | 
 18 | sys.path.insert(0, os.path.abspath('../..'))
 19 | import mastermsm
 20 | #sys.path.append(os.path.join(os.path.abspath(os.pardir)))
 21 | 
 22 | 
 23 | 
 24 | # -- Project information -----------------------------------------------------
 25 | 
 26 | project = 'MasterMSM'
 27 | copyright = '2019, David De Sancho'
 28 | author = 'David De Sancho'
 29 | 
 30 | # The short X.Y version
 31 | version = ''
 32 | # The full version, including alpha/beta/rc tags
 33 | release = '1.1.1'
 34 | 
 35 | 
 36 | # -- General configuration ---------------------------------------------------
 37 | 
 38 | # If your documentation needs a minimal Sphinx version, state it here.
 39 | #
 40 | # needs_sphinx = '1.0'
 41 | 
 42 | # Add any Sphinx extension module names here, as strings. They can be
 43 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 44 | # ones.
 45 | extensions = [
 46 |     'sphinx.ext.autodoc',
 47 |     'sphinx.ext.coverage',
 48 |     'sphinx.ext.githubpages',
 49 |     'sphinx.ext.mathjax',
 50 |     'sphinx.ext.viewcode',
 51 |     'sphinx.ext.napoleon',
 52 |     'sphinx.ext.autosummary',
 53 |     'sphinx.ext.doctest',
 54 |     'sphinx.ext.inheritance_diagram']
 55 | 
 56 | # Add any paths that contain templates here, relative to this directory.
 57 | templates_path = ['_templates']
 58 | 
 59 | # The suffix(es) of source filenames.
 60 | # You can specify multiple suffix as a list of string:
 61 | #
 62 | source_suffix = ['.rst', '.md']
 63 | #source_suffix = '.rst'
 64 | 
 65 | # The master toctree document.
 66 | master_doc = 'index'
 67 | 
 68 | # The language for content autogenerated by Sphinx. Refer to documentation
 69 | # for a list of supported languages.
 70 | #
 71 | # This is also used if you do content translation via gettext catalogs.
 72 | # Usually you set "language" from the command line for these cases.
 73 | language = None
 74 | 
 75 | # List of patterns, relative to source directory, that match files and
 76 | # directories to ignore when looking for source files.
 77 | # This pattern also affects html_static_path and html_extra_path .
 78 | exclude_patterns = []
 79 | 
 80 | # The name of the Pygments (syntax highlighting) style to use.
 81 | pygments_style = 'sphinx'
 82 | 
 83 | 
 84 | # -- Options for HTML output -------------------------------------------------
 85 | 
 86 | # The theme to use for HTML and HTML Help pages.  See the documentation for
 87 | # a list of builtin themes.
 88 | #
 89 | 
 90 | html_theme = 'alabaster'  # active theme; the alternatives below are left commented out
 91 | #html_theme = 'default'
 92 | #html_theme = 'sphinx_rtd_theme'
 93 | html_logo = "img/mastermsm.png"  # logo file; matches docs/source/img/mastermsm.png in the repository
 94 | 
 95 | # Theme options are theme-specific and customize the look and feel of a theme
 96 | # further.  For a list of options available for each theme, see the
 97 | # documentation.
 98 | #
 99 | # html_theme_options = {}
100 | 
101 | # Paths (relative to this directory) holding custom static files such as style sheets.
102 | # They are copied after the builtin static files, so a "default.css" here overwrites the builtin one.
103 | # NOTE(review): no `_static` directory is visible in the repository tree — confirm it exists at build time.
104 | html_static_path = ['_static']
105 | 
106 | # Custom sidebar templates, must be a dictionary that maps document names
107 | # to template names.
108 | #
109 | # The default sidebars (for documents that don't match any pattern) are
110 | # defined by theme itself.  Builtin themes are using these templates by
111 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
112 | # 'searchbox.html']``.
113 | #
114 | # html_sidebars = {}
115 | 
116 | 
117 | # -- Options for HTMLHelp output ---------------------------------------------
118 | 
119 | # Output file base name for HTML help builder.
120 | htmlhelp_basename = 'MasterMSMdoc'
121 | 
122 | 
123 | # -- Options for LaTeX output ------------------------------------------------
124 | 
125 | latex_elements = {
126 |     # The paper size ('letterpaper' or 'a4paper').
127 |     #
128 |     # 'papersize': 'letterpaper',
129 | 
130 |     # The font size ('10pt', '11pt' or '12pt').
131 |     #
132 |     # 'pointsize': '10pt',
133 | 
134 |     # Additional stuff for the LaTeX preamble.
135 |     #
136 |     # 'preamble': '',
137 | 
138 |     # Latex figure (float) alignment
139 |     #
140 |     # 'figure_align': 'htbp',
141 | }
142 | 
143 | # Grouping the document tree into LaTeX files. List of tuples
144 | # (source start file, target name, title, author, documentclass [howto, manual, or own class]).
145 | # NOTE(review): the author is hard-coded here while man_pages/texinfo_documents use `author` — confirm they match.
146 | latex_documents = [
147 |     (master_doc, 'MasterMSM.tex', 'MasterMSM Documentation',
148 |      'David De Sancho', 'manual'),
149 | ]
150 | 
151 | # -- Options for manual page output ------------------------------------------
152 | 
153 | # One entry per manual page. List of tuples
154 | # (source start file, name, description, authors, manual section). `author` is presumably set in the project-information section earlier in this file — confirm.
155 | man_pages = [
156 |     (master_doc, 'mastermsm', 'MasterMSM Documentation',
157 |      [author], 1)
158 | ]
159 | 
160 | 
161 | # -- Options for Texinfo output ----------------------------------------------
162 | 
163 | # Grouping the document tree into Texinfo files. List of tuples
164 | # (source start file, target name, title, author,
165 | #  dir menu entry, description, category)
166 | texinfo_documents = [
167 |     (master_doc, 'MasterMSM', 'MasterMSM Documentation',
168 |      author, 'MasterMSM', 'One line description of project.',
169 |      'Miscellaneous'),
170 | ]
171 | 
172 | 
173 | # -- Extension configuration -------------------------------------------------
174 | 
175 | # Napoleon settings: options for the 'sphinx.ext.napoleon' extension listed in `extensions`.
176 | napoleon_google_docstring = True
177 | napoleon_numpy_docstring = True
178 | napoleon_include_private_with_doc = False
179 | napoleon_include_special_with_doc = False
180 | napoleon_use_admonition_for_examples = False
181 | napoleon_use_admonition_for_notes = False
182 | napoleon_use_admonition_for_references = False
183 | napoleon_use_ivar = True
184 | napoleon_use_param = True
185 | napoleon_use_rtype = True
186 | 


--------------------------------------------------------------------------------
/docs/source/documentation.rst:
--------------------------------------------------------------------------------
 1 | .. _documentation:
 2 | 
 3 | Modules
 4 | =============
 5 | MasterMSM is a Python package that is divided into three main subpackages.
 6 | This way of structuring the code derives from the three main types of 
 7 | objects that are constructed. First, there are trajectories, which 
 8 | result in objects of the ``TimeSeries`` class; second, there are dynamical
 9 | models, which come in the form of instances of the ``MSM`` class; finally,
10 | dynamical models can be postprocessed into simple, few-state models, which
11 | we generate as ``FEWSM`` class objects.
12 | 
13 | Trajectory module
14 | -----------------
15 | This module contains everything necessary to get your time series data
16 | into MasterMSM. The main class object within this module is the TimeSeries
17 | object.
18 | 
19 | .. currentmodule:: mastermsm
20 | 
21 | .. autosummary::
22 |     :toctree: 
23 | 
24 |     trajectory 
25 | 
26 | 
27 | MSM module
28 | ----------
29 | .. currentmodule:: mastermsm
30 | 
31 | .. autosummary::
32 |     :toctree:
33 | 
34 |     msm 
35 | 
36 | 
37 | FEWSM module
38 | ------------
39 | .. currentmodule:: mastermsm
40 | 
41 | .. autosummary::
42 |     :toctree:
43 | 
44 |     fewsm
45 |     
46 | Examples
47 | --------
48 | We have put together a few simple Python notebooks to help you learn the basics
49 | of the MasterMSM package. They are based on data derived from either model systems
50 | or from molecular dynamics simulations of some simple (albeit realistic) biomolecules.
51 | You can find the notebooks in the following 
52 | `link <https://github.com/daviddesancho/MasterMSM/tree/master/examples>`_.
53 | 


--------------------------------------------------------------------------------
/docs/source/img/mastermsm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BioKT/MasterMSM/7e71b0fcf42cc7d840e58a6ca18450d710fbdbb4/docs/source/img/mastermsm.png


--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
 1 | .. MasterMSM documentation master file, created by
 2 |    sphinx-quickstart on Mon Mar 25 23:47:22 2019.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | ==========================================
 7 | Welcome to MasterMSM's documentation!
 8 | ==========================================
 9 | 
10 | MasterMSM is a Python package for generating Markov state models (MSMs)
11 | from molecular dynamics trajectories. We use a formulation based on 
12 | the chemical master equation. This package will allow you to:
13 | 
14 | * Create Markov state / master equation models from biomolecular simulations.
15 | * Discretize trajectory data using dihedral angle based methods useful
16 |   for small peptides.
17 | * Calculate rate matrices using a variety of methods.
18 | * Obtain committors and reactive fluxes.
19 | * Carry out sensitivity analysis of networks.
20 | 
21 | We have written a `paper <http://dx.doi.org/10.26434/chemrxiv.8234147>`_ 
22 | on MasterMSM that briefly describes some of the code capabilities. 
23 | The MasterMSM code is hosted on `GitHub <https://github.com/daviddesancho/MasterMSM>`_.
24 | Active development of the MasterMSM code takes place using the git version 
25 | control system.
26 | 
27 | .. toctree::
28 |    :maxdepth:  3
29 |    :caption: Contents:
30 | 
31 |    about
32 |    installation
33 |    documentation
34 |    support
35 | 
36 | 
37 | 
38 | Indices and tables
39 | ==================
40 | 
41 | * :ref:`genindex`
42 | * :ref:`modindex`
43 | * :ref:`search`
44 | 


--------------------------------------------------------------------------------
/docs/source/installation.rst:
--------------------------------------------------------------------------------
 1 | Installation
 2 | ============
 3 | You can install MasterMSM by simply downloading the package from the 
 4 | `GitHub repository <https://github.com/daviddesancho/MasterMSM>`_
 5 | and using the standard installation instructions for packages built
 6 | using `Distutils <https://docs.python.org/3/distutils/index.html>`_.
 7 | 
 8 | .. code-block:: bash
 9 | 
10 |    git clone http://github.com/daviddesancho/mastermsm destination/mastermsm
11 |    cd destination/mastermsm
12 |    python setup.py install --user
13 | 
14 | Parallel processing in Python and MasterMSM
15 | -------------------------------------------
16 | In MasterMSM we make ample use of the ``multiprocessing`` library, which
17 | on macOS can conflict with non-Python libraries. In the past we have
18 | found this to be a problem that can result in segmentation faults.
19 | A workaround that we found for this problem is to set
20 | the following environment variable:
21 | 
22 | .. code-block:: bash
23 | 
24 |    export VECLIB_MAXIMUM_THREADS=1
25 | 
26 | This should be set in the terminal before you start your Python session
27 | in case you meet this problem.
28 | 
29 | 


--------------------------------------------------------------------------------
/docs/source/mastermsm.fewsm.rst:
--------------------------------------------------------------------------------
 1 | mastermsm.fewsm package
 2 | ============================
 3 | 
 4 | Submodules
 5 | ----------
 6 | 
 7 | mastermsm.fewsm.fewsm module
 8 | --------------------------------
 9 | 
10 | .. automodule:: mastermsm.fewsm.fewsm
11 |     :members:
12 |     :undoc-members:
13 |     :show-inheritance:
14 | 
15 | mastermsm.fewsm.fewsm\_lib module
16 | -------------------------------------
17 | 
18 | .. automodule:: mastermsm.fewsm.fewsm_lib
19 |     :members:
20 |     :undoc-members:
21 |     :show-inheritance:
22 | 
23 | 
24 | Module contents
25 | ---------------
26 | 
27 | .. automodule:: mastermsm.fewsm
28 |     :members:
29 |     :undoc-members:
30 |     :show-inheritance:
31 | 


--------------------------------------------------------------------------------
/docs/source/mastermsm.msm.rst:
--------------------------------------------------------------------------------
 1 | mastermsm.msm package
 2 | =====================
 3 | 
 4 | Submodules
 5 | ----------
 6 | 
 7 | mastermsm.msm.msm module
 8 | ------------------------
 9 | 
10 | .. automodule:: mastermsm.msm.msm
11 |     :members:
12 |     :undoc-members:
13 |     :show-inheritance:
14 | 
15 | mastermsm.msm.msm\_lib module
16 | -----------------------------
17 | 
18 | .. automodule:: mastermsm.msm.msm_lib
19 |     :members:
20 |     :undoc-members:
21 |     :show-inheritance:
22 | 
23 | 
24 | Module contents
25 | ---------------
26 | 
27 | .. automodule:: mastermsm.msm
28 |     :members:
29 |     :undoc-members:
30 |     :show-inheritance:
31 | 


--------------------------------------------------------------------------------
/docs/source/mastermsm.rst:
--------------------------------------------------------------------------------
 1 | mastermsm package
 2 | =================
 3 | 
 4 | Subpackages
 5 | -----------
 6 | 
 7 | .. toctree::
 8 | 
 9 |     mastermsm.msm
10 |     mastermsm.trajectory
11 |     mastermsm.fewsm
12 | 
13 | Module contents
14 | ---------------
15 | 
16 | .. automodule:: mastermsm
17 |     :members:
18 |     :undoc-members:
19 |     :show-inheritance:
20 | 


--------------------------------------------------------------------------------
/docs/source/mastermsm.trajectory.rst:
--------------------------------------------------------------------------------
 1 | mastermsm.trajectory package
 2 | ============================
 3 | 
 4 | Submodules
 5 | ----------
 6 | 
 7 | mastermsm.trajectory.traj module
 8 | --------------------------------
 9 | 
10 | .. automodule:: mastermsm.trajectory.traj
11 |     :members:
12 |     :undoc-members:
13 |     :show-inheritance:
14 | 
15 | mastermsm.trajectory.traj\_lib module
16 | -------------------------------------
17 | 
18 | .. automodule:: mastermsm.trajectory.traj_lib
19 |     :members:
20 |     :undoc-members:
21 |     :show-inheritance:
22 | 
23 | 
24 | Module contents
25 | ---------------
26 | 
27 | .. automodule:: mastermsm.trajectory
28 |     :members:
29 |     :undoc-members:
30 |     :show-inheritance:
31 | 


--------------------------------------------------------------------------------
/docs/source/modules.rst:
--------------------------------------------------------------------------------
1 | mastermsm
2 | =========
3 | 
4 | .. toctree::
5 |    :maxdepth: 4
6 | 
7 |    mastermsm
8 | 


--------------------------------------------------------------------------------
/docs/source/support.rst:
--------------------------------------------------------------------------------
1 | =======
2 | Support
3 | =======
4 | 
5 | Development of MasterMSM is based on GitHub. You can get help by opening an 
6 | issue on Github_.
7 | 
8 | .. _Github: https://github.com/daviddesancho/MasterMSM
9 | 


--------------------------------------------------------------------------------
/examples/README.md:
--------------------------------------------------------------------------------
 1 | MasterMSM examples
 2 | ==================
 3 | Here are a set of examples where you can learn some of the fundamentals of
 4 | the MasterMSM package. They correspond to either model systems (dynamics 
 5 | on one or two dimensional potentials) or molecular dynamics simulations
 6 | on simple biomolecules. 
 7 | 
 8 | Contents
 9 | --------
10 | * bistable_potential (`1D_smFS_MSM.ipynb`): example corresponding to a one-dimensional two-state model.
11 | * bistable_potential (`2D_smFS_MSM.ipynb`): analogous case but now in two dimensions.
12 | * alanine_dipeptide: example with true MD simulation data for the simplest peptide model, generated with the Gromacs package. 
13 | * alanine_pentapeptide: example with true MD simulation data, generated with the Gromacs package. 
14 | 


--------------------------------------------------------------------------------
/examples/alanine_dipeptide/ala_dipeptide.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "## MSM of the alanine dipeptide\n",
  8 |     "Here we run through most of the things that can be done with this package using a simple three-state model. There are more sophisticated examples that explore further possibilities.\n",
  9 |     "\n",
 10 |     "The first thing one must do is download the data from [OSF](https://osf.io/a2vc7) and then import a number of libraries we will need as we run this example."
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": null,
 16 |    "metadata": {},
 17 |    "outputs": [],
 18 |    "source": [
 19 |     "%load_ext autoreload\n",
 20 |     "%autoreload 2\n",
 21 |     "%matplotlib inline\n",
 22 |     "import math\n",
 23 |     "import numpy as np"
 24 |    ]
 25 |   },
 26 |   {
 27 |    "cell_type": "code",
 28 |    "execution_count": null,
 29 |    "metadata": {},
 30 |    "outputs": [],
 31 |    "source": [
 32 |     "import matplotlib.pyplot as plt\n",
 33 |     "import seaborn as sns\n",
 34 |     "sns.set(style=\"ticks\", color_codes=True, font_scale=1.25)\n",
 35 |     "sns.set_style({\"xtick.direction\": \"in\", \"ytick.direction\": \"in\"})"
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "markdown",
 40 |    "metadata": {},
 41 |    "source": [
 42 |     "### Discretizing the trajectory\n",
 43 |     "We start by loading the simulation data using the `trajectory` module. For this we use the external library [`MDtraj`](http://mdtraj.org), which contains all sorts of methods for parsing and calculating interesting properties of our time-series data."
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "code",
 48 |    "execution_count": null,
 49 |    "metadata": {},
 50 |    "outputs": [],
 51 |    "source": [
 52 |     "import mdtraj as md\n",
 53 |     "from mastermsm.trajectory import traj"
 54 |    ]
 55 |   },
 56 |   {
 57 |    "cell_type": "code",
 58 |    "execution_count": null,
 59 |    "metadata": {},
 60 |    "outputs": [],
 61 |    "source": [
 62 |     "tr = traj.TimeSeries(top='data/alaTB.gro', traj=['data/alatb_n1_ppn24.xtc'])\n",
 63 |     "print (tr.mdt)"
 64 |    ]
 65 |   },
 66 |   {
 67 |    "cell_type": "markdown",
 68 |    "metadata": {},
 69 |    "source": [
 70 |     "So does what we have calculated look somewhat like a Ramachandran map?"
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "code",
 75 |    "execution_count": null,
 76 |    "metadata": {},
 77 |    "outputs": [],
 78 |    "source": [
 79 |     "phi = md.compute_phi(tr.mdt)\n",
 80 |     "psi = md.compute_psi(tr.mdt)\n",
 81 |     "res = [x for x in tr.mdt.topology.residues]"
 82 |    ]
 83 |   },
 84 |   {
 85 |    "cell_type": "code",
 86 |    "execution_count": null,
 87 |    "metadata": {},
 88 |    "outputs": [],
 89 |    "source": [
 90 |     "fig,ax = plt.subplots(figsize=(3.5,3.5))\n",
 91 |     "ax.plot(180./math.pi*phi[1],180./math.pi*psi[1],'o', markersize=1)\n",
 92 |     "ax.set_xlim(-180,180)\n",
 93 |     "ax.set_ylim(-180,180)\n",
 94 |     "ax.xaxis.set_ticks(range(-180,181,90))\n",
 95 |     "ax.yaxis.set_ticks(range(-180,181,90))\n",
 96 |     "\n",
 97 |     "ax.set_xlabel(r'$\\phi$', fontsize=18)\n",
 98 |     "ax.set_ylabel(r'$\\psi$', fontsize=18)"
 99 |    ]
100 |   },
101 |   {
102 |    "cell_type": "markdown",
103 |    "metadata": {},
104 |    "source": [
105 |     "Next we proceed to discretize the trajectory based on the Ramachandran angles."
106 |    ]
107 |   },
108 |   {
109 |    "cell_type": "code",
110 |    "execution_count": null,
111 |    "metadata": {},
112 |    "outputs": [],
113 |    "source": [
114 |     "tr.discretize(states=['A', 'E', 'L'])"
115 |    ]
116 |   },
117 |   {
118 |    "cell_type": "markdown",
119 |    "metadata": {},
120 |    "source": [
121 |     "For plotting we map the `A`, `E` and `L` states to the integers 0, 1 and 2, respectively."
122 |    ]
123 |   },
124 |   {
125 |    "cell_type": "code",
126 |    "execution_count": null,
127 |    "metadata": {},
128 |    "outputs": [],
129 |    "source": [
130 |     "fig, (ax1, ax2, ax3) = plt.subplots(3, 1, sharex=True)\n",
131 |     "\n",
132 |     "ax1.plot(tr.mdt.time, psi[1]*180/math.pi,'o', ms=0.1)\n",
133 |     "ax1.set_ylabel(r'$\\psi$', fontsize=14)\n",
134 |     "ax1.set_ylim(-180,180)\n",
135 |     "ax1.yaxis.set_ticks(range(-180,181,90))\n",
136 |     "\n",
137 |     "ax2.plot(tr.mdt.time, phi[1]*180/math.pi,'o', ms=0.1)\n",
138 |     "ax2.set_ylabel(r'$\\phi$', fontsize=14)\n",
139 |     "ax2.set_ylim(-180,180)  # phi panel: same range and ticks as the psi panel\n",
140 |     "ax2.yaxis.set_ticks(range(-180,181,90))\n",
141 |     "\n",
142 |     "ax3.set_ylabel('State')\n",
143 |     "ax3.set_ylim(-0.2,2.2)\n",
144 |     "ax3.yaxis.set_ticks(range(3))\n",
145 |     "labels = [item.get_text() for item in ax2.get_xticklabels()]\n",
146 |     "labels = ['A', 'E', 'L']\n",
147 |     "y = [labels.index(x) if x in labels else 0 for x in tr.distraj ]\n",
148 |     "ax3.plot(tr.mdt.time, y, lw=1)\n",
149 |     "ax3.set_yticklabels(labels)\n",
150 |     "ax3.set_xlabel('Time [ps]')\n",
151 |     "\n",
152 |     "ax1.set_xlim(0, 2.0e5)\n",
153 |     "plt.tight_layout(h_pad=0)"
154 |    ]
155 |   },
156 |   {
157 |    "cell_type": "markdown",
158 |    "metadata": {
159 |     "collapsed": true
160 |    },
161 |    "source": [
162 |     "In the plot we see how the time series of continuous torsion angles is converted into a time series of discrete states. We can obtain a list of states in the following way."
163 |    ]
164 |   },
165 |   {
166 |    "cell_type": "code",
167 |    "execution_count": null,
168 |    "metadata": {},
169 |    "outputs": [],
170 |    "source": [
171 |     "tr.find_keys()\n",
172 |     "tr.keys"
173 |    ]
174 |   },
175 |   {
176 |    "cell_type": "markdown",
177 |    "metadata": {
178 |     "collapsed": true
179 |    },
180 |    "source": [
181 |     "### Building the master equation model\n",
182 |     "After having loaded our trajectory using the functionalities from the `trajectory` module we start building the master equation model. For this, we make use of the `msm` module. There are two steps corresponding to the two main classes within that module. First we create an instance of the `SuperMSM`, which can be used to direct the whole process of constructing and validating the MSM."
183 |    ]
184 |   },
185 |   {
186 |    "cell_type": "code",
187 |    "execution_count": null,
188 |    "metadata": {},
189 |    "outputs": [],
190 |    "source": [
191 |     "from mastermsm.msm import msm\n",
192 |     "msm_alaTB = msm.SuperMSM([tr])"
193 |    ]
194 |   },
195 |   {
196 |    "cell_type": "markdown",
197 |    "metadata": {},
198 |    "source": [
199 |     "Then, using the `do_msm` method, we produce instances of the `MSM` class at a desired lag time, $\\Delta t$. Each of these contains an MSM built at a specific lag time. These are stored as a dictionary in the `msms` attribute of the `SuperMSM` class. "
200 |    ]
201 |   },
202 |   {
203 |    "cell_type": "code",
204 |    "execution_count": null,
205 |    "metadata": {},
206 |    "outputs": [],
207 |    "source": [
208 |     "lagt = 1\n",
209 |     "msm_alaTB.do_msm(lagt)\n",
210 |     "msm_alaTB.msms[lagt].do_trans()\n",
211 |     "msm_alaTB.msms[lagt].boots()"
212 |    ]
213 |   },
214 |   {
215 |    "cell_type": "markdown",
216 |    "metadata": {},
217 |    "source": [
218 |     "The resulting model has a number of things we may be interested in, like its eigenvalue spectrum (in this case limited to two relaxation times, corresponding to the exchange of helix, coil and $\\alpha_L$ states) or the equilibrium probabilities of the microstates."
219 |    ]
220 |   },
221 |   {
222 |    "cell_type": "code",
223 |    "execution_count": null,
224 |    "metadata": {},
225 |    "outputs": [],
226 |    "source": [
227 |     "fig, ax = plt.subplots(1, 2, figsize=(6,3))\n",
228 |     "\n",
229 |     "ax[0].errorbar([1, 2], msm_alaTB.msms[lagt].tau_ave, msm_alaTB.msms[lagt].tau_std ,fmt='o-', markersize=5)\n",
230 |     "ax[1].errorbar([1,2,3], msm_alaTB.msms[lagt].peq_ave, msm_alaTB.msms[lagt].peq_std ,fmt='o-', markersize=5)\n",
231 |     "\n",
232 |     "ax[0].set_xlim(0.5, 2.5)\n",
233 |     "ax[0].set_ylim(10,2e3)\n",
234 |     "ax[0].set_yscale('log')\n",
235 |     "ax[0].set_ylabel(r'$\\tau$ [ps]', fontsize=18)\n",
236 |     "ax[0].set_xlabel(r'$\\lambda_1$', fontsize=18)\n",
237 |     "\n",
238 |     "ax[1].set_ylabel(r'$P_{eq}$', fontsize=18)\n",
239 |     "ax[1].set_xlabel(r'state', fontsize=18)\n",
240 |     "ax[1].set_yscale('log')\n",
241 |     "ax[1].set_ylim(1e-2, 1)\n",
242 |     "ax[1].set_xticks([1, 2, 3])\n",
243 |     "ax[1].set_xticklabels(labels[:3])\n",
244 |     "ax[1].set_xlim(0.5,3.5)\n",
245 |     "\n",
246 |     "plt.tight_layout(w_pad=1)"
247 |    ]
248 |   },
249 |   {
250 |    "cell_type": "markdown",
251 |    "metadata": {},
252 |    "source": [
253 |     "### Validation\n",
254 |     "However, from simply calculating these quantities we do not know how informative they really are. In order to understand whether the values we calculate are really reflective of the properties of the underlying system we resort to validation of the MSM. The two-level structure that we have described, consisting of the `SuperMSM` and `MSM` classes, allows for the user to test some global convergence properties first (at the level of the `SuperMSM`). "
255 |    ]
256 |   },
257 |   {
258 |    "cell_type": "markdown",
259 |    "metadata": {},
260 |    "source": [
261 |     "#### Convergence tests\n",
262 |     "For validating the model we first see at which point the relaxation times are sufficiently well converged."
263 |    ]
264 |   },
265 |   {
266 |    "cell_type": "code",
267 |    "execution_count": null,
268 |    "metadata": {},
269 |    "outputs": [],
270 |    "source": [
271 |     "msm_alaTB.convergence_test(time=[1, 2, 5, 7, 10, 20, 50, 100], error=True)"
272 |    ]
273 |   },
274 |   {
275 |    "cell_type": "code",
276 |    "execution_count": null,
277 |    "metadata": {},
278 |    "outputs": [],
279 |    "source": [
280 |     "fig, ax = plt.subplots()\n",
281 |     "\n",
282 |     "tau_vs_lagt = np.array([[x,msm_alaTB.msms[x].tauT[0],msm_alaTB.msms[x].tau_std[0]] \\\n",
283 |     "               for x in sorted(msm_alaTB.msms.keys())])\n",
284 |     "ax.errorbar(tau_vs_lagt[:,0],tau_vs_lagt[:,1],fmt='o-', yerr=tau_vs_lagt[:,2], markersize=5)\n",
285 |     "\n",
286 |     "tau_vs_lagt = np.array([[x,msm_alaTB.msms[x].tauT[1],msm_alaTB.msms[x].tau_std[1]] \\\n",
287 |     "               for x in sorted(msm_alaTB.msms.keys())])\n",
288 |     "ax.errorbar(tau_vs_lagt[:,0],tau_vs_lagt[:,1],fmt='o-', yerr=tau_vs_lagt[:,2], markersize=5)\n",
289 |     "\n",
290 |     "ax.fill_between(10**np.arange(-0.2,3,0.2), 1e-1, 10**np.arange(-0.2,3,0.2), facecolor='lightgray', alpha=0.5)\n",
291 |     "ax.set_xlabel(r'$\\Delta$t [ps]', fontsize=16)\n",
292 |     "ax.set_ylabel(r'$\\tau_i$ [ps]', fontsize=16)\n",
293 |     "ax.set_xlim(0.8,200)\n",
294 |     "ax.set_ylim(10,2000)\n",
295 |     "_ = ax.set_xscale('log')\n",
296 |     "ax.set_yscale('log')\n",
297 |     "plt.tight_layout()"
298 |    ]
299 |   },
300 |   {
301 |    "cell_type": "markdown",
302 |    "metadata": {},
303 |    "source": [
304 |     "Here we see that from the very beginning the relaxation times are independent of the lag time ($\\Delta$t) used in the construction of the model. This convergence is a good indicator of the Markovianity of the model and is a result of the use of transition based assignment. The shaded area corresponds to the range of lag times where the information we obtain is largely unreliable, because the lag time itself is longer than the relaxation time."
305 |    ]
306 |   },
307 |   {
308 |    "cell_type": "markdown",
309 |    "metadata": {},
310 |    "source": [
311 |     "#### Chapman-Kolmogorov test\n",
312 |     "Another important step in the validation is to carry out the so-called Chapman-Kolmogorov test. In this case, the predictions from the MSM are validated against the simulation data used for its construction. "
313 |    ]
314 |   },
315 |   {
316 |    "cell_type": "code",
317 |    "execution_count": null,
318 |    "metadata": {},
319 |    "outputs": [],
320 |    "source": [
321 |     "pMSM_E, pMD_E, epMD_E = msm_alaTB.ck_test(time=[1, 2, 5, 7, 10, 20, 50, 100], init=['E'])\n",
322 |     "pMSM_A, pMD_A, epMD_A = msm_alaTB.ck_test(time=[1, 2, 5, 7, 10, 20, 50, 100], init=['A'])\n",
323 |     "pMSM_L, pMD_L, epMD_L = msm_alaTB.ck_test(time=[1, 2, 5, 7, 10, 20, 50, 100], init=['L'])"
324 |    ]
325 |   },
326 |   {
327 |    "cell_type": "code",
328 |    "execution_count": null,
329 |    "metadata": {},
330 |    "outputs": [],
331 |    "source": [
332 |     "fig, ax = plt.subplots(1,3, figsize=(8,3.25), sharex=True, sharey=True)\n",
333 |     "ax[0].errorbar(pMD_E[:,0], pMD_E[:,1], epMD_E, fmt='o')\n",
334 |     "for p in pMSM_E:\n",
335 |     "    ax[0].plot(p[0], p[1], label=r\"$\\Delta t$=%g\"%p[0][0])  # raw string: '\\D' is not a valid escape\n",
336 |     "ax[0].legend(fontsize=10, ncol=2)\n",
337 |     "\n",
338 |     "ax[1].errorbar(pMD_A[:,0], pMD_A[:,1], epMD_A, fmt='o')\n",
339 |     "for p in pMSM_A:\n",
340 |     "    ax[1].plot(p[0], p[1])\n",
341 |     "\n",
342 |     "ax[2].errorbar(pMD_L[:,0], pMD_L[:,1], epMD_L, fmt='o')\n",
343 |     "for p in pMSM_L:\n",
344 |     "    ax[2].plot(p[0], p[1])\n",
345 |     "\n",
346 |     "#ax[0].set_xscale('log')\n",
347 |     "ax[0].set_ylabel('P(t)')\n",
348 |     "ax[0].set_xlabel('Time (ps)')\n",
349 |     "ax[1].set_xlabel('Time (ps)')\n",
350 |     "ax[2].set_xlabel('Time (ps)')\n",
351 |     "plt.tight_layout(w_pad=0)"
352 |    ]
353 |   },
354 |   {
355 |    "cell_type": "markdown",
356 |    "metadata": {},
357 |    "source": [
358 |     "These plots show the decay of the population from a given initial condition. In this case, the left, middle and right plots correspond to starting in the `E`, `A` and `L` basins, respectively. In all cases we compare the calculation from the simulation data (as circles) and the propagation from MSMs calculated at different lag times (lines). The agreement between the simulation data and the model predictions confirms the result from the convergence analysis."
359 |    ]
360 |   },
361 |   {
362 |    "cell_type": "markdown",
363 |    "metadata": {},
364 |    "source": [
365 |     "#### Autocorrelation functions\n",
366 |     "The MSM can also be validated against the autocorrelation function (ACF) of the eigenmodes. If the simulation data is projected in the eigenmodes, then the ACF for mode $n$ should decay with a timescale equal to $-1/\\lambda_n$."
367 |    ]
368 |   },
369 |   {
370 |    "cell_type": "code",
371 |    "execution_count": null,
372 |    "metadata": {},
373 |    "outputs": [],
374 |    "source": [
375 |     "msm_alaTB.msms[2].do_trans(evecs=True)\n",
376 |     "acf = msm_alaTB.msms[2].acf_mode()"
377 |    ]
378 |   },
379 |   {
380 |    "cell_type": "code",
381 |    "execution_count": null,
382 |    "metadata": {},
383 |    "outputs": [],
384 |    "source": [
385 |     "len(tr.mdt.time[1:])"
386 |    ]
387 |   },
388 |   {
389 |    "cell_type": "code",
390 |    "execution_count": null,
391 |    "metadata": {},
392 |    "outputs": [],
393 |    "source": [
394 |     "fig, ax = plt.subplots()\n",
395 |     "ax.plot(tr.mdt.time[1:], acf[1], 's', label='$i$=1', color='tab:blue', alpha=0.1)\n",
396 |     "ax.plot(tr.mdt.time[1:],np.exp(-tr.mdt.time[1:]*1./msm_alaTB.msms[2].tauT[0]), color='tab:blue')\n",
397 |     "\n",
398 |     "ax.plot(tr.mdt.time[1:], acf[2], 'o', label='$i$=2', color='tab:orange', alpha=0.2)\n",
399 |     "ax.plot(tr.mdt.time[1:],np.exp(-tr.mdt.time[1:]*1./msm_alaTB.msms[2].tauT[1]), color='tab:orange')\n",
400 |     "\n",
401 |     "ax.set_xlim(2,3000)\n",
402 |     "ax.set_ylim(0,1)\n",
403 |     "\n",
404 |     "ax.set_xlabel('Time [ps]')\n",
405 |     "ax.set_ylabel('C$_{ii}$(t)')\n",
406 |     "ax.set_xscale('log')\n",
407 |     "plt.legend()\n",
408 |     "plt.tight_layout()"
409 |    ]
410 |   },
411 |   {
412 |    "cell_type": "markdown",
413 |    "metadata": {},
414 |    "source": [
415 |     "This result is particularly interesting. While the fast mode ($\\lambda_2$) is very well determined because there are many transitions, for the slowest mode the agreement is notably worse."
416 |    ]
417 |   },
418 |   {
419 |    "cell_type": "markdown",
420 |    "metadata": {},
421 |    "source": [
422 |     "### Calculation of the rate matrix\n",
423 |     "From the transition matrix we can calculate the rate matrix. One possibility is to use an approximate method based simply on a Taylor expansion ([De Sancho, Mittal and Best, *JCTC*, 2013](http://dx.doi.org/10.1021/ct301033r)). We can check whether our approximate method gives a good result. We use short times since we have checked that short times are sufficient in this case for obtaining converged relaxation times."
424 |    ]
425 |   },
426 |   {
427 |    "cell_type": "code",
428 |    "execution_count": null,
429 |    "metadata": {},
430 |    "outputs": [],
431 |    "source": [
432 |     "fig, ax = plt.subplots(1,2, figsize=(7.5,3.5))\n",
433 |     "for i in [1, 2, 5, 7, 10, 20]:\n",
434 |     "    msm_alaTB.msms[i].do_rate()\n",
435 |     "    ax[0].errorbar(msm_alaTB.msms[i].tauT, msm_alaTB.msms[i].tauK, fmt='o', xerr=msm_alaTB.msms[i].tau_std, markersize=10, label=str(i))\n",
436 |     "    ax[1].errorbar(msm_alaTB.msms[i].peqT, msm_alaTB.msms[i].peqK, fmt='o', xerr=msm_alaTB.msms[i].peq_std, markersize=10, label=str(i))\n",
437 |     "\n",
438 |     "ax[0].plot([0,1000],[0,1000],'--', color='lightgray')\n",
439 |     "ax[0].set_xlabel(r'$\\tau_T$ [ps]', fontsize=20)\n",
440 |     "ax[0].set_ylabel(r'$\\tau_K$ [ps]', fontsize=20)\n",
441 |     "ax[0].set_xscale('log')\n",
442 |     "ax[0].set_yscale('log')\n",
443 |     "\n",
444 |     "ax[1].plot([0,1],[0,1],'--', color='lightgray')\n",
445 |     "ax[1].set_xlabel(r'$p_T$', fontsize=20)\n",
446 |     "ax[1].set_ylabel(r'$p_K$', fontsize=20)\n",
447 |     "ax[1].set_xscale('log')\n",
448 |     "ax[1].set_yscale('log')\n",
449 |     "\n",
450 |     "\n",
451 |     "ax[0].legend(fontsize=9, bbox_to_anchor=(1.0, 0.65))\n",
452 |     "plt.tight_layout(pad=0.4, w_pad=3)"
453 |    ]
454 |   },
455 |   {
456 |    "cell_type": "markdown",
457 |    "metadata": {},
458 |    "source": [
459 |     "The method produces acceptable solutions for short lag times (up to 5-10 ps) although the result rapidly diverges from the transition matrix relaxation time at long lag times. Equilibrium probabilities are recovered correctly at all lag times from the rate matrices."
460 |    ]
461 |   }
462 |  ],
463 |  "metadata": {
464 |   "kernelspec": {
465 |    "display_name": "Python 3",
466 |    "language": "python",
467 |    "name": "python3"
468 |   },
469 |   "language_info": {
470 |    "codemirror_mode": {
471 |     "name": "ipython",
472 |     "version": 3
473 |    },
474 |    "file_extension": ".py",
475 |    "mimetype": "text/x-python",
476 |    "name": "python",
477 |    "nbconvert_exporter": "python",
478 |    "pygments_lexer": "ipython3",
479 |    "version": "3.7.4"
480 |   }
481 |  },
482 |  "nbformat": 4,
483 |  "nbformat_minor": 1
484 | }
485 | 


--------------------------------------------------------------------------------
/examples/alanine_dipeptide/ala_dipeptide_discretize.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Discretizations\n",
  8 |     "Here we show how different discretizations work within MasterMSM. An important note is that not all discretizations will be sensible for all systems, but as usual the alanine dipeptide is a good testbed.\n",
  9 |     "\n",
 10 |     "We start by downloading the data from the following [link](https://osf.io/a2vc7) and importing a number of libraries for plotting and analysis that will be useful for our work."
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": null,
 16 |    "metadata": {},
 17 |    "outputs": [],
 18 |    "source": [
 19 |     "%load_ext autoreload\n",
 20 |     "%matplotlib inline\n",
 21 |     "import math\n",
 22 |     "import numpy as np\n",
 23 |     "import matplotlib.pyplot as plt\n",
 24 |     "import seaborn as sns\n",
 25 |     "sns.set(style=\"ticks\", color_codes=True, font_scale=1.5)\n",
 26 |     "sns.set_style({\"xtick.direction\": \"in\", \"ytick.direction\": \"in\"})"
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "markdown",
 31 |    "metadata": {},
 32 |    "source": [
 33 |     "Next we import the ```traj``` module and read the molecular simulation trajectory in the ```xtc``` compressed format from Gromacs."
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "code",
 38 |    "execution_count": null,
 39 |    "metadata": {},
 40 |    "outputs": [],
 41 |    "source": [
 42 |     "from mastermsm.trajectory import traj\n",
 43 |     "tr = traj.TimeSeries(top='data/alaTB.gro', traj=['data/alatb_n1_ppn24.xtc'])\n",
 44 |     "print (tr.mdt)"
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "markdown",
 49 |    "metadata": {},
 50 |    "source": [
 51 |     "### Core Ramachandran angle regions\n",
 52 |     "Following previous work we can use core regions in the Ramachandran map to define our states. We use utilities from the [MDtraj](http://mdtraj.org) package to compute the Phi and Psi dihedrals."
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "code",
 57 |    "execution_count": null,
 58 |    "metadata": {},
 59 |    "outputs": [],
 60 |    "source": [
 61 |     "import mdtraj as md\n",
 62 |     "phi = md.compute_phi(tr.mdt)\n",
 63 |     "psi = md.compute_psi(tr.mdt)\n",
 64 |     "res = [x for x in tr.mdt.topology.residues]"
 65 |    ]
 66 |   },
 67 |   {
 68 |    "cell_type": "markdown",
 69 |    "metadata": {},
 70 |    "source": [
 71 |     "Then we run the actual discretization, using three states for the alpha (`A`), extended (`E`) and left-handed alpha (`L`) conformations."
 72 |    ]
 73 |   },
 74 |   {
 75 |    "cell_type": "code",
 76 |    "execution_count": null,
 77 |    "metadata": {},
 78 |    "outputs": [],
 79 |    "source": [
 80 |     "tr.discretize(states=['A', 'E', 'L'])\n",
 81 |     "tr.find_keys()"
 82 |    ]
 83 |   },
 84 |   {
 85 |    "cell_type": "code",
 86 |    "execution_count": null,
 87 |    "metadata": {},
 88 |    "outputs": [],
 89 |    "source": [
 90 |     "fig, ax = plt.subplots(figsize=(10,3))\n",
 91 |     "ax.plot(tr.mdt.time, [tr.keys.index(x) if (x in tr.keys) else 0 for x in tr.distraj ], lw=1)\n",
 92 |     "ax.set_xlim(0, 1.5e5)\n",
 93 |     "ax.set_ylim(-0.5, 2.5)\n",
 94 |     "ax.set_yticks(range(3))\n",
 95 |     "ax.set_yticklabels(['A', 'E', 'L'])\n",
 96 |     "ax.set_xlabel('Time (ps)', fontsize=20)\n",
 97 |     "ax.set_ylabel('state', fontsize=20)"
 98 |    ]
 99 |   },
100 |   {
101 |    "cell_type": "markdown",
102 |    "metadata": {},
103 |    "source": [
104 |     "Finally we derive the MSM using the tools from the ```msm``` module. In particular, we use the ```SuperMSM``` class that will help build MSMs at various lag times."
105 |    ]
106 |   },
107 |   {
108 |    "cell_type": "code",
109 |    "execution_count": null,
110 |    "metadata": {},
111 |    "outputs": [],
112 |    "source": [
113 |     "from mastermsm.msm import msm\n",
114 |     "msm_alaTB = msm.SuperMSM([tr])\n",
115 |     "for i in [1, 2, 5, 10, 20, 50, 100]:\n",
116 |     "    msm_alaTB.do_msm(i)\n",
117 |     "    msm_alaTB.msms[i].do_trans()\n",
118 |     "    msm_alaTB.msms[i].boots()"
119 |    ]
120 |   },
121 |   {
122 |    "cell_type": "markdown",
123 |    "metadata": {},
124 |    "source": [
125 |     "Next we gather results from all these MSMs and plot the relaxation times corresponding to the two slow transitions."
126 |    ]
127 |   },
128 |   {
129 |    "cell_type": "code",
130 |    "execution_count": null,
131 |    "metadata": {},
132 |    "outputs": [],
133 |    "source": [
134 |     "fig, ax = plt.subplots()\n",
135 |     "tau_vs_lagt = np.array([[x,msm_alaTB.msms[x].tauT[0],msm_alaTB.msms[x].tau_std[0]] \\\n",
136 |     "               for x in sorted(msm_alaTB.msms.keys())])\n",
137 |     "ax.errorbar(tau_vs_lagt[:,0],tau_vs_lagt[:,1],fmt='o-', yerr=tau_vs_lagt[:,2], markersize=10)\n",
138 |     "tau_vs_lagt = np.array([[x,msm_alaTB.msms[x].tauT[1],msm_alaTB.msms[x].tau_std[1]] \\\n",
139 |     "               for x in sorted(msm_alaTB.msms.keys())])\n",
140 |     "ax.errorbar(tau_vs_lagt[:,0],tau_vs_lagt[:,1],fmt='o-', yerr=tau_vs_lagt[:,2], markersize=10)\n",
141 |     "ax.fill_between(10**np.arange(-0.2,3,0.2), 1e-1, 10**np.arange(-0.2,3,0.2), facecolor='lightgray')\n",
142 |     "ax.set_xlabel(r'$\\Delta$t [ps]', fontsize=16)\n",
143 |     "ax.set_ylabel(r'$\\tau$ [ps]', fontsize=16)\n",
144 |     "ax.set_xlim(0.8,150)\n",
145 |     "ax.set_ylim(10,3000)\n",
146 |     "ax.set_yscale('log')\n",
147 |     "_ = ax.set_xscale('log')"
148 |    ]
149 |   },
150 |   {
151 |    "cell_type": "markdown",
152 |    "metadata": {},
153 |    "source": [
154 |     "### Fine grid on the Ramachandran map\n",
155 |     "Alternatively we can make a grid on the Ramachandran map with many more states."
156 |    ]
157 |   },
158 |   {
159 |    "cell_type": "code",
160 |    "execution_count": null,
161 |    "metadata": {},
162 |    "outputs": [],
163 |    "source": [
164 |     "tr.discretize(method=\"ramagrid\", nbins=30)\n",
165 |     "tr.find_keys()"
166 |    ]
167 |   },
168 |   {
169 |    "cell_type": "code",
170 |    "execution_count": null,
171 |    "metadata": {},
172 |    "outputs": [],
173 |    "source": [
174 |     "fig, ax = plt.subplots(figsize=(10,3))\n",
175 |     "ax.plot(tr.mdt.time, [x for x in tr.distraj], '.', ms=1)\n",
176 |     "ax.set_xlim(0, 1.5e5)\n",
177 |     "ax.set_ylim(-1, 900)\n",
178 |     "ax.set_xlabel('Time (ps)', fontsize=20)\n",
179 |     "ax.set_ylabel('state', fontsize=20)"
180 |    ]
181 |   },
182 |   {
183 |    "cell_type": "markdown",
184 |    "metadata": {},
185 |    "source": [
186 |     "Then we repeat the same steps as before, but with this fine grained MSM."
187 |    ]
188 |   },
189 |   {
190 |    "cell_type": "code",
191 |    "execution_count": null,
192 |    "metadata": {
193 |     "scrolled": false
194 |    },
195 |    "outputs": [],
196 |    "source": [
197 |     "from mastermsm.msm import msm\n",
198 |     "msm_alaTB_grid = msm.SuperMSM([tr])\n",
199 |     "for i in [1, 2, 5, 10, 20, 50, 100]:\n",
200 |     "    msm_alaTB_grid.do_msm(i)\n",
201 |     "    msm_alaTB_grid.msms[i].do_trans()\n",
202 |     "    msm_alaTB_grid.msms[i].boots()"
203 |    ]
204 |   },
205 |   {
206 |    "cell_type": "markdown",
207 |    "metadata": {},
208 |    "source": [
209 |     "First we take a look at the dependence of the slowest relaxation times on the lag time, $\\Delta t$, used for the construction of the Markov model, as a minimal quality control."
210 |    ]
211 |   },
212 |   {
213 |    "cell_type": "code",
214 |    "execution_count": null,
215 |    "metadata": {},
216 |    "outputs": [],
217 |    "source": [
218 |     "tau1_vs_lagt = np.array([[x, msm_alaTB_grid.msms[x].tauT[0], \\\n",
219 |     "                    msm_alaTB_grid.msms[x].tau_std[0]] \\\n",
220 |     "                   for x in sorted(msm_alaTB_grid.msms.keys())])\n",
221 |     "tau2_vs_lagt = np.array([[x, msm_alaTB_grid.msms[x].tauT[1], \\\n",
222 |     "                    msm_alaTB_grid.msms[x].tau_std[1]] \\\n",
223 |     "                   for x in sorted(msm_alaTB_grid.msms.keys())])\n",
224 |     "tau3_vs_lagt = np.array([[x,msm_alaTB_grid.msms[x].tauT[2], \\\n",
225 |     "                    msm_alaTB_grid.msms[x].tau_std[2]] \\\n",
226 |     "                   for x in sorted(msm_alaTB_grid.msms.keys())])\n",
227 |     "tau4_vs_lagt = np.array([[x,msm_alaTB_grid.msms[x].tauT[3], \\\n",
228 |     "                    msm_alaTB_grid.msms[x].tau_std[3]] \\\n",
229 |     "                   for x in sorted(msm_alaTB_grid.msms.keys())])\n",
230 |     "\n",
231 |     "fig, ax = plt.subplots()\n",
232 |     "ax.errorbar(tau1_vs_lagt[:,0],tau1_vs_lagt[:,1], tau1_vs_lagt[:,2], fmt='o-', markersize=10)\n",
233 |     "ax.errorbar(tau2_vs_lagt[:,0],tau2_vs_lagt[:,1], tau2_vs_lagt[:,2], fmt='o-', markersize=10)\n",
234 |     "ax.errorbar(tau3_vs_lagt[:,0],tau3_vs_lagt[:,1], tau3_vs_lagt[:,2], fmt='o-', markersize=10)\n",
235 |     "ax.errorbar(tau4_vs_lagt[:,0],tau4_vs_lagt[:,1], tau4_vs_lagt[:,2], fmt='o-', markersize=10)\n",
236 |     "ax.fill_between(10**np.arange(-0.2,3,0.2), 1e-1, 10**np.arange(-0.2,3,0.2), facecolor='lightgray', alpha=0.5)\n",
237 |     "ax.set_xlabel(r'$\\Delta$t [ps]', fontsize=16)\n",
238 |     "ax.set_ylabel(r'$\\tau_i$ [ps]', fontsize=16)\n",
239 |     "ax.set_xlim(0.8,200)\n",
240 |     "ax.set_ylim(1,3000)\n",
241 |     "_ = ax.set_xscale('log')\n",
242 |     "_ = ax.set_yscale('log')\n",
243 |     "plt.tight_layout()"
244 |    ]
245 |   },
246 |   {
247 |    "cell_type": "markdown",
248 |    "metadata": {},
249 |    "source": [
250 |     "The slowest relaxation times from the fine-grained MSM agree with those of the core regions, although in this case there is an additional slow mode."
251 |    ]
252 |   },
253 |   {
254 |    "cell_type": "code",
255 |    "execution_count": null,
256 |    "metadata": {},
257 |    "outputs": [],
258 |    "source": [
259 |     "fig, ax = plt.subplots()\n",
260 |     "ax.errorbar(range(1,16),msm_alaTB_grid.msms[10].tauT[0:15], fmt='o-', \\\n",
261 |     "            yerr= msm_alaTB_grid.msms[10].tau_std[0:15], ms=10)\n",
262 |     "ax.set_xlabel('Eigenvalue index')\n",
263 |     "ax.set_ylabel(r'$\\tau_i$ (ns)')\n",
264 |     "ax.set_yscale('log')\n",
265 |     "plt.tight_layout()"
266 |    ]
267 |   },
268 |   {
269 |    "cell_type": "markdown",
270 |    "metadata": {},
271 |    "source": [
272 |     "We can understand which dynamical processes the eigenvalues are associated with by looking at the corresponding eigenvectors. For this we recalculate the transition matrix, but now recovering the eigenvectors."
273 |    ]
274 |   },
275 |   {
276 |    "cell_type": "code",
277 |    "execution_count": null,
278 |    "metadata": {},
279 |    "outputs": [],
280 |    "source": [
281 |     "msm_alaTB_grid.msms[10].do_trans(evecs=True)"
282 |    ]
283 |   },
284 |   {
285 |    "cell_type": "code",
286 |    "execution_count": null,
287 |    "metadata": {},
288 |    "outputs": [],
289 |    "source": [
290 |     "fig, ax = plt.subplots(1,4, figsize=(12,3), sharex=True, sharey=True)\n",
291 |     "mat = np.zeros((30,30), float)\n",
292 |     "for i in [x for x in zip(msm_alaTB_grid.msms[10].keep_keys, \\\n",
293 |     "                         msm_alaTB_grid.msms[10].rvecsT[:,0])]:\n",
294 |     "    #print i, i[0]%20, int(i[0]/20), -i[1]\n",
295 |     "\n",
296 |     "    mat[i[0]%30, int(i[0]/30)] = i[1]\n",
297 |     "ax[0].imshow(mat.transpose(), interpolation=\"none\", origin='lower', \\\n",
298 |     "             cmap='Blues')\n",
299 |     "ax[0].set_title(r\"$\\psi_1$\")\n",
300 |     "\n",
301 |     "mat = np.zeros((30,30), float)\n",
302 |     "for i in [x for x in zip(msm_alaTB_grid.msms[10].keep_keys, \\\n",
303 |     "                         msm_alaTB_grid.msms[10].rvecsT[:,1])]:\n",
304 |     "    #print i, i[0]%20, int(i[0]/20), -i[1]\n",
305 |     "    mat[i[0]%30, int(i[0]/30)] = -i[1]\n",
306 |     "ax[1].imshow(mat.transpose(), interpolation=\"none\", origin='lower', \\\n",
307 |     "             cmap='RdBu')\n",
308 |     "ax[1].set_title(r\"$\\psi_2$\")\n",
309 |     "\n",
310 |     "mat = np.zeros((30,30), float)\n",
311 |     "for i in [x for x in zip(msm_alaTB_grid.msms[10].keep_keys, \\\n",
312 |     "                         msm_alaTB_grid.msms[10].rvecsT[:,2])]:\n",
313 |     "    #print i, i[0]%20, int(i[0]/20), -i[1]\n",
314 |     "    mat[i[0]%30, int(i[0]/30)] = -i[1]\n",
315 |     "ax[2].imshow(mat.transpose(), interpolation=\"none\", origin='lower', \\\n",
316 |     "                 cmap='RdBu')\n",
317 |     "ax[2].set_title(r\"$\\psi_3$\")\n",
318 |     "\n",
319 |     "mat = np.zeros((30,30), float)\n",
320 |     "for i in [x for x in zip(msm_alaTB_grid.msms[10].keep_keys, \\\n",
321 |     "                         msm_alaTB_grid.msms[10].rvecsT[:,3])]:\n",
322 |     "    #print i, i[0]%20, int(i[0]/20), -i[1]\n",
323 |     "    mat[i[0]%30, int(i[0]/30)] = -i[1]\n",
324 |     "ax[3].imshow(mat.transpose(), interpolation=\"none\", origin='lower', \\\n",
325 |     "                 cmap='RdBu')\n",
326 |     "ax[3].set_title(r\"$\\psi_4$\")"
327 |    ]
328 |   },
329 |   {
330 |    "cell_type": "markdown",
331 |    "metadata": {},
332 |    "source": [
333 |     "Here we are plotting the values of the eigenvectors so that the state indexes match the positions in the Ramachandran map. On the left, we show the stationary eigenvector, $\\psi_1$, which is proportional to the equilibrium population. The other three plots correspond to the slowest dynamical modes. From $\\psi_2$, we find that the slowest transition is the interconversion between the $\\alpha_L$ and the $\\alpha_R/\\beta$ states. The latter two states equilibrate more rapidly, as indicated by $\\psi_3$. Finally, on the right, we find the additional mode that corresponds to a yet faster transition between the $\\alpha_L$ basin and a fourth Ramachandran region."
334 |    ]
335 |   },
336 |   {
337 |    "cell_type": "markdown",
338 |    "metadata": {},
339 |    "source": [
340 |     "### Clustering\n",
341 |     "So it seems that three states alone may not be a very good clustering for this particular system. Maybe we need one more. In order to do the clustering systematically we use the ```fewsm``` module from ```MasterMSM```. From the eigenvectors we are immediately able to produce a sensible, albeit still imperfect, partitioning in four states."
342 |    ]
343 |   },
344 |   {
345 |    "cell_type": "code",
346 |    "execution_count": null,
347 |    "metadata": {},
348 |    "outputs": [],
349 |    "source": [
350 |     "from mastermsm.fewsm import fewsm"
351 |    ]
352 |   },
353 |   {
354 |    "cell_type": "code",
355 |    "execution_count": null,
356 |    "metadata": {},
357 |    "outputs": [],
358 |    "source": [
359 |     "fewsm4 = fewsm.FEWSM(msm_alaTB_grid.msms[2], N=4)"
360 |    ]
361 |   },
362 |   {
363 |    "cell_type": "code",
364 |    "execution_count": null,
365 |    "metadata": {},
366 |    "outputs": [],
367 |    "source": [
368 |     "import matplotlib.cm as cm\n",
369 |     "fig, ax = plt.subplots(figsize=(5,5))\n",
370 |     "mat = np.zeros((30,30), float)\n",
371 |     "for i in msm_alaTB_grid.msms[2].keep_keys:\n",
372 |     "    j = msm_alaTB_grid.msms[2].keep_keys.index(i)\n",
373 |     "    if j in fewsm4.macros[0]:\n",
374 |     "        mat[i%30, int(i/30)] = 1\n",
375 |     "    elif j in fewsm4.macros[1]:\n",
376 |     "        mat[i%30, int(i/30)] = 2\n",
377 |     "    elif j in fewsm4.macros[2]:\n",
378 |     "        mat[i%30, int(i/30)] = 3\n",
379 |     "    else:\n",
380 |     "        mat[i%30, int(i/30)] = 4\n",
381 |     "    #print i, i[0]%20, int(i[0]/20), -i[1]\n",
382 |     "my_cmap = cm.get_cmap('viridis')\n",
383 |     "my_cmap.set_under('w')\n",
384 |     "ax.imshow(mat.transpose(), interpolation=\"none\", origin='lower', \\\n",
385 |     "             cmap=my_cmap, vmin = 0.5)"
386 |    ]
387 |   },
388 |   {
389 |    "cell_type": "markdown",
390 |    "metadata": {},
391 |    "source": [
392 |     "Note how the partitioning based on eigenvectors captures the three important regions in the Ramachandran map."
393 |    ]
394 |   }
395 |  ],
396 |  "metadata": {
397 |   "kernelspec": {
398 |    "display_name": "Python 3",
399 |    "language": "python",
400 |    "name": "python3"
401 |   },
402 |   "language_info": {
403 |    "codemirror_mode": {
404 |     "name": "ipython",
405 |     "version": 3
406 |    },
407 |    "file_extension": ".py",
408 |    "mimetype": "text/x-python",
409 |    "name": "python",
410 |    "nbconvert_exporter": "python",
411 |    "pygments_lexer": "ipython3",
412 |    "version": "3.8.8"
413 |   }
414 |  },
415 |  "nbformat": 4,
416 |  "nbformat_minor": 1
417 | }
418 | 


--------------------------------------------------------------------------------
/examples/bistable_potential/2D_smFS_MSM.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "\n",
  8 |     "## MSM of Brownian dynamics simulations of diffusion on a 2D surface\n",
  9 |     "Here we analyze simulations on another simple model system, but one that goes beyond one dimension. Specifically, we use the model by [Berezhkovskii et al, *JCP* (2014)](http://dx.doi.org/10.1063/1.4902243). We run Brownian dynamics simulations on this surface and build a simple Markov state model from it. The data can be downloaded from [OSF](https://osf.io/a2vc7/).\n",
 10 |     "\n",
 11 |     "As always we start by importing some relevant libraries."
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "code",
 16 |    "execution_count": null,
 17 |    "metadata": {},
 18 |    "outputs": [],
 19 |    "source": [
 20 |     "%matplotlib inline\n",
 21 |     "%load_ext autoreload\n",
 22 |     "%autoreload 2\n",
 23 |     "import h5py\n",
 24 |     "import numpy as np"
 25 |    ]
 26 |   },
 27 |   {
 28 |    "cell_type": "code",
 29 |    "execution_count": null,
 30 |    "metadata": {},
 31 |    "outputs": [],
 32 |    "source": [
 33 |     "import matplotlib.pyplot as plt\n",
 34 |     "import matplotlib.cm as cm\n",
 35 |     "import seaborn as sns\n",
 36 |     "sns.set(style=\"ticks\", color_codes=True, font_scale=1.25)\n",
 37 |     "sns.set_style({\"xtick.direction\": \"in\", \"ytick.direction\": \"in\"})"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "markdown",
 42 |    "metadata": {},
 43 |    "source": [
 44 |     "#### Discretization"
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "markdown",
 49 |    "metadata": {},
 50 |    "source": [
 51 |     "Here we load the data obtained from Brownian dynamics simulations of isotropic diffusion on a 2D potential."
 52 |    ]
 53 |   },
 54 |   {
 55 |    "cell_type": "code",
 56 |    "execution_count": null,
 57 |    "metadata": {},
 58 |    "outputs": [],
 59 |    "source": [
 60 |     "h5file = \"../datafiles/brownian_dynamics/cossio_kl1.3_Dx1_Dq1.h5\"\n",
 61 |     "f = h5py.File(h5file, 'r')\n",
 62 |     "data = np.array(f['data'])\n",
 63 |     "f.close()"
 64 |    ]
 65 |   },
 66 |   {
 67 |    "cell_type": "code",
 68 |    "execution_count": null,
 69 |    "metadata": {},
 70 |    "outputs": [],
 71 |    "source": [
 72 |     "fig, ax = plt.subplots(2,1,figsize=(10,3), sharex=True,sharey=False)\n",
 73 |     "ax[0].plot(data[:,0],data[:,1],'.', markersize=1)\n",
 74 |     "ax[1].plot(data[:,0],data[:,2],'g.', markersize=1)\n",
 75 |     "ax[0].set_ylim(-10,10)\n",
 76 |     "ax[1].set_xlim(0,25000)\n",
 77 |     "ax[0].set_ylabel('x')\n",
 78 |     "ax[1].set_ylabel('y')\n",
 79 |     "ax[1].set_xlabel('Time')\n",
 80 |     "plt.tight_layout(h_pad=0)"
 81 |    ]
 82 |   },
 83 |   {
 84 |    "cell_type": "markdown",
 85 |    "metadata": {},
 86 |    "source": [
 87 |     "Clearly the system interconverts between two states. Both coordinates, x and y, are highly correlated, although the free energy landscape, which we can estimate from a Boltzmann inversion, varies a bit depending on the projection we use."
 88 |    ]
 89 |   },
 90 |   {
 91 |    "cell_type": "code",
 92 |    "execution_count": null,
 93 |    "metadata": {},
 94 |    "outputs": [],
 95 |    "source": [
 96 |     "fig, ax = plt.subplots(figsize=(6,4))\n",
 97 |     "hist, bin_edges = np.histogram(data[:,1], bins=np.linspace(-9,9,25), \\\n",
 98 |     "                               density=True)\n",
 99 |     "bin_centers = [0.5*(bin_edges[i]+bin_edges[i+1]) \\\n",
100 |     "               for i in range(len(bin_edges)-1)]\n",
101 |     "ax.plot(bin_centers, -np.log(hist), lw=3, label=\"x\")\n",
102 |     "hist, bin_edges = np.histogram(data[:,2], bins=np.linspace(-9,9,25), \\\n",
103 |     "                               density=True)\n",
104 |     "bin_centers = [0.5*(bin_edges[i]+bin_edges[i+1]) \\\n",
105 |     "               for i in range(len(bin_edges)-1)]\n",
106 |     "ax.plot(bin_centers, -np.log(hist), lw=3, label=\"y\")\n",
107 |     "ax.set_xlim(-7,7)\n",
108 |     "ax.set_ylim(1,9)\n",
109 |     "ax.set_xlabel('coordinate')\n",
110 |     "ax.set_ylabel('PMF ($k_BT$)')\n",
111 |     "ax.legend()"
112 |    ]
113 |   },
114 |   {
115 |    "cell_type": "markdown",
116 |    "metadata": {},
117 |    "source": [
118 |     "We can also represent the energy landscape in two dimensions:"
119 |    ]
120 |   },
121 |   {
122 |    "cell_type": "code",
123 |    "execution_count": null,
124 |    "metadata": {},
125 |    "outputs": [],
126 |    "source": [
127 |     "H, x_edges, y_edges = np.histogram2d(data[:,1],data[:,2], \\\n",
128 |     "            bins=[np.linspace(-9,9,25), np.linspace(-9,9,25)])\n",
129 |     "\n",
130 |     "fig, ax = plt.subplots(figsize=(5,4.5))\n",
131 |     "pmf = -np.log(H.transpose())\n",
132 |     "pmf -= np.min(pmf)\n",
133 |     "cs = ax.contourf(pmf, extent=[x_edges.min(), x_edges.max(), \\\n",
134 |     "                     y_edges.min(), y_edges.max()], \\\n",
135 |     "                   levels=np.arange(0, 6.5,0.5), alpha=0.75)\n",
136 |     "cbar = plt.colorbar(cs)\n",
137 |     "ax.set_xlim(-7,7)\n",
138 |     "ax.set_ylim(-7,7)\n",
139 |     "ax.set_yticks(range(-5,6,5))\n",
140 |     "ax.set_xlabel('$x$', fontsize=18)\n",
141 |     "ax.set_ylabel('$y$', fontsize=18)\n",
142 |     "plt.tight_layout()"
143 |    ]
144 |   },
145 |   {
146 |    "cell_type": "markdown",
147 |    "metadata": {},
148 |    "source": [
149 |     "To construct the MSM, we start by assigning frames to microstates. We first need to import the function that makes the grid."
150 |    ]
151 |   },
152 |   {
153 |    "cell_type": "code",
154 |    "execution_count": null,
155 |    "metadata": {},
156 |    "outputs": [],
157 |    "source": [
158 |     "from scipy.stats import binned_statistic_2d"
159 |    ]
160 |   },
161 |   {
162 |    "cell_type": "code",
163 |    "execution_count": null,
164 |    "metadata": {},
165 |    "outputs": [],
166 |    "source": [
167 |     "statistic, x_edge, y_edge, binnumber = \\\n",
168 |     "    binned_statistic_2d(data[:,1],data[:,2],None,'count', \\\n",
169 |     "                        bins=[np.linspace(-9,9,25), np.linspace(-9,9,25)])"
170 |    ]
171 |   },
172 |   {
173 |    "cell_type": "code",
174 |    "execution_count": null,
175 |    "metadata": {},
176 |    "outputs": [],
177 |    "source": [
178 |     "fig, ax = plt.subplots(figsize=(6,5))\n",
179 |     "\n",
180 |     "grid = ax.imshow(-np.log(statistic.transpose()),origin=\"lower\",cmap=plt.cm.rainbow)\n",
181 |     "\n",
182 |     "cbar = plt.colorbar(grid)\n",
183 |     "ax.set_yticks(range(0,20,5))\n",
184 |     "ax.set_xticks(range(0,20,5))\n",
185 |     "ax.set_xlabel('$x_{bin}$', fontsize=20)\n",
186 |     "ax.set_ylabel('$y_{bin}$', fontsize=20)\n",
187 |     "plt.tight_layout()"
188 |    ]
189 |   },
190 |   {
191 |    "cell_type": "markdown",
192 |    "metadata": {},
193 |    "source": [
194 |     "In this way, the continuous coordinates x and y are mapped onto a discrete microstate space."
195 |    ]
196 |   },
197 |   {
198 |    "cell_type": "code",
199 |    "execution_count": null,
200 |    "metadata": {},
201 |    "outputs": [],
202 |    "source": [
203 |     "fig,ax=plt.subplots(3,1,figsize=(10,6),sharex=True)\n",
204 |     "plt.subplots_adjust(wspace=0, hspace=0)\n",
205 |     "ax[0].plot(range(0,len(data[:,1])),data[:,1])\n",
206 |     "ax[1].plot(range(0,len(data[:,2])),data[:,2],color=\"g\")\n",
207 |     "ax[2].plot(binnumber)\n",
208 |     "ax[0].set_ylabel('x')\n",
209 |     "ax[1].set_ylabel('y')\n",
210 |     "ax[2].set_ylabel(\"s\")\n",
211 |     "ax[2].set_xlabel(\"time (ps)\")\n",
212 |     "ax[2].set_xlim(0, 1500)"
213 |    ]
214 |   },
215 |   {
216 |    "cell_type": "code",
217 |    "execution_count": null,
218 |    "metadata": {},
219 |    "outputs": [],
220 |    "source": [
221 |     "from mastermsm.trajectory import traj"
222 |    ]
223 |   },
224 |   {
225 |    "cell_type": "markdown",
226 |    "metadata": {},
227 |    "source": [
228 |     "We then pass the discrete trajectory to the ``traj`` module to generate an instance of the ``TimeSeries`` class. Using some of its methods, we are able to generate and sort the names of the microstates in the trajectory, which will be useful later."
229 |    ]
230 |   },
231 |   {
232 |    "cell_type": "code",
233 |    "execution_count": null,
234 |    "metadata": {},
235 |    "outputs": [],
236 |    "source": [
237 |     "distraj = traj.TimeSeries(distraj=list(binnumber), dt=1)\n",
238 |     "distraj.find_keys()\n",
239 |     "distraj.keys.sort()"
240 |    ]
241 |   },
242 |   {
243 |    "cell_type": "markdown",
244 |    "metadata": {},
245 |    "source": [
246 |     "### Master Equation Model \n",
247 |     "After generating the discrete trajectory, we can build the master equation model, for which we use the ``msm`` module."
248 |    ]
249 |   },
250 |   {
251 |    "cell_type": "code",
252 |    "execution_count": null,
253 |    "metadata": {},
254 |    "outputs": [],
255 |    "source": [
256 |     "from mastermsm.msm import msm"
257 |    ]
258 |   },
259 |   {
260 |    "cell_type": "markdown",
261 |    "metadata": {},
262 |    "source": [
263 |     "First of all, we will create an instance of the SuperMSM class, which will be useful to produce and validate dynamical models. We pass two arguments: the \"discrete trajectory\" that we have generated above and a value for the boolean sym. This only tells the program that it can symmetrize the data, as we are assuming our trajectory is long enough as to consider it equilibrium sampling."
264 |    ]
265 |   },
266 |   {
267 |    "cell_type": "code",
268 |    "execution_count": null,
269 |    "metadata": {},
270 |    "outputs": [],
271 |    "source": [
272 |     "msm_2D = msm.SuperMSM([distraj], sym=True)"
273 |    ]
274 |   },
275 |   {
276 |    "cell_type": "markdown",
277 |    "metadata": {},
278 |    "source": [
279 |     "We then check the dependence of the slowest relaxation times of the system, $\\tau$, with respect to the choice of lag time $\\Delta t$. These can be accessed as the `tauT` attribute of the corresponding `MSM` instance. We find that they are very well converged even from the shortest value of $\\Delta t$."
280 |    ]
281 |   },
282 |   {
283 |    "cell_type": "code",
284 |    "execution_count": null,
285 |    "metadata": {},
286 |    "outputs": [],
287 |    "source": [
288 |     "for i in [1, 2, 5, 10, 20, 50, 100]:\n",
289 |     "    msm_2D.do_msm(i)\n",
290 |     "    msm_2D.msms[i].do_trans(evecs=True)\n",
291 |     "    msm_2D.msms[i].boots()"
292 |    ]
293 |   },
294 |   {
295 |    "cell_type": "code",
296 |    "execution_count": null,
297 |    "metadata": {},
298 |    "outputs": [],
299 |    "source": [
300 |     "tau_vs_lagt = np.array([[x,msm_2D.msms[x].tauT[0], \\\n",
301 |     "                         msm_2D.msms[x].tau_std[0]] \\\n",
302 |     "               for x in sorted(msm_2D.msms.keys())])"
303 |    ]
304 |   },
305 |   {
306 |    "cell_type": "code",
307 |    "execution_count": null,
308 |    "metadata": {},
309 |    "outputs": [],
310 |    "source": [
311 |     "fig, ax = plt.subplots()\n",
312 |     "ax.errorbar(tau_vs_lagt[:,0],tau_vs_lagt[:,1],fmt='o-', \\\n",
313 |     "            yerr=tau_vs_lagt[:,2], markersize=10)\n",
314 |     "ax.fill_between(tau_vs_lagt[:,0],tau_vs_lagt[:,1]+tau_vs_lagt[:,2], \\\n",
315 |     "                tau_vs_lagt[:,1]-tau_vs_lagt[:,2], alpha=0.1)\n",
316 |     "ax.set_xlabel(r'$\\Delta$t', fontsize=16)\n",
317 |     "ax.set_ylabel(r'$\\tau$', fontsize=16)\n",
318 |     "ax.set_xlim(0.8,120)\n",
319 |     "ax.set_ylim(50,1000)\n",
320 |     "ax.set_yscale('log')\n",
321 |     "ax.set_xscale('log')\n",
322 |     "plt.tight_layout()"
323 |    ]
324 |   },
325 |   {
326 |    "cell_type": "markdown",
327 |    "metadata": {},
328 |    "source": [
329 |     "Clearly, there is no dependence of the relaxation times $\\tau$ on the lag time $\\Delta$t.\n"
330 |    ]
331 |   },
332 |   {
333 |    "cell_type": "markdown",
334 |    "metadata": {},
335 |    "source": [
336 |     "#### Estimation"
337 |    ]
338 |   },
339 |   {
340 |    "cell_type": "code",
341 |    "execution_count": null,
342 |    "metadata": {},
343 |    "outputs": [],
344 |    "source": [
345 |     "lt=2\n",
346 |     "plt.figure()\n",
347 |     "plt.imshow(msm_2D.msms[lt].trans, interpolation='none', \\\n",
348 |     "    origin=\"lower\")\n",
349 |     "plt.ylabel('$\\it{i}$')\n",
350 |     "plt.xlabel('$\\it{j}$')\n",
351 |     "plt.colorbar()\n",
352 |     "plt.figure()\n",
353 |     "plt.imshow(np.log(msm_2D.msms[lt].trans), interpolation='none', \\\n",
354 |     "    origin=\"lower\")\n",
355 |     "plt.ylabel('$\\it{i}$')\n",
356 |     "plt.xlabel('$\\it{j}$')\n",
357 |     "plt.colorbar()"
358 |    ]
359 |   },
360 |   {
361 |    "cell_type": "code",
362 |    "execution_count": null,
363 |    "metadata": {},
364 |    "outputs": [],
365 |    "source": [
366 |     "fig, ax = plt.subplots()\n",
367 |     "ax.errorbar(range(1,12),msm_2D.msms[lt].tauT[0:11], fmt='o-', \\\n",
368 |     "            yerr= msm_2D.msms[lt].tau_std[0:11], ms=10)\n",
369 |     "ax.set_xlabel('Eigenvalue')\n",
370 |     "ax.set_ylabel(r'$\\tau_i$ [ns]') "
371 |    ]
372 |   },
373 |   {
374 |    "cell_type": "markdown",
375 |    "metadata": {},
376 |    "source": [
377 |     "The first mode, captured by $\\lambda_1$, is significantly slower than the others. That mode, which is described by the right eigenvector $\\psi^R_1$, corresponds to the transition of the protein between the folded and unfolded states."
378 |    ]
379 |   },
380 |   {
381 |    "cell_type": "code",
382 |    "execution_count": null,
383 |    "metadata": {},
384 |    "outputs": [],
385 |    "source": [
386 |     "fig, ax = plt.subplots(figsize=(10,4))\n",
387 |     "ax.plot(msm_2D.msms[2].rvecsT[:,1])\n",
388 |     "ax.fill_between(range(len(msm_2D.msms[lt].rvecsT[:,1])), 0, \\\n",
389 |     "                msm_2D.msms[lt].rvecsT[:,1], \\\n",
390 |     "                where=msm_2D.msms[lt].rvecsT[:,1]>0,\\\n",
391 |     "                facecolor='c', interpolate=True,alpha=.4)\n",
392 |     "ax.fill_between(range(len(msm_2D.msms[lt].rvecsT[:,1])), 0, \\\n",
393 |     "                msm_2D.msms[lt].rvecsT[:,1], \\\n",
394 |     "                where=msm_2D.msms[lt].rvecsT[:,1]<0,\\\n",
395 |     "                facecolor='g', interpolate=True,alpha=.4)\n",
396 |     "ax.set_ylabel(\"$\\Psi^R_1$\")\n",
397 |     "plt.show()"
398 |    ]
399 |   },
400 |   {
401 |    "cell_type": "markdown",
402 |    "metadata": {},
403 |    "source": [
404 |     "The projection of $\\psi^R_1$ on the 2D grid shows the transitions between the two conformational states (red and blue)."
405 |    ]
406 |   },
407 |   {
408 |    "cell_type": "code",
409 |    "execution_count": null,
410 |    "metadata": {},
411 |    "outputs": [],
412 |    "source": [
413 |     "fig,ax = plt.subplots(1,2,figsize=(10,5),sharey=True,sharex=True)\n",
414 |     "rv_mat = np.zeros((25,25), float)\n",
415 |     "for i in [x for x in zip(msm_2D.msms[lt].keep_keys, \\\n",
416 |     "                         msm_2D.msms[lt].rvecsT[:,1])]:\n",
417 |     "    unr_ind=np.unravel_index(i[0],(26,26))    \n",
418 |     "    rv_mat[unr_ind[0]-1,unr_ind[1]-1] = -i[1]\n",
419 |     "ax[0].imshow(rv_mat.transpose(), interpolation=\"none\", \\\n",
420 |     "             cmap='bwr',origin=\"lower\")\n",
421 |     "ax[1].imshow(-np.log(statistic.transpose()), \\\n",
422 |     "             cmap=plt.cm.rainbow,origin=\"lower\")\n",
423 |     "ax[1].set_yticks(range(0,26,5))\n",
424 |     "ax[1].set_xticks(range(0,26,5))\n",
425 |     "plt.tight_layout()"
426 |    ]
427 |   },
428 |   {
429 |    "cell_type": "code",
430 |    "execution_count": null,
431 |    "metadata": {},
432 |    "outputs": [],
433 |    "source": []
434 |   }
435 |  ],
436 |  "metadata": {
437 |   "kernelspec": {
438 |    "display_name": "Python 3",
439 |    "language": "python",
440 |    "name": "python3"
441 |   },
442 |   "language_info": {
443 |    "codemirror_mode": {
444 |     "name": "ipython",
445 |     "version": 3
446 |    },
447 |    "file_extension": ".py",
448 |    "mimetype": "text/x-python",
449 |    "name": "python",
450 |    "nbconvert_exporter": "python",
451 |    "pygments_lexer": "ipython3",
452 |    "version": "3.8.8"
453 |   }
454 |  },
455 |  "nbformat": 4,
456 |  "nbformat_minor": 2
457 | }
458 | 


--------------------------------------------------------------------------------
/examples/mueller_potential/mueller.py:
--------------------------------------------------------------------------------
  1 | #!/bin/env python
  2 | 
  3 | #Copyright 2020 Robert T. McGibbon
  4 | 
  5 | #Permission is hereby granted, free of charge, to any person
  6 | # obtaining a copy of this software and associated documentation 
  7 | # files (the "Software"), to deal in the Software without restriction, 
  8 | # including without limitation the rights to use, copy, modify, 
  9 | # merge, publish, distribute, sublicense, and/or sell copies of the 
 10 | # Software, and to permit persons to whom the Software is furnished 
 11 | # to do so, subject to the following conditions:
 12 | 
 13 | # The above copyright notice and this permission notice shall be 
 14 | # included in all copies or substantial portions of the Software.
 15 | 
 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
 17 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 
 18 | # OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
 19 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 
 20 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 
 21 | # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
 22 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
 23 | # OTHER DEALINGS IN THE SOFTWARE.
 24 | 
 25 | from simtk.unit import kelvin, picosecond, femtosecond, nanometer, dalton
 26 | import simtk.openmm as mm
 27 | import matplotlib.pyplot as plt
 28 | import numpy as np
 29 | 
 30 | class MullerForce(mm.CustomExternalForce):
 31 |     """
 32 |     OpenMM custom force for propagation on the Muller Potential. Also
 33 |     includes pure python evaluation of the potential energy surface so that
 34 |     you can do some plotting.
 35 | 
 36 | 
 37 |     """
 38 |     aa = [-1, -1, -6.5, 0.7]
 39 |     bb = [0, 0, 11, 0.6]
 40 |     cc = [-10, -10, -6.5, 0.7]
 41 |     AA = [-200, -100, -170, 15]
 42 |     XX = [1, 0, -0.5, -1]
 43 |     YY = [0, 0.5, 1.5, 1]
 44 | 
 45 |     def __init__(self):
 46 |         # start with a harmonic restraint on the Z coordinate
 47 |         expression = '1000.0 * z^2'
 48 |         for j in range(4):
 49 |             # add the muller terms for the X and Y
 50 |             fmt = dict(aa=self.aa[j], bb=self.bb[j], cc=self.cc[j], AA=self.AA[j], XX=self.XX[j], YY=self.YY[j])
 51 |             expression += '''+ {AA}*exp({aa}*(x - {XX})^2 + {bb}*(x - {XX}) 
 52 |                                *(y - {YY}) + {cc}*(y - {YY})^2)'''.format(**fmt)
 53 |         super(MullerForce, self).__init__(expression)
 54 |     
 55 |     @classmethod
 56 |     def potential(cls, x, y):
 57 |         "Compute the potential at a given point x,y"
 58 |         value = 0
 59 |         for j in range(4):
 60 |             value += cls.AA[j]*np.exp(cls.aa[j]*(x - cls.XX[j])**2 + \
 61 |                 cls.bb[j]*(x - cls.XX[j])*(y - cls.YY[j]) \
 62 |                 + cls.cc[j]*(y - cls.YY[j])**2)
 63 |         return value
 64 | 
 65 |     @classmethod
 66 |     def plot(cls, ax=None, minx=-1.5, maxx=1.2, miny=-0.2, maxy=2, **kwargs):
 67 |         "Plot the Muller potential"
 68 |         grid_width = max(maxx-minx, maxy-miny) / 200.0
 69 |         ax = kwargs.pop('ax', None)
 70 |         xx, yy = np.mgrid[minx : maxx : grid_width, miny : maxy : grid_width]
 71 |         V = cls.potential(xx, yy)
 72 |         # clip off any values greater than 200, since they mess up
 73 |         # the color scheme
 74 |         if ax is None:
 75 |             ax = plt
 76 |         ax.contourf(xx, yy, V.clip(max=200), 40, alpha=0.4, **kwargs)
 77 | 
 78 | if __name__ == "__main__":
 79 |     ##############################################################################
 80 |     # Global parameters
 81 |     ##############################################################################
 82 |     
 83 |     # each particle is totally independent, propagating under the same potential
 84 |     mass = 1.0*dalton
 85 |     temperature = 750*kelvin
 86 |     friction = 100/picosecond
 87 |     timestep = 10.0*femtosecond
 88 |     
 89 |     # Choose starting conformations uniform on the grid between (-1.5, -0.2) and (1.2, 2)
 90 |     startingPositions = (np.random.rand(1, 3)*np.array([2.7, 1.8, 1])) \
 91 |             + np.array([-1.5, -0.2, 0])
 92 |     
 93 |     system = mm.System()
 94 |     mullerforce = MullerForce()
 95 |     system.addParticle(mass)
 96 |     mullerforce.addParticle(0, [])
 97 |     system.addForce(mullerforce)
 98 |     
 99 |     integrator = mm.LangevinIntegrator(temperature, friction, timestep)
100 |     context = mm.Context(system, integrator)
101 |     context.setPositions(startingPositions)
102 |     context.setVelocitiesToTemperature(temperature)
103 |     
104 |     traj = []
105 |     for i in range(int(1e6)):
106 |         traj.append(
107 |                 context.getState(getPositions=True).getPositions(asNumpy=True).value_in_unit(nanometer)[0])
108 |         integrator.step(200)
109 |     traj = np.vstack(traj)
110 |     
111 |     fig, ax = plt.subplots(figsize=(4,4))
112 |     MullerForce.plot(ax=ax)
113 |     ax.plot(traj[:,0], traj[:,1], c='k', lw=0.1)
114 | 


--------------------------------------------------------------------------------
/mastermsm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BioKT/MasterMSM/7e71b0fcf42cc7d840e58a6ca18450d710fbdbb4/mastermsm/__init__.py


--------------------------------------------------------------------------------
/mastermsm/fewsm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BioKT/MasterMSM/7e71b0fcf42cc7d840e58a6ca18450d710fbdbb4/mastermsm/fewsm/__init__.py


--------------------------------------------------------------------------------
/mastermsm/fewsm/fewsm.py:
--------------------------------------------------------------------------------
  1 | """
  2 | This file is part of the MasterMSM package.
  3 | 
  4 | """
  5 | 
  6 | import copy
  7 | #import random
  8 | from ..msm import msm
  9 | from ..trajectory import traj
 10 | #import msm_lib
 11 | from ..fewsm import fewsm_lib
 12 | 
 13 | class FEWSM(msm.MSM):
 14 |     """
 15 |     A class for doing clustering of MSMs into few-state models
 16 | 
 17 |     Attributes
 18 |     ----------
 19 |     keys : dict
 20 |         A dictionary containing the clusters formed.
 21 |     parent : class
 22 |         Instance of the MSM class that we aim to cluster.
 23 | 
 24 |     """
 25 |     def __init__(self, parent, N=2, method="robust"):
 26 |         """
 27 | 
 28 |         Parameters
 29 |         ----------
 30 |         parent : class
 31 |             Instance of the MSM class that we aim to cluster.
 32 |         N : int
 33 |             The desired number of clusters.
 34 | 
 35 |         """
 36 |         self.parent = parent
 37 |         self.N = N
 38 |         self.macros = self.eigen_group(N=self.N, method=method)
 39 | 
 40 |     def eigen_group(self, N=2, method="robust"):
 41 |         """ Splits microstates into macrostates
 42 | 
 43 |         Parameters
 44 |         ----------
 45 |         N : int
 46 |             Number of clusters.
 47 |         method : str
 48 |             The method used for clustering.
 49 | 
 50 |         Returns
 51 |         -------
 52 |         macros : dict
 53 |             A dictionary with the membership to macrostates.
 54 | 
 55 |         """
 56 | 
 57 |         # generate eigenvectors in case the MSM does not have them
 58 |         if not hasattr(self.parent, 'lvecsT'):
 59 |             self.parent.tauT, self.parent.peqT, self.parent.rvecsT, self.parent.lvecsT = \
 60 |                    self.parent.calc_eigsT(evecs=True)
 61 |         lvecs = self.parent.lvecsT
 62 | 
 63 |         # split in desired number of macrostates
 64 |         macros = {}
 65 |         keep_states = self.parent.keep_states
 66 |         macros[0] = list(range(len(keep_states)))
 67 |         for n in range(1, N):
 68 |             if method is "robust":
 69 |                 macro_new, _ = fewsm_lib.split_sigma(macros, lvecs[:,n])
 70 |             elif method is "sign":
 71 |                 macro_new, _ = fewsm_lib.split_sign(macros, lvecs[:,n])
 72 |             macros = copy.deepcopy(macro_new)
 73 |         print ("\n Initial membership of microstates to macrostates:")
 74 |         if len(self.parent.keep_keys) < 100:
 75 |             for k,v in macros.items():
 76 |                 print (k, [self.parent.keep_keys[x] for x in v])
 77 |         else:
 78 |             for k,v in macros.items():
 79 |                 print (k,":", len(v))
 80 |         return macros
 81 | 
 82 |     def map_trajectory(self):
 83 |         """ Maps trajectory onto the PCCA clusters
 84 | 
 85 |         Returns
 86 |         -------
 87 |         mappedtraj : str
 88 |             The mapped trajectory.
 89 | 
 90 |         """
 91 |         print ("\n Mapping trajectory onto macrostates...")
 92 |         mappedtraj = []
 93 |         keep_keys = self.parent.keep_keys
 94 |         mt_states = []
 95 |         for data in self.parent.data:
 96 |             for s in data.distraj:
 97 |                 try:
 98 |                     mt_states.append([k for k, v in self.macros.items() \
 99 |                            if keep_keys.index(s) in v][0])
100 |                 except ValueError:
101 |                     print (" not in keep_keys")
102 |             mt = traj.TimeSeries(distraj=mt_states, dt=data.dt)
103 |             mappedtraj.append(mt)
104 |         self.mappedtraj = mappedtraj
105 |         #super().__init__(mappedtraj, keys=range(self.N), lagt=self.parent.lagt)
106 | 
107 |     def metastability(self):
108 |         """ Calculate metastability according to the definition
109 |         in Chodera et al, J Chem Phys, (2007)
110 | 
111 |         Returns
112 |         -------
113 |         float
114 |             Metastability
115 | 
116 |         """
117 |         return fewsm_lib.metastability(self.trans)
118 | 
119 | #    def optim(self, nsteps=1, nwrite=None, fout="mc.dat"):
120 | #        """ MC optimization using the metastability Q as energy.
121 | #
122 | #        Parameters
123 | #        ----------
124 | #        nsteps : int
125 | #            Number of steps per round of MC and per microstate.
126 | #        nwrite : int
127 | #            Frequency of writing MC output.
128 | #        fout : string
129 | #            File for output of MC progress.
130 | #
131 | #        Returns
132 | #        -------
133 | #        macro_opt : dict
134 | #            Dictionary with the membership to macrostates.
135 | #
136 | #        """
137 | #        print "\n Optimizing the lumped MSM\n"
138 | #        out = open(fout, "w")
139 | #        out.write("#    iter       q \n")
140 | #
141 | #        nmac = self.N
142 | #        nmic = len(self.parent.keep_keys)
143 | #        mcsteps = len(self.count)*nsteps*nmic # mc steps per block
144 | #        mcsteps_max = nmic*20000 # maximum number of mc steps
145 | #        print self.count
146 | #        print self.trans
147 | #        q =  self.metastability()
148 | #        print " initial:", q
149 | #        q_opt = q
150 | #
151 | #        macro = copy.deepcopy(self.macros)
152 | #        cont = True
153 | #        nmc = 0 # number of mc blocks
154 | #        reject = 0
155 | #        while cont:
156 | #            imc = 0
157 | #            out.write ("%6i %12.10f %10.6e\n"%(imc + nmc*mcsteps,q,1))
158 | #            while imc < mcsteps:
159 | #                # try ramdom insertion of a microstate in a macrostate
160 | #                imac = 0
161 | #                jmac = 0
162 | #                while imc < mcsteps:
163 | #                    imc +=1
164 | #                    while True:
165 | #                        # choose microstate to move around
166 | #                        imic = random.choice(range(nmic))
167 | #                        imac = int([x for x in range(nmac) if imic in macro[x]][0])
168 | #                        if len(macro[imac]) > 1:
169 | #                            # choose destination macrostate
170 | #                            jmac = random.choice([x for x in range(nmac) if x is not imac])
171 | #                            break
172 | #                    # move microstate from i to j
173 | #                    macro_new = copy.deepcopy(macro)
174 | #                    macro_new[imac].remove(imic)
175 | #                    macro_new[jmac].append(imic)
176 | #                    # calculate transition count matrix for new mapping
177 | #                    count_mac_new = fewsm_lib.map_micro2macro(self.parent.count, macro_new, self.parent.keep_states)
178 | #                    Tmacro_new = msm_lib.calc_trans(nmac, range(nmac), count_mac_new)
179 | #                    # calculate metastability
180 | #                    q_new = fewsm_lib.metastability(Tmacro_new)
181 | #                    delta = fewsm_lib.beta(imc,mcsteps)*(q - q_new) # calculate increment (Q is a -Energy)
182 | #                    if fewsm_lib.metropolis(delta):
183 | #                        #print "ACCEPT"
184 | #                        macro = copy.deepcopy(macro_new)
185 | #                        count_mac = count_mac_new
186 | #                        q = q_new
187 | #                        if q > q_opt:
188 | #                            q_opt = q
189 | #                            macro_opt = copy.deepcopy(macro)
190 | #                            Tmacro_opt = Tmacro_new
191 | #                            self.macro = copy.deepcopy(macro_opt)
192 | #                    else:
193 | #                        reject+=1
194 | #                        #print " REJECT"
195 | #
196 | #                    out.write ("%6i %12.10e %10.6e\n"%(imc + nmc*mcsteps,q,1./fewsm_lib.beta(imc,mcsteps)))
197 | #                    imc +=1
198 | #                cont = False
199 | #        print " final :", q
200 | #        print " best :", q_opt
201 | #        print " acceptance:",1.-float(reject)/mcsteps
202 | #
203 | #        self.map_trajectory()
204 | #        self.do_count()
205 | #        self.do_trans()
206 | #
207 | #    def write_mapping(self):
208 | #        """
209 | #        Prints files with the mapping between states and clusters
210 | #
211 | #        """
212 | #        for mtraj in self.mappedtraj:
213 | #            try:
214 | #                idf = mtraj.filename.rfind(".dat")
215 | #                filename = mtraj.filename[:idf] + "_mapped_pcca%g.dat"%self.N
216 | #            except ValueError:
217 | #                filename = mtraj.filename + "_mapped_pcca%g.dat"%self.N
218 | #            print " ...writing mapped trajectory at %s"%filename
219 | #            fout = open(filename, "w")
220 | #            micro_data = [x for x in self.parent.data if x.filename == mtraj.filename][0]
221 | #            for x in zip(micro_data.time, micro_data.states, self.data[0].states):
222 | #                fout.write("%10.3f %s %8i\n"%(x[0], x[1], x[2]))
223 | #            fout.close()
224 | 


--------------------------------------------------------------------------------
/mastermsm/fewsm/fewsm_lib.py:
--------------------------------------------------------------------------------
  1 | """
  2 | This file is part of the MasterMSM package.
  3 | 
  4 | """
  5 | import copy, itertools
  6 | import numpy as np
  7 | 
  8 | def map_micro2macro(cmic, mac, states):
  9 |     """ maps microstates into macrostates """
 10 |     m = len(mac)
 11 |     cmac = np.zeros((m, m), int)
 12 |     for i in range(m):
 13 |         for j in range(m):
 14 |             if i == j:
 15 |                 cmac[j,i] = reduce(lambda x, y: x + y, \
 16 |                     [cmic[states[x],states[y]] for (x,y) in \
 17 |                     itertools.product(mac[j],mac[i])])
 18 |             else:
 19 |                 cmac[j,i] = reduce(lambda x, y: x + y, \
 20 |                     [cmic[states[x],states[y]] for (x,y) in \
 21 |                     itertools.product(mac[j],mac[i])])
 22 |     return cmac
 23 | 
 24 | def test_sign(v):
 25 |     """check whether positive and negative signs are present in vector"""
 26 |     test = False
 27 |     if any(v > 0.) and  any(v<0):
 28 |         test = True
 29 |     return test
 30 | 
 31 | def split_sign(macro, lvec):
 32 |     """ split based on sign structure """
 33 |     # calculate spread in eigenvector
 34 |     nt = len(macro)
 35 |     spread = []
 36 |     vals = lvec
 37 |     for _, v in macro.items():
 38 |         # check that there are positive and negative values in evec
 39 |         if test_sign(vals[v]):
 40 |             #spread.append(np.sum(vals**2))
 41 |             spread.append(np.mean(vals[v]**2))
 42 |         else:
 43 |             spread.append(0.)
 44 |     isplit = np.argsort(-np.array(spread))[0]
 45 | #    print "         macrostate to split: %i"%isplit,np.array(spread)
 46 |     # split
 47 |     lvec_split = lvec[macro[isplit]]
 48 | #    print lvec_split
 49 |     elems = []
 50 |     for i in filter(lambda x: lvec_split[x] < 0.,\
 51 |         range(len(macro[isplit]))):
 52 |         elems.append(macro[isplit][i])
 53 |     macro_new = copy.deepcopy(macro)
 54 |     macro_new[nt] = elems
 55 |     # update old macrostate
 56 |     for i in elems:
 57 |         macro_new[isplit].remove(i)
 58 |     return macro_new, vals
 59 | 
 60 | def split_sigma(macro, lvec):
 61 |     """ split based on distribution """
 62 |     nt = len(macro)
 63 | 
 64 |     spread = []
 65 |     for i in macro.keys():
 66 |         spread.append(np.std(lvec[macro[i]]))
 67 |     # split macrostates with maximum spread
 68 |     isplit = np.argsort(-np.array(spread))[0]
 69 |     #print "         macrostate to split: %i"%isplit,spread[isplit]
 70 |     # split based on distribution
 71 |     elems = []
 72 |     keep = []
 73 |     val_max =  np.max(lvec[macro[isplit]])
 74 |     val_min =  np.min(lvec[macro[isplit]])
 75 |     vals = (lvec[macro[isplit]] - val_min)/(val_max - val_min)
 76 |     for i in filter(lambda x: vals[x] < 0.5,range(len(macro[isplit]))):
 77 |         elems.append(macro[isplit][i])
 78 |     for i in filter(lambda x: vals[x] >= 0.5,range(len(macro[isplit]))):
 79 |         keep.append(macro[isplit][i])
 80 |     macro_new = copy.deepcopy(macro)
 81 |     macro_new[nt] = elems
 82 |     #print macro_new
 83 |     # update old macrostate
 84 |     for i in elems:
 85 |         macro_new[isplit].remove(i)
 86 |     macro = copy.deepcopy(macro_new)
 87 |     return macro, vals
 88 | 
 89 | def metastability(T):
 90 |     return np.sum(np.diag(T))
 91 | 
 92 | def beta(imc,mcsteps):
 93 |     # inverse temperature for MCSA
 94 |     x = imc - 1
 95 |     a = 4./mcsteps
 96 |     temp = (1 + (np.exp(-a*x)-1.)/(1.- np.exp(-a*mcsteps))) # MCSA temperature
 97 |     try:
 98 |         beta = 1./temp
 99 |     except ZeroDivisionError:
100 |         beta = 1e20
101 |     return beta
102 | 
def metropolis(delta):
    """ Metropolis acceptance test

    Parameters
    ----------
    delta : float
        The increment to test.

    Returns
    -------
    bool
        True if delta is negative; otherwise True with probability
        min(1, exp(-delta)), decided with a uniform random number.

    """
    # downhill moves are always accepted, without drawing a random number
    if delta < 0:
        return True
    p_accept = min(1.0,np.exp(-delta))
    return bool(np.random.random() < p_accept)
113 | 


--------------------------------------------------------------------------------
/mastermsm/msm/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BioKT/MasterMSM/7e71b0fcf42cc7d840e58a6ca18450d710fbdbb4/mastermsm/msm/__init__.py


--------------------------------------------------------------------------------
/mastermsm/msm/msm_lib.py:
--------------------------------------------------------------------------------
   1 | """
   2 | This file is part of the MasterMSM package.
   3 | 
   4 | """
   5 | import copy
   6 | import numpy as np
   7 | import networkx as nx
   8 | import os #, math
   9 | import tempfile
  10 | from functools import reduce, cmp_to_key
  11 | #import operator
  12 | from scipy import linalg as spla
  13 | #import multiprocessing as mp
  14 | import pickle
  15 | 
# inverse thermal energy 1/(R*T), i.e. in mol/kJ when R = 8.314e-3 kJ/(mol K);
# the factor 300 is presumably the temperature in K -- TODO confirm
beta = 1./(8.314e-3*300)
  18 | 
  19 | #def difference(k1, k2):
  20 | #    l = len(k1)
  21 | #    diff = 0
  22 | #    for i in range(l):
  23 | #        if k1[i] != k2[i]:
  24 | #            diff+=1
  25 | #    return diff
  26 | 
  27 | def calc_eigsK(rate, evecs=False):
  28 |     """ 
  29 |     Calculate eigenvalues and eigenvectors of rate matrix K
  30 | 
  31 |     Parameters
  32 |     -----------
  33 |     rate : array
  34 |         The rate matrix to use.
  35 |     evecs : bool
  36 |         Whether we want the eigenvectors of the rate matrix.
  37 | 
  38 |     Returns:
  39 |     -------
  40 |     tauK : numpy array
  41 |         Relaxation times from K.
  42 |     peqK : numpy array
  43 |         Equilibrium probabilities from K.
  44 |     rvecsK : numpy array, optional
  45 |         Right eigenvectors of K, sorted.
  46 |     lvecsK : numpy array, optional
  47 |         Left eigenvectors of K, sorted.
  48 | 
  49 |     """
  50 |     evalsK, lvecsK, rvecsK = \
  51 |             spla.eig(rate, left=True)
  52 | 
  53 |     # sort modes
  54 |     nkeys = len(rate)
  55 |     elistK = []
  56 |     for i in range(nkeys):
  57 |         elistK.append([i,np.real(evalsK[i])])
  58 |     elistK.sort(key=cmp_to_key(esort))
  59 | 
  60 |     # calculate relaxation times from K and T
  61 |     tauK = []
  62 |     for i in range(nkeys):
  63 |         if np.abs(elistK[i][1]) > 1e-10:
  64 |             iiK, lamK = elistK[i]
  65 |             tauK.append(-1./lamK)
  66 |             if len(tauK) == 1:
  67 |                 ieqK = iiK
  68 | 
  69 |     # equilibrium probabilities
  70 |     ieqK, _ = elistK[0]
  71 |     peqK_sum = reduce(lambda x, y: x + y, map(lambda x: rvecsK[x,ieqK],
  72 |         range(nkeys)))
  73 |     peqK = rvecsK[:,ieqK]/peqK_sum
  74 | 
  75 |     if not evecs:
  76 |         return tauK, peqK
  77 |     else:
  78 |         # sort eigenvectors
  79 |         rvecsK_sorted = np.zeros((nkeys, nkeys), float)
  80 |         lvecsK_sorted = np.zeros((nkeys, nkeys), float)
  81 |         for i in range(nkeys):
  82 |             iiK, lamK = elistK[i]
  83 |             rvecsK_sorted[:,i] = rvecsK[:,iiK]
  84 |             lvecsK_sorted[:,i] = lvecsK[:,iiK]
  85 |         return tauK, peqK, rvecsK_sorted, lvecsK_sorted
  86 | 
  87 | def esort(ei, ej):
  88 |     """ Sorts eigenvalues.
  89 | 
  90 |     Parameters
  91 |     ----------
  92 |     ei : float
  93 |         Eigenvalue i
  94 |     ej : float
  95 |         Eigenvalue j
  96 | 
  97 |     Returns
  98 |     -------
  99 |     bool :
 100 |         Whether the first value is larger than the second.
 101 | 
 102 |     """
 103 |     _, eval_i = ei
 104 |     _, eval_j = ej
 105 | 
 106 |     if eval_j.real > eval_i.real:
 107 |         return 1
 108 |     elif eval_j.real < eval_i.real:
 109 |         return -1
 110 |     else:
 111 |         return 0
 112 | 
 113 | #def find_keys(state_keys, trans, manually_remove):
 114 | #    """ eliminate dead ends """
 115 | #    keep_states = []
 116 | #    keep_keys = []
 117 | #    # eliminate dead ends
 118 | #    nstate = len(state_keys)
 119 | #    for i in range(nstate):
 120 | #        key = state_keys[i]
 121 | #        summ = 0
 122 | #        sumx = 0
 123 | #        for j in range(nstate):
 124 | #            if j!=i:
 125 | #                summ += trans[j][i]   # sources
 126 | #                sumx += trans[i][j] # sinks
 127 | #        if summ > 0 and sumx > 0 and trans[i][i] > 0 and key not in manually_remove:
 128 | #            keep_states.append(i)
 129 | #            keep_keys.append(state_keys[i])
 130 | #    return keep_states,keep_keys
 131 | #
 132 | #def connect_groups(keep_states, trans):
 133 | #    """ check for connected groups """
 134 | #    connected_groups = []
 135 | #    leftover = copy.deepcopy(keep_states)
 136 | #    while len(leftover) > 0:
 137 | #        #print leftover
 138 | #        leftover_new = []
 139 | #        n_old_new_net = 0
 140 | #        new_net = [ leftover[0] ]
 141 | #        n_new_net = len(new_net)
 142 | #        while n_new_net != n_old_new_net:
 143 | #            for i in range(len(leftover)):
 144 | #                l = leftover[i]
 145 | #                if l in new_net:
 146 | #                    continue
 147 | #                summ = 0
 148 | #                for g in new_net:
 149 | #                    summ += trans[l][g]+trans[g][l]
 150 | #                if summ > 0:
 151 | #                    new_net.append(l)
 152 | #            n_old_new_net = n_new_net
 153 | #            n_new_net = len(new_net)
 154 | #            #print " added %i new members" % (n_new_net-n_old_new_net)
 155 | #        leftover_new = filter(lambda x: x not in new_net, leftover)
 156 | #        connected_groups.append(new_net)
 157 | #        leftover = copy.deepcopy(leftover_new)
 158 | #    return connected_groups
 159 | #
 160 | #def isnative(native_string, string):
 161 | #    s = ""
 162 | #    for i in range(len(string)):
 163 | #        if string[i]==native_string[i]:
 164 | #            s+="1"
 165 | #        else:
 166 | #            s+="0"
 167 | #    return s
 168 | 
 169 | def mat_mul_v(m, v):
 170 |     """ Multiplies matrix and vector
 171 | 
 172 |     Parameters
 173 |     ----------
 174 |     m : np.array
 175 |         The matrix.
 176 |     v : np.array
 177 |         The vector.
 178 | 
 179 |     Returns
 180 |     -------
 181 |     w : np.array
 182 |         The result
 183 | 
 184 |     """
 185 |     rows = len(m)
 186 |     w = [0]*rows
 187 |     irange = range(len(v))
 188 |     summ = 0
 189 |     for j in range(rows):
 190 |         r = m[j]
 191 |         for i in irange:
 192 |             summ += r[i]*v[i]
 193 |         w[j], summ = summ,0
 194 |     return w
 195 | 
 196 | #def dotproduct(v1, v2, sum=sum, imap=itertools.imap, mul=operator.mul):
 197 | #    return sum(imap(mul,v1,v2))
 198 | #
 199 | ##def rate_analyze(rate):
 200 | ##   # calculates eigenvalues and eigenvectors from rate matrix
 201 | ##   # calculate symmetrized matrix
 202 | ##   kjisym = kji*(kji.transpose())
 203 | ##   kjisym = sqrt(kjisym)
 204 | ##   for j in arange(nstates):
 205 | ##       kjisym[j,j] = -kjisym[j,j]
 206 | ##   # calculate eigenvalues and eigenvectors
 207 | ##   eigvalsym,eigvectsym = linalg.eig(kjisym)
 208 | ##   # index the solutions
 209 | ##   index = argsort(-eigvalsym)
 210 | ##   ieq = index[0]
 211 | ##   # equilibrium population
 212 | ##   peq = eigvectsym[:,ieq]**2
 213 | ##   # order eigenvalues and calculate left and right eigenvectors
 214 | ##   eigval = zeros((nstates),float)
 215 | ##   PsiR = zeros((nstates,nstates),float)
 216 | ##   PsiL = zeros((nstates,nstates),float)
 217 | ##   for i in arange(nstates):
 218 | ##       eigval[i] = eigvalsym[index[i]]
 219 | ##       PsiR[:,i] = eigvectsym[:,index[i]]*eigvectsym[:,ieq]
 220 | ##       PsiL[:,i] = eigvectsym[:,index[i]]/eigvectsym[:,ieq]
 221 | ##   return eigval,PsiR,PsiL,eigvectsym,peq
 222 | #
 223 | #def propagate(rate, t, pini):
 224 | #    # propagate dynamics using rate matrix exponential
 225 | #    expkt = spla.expm2(rate*t)
 226 | #    return mat_mul_v(expkt,pini)
 227 | #
 228 | #def propagate_eig(elist, rvecs, lvecs, t, pini):
 229 | #    # propagate dynamics using rate matrix exponential using eigenvalues and eigenvectors
 230 | #    nstates = len(pini)
 231 | #    p = np.zeros((nstates),float)
 232 | #    for n in range(nstates):
 233 | #        #print np.exp(-elist[n][1]*t)
 234 | #        i,e = elist[n]
 235 | #        p = p + rvecs[:,i]*(np.dot(lvecs[:,i],pini)*\
 236 | #                np.exp(-abs(e*t)))
 237 | #    return p
 238 | #
 239 | #def bootsfiles(traj_list_dt):
 240 | #    n = len(traj_list_dt)
 241 | #    traj_list_dt_new = []
 242 | #    i = 0
 243 | #    while i < n:
 244 | #        k = int(np.random.random()*n)
 245 | #        traj_list_dt_new.append(traj_list_dt[k])
 246 | #        i += 1
 247 | #    return traj_list_dt_new
 248 | #
 249 | #def boots_pick(filename, blocksize):
 250 | #    raw = open(filename).readlines()
 251 | #    lraw = len(raw)
 252 | #    nblocks = int(lraw/blocksize)
 253 | #    lblock = int(lraw/nblocks)
 254 | #    try:
 255 | #        ib = np.random.randint(nblocks-1)
 256 | #    except ValueError:
 257 | #        ib = 0
 258 | #    return raw[ib*lblock:(ib+1)*lblock]
 259 | #
 260 | #def onrate(states, target, K, peq):
 261 | #    # steady state rate
 262 | #    kon = 0.
 263 | #    for i in states:
 264 | #        if i != target:
 265 | #            if K[target,i] > 0:
 266 | #                kon += K[target,i]*peq[i]
 267 | #    return kon
 268 | #
 269 | def run_commit(states, K, peq, FF, UU):
 270 |     """ Calculate committors and reactive flux
 271 | 
 272 |     Parameters
 273 |     ----------
 274 |     states : list
 275 |         States in the MSM.
 276 |     K : np.array
 277 |         Rate matrix.
 278 |     peq : np.array
 279 |         Equilibrium distribution.
 280 |     FF : list
 281 |         Definitely folded states.
 282 |     UU : list
 283 |         Definitely unfolded states.
 284 | 
 285 |     Returns
 286 |     -------
 287 |     J : np.array
 288 |         Reactive flux matrix.
 289 |     pfold : np.array
 290 |         Values of the committor.
 291 |     sum_flux : float
 292 |         Sum of reactive fluxes.
 293 |     kf : float
 294 |         Folding rate from flux over population relationship.
 295 | 
 296 |     """
 297 |     nstates = len(states)
 298 |     # define end-states
 299 |     UUFF = UU + FF
 300 |     print ("   definitely FF and UU states", UUFF)
 301 |     I = list(filter(lambda x: x not in UU+FF, states))
 302 |     NI = len(I)
 303 | 
 304 |     # calculate committors
 305 |     b = np.zeros([NI], float)
 306 |     A = np.zeros([NI,NI], float)
 307 |     for j_ind in range(NI):
 308 |         j = I[j_ind]
 309 |         summ = 0.
 310 |         for i in FF:
 311 |             summ += K[i][j]
 312 |         b[j_ind] = -summ
 313 |         for i_ind in range(NI):
 314 |             i = I[i_ind]
 315 |             A[j_ind][i_ind] = K[i][j]
 316 |     # solve Ax=b
 317 |     Ainv = np.linalg.inv(A)
 318 |     x = np.dot(Ainv,b)
 319 |     #XX = np.dot(Ainv,A)
 320 | 
 321 |     pfold = np.zeros(nstates,float)
 322 |     for i in range(nstates):
 323 |         if i in UU:
 324 |             pfold[i] = 0.0
 325 |         elif i in FF:
 326 |             pfold[i] = 1.0
 327 |         else:
 328 |             ii = I.index(i)
 329 |             pfold[i] = x[ii]
 330 | 
 331 |     # stationary distribution
 332 |     pss = np.zeros(nstates,float)
 333 |     for i in range(nstates):
 334 |         pss[i] = (1-pfold[i])*peq[i]
 335 | 
 336 |     # flux matrix and reactive flux
 337 |     J = np.zeros([nstates,nstates],float)
 338 |     for i in range(nstates):
 339 |         for j in range(nstates):
 340 |             J[j][i] = K[j][i]*peq[i]*(pfold[j]-pfold[i])
 341 | 
 342 |     # dividing line is committor = 0.5
 343 |     sum_flux = 0
 344 |     left = [x for x in range(nstates) if pfold[x] < 0.5]
 345 |     right = [x for x in range(nstates) if pfold[x] > 0.5]
 346 |     for i in left:
 347 |         for j in right:
 348 |             sum_flux += J[j][i]
 349 | 
 350 |     #sum of populations for all reactant states
 351 |     pU = np.sum([peq[x] for x in range(nstates) if pfold[x] < 0.5])
 352 |  #   pU = np.sum(peq[filter(lambda x: x in UU, range(nstates))])
 353 |     kf = sum_flux/pU
 354 |     return J, pfold, sum_flux, kf
 355 | 
 356 | def calc_count_worker(x):
 357 |     """ mp worker that calculates the count matrix from a trajectory
 358 | 
 359 |     Parameters
 360 |     ----------
 361 |     x : list
 362 |         List containing input for each mp worker. Includes:
 363 |         distraj :the time series of states
 364 |         dt : the timestep for that trajectory
 365 |         keys : the keys used in the assignment
 366 |         lagt : the lag time for construction
 367 | 
 368 |     Returns
 369 |     -------
 370 |     count : array
 371 | 
 372 |     """
 373 |     # parse input from multiprocessing
 374 |     distraj = x[0]
 375 |     dt = x[1]
 376 |     keys = x[2]
 377 |     nkeys = len(keys)
 378 |     lagt = x[3]
 379 |     sliding = x[4]
 380 | 
 381 |     ltraj = len(distraj)
 382 |     lag = int(lagt/dt) # number of frames per lag time
 383 |     if sliding:
 384 |         slider = 1 # every state is initial state
 385 |     else:
 386 |         slider = lag
 387 | 
 388 |     count = np.zeros([nkeys,nkeys], np.int32)
 389 |     for i in range(0, ltraj-lag, slider):
 390 |         j = i + lag
 391 |         state_i = distraj[i]
 392 |         state_j = distraj[j]
 393 |         if state_i in keys:
 394 |             idx_i = keys.index(state_i)
 395 |         if state_j in keys:
 396 |             idx_j = keys.index(state_j)
 397 |         try:
 398 |             count[idx_j][idx_i] += 1
 399 |         except UnboundLocalError:
 400 |             pass
 401 |     return count
 402 | 
 403 | def calc_lifetime(x):
 404 |     """ mp worker that calculates the count matrix from a trajectory
 405 | 
 406 |     Parameters
 407 |     ----------
 408 |     x : list
 409 |         List containing input for each mp worker. Includes:
 410 |         distraj :the time series of states
 411 |         dt : the timestep for that trajectory
 412 |         keys : the keys used in the assignment
 413 | 
 414 |     Returns
 415 |     -------
 416 |     life : dict
 417 | 
 418 |     """
 419 |     # parse input from multiprocessing
 420 |     distraj = x[0]
 421 |     dt = x[1]
 422 |     keys = x[2]
 423 |     ltraj = len(distraj)
 424 | 
 425 |     life = {}
 426 |     l = 0
 427 |     for j in range(1, ltraj):
 428 |         i = j - 1
 429 |         state_i = distraj[i]
 430 |         state_j = distraj[j]
 431 |         if state_i == state_j:
 432 |             l += 1
 433 |         elif state_j not in keys:
 434 |             l += 1
 435 |         else:
 436 |             try:
 437 |                 life[state_i].append(l*dt)
 438 |             except KeyError:
 439 |                 life[state_i] = [l*dt]
 440 |             l = 1
 441 |     #try:
 442 |     #    life[state_i].append(l*dt)
 443 |     #except KeyError:
 444 |     #    life[state_i] = [l*dt]
 445 |     return life
 446 | 
 447 | def traj_split(data=None, lagt=None, fdboots=None):
 448 |     """ Splits trajectories into fragments for bootstrapping
 449 | 
 450 |     Parameters
 451 |     ----------
 452 |     data : list
 453 |         Set of trajectories used for building the MSM.
 454 |     lagt : float
 455 |         Lag time for building the MSM.
 456 | 
 457 |     Returns:
 458 |     -------
 459 |     filetmp : file object
 460 |         Open file object with trajectory fragments.
 461 | 
 462 |     """
 463 |     trajs = [[x.distraj, x.dt] for x in data]
 464 |     ltraj = [len(x[0])*x[1] for x in trajs]
 465 |     ltraj_median = np.median(ltraj)
 466 |     timetot = np.sum(ltraj) # total simulation time
 467 |     while ltraj_median > timetot/20. and ltraj_median > 10.*lagt:
 468 |         trajs_new = []
 469 |         #cut trajectories in chunks
 470 |         for x in trajs:
 471 |             lx = len(x[0])
 472 |             trajs_new.append([x[0][:int(lx/2)], x[1]])
 473 |             trajs_new.append([x[0][int(lx/2):], x[1]])
 474 |         trajs = trajs_new
 475 |         ltraj = [len(x[0])*x[1] for x in trajs]
 476 |         ltraj_median = np.median(ltraj)
 477 |     # save trajs
 478 |     fd, filetmp = tempfile.mkstemp()
 479 |     file = os.fdopen(fd, 'wb')
 480 |     pickle.dump(trajs, file, protocol=pickle.HIGHEST_PROTOCOL)
 481 |     file.close()
 482 |     return filetmp
 483 | 
 484 | def do_boots_worker(x):
 485 |     """ Worker function for parallel bootstrapping.
 486 | 
 487 |     Parameters
 488 |     ----------
 489 |     x : list
 490 |         A list containing the trajectory filename, the states, the lag time
 491 |         and the total number of transitions.
 492 | 
 493 |     """
 494 | 
 495 |     #print "# Process %s running on input %s"%(mp.current_process(), x[0])
 496 |     filetmp, keys, lagt, ncount, slider = x
 497 |     nkeys = len(keys)
 498 |     finp = open(filetmp, 'rb')
 499 |     trans = pickle.load(finp)
 500 |     finp.close()
 501 |     ltrans = len(trans)
 502 |     np.random.seed()
 503 |     ncount_boots = 0
 504 |     count = np.zeros([nkeys, nkeys], np.int32)
 505 |     while ncount_boots < ncount:
 506 |         itrans = np.random.randint(ltrans)
 507 |         count_inp = [trans[itrans][0], trans[itrans][1], keys, lagt, slider]
 508 |         c = calc_count_worker(count_inp)
 509 |         count += np.matrix(c)
 510 |         ncount_boots += np.sum(c)
 511 |         #print ncount_boots, "< %g"%ncount
 512 |     D = nx.DiGraph(count)
 513 |     #keep_states = sorted(nx.strongly_connected_components(D)[0])
 514 |     keep_states = list(sorted(list(nx.strongly_connected_components(D)),
 515 |                 key = len, reverse=True)[0])
 516 |     keep_keys = list(map(lambda x: keys[x], keep_states))
 517 |     nkeep = len(keep_keys)
 518 |     trans = np.zeros([nkeep, nkeep], float)
 519 |     for i in range(nkeep):
 520 |         ni = reduce(lambda x, y: x + y, map(lambda x:
 521 |             count[keep_states[x]][keep_states[i]], range(nkeep)))
 522 |         for j in range(nkeep):
 523 |             trans[j][i] = float(count[keep_states[j]][keep_states[i]])/float(ni)
 524 |     evalsT, rvecsT = spla.eig(trans, left=False)
 525 |     elistT = []
 526 |     for i in range(nkeep):
 527 |         elistT.append([i,np.real(evalsT[i])])
 528 |     elistT.sort(key=cmp_to_key(esort))
 529 |     tauT = []
 530 |     for i in range(1,nkeep):
 531 |         _, lamT = elistT[i]
 532 |         tauT.append(-lagt/np.log(lamT))
 533 |     ieqT, _ = elistT[0]
 534 |     peqT_sum = reduce(lambda x,y: x + y, map(lambda x: rvecsT[x,ieqT],
 535 |              range(nkeep)))
 536 |     peqT = rvecsT[:,ieqT]/peqT_sum
 537 |     return tauT, peqT, trans, keep_keys
 538 | 
 539 | def calc_trans(nkeep=None, keep_states=None, count=None):
 540 |     """ Calculates transition matrix.
 541 | 
 542 |     Uses the maximum likelihood expression by Prinz et al.[1]_
 543 | 
 544 |     Parameters
 545 |     ----------
 546 |     lagt : float
 547 |         Lag time for construction of MSM.
 548 | 
 549 |     Returns
 550 |     -------
 551 |     trans : array
 552 |         The transition probability matrix.
 553 | 
 554 |     Notes
 555 |     -----
 556 |     ..[1] J. H. Prinz, H. Wu, M. Sarich, B. Keller, M. Senne, M. Held,
 557 |     J. D. Chodera, C. Schutte and F. Noe, "Markov state models:
 558 |     Generation and validation", J. Chem. Phys. (2011).
 559 |     """
 560 |     trans = np.zeros([nkeep, nkeep], float)
 561 |     for i in range(nkeep):
 562 |         ni = reduce(lambda x, y: x + y, map(lambda x:
 563 |             count[keep_states[x]][keep_states[i]], range(nkeep)))
 564 |         for j in range(nkeep):
 565 |             trans[j][i] = float(count[keep_states[j]][keep_states[i]])/float(ni)
 566 |     return trans
 567 | 
 568 | def calc_rate(nkeep, trans, lagt):
 569 |     """ Calculate rate matrix from transition matrix.
 570 | 
 571 |     We use a method based on a Taylor expansion.[1]_
 572 | 
 573 |     Parameters
 574 |     ----------
 575 |     nkeep : int
 576 |         Number of states in transition matrix.
 577 |     trans: np.array
 578 |         Transition matrix.
 579 |     lagt : float
 580 |         The lag time.
 581 | 
 582 |     Returns
 583 |     -------
 584 |     rate : np.array
 585 |         The rate matrix.
 586 | 
 587 |     Notes
 588 |     -----
 589 |     ..[1] D. De Sancho, J. Mittal and R. B. Best, "Folding kinetics
 590 |     and unfolded state dynamics of the GB1 hairpin from molecular
 591 |     simulation", J. Chem. Theory Comput. (2013).
 592 | 
 593 |     """
 594 |     rate = trans/lagt
 595 | 
 596 |     # enforce mass conservation
 597 |     for i in range(nkeep):
 598 |         rate[i][i] = -(np.sum(rate[:i,i]) + np.sum(rate[i+1:,i]))
 599 |     return rate
 600 | 
 601 | def rand_rate(nkeep, count):
 602 |     """ Randomly generate initial matrix.
 603 | 
 604 |     Parameters
 605 |     ----------
 606 |     nkeep : int
 607 |         Number of states in transition matrix.
 608 | 
 609 |     count : np.array
 610 |         Transition matrix.
 611 | 
 612 |     Returns
 613 |     -------
 614 |     rand_rate : np.array
 615 |         The random rate matrix.
 616 | 
 617 |     """
 618 |     nkeys = len(count)
 619 | 
 620 |     rand_rate = np.zeros((nkeys, nkeys), float)
 621 |     for i in range(nkeys):
 622 |         for j in range(nkeys):
 623 |             if i != j:
 624 |                 if (count[i,j] !=0)  and (count[j,i] != 0):
 625 |                     rand_rate[j,i] = np.exp(np.random.randn()*-3)
 626 |         rand_rate[i,i] = -np.sum(rand_rate[:,i] )
 627 |     return rand_rate
 628 | 
 629 | def calc_mlrate(nkeep, count, lagt, rate_init):
 630 |     """ Calculate rate matrix using maximum likelihood Bayesian method.
 631 | 
 632 |     We use a the MLPB method described by Buchete and Hummer.[1]_
 633 | 
 634 |     Parameters
 635 |     ----------
 636 |     nkeep : int
 637 |         Number of states in transition matrix.
 638 |     count : np.array
 639 |         Transition matrix.
 640 |     lagt : float
 641 |         The lag time.
 642 | 
 643 |     Returns
 644 |     -------
 645 |     rate : np.array
 646 |         The rate matrix.
 647 | 
 648 |     Notes
 649 |     -----
 650 |     ..[1] N.-V. Buchete and G. Hummer, "Coarse master equations for
 651 |         peptide folding dynamics", J. Phys. Chem. B (2008).
 652 | 
 653 |     """
 654 |     # initialize rate matrix and equilibrium distribution enforcing detailed balance
 655 |     p_prev = np.sum(count, axis=0)/np.float(np.sum(count))
 656 |     rate_prev = detailed_balance(nkeep, rate_init, p_prev)
 657 |     ml_prev = likelihood(nkeep, rate_prev, count, lagt)
 658 | 
 659 |     # initialize MC sampling
 660 |     print ("MLPB optimization of rate matrix:\n START")
 661 |     #print rate_prev,"\n", p_prev, ml_prev
 662 |     ml_ref = ml_prev
 663 |     ml_cum = [ml_prev]
 664 |     temp_cum = [1.]
 665 |     nstep = 0
 666 |     nsteps = 1000*nkeep**2
 667 |     k = -3./nsteps
 668 |     nfreq = 10
 669 |     ncycle = 0
 670 |     accept = 0
 671 |     rate_best = rate_prev
 672 |     ml_best = ml_prev
 673 |     while True:
 674 |         # random choice of MC move
 675 |         rate, p = mc_move(nkeep, rate_prev, p_prev)
 676 |         rate = detailed_balance(nkeep, rate, p)
 677 | 
 678 |         # calculate likelihood
 679 |         ml = likelihood(nkeep, rate, count, lagt)
 680 | 
 681 |         # Boltzmann acceptance / rejection
 682 |         if ml < ml_prev:
 683 |             #print " ACCEPT\n"
 684 |             rate_prev = rate
 685 |             p_prev = p
 686 |             ml_prev = ml
 687 |             accept +=1
 688 |             if ml < ml_best:
 689 |                 ml_best = ml
 690 |                 rate_best = rate
 691 |         else:
 692 |             delta_ml = ml - ml_prev
 693 |             beta = (1 - np.exp(k*nsteps))/(np.exp(k*nstep) - np.exp(k*nsteps)) if ncycle > 0 else 1
 694 |             weight = np.exp(-beta*delta_ml)
 695 |             if np.random.random() < weight:
 696 |                 #print " ACCEPT BOLTZMANN\n"
 697 |                 rate_prev = rate
 698 |                 p_prev = p
 699 |                 ml_prev = ml
 700 |                 accept +=1
 701 |         nstep +=1
 702 | 
 703 |         if nstep > nsteps:
 704 |             ncycle +=1
 705 |             ml_cum.append(ml_prev)
 706 |             temp_cum.append(1./beta)
 707 |             print ("\n END of cycle %g"%ncycle)
 708 |             print ("   acceptance :%g"%(np.float(accept)/nsteps))
 709 |             accept = 0
 710 |             print (rate_prev)
 711 |             print ("   L old =", ml_ref,"; L new:", ml_prev)
 712 |             improvement = (ml_ref - ml_cum[-1])/ml_ref
 713 |             print ("   improvement :%g"%improvement)
 714 |             if improvement > 0.001 or ncycle < 3:
 715 |                 nstep = 0
 716 |                 ml_ref = np.mean(ml_cum[-nsteps:])
 717 |             else:
 718 |                 break
 719 |         elif nstep % nfreq == 0:
 720 |             ml_cum.append(ml_prev)
 721 |             temp_cum.append(1./beta)
 722 | 
 723 |     return rate_best, ml_cum, temp_cum
 724 | 
 725 | def mc_move(nkeep, rate, peq):
 726 |     """ Make MC move in either rate or equilibrium probability.
 727 | 
 728 |     Changes in equilibrium probabilities are introduced so that the new value
 729 |     is drawn from a normal distribution centered at the current value.
 730 | 
 731 |     Parameters
 732 |     ----------
 733 |     nkeep : int
 734 |         The number of states.
 735 |     rate : array
 736 |         The rate matrix obeying detailed balance.
 737 |     peq : array
 738 |         The equilibrium probability
 739 | 
 740 |     """
 741 |     nparam = nkeep*(nkeep - 1)/2 + nkeep - 1
 742 |     npeq = nkeep - 1
 743 | 
 744 |     while True:
 745 |         i = np.random.randint(0, nparam)
 746 |         #print i
 747 |         rate_new = copy.deepcopy(rate)
 748 |         peq_new = copy.deepcopy(peq)
 749 |         if i < npeq:
 750 |             #print " Peq"
 751 |             scale = np.mean(peq)*0.1
 752 | #            peq_new[i] = np.random.normal(loc=peq[i], scale=scale)
 753 |             peq_new[i] = peq[i] + (np.random.random() - 0.5)*scale
 754 |             peq_new = peq_new/np.sum(peq_new)
 755 |             if np.all(peq_new > 0):
 756 |                 break
 757 |         else:
 758 |             #print " Rate"
 759 |             i = np.random.randint(0, nkeep - 1)
 760 |             try:
 761 |                 j = np.random.randint(i + 1, nkeep - 1)
 762 |             except ValueError:
 763 |                 j = nkeep - 1
 764 |             try:
 765 |                 scale = np.mean(np.abs(rate>0.))*0.1
 766 |                 #rate_new[j,i] = np.random.normal(loc=rate[j,i], scale=scale)
 767 |                 rate_new[j,i] = rate[j,i] + (np.random.random() - 0.5)*scale
 768 |                 if np.all((rate_new - np.diag(np.diag(rate_new))) >= 0):
 769 |                     break
 770 |             except ValueError:
 771 |                 pass
 772 |             #else:
 773 |             #    print rate_new - np.diag(np.diag(rate_new))
 774 | 
 775 |     return rate_new, peq_new
 776 | 
 777 | 
 778 | def detailed_balance(nkeep, rate, peq):
 779 |     """ Enforce detailed balance in rate matrix.
 780 | 
 781 |     Parameters
 782 |     ----------
 783 |     nkeep : int
 784 |         The number of states.
 785 |     rate : array
 786 |         The rate matrix obeying detailed balance.
 787 |     peq : array
 788 |         The equilibrium probability
 789 | 
 790 |     """
 791 |     for i in range(nkeep):
 792 |         for j in range(i):
 793 |             rate[j,i] = rate[i,j]*peq[j]/peq[i]
 794 |         rate[i,i] = 0
 795 |         rate[i,i] = -np.sum(rate[:,i])
 796 |     return rate
 797 | 
 798 | def likelihood(nkeep, rate, count, lagt):
 799 |     """ Likelihood of a rate matrix given a count matrix
 800 | 
 801 |     We use the procedure described by Buchete and Hummer.[1]_
 802 | 
 803 |     Parameters
 804 |     ----------
 805 |     nkeep : int
 806 |         Number of states in transition matrix.
 807 |     count : np.array
 808 |         Transition matrix.
 809 |     lagt : float
 810 |         The lag time.
 811 | 
 812 |     Returns
 813 |     -------
 814 |     mlog_like : float
 815 |         The log likelihood
 816 | 
 817 |     Notes
 818 |     -----
 819 |     ..[1] N.-V. Buchete and G. Hummer, "Coarse master equations for
 820 |         peptide folding dynamics", J. Phys. Chem. B (2008).
 821 | 
 822 |     """
 823 |     # calculate symmetrized rate matrix
 824 |     ratesym = np.multiply(rate,rate.transpose())
 825 |     ratesym = np.sqrt(ratesym)
 826 |     for i in range(nkeep):
 827 |         ratesym[i,i] = -ratesym[i,i]
 828 | 
 829 |     # calculate eigenvalues and eigenvectors
 830 |     evalsym, evectsym = np.linalg.eig(ratesym)
 831 | 
 832 |     # index the solutions
 833 |     indx_eig = np.argsort(-evalsym)
 834 | 
 835 |     # equilibrium population
 836 |     ieq = indx_eig[0]
 837 | 
 838 |     # calculate left and right eigenvectors
 839 |     phiR = np.zeros((nkeep, nkeep))
 840 |     phiL = np.zeros((nkeep, nkeep))
 841 |     for i in range(nkeep):
 842 |         phiR[:,i] = evectsym[:,i]*evectsym[:,ieq]
 843 |         phiL[:,i] = evectsym[:,i]/evectsym[:,ieq]
 844 | 
 845 |     # calculate propagators
 846 |     prop = np.zeros((nkeep, nkeep), float)
 847 |     for i in range(nkeep):
 848 |         for j in range(nkeep):
 849 |             for n in range(nkeep):
 850 |                 prop[j,i] = prop[j,i] + \
 851 |                  phiR[j,n]*phiL[i,n]*np.exp(-abs(evalsym[n])*lagt)
 852 | 
 853 |     # calculate likelihood using matrix of transitions
 854 |     log_like = 0.
 855 |     for i in range(nkeep):
 856 |         for j in range(nkeep):
 857 |             if count[j,i] > 0:
 858 |                 log_like = log_like + float(count[j,i])*np.log(prop[j,i])
 859 | 
 860 |     return -log_like
 861 | 
 862 | def partial_rate(K, elem):
 863 |     """ Calculates the derivative of the rate matrix
 864 | 
 865 |     Parameters
 866 |     ----------
 867 |     K : np.array
 868 |         The rate matrix.
 869 |     elem : int
 870 |         Integer corresponding to which we calculate the
 871 |         partial derivative.
 872 | 
 873 |     Returns
 874 |     -------
 875 |     d_K : np.array
 876 |         Partial derivative of rate matrix.
 877 | 
 878 |     """
 879 |     nstates = len(K[0])
 880 |     d_K = np.zeros((nstates,nstates), float)
 881 |     for i in range(nstates):
 882 |         if i != elem:
 883 |             d_K[i,elem] = beta/2.*K[i,elem];
 884 |             d_K[elem,i] = -beta/2.*K[elem,i];
 885 |     for i in range(nstates):
 886 |         d_K[i,i] = -np.sum(d_K[:,i])
 887 |     return d_K
 888 | 
 889 | def partial_peq(peq, elem):
 890 |     """ Calculates derivative of equilibrium distribution
 891 | 
 892 |     Parameters
 893 |     ----------
 894 |     peq : np.array
 895 |         Equilibrium probabilities.
 896 | 
 897 |     """
 898 |     nstates = len(peq)
 899 |     d_peq = []
 900 |     for i in range(nstates):
 901 |         if i != elem:
 902 |             d_peq.append(beta*peq[i]*peq[elem])
 903 |         else:
 904 |             d_peq.append(-beta*peq[i]*(1. - peq[i]))
 905 |     return d_peq
 906 | 
 907 | def partial_pfold(states, K, d_K, FF, UU, elem):
 908 |     """ Calculates derivative of pfold """
 909 |     nstates = len(states)
 910 |     # define end-states
 911 |     I = list(filter(lambda x: x not in UU+FF, range(nstates)))
 912 |     NI = len(I)
 913 |     # calculate committors
 914 |     b = np.zeros([NI], float)
 915 |     A = np.zeros([NI,NI], float)
 916 |     db = np.zeros([NI], float)
 917 |     dA = np.zeros([NI,NI], float)
 918 |     for j_ind in range(NI):
 919 |         j = I[j_ind]
 920 |         summ = 0.
 921 |         sumd = 0.
 922 |         for i in FF:
 923 |             summ += K[i][j]
 924 |             sumd += d_K[i][j]
 925 |         b[j_ind] = -summ
 926 |         db[j_ind] = -sumd
 927 |         for i_ind in range(NI):
 928 |             i = I[i_ind]
 929 |             A[j_ind][i_ind] = K[i][j]
 930 |             dA[j_ind][i_ind] = d_K[i][j]
 931 | 
 932 |     # solve Ax + Bd(x) = c
 933 |     Ainv = np.linalg.inv(A)
 934 |     pfold = np.dot(Ainv,b)
 935 |     x = np.dot(Ainv,db - np.dot(dA,pfold))
 936 | 
 937 |     dpfold = np.zeros(nstates,float)
 938 |     for i in range(nstates):
 939 |         if i in UU:
 940 |             dpfold[i] = 0.0
 941 |         elif i in FF:
 942 |             dpfold[i] = 0.0
 943 |         else:
 944 |             ii = I.index(i)
 945 |             dpfold[i] = x[ii]
 946 |     return dpfold
 947 | 
 948 | def partial_flux(states, peq, K, pfold, d_peq, d_K, d_pfold, target):
 949 |     """ Calculates derivative of reactive flux """
 950 |     # flux matrix and reactive flux
 951 |     nstates = len(states)
 952 |     sum_d_flux = 0
 953 |     d_J = np.zeros((nstates,nstates),float)
 954 |     for i in range(nstates):
 955 |         for j in range(nstates):
 956 |             d_J[j][i] = d_K[j][i]*peq[i]*(pfold[j]-pfold[i]) + \
 957 |                 K[j][i]*d_peq[i]*(pfold[j]-pfold[i]) + \
 958 |                 K[j][i]*peq[i]*(d_pfold[j]-d_pfold[i])
 959 |             if j in target and K[j][i]>0: #  dividing line corresponds to I to F transitions
 960 |                 sum_d_flux += d_J[j][i]
 961 |     return sum_d_flux
 962 | 
 963 | def propagate_worker(x):
 964 |     """ Propagate dynamics using rate matrix exponential
 965 |     
 966 |     Parameters
 967 |     ----------
 968 |     x : list
 969 |         Contains K, the time and the initial population
 970 | 
 971 |     Returns
 972 |     -------
 973 |     popul : np.array
 974 |         The propagated population
 975 |     
 976 |     """
 977 |     rate, t, pini = x
 978 |     expkt = spla.expm(rate*t)
 979 |     popul = mat_mul_v(expkt, pini)
 980 |     return popul
 981 | 
 982 | def propagateT_worker(x):
 983 |     """ Propagate dynamics using power of transition matrix 
 984 |     
 985 |     Parameters
 986 |     ----------
 987 |     x : list
 988 |         Contains T, the power and initial population
 989 | 
 990 | 
 991 |     Returns
 992 |     -------
 993 |     popul : np.array
 994 |         The propagated population
 995 | 
 996 |     """
 997 |     trans, power, pini = x
 998 |     trans_pow = np.linalg.matrix_power(trans,power)
 999 |     popul = mat_mul_v(trans_pow, pini)
1000 |     return popul
1001 | 
1002 | #def gen_path_lengths(keys, J, pfold, flux, FF, UU):
1003 | #    """ use BHS prescription for defining path lenghts """
1004 | #    nkeys = len(keys)
1005 | #    I = [x for x in range(nkeys) if x not in FF+UU]
1006 | #    Jnode = []
1007 | #    # calculate flux going through nodes
1008 | #    for i in range(nkeys):
1009 | #        Jnode.append(np.sum([J[i,x] for x in range(nkeys) \
1010 | #                             if pfold[x] < pfold[i]]))
1011 | #    # define matrix with edge lengths
1012 | #    Jpath = np.zeros((nkeys, nkeys), float)
1013 | #    for i in UU:
1014 | #        for j in I + FF:
1015 | #            if J[j,i] > 0:
1016 | #                Jpath[j,i] = np.log(flux/J[j,i]) + 1
1017 | #    for i in I:
1018 | #        for j in [x for x in FF+I if pfold[x] > pfold[i]]:
1019 | #            if J[j,i] > 0:
1020 | #                Jpath[j,i] = np.log(Jnode[j]/J[j,i]) + 1
1021 | #    return Jnode, Jpath
1022 | 
1023 | #def calc_acf(x):
1024 | #    """ mp worker that calculates the ACF for a given mode
1025 | #
1026 | #    Parameters
1027 | #    ----------
1028 | #    x : list
1029 | #        List containing input for each mp worker. Includes:
1030 | #        distraj :the time series of states
1031 | #        dt : the timestep for that trajectory
1032 | #        keys : the keys used in the assignment
1033 | #        lagt : the lag time for construction
1034 | #
1035 | #    Returns
1036 | #    -------
1037 | #    acf : array
1038 | #        The autocorrelation function from that trajectory.
1039 | #
1040 | #    """
1041 | #    # parse input from multiprocessing
1042 | #    distraj = x[0]
1043 | #    dt = x[1]
1044 | #    keys = x[2]
1045 | #    nkeys = len(keys)
1046 | #    lagt = x[3]
1047 | ##    time =
1048 | ##    sliding = x[4]
1049 | #
1050 | ##    ltraj = len(distraj)
1051 | ##    lag = int(lagt/dt) # number of frames per lag time
1052 | ##    if sliding:
1053 | ##        slider = 1 # every state is initial state
1054 | ##    else:
1055 | ##        slider = lag
1056 | ##
1057 | ##    count = np.zeros([nkeys,nkeys], np.int32)
1058 | ##    for i in range(0, ltraj-lag, slider):
1059 | ##        j = i + lag
1060 | ##        state_i = distraj[i]
1061 | ##        state_j = distraj[j]
1062 | ##        if state_i in keys:
1063 | ##            idx_i = keys.index(state_i)
1064 | ##        if state_j in keys:
1065 | ##            idx_j = keys.index(state_j)
1066 | ##        try:
1067 | ##            count[idx_j][idx_i] += 1
1068 | ##        except UnboundLocalError:
1069 | ##            pass
1070 | #    return acf
1071 | 
1072 | #def project_worker(x):
1073 | #    """ project simulation trajectories on eigenmodes"""
1074 | #    trans, power, pini = x
1075 | #    trans_pow = np.linalg.matrix_power(trans,power)
1076 | #    popul = mat_mul_v(trans_pow, pini)
1077 | #    return popul
1078 | #
1079 | 
def peq_averages(peq_boots, keep_keys_boots, keys):
    """ Return averages from bootstrap results

    Parameters
    ----------
    peq_boots : list
        List of Peq arrays, one per bootstrap sample.
    keep_keys_boots : list
        List of key lists; the n-th list gives the keys that the
        elements of the n-th Peq array correspond to.
    keys : list
        List of keys

    Returns
    -------
    peq_ave : list
        Peq averages, one per key. Keys that are not present in any
        bootstrap sample get 0.
    peq_std : list
        Peq standard deviations, one per key (0 for absent keys).

    """
    nboots = len(peq_boots)
    peq_ave = []
    peq_std = []
    for k in keys:
        # values of this key in the samples that kept it
        data = []
        for n in range(nboots):
            kept = keep_keys_boots[n]
            if k in kept:
                data.append(peq_boots[n][kept.index(k)])
        if data:
            peq_ave.append(np.mean(data))
            peq_std.append(np.std(data))
        else:
            # numpy only warns (and returns nan) for empty input, so the
            # empty case has to be handled explicitly
            peq_ave.append(0.)
            peq_std.append(0.)
    return peq_ave, peq_std
1121 | 
def tau_averages(tau_boots, keys):
    """ Return averages from bootstrap results

    Parameters
    ----------
    tau_boots : list
        List of Tau arrays, one per bootstrap sample.
    keys : list
        List of keys; len(keys) - 1 relaxation times are considered.

    Returns
    -------
    tau_ave : list
        Tau averages
    tau_std : list
        Tau std

    """
    tau_ave = []
    tau_std = []
    for mode in range(len(keys) - 1):
        samples = []
        try:
            for taus in tau_boots:
                value = taus[mode]
                # nan values are left out of the statistics
                if not np.isnan(value):
                    samples.append(value)
        except IndexError:
            # a mode is dropped altogether as soon as one bootstrap
            # sample does not have it
            continue
        tau_ave.append(np.mean(samples))
        tau_std.append(np.std(samples))
    return tau_ave, tau_std
1150 | 
1151 | 
def matrix_ave(mat_boots, keep_keys_boots, keys):
    """ Return averages from bootstrap results

    Parameters
    ----------
    mat_boots : list
        List of matrix arrays, one per bootstrap sample.
    keep_keys_boots : list
        List of key lists; the n-th list gives the keys that the rows
        and columns of the n-th matrix correspond to.
    keys : list
        List of keys

    Returns
    -------
    mat_ave : list
        Matrix averages, as a list of rows following the order of keys.
        A bootstrap sample that lacks one of the two keys of an element
        contributes 0 to it.
    mat_std : list
        Matrix standard deviations, in the same layout.

    """
    nboots = len(keep_keys_boots)
    # position of every key in every bootstrap sample (None when absent),
    # looked up once and not again for every matrix element
    positions = [[kept.index(k) if k in kept else None
                  for kept in keep_keys_boots] for k in keys]

    mat_ave = []
    mat_std = []
    for row_pos in positions:
        mat_ave_keep = []
        mat_std_keep = []
        for col_pos in positions:
            data = []
            for n in range(nboots):
                l = row_pos[n]
                ll = col_pos[n]
                if l is None or ll is None:
                    # list.index raises ValueError (not IndexError) for a
                    # missing key, so absence is tested for explicitly
                    data.append(0.)
                    continue
                try:
                    data.append(mat_boots[n][l,ll])
                except IndexError:
                    data.append(0.)
            if data:
                mat_ave_keep.append(np.mean(data))
                mat_std_keep.append(np.std(data))
            else:
                # no bootstrap samples at all
                mat_ave_keep.append(0.)
                mat_std_keep.append(0.)
        mat_ave.append(mat_ave_keep)
        mat_std.append(mat_std_keep)
    return mat_ave, mat_std
1196 | 


--------------------------------------------------------------------------------
/mastermsm/test/README.md:
--------------------------------------------------------------------------------
1 | # Testing
2 | 
3 | Testing of the modules of MasterMSM is available through Python's `unittest` library. For some of the test cases, MD data will be downloaded into a folder inside `test`. To run the test suite, do:
4 | 
5 | ```
6 | cd mastermsm
7 | python -m unittest
8 | ```
9 | 


--------------------------------------------------------------------------------
/mastermsm/test/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BioKT/MasterMSM/7e71b0fcf42cc7d840e58a6ca18450d710fbdbb4/mastermsm/test/__init__.py


--------------------------------------------------------------------------------
/mastermsm/test/download_data.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from urllib.request import urlretrieve
 3 | 
 4 | def download_test_data():
 5 |     base_url = "https://mastermsm.s3.eu-west-2.amazonaws.com/"
 6 |     gro = "test/data/alaTB.gro"
 7 |     xtc = "test/data/protein_only.xtc"
 8 |     cpath = os.getcwd()
 9 |     if os.path.exists(cpath+"/test/data") is False:
10 |         os.mkdir(cpath+"/test/data")
11 |     for fname in [gro,xtc]:
12 |         if os.path.isfile(cpath+"/%s"%fname) is False:
13 |             urlretrieve(base_url+fname, fname)
14 | 


--------------------------------------------------------------------------------
/mastermsm/test/test_fewsm.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | import mdtraj as md
  3 | import numpy as np
  4 | from mastermsm.trajectory import traj_lib, traj
  5 | from mastermsm.msm import msm, msm_lib
  6 | from mastermsm.fewsm import fewsm, fewsm_lib
  7 | from test.download_data import download_test_data
  8 | import os, pickle
  9 | 
 10 | class TestFewSM_Lib(unittest.TestCase):
 11 |     def setUp(self):
 12 |         pass
 13 | 
 14 |     def test_sign(self):
 15 |         v = np.array([0] * 3)
 16 |         test = fewsm_lib.test_sign(v)
 17 |         self.assertEqual(test, False)
 18 |         v = np.array([-1, 0, 1])
 19 |         test = fewsm_lib.test_sign(v)
 20 |         self.assertEqual(test, True)
 21 | 
 22 |     def test_metastability(self):
 23 |         T_test = np.random.rand(10,10)
 24 |         meta = fewsm_lib.metastability(T_test)
 25 |         self.assertIsInstance(meta, float)
 26 |         self.assertEqual(meta, np.sum(np.diag(T_test)))
 27 | 
 28 |     def test_metropolis(self):
 29 |         delta = np.random.random()
 30 |         accept = fewsm_lib.metropolis(delta)
 31 |         self.assertIsInstance(accept, bool)
 32 |         delta = -1.
 33 |         accept = fewsm_lib.metropolis(delta)
 34 |         self.assertTrue(accept)
 35 | 
 36 |     def test_beta(self):
 37 |         tests = [
 38 |             {
 39 |                 "imc": 2,
 40 |                 "mcsasteps": 10,
 41 |             },
 42 |             {
 43 |                 "imc":1,
 44 |                 "mcsasteps":1
 45 |             }
 46 |         ]
 47 |         for test in tests:
 48 | 
 49 |             beta = fewsm_lib.beta(test["imc"], test["mcsasteps"])
 50 |             self.assertIsInstance(beta, float)
 51 |     def test_split_sign(self):
 52 |         macro = {}
 53 |         for i in range(10):
 54 |             macro[i] = [i * 10 + j for j in range(10)]
 55 |         lvec = np.random.rand(100)
 56 | 
 57 |         new_macro, vals = fewsm_lib.split_sign(macro, lvec)
 58 |         self.assertIsInstance(new_macro, dict)
 59 |         self.assertGreaterEqual(len(new_macro.keys()), len(macro.keys()))
 60 | 
 61 |     def test_split_sigma(self):
 62 |         macro = {}
 63 |         for i in range(10):
 64 |             macro[i] = [i * 10 + j for j in range(10)]
 65 |         lvec = np.random.rand(100)
 66 | 
 67 |         new_macro, vals = fewsm_lib.split_sigma(macro, lvec)
 68 |         self.assertIsInstance(new_macro, dict)
 69 |         self.assertGreaterEqual(len(new_macro.keys()), len(macro.keys()))
 70 | 
 71 | class TestFewSM(unittest.TestCase):
 72 | 
 73 |     def setUp(self):
 74 |         download_test_data()
 75 |         self.tr = traj.TimeSeries(top='test/data/alaTB.gro', \
 76 |                                   traj=['test/data/protein_only.xtc'])
 77 |         self.tr.discretize('rama', states=['A', 'E'])
 78 |         self.tr.find_keys()
 79 |         self.msm = msm.SuperMSM([self.tr])
 80 |         self.msm.do_msm(10)
 81 |         self.msm.msms[10].do_trans()
 82 | 
 83 |     def test_attributes(self):
 84 |         self.fewsm = fewsm.FEWSM(parent=self.msm.msms[10])
 85 |         self.assertIsNotNone(self.fewsm.macros)
 86 |         self.assertEqual(len(self.fewsm.macros), 2)
 87 | 
 88 |     def test_map_trajectory(self):
 89 |         self.fewsm = fewsm.FEWSM(parent=self.msm.msms[10])
 90 |         self.fewsm.map_trajectory()
 91 |         self.mapped = self.fewsm.mappedtraj[0]
 92 |         self.assertIsNotNone(self.mapped)
 93 |         self.assertIsInstance(self.mapped, traj.TimeSeries)
 94 |         self.assertTrue(hasattr(self.mapped, 'dt'))
 95 |         self.assertTrue(hasattr(self.mapped, 'distraj'))
 96 |         self.assertEqual(len(set(self.mapped.distraj)), 2)
 97 |         self.assertEqual(sorted(set(self.mapped.distraj)), [0, 1])
 98 | 
 99 |     def test_eigen_group(self):
100 |         self.fewsm = fewsm.FEWSM(parent=self.msm.msms[10])
101 |         macros = self.fewsm.eigen_group()
102 |         print("MACROS! ", macros)
103 |         self.assertIsInstance(macros, dict)
104 | 


--------------------------------------------------------------------------------
/mastermsm/test/test_msm.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | import mdtraj as md
  3 | import numpy as np
  4 | from mastermsm.trajectory import traj_lib, traj
  5 | from mastermsm.msm import msm, msm_lib
  6 | from test.download_data import download_test_data
  7 | import os, pickle
  8 | 
# inverse thermal energy 1/(R*T) in mol/kJ, with R = 8.314e-3 kJ/(mol K)
# and T = 300 K
beta = 1./(8.314e-3*300)
 11 | 
 12 | class TestMSMLib(unittest.TestCase):
 13 |     def test_esort(self):
 14 |         self.assertTrue(hasattr(msm_lib, 'esort'))
 15 |         self.assertTrue(callable(msm_lib.esort))
 16 |         self.esort = msm_lib.esort([0,float(1)], [1,float(2)])
 17 |         self.assertEqual(self.esort, 1)
 18 |         self.esort = msm_lib.esort([0,float(100)], [1,float(2)])
 19 |         self.assertEqual(self.esort, -1)
 20 |         self.esort = msm_lib.esort([100,float(1)], [1,float(1)])
 21 |         self.assertEqual(self.esort, 0)
 22 | 
 23 |     def test_mat_mul_v(self):
 24 |         self.assertTrue(hasattr(msm_lib,'mat_mul_v'))
 25 |         self.assertTrue(callable(msm_lib.mat_mul_v))
 26 |         self.matrix = np.array([
 27 |             [1, 2, 3],
 28 |             [4, 5, 6]
 29 |         ])
 30 |         self.vector = np.array(
 31 |             [1, 0, 1]
 32 |         )
 33 |         self.assertEqual(msm_lib.mat_mul_v(self.matrix, self.vector),  [4, 10])
 34 |         self.matrix = np.array([
 35 |             [-5, -4, 2],
 36 |             [1, 6, -3],
 37 |             [3, 5.5, -4]
 38 |         ])
 39 |         self.vector = np.array(
 40 |             [1, 2, -3]
 41 |         )
 42 |         self.assertEqual(msm_lib.mat_mul_v(self.matrix, self.vector), [-19, 22, 26])
 43 | 
 44 |     def test_rand_rate(self):
 45 |         testT = np.array([
 46 |             [10, 2, 1],
 47 |             [1, 1, 1],
 48 |             [0, 1, 0]
 49 |         ])
 50 |         self.random1 = msm_lib.rand_rate(nkeep= 3, count= testT)
 51 |         self.random2 = msm_lib.rand_rate(nkeep= 3, count= testT)
 52 |         self.assertEqual(self.random1.shape, (3, 3))
 53 |         self.assertFalse((self.random1 == self.random2).all())
 54 | 
 55 |     def test_traj_split(self):
 56 |         traj1 = traj.TimeSeries(distraj=[1, 2, 3], dt=1.)
 57 |         traj2 = traj.TimeSeries(distraj=[3, 2, 1], dt=2.)
 58 |         trajs = [traj1, traj2]
 59 |         self.filepath = msm_lib.traj_split(data=trajs, lagt=10)
 60 |         self.assertIsInstance(self.filepath, str)
 61 |         self.assertTrue(os.path.exists(self.filepath))
 62 |         os.remove(self.filepath)  # clean temp file
 63 | 
 64 |     def calc_trans(self):
 65 |         self.testT = msm_lib.calc_trans(nkeep=10)
 66 |         self.assertIsInstance(self.testT, np.ndarray)
 67 |         self.assertEqual(self.testT.shape, (10,10))
 68 | 
 69 |     def test_calc_rate(self):
 70 |         self.testT = np.array([
 71 |             [1, 2, 3],
 72 |             [0, 0, 0],
 73 |             [10, 10, 10]
 74 | 
 75 |         ])
 76 |         self.rate = msm_lib.calc_rate(nkeep=3, trans=self.testT, lagt=10)
 77 |         self.assertIsInstance(self.rate, np.ndarray)
 78 |         self.assertEqual(self.rate.shape, (3, 3))
 79 | 
 80 |     def test_calc_lifetime(self):
 81 |         distraj = [1, 1, 1, 2]
 82 |         dt = 1.
 83 |         keys = [1, 2]
 84 |         data = [distraj, dt, keys]
 85 |         self.life = msm_lib.calc_lifetime(data)
 86 |         self.assertIsInstance(self.life, dict)
 87 | 
 88 |     def test_partial_rate(self):
 89 |         test_nstates = 3
 90 |         test_K = np.random.rand(test_nstates,test_nstates)
 91 |         d_K_1 = msm_lib.partial_rate(test_K, 1)
 92 |         for i in range(test_nstates):
 93 |             if i != 1:
 94 |                 self.assertAlmostEqual(d_K_1[i,1] / test_K[i,1], beta/2)
 95 |                 self.assertAlmostEqual(d_K_1[1, i] / test_K[1, i], -beta / 2)
 96 |         self.assertEqual(d_K_1.shape, (test_nstates, test_nstates))
 97 | 
 98 |     def test_partial_peq(self):
 99 |         test_nstates = 3
100 |         test_peq = np.random.rand(3)
101 |         d_peq_1 = msm_lib.partial_peq(test_peq,1)
102 |         self.assertEqual(len(d_peq_1), test_nstates)
103 |         for elem in range(test_nstates):
104 |             d_peq_elem = msm_lib.partial_peq(test_peq, elem)
105 |             for i in range(test_nstates):
106 |                 if i != elem:
107 |                     self.assertAlmostEqual(d_peq_elem[i] / (test_peq[elem] * test_peq[i]), beta)
108 |                 else:
109 |                     self.assertAlmostEqual(d_peq_elem[i] / (test_peq[i] * (1. - test_peq[i])), -beta)
110 | 
111 |     def test_partial_pfold(self):
112 |         states = range(3)
113 |         K = np.random.rand(2, 2)
114 |         d_K = np.random.rand(2, 2)
115 |         FF = [0]
116 |         UU = [2]
117 |         res_dpfold = msm_lib.partial_pfold(states, K, d_K, FF, UU,
118 |                                            np.random.randint(0, 2))  # the last int parameter is not used
119 |         self.assertEqual(len(res_dpfold), len(states))
120 |         self.assertIsInstance(res_dpfold, np.ndarray)
121 |         self.assertIsInstance(res_dpfold[0], float)
122 | 
123 |     def test_partial_flux(self):
124 |         nstates = np.random.randint(2,50)
125 |         states = range(nstates)
126 |         peq = np.random.rand(nstates)
127 |         K = np.random.rand(nstates,nstates)
128 |         pfold = np.random.rand(nstates)
129 |         d_peq = np.random.rand(nstates)
130 |         d_K = np.random.rand(nstates,nstates)
131 |         d_pfold = np.random.rand(nstates)
132 |         target = [0]
133 | 
134 |         sum_d_flux = 0
135 |         d_J = np.zeros((nstates, nstates), float)
136 |         for i in range(nstates):
137 |             for j in range(nstates):
138 |                 d_J[j][i] = d_K[j][i] * peq[i] * (pfold[j] - pfold[i]) + \
139 |                             K[j][i] * d_peq[i] * (pfold[j] - pfold[i]) + \
140 |                             K[j][i] * peq[i] * (d_pfold[j] - d_pfold[i])
141 |                 if j in target and K[j][i] > 0:  # dividing line corresponds to I to F transitions
142 |                     sum_d_flux += d_J[j][i]
143 |         res_sum_d_flux = msm_lib.partial_flux(states, peq, K, pfold,d_peq, d_K, d_pfold, target)
144 | 
145 |         self.assertIsNotNone(res_sum_d_flux)
146 |         self.assertIsInstance(res_sum_d_flux, float)
147 | 
148 | 
149 | 
150 |     def test_tau_averages(self):
151 |         tau_boots_test = np.random.rand(2, 2)
152 |         keys_test = range(3)
153 |         res_tau_ave, res_tau_std = msm_lib.tau_averages(tau_boots_test, keys_test)
154 |         self.assertEqual(len(res_tau_ave),len(keys_test)-1)
155 |         self.assertEqual(len(res_tau_std),len(keys_test)-1)
156 |         self.assertIsInstance(res_tau_std, list)
157 |         self.assertIsInstance(res_tau_ave, list)
158 |         self.assertIsInstance(res_tau_ave[0],float)
159 |         self.assertIsInstance(res_tau_std[0], float)
160 | 
161 |     def test_peq_averages(self):
162 |         peq_boots_test = np.random.rand(2,3)
163 |         keep_keys_boots_test = [['A','E','O'],['A','E','O']]
164 |         keys = ['A','E','O']
165 |         res_peq_ave, res_peq_std = msm_lib.peq_averages(peq_boots_test, keep_keys_boots_test, keys)
166 |         self.assertEqual(len(res_peq_ave),len(keys))
167 |         self.assertEqual(len(res_peq_std),len(keys))
168 |         self.assertIsInstance(res_peq_ave, list)
169 |         self.assertIsInstance(res_peq_std, list)
170 |         self.assertIsInstance(res_peq_ave[0], float)
171 |         self.assertIsInstance(res_peq_std[0], float)
172 | 
173 |     def test_propagate_worker(self):
174 |         t = 0
175 |         rate = np.random.rand(2,2)
176 |         pini = np.random.rand(2,2)
177 |         x_test = [rate, t, pini]
178 |         res_popul = msm_lib.propagate_worker(x_test)
179 |         self.assertIsInstance(res_popul, list)
180 |         self.assertIsInstance(res_popul[0], np.ndarray)
181 |         self.assertIsInstance(res_popul[0][0], float)
182 | 
183 |     def test_propagateT_worker(self):
184 |         t = 0
185 |         rate = np.random.rand(2,2)
186 |         pini = np.random.rand(2,2)
187 |         x_test = [rate, t, pini]
188 |         res_popul = msm_lib.propagateT_worker(x_test)
189 |         self.assertIsInstance(res_popul, list)
190 |         self.assertIsInstance(res_popul[0], np.ndarray)
191 |         self.assertIsInstance(res_popul[0][0], float)
192 | 
193 |     def test_detailed_balance(self):
194 |         nkeep_test = 2
195 |         rate = np.array(np.random.rand(nkeep_test,nkeep_test))
196 |         peq = np.random.rand(nkeep_test)
197 |         res_rate = msm_lib.detailed_balance(nkeep_test, rate, peq)
198 |         self.assertEqual(res_rate.shape, (nkeep_test,nkeep_test))
199 |         self.assertIsInstance(res_rate,np.ndarray)
200 |         self.assertIsInstance(res_rate[0][0],float)
201 | 
202 |     def test_likelihood(self):
203 |         nkeep_test = 2
204 |         rate = np.array(np.random.rand(nkeep_test,nkeep_test))
205 |         count = np.array(np.random.randint(0, 10**5, size=(nkeep_test,nkeep_test)))
206 |         lagt = np.random.randint(1,1000)
207 |         res_mlog_like = msm_lib.likelihood(nkeep_test,rate,count,lagt)
208 |         self.assertIsInstance(res_mlog_like, float)
209 |         self.assertIsNotNone(res_mlog_like)
210 |         self.assertGreater(res_mlog_like, 0)
211 | 
212 |     def test_calc_mlrate(self):
213 |         nkeep_test = 2
214 |         rate_init = np.array(np.random.rand(nkeep_test, nkeep_test))
215 |         count = np.array(np.random.randint(0, 10 ** 5, size=(nkeep_test, nkeep_test)))
216 |         lagt = np.random.randint(1, 1000)
217 |         res_rate, res_ml, res_beta = msm_lib.calc_mlrate(nkeep_test,  count, lagt, rate_init)
218 |         self.assertIsInstance(res_rate, np.ndarray)
219 |         self.assertIsNotNone(res_rate)
220 |         self.assertIsNotNone(res_ml)
221 |         self.assertIsNotNone(res_beta)
222 | 
223 |     def test_mc_move(self):
224 |         nkeep_test = np.random.randint(2,100)
225 |         rate = np.random.rand(nkeep_test,nkeep_test)
226 |         peq_test = np.random.rand(nkeep_test)
227 |         db_rate = msm_lib.detailed_balance(nkeep_test,rate,peq_test)
228 |         new_rate, new_peq = msm_lib.mc_move(nkeep_test, db_rate, peq_test)
229 |         self.assertFalse(np.array_equal(db_rate, new_rate))
230 |         self.assertEqual(db_rate.shape, new_rate.shape)
231 |         self.assertEqual(peq_test.shape, new_peq.shape)
232 | 
233 |     def test_calc_eigsK(self):
234 |         nstates = np.random.randint(2,100)
235 |         rate_test = np.random.rand(nstates,nstates)
236 |         res_tauK,res_peqK = msm_lib.calc_eigsK(rate_test)
237 |         self.assertIsInstance(res_tauK, list)
238 | 
239 |         self.assertEqual(len(res_tauK), nstates)
240 |         self.assertEqual(len(res_peqK), nstates)
241 |         self.assertIsInstance(res_tauK[0], np.float)
242 |         self.assertIsInstance(res_peqK[0], np.complex)
243 | 
244 |         res_tauK, res_peqK, res_rvecsK, res_lvecsK = msm_lib.calc_eigsK(rate_test, evecs=True)
245 |         self.assertIsNotNone(res_rvecsK)
246 |         self.assertIsNotNone(res_lvecsK)
247 |         self.assertIsInstance(res_lvecsK, np.ndarray)
248 |         self.assertIsInstance(res_rvecsK, np.ndarray)
249 | 
250 |     def test_run_commits(self):
251 |         nstates = np.random.randint(2,100)
252 |         states = range(nstates)
253 |         K = np.random.rand(nstates, nstates)
254 |         peq = np.random.rand(nstates)
255 |         FF = [0]
256 |         UU = [2]
257 |         J, pfold, sum_flux, kf = msm_lib.run_commit(states, K, peq, FF, UU)
258 |         self.assertIsNotNone(J)
259 |         self.assertIsNotNone(pfold)
260 |         self.assertIsNotNone(sum_flux)
261 |         self.assertIsNotNone(kf)
262 |         self.assertIsInstance(kf, float)
263 |         self.assertGreater(kf, 0)
264 |         self.assertEqual(J.shape, K.shape)
265 |         self.assertEqual(len(pfold), nstates)
266 |         self.assertIsInstance(pfold[0], float)
267 |         self.assertIsInstance(J[0][0], float)
268 | 
269 |     def test_do_boots_worker(self):
270 | 
271 |         filetmp = "test_msm_temp.pickle"
272 |         keys = ['A', 'E']
273 |         lagt = np.random.randint(1,100)
274 |         slider = 1
275 |         ncount = 10
276 |         x = [filetmp, keys, lagt, ncount, slider]
277 |         # result = msm_lib.do_boots_worker(x)
278 |         # tauT, peqT, trans, keep_keys = result
279 |         # print(tauT, peqT, trans, keep_keys)
280 | 
281 | 
282 | 
283 | 
284 | 
285 | 
286 | 
287 | 
288 | 
289 | 
290 | 
291 | 
292 | 
293 | 
class TestSuperMSM(unittest.TestCase):
    """Tests of SuperMSM built from the downloaded test trajectory."""

    # Attributes that the tests below expect on an MSM after a
    # bootstrap run.
    _BOOT_ATTRS = ('tau_ave', 'tau_std', 'peq_ave', 'peq_std')

    def setUp(self):
        # Discretize the test trajectory into 'A', 'E' and 'O' states.
        download_test_data()
        topology = 'test/data/alaTB.gro'
        trajectories = ['test/data/protein_only.xtc']
        self.tr = traj.TimeSeries(top=topology, traj=trajectories)
        self.tr.discretize('rama', states=['A', 'E', 'O'])
        self.tr.find_keys()
        self.msm = msm.SuperMSM([self.tr])

    def test_init(self):
        self.assertIsNotNone(self.msm)
        self.assertTrue(hasattr(self.msm, 'data'))
        self.assertEqual(self.msm.data, [self.tr])
        self.assertEqual(self.msm.dt, 1.0)
        # the same check with more than one trajectory
        two_trajs = [self.tr, self.tr]
        self.msm = msm.SuperMSM(two_trajs)
        self.assertEqual(len(self.msm.data), 2)

    def test_merge_trajs(self):
        # Merge with a synthetic trajectory that brings one new key, 'L'.
        extra = traj.TimeSeries(distraj=['L', 'L', 'L', 'A'], dt=2.0)
        extra.keys = ['L', 'A']
        keys_before = self.msm.keys
        self.msm.data = [self.tr, extra]
        keys_after = self.msm._merge_trajs()
        self.assertEqual(len(keys_after), len(keys_before) + 1)
        self.assertEqual(sorted(keys_after), ['A', 'E', 'L'])

    def test_max_dt(self):
        # The larger of the two time steps is reported.
        extra = traj.TimeSeries(distraj=['L', 'L', 'L', 'A'], dt=2.0)
        dt_before = self.msm.dt
        self.msm.data = [self.tr, extra]
        dt_after = self.msm._max_dt()
        self.assertEqual(dt_after, 2.0)

    def test_do_msm(self):
        self.msm.do_msm(lagt=1)
        built = self.msm.msms[1]
        self.assertIsInstance(built, msm.MSM)
        self.assertEqual(self.msm.msms[1].lagt, 1)

    def test_convergence(self):
        lagtimes = np.array(range(10, 100, 10))
        self.msm.convergence_test(time=lagtimes)
        for lagt in lagtimes:
            for attribute in self._BOOT_ATTRS:
                self.assertTrue(hasattr(self.msm.msms[lagt], attribute))

    def test_do_boots(self):
        lagt = 10
        self.msm.do_msm(lagt)
        self.msm.msms[lagt].boots()

        for attribute in self._BOOT_ATTRS:
            self.assertTrue(hasattr(self.msm.msms[lagt], attribute))

    def test_ck_test(self):
        init = ['A']
        time = np.array(range(50, 210, 25))
        pMSM, pMD, epMD = self.msm.ck_test(init=init, time=time)
        for result in (pMSM, pMD, epMD):
            self.assertIsNotNone(result)
        self.assertEqual(len(pMSM), len(time))
        self.assertEqual(len(epMD), 10)

        self.assertIsInstance(pMSM, list)
        self.assertIsInstance(pMSM[0], tuple)
        self.assertIsInstance(pMD, np.ndarray)
        self.assertIsInstance(epMD, np.ndarray)

    def test_do_pfold(self):
        states = [['A'], ['E']]
        folded, unfolded = states
        for lagt in [1, 10, 100]:
            self.msm.do_msm(lagt)
            self.msm.msms[lagt].boots()
            self.msm.msms[lagt].do_trans()
            self.msm.msms[lagt].do_rate()

            self.msm.msms[lagt].do_pfold(FF=folded, UU=unfolded)
            for attribute in ('pfold', 'J', 'sum_flux', 'kf'):
                self.assertTrue(hasattr(self.msm.msms[lagt], attribute))
            self.assertIsInstance(self.msm.msms[lagt].kf, np.float64)
            self.assertEqual(len(self.msm.msms[lagt].J), len(states))

    def test_lb_rate(self):
        self.msm.do_lbrate()
        self.assertIsNotNone(self.msm.tauK)
        self.assertIsNotNone(self.msm.peqK)
        self.assertIsNotNone(self.msm.rvecsK)
        self.assertIsNotNone(self.msm.lvecsK)
        self.assertEqual(len(self.msm.tauK), len(self.msm.keys) - 1)
        expected_shape = (len(self.msm.keys), len(self.msm.keys))
        self.assertEqual(self.msm.rvecsK.shape, expected_shape)
396 | 
397 | 
398 | 
class TestMSM(unittest.TestCase):
    """Tests of the MSM class on two synthetic random trajectories."""

    def setUp(self):
        # Two random discrete trajectories over states 1..nstates.
        download_test_data()
        self.nstates = np.random.randint(3, 100)
        distraj_1 = np.random.randint(1, self.nstates + 1, size=1000).tolist()
        traj_1 = traj.TimeSeries(distraj=distraj_1, dt=1.)
        distraj_2 = np.random.randint(1, self.nstates + 1, size=1000).tolist()
        traj_2 = traj.TimeSeries(distraj=distraj_2, dt=2.)
        self.data = np.array([
            traj_1,
            traj_2
        ])
        self.lagt = 10
        self.keys = [i for i in range(1, self.nstates + 1)]
        msm_obj = msm.MSM(data=self.data, lagt=self.lagt, keys=self.keys, sym=True)
        self.msm = msm_obj

    def test_init(self):
        # With no arguments every attribute is unset; with arguments they
        # are stored as given.
        self.msm_empty = msm.MSM()
        self.assertIsNotNone(self.msm_empty)
        self.assertIsNone(self.msm_empty.data)
        self.assertIsNone(self.msm_empty.lagt)
        self.assertIsNone(self.msm_empty.keys)
        self.assertFalse(self.msm_empty.sym)

        self.assertIsNotNone(self.msm)
        self.assertIsNotNone(self.msm.data)
        self.assertIsNotNone(self.msm.keys)
        self.assertIsNotNone(self.msm.lagt)
        self.assertTrue(self.msm.sym)
        self.assertTrue(np.array_equal(self.data, self.msm.data))
        self.assertEqual(self.msm.lagt, self.lagt)
        self.assertTrue(np.array_equal(self.keys, self.msm.keys))

    def test_do_count(self):
        self.msm.do_count()
        self.assertIsNotNone(self.msm.keep_states)
        self.assertIsNotNone(self.msm.keep_keys)

    def test_calc_count_multi(self):
        count = self.msm.calc_count_multi()
        self.assertIsNotNone(count)
        self.assertIsInstance(count, np.ndarray)
        self.assertEqual(count.shape, (self.nstates, self.nstates))

    def test_check_connect(self):
        self.msm.do_count()
        keep_states, keep_keys = self.msm.check_connect()
        self.assertEqual(len(keep_keys), len(keep_states))
        self.assertEqual(self.msm.keep_keys, self.keys)

    def test_do_trans(self):
        # Eigenvectors are only stored when requested with evecs=True.
        self.msm.do_count()
        self.msm.do_trans(evecs=False)
        self.assertIsNotNone(self.msm.tauT)
        self.assertIsNotNone(self.msm.trans)
        self.assertIsNotNone(self.msm.peqT)
        self.assertFalse(hasattr(self.msm, "rvecsT"))
        self.assertFalse(hasattr(self.msm, "lvecsT"))
        self.assertEqual(len(self.msm.tauT), self.nstates - 1)
        self.assertEqual(len(self.msm.peqT), self.nstates)
        self.assertEqual(self.msm.trans.shape, (self.nstates, self.nstates))
        self.msm.do_trans(evecs=True)
        self.assertTrue(hasattr(self.msm, "rvecsT"))
        self.assertTrue(hasattr(self.msm, "lvecsT"))
        self.assertEqual(len(self.msm.rvecsT), self.nstates)
        self.assertEqual(len(self.msm.lvecsT), self.nstates)

    def test_do_rate(self):
        self.msm.do_count()
        self.msm.do_trans()
        self.msm.do_rate(evecs=False)
        self.assertIsNotNone(self.msm.rate)
        self.assertIsNotNone(self.msm.tauK)
        self.assertIsNotNone(self.msm.peqK)
        self.assertEqual(len(self.msm.tauK), self.nstates - 1)
        self.assertEqual(len(self.msm.peqK), self.nstates)
        self.msm.do_rate(evecs=True)
        self.assertIsNotNone(self.msm.rvecsK)
        self.assertIsNotNone(self.msm.lvecsK)

    def test_calc_eigsT(self):
        self.msm.do_count()
        self.msm.do_trans()
        tauT, peqT, rvecsT_sorted, lvecsT_sorted = self.msm.calc_eigsT(evecs=True)
        self.assertIsNotNone(tauT)
        self.assertIsNotNone(peqT)
        self.assertEqual(len(tauT), self.nstates - 1)
        self.assertEqual(len(peqT), self.nstates)
        self.assertIsNotNone(rvecsT_sorted)
        self.assertIsNotNone(lvecsT_sorted)

    def test_calc_eigsK(self):
        # NOTE(review): despite its name this test calls calc_eigsT, so
        # it repeats test_calc_eigsT. Presumably the rate-matrix
        # eigen-decomposition was meant; confirm which MSM method
        # provides it before changing the call.
        self.msm.do_count()
        self.msm.do_trans()
        tauK, peqK, rvecsK_sorted, lvecsK_sorted = self.msm.calc_eigsT(evecs=True)
        self.assertIsNotNone(tauK)
        self.assertIsNotNone(peqK)
        self.assertEqual(len(tauK), self.nstates - 1)
        self.assertEqual(len(peqK), self.nstates)
        self.assertIsNotNone(rvecsK_sorted)
        self.assertIsNotNone(lvecsK_sorted)

    def test_boots(self):
        self.msm.do_count()
        self.msm.do_trans()
        self.msm.boots()
        self.assertIsNotNone(self.msm.tau_ave)
        self.assertIsNotNone(self.msm.tau_std)
        self.assertIsNotNone(self.msm.peq_ave)
        self.assertIsNotNone(self.msm.peq_std)
        self.assertEqual(len(self.msm.tau_ave), self.nstates - 1)
        self.assertEqual(len(self.msm.tau_std), self.nstates - 1)
        self.assertEqual(len(self.msm.peq_std), self.nstates)
        self.assertEqual(len(self.msm.peq_ave), self.nstates)

    def test_sensitivity(self):
        self.msm.do_count()
        self.msm.do_trans()
        self.msm.do_rate()
        # Draw two different states for the folded and unfolded ends.
        # Independent draws could pick the same state for both, which
        # makes the folding calculation ill-defined and the test flaky.
        folded = np.random.randint(1, self.nstates + 1)
        unfolded = np.random.randint(1, self.nstates + 1)
        while unfolded == folded:
            unfolded = np.random.randint(1, self.nstates + 1)
        FF = [folded]
        UU = [unfolded]
        self.msm.sensitivity(FF=FF, UU=UU)
        self.assertIsNotNone(self.msm.kf)
        self.assertIsNotNone(self.msm.d_pu)
        self.assertIsNotNone(self.msm.d_lnkf)
        self.assertIsNotNone(self.msm.dJ)
        self.assertIsInstance(self.msm.kf, float)
        self.assertEqual(len(self.msm.d_pu), self.nstates)
        self.assertEqual(len(self.msm.d_lnkf), self.nstates)
        self.assertEqual(len(self.msm.dJ), self.nstates)
        self.assertIsInstance(self.msm.d_pu[0], float)
        self.assertIsInstance(self.msm.dJ[0], float)
        self.assertIsInstance(self.msm.d_lnkf[0], float)

    def test_propagateK(self):
        # Propagation from a random initial population gives 20 strictly
        # increasing time points, each with one population per state.
        random_p0 = np.random.rand(self.nstates)
        self.msm.do_count()
        self.msm.do_trans()
        self.msm.do_rate()
        time, popul = self.msm.propagateK(p0=random_p0)
        self.assertIsNotNone(time)
        self.assertIsInstance(time, np.ndarray)
        self.assertIsInstance(popul, list)
        self.assertEqual(len(time), 20)
        self.assertEqual(len(popul), 20)
        self.assertEqual(len(popul[0]), self.nstates)

        for ind, t in enumerate(time):
            if ind != 0:
                self.assertGreater(t, time[ind - 1])

    def test_propagateT(self):
        random_p0 = np.random.rand(self.nstates)
        self.msm.do_count()
        self.msm.do_trans()
        self.msm.do_rate()
        tcum, popul = self.msm.propagateT(p0=random_p0)
        self.assertIsNotNone(tcum)
        self.assertIsInstance(tcum, list)
        self.assertIsInstance(popul, list)
        self.assertEqual(len(tcum), 20)
        self.assertEqual(len(popul), 20)
        self.assertEqual(len(popul[0]), self.nstates)

    def test_acf_mode(self):
        # One autocorrelation function per mode, i.e. per kept key
        # except one.
        self.msm.do_count()
        self.msm.do_trans(evecs=True)
        self.msm.do_rate()
        acf_ave = self.msm.acf_mode()
        self.assertIsInstance(acf_ave, dict)
        self.assertEqual(len(acf_ave.keys()), len(self.msm.keep_keys) - 1)
        modes = [key for key in acf_ave.keys()]

        self.assertIsInstance(acf_ave[modes[0]][0], float)
581 | 
582 | 
583 | 
584 | 
585 | 
586 | 


--------------------------------------------------------------------------------
/mastermsm/test/test_trajectory.py:
--------------------------------------------------------------------------------
  1 | import unittest
  2 | import mdtraj as md
  3 | import numpy as np
  4 | from mastermsm.trajectory import traj_lib, traj
  5 | from mastermsm.msm import msm, msm_lib
  6 | from test.download_data import download_test_data
  7 | import os
  8 | 
  9 | 
 10 | class TestMDTrajLib(unittest.TestCase):
 11 |     def setUp(self):
 12 |         download_test_data()
 13 |         self.tr = traj.TimeSeries(top='test/data/alaTB.gro', \
 14 |                                   traj=['test/data/protein_only.xtc'])
 15 | 
 16 |     def test_inrange(self):
 17 |         self.inrange = traj_lib._inrange(2, 1, 3)
 18 |         self.assertEqual(self.inrange, 1)
 19 |         self.inrange = traj_lib._inrange(0, 1, 2)
 20 |         self.assertEqual(self.inrange, 0)
 21 |         self.inrange = traj_lib._inrange(1, 1, 2)
 22 |         self.assertEqual(self.inrange, 0)
 23 | 
 24 |     def test_inbounds(self):
 25 |         TBA_bounds = {}
 26 |         TBA_bounds['A'] = [-100., -40., -50., -10.]
 27 |         TBA_bounds['E'] = [-180., -40., 125., 165.]
 28 |         TBA_bounds['L'] = [50., 100., -40., 70.0]
 29 | 
 30 |     #   test in alpha helix
 31 |         self.inbounds = traj_lib._inbounds(TBA_bounds['A'], -90, -40)
 32 |         self.assertEqual(self.inbounds, 1)
 33 |     #   test in beta-sheet
 34 |         self.inbounds = traj_lib._inbounds(TBA_bounds['E'], -90, 140)
 35 |         self.assertEqual(self.inbounds, 1)
 36 |     #   test in left-handed alpha helix
 37 |         self.inbounds = traj_lib._inbounds(TBA_bounds['L'], 70, 30)
 38 |         self.assertEqual(self.inbounds, 1)
 39 |     #   test when no conformation
 40 |         self.inbounds = traj_lib._inbounds(TBA_bounds['A'], 0, 0)
 41 |         self.assertEqual(self.inbounds, 0)
 42 | 
 43 | 
 44 |     def test_state(self):
 45 |         psi = [-30, 0, -40, 90, 140, 180]
 46 |         phi = [60., 0, -90, -90, -90, -180]
 47 |         states_test = ['L','O','A','O','E','O']
 48 |         bounds = {}
 49 |         bounds['A'] = [-100., -40., -50., -10.]
 50 |         bounds['E'] = [-180., -40., 125., 165.]
 51 |         bounds['L'] = [50., 100., -40., 70.0]
 52 | 
 53 |         for ind in range(len(phi)):
 54 |             result = traj_lib._state(phi[ind], psi[ind], bounds)
 55 |             state = result[0]
 56 |             self.assertEqual(state, states_test[ind], 'expected state %s but got %s'%(state,states_test[ind]))
 57 | 
 58 |     def test_stategrid(self):
 59 |         self.assertIsNotNone(traj_lib._stategrid(-180, -180, 20))
 60 |         self.assertLess(traj_lib._stategrid(-180, 0, 20),400)
 61 |         self.assertEqual(traj_lib._stategrid(0, 0, 20), 210)
 62 |         self.assertEqual(traj_lib._stategrid(-180, 0, 100), 2186)
 63 | 
 64 |     def test_discreterama(self):
 65 |         mdt_test = self.tr.mdt
 66 | 
 67 |         phi = md.compute_phi(mdt_test)
 68 |         psi = md.compute_psi(mdt_test)
 69 |         # print(psi)
 70 |         # psi = ([ 6,  8, 14, 16], [-30, 0, -40, 90, 140, 180])
 71 |         # phi = ([ 4,  6,  8, 14],[60., 0, -90, -90, -90, -180])
 72 |         states = ['L','A','E']
 73 |         discrete = traj_lib.discrete_rama(phi, psi, states=states)
 74 |         unique_st = set(discrete)
 75 |         for state in unique_st:
 76 |             self.assertIn(state, ['O', 'A', 'E', 'L'])
 77 | 
 78 |     def test_discreteramagrid(self):
 79 |         mdt_test = self.tr.mdt
 80 | 
 81 |         phi = md.compute_phi(mdt_test)
 82 |         psi = md.compute_psi(mdt_test)
 83 |         discrete = traj_lib.discrete_ramagrid(phi, psi, nbins=20)
 84 |         min_ibin = min(discrete)
 85 |         max_ibin = max(discrete)
 86 |         self.assertLess(max_ibin,400)
 87 |         self.assertGreaterEqual(min_ibin,0)
 88 | 
 89 | class TestMDtraj(unittest.TestCase):
 90 |     def setUp(self):
 91 |         download_test_data()
 92 |         self.traj = md.load('test/data/protein_only.xtc', \
 93 |                 top='test/data/alaTB.gro')
 94 |         self.topfn = 'test/data/alaTB.gro'
 95 |         self.trajfn = 'test/data/protein_only.xtc'
 96 |         self.tr = traj.TimeSeries(top='test/data/alaTB.gro', \
 97 |                                   traj=['test/data/protein_only.xtc'])
 98 | 
 99 |     def test_traj(self):
100 |         self.assertIsNotNone(self.traj)
101 |         self.assertEqual(self.traj.n_atoms, 19)
102 |         self.assertEqual(self.traj.timestep, 1.)
103 |         self.assertEqual(self.traj.n_residues, 3)
104 |         self.assertEqual(self.traj.n_frames, 10003)
105 | 
106 |     def test_load_mdtraj(self):
107 |         mdtraj = traj._load_mdtraj(top=self.topfn, traj=self.trajfn)
108 |         self.assertIsNotNone(mdtraj)
109 |         self.assertEqual(mdtraj.__module__, 'mdtraj.core.trajectory')
110 |         self.assertEqual(hasattr(mdtraj, '__class__'), True)
111 | 
112 |     def test_read_distraj(self):
113 |         self.assertIsNotNone(self.tr._read_distraj)
114 |         self.assertEqual(callable(self.tr._read_distraj), True)
115 |     #   read distraj from temp file
116 |         content = "0.0 A\n" \
117 |                   "1.0 E\n" \
118 |                   "2.0 L\n" \
119 |                   "3.0 O"
120 |         fn = 'temp.txt'
121 |         fd = open(fn, 'w+')
122 | 
123 |         try:
124 |             fd.write(content)
125 |             fd.seek(0)
126 |             cstates, dt = self.tr._read_distraj(distraj=fd.name)
127 |             self.assertIsInstance(cstates, list)
128 |             self.assertEqual(len(cstates), len(content.split('\n')))
129 |             self.assertEqual(dt, 1.0)
130 | 
131 |         finally:
132 |             fd.close()
133 |             os.remove(fd.name)
134 |     #   read distraj from array and custom timestamp
135 |         distraj_arr = content.split('\n')
136 |         cstates, dt = self.tr._read_distraj(distraj=distraj_arr, dt=2.0)
137 |         self.assertIsInstance(cstates,list)
138 |         self.assertEqual(len(cstates), len(content.split('\n')))
139 |         self.assertEqual(dt, 2.0)
140 |     #   read empty 'discrete' trajectory
141 |         cstates, dt = self.tr._read_distraj(distraj=[])
142 |         self.assertEqual(len(cstates), 0)
143 |         self.assertEqual(dt, 1.0)
144 | 
145 |     def test_timeseries_init(self):
146 |         self.assertIsNotNone(self.tr)
147 |         self.assertIsNotNone(self.tr.mdt)
148 |         self.assertEqual(hasattr(self.tr.mdt, '__class__'), True)
149 |         self.assertEqual(self.tr.mdt.__module__ , 'mdtraj.core.trajectory')
150 |         self.assertIsNotNone(self.tr.discretize)
151 | 
152 |     def test_ts_discretize(self):
153 |         self.tr.discretize('rama', states=['A', 'E', 'L'])
154 |         self.assertIsNotNone(self.tr.distraj)
155 |         unique_states = sorted(set(self.tr.distraj))
156 |         self.assertListEqual(unique_states, ['A', 'E', 'L', 'O'])
157 | 
158 |     def test_ts_find_keys(self):
159 |         self.assertIsNotNone(self.tr.find_keys)
160 |     #   test excluding state O (unassigned)
161 |         self.tr.distraj = ['O']*50000
162 |         for i in range(len(self.tr.distraj)):
163 |             self.tr.distraj[i] = np.random.choice(['A', 'E', 'L', 'O'])
164 | 
165 |         self.tr.find_keys()
166 |         keys = self.tr.keys
167 |         self.assertEqual(len(set(keys)), len(keys))
168 |         self.assertEqual(len(keys), 3)
169 |         for key in keys:
170 |             self.assertIn(key,['A','E','L'])
171 | 
172 |         del self.tr.distraj
173 |     #   test excluding state in alpha-h
174 |         self.tr.distraj = ['O'] * 50000
175 |         for i in range(len(self.tr.distraj)):
176 |             self.tr.distraj[i] = np.random.choice(['A', 'E', 'L', 'O'])
177 | 
178 |         self.tr.find_keys(exclude=['A'])
179 |         keys = self.tr.keys
180 |         self.assertEqual(len(set(keys)),len(keys))
181 |         self.assertEqual(len(keys), 3)
182 |         for key in keys:
183 |             self.assertIn(key,['O','E','L'])
184 | 
185 |     def test_gc(self):
186 |         self.tr.gc()
187 |         self.assertIs(hasattr(self.tr, 'mdt'), False)
188 | 
189 | 
class UseMDtraj(unittest.TestCase):
    """Checks the attributes of a ``TimeSeries`` built from the test files."""

    def setUp(self):
        """Fetch the test data and build the TimeSeries under test."""
        download_test_data()
        self.tr = traj.TimeSeries(top='test/data/alaTB.gro',
                                  traj=['test/data/protein_only.xtc'])

    def test_atributes(self):
        """The wrapped mdtraj object has the expected sizes."""
        wrapped = self.tr.mdt
        self.assertIsNotNone(wrapped)
        self.assertEqual(wrapped.n_atoms, 19)
        self.assertEqual(wrapped.n_frames, 10003)
        self.assertEqual(wrapped.n_residues, 3)
        self.assertIsNotNone(self.tr.discretize)
        self.assertTrue(callable(self.tr.discretize))
203 | 
204 | 
class TestMSMLib(unittest.TestCase):
    """Tests for the helper functions of ``msm_lib``."""

    def test_esort(self):
        """``esort`` returns 1, -1 or 0 for the three pairs checked here."""
        self.assertTrue(hasattr(msm_lib, 'esort'))
        self.assertTrue(callable(msm_lib.esort))
        cases = (
            ([0, float(1)], [1, float(2)], 1),
            ([0, float(100)], [1, float(2)], -1),
            ([100, float(1)], [1, float(1)], 0),
        )
        for first, second, expected in cases:
            self.assertEqual(msm_lib.esort(first, second), expected)

    def test_mat_mul_v(self):
        """``mat_mul_v`` multiplies a matrix by a vector."""
        self.assertTrue(hasattr(msm_lib, 'mat_mul_v'))
        self.assertTrue(callable(msm_lib.mat_mul_v))

        matrix = np.array([[1, 2, 3],
                           [4, 5, 6]])
        vector = np.array([1, 0, 1])
        self.assertEqual(msm_lib.mat_mul_v(matrix, vector), [4, 10])

        matrix = np.array([[-5, -4, 2],
                           [1, 6, -3],
                           [3, 5.5, -4]])
        vector = np.array([1, 2, -3])
        self.assertEqual(msm_lib.mat_mul_v(matrix, vector), [-19, 22, 26])

    def test_rand_rate(self):
        """Two calls to ``rand_rate`` give different 3x3 matrices."""
        testT = np.array([[10, 2, 1],
                          [1, 1, 1],
                          [0, 1, 0]])
        first = msm_lib.rand_rate(nkeep=3, count=testT)
        second = msm_lib.rand_rate(nkeep=3, count=testT)
        self.assertEqual(first.shape, (3, 3))
        self.assertFalse((first == second).all())

    def test_traj_split(self):
        """``traj_split`` returns the path of a file that exists on disk."""
        trajs = [traj.TimeSeries(distraj=[1, 2, 3], dt=1.),
                 traj.TimeSeries(distraj=[3, 2, 1], dt=2.)]
        filepath = msm_lib.traj_split(data=trajs, lagt=10)
        self.assertIsInstance(filepath, str)
        self.assertTrue(os.path.exists(filepath))
        os.remove(filepath)  # clean temp file

    # NOTE(review): this method name lacks the ``test`` prefix, so the
    # default unittest loader does not collect it -- confirm whether
    # that is intended before renaming.
    def calc_trans(self):
        """``calc_trans`` with nkeep=10 should give a 10x10 array."""
        matrix = msm_lib.calc_trans(nkeep=10)
        self.assertIsInstance(matrix, np.ndarray)
        self.assertEqual(matrix.shape, (10, 10))

    def test_calc_rate(self):
        """``calc_rate`` returns a 3x3 numpy array for a 3x3 input."""
        testT = np.array([[1, 2, 3],
                          [0, 0, 0],
                          [10, 10, 10]])
        rate = msm_lib.calc_rate(nkeep=3, trans=testT, lagt=10)
        self.assertIsInstance(rate, np.ndarray)
        self.assertEqual(rate.shape, (3, 3))

    def test_calc_lifetime(self):
        """``calc_lifetime`` returns a dict."""
        data = [[1, 1, 1, 2], 1., [1, 2]]  # [distraj, dt, keys]
        life = msm_lib.calc_lifetime(data)
        self.assertIsInstance(life, dict)


--------------------------------------------------------------------------------
/mastermsm/trajectory/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BioKT/MasterMSM/7e71b0fcf42cc7d840e58a6ca18450d710fbdbb4/mastermsm/trajectory/__init__.py


--------------------------------------------------------------------------------
/mastermsm/trajectory/traj.py:
--------------------------------------------------------------------------------
  1 | """
  2 | This file is part of the MasterMSM package.
  3 | 
  4 | """
  5 | import os
  6 | import numpy as np
  7 | import mdtraj as md
  8 | from ..trajectory import traj_lib
  9 | 
 10 | def _load_mdtraj(top=None, traj=None, stride=None):
 11 |     """ Loads trajectories using mdtraj.
 12 | 
 13 |     Parameters
 14 |     ----------
 15 |     top: str
 16 |         The topology file, may be a PDB or GRO file.
 17 |     traj : str
 18 |         A list with the trajectory filenames to be read.
 19 | 
 20 |     Returns
 21 |     -------
 22 |     mdtrajs : list
 23 |         A list of mdtraj Trajectory objects.
 24 | 
 25 |     """
 26 |     return md.load(traj, top=top, stride=stride)
 27 | 
 28 | class MultiTimeSeries(object):
 29 |     """ A class for generating multiple TimeSeries objects in
 30 |     a consistent way. In principle this is only needed when
 31 |     the clustering is not established a priori.
 32 | 
 33 |     """
 34 |     def __init__(self, top=None, trajs=None, dt=None, stride=None):
 35 |         """
 36 |         Parameters
 37 |         ----------
 38 |         dt : float
 39 |             The time step.
 40 |         top : string
 41 |             The topology file, may be a PDB or GRO file.
 42 |         trajs : list 
 43 |             A list of trajectory filenames to be read.
 44 | 
 45 |         """
 46 |         self.file_list = trajs
 47 |         self.traj_list = []
 48 |         for traj in self.file_list:
 49 |             tr = TimeSeries(top=top, traj=traj, stride=stride)
 50 |             self.traj_list.append(tr)
 51 |     
 52 |     def joint_discretize(self, method='backbone_torsions', mcs=None, ms=None, dPCA=False):
 53 |         """
 54 |         Discretize simultaneously all trajectories with HDBSCAN.
 55 | 
 56 |         Parameters
 57 |         ----------
 58 |         method : str
 59 |             The method of choice for the discretization. Options are 'backbone_torsions'
 60 |             and 'contacts'.
 61 |         mcs : int
 62 |             Minimum cluster size for HDBSCAN clustering.
 63 |         ms : int
 64 |             Minsamples parameter for HDBSCAN clustering.
 65 |         dPCA : bool
 66 |             Whether we are using the dihedral PCA method.
 67 | 
 68 |         """
 69 |         if method=='backbone_torsions':
 70 |             labels = self.joint_discretize_backbone_torsions(mcs=mcs, ms=ms, dPCA=dPCA)
 71 |         elif method=='contacts':
 72 |             labels = self.joint_discretize_contacts(mcs=mcs, ms=ms)
 73 | 
 74 |         i = 0
 75 |         for tr in self.traj_list:
 76 |             ltraj = tr.mdt.n_frames
 77 |             tr.distraj = list(labels[i:i+ltraj])
 78 |             i +=ltraj
 79 | 
 80 |     def joint_discretize_backbone_torsions(self, mcs=None, ms=None, dPCA=False):
 81 |         """
 82 |         Analyze jointly torsion angles from multiple trajectories.
 83 | 
 84 |         Parameters
 85 |         ----------
 86 |         mcs : int
 87 |             Minimum cluster size for HDBSCAN clustering.
 88 |         ms : int
 89 |             Minsamples parameter for HDBSCAN clustering.
 90 |         dPCA : bool
 91 |             Whether we are using the dihedral PCA method.
 92 | 
 93 |         """
 94 |         # First we build the fake trajectory combining data
 95 |         phi_cum = []
 96 |         psi_cum = []
 97 |         for tr in self.traj_list:
 98 |             phi = md.compute_phi(tr.mdt)
 99 |             psi = md.compute_psi(tr.mdt)    
100 |             phi_cum.append(phi[1])
101 |             psi_cum.append(psi[1])
102 |         phi_cum = np.vstack(phi_cum)
103 |         psi_cum = np.vstack(psi_cum)
104 | 
105 |         # Then we generate the consistent set of clusters
106 |         if dPCA is True:
107 |             angles = np.column_stack((phi_cum, psi_cum))
108 |             v = traj_lib.dPCA(angles)
109 |             labels = traj_lib.discrete_backbone_torsion(mcs, ms, pcs=v, dPCA=True)
110 |         else:
111 |             phi_fake = [phi[0], phi_cum]
112 |             psi_fake = [psi[0], psi_cum]
113 |             labels = traj_lib.discrete_backbone_torsion(mcs, ms, phi=phi_fake, psi=psi_fake)
114 |         return labels
115 | 
116 |     def joint_discretize_contacts(self, mcs=None, ms=None):
117 |         """
118 |         Analyze jointly pairwise contacts from all trajectories.
119 |         
120 |         Produces a fake trajectory comprising a concatenated set
121 |         to recover the labels from HDBSCAN.
122 | 
123 |         """
124 |         mdt_cum = []
125 |         for tr in self.traj_list:
126 |             mdt_cum.append(tr.mdt) #mdt_cum = np.vstack(mdt_cum)
127 | 
128 |         labels = traj_lib.discrete_contacts_hdbscan(mcs, ms, mdt_cum)
129 | 
130 |         return labels
131 | 
class TimeSeries(object):
    """ A class to read and discretize simulation trajectories.
    When simulation trajectories are provided, frames are read
    and discretized using mdtraj [1]_. Alternatively, a discrete
    trajectory can be provided.

    Attributes
    ----------
    mdt :
        An mdtraj Trajectory object.
    file_name : str
        The name of the trajectory file.
    distraj : list
        The assigned trajectory.
    dt : float
        The time step
    keys : list
        The discrete states found by ``find_keys``.

    References
    ----------
    .. [1] McGibbon, RT., Beauchamp, KA., Harrigan, MP., Klein, C.,
        Swails, JM., Hernandez, CX., Schwantes, CR., Wang, LP., Lane,
        TJ. and Pande, VS." MDTraj: A Modern Open Library for the Analysis
        of Molecular Dynamics Trajectories", Biophys. J. (2015).

    """
    def __init__(self, top=None, traj=None, dt=None, \
            distraj=None, stride=None):
        """
        Parameters
        ----------
        distraj : string or list
            The discrete state trajectory, as a file name or as a list.
        dt : float
            The time step.
        top : string
            The topology file, may be a PDB or GRO file.
        traj : string
            The trajectory filenames to be read.
        stride : int
            Only read every stride-th frame

        """
        if distraj is not None:
            # A discrete trajectory is provided
            self.distraj, self.dt = self._read_distraj(distraj=distraj, dt=dt)
        else:
            # An MD trajectory is provided
            self.file_name = traj
            self.mdt = _load_mdtraj(top=top, traj=traj, stride=stride)
            self.dt = self.mdt.timestep

    def _read_distraj(self, distraj=None, dt=None):
        """ Loads discrete trajectories directly.

        Parameters
        ----------
        distraj : str, list
            File or list with discrete trajectory. A file holds either
            two columns (time, state) or a single column of states;
            blank lines are ignored.
        dt : float
            The time step. Used for in-memory trajectories (default 1)
            and for two-column files too short to derive it from.

        Returns
        -------
        cstates : list
            The sequence of discrete states.
        dt : float
            The time step.

        Raises
        ------
        IOError
            If ``distraj`` is not a sequence and is not an existing file.

        """
        if isinstance(distraj, list):
            return distraj, (1. if dt is None else dt)
        if isinstance(distraj, (tuple, np.ndarray)):
            return list(distraj), (1. if dt is None else dt)

        if not os.path.isfile(distraj):
            raise IOError("Discrete trajectory file not found: %s" % distraj)

        # The context manager closes the file even if parsing fails later.
        with open(distraj, "r") as handle:
            rows = [line.split() for line in handle if line.strip()]

        if any(len(row) < 2 for row in rows):
            # Single column file: states only, unit time step
            return [row[0] for row in rows], 1.

        cstates = [row[1] for row in rows]
        if len(rows) > 2:
            step = float(rows[2][0]) - float(rows[1][0])
        elif len(rows) == 2:
            step = float(rows[1][0]) - float(rows[0][0])
        else:
            # Not enough rows to derive a time step from the time column
            step = 1. if dt is None else dt
        try: # make them integers if you can
            cstates = [int(x) for x in cstates]
        except ValueError:
            pass
        return cstates, step

    def discretize(self, method="rama", states=None, nbins=20,\
            mcs=100, ms=50):
        """ Discretize the simulation data.

        The result is stored in the ``distraj`` attribute; nothing is
        returned.

        Parameters
        ----------
        method : str
            A method for doing the clustering. Options are
            "rama", "ramagrid", "rama_hdb", "contacts_hdb";
            where the latter two use HDBSCAN.
        states : list
            A list of states to be considered in the discretization.
            Only for method "rama". When None, the default states of
            ``traj_lib.discrete_rama`` are used.
        nbins : int
            Number of bins in the grid. Only for "ramagrid".
        mcs : int
            min_cluster_size for HDBSCAN
        ms : int
            min_samples for HDBSCAN

        Raises
        ------
        ValueError
            If ``method`` is not one of the supported options.

        """
        if method in ("rama", "ramagrid", "rama_hdb"):
            phi = md.compute_phi(self.mdt)
            psi = md.compute_psi(self.mdt)
            if method == "rama":
                if states is None:
                    # Let discrete_rama fall back on its own default states
                    self.distraj = traj_lib.discrete_rama(phi, psi)
                else:
                    self.distraj = traj_lib.discrete_rama(phi, psi, states=states)
            elif method == "ramagrid":
                self.distraj = traj_lib.discrete_ramagrid(phi, psi, nbins)
            else:
                self.distraj = traj_lib.discrete_backbone_torsion(mcs, ms, phi=phi, psi=psi)
        elif method == "contacts_hdb":
            self.distraj = traj_lib.discrete_contacts_hdbscan(mcs, ms, self.mdt)
        else:
            raise ValueError("Unknown discretization method '%s'; options are "
                             "'rama', 'ramagrid', 'rama_hdb' and 'contacts_hdb'"
                             % method)

    def find_keys(self, exclude=['O']):
        """ Finds out the discrete states in the trajectory

        The states are stored in the ``keys`` attribute, in order of
        first appearance in ``distraj``.

        Parameters
        ----------
        exclude : list
            A list of strings with states to exclude.

        """
        keys = []
        for s in self.distraj:
            if s not in keys and s not in exclude:
                keys.append(s)
        self.keys = keys

    def gc(self):
        """
        Gets rid of the mdtraj attribute

        """
        delattr(self, "mdt")
281 | 
282 | #    def discrete_rama(self, A=[-100, -40, -60, 0], \
283 | #            L=[-180, -40, 120., 180.], \
284 | #            E=[50., 100., -40., 70.]):
285 | #        """ Discretize based on Ramachandran angles.
286 | #
287 | #        """
288 | #        for t in self.mdtrajs:
289 | #            phi,psi = zip(mdtraj.compute_phi(traj), mdtraj.compute_psi(traj))
290 | #
291 | 


--------------------------------------------------------------------------------
/mastermsm/trajectory/traj_lib.py:
--------------------------------------------------------------------------------
  1 | """
  2 | This file is part of the MasterMSM package.
  3 | 
  4 | """
  5 | #import h5py
  6 | import copy
  7 | import sys
  8 | import math
  9 | import hdbscan
 10 | import numpy as np
 11 | from sklearn.preprocessing import StandardScaler
 12 | from sklearn.decomposition import PCA
 13 | import mdtraj as md
 14 | import matplotlib.pyplot as plt
 15 | 
 16 | def discrete_rama(phi, psi, seq=None, bounds=None, states=['A', 'E', 'L']):
 17 |     """ Assign a set of phi, psi angles to coarse states.
 18 | 
 19 |     Parameters
 20 |    ----------
 21 |     phi : list
 22 |         A list of Phi Ramachandran angles.
 23 |     psi : list
 24 |         A list of Psi Ramachandran angles.
 25 |     seq : list
 26 |         Sequence of states.
 27 |     bounds : list of lists
 28 |         Alternative bounds for transition based assignment.
 29 |     states : list
 30 |         The states that will be used in the assignment.
 31 | 
 32 |     Returns
 33 |     -------
 34 |     cstates : list
 35 |         The sequence of coarse states.
 36 | 
 37 |     Notes
 38 |     -----
 39 |     Here we follow Buchete and Hummer for the assignment procedure [1]_ .
 40 | 
 41 |     .. [1] N. V. Buchete and G. Hummer, "Coarse master equations for peptide folding dynamics", J. Phys. Chem. B. (2008).
 42 | 
 43 |     """
 44 |     if bounds is None:
 45 |         TBA_bounds = {}
 46 |         if 'A' in states:
 47 |             TBA_bounds['A'] = [ -100., -40., -50., -10. ]
 48 |         if 'E' in states:
 49 |             TBA_bounds['E'] = [ -180., -40., 125.,165. ]
 50 |         if 'L' in states:
 51 |             TBA_bounds['L'] = [ 50., 100., -40.,70.0 ]
 52 | 
 53 |     res_idx = 0
 54 |     if len(phi[0]) != len(psi[0]):
 55 |         print (" Different number of phi and psi dihedrals")
 56 |         print (" STOPPING HERE")
 57 |         sys.exit()
 58 | 
 59 |     cstates = []
 60 |     prev_s_string = ""
 61 |     ndih = len(phi[0])
 62 |     for f,y in zip(phi[1],psi[1]):
 63 |         s_string = []
 64 |         for n in range(ndih):
 65 |             s, _ = _state(f[n]*180/math.pi, y[n]*180/math.pi, TBA_bounds)
 66 |         #if s == "O" and len(prev_s_string) > 0:
 67 |             if s == "O":
 68 |                 try:
 69 |                     s_string += prev_s_string[n]
 70 |                 except IndexError:
 71 |                     s_string += "O"
 72 |             else:
 73 |                 s_string += s
 74 |         cstates.append(''.join(s_string))
 75 |         prev_s_string = s_string
 76 |         res_idx += 1
 77 |     return cstates
 78 | 
 79 | def discrete_ramagrid(phi, psi, nbins):
 80 |     """ Finely partition the Ramachandran map into a grid of states.
 81 | 
 82 |     Parameters
 83 |    ----------
 84 |     phi : list
 85 |         A list of Phi Ramachandran angles.
 86 |     psi : list
 87 |         A list of Psi Ramachandran angles.
 88 |     nbins : int
 89 |         The number of bins in the grid in each dimension.
 90 | 
 91 |     Returns
 92 |     -------
 93 |     cstates : list
 94 |         The sequence of coarse states.
 95 | 
 96 |     """
 97 |     cstates = []
 98 |     for f, y in zip(phi[1], psi[1]):
 99 |         s = _stategrid(f, y, nbins)
100 |         cstates.append(s)
101 |     return cstates
102 | 
103 | #stats_out = open(stats_file,"w")
104 | #cum = 0
105 | #for s in stats_list:
106 | #    cum+=s[1]
107 | #    #stats_out.write("%s %8i %8i %12.6f\n"%\
108 | #    #   (s[0],s[1],cum,qave[s[0]]/float(s[1])))
109 | #    stats_out.write("%s %8i %8i\n"%\
110 | #        (s[0],s[1],cum))
111 | #
112 | #stats_out.close()
113 | #state_out.close()
114 | #
115 | #def isnative(native_string, string):
116 | #    s = ""
117 | #    for i in range(len(string)):
118 | #        if string[i]==native_string[i]:
119 | #            s+="1"
120 | #        else:
121 | #            s+="0"
122 | #    return s
123 | #
124 | def _inrange( x, lo, hi ):
125 |         if x > lo and x < hi:
126 |                 return 1
127 |         else:
128 |                 return 0
129 | 
def _inbounds(bounds,phi, psi):
    """ Checks whether a (phi, psi) pair falls inside any box of bounds.

    Each box takes four consecutive entries of bounds:
    [phi_lo, phi_hi, psi_lo, psi_hi]. The first box is always examined;
    up to three further boxes are examined only when bounds is long
    enough to hold them. Returns 1 on the first hit and 0 otherwise.

    """
    for start in (0, 4, 8, 12):
        if start and len(bounds) <= start:
            continue
        if _inrange(phi, bounds[start], bounds[start + 1]) and \
                _inrange(psi, bounds[start + 2], bounds[start + 3]):
            return 1
    return 0
143 | 
def _state(phi,psi,bounds):
    """ Finds coarse state for a pair of phi-psi dihedrals

    Parameters
    ----------
    phi : float
        Phi dihedral angle
    psi : float
        Psi dihedral angle
    bounds : dict
        Dictionary containing list of states and their respective bounds

    Returns
    -------
    k : string
        Key of the first state whose bounds contain the pair, or 'O'
        when no state matches.
    rest : list
        Empty when a state was assigned; otherwise a list holding the
        single unassigned (phi, psi) tuple.

    """
    for label, limits in bounds.items():
        if _inbounds(limits, phi, psi):
            return label, []
    # No state matched: report the pair as unassigned
    unassigned = [(phi, psi)]
    return 'O', unassigned
180 | 
181 | #def stats_sort(x,y):
182 | #    xx = x[1]
183 | #    yy = y[1]
184 | #    return yy-xx
185 | #
186 | ##if len(sys.argv)<5:
187 | ##   sys.stdout.write(Usage)
188 | ##   sys.exit(0)
189 | #
190 | #torsion_file = sys.argv[1]
191 | ##q_file = sys.argv[2]
192 | #state_file = sys.argv[2]
193 | #stats_file = sys.argv[3]
194 | 
195 | def _stategrid(phi, psi, nbins):
196 |     """ Finds coarse state for a pair of phi-psi dihedrals
197 | 
198 |     Parameters
199 |     ----------
200 |     phi : float
201 |         Phi dihedral angle
202 |     psi : float
203 |         Psi dihedral angle
204 |     nbins : int
205 |         Number of bins in each dimension of the grid
206 | 
207 |     Returns
208 |     -------
209 |     k : int
210 |         Index of bin
211 | 
212 |     """
213 |     #print phi, psi
214 |     #print "column :", int(0.5*(phi + math.pi)/math.pi*nbins)
215 |     #print "row :", int(0.5*(psi + math.pi)/math.pi*nbins)
216 |     ibin = int(0.5*nbins*(phi/math.pi + 1.)) + int(0.5*nbins*(psi/math.pi + 1))*nbins
217 |     return ibin
218 | 
def discrete_backbone_torsion(mcs, ms, phi=None, psi=None, \
                              pcs=None, dPCA=False):
    """
    Discretize backbone torsion angles

    Assign a set of phi, psi angles (or their corresponding
    dPCA variables if dPCA=True) to coarse states
    by using the HDBSCAN algorithm.

    When dPCA is False the raw and the shifted angles are also written
    to "phi_psi.dat" and "phi_psi_shifted.dat" in the working directory,
    and the shifted angles are standardized before clustering. When
    dPCA is True the principal components are clustered as given.

    Parameters
    ----------
    phi : list
        A list of Phi Ramachandran angles
    psi : list
        A list of Psi Ramachandran angles
    pcs : matrix
        Matrix containing principal components obtained
        from PCA of dihedral angles
    mcs : int
        min_cluster_size for HDBSCAN. When None, the integer square
        root of the number of samples is used.
    ms : int
        min_samples for HDBSCAN. When None, the value of mcs is used.
    dPCA : bool
        Whether pcs (True) or phi and psi (False) are clustered.

    Returns
    -------
    labels :
        The ``labels_`` attribute of the fitted HDBSCAN object.

    Raises
    ------
    ValueError
        If the input needed for the chosen mode is missing, or if phi
        and psi have inconsistent dimensions.

    """
    if dPCA:
        if pcs is None:
            raise ValueError("Principal components are required when dPCA=True")
        # The principal components are used directly
        X = pcs
    else:
        if phi is None or psi is None:
            raise ValueError("Both phi and psi are required when dPCA=False")
        # shift and combine dihedrals
        if len(phi[0]) != len(psi[0]):
            raise ValueError("Inconsistent dimensions for angles")

        ndih = len(phi[0])
        phi_shift, psi_shift = [], []
        for f, y in zip(phi[1], psi[1]):
            for n in range(ndih):
                phi_shift.append(f[n])
                psi_shift.append(y[n])
        np.savetxt("phi_psi.dat", np.column_stack((phi_shift, psi_shift)))
        psi_shift, phi_shift = _shift(psi_shift, phi_shift)
        data = np.column_stack((phi_shift, psi_shift))
        np.savetxt("phi_psi_shifted.dat", data)
        # Scaling only applies here: 'data' does not exist in the dPCA
        # branch, and scaling there would overwrite X = pcs.
        X = StandardScaler().fit_transform(data)

    # Set values for clustering parameters
    if mcs is None:
        mcs = int(np.sqrt(len(X)))
        print("Setting minimum cluster size to: %g" % mcs)
    if ms  is None:
        ms  = mcs
        print("Setting min samples to: %g" % ms)

    hdb = hdbscan.HDBSCAN(min_cluster_size=mcs, min_samples=ms).fit(X)
    hdb.condensed_tree_.plot(select_clusters=True)

    return hdb.labels_
305 | 
def dPCA(angles, var_threshold=0.8):
    """
    Compute PCA of dihedral angles

    We follow the methods described in A. Altis et al.
    *J. Chem. Phys.*  244111 (2007)

    Every angle is replaced by its (cos, sin) pair, the resulting
    matrix is standardized and PCA is run keeping all components.
    The leading components are then returned, as many as needed for
    their cumulative explained variance ratio to reach var_threshold.

    Parameters
    ----------
    angles : array-like, 2D
        Angles ordered by columns (one row per sample, one column
        per dihedral).
    var_threshold : float, optional
        Fraction of the total variance the returned components must
        retrieve. Defaults to 0.8.

    Returns
    -------
    X_transf : array
        dPCA components needed to retrieve var_threshold (80% by
        default) of the variance, ordered by columns.

    Raises
    ------
    ValueError
        If angles is not two-dimensional.

    """
    angles = np.asarray(angles, dtype=float)
    if angles.ndim != 2:
        raise ValueError("angles must be a 2D array (samples x dihedrals)")
    nsamples, nang = angles.shape

    # Interleave cos and sin so that columns (2k, 2k+1) hold
    # (cos, sin) of the k-th dihedral.
    X = np.empty((nsamples, 2*nang))
    X[:, 0::2] = np.cos(angles)
    X[:, 1::2] = np.sin(angles)

    X_std = StandardScaler().fit_transform(X)
    sklearn_pca = PCA(n_components=2*nang)

    X_transf = sklearn_pca.fit_transform(X_std)
    expl = sklearn_pca.explained_variance_ratio_
    print("Ratio of variance retrieved by each component:", expl)

    # Count the leading components needed to reach the threshold. The
    # bound on i keeps the loop from indexing past the last component
    # if rounding leaves the total just below the threshold.
    cum_var = 0.0
    i = 0
    while cum_var < var_threshold and i < len(expl):
        cum_var += expl[i]
        i += 1

    return X_transf[:, :i]
370 | 
def discrete_contacts_hdbscan(mcs, ms, mdt_all):
    """
    HDBSCAN discretization based on contacts

    The contact distances of all the trajectories are pooled,
    standardized and clustered with HDBSCAN. The condensed tree
    is plotted and saved to "hdbscan-tree.png".

    Parameters
    ----------
    mcs : int or None
        min_cluster_size for HDBSCAN. When None it is set to
        int(sqrt(number of pooled samples)).
    ms : int or None
        min_samples for HDBSCAN. When None it is set to 100.
    mdt_all : iterable
        mdtraj trajectories, each passed to md.compute_contacts

    Returns
    -------
    labels : list
        Indexes corresponding to the clustering, after passing the
        HDBSCAN labels through _filter_states.

    Raises
    ------
    Exception
        If the clustering yields two or fewer distinct labels.

    """

    # Pool the first element returned by compute_contacts, row by row,
    # over every trajectory
    pooled = []
    for traj in mdt_all:
        contacts = md.compute_contacts(traj, contacts='all', periodic=True)
        pooled.extend(contacts[0])

    X = StandardScaler().fit_transform(pooled)

    # Defaults for the clustering parameters
    if mcs is None:
        mcs = int(np.sqrt(len(X)))
    if ms is None:
        ms = 100

    clusterer = hdbscan.HDBSCAN(min_cluster_size=mcs, min_samples=ms)
    clusterer.fit(X)
    clusterer.condensed_tree_.plot(select_clusters=True)
    plt.savefig("hdbscan-tree.png",dpi=300,transparent=True)

    # In case not enough states are produced, exit
    n_labels = len(np.unique(clusterer.labels_))
    if n_labels <= 2:
        raise Exception("Cannot generate clusters from contacts")

    return _filter_states(clusterer.labels_)
411 | 
412 | def _filter_states(states):
413 |     """
414 |     Filters to remove not-assigned frames when using dbscan or hdbscan
415 |     
416 |     """
417 |     fs = []
418 |     for s in states:
419 |         if s >= 0:
420 |                 fs.append(s)
421 |         else:
422 |             try:
423 |                 fs.append(fs[-1])
424 |             except IndexError:
425 |                 pass
426 |     return fs
427 | 
428 | def _shift(psi, phi):
429 |     psi_s, phi_s = copy.deepcopy(phi), copy.deepcopy(psi)
430 |     for i in range(len(phi_s)):
431 |         if phi_s[i] < -2:
432 |             phi_s[i] += 2*np.pi
433 |     for i in range(len(psi_s)):
434 |         if psi_s[i] > 2:
435 |             psi_s[i] -= 2*np.pi
436 |     return phi_s, psi_s
437 | 


--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
# MkDocs configuration for the MasterMSM documentation site.
site_name : MasterMSM
theme : readthedocs
repo_url : https://github.com/daviddesancho/MasterMSM
site_author: David De Sancho
# Each entry appears to be a Markdown source file followed by its
# navigation labels (an optional section name, then the page title).
# NOTE(review): none of these .md files is visible in the repository tree,
# which shows reStructuredText sources under docs/source -- confirm that
# this configuration is still in use.
pages :
        - ['about.md', 'About']
        - ['index.md', 'Introduction']
        - ['installation.md', 'Installation']
        - ['discretize.md', 'User guide', 'Discretizing the data']
        - ['trajectory.md', 'User guide', 'Parsing trajectories']
        - ['msm.md', 'User guide', 'Constructing the MSM']
        - ['fewsm.md', 'User guide', 'Clustering the MSM']
13 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | scipy
3 | matplotlib
4 | networkx
5 | mdtraj
6 | hdbscan
7 | scikit-learn
8 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # Setup script for bestmsm package
 4 | 
 5 | import os
 6 | from setuptools import setup, find_packages
 7 | 
 8 | def read(fname):
 9 |     return open(os.path.join(os.path.dirname(__file__), fname)).read()
10 | 
# Package metadata. find_packages() collects the packages to install and
# the long description is read from README.md.
setup(
    name='MasterMSM',
    version='0.1dev',
    description='Algorithms to construct master equation / Markov state models',
    url='http://github.com/daviddesancho/MasterMSM',
    author='David De Sancho',
    author_email='daviddesancho.at.gmail.com',
    license='GPL',
    packages=find_packages(),
    keywords="markov state model",
    long_description=read('README.md'),
    # One trove classifier per list element. They were previously packed
    # into a single multi-line string, which is not a valid classifier.
    classifiers=[
        "Development Status :: 1 - Planning",
        "Operating System :: POSIX :: Linux",
        "Operating System :: MacOS",
        "Programming Language :: Python :: 2.7",
        "Topic :: Scientific/Engineering :: Bio-Informatics",
        "Topic :: Scientific/Engineering :: Chemistry",
    ],
)
31 | 


--------------------------------------------------------------------------------