├── .gitattributes ├── .github └── workflows │ └── main.yml ├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── docs ├── Makefile └── source │ ├── about.rst │ ├── conf.py │ ├── documentation.rst │ ├── img │ └── mastermsm.png │ ├── index.rst │ ├── installation.rst │ ├── mastermsm.fewsm.rst │ ├── mastermsm.msm.rst │ ├── mastermsm.rst │ ├── mastermsm.trajectory.rst │ ├── modules.rst │ └── support.rst ├── examples ├── README.md ├── alanine_dipeptide │ ├── ala_dipeptide.ipynb │ ├── ala_dipeptide_discretize.ipynb │ ├── ala_dipeptide_dpca.ipynb │ ├── ala_dipeptide_maxlike.ipynb │ └── ala_dipeptide_multi.ipynb ├── alanine_pentapeptide │ ├── ala_pentapeptide.ipynb │ ├── ala_pentapeptide_contacts.ipynb │ └── ala_pentapeptide_dpca.ipynb ├── bistable_potential │ ├── 1D_smFS_MSM.ipynb │ └── 2D_smFS_MSM.ipynb ├── mueller_potential │ ├── .ipynb_checkpoints │ │ └── mueller_potential-checkpoint.ipynb │ ├── mueller.py │ ├── mueller_potential.ipynb │ └── mueller_potential_openmm.ipynb └── schutte_potential │ └── schute_mastermsm.ipynb ├── mastermsm ├── __init__.py ├── fewsm │ ├── __init__.py │ ├── fewsm.py │ └── fewsm_lib.py ├── msm │ ├── __init__.py │ ├── msm.py │ └── msm_lib.py ├── test │ ├── README.md │ ├── __init__.py │ ├── download_data.py │ ├── test_fewsm.py │ ├── test_msm.py │ └── test_trajectory.py └── trajectory │ ├── __init__.py │ ├── traj.py │ └── traj_lib.py ├── mkdocs.yml ├── requirements.txt └── setup.py /.gitattributes: -------------------------------------------------------------------------------- 1 | *.xtc filter=lfs diff=lfs merge=lfs -text 2 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | # This is a basic workflow to help you get started with Actions 2 | 3 | name: unittests 4 | 5 | ## Controls when the workflow will run 6 | on: 7 | # # Triggers the workflow on push or pull request events but only for the 
master branch 8 | push: 9 | branches: [ master ] 10 | pull_request: 11 | branches: [ master ] 12 | # 13 | # # Allows you to run this workflow manually from the Actions tab 14 | # workflow_dispatch: 15 | 16 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel 17 | jobs: 18 | # This workflow contains a single job called "build" 19 | build: 20 | # The type of runner that the job will run on 21 | runs-on: ubuntu-latest 22 | 23 | # Steps represent a sequence of tasks that will be executed as part of the job 24 | steps: 25 | - uses: actions/checkout@v3 26 | - name: Set up Python 27 | uses: actions/setup-python@v3 28 | with: 29 | python-version: '3.x' 30 | - name: Download repository 31 | run: | 32 | python --version 33 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 34 | python setup.py install 35 | 36 | - name: Run tests 37 | run: | 38 | cd $HOME/mastermsm 39 | python -m unittest 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | site/ 2 | docs/build 3 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - 3.6 4 | 5 | travis: 6 | - develop 7 | 8 | install: 9 | - python setup.py install 10 | 11 | script: 12 | # avoid running in the checkout directory so nose finds built modules.. 
13 | - rundir=$HOME 14 | - cd $rundir 15 | - nosetests -v --with-coverage 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 
32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. 
The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 
97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 
128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. 
However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. 
Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. 
Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. 
For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 
279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | {description} 294 | Copyright (C) {year} {fullname} 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 
319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | {signature of Ty Coon}, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | 341 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Documentation Status](https://readthedocs.org/projects/mastermsm/badge/?version=develop)](https://mastermsm.readthedocs.io/en/develop/?badge=develop) 2 | [![Codacy Badge](https://app.codacy.com/project/badge/Grade/90d86f571f5c416b910a9dc4d1d8c569)](https://www.codacy.com/gh/BioKT/MasterMSM/dashboard?utm_source=github.com&utm_medium=referral&utm_content=BioKT/MasterMSM&utm_campaign=Badge_Grade) 3 | 4 | MasterMSM 5 | ========= 6 | MasterMSM is a Python package for generating Markov state models (MSMs) 7 | from molecular dynamics trajectories. We use a formulation based on 8 | the chemical master equation. 
This package will allow you to: 9 | 10 | * Create Markov state / master equation models from biomolecular simulations. 11 | 12 | * Discretize trajectory data using dihedral angle based methods useful 13 | for small peptides. 14 | 15 | * Calculate rate matrices using a variety of methods. 16 | 17 | * Obtain committors and reactive fluxes. 18 | 19 | * Carry out sensitivity analysis of networks. 20 | 21 | You can read the documentation [here](https://mastermsm.readthedocs.io). 22 | 23 | Contributors 24 | ------------ 25 | This code has been written by David De Sancho with help from Anne Aguirre. 26 | 27 | Installation 28 | ------------ 29 | git clone http://github.com/daviddesancho/MasterMSM destination/MasterMSM 30 | cd destination/MasterMSM 31 | python setup.py install --user 32 | 33 | External libraries 34 | ------------------ 35 | mdtraj : https://mdtraj.org 36 | 37 | Citation 38 | -------- 39 | @article{mastermsm, 40 | author = "David De Sancho and Anne Aguirre", 41 | title = "{MasterMSM: A Package for Constructing Master Equation Models of Molecular Dynamics}", 42 | year = "2019", 43 | month = "6", 44 | journal = "J. Chem. Inf. Model.", 45 | url = "https://doi.org/10.1021/acs.jcim.9b00468", 46 | doi = "10.1021/acs.jcim.9b00468" 47 | } 48 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = MasterMSM 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option.
$(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/source/about.rst: -------------------------------------------------------------------------------- 1 | About 2 | ============ 3 | In the last decade, Master equation / Markov state models (usually termed MSMs) 4 | have become one of the key methodologies to analyze data from molecular dynamics 5 | (MD) simulations. You can find information about MSMs in general in the following 6 | volume 7 | 8 | * `An Introduction to Markov State Models and Their Application to Long Timescale Molecular Simulation `_, edited by Pande, Bowman and Noe (Springer, 2014). 9 | 10 | The MasterMSM library brings a different flavour of MSMs, based on the methods 11 | introduced by N. V. Buchete and G. Hummer 12 | (`J. Phys. Chem. B, 2008 `_). 13 | The central difference lies in that, instead of using transition matrices, we focus 14 | on rate matrices, which determine the time evolution of the system as described 15 | by the chemical master equation. 16 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Configuration file for the Sphinx documentation builder. 4 | # 5 | # This file does only contain a selection of the most common options. For a 6 | # full list see the documentation: 7 | # http://www.sphinx-doc.org/en/master/config 8 | 9 | # -- Path setup -------------------------------------------------------------- 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here.
14 | # 15 | import os 16 | import sys 17 | 18 | sys.path.insert(0, os.path.abspath('../..')) 19 | import mastermsm 20 | #sys.path.append(os.path.join(os.path.abspath(os.pardir))) 21 | 22 | 23 | 24 | # -- Project information ----------------------------------------------------- 25 | 26 | project = 'MasterMSM' 27 | copyright = '2019, David De Sancho' 28 | author = 'David De Sancho' 29 | 30 | # The short X.Y version 31 | version = '' 32 | # The full version, including alpha/beta/rc tags 33 | release = '1.1.1' 34 | 35 | 36 | # -- General configuration --------------------------------------------------- 37 | 38 | # If your documentation needs a minimal Sphinx version, state it here. 39 | # 40 | # needs_sphinx = '1.0' 41 | 42 | # Add any Sphinx extension module names here, as strings. They can be 43 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 44 | # ones. 45 | extensions = [ 46 | 'sphinx.ext.autodoc', 47 | 'sphinx.ext.coverage', 48 | 'sphinx.ext.githubpages', 49 | 'sphinx.ext.mathjax', 50 | 'sphinx.ext.viewcode', 51 | 'sphinx.ext.napoleon', 52 | 'sphinx.ext.autosummary', 53 | 'sphinx.ext.doctest', 54 | 'sphinx.ext.inheritance_diagram'] 55 | 56 | # Add any paths that contain templates here, relative to this directory. 57 | templates_path = ['_templates'] 58 | 59 | # The suffix(es) of source filenames. 60 | # You can specify multiple suffix as a list of string: 61 | # 62 | source_suffix = ['.rst', '.md'] 63 | #source_suffix = '.rst' 64 | 65 | # The master toctree document. 66 | master_doc = 'index' 67 | 68 | # The language for content autogenerated by Sphinx. Refer to documentation 69 | # for a list of supported languages. 70 | # 71 | # This is also used if you do content translation via gettext catalogs. 72 | # Usually you set "language" from the command line for these cases. 73 | language = None 74 | 75 | # List of patterns, relative to source directory, that match files and 76 | # directories to ignore when looking for source files. 
77 | # This pattern also affects html_static_path and html_extra_path . 78 | exclude_patterns = [] 79 | 80 | # The name of the Pygments (syntax highlighting) style to use. 81 | pygments_style = 'sphinx' 82 | 83 | 84 | # -- Options for HTML output ------------------------------------------------- 85 | 86 | # The theme to use for HTML and HTML Help pages. See the documentation for 87 | # a list of builtin themes. 88 | # 89 | 90 | html_theme = 'alabaster' 91 | #html_theme = 'default' 92 | #html_theme = 'sphinx_rtd_theme' 93 | html_logo = "img/mastermsm.png" 94 | 95 | # Theme options are theme-specific and customize the look and feel of a theme 96 | # further. For a list of options available for each theme, see the 97 | # documentation. 98 | # 99 | # html_theme_options = {} 100 | 101 | # Add any paths that contain custom static files (such as style sheets) here, 102 | # relative to this directory. They are copied after the builtin static files, 103 | # so a file named "default.css" will overwrite the builtin "default.css". 104 | html_static_path = ['_static'] 105 | 106 | # Custom sidebar templates, must be a dictionary that maps document names 107 | # to template names. 108 | # 109 | # The default sidebars (for documents that don't match any pattern) are 110 | # defined by theme itself. Builtin themes are using these templates by 111 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', 112 | # 'searchbox.html']``. 113 | # 114 | # html_sidebars = {} 115 | 116 | 117 | # -- Options for HTMLHelp output --------------------------------------------- 118 | 119 | # Output file base name for HTML help builder. 120 | htmlhelp_basename = 'MasterMSMdoc' 121 | 122 | 123 | # -- Options for LaTeX output ------------------------------------------------ 124 | 125 | latex_elements = { 126 | # The paper size ('letterpaper' or 'a4paper'). 127 | # 128 | # 'papersize': 'letterpaper', 129 | 130 | # The font size ('10pt', '11pt' or '12pt'). 
131 | # 132 | # 'pointsize': '10pt', 133 | 134 | # Additional stuff for the LaTeX preamble. 135 | # 136 | # 'preamble': '', 137 | 138 | # Latex figure (float) alignment 139 | # 140 | # 'figure_align': 'htbp', 141 | } 142 | 143 | # Grouping the document tree into LaTeX files. List of tuples 144 | # (source start file, target name, title, 145 | # author, documentclass [howto, manual, or own class]). 146 | latex_documents = [ 147 | (master_doc, 'MasterMSM.tex', 'MasterMSM Documentation', 148 | 'David De Sancho', 'manual'), 149 | ] 150 | 151 | # -- Options for manual page output ------------------------------------------ 152 | 153 | # One entry per manual page. List of tuples 154 | # (source start file, name, description, authors, manual section). 155 | man_pages = [ 156 | (master_doc, 'mastermsm', 'MasterMSM Documentation', 157 | [author], 1) 158 | ] 159 | 160 | 161 | # -- Options for Texinfo output ---------------------------------------------- 162 | 163 | # Grouping the document tree into Texinfo files. 
List of tuples 164 | # (source start file, target name, title, author, 165 | # dir menu entry, description, category) 166 | texinfo_documents = [ 167 | (master_doc, 'MasterMSM', 'MasterMSM Documentation', 168 | author, 'MasterMSM', 'One line description of project.', 169 | 'Miscellaneous'), 170 | ] 171 | 172 | 173 | # -- Extension configuration ------------------------------------------------- 174 | 175 | # Napoleon settings 176 | napoleon_google_docstring = True 177 | napoleon_numpy_docstring = True 178 | napoleon_include_private_with_doc = False 179 | napoleon_include_special_with_doc = False 180 | napoleon_use_admonition_for_examples = False 181 | napoleon_use_admonition_for_notes = False 182 | napoleon_use_admonition_for_references = False 183 | napoleon_use_ivar = True 184 | napoleon_use_param = True 185 | napoleon_use_rtype = True 186 | -------------------------------------------------------------------------------- /docs/source/documentation.rst: -------------------------------------------------------------------------------- 1 | .. _documentation: 2 | 3 | Modules 4 | ============= 5 | MasterMSM is a Python package that is divided in three main subpackages. 6 | This way of structuring the code derives from the three main types of 7 | objects that are constructed. First, there are trajectories, which 8 | result in objects of the ``TimeSeries`` class; second, there are dynamical 9 | models, which come in the form of instances of the ``MSM`` class; finally, 10 | dynamical models can be postprocessed into simple, few-state models, which 11 | we generate as ``FEWSM`` class objects. 12 | 13 | Trajectory module 14 | ----------------- 15 | This module contains everything necessary to get your time series data 16 | into MasterMSM. The main class object within this module is the TimeSeries 17 | object. 18 | 19 | .. currentmodule:: mastermsm 20 | 21 | .. autosummary:: 22 | :toctree: 23 | 24 | trajectory 25 | 26 | 27 | MSM module 28 | ---------- 29 | .. 
currentmodule:: mastermsm 30 | 31 | .. autosummary:: 32 | :toctree: 33 | 34 | msm 35 | 36 | 37 | FEWSM module 38 | ------------ 39 | .. currentmodule:: mastermsm 40 | 41 | .. autosummary:: 42 | :toctree: 43 | 44 | fewsm 45 | 46 | Examples 47 | -------- 48 | We have put together a few simple Python notebooks to help you learn the basics 49 | of the MasterMSM package. They are based on data derived from either model systems 50 | or from molecular dynamics simulations of some simple (albeit realistic) biomolecules. 51 | You can find the notebooks in the following 52 | `link `_. 53 | -------------------------------------------------------------------------------- /docs/source/img/mastermsm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BioKT/MasterMSM/7e71b0fcf42cc7d840e58a6ca18450d710fbdbb4/docs/source/img/mastermsm.png -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. MasterMSM documentation master file, created by 2 | sphinx-quickstart on Mon Mar 25 23:47:22 2019. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | ========================================== 7 | Welcome to MasterMSM's documentation! 8 | ========================================== 9 | 10 | MasterMSM is a Python package for generating Markov state models (MSMs) 11 | from molecular dynamics trajectories. We use a formulation based on 12 | the chemical master equation. This package will allow you to: 13 | 14 | * Create Markov state / master equation models from biomolecular simulations. 15 | * Discretize trajectory data using dihedral angle based methods useful 16 | for small peptides. 17 | * Calculate rate matrices using a variety of methods. 18 | * Obtain committors and reactive fluxes. 
19 | * Carry out sensitivity analysis of networks. 20 | 21 | We have written a `paper `_ 22 | on MasterMSM that briefly describes some of the code capabilities. 23 | The MasterMSM code is hosted in `Github `_. 24 | Active development of the MasterMSM code takes place using the git version 25 | control system. 26 | 27 | .. toctree:: 28 | :maxdepth: 3 29 | :caption: Contents: 30 | 31 | about 32 | installation 33 | documentation 34 | support 35 | 36 | 37 | 38 | Indices and tables 39 | ================== 40 | 41 | * :ref:`genindex` 42 | * :ref:`modindex` 43 | * :ref:`search` 44 | -------------------------------------------------------------------------------- /docs/source/installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | You can install MasterMSM by simply downloading the package from the 4 | `GitHub repository `_ 5 | and using the standard installation instructions for packages built 6 | using `Distutils `_. 7 | 8 | .. code-block:: bash 9 | 10 | git clone http://github.com/daviddesancho/mastermsm destination/mastermsm 11 | cd destination/mastermsm 12 | python setup.py install --user 13 | 14 | Parallel processing in Python and MasterMSM 15 | ------------------------------------------- 16 | In MasterMSM we make ample use of the ``multiprocessing`` library, which 17 | for MacOS X can conflict with non-Python libraries. In the past we have 18 | found this to be a problem that can result in segmentation faults. 19 | Digging in the internet I found a workaround for this problem, by setting 20 | the following environment variable 21 | 22 | .. code-block:: bash 23 | 24 | export VECLIB_MAXIMUM_THREADS=1 25 | 26 | This should be set in the terminal before you start your Python session 27 | in case you meet this problem. 
28 | 29 | -------------------------------------------------------------------------------- /docs/source/mastermsm.fewsm.rst: -------------------------------------------------------------------------------- 1 | mastermsm.fewsm package 2 | ============================ 3 | 4 | Submodules 5 | ---------- 6 | 7 | mastermsm.fewsm.fewsm module 8 | -------------------------------- 9 | 10 | .. automodule:: mastermsm.fewsm.fewsm 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | mastermsm.fewsm.fewsm\_lib module 16 | ------------------------------------- 17 | 18 | .. automodule:: mastermsm.fewsm.fewsm_lib 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | Module contents 25 | --------------- 26 | 27 | .. automodule:: mastermsm.fewsm 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | -------------------------------------------------------------------------------- /docs/source/mastermsm.msm.rst: -------------------------------------------------------------------------------- 1 | mastermsm.msm package 2 | ===================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | mastermsm.msm.msm module 8 | ------------------------ 9 | 10 | .. automodule:: mastermsm.msm.msm 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | mastermsm.msm.msm\_lib module 16 | ----------------------------- 17 | 18 | .. automodule:: mastermsm.msm.msm_lib 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | Module contents 25 | --------------- 26 | 27 | .. automodule:: mastermsm.msm 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | -------------------------------------------------------------------------------- /docs/source/mastermsm.rst: -------------------------------------------------------------------------------- 1 | mastermsm package 2 | ================= 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. 
toctree:: 8 | 9 | mastermsm.msm 10 | mastermsm.trajectory 11 | mastermsm.fewsm 12 | 13 | Module contents 14 | --------------- 15 | 16 | .. automodule:: mastermsm 17 | :members: 18 | :undoc-members: 19 | :show-inheritance: 20 | -------------------------------------------------------------------------------- /docs/source/mastermsm.trajectory.rst: -------------------------------------------------------------------------------- 1 | mastermsm.trajectory package 2 | ============================ 3 | 4 | Submodules 5 | ---------- 6 | 7 | mastermsm.trajectory.traj module 8 | -------------------------------- 9 | 10 | .. automodule:: mastermsm.trajectory.traj 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | mastermsm.trajectory.traj\_lib module 16 | ------------------------------------- 17 | 18 | .. automodule:: mastermsm.trajectory.traj_lib 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | 24 | Module contents 25 | --------------- 26 | 27 | .. automodule:: mastermsm.trajectory 28 | :members: 29 | :undoc-members: 30 | :show-inheritance: 31 | -------------------------------------------------------------------------------- /docs/source/modules.rst: -------------------------------------------------------------------------------- 1 | mastermsm 2 | ========= 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | mastermsm 8 | -------------------------------------------------------------------------------- /docs/source/support.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | Support 3 | ======= 4 | 5 | Development of MasterMSM is based on GitHub. You can get help by opening an 6 | issue on Github_. 7 | 8 | .. 
_Github: https://github.com/daviddesancho/MasterMSM 9 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | MasterMSM examples 2 | ================== 3 | Here is a set of examples where you can learn some of the fundamentals of 4 | the MasterMSM package. They correspond to either model systems (dynamics 5 | on one- or two-dimensional potentials) or molecular dynamics simulations 6 | on simple biomolecules. 7 | 8 | Contents 9 | -------- 10 | * brownian_dynamics_1D: example corresponding to a one-dimensional two-state model. 11 | * brownian_dynamics_2D: analogous case but now in two dimensions. 12 | * alanine_dipeptide: example with true MD simulation data for the simplest peptide model, generated with the Gromacs package. 13 | * alanine_pentapeptide: example with true MD simulation data, generated with the Gromacs package. 14 | -------------------------------------------------------------------------------- /examples/alanine_dipeptide/ala_dipeptide.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## MSM of the alanine dipeptide\n", 8 | "Here we run through most of the things that can be done with this package using a simple two-state model. There are more sophisticated examples that allow for further possibilities.\n", 9 | "\n", 10 | "The first thing one must do is download the data from [OSF](https://osf.io/a2vc7) and then import a number of libraries we will need as we run this example." 
11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "%load_ext autoreload\n", 20 | "%autoreload 2\n", 21 | "%matplotlib inline\n", 22 | "import math\n", 23 | "import numpy as np" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "import matplotlib.pyplot as plt\n", 33 | "import seaborn as sns\n", 34 | "sns.set(style=\"ticks\", color_codes=True, font_scale=1.25)\n", 35 | "sns.set_style({\"xtick.direction\": \"in\", \"ytick.direction\": \"in\"})" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "### Discretizing the trajectory\n", 43 | "We start by loading the simulation data using the `trajectory` module. For this we use the external library [`MDtraj`](http://mdtraj.org), which contains all sorts of methods for parsing and calculating interesting properties of our time-series data." 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "import mdtraj as md\n", 53 | "from mastermsm.trajectory import traj" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "tr = traj.TimeSeries(top='data/alaTB.gro', traj=['data/alatb_n1_ppn24.xtc'])\n", 63 | "print (tr.mdt)" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "So does what we have calculated look somewhat like a Ramachandran map?" 
71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "phi = md.compute_phi(tr.mdt)\n", 80 | "psi = md.compute_psi(tr.mdt)\n", 81 | "res = [x for x in tr.mdt.topology.residues]" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "fig,ax = plt.subplots(figsize=(3.5,3.5))\n", 91 | "ax.plot(180./math.pi*phi[1],180./math.pi*psi[1],'o', markersize=1)\n", 92 | "ax.set_xlim(-180,180)\n", 93 | "ax.set_ylim(-180,180)\n", 94 | "ax.xaxis.set_ticks(range(-180,181,90))\n", 95 | "ax.yaxis.set_ticks(range(-180,181,90))\n", 96 | "\n", 97 | "ax.set_xlabel(r'$\\phi$', fontsize=18)\n", 98 | "ax.set_ylabel(r'$\\psi$', fontsize=18)" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "Next we proceed to discretize the trajectory based on the Ramachandran angles." 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "tr.discretize(states=['A', 'E', 'L'])" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "For plotting we convert helical configurations in 1 and beta in 0." 
122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "fig, (ax1, ax2, ax3) = plt.subplots(3, 1, sharex=True)\n", 131 | "\n", 132 | "ax1.plot(tr.mdt.time, psi[1]*180/math.pi,'o', ms=0.1)\n", 133 | "ax1.set_ylabel(r'$\\psi$', fontsize=14)\n", 134 | "ax1.set_ylim(-180,180)\n", 135 | "ax1.yaxis.set_ticks(range(-180,181,90))\n", 136 | "\n", 137 | "ax2.plot(tr.mdt.time, phi[1]*180/math.pi,'o', ms=0.1)\n", 138 | "ax2.set_ylabel(r'$\\phi$', fontsize=14)\n", 139 | "ax1.set_ylim(-180,180)\n", 140 | "ax1.yaxis.set_ticks(range(-180,181,90))\n", 141 | "\n", 142 | "ax3.set_ylabel('State')\n", 143 | "ax3.set_ylim(-0.2,2.2)\n", 144 | "ax3.yaxis.set_ticks(range(3))\n", 145 | "labels = [item.get_text() for item in ax2.get_xticklabels()]\n", 146 | "labels = ['A', 'E', 'L']\n", 147 | "y = [labels.index(x) if x in labels else 0 for x in tr.distraj ]\n", 148 | "ax3.plot(tr.mdt.time, y, lw=1)\n", 149 | "ax3.set_yticklabels(labels)\n", 150 | "ax3.set_xlabel('Time [ps]')\n", 151 | "\n", 152 | "ax1.set_xlim(0, 2.0e5)\n", 153 | "plt.tight_layout(h_pad=0)" 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "metadata": { 159 | "collapsed": true 160 | }, 161 | "source": [ 162 | "In the plot we see how the time series of continuous torsion angles is converted into a time series of discrete states. We can obtain a list of states in the following way." 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [ 171 | "tr.find_keys()\n", 172 | "tr.keys" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": { 178 | "collapsed": true 179 | }, 180 | "source": [ 181 | "### Building the master equation model\n", 182 | "After having loaded our trajectory using the functionalities from the `trajectory` module we start building the master equation model. 
For this, we make use of the `msm` module. There are two steps corresponding to the two main classes within that module. First we create an instance of the `SuperMSM`, which can be used to direct the whole process of constructing and validating the MSM." 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "from mastermsm.msm import msm\n", 192 | "msm_alaTB = msm.SuperMSM([tr])" 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": {}, 198 | "source": [ 199 | "Then, using the `do_msm` method, we produce instances of the `MSM` class at a desired lag time, $\\Delta t$. Each of these contains an MSM built at a specific lag time. These are stored as a dictionary in the `msms` attribute of the `SuperMSM` class. " 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "lagt = 1\n", 209 | "msm_alaTB.do_msm(lagt)\n", 210 | "msm_alaTB.msms[lagt].do_trans()\n", 211 | "msm_alaTB.msms[lagt].boots()" 212 | ] 213 | }, 214 | { 215 | "cell_type": "markdown", 216 | "metadata": {}, 217 | "source": [ 218 | "The resulting model has a number of things we may be interested in, like its eigenvalue spectrum (in this case limited to two relaxation times, corresponding to the exchange of helix, coil and $\\alpha_L$ states) or the equilibrium probabilities of the microstates." 
219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": null, 224 | "metadata": {}, 225 | "outputs": [], 226 | "source": [ 227 | "fig, ax = plt.subplots(1, 2, figsize=(6,3))\n", 228 | "\n", 229 | "ax[0].errorbar([1, 2], msm_alaTB.msms[lagt].tau_ave, msm_alaTB.msms[lagt].tau_std ,fmt='o-', markersize=5)\n", 230 | "ax[1].errorbar([1,2,3], msm_alaTB.msms[lagt].peq_ave, msm_alaTB.msms[lagt].peq_std ,fmt='o-', markersize=5)\n", 231 | "\n", 232 | "ax[0].set_xlim(0.5, 2.5)\n", 233 | "ax[0].set_ylim(10,2e3)\n", 234 | "ax[0].set_yscale('log')\n", 235 | "ax[0].set_ylabel(r'$\\tau$ [ps]', fontsize=18)\n", 236 | "ax[0].set_xlabel(r'$\\lambda_1$', fontsize=18)\n", 237 | "\n", 238 | "ax[1].set_ylabel(r'$P_{eq}$', fontsize=18)\n", 239 | "ax[1].set_xlabel(r'state', fontsize=18)\n", 240 | "ax[1].set_yscale('log')\n", 241 | "ax[1].set_ylim(1e-2, 1)\n", 242 | "ax[1].set_xticks([1, 2, 3])\n", 243 | "ax[1].set_xticklabels(labels[:3])\n", 244 | "ax[1].set_xlim(0.5,3.5)\n", 245 | "\n", 246 | "plt.tight_layout(w_pad=1)" 247 | ] 248 | }, 249 | { 250 | "cell_type": "markdown", 251 | "metadata": {}, 252 | "source": [ 253 | "### Validation\n", 254 | "However, from simply calculating these quantities we do not know how informative they really are. In order to understand whether the values we calculate are really reflective of the properties of the underlying system we resort to validation of the MSM. The two-level structure that we have described, consisting of the `SuperMSM` and `MSM` classes, allows for the user to test some global convergence properties first (at the level of the `SuperMSM`). " 255 | ] 256 | }, 257 | { 258 | "cell_type": "markdown", 259 | "metadata": {}, 260 | "source": [ 261 | "#### Convergence tests\n", 262 | "For validating the model we first see at which point the relaxation times are sufficiently well converged." 
263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": null, 268 | "metadata": {}, 269 | "outputs": [], 270 | "source": [ 271 | "msm_alaTB.convergence_test(time=[1, 2, 5, 7, 10, 20, 50, 100], error=True)" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": null, 277 | "metadata": {}, 278 | "outputs": [], 279 | "source": [ 280 | "fig, ax = plt.subplots()\n", 281 | "\n", 282 | "tau_vs_lagt = np.array([[x,msm_alaTB.msms[x].tauT[0],msm_alaTB.msms[x].tau_std[0]] \\\n", 283 | " for x in sorted(msm_alaTB.msms.keys())])\n", 284 | "ax.errorbar(tau_vs_lagt[:,0],tau_vs_lagt[:,1],fmt='o-', yerr=tau_vs_lagt[:,2], markersize=5)\n", 285 | "\n", 286 | "tau_vs_lagt = np.array([[x,msm_alaTB.msms[x].tauT[1],msm_alaTB.msms[x].tau_std[1]] \\\n", 287 | " for x in sorted(msm_alaTB.msms.keys())])\n", 288 | "ax.errorbar(tau_vs_lagt[:,0],tau_vs_lagt[:,1],fmt='o-', yerr=tau_vs_lagt[:,2], markersize=5)\n", 289 | "\n", 290 | "ax.fill_between(10**np.arange(-0.2,3,0.2), 1e-1, 10**np.arange(-0.2,3,0.2), facecolor='lightgray', alpha=0.5)\n", 291 | "ax.set_xlabel(r'$\\Delta$t [ps]', fontsize=16)\n", 292 | "ax.set_ylabel(r'$\\tau_i$ [ps]', fontsize=16)\n", 293 | "ax.set_xlim(0.8,200)\n", 294 | "ax.set_ylim(10,2000)\n", 295 | "_ = ax.set_xscale('log')\n", 296 | "ax.set_yscale('log')\n", 297 | "plt.tight_layout()" 298 | ] 299 | }, 300 | { 301 | "cell_type": "markdown", 302 | "metadata": {}, 303 | "source": [ 304 | "Here we see that from the very beginning the relaxation times are independent of the lag time ($\\Delta$t) used in the construction of the model. This convergence is a good indicator of the Markovianity of the model and is a result of the use of transition based assignment. The shaded area corresponds to the range of lag times where the information we obtain is largely unreliable, because the lag time itself is longer than the relaxation time." 
305 | ] 306 | }, 307 | { 308 | "cell_type": "markdown", 309 | "metadata": {}, 310 | "source": [ 311 | "#### Chapman-Kolmogorov test\n", 312 | "Another important step in the validation is to carry out the so-called Chapman-Kolmogorov test. In this case, the predictions from the MSM are validated against the simulation data used for its construction. " 313 | ] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "execution_count": null, 318 | "metadata": {}, 319 | "outputs": [], 320 | "source": [ 321 | "pMSM_E, pMD_E, epMD_E = msm_alaTB.ck_test(time=[1, 2, 5, 7, 10, 20, 50, 100], init=['E'])\n", 322 | "pMSM_A, pMD_A, epMD_A = msm_alaTB.ck_test(time=[1, 2, 5, 7, 10, 20, 50, 100], init=['A'])\n", 323 | "pMSM_L, pMD_L, epMD_L = msm_alaTB.ck_test(time=[1, 2, 5, 7, 10, 20, 50, 100], init=['L'])" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": null, 329 | "metadata": {}, 330 | "outputs": [], 331 | "source": [ 332 | "fig, ax = plt.subplots(1,3, figsize=(8,3.25), sharex=True, sharey=True)\n", 333 | "ax[0].errorbar(pMD_E[:,0], pMD_E[:,1], epMD_E, fmt='o')\n", 334 | "for p in pMSM_E:\n", 335 | " ax[0].plot(p[0], p[1], label=\"$\\Delta t$=%g\"%p[0][0])\n", 336 | "ax[0].legend(fontsize=10, ncol=2)\n", 337 | "\n", 338 | "ax[1].errorbar(pMD_A[:,0], pMD_A[:,1], epMD_A, fmt='o')\n", 339 | "for p in pMSM_A:\n", 340 | " ax[1].plot(p[0], p[1])\n", 341 | "\n", 342 | "ax[2].errorbar(pMD_L[:,0], pMD_L[:,1], epMD_L, fmt='o')\n", 343 | "for p in pMSM_L:\n", 344 | " ax[2].plot(p[0], p[1])\n", 345 | "\n", 346 | "#ax[0].set_xscale('log')\n", 347 | "ax[0].set_ylabel('P(t)')\n", 348 | "ax[0].set_xlabel('Time (ps)')\n", 349 | "ax[1].set_xlabel('Time (ps)')\n", 350 | "ax[2].set_xlabel('Time (ps)')\n", 351 | "plt.tight_layout(w_pad=0)" 352 | ] 353 | }, 354 | { 355 | "cell_type": "markdown", 356 | "metadata": {}, 357 | "source": [ 358 | "These plots show the decay of the population from a given initial condition. 
In this case, the left, center and right plots correspond to starting in the `E`, `A` and `L` basins, respectively. In all cases we compare the calculation from the simulation data (as circles) and the propagation from MSMs calculated at different lag times (lines). The agreement between the simulation data and the model predictions confirms the result from the convergence analysis." 359 | ] 360 | }, 361 | { 362 | "cell_type": "markdown", 363 | "metadata": {}, 364 | "source": [ 365 | "#### Autocorrelation functions\n", 366 | "The MSM can also be validated against the autocorrelation function (ACF) of the eigenmodes. If the simulation data is projected onto the eigenmodes, then the ACF for mode $n$ should decay with a timescale equal to $-1/\\lambda_n$." 367 | ] 368 | }, 369 | { 370 | "cell_type": "code", 371 | "execution_count": null, 372 | "metadata": {}, 373 | "outputs": [], 374 | "source": [ 375 | "msm_alaTB.msms[2].do_trans(evecs=True)\n", 376 | "acf = msm_alaTB.msms[2].acf_mode()" 377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": null, 382 | "metadata": {}, 383 | "outputs": [], 384 | "source": [ 385 | "len(tr.mdt.time[1:])" 386 | ] 387 | }, 388 | { 389 | "cell_type": "code", 390 | "execution_count": null, 391 | "metadata": {}, 392 | "outputs": [], 393 | "source": [ 394 | "fig, ax = plt.subplots()\n", 395 | "ax.plot(tr.mdt.time[1:], acf[1], 's', label='$i$=1', color='tab:blue', alpha=0.1)\n", 396 | "ax.plot(tr.mdt.time[1:],np.exp(-tr.mdt.time[1:]*1./msm_alaTB.msms[2].tauT[0]), color='tab:blue')\n", 397 | "\n", 398 | "ax.plot(tr.mdt.time[1:], acf[2], 'o', label='$i$=2', color='tab:orange', alpha=0.2)\n", 399 | "ax.plot(tr.mdt.time[1:],np.exp(-tr.mdt.time[1:]*1./msm_alaTB.msms[2].tauT[1]), color='tab:orange')\n", 400 | "\n", 401 | "ax.set_xlim(2,3000)\n", 402 | "ax.set_ylim(0,1)\n", 403 | "\n", 404 | "ax.set_xlabel('Time [ps]')\n", 405 | "ax.set_ylabel('C$_{ii}$(t)')\n", 406 | "ax.set_xscale('log')\n", 407 | "plt.legend()\n", 408 | 
"plt.tight_layout()" 409 | ] 410 | }, 411 | { 412 | "cell_type": "markdown", 413 | "metadata": {}, 414 | "source": [ 415 | "This result is particularly interesting. While the fast mode ($\\lambda_2$) is very well determined because there are many transitions, for the slowest mode the agreement is notably worse." 416 | ] 417 | }, 418 | { 419 | "cell_type": "markdown", 420 | "metadata": {}, 421 | "source": [ 422 | "### Calculation of the rate matrix\n", 423 | "From the transition matrix we can calculate the rate matrix. One possibility is to use an approximate method based simply on a Taylor expansion ([De Sancho, Mittal and Best, *JCTC*, 2013](http://dx.doi.org/10.1021/ct301033r)). We can check whether our approximate method gives a good result. We use short times since we have checked that short times are sufficient in this case for obtaining converged relaxation times." 424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": null, 429 | "metadata": {}, 430 | "outputs": [], 431 | "source": [ 432 | "fig, ax = plt.subplots(1,2, figsize=(7.5,3.5))\n", 433 | "for i in [1, 2, 5, 7, 10, 20]:\n", 434 | " msm_alaTB.msms[i].do_rate()\n", 435 | " ax[0].errorbar(msm_alaTB.msms[i].tauT, msm_alaTB.msms[i].tauK, fmt='o', xerr=msm_alaTB.msms[i].tau_std, markersize=10, label=str(i))\n", 436 | " ax[1].errorbar(msm_alaTB.msms[i].peqT, msm_alaTB.msms[i].peqK, fmt='o', xerr=msm_alaTB.msms[i].peq_std, markersize=10, label=str(i))\n", 437 | "\n", 438 | "ax[0].plot([0,1000],[0,1000],'--', color='lightgray')\n", 439 | "ax[0].set_xlabel(r'$\\tau_T$ [ps]', fontsize=20)\n", 440 | "ax[0].set_ylabel(r'$\\tau_K$ [ps]', fontsize=20)\n", 441 | "ax[0].set_xscale('log')\n", 442 | "ax[0].set_yscale('log')\n", 443 | "\n", 444 | "ax[1].plot([0,1],[0,1],'--', color='lightgray')\n", 445 | "ax[1].set_xlabel(r'$p_T$', fontsize=20)\n", 446 | "ax[1].set_ylabel(r'$p_K$', fontsize=20)\n", 447 | "ax[1].set_xscale('log')\n", 448 | "ax[1].set_yscale('log')\n", 449 | "\n", 450 | "\n", 451 | 
"ax[0].legend(fontsize=9, bbox_to_anchor=(1.0, 0.65))\n", 452 | "plt.tight_layout(pad=0.4, w_pad=3)" 453 | ] 454 | }, 455 | { 456 | "cell_type": "markdown", 457 | "metadata": {}, 458 | "source": [ 459 | "The method produces acceptable solutions for short lag times (up to 5-10 ps) although the result rapidly diverges from the transition matrix relaxation time at long lag times. Equilibrium probabilities are recovered correctly at all lag times from the rate matrices." 460 | ] 461 | } 462 | ], 463 | "metadata": { 464 | "kernelspec": { 465 | "display_name": "Python 3", 466 | "language": "python", 467 | "name": "python3" 468 | }, 469 | "language_info": { 470 | "codemirror_mode": { 471 | "name": "ipython", 472 | "version": 3 473 | }, 474 | "file_extension": ".py", 475 | "mimetype": "text/x-python", 476 | "name": "python", 477 | "nbconvert_exporter": "python", 478 | "pygments_lexer": "ipython3", 479 | "version": "3.7.4" 480 | } 481 | }, 482 | "nbformat": 4, 483 | "nbformat_minor": 1 484 | } 485 | -------------------------------------------------------------------------------- /examples/alanine_dipeptide/ala_dipeptide_discretize.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Discretizations\n", 8 | "Here we show how different discretizations work within MasterMSM. An important note is that not all discretizations will be sensible for all systems, but as usual the alanine dipeptide is a good testbed.\n", 9 | "\n", 10 | "We start downloading the data from the following [link](https://osf.io/a2vc7) and importing a number of libraries for plotting and analysis that will be useful for our work." 
11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "%load_ext autoreload\n", 20 | "%matplotlib inline\n", 21 | "import math\n", 22 | "import numpy as np\n", 23 | "import matplotlib.pyplot as plt\n", 24 | "import seaborn as sns\n", 25 | "sns.set(style=\"ticks\", color_codes=True, font_scale=1.5)\n", 26 | "sns.set_style({\"xtick.direction\": \"in\", \"ytick.direction\": \"in\"})" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "Next we import the ```traj``` module and read the molecular simulation trajectory in the ```xtc``` compressed format from Gromacs." 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "from mastermsm.trajectory import traj\n", 43 | "tr = traj.TimeSeries(top='data/alaTB.gro', traj=['data/alatb_n1_ppn24.xtc'])\n", 44 | "print (tr.mdt)" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "### Core Ramachandran angle regions\n", 52 | "Following previous work we can use core regions in the Ramachandran map to define our states. We use utilities from the [MDtraj](http://mdtraj.org) package to compute the Phi and Psi dihedrals." 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "import mdtraj as md\n", 62 | "phi = md.compute_phi(tr.mdt)\n", 63 | "psi = md.compute_psi(tr.mdt)\n", 64 | "res = [x for x in tr.mdt.topology.residues]" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "Then we run the actual discretization, using three states for the alpha (`A`), extended (`E`) and left-handed alpha (`L`) conformations." 
72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "tr.discretize(states=['A', 'E', 'L'])\n", 81 | "tr.find_keys()" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "fig, ax = plt.subplots(figsize=(10,3))\n", 91 | "ax.plot(tr.mdt.time, [tr.keys.index(x) if (x in tr.keys) else 0 for x in tr.distraj ], lw=1)\n", 92 | "ax.set_xlim(0, 1.5e5)\n", 93 | "ax.set_ylim(-0.5, 2.5)\n", 94 | "ax.set_yticks(range(3))\n", 95 | "ax.set_yticklabels(['A', 'E', 'L'])\n", 96 | "ax.set_xlabel('Time (ps)', fontsize=20)\n", 97 | "ax.set_ylabel('state', fontsize=20)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "Finally we derive the MSM using the tools from the ```msm``` module. In particular, we use the ```SuperMSM``` class that will help build MSMs at various lag times." 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "from mastermsm.msm import msm\n", 114 | "msm_alaTB = msm.SuperMSM([tr])\n", 115 | "for i in [1, 2, 5, 10, 20, 50, 100]:\n", 116 | " msm_alaTB.do_msm(i)\n", 117 | " msm_alaTB.msms[i].do_trans()\n", 118 | " msm_alaTB.msms[i].boots()" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "Next we gather results from all these MSMs and plot the relaxation time corresponding to the two slow transitions." 
126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": null, 131 | "metadata": {}, 132 | "outputs": [], 133 | "source": [ 134 | "fig, ax = plt.subplots()\n", 135 | "tau_vs_lagt = np.array([[x,msm_alaTB.msms[x].tauT[0],msm_alaTB.msms[x].tau_std[0]] \\\n", 136 | " for x in sorted(msm_alaTB.msms.keys())])\n", 137 | "ax.errorbar(tau_vs_lagt[:,0],tau_vs_lagt[:,1],fmt='o-', yerr=tau_vs_lagt[:,2], markersize=10)\n", 138 | "tau_vs_lagt = np.array([[x,msm_alaTB.msms[x].tauT[1],msm_alaTB.msms[x].tau_std[1]] \\\n", 139 | " for x in sorted(msm_alaTB.msms.keys())])\n", 140 | "ax.errorbar(tau_vs_lagt[:,0],tau_vs_lagt[:,1],fmt='o-', yerr=tau_vs_lagt[:,2], markersize=10)\n", 141 | "ax.fill_between(10**np.arange(-0.2,3,0.2), 1e-1, 10**np.arange(-0.2,3,0.2), facecolor='lightgray')\n", 142 | "ax.set_xlabel(r'$\\Delta$t [ps]', fontsize=16)\n", 143 | "ax.set_ylabel(r'$\\tau$ [ps]', fontsize=16)\n", 144 | "ax.set_xlim(0.8,150)\n", 145 | "ax.set_ylim(10,3000)\n", 146 | "ax.set_yscale('log')\n", 147 | "_ = ax.set_xscale('log')" 148 | ] 149 | }, 150 | { 151 | "cell_type": "markdown", 152 | "metadata": {}, 153 | "source": [ 154 | "### Fine grid on the Ramachandran map\n", 155 | "Alternatively we can make a grid on the Ramachandran map with many more states." 
156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [ 164 | "tr.discretize(method=\"ramagrid\", nbins=30)\n", 165 | "tr.find_keys()" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "fig, ax = plt.subplots(figsize=(10,3))\n", 175 | "ax.plot(tr.mdt.time, [x for x in tr.distraj], '.', ms=1)\n", 176 | "ax.set_xlim(0, 1.5e5)\n", 177 | "ax.set_ylim(-1, 900)\n", 178 | "ax.set_xlabel('Time (ps)', fontsize=20)\n", 179 | "ax.set_ylabel('state', fontsize=20)" 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": {}, 185 | "source": [ 186 | "Then we repeat the same steps as before, but with this fine grained MSM." 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": { 193 | "scrolled": false 194 | }, 195 | "outputs": [], 196 | "source": [ 197 | "from mastermsm.msm import msm\n", 198 | "msm_alaTB_grid = msm.SuperMSM([tr])\n", 199 | "for i in [1, 2, 5, 10, 20, 50, 100]:\n", 200 | " msm_alaTB_grid.do_msm(i)\n", 201 | " msm_alaTB_grid.msms[i].do_trans()\n", 202 | " msm_alaTB_grid.msms[i].boots()" 203 | ] 204 | }, 205 | { 206 | "cell_type": "markdown", 207 | "metadata": {}, 208 | "source": [ 209 | "First we take a look at the dependence of the slowest relaxation time with the lag time, $\\Delta t$ for the construction of the Markov model as a minimal quality control." 
210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": null, 215 | "metadata": {}, 216 | "outputs": [], 217 | "source": [ 218 | "tau1_vs_lagt = np.array([[x, msm_alaTB_grid.msms[x].tauT[0], \\\n", 219 | " msm_alaTB_grid.msms[x].tau_std[0]] \\\n", 220 | " for x in sorted(msm_alaTB_grid.msms.keys())])\n", 221 | "tau2_vs_lagt = np.array([[x, msm_alaTB_grid.msms[x].tauT[1], \\\n", 222 | " msm_alaTB_grid.msms[x].tau_std[1]] \\\n", 223 | " for x in sorted(msm_alaTB_grid.msms.keys())])\n", 224 | "tau3_vs_lagt = np.array([[x,msm_alaTB_grid.msms[x].tauT[2], \\\n", 225 | " msm_alaTB_grid.msms[x].tau_std[2]] \\\n", 226 | " for x in sorted(msm_alaTB_grid.msms.keys())])\n", 227 | "tau4_vs_lagt = np.array([[x,msm_alaTB_grid.msms[x].tauT[3], \\\n", 228 | " msm_alaTB_grid.msms[x].tau_std[3]] \\\n", 229 | " for x in sorted(msm_alaTB_grid.msms.keys())])\n", 230 | "\n", 231 | "fig, ax = plt.subplots()\n", 232 | "ax.errorbar(tau1_vs_lagt[:,0],tau1_vs_lagt[:,1], tau1_vs_lagt[:,2], fmt='o-', markersize=10)\n", 233 | "ax.errorbar(tau2_vs_lagt[:,0],tau2_vs_lagt[:,1], tau2_vs_lagt[:,2], fmt='o-', markersize=10)\n", 234 | "ax.errorbar(tau3_vs_lagt[:,0],tau3_vs_lagt[:,1], tau3_vs_lagt[:,2], fmt='o-', markersize=10)\n", 235 | "ax.errorbar(tau4_vs_lagt[:,0],tau4_vs_lagt[:,1], tau4_vs_lagt[:,2], fmt='o-', markersize=10)\n", 236 | "ax.fill_between(10**np.arange(-0.2,3,0.2), 1e-1, 10**np.arange(-0.2,3,0.2), facecolor='lightgray', alpha=0.5)\n", 237 | "ax.set_xlabel(r'$\\Delta$t [ps]', fontsize=16)\n", 238 | "ax.set_ylabel(r'$\\tau_i$ [ps]', fontsize=16)\n", 239 | "ax.set_xlim(0.8,200)\n", 240 | "ax.set_ylim(1,3000)\n", 241 | "_ = ax.set_xscale('log')\n", 242 | "_ = ax.set_yscale('log')\n", 243 | "plt.tight_layout()" 244 | ] 245 | }, 246 | { 247 | "cell_type": "markdown", 248 | "metadata": {}, 249 | "source": [ 250 | "The slowest relaxation times from the fine-grained MSM agree with those of the core regions, although in this case there is an additional slow mode." 
251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": null, 256 | "metadata": {}, 257 | "outputs": [], 258 | "source": [ 259 | "fig, ax = plt.subplots()\n", 260 | "ax.errorbar(range(1,16),msm_alaTB_grid.msms[10].tauT[0:15], fmt='o-', \\\n", 261 | " yerr= msm_alaTB_grid.msms[10].tau_std[0:15], ms=10)\n", 262 | "ax.set_xlabel('Eigenvalue index')\n", 263 | "ax.set_ylabel(r'$\\tau_i$ (ns)')\n", 264 | "ax.set_yscale('log')\n", 265 | "plt.tight_layout()" 266 | ] 267 | }, 268 | { 269 | "cell_type": "markdown", 270 | "metadata": {}, 271 | "source": [ 272 | "We can understand which dynamical processes the eigenvectors are associated to by looking at the corresponding eigenvectors. For this we recalculate the transition matrix but now recovering the eigenvectors. " 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": null, 278 | "metadata": {}, 279 | "outputs": [], 280 | "source": [ 281 | "msm_alaTB_grid.msms[10].do_trans(evecs=True)" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": null, 287 | "metadata": {}, 288 | "outputs": [], 289 | "source": [ 290 | "fig, ax = plt.subplots(1,4, figsize=(12,3), sharex=True, sharey=True)\n", 291 | "mat = np.zeros((30,30), float)\n", 292 | "for i in [x for x in zip(msm_alaTB_grid.msms[10].keep_keys, \\\n", 293 | " msm_alaTB_grid.msms[10].rvecsT[:,0])]:\n", 294 | " #print i, i[0]%20, int(i[0]/20), -i[1]\n", 295 | "\n", 296 | " mat[i[0]%30, int(i[0]/30)] = i[1]\n", 297 | "ax[0].imshow(mat.transpose(), interpolation=\"none\", origin='lower', \\\n", 298 | " cmap='Blues')\n", 299 | "ax[0].set_title(r\"$\\psi_1$\")\n", 300 | "\n", 301 | "mat = np.zeros((30,30), float)\n", 302 | "for i in [x for x in zip(msm_alaTB_grid.msms[10].keep_keys, \\\n", 303 | " msm_alaTB_grid.msms[10].rvecsT[:,1])]:\n", 304 | " #print i, i[0]%20, int(i[0]/20), -i[1]\n", 305 | " mat[i[0]%30, int(i[0]/30)] = -i[1]\n", 306 | "ax[1].imshow(mat.transpose(), interpolation=\"none\", origin='lower', 
\\\n", 307 | " cmap='RdBu')\n", 308 | "ax[1].set_title(r\"$\\psi_2$\")\n", 309 | "\n", 310 | "mat = np.zeros((30,30), float)\n", 311 | "for i in [x for x in zip(msm_alaTB_grid.msms[10].keep_keys, \\\n", 312 | " msm_alaTB_grid.msms[10].rvecsT[:,2])]:\n", 313 | " #print i, i[0]%20, int(i[0]/20), -i[1]\n", 314 | " mat[i[0]%30, int(i[0]/30)] = -i[1]\n", 315 | "ax[2].imshow(mat.transpose(), interpolation=\"none\", origin='lower', \\\n", 316 | " cmap='RdBu')\n", 317 | "ax[2].set_title(r\"$\\psi_3$\")\n", 318 | "\n", 319 | "mat = np.zeros((30,30), float)\n", 320 | "for i in [x for x in zip(msm_alaTB_grid.msms[10].keep_keys, \\\n", 321 | " msm_alaTB_grid.msms[10].rvecsT[:,3])]:\n", 322 | " #print i, i[0]%20, int(i[0]/20), -i[1]\n", 323 | " mat[i[0]%30, int(i[0]/30)] = -i[1]\n", 324 | "ax[3].imshow(mat.transpose(), interpolation=\"none\", origin='lower', \\\n", 325 | " cmap='RdBu')\n", 326 | "ax[3].set_title(r\"$\\psi_4$\")" 327 | ] 328 | }, 329 | { 330 | "cell_type": "markdown", 331 | "metadata": {}, 332 | "source": [ 333 | "Here we are plotting the values of the eigenvectors so that the state indexes match the positions in the Ramachandran map. On the left, we show the stationary eigenvector, $\\psi_1$, which is proportional to the equilibrium population. The other three plots correspond to the slowest dynamical modes. From $\\psi_2$, we find that the slowest transition is the interconversion between the $\\alpha_L$ and the $\\alpha_R/\\beta$ states. These, equilibrate more rapidly, as indicated by $\\psi_3$. Finally, on the right, we find the additional mode that corresponds to a yet faster transition between the $\\alpha_L$ basin and a fourth Ramachandran region." 334 | ] 335 | }, 336 | { 337 | "cell_type": "markdown", 338 | "metadata": {}, 339 | "source": [ 340 | "### Clustering\n", 341 | "So it seems three states only may not be a very good clustering for this particular system. Maybe we need one more. 
In order to do the clustering systematically we use the ```fewsm``` module from ```MasterMSM```. From the eigenvectors we are immediately able to produce a sensible, albeit still imperfect, partitioning in four states." 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": null, 347 | "metadata": {}, 348 | "outputs": [], 349 | "source": [ 350 | "from mastermsm.fewsm import fewsm" 351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "execution_count": null, 356 | "metadata": {}, 357 | "outputs": [], 358 | "source": [ 359 | "fewsm4 = fewsm.FEWSM(msm_alaTB_grid.msms[2], N=4)" 360 | ] 361 | }, 362 | { 363 | "cell_type": "code", 364 | "execution_count": null, 365 | "metadata": {}, 366 | "outputs": [], 367 | "source": [ 368 | "import matplotlib.cm as cm\n", 369 | "fig, ax = plt.subplots(figsize=(5,5))\n", 370 | "mat = np.zeros((30,30), float)\n", 371 | "for i in msm_alaTB_grid.msms[2].keep_keys:\n", 372 | " j = msm_alaTB_grid.msms[2].keep_keys.index(i)\n", 373 | " if j in fewsm4.macros[0]:\n", 374 | " mat[i%30, int(i/30)] = 1\n", 375 | " elif j in fewsm4.macros[1]:\n", 376 | " mat[i%30, int(i/30)] = 2\n", 377 | " elif j in fewsm4.macros[2]:\n", 378 | " mat[i%30, int(i/30)] = 3\n", 379 | " else:\n", 380 | " mat[i%30, int(i/30)] = 4\n", 381 | " #print i, i[0]%20, int(i[0]/20), -i[1]\n", 382 | "my_cmap = cm.get_cmap('viridis')\n", 383 | "my_cmap.set_under('w')\n", 384 | "ax.imshow(mat.transpose(), interpolation=\"none\", origin='lower', \\\n", 385 | " cmap=my_cmap, vmin = 0.5)" 386 | ] 387 | }, 388 | { 389 | "cell_type": "markdown", 390 | "metadata": {}, 391 | "source": [ 392 | "Note how the partitioning based on eigenvectors captures the three important regions in the Ramachandran map." 
393 | ] 394 | } 395 | ], 396 | "metadata": { 397 | "kernelspec": { 398 | "display_name": "Python 3", 399 | "language": "python", 400 | "name": "python3" 401 | }, 402 | "language_info": { 403 | "codemirror_mode": { 404 | "name": "ipython", 405 | "version": 3 406 | }, 407 | "file_extension": ".py", 408 | "mimetype": "text/x-python", 409 | "name": "python", 410 | "nbconvert_exporter": "python", 411 | "pygments_lexer": "ipython3", 412 | "version": "3.8.8" 413 | } 414 | }, 415 | "nbformat": 4, 416 | "nbformat_minor": 1 417 | } 418 | -------------------------------------------------------------------------------- /examples/bistable_potential/2D_smFS_MSM.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\n", 8 | "## MSM of Brownian dynamics simulations of diffusion on a 2D surface\n", 9 | "Here we analyze simulations on another simple model system, but one that goes beyond one dimension. Specifically, we use the model by [Berezhkovskii et al, *JCP* (2014)](http://dx.doi.org/10.1063/1.4902243). We run Brownian dynamics simulations on this surface and build a simple Markov state model from it. The data can be downloaded from [OSF](https://osf.io/a2vc7/).\n", 10 | "\n", 11 | "As always we start by importing some relevant libraries."
12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "%matplotlib inline\n", 21 | "%load_ext autoreload\n", 22 | "%autoreload 2\n", 23 | "import h5py\n", 24 | "import numpy as np" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "import matplotlib.pyplot as plt\n", 34 | "import matplotlib.cm as cm\n", 35 | "import seaborn as sns\n", 36 | "sns.set(style=\"ticks\", color_codes=True, font_scale=1.25)\n", 37 | "sns.set_style({\"xtick.direction\": \"in\", \"ytick.direction\": \"in\"})" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "#### Discretization" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "Here we upload the data obtained from Brownian Dynamics simulations of isotropic diffusion on a 2D potential." 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "h5file = \"../datafiles/brownian_dynamics/cossio_kl1.3_Dx1_Dq1.h5\"\n", 61 | "f = h5py.File(h5file, 'r')\n", 62 | "data = np.array(f['data'])\n", 63 | "f.close()" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "fig, ax = plt.subplots(2,1,figsize=(10,3), sharex=True,sharey=False)\n", 73 | "ax[0].plot(data[:,0],data[:,1],'.', markersize=1)\n", 74 | "ax[1].plot(data[:,0],data[:,2],'g.', markersize=1)\n", 75 | "ax[0].set_ylim(-10,10)\n", 76 | "ax[1].set_xlim(0,25000)\n", 77 | "ax[0].set_ylabel('x')\n", 78 | "ax[1].set_ylabel('y')\n", 79 | "ax[1].set_xlabel('Time')\n", 80 | "plt.tight_layout(h_pad=0)" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "Clearly the system interconverts between two states. 
Both coordinates, x and y, are highly correlated, although the free energy landscape, which we can estimate from a Boltzmann inversion, varies a bit depending on the projection we use." 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "fig, ax = plt.subplots(figsize=(6,4))\n", 97 | "hist, bin_edges = np.histogram(data[:,1], bins=np.linspace(-9,9,25), \\\n", 98 | " density=True)\n", 99 | "bin_centers = [0.5*(bin_edges[i]+bin_edges[i+1]) \\\n", 100 | " for i in range(len(bin_edges)-1)]\n", 101 | "ax.plot(bin_centers, -np.log(hist), lw=3, label=\"x\")\n", 102 | "hist, bin_edges = np.histogram(data[:,2], bins=np.linspace(-9,9,25), \\\n", 103 | " density=True)\n", 104 | "bin_centers = [0.5*(bin_edges[i]+bin_edges[i+1]) \\\n", 105 | " for i in range(len(bin_edges)-1)]\n", 106 | "ax.plot(bin_centers, -np.log(hist), lw=3, label=\"y\")\n", 107 | "ax.set_xlim(-7,7)\n", 108 | "ax.set_ylim(1,9)\n", 109 | "ax.set_xlabel('coordinate')\n", 110 | "ax.set_ylabel('PMF ($k_BT$)')\n", 111 | "ax.legend()" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "We can also represent the energy landscape in two dimensions:" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "H, x_edges, y_edges = np.histogram2d(data[:,1],data[:,2], \\\n", 128 | " bins=[np.linspace(-9,9,25), np.linspace(-9,9,25)])\n", 129 | "\n", 130 | "fig, ax = plt.subplots(figsize=(5,4.5))\n", 131 | "pmf = -np.log(H.transpose())\n", 132 | "pmf -= np.min(pmf)\n", 133 | "cs = ax.contourf(pmf, extent=[x_edges.min(), x_edges.max(), \\\n", 134 | " y_edges.min(), y_edges.max()], \\\n", 135 | " levels=np.arange(0, 6.5,0.5), alpha=0.75)\n", 136 | "cbar = plt.colorbar(cs)\n", 137 | "ax.set_xlim(-7,7)\n", 138 | "ax.set_ylim(-7,7)\n", 139 | "ax.set_yticks(range(-5,6,5))\n", 140 | 
"ax.set_xlabel('$x$', fontsize=18)\n", 141 | "ax.set_ylabel('$y$', fontsize=18)\n", 142 | "plt.tight_layout()" 143 | ] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": {}, 148 | "source": [ 149 | "To construct the MSM, we assigning frames to microstates. We first need to import the function that makes the grid." 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [ 158 | "from scipy.stats import binned_statistic_2d" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": null, 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "statistic, x_edge, y_edge, binnumber = \\\n", 168 | " binned_statistic_2d(data[:,1],data[:,2],None,'count', \\\n", 169 | " bins=[np.linspace(-9,9,25), np.linspace(-9,9,25)])" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": null, 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": [ 178 | "fig, ax = plt.subplots(figsize=(6,5))\n", 179 | "\n", 180 | "grid = ax.imshow(-np.log(statistic.transpose()),origin=\"lower\",cmap=plt.cm.rainbow)\n", 181 | "\n", 182 | "cbar = plt.colorbar(grid)\n", 183 | "ax.set_yticks(range(0,20,5))\n", 184 | "ax.set_xticks(range(0,20,5))\n", 185 | "ax.set_xlabel('$x_{bin}$', fontsize=20)\n", 186 | "ax.set_ylabel('$y_{bin}$', fontsize=20)\n", 187 | "plt.tight_layout()" 188 | ] 189 | }, 190 | { 191 | "cell_type": "markdown", 192 | "metadata": {}, 193 | "source": [ 194 | "In this way, the continuous coordinates x and y are mapped onto a discrete microstate space." 
195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": null, 200 | "metadata": {}, 201 | "outputs": [], 202 | "source": [ 203 | "fig,ax=plt.subplots(3,1,figsize=(10,6),sharex=True)\n", 204 | "plt.subplots_adjust(wspace=0, hspace=0)\n", 205 | "ax[0].plot(range(0,len(data[:,1])),data[:,1])\n", 206 | "ax[1].plot(range(0,len(data[:,2])),data[:,2],color=\"g\")\n", 207 | "ax[2].plot(binnumber)\n", 208 | "ax[0].set_ylabel('x')\n", 209 | "ax[1].set_ylabel('y')\n", 210 | "ax[2].set_ylabel(\"s\")\n", 211 | "ax[2].set_xlabel(\"time (ps)\")\n", 212 | "ax[2].set_xlim(0, 1500)" 213 | ] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": null, 218 | "metadata": {}, 219 | "outputs": [], 220 | "source": [ 221 | "from mastermsm.trajectory import traj" 222 | ] 223 | }, 224 | { 225 | "cell_type": "markdown", 226 | "metadata": {}, 227 | "source": [ 228 | "We then pass the discrete trajectory to the ``traj`` module to generate an instance of the ``TimeSeries`` class. Using some of its methods, we are able to generate and sort the names of the microstates in the trajectory, which will be useful later." 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "metadata": {}, 235 | "outputs": [], 236 | "source": [ 237 | "distraj = traj.TimeSeries(distraj=list(binnumber), dt=1)\n", 238 | "distraj.find_keys()\n", 239 | "distraj.keys.sort()" 240 | ] 241 | }, 242 | { 243 | "cell_type": "markdown", 244 | "metadata": {}, 245 | "source": [ 246 | "### Master Equation Model \n", 247 | "After generating the discrete trajectory, we can build the master equation model, for which we use the ``msm`` module." 
248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": null, 253 | "metadata": {}, 254 | "outputs": [], 255 | "source": [ 256 | "from mastermsm.msm import msm" 257 | ] 258 | }, 259 | { 260 | "cell_type": "markdown", 261 | "metadata": {}, 262 | "source": [ 263 | "First of all, we will create an instance of the SuperMSM class, which will be useful to produce and validate dynamical models. We pass two arguments: the \"discrete trajectory\" that we have generated above and a value for the boolean sym. This only tells the program that it can symmetrize the data, as we are assuming our trajectory is long enough as to consider it equilibrium sampling." 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": null, 269 | "metadata": {}, 270 | "outputs": [], 271 | "source": [ 272 | "msm_2D = msm.SuperMSM([distraj], sym=True)" 273 | ] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": {}, 278 | "source": [ 279 | "We then check the dependence of the slowest relaxation times of the system, $\\tau$ with respect to the choice of lag time $\\Delta t$. These can be accessed as the `tauT` corresponding to the `MSM` instance. We find that they are very well converged even from the shortest value of $\\Delta t$." 
280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": null, 285 | "metadata": {}, 286 | "outputs": [], 287 | "source": [ 288 | "for i in [1, 2, 5, 10, 20, 50, 100]:\n", 289 | " msm_2D.do_msm(i)\n", 290 | " msm_2D.msms[i].do_trans(evecs=True)\n", 291 | " msm_2D.msms[i].boots()" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": null, 297 | "metadata": {}, 298 | "outputs": [], 299 | "source": [ 300 | "tau_vs_lagt = np.array([[x,msm_2D.msms[x].tauT[0], \\\n", 301 | " msm_2D.msms[x].tau_std[0]] \\\n", 302 | " for x in sorted(msm_2D.msms.keys())])" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": null, 308 | "metadata": {}, 309 | "outputs": [], 310 | "source": [ 311 | "fig, ax = plt.subplots()\n", 312 | "ax.errorbar(tau_vs_lagt[:,0],tau_vs_lagt[:,1],fmt='o-', \\\n", 313 | " yerr=tau_vs_lagt[:,2], markersize=10)\n", 314 | "ax.fill_between(tau_vs_lagt[:,0],tau_vs_lagt[:,1]+tau_vs_lagt[:,2], \\\n", 315 | " tau_vs_lagt[:,1]-tau_vs_lagt[:,2], alpha=0.1)\n", 316 | "ax.set_xlabel(r'$\\Delta$t', fontsize=16)\n", 317 | "ax.set_ylabel(r'$\\tau$', fontsize=16)\n", 318 | "ax.set_xlim(0.8,120)\n", 319 | "ax.set_ylim(50,1000)\n", 320 | "ax.set_yscale('log')\n", 321 | "ax.set_xscale('log')\n", 322 | "plt.tight_layout()" 323 | ] 324 | }, 325 | { 326 | "cell_type": "markdown", 327 | "metadata": {}, 328 | "source": [ 329 | "Clearly, there is no dependence of the relaxation times $\\tau$ on the lag time $\\Delta$t.\n" 330 | ] 331 | }, 332 | { 333 | "cell_type": "markdown", 334 | "metadata": {}, 335 | "source": [ 336 | "#### Estimation" 337 | ] 338 | }, 339 | { 340 | "cell_type": "code", 341 | "execution_count": null, 342 | "metadata": {}, 343 | "outputs": [], 344 | "source": [ 345 | "lt=2\n", 346 | "plt.figure()\n", 347 | "plt.imshow(msm_2D.msms[lt].trans, interpolation='none', \\\n", 348 | " origin=\"lower\")\n", 349 | "plt.ylabel('$\\it{i}$')\n", 350 | "plt.xlabel('$\\it{j}$')\n", 351 | "plt.colorbar()\n", 352 
| "plt.figure()\n", 353 | "plt.imshow(np.log(msm_2D.msms[lt].trans), interpolation='none', \\\n", 354 | " origin=\"lower\")\n", 355 | "plt.ylabel('$\\it{i}$')\n", 356 | "plt.xlabel('$\\it{j}$')\n", 357 | "plt.colorbar()" 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": null, 363 | "metadata": {}, 364 | "outputs": [], 365 | "source": [ 366 | "fig, ax = plt.subplots()\n", 367 | "ax.errorbar(range(1,12),msm_2D.msms[lt].tauT[0:11], fmt='o-', \\\n", 368 | " yerr= msm_2D.msms[lt].tau_std[0:11], ms=10)\n", 369 | "ax.set_xlabel('Eigenvalue')\n", 370 | "ax.set_ylabel(r'$\\tau_i$ [ns]') " 371 | ] 372 | }, 373 | { 374 | "cell_type": "markdown", 375 | "metadata": {}, 376 | "source": [ 377 | "The first mode captured by $\\lambda_1$ is significantly slower than the others. That mode, which is described by the right eigenvector $\\psi^R_1$, corresponds to the transition of the protein between the folded and unfolded states." 378 | ] 379 | }, 380 | { 381 | "cell_type": "code", 382 | "execution_count": null, 383 | "metadata": {}, 384 | "outputs": [], 385 | "source": [ 386 | "fig, ax = plt.subplots(figsize=(10,4))\n", 387 | "ax.plot(msm_2D.msms[2].rvecsT[:,1])\n", 388 | "ax.fill_between(range(len(msm_2D.msms[lt].rvecsT[:,1])), 0, \\\n", 389 | " msm_2D.msms[lt].rvecsT[:,1], \\\n", 390 | " where=msm_2D.msms[lt].rvecsT[:,1]>0,\\\n", 391 | " facecolor='c', interpolate=True,alpha=.4)\n", 392 | "ax.fill_between(range(len(msm_2D.msms[lt].rvecsT[:,1])), 0, \\\n", 393 | " msm_2D.msms[lt].rvecsT[:,1], \\\n", 394 | " where=msm_2D.msms[lt].rvecsT[:,1]<0,\\\n", 395 | " facecolor='g', interpolate=True,alpha=.4)\n", 396 | "ax.set_ylabel(\"$\\Psi^R_1$\")\n", 397 | "plt.show()" 398 | ] 399 | }, 400 | { 401 | "cell_type": "markdown", 402 | "metadata": {}, 403 | "source": [ 404 | "The projection of $\\psi^R_1$ on the 2D grid shows the transitions between the two conformational states (red and blue)."
405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "execution_count": null, 410 | "metadata": {}, 411 | "outputs": [], 412 | "source": [ 413 | "fig,ax = plt.subplots(1,2,figsize=(10,5),sharey=True,sharex=True)\n", 414 | "rv_mat = np.zeros((25,25), float)\n", 415 | "for i in [x for x in zip(msm_2D.msms[lt].keep_keys, \\\n", 416 | " msm_2D.msms[lt].rvecsT[:,1])]:\n", 417 | " unr_ind=np.unravel_index(i[0],(26,26)) \n", 418 | " rv_mat[unr_ind[0]-1,unr_ind[1]-1] = -i[1]\n", 419 | "ax[0].imshow(rv_mat.transpose(), interpolation=\"none\", \\\n", 420 | " cmap='bwr',origin=\"lower\")\n", 421 | "ax[1].imshow(-np.log(statistic.transpose()), \\\n", 422 | " cmap=plt.cm.rainbow,origin=\"lower\")\n", 423 | "ax[1].set_yticks(range(0,26,5))\n", 424 | "ax[1].set_xticks(range(0,26,5))\n", 425 | "plt.tight_layout()" 426 | ] 427 | }, 428 | { 429 | "cell_type": "code", 430 | "execution_count": null, 431 | "metadata": {}, 432 | "outputs": [], 433 | "source": [] 434 | } 435 | ], 436 | "metadata": { 437 | "kernelspec": { 438 | "display_name": "Python 3", 439 | "language": "python", 440 | "name": "python3" 441 | }, 442 | "language_info": { 443 | "codemirror_mode": { 444 | "name": "ipython", 445 | "version": 3 446 | }, 447 | "file_extension": ".py", 448 | "mimetype": "text/x-python", 449 | "name": "python", 450 | "nbconvert_exporter": "python", 451 | "pygments_lexer": "ipython3", 452 | "version": "3.8.8" 453 | } 454 | }, 455 | "nbformat": 4, 456 | "nbformat_minor": 2 457 | } 458 | -------------------------------------------------------------------------------- /examples/mueller_potential/mueller.py: -------------------------------------------------------------------------------- 1 | #!/bin/env python 2 | 3 | #Copyright 2020 Robert T. 
class MullerForce(mm.CustomExternalForce):
    """OpenMM custom external force for propagation on the Muller potential.

    The energy expression handed to OpenMM is a stiff harmonic restraint on
    ``z`` plus four exponential terms in ``x`` and ``y``.  The class also
    carries a pure NumPy evaluation of the same four terms (``potential``)
    and a contour-plot helper (``plot``).
    """
    # Per-term coefficients of the four exponential terms.
    aa = [-1, -1, -6.5, 0.7]
    bb = [0, 0, 11, 0.6]
    cc = [-10, -10, -6.5, 0.7]
    AA = [-200, -100, -170, 15]
    XX = [1, 0, -0.5, -1]
    YY = [0, 0.5, 1.5, 1]

    def __init__(self):
        """Build the energy expression string and register it with OpenMM."""
        # start with a harmonic restraint on the Z coordinate
        expression = '1000.0 * z^2'
        for j in range(4):
            # add the muller terms for the X and Y
            fmt = dict(aa=self.aa[j], bb=self.bb[j], cc=self.cc[j],
                       AA=self.AA[j], XX=self.XX[j], YY=self.YY[j])
            # Same tokens as before, written on one line so that no newline
            # or indentation ends up inside the expression string.
            expression += ('+ {AA}*exp({aa}*(x - {XX})^2'
                           ' + {bb}*(x - {XX})*(y - {YY})'
                           ' + {cc}*(y - {YY})^2)').format(**fmt)
        super(MullerForce, self).__init__(expression)

    @classmethod
    def potential(cls, x, y):
        """Compute the potential at a given point x,y.

        ``x`` and ``y`` are combined with NumPy arithmetic, so scalars and
        arrays of matching shape are both accepted.  Only the four
        exponential terms are summed; the ``z`` restraint is not included.
        """
        value = 0
        for j in range(4):
            dx = x - cls.XX[j]
            dy = y - cls.YY[j]
            value += cls.AA[j]*np.exp(cls.aa[j]*dx**2
                                      + cls.bb[j]*dx*dy
                                      + cls.cc[j]*dy**2)
        return value

    @classmethod
    def plot(cls, ax=None, minx=-1.5, maxx=1.2, miny=-0.2, maxy=2, **kwargs):
        """Plot the Muller potential as filled contours.

        Parameters
        ----------
        ax : object with a ``contourf`` method, optional
            Target to draw on (e.g. a matplotlib Axes).  When omitted the
            ``matplotlib.pyplot`` module itself is used.
        minx, maxx, miny, maxy : float
            Bounds of the plotted region.
        **kwargs
            Forwarded unchanged to ``contourf``.
        """
        grid_width = max(maxx-minx, maxy-miny) / 200.0
        # ``ax`` is a named parameter and therefore can never appear in
        # ``kwargs``; the former ``ax = kwargs.pop('ax', None)`` always
        # discarded the caller's axes and fell back to pyplot.
        if ax is None:
            ax = plt
        xx, yy = np.mgrid[minx : maxx : grid_width, miny : maxy : grid_width]
        V = cls.potential(xx, yy)
        # clip off any values greater than 200, since they mess up
        # the color scheme
        ax.contourf(xx, yy, V.clip(max=200), 40, alpha=0.4, **kwargs)


if __name__ == "__main__":
    ##############################################################################
    # Global parameters
    ##############################################################################

    # each particle is totally independent, propagating under the same potential
    mass = 1.0*dalton
    temperature = 750*kelvin
    friction = 100/picosecond
    timestep = 10.0*femtosecond

    # Choose starting conformations uniform on the grid between (-1.5, -0.2) and (1.2, 2)
    # NOTE(review): with a y-scale of 1.8 the sampled y range is
    # [-0.2, 1.6], not up to 2 as stated above -- confirm which is intended.
    startingPositions = (np.random.rand(1, 3)*np.array([2.7, 1.8, 1])) \
        + np.array([-1.5, -0.2, 0])

    # Single-particle system subject only to the Muller force.
    system = mm.System()
    mullerforce = MullerForce()
    system.addParticle(mass)
    mullerforce.addParticle(0, [])
    system.addForce(mullerforce)

    integrator = mm.LangevinIntegrator(temperature, friction, timestep)
    context = mm.Context(system, integrator)
    context.setPositions(startingPositions)
    context.setVelocitiesToTemperature(temperature)

    # Record the particle position, then advance 200 integrator steps,
    # one million times over.
    traj = []
    for i in range(int(1e6)):
        traj.append(
            context.getState(getPositions=True).getPositions(asNumpy=True).value_in_unit(nanometer)[0])
        integrator.step(200)
    traj = np.vstack(traj)

    # Overlay the sampled path on the potential energy surface.
    fig, ax = plt.subplots(figsize=(4,4))
    MullerForce.plot(ax=ax)
    ax.plot(traj[:,0], traj[:,1], c='k', lw=0.1)
import copy
#import random
from ..msm import msm
from ..trajectory import traj
#import msm_lib
from ..fewsm import fewsm_lib


class FEWSM(msm.MSM):
    """
    A class for doing clustering of MSMs into few-state models

    Attributes
    ----------
    parent : class
        Instance of the MSM class that we aim to cluster.
    N : int
        The desired number of clusters.
    macros : dict
        Membership of microstates to macrostates: maps each macrostate
        index to a list of positions in ``parent.keep_states``.
    mappedtraj : list
        Trajectories mapped onto macrostates; only available after
        calling :meth:`map_trajectory`.

    """
    def __init__(self, parent, N=2, method="robust"):
        """

        Parameters
        ----------
        parent : class
            Instance of the MSM class that we aim to cluster.
        N : int
            The desired number of clusters.
        method : str
            Splitting method passed to :meth:`eigen_group`
            ("robust" or "sign").

        """
        # NOTE: the parent class initializer is deliberately not invoked
        # here (the original call is kept, commented out, at the end of
        # map_trajectory).
        self.parent = parent
        self.N = N
        self.macros = self.eigen_group(N=self.N, method=method)

    def eigen_group(self, N=2, method="robust"):
        """ Splits microstates into macrostates

        Parameters
        ----------
        N : int
            Number of clusters.
        method : str
            The method used for clustering: "robust" splits on the
            distribution of eigenvector components
            (``fewsm_lib.split_sigma``), "sign" splits on their sign
            structure (``fewsm_lib.split_sign``).

        Returns
        -------
        macros : dict
            A dictionary with the membership to macrostates.

        Raises
        ------
        ValueError
            If `method` is not one of the supported names.

        """
        # Compare by value: the original `method is "robust"` tested object
        # identity, which only works by accident for interned literals.
        splitters = {"robust": fewsm_lib.split_sigma,
                     "sign": fewsm_lib.split_sign}
        if method not in splitters:
            raise ValueError("unknown clustering method %r; expected one of %s"
                             % (method, sorted(splitters)))
        split = splitters[method]

        # generate eigenvectors in case the MSM does not have them
        if not hasattr(self.parent, 'lvecsT'):
            self.parent.tauT, self.parent.peqT, self.parent.rvecsT, self.parent.lvecsT = \
                    self.parent.calc_eigsT(evecs=True)
        lvecs = self.parent.lvecsT

        # split in desired number of macrostates
        macros = {}
        keep_states = self.parent.keep_states
        macros[0] = list(range(len(keep_states)))
        for n in range(1, N):
            # each pass splits one macrostate using the n-th left eigenvector
            macro_new, _ = split(macros, lvecs[:,n])
            macros = copy.deepcopy(macro_new)
        print ("\n Initial membership of microstates to macrostates:")
        if len(self.parent.keep_keys) < 100:
            for k,v in macros.items():
                print (k, [self.parent.keep_keys[x] for x in v])
        else:
            for k,v in macros.items():
                print (k,":", len(v))
        return macros

    def map_trajectory(self):
        """ Maps trajectory onto the PCCA clusters

        Every frame of every trajectory in ``parent.data`` is replaced by
        the index of the macrostate that contains its microstate. Frames
        whose state is not among ``parent.keep_keys`` are reported and
        dropped. The result is also stored in ``self.mappedtraj``.

        Returns
        -------
        mappedtraj : list
            One ``traj.TimeSeries`` per trajectory in ``parent.data``.

        """
        print ("\n Mapping trajectory onto macrostates...")
        keep_keys = self.parent.keep_keys

        # Lookup tables built once instead of a list search per frame.
        # setdefault keeps the first occurrence, like list.index did.
        index_of = {}
        for idx, key in enumerate(keep_keys):
            index_of.setdefault(key, idx)
        macro_of = {}
        for k, members in self.macros.items():
            for idx in members:
                macro_of.setdefault(idx, k)

        mappedtraj = []
        for data in self.parent.data:
            # A fresh list per trajectory: sharing one list made every
            # TimeSeries hold the concatenation of all trajectories.
            mt_states = []
            for s in data.distraj:
                try:
                    mt_states.append(macro_of[index_of[s]])
                except KeyError:
                    print (" not in keep_keys")
            mt = traj.TimeSeries(distraj=mt_states, dt=data.dt)
            mappedtraj.append(mt)
        self.mappedtraj = mappedtraj
        #super().__init__(mappedtraj, keys=range(self.N), lagt=self.parent.lagt)
        return mappedtraj

    def metastability(self):
        """ Calculate metastability according to the definition
        in Chodera et al, J Chem Phys, (2007)

        NOTE(review): reads ``self.trans``, which nothing in this class
        assigns; it has to be set before calling this method.

        Returns
        -------
        float
            Metastability

        """
        return fewsm_lib.metastability(self.trans)

#    def optim(self, nsteps=1, nwrite=None, fout="mc.dat"):
#        """ MC optimization using the metastability Q as energy.
#
#        Parameters
#        ----------
#        nsteps : int
#            Number of steps per round of MC and per microstate.
#        nwrite : int
#            Frequency of writing MC output.
#        fout : string
#            File for output of MC progress.
#
#        Returns
#        -------
#        macro_opt : dict
#            Dictionary with the membership to macrostates.
135 | # 136 | # """ 137 | # print "\n Optimizing the lumped MSM\n" 138 | # out = open(fout, "w") 139 | # out.write("# iter q \n") 140 | # 141 | # nmac = self.N 142 | # nmic = len(self.parent.keep_keys) 143 | # mcsteps = len(self.count)*nsteps*nmic # mc steps per block 144 | # mcsteps_max = nmic*20000 # maximum number of mc steps 145 | # print self.count 146 | # print self.trans 147 | # q = self.metastability() 148 | # print " initial:", q 149 | # q_opt = q 150 | # 151 | # macro = copy.deepcopy(self.macros) 152 | # cont = True 153 | # nmc = 0 # number of mc blocks 154 | # reject = 0 155 | # while cont: 156 | # imc = 0 157 | # out.write ("%6i %12.10f %10.6e\n"%(imc + nmc*mcsteps,q,1)) 158 | # while imc < mcsteps: 159 | # # try ramdom insertion of a microstate in a macrostate 160 | # imac = 0 161 | # jmac = 0 162 | # while imc < mcsteps: 163 | # imc +=1 164 | # while True: 165 | # # choose microstate to move around 166 | # imic = random.choice(range(nmic)) 167 | # imac = int([x for x in range(nmac) if imic in macro[x]][0]) 168 | # if len(macro[imac]) > 1: 169 | # # choose destination macrostate 170 | # jmac = random.choice([x for x in range(nmac) if x is not imac]) 171 | # break 172 | # # move microstate from i to j 173 | # macro_new = copy.deepcopy(macro) 174 | # macro_new[imac].remove(imic) 175 | # macro_new[jmac].append(imic) 176 | # # calculate transition count matrix for new mapping 177 | # count_mac_new = fewsm_lib.map_micro2macro(self.parent.count, macro_new, self.parent.keep_states) 178 | # Tmacro_new = msm_lib.calc_trans(nmac, range(nmac), count_mac_new) 179 | # # calculate metastability 180 | # q_new = fewsm_lib.metastability(Tmacro_new) 181 | # delta = fewsm_lib.beta(imc,mcsteps)*(q - q_new) # calculate increment (Q is a -Energy) 182 | # if fewsm_lib.metropolis(delta): 183 | # #print "ACCEPT" 184 | # macro = copy.deepcopy(macro_new) 185 | # count_mac = count_mac_new 186 | # q = q_new 187 | # if q > q_opt: 188 | # q_opt = q 189 | # macro_opt = 
copy.deepcopy(macro) 190 | # Tmacro_opt = Tmacro_new 191 | # self.macro = copy.deepcopy(macro_opt) 192 | # else: 193 | # reject+=1 194 | # #print " REJECT" 195 | # 196 | # out.write ("%6i %12.10e %10.6e\n"%(imc + nmc*mcsteps,q,1./fewsm_lib.beta(imc,mcsteps))) 197 | # imc +=1 198 | # cont = False 199 | # print " final :", q 200 | # print " best :", q_opt 201 | # print " acceptance:",1.-float(reject)/mcsteps 202 | # 203 | # self.map_trajectory() 204 | # self.do_count() 205 | # self.do_trans() 206 | # 207 | # def write_mapping(self): 208 | # """ 209 | # Prints files with the mapping between states and clusters 210 | # 211 | # """ 212 | # for mtraj in self.mappedtraj: 213 | # try: 214 | # idf = mtraj.filename.rfind(".dat") 215 | # filename = mtraj.filename[:idf] + "_mapped_pcca%g.dat"%self.N 216 | # except ValueError: 217 | # filename = mtraj.filename + "_mapped_pcca%g.dat"%self.N 218 | # print " ...writing mapped trajectory at %s"%filename 219 | # fout = open(filename, "w") 220 | # micro_data = [x for x in self.parent.data if x.filename == mtraj.filename][0] 221 | # for x in zip(micro_data.time, micro_data.states, self.data[0].states): 222 | # fout.write("%10.3f %s %8i\n"%(x[0], x[1], x[2])) 223 | # fout.close() 224 | -------------------------------------------------------------------------------- /mastermsm/fewsm/fewsm_lib.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file is part of the MasterMSM package. 
import copy
import itertools
import numpy as np


def map_micro2macro(cmic, mac, states):
    """ Maps a microstate count matrix onto macrostates

    Parameters
    ----------
    cmic : np.array
        Count matrix between microstates.
    mac : dict
        Membership of microstates to macrostates; keys must be
        0..len(mac)-1 and values are lists of positions in `states`.
    states : list
        Row/column of `cmic` for each position used in `mac`.

    Returns
    -------
    cmac : np.array
        Integer count matrix between macrostates; ``cmac[j,i]`` adds up
        ``cmic[states[x], states[y]]`` for x in ``mac[j]``, y in ``mac[i]``.

    """
    m = len(mac)
    cmac = np.zeros((m, m), int)
    for i in range(m):
        for j in range(m):
            # sum() replaces a call to reduce, which this module never
            # imported, and also copes with empty macrostates.
            cmac[j,i] = sum(cmic[states[x],states[y]] for (x,y) in
                            itertools.product(mac[j],mac[i]))
    return cmac

def test_sign(v):
    """check whether positive and negative signs are present in vector"""
    return bool(np.any(v > 0.) and np.any(v < 0))

# Library helper, not a unit test: keep pytest from collecting it by name.
test_sign.__test__ = False

def split_sign(macro, lvec):
    """ Split one macrostate based on sign structure

    The macrostate with the largest mean squared eigenvector component
    (among those containing both signs) is split; members with negative
    components move to a new macrostate with key ``len(macro)``.

    Parameters
    ----------
    macro : dict
        Current membership to macrostates (not modified).
    lvec : np.array
        Eigenvector used for the split.

    Returns
    -------
    macro_new : dict
        Updated membership.
    vals : np.array
        The eigenvector that was passed in.

    """
    nt = len(macro)
    # calculate spread in eigenvector
    spread = []
    vals = lvec
    for _, v in macro.items():
        # check that there are positive and negative values in evec
        if test_sign(vals[v]):
            #spread.append(np.sum(vals**2))
            spread.append(np.mean(vals[v]**2))
        else:
            spread.append(0.)
    isplit = np.argsort(-np.array(spread))[0]
    # split
    members = macro[isplit]
    lvec_split = lvec[members]
    elems = [members[i] for i in range(len(members)) if lvec_split[i] < 0.]
    macro_new = copy.deepcopy(macro)
    macro_new[nt] = elems
    # update old macrostate
    for i in elems:
        macro_new[isplit].remove(i)
    return macro_new, vals

def split_sigma(macro, lvec):
    """ Split one macrostate based on distribution

    The macrostate whose eigenvector components have the largest standard
    deviation is split. Its components are rescaled to [0, 1] and members
    below 0.5 move to a new macrostate with key ``len(macro)``.

    Parameters
    ----------
    macro : dict
        Current membership to macrostates (not modified).
    lvec : np.array
        Eigenvector used for the split.

    Returns
    -------
    macro_new : dict
        Updated membership.
    vals : np.array
        Rescaled eigenvector components of the macrostate that was split.

    """
    nt = len(macro)

    spread = [np.std(lvec[macro[i]]) for i in macro.keys()]
    # split macrostates with maximum spread
    isplit = np.argsort(-np.array(spread))[0]
    # split based on distribution
    members = macro[isplit]
    val_max = np.max(lvec[members])
    val_min = np.min(lvec[members])
    # If all components are equal this divides by zero: the values become
    # NaN, no member compares below 0.5 and the new macrostate is empty.
    vals = (lvec[members] - val_min)/(val_max - val_min)
    elems = [members[i] for i in range(len(members)) if vals[i] < 0.5]
    macro_new = copy.deepcopy(macro)
    macro_new[nt] = elems
    # update old macrostate
    for i in elems:
        macro_new[isplit].remove(i)
    return macro_new, vals

def metastability(T):
    """ Metastability of a transition matrix: the sum of its diagonal """
    return np.sum(np.diag(T))

def beta(imc,mcsteps):
    """ Inverse temperature for MCSA

    Parameters
    ----------
    imc : int
        Current MC step.
    mcsteps : int
        Number of MC steps in the annealing block.

    Returns
    -------
    beta : float
        Inverse of the annealing temperature; 1e20 when the temperature
        is exactly zero.

    """
    x = imc - 1
    a = 4./mcsteps
    temp = (1 + (np.exp(-a*x)-1.)/(1.- np.exp(-a*mcsteps))) # MCSA temperature
    # temp is a NumPy float, so dividing by it never raises
    # ZeroDivisionError (it returns inf); test for zero explicitly.
    if temp == 0:
        return 1e20
    return 1./temp

def metropolis(delta):
    """ Metropolis criterion: always accept ``delta < 0``, otherwise accept
    with probability ``exp(-delta)`` """
    if delta < 0:
        return True
    p = min(1.0,np.exp(-delta))
    return bool(np.random.random() < p)

# --------------------------------------------------------------------------------
# /mastermsm/msm/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/BioKT/MasterMSM/7e71b0fcf42cc7d840e58a6ca18450d710fbdbb4/mastermsm/msm/__init__.py
# --------------------------------------------------------------------------------
# /mastermsm/msm/msm_lib.py:
# --------------------------------------------------------------------------------
"""
This file is part of the MasterMSM package.

"""
import copy
import numpy as np
try:
    import networkx as nx
except ImportError:
    # networkx is only used by do_boots_worker; the rest of this module
    # stays importable without it.
    nx = None
import os #, math
import tempfile
from functools import reduce, cmp_to_key
#import operator
from scipy import linalg as spla
#import multiprocessing as mp
import pickle

# inverse thermal energy 1/(RT) at 300 K (mol/kJ)
beta = 1./(8.314e-3*300)

#def difference(k1, k2):
#    l = len(k1)
#    diff = 0
#    for i in range(l):
#        if k1[i] != k2[i]:
#            diff+=1
#    return diff

def calc_eigsK(rate, evecs=False):
    """
    Calculate eigenvalues and eigenvectors of rate matrix K

    Parameters
    -----------
    rate : array
        The rate matrix to use.
    evecs : bool
        Whether we want the eigenvectors of the rate matrix.

    Returns
    -------
    tauK : list
        Relaxation times from K, slowest first; modes with
        \|eigenvalue\| <= 1e-10 are left out.
    peqK : numpy array
        Equilibrium probabilities from K (right eigenvector of the largest
        eigenvalue, normalized to unit sum).
    rvecsK : numpy array, optional
        Right eigenvectors of K (real part), sorted.
    lvecsK : numpy array, optional
        Left eigenvectors of K (real part), sorted.

    """
    evalsK, lvecsK, rvecsK = \
            spla.eig(rate, left=True)

    # sort modes by decreasing real part of the eigenvalue
    nkeys = len(rate)
    elistK = [[i, np.real(evalsK[i])] for i in range(nkeys)]
    elistK.sort(key=cmp_to_key(esort))

    # calculate relaxation times from K
    tauK = [-1./lamK for _, lamK in elistK if np.abs(lamK) > 1e-10]

    # equilibrium probabilities
    ieqK = elistK[0][0]
    peqK = rvecsK[:,ieqK]/np.sum(rvecsK[:,ieqK])

    if not evecs:
        return tauK, peqK

    # sort eigenvectors; taking the real part explicitly avoids the
    # implicit complex-to-float cast (and its warning)
    order = [iiK for iiK, _ in elistK]
    rvecsK_sorted = np.array(np.real(rvecsK[:,order]), dtype=float)
    lvecsK_sorted = np.array(np.real(lvecsK[:,order]), dtype=float)
    return tauK, peqK, rvecsK_sorted, lvecsK_sorted

def esort(ei, ej):
    """ Comparison function that sorts eigenvalues in decreasing order.

    Meant to be wrapped with ``functools.cmp_to_key``.

    Parameters
    ----------
    ei : list
        Pair [index, eigenvalue] for mode i.
    ej : list
        Pair [index, eigenvalue] for mode j.

    Returns
    -------
    int
        1 if eigenvalue j has the larger real part, -1 if eigenvalue i
        does, 0 if they are equal.

    """
    _, eval_i = ei
    _, eval_j = ej

    if eval_j.real > eval_i.real:
        return 1
    if eval_j.real < eval_i.real:
        return -1
    return 0

#def find_keys(state_keys, trans, manually_remove):
#    """ eliminate dead ends """
#    keep_states = []
#    keep_keys = []
#    # eliminate dead ends
#    nstate = len(state_keys)
#    for i in range(nstate):
#        key = state_keys[i]
#        summ = 0
#        sumx = 0
#        for j in range(nstate):
#            if j!=i:
#                summ += trans[j][i]   # sources
#                sumx += trans[i][j] # sinks
#        if summ > 0 and sumx > 0 and trans[i][i] > 0 and key not in manually_remove:
#            keep_states.append(i)
#            keep_keys.append(state_keys[i])
#    return keep_states,keep_keys
#
#def connect_groups(keep_states, trans):
#    """ check for connected groups """
#    connected_groups = []
#    leftover = copy.deepcopy(keep_states)
#    while len(leftover) > 0:
#        #print leftover
#        leftover_new = []
#        n_old_new_net = 0
#        new_net = [ leftover[0] ]
#        n_new_net = len(new_net)
#        while n_new_net != n_old_new_net:
#            for i in range(len(leftover)):
#                l = leftover[i]
#                if l in new_net:
#                    continue
#                summ = 0
#                for g in new_net:
#                    summ += trans[l][g]+trans[g][l]
#                if summ > 0:
#                    new_net.append(l)
#            n_old_new_net = n_new_net
#            n_new_net = len(new_net)
#            #print " added %i new members" % (n_new_net-n_old_new_net)
#        leftover_new = filter(lambda x: x not in new_net, leftover)
#        connected_groups.append(new_net)
#        leftover = copy.deepcopy(leftover_new)
#    return connected_groups
#
#def isnative(native_string, string):
#    s = ""
#    for i in range(len(string)):
#        if string[i]==native_string[i]:
#            s+="1"
#        else:
#            s+="0"
#    return s

def mat_mul_v(m, v):
    """ Multiplies matrix and vector

    Parameters
    ----------
    m : np.array
        The matrix.
    v : np.array
        The vector.

    Returns
    -------
    w : list
        The result, one entry per row of `m`.

    """
    ncols = len(v)
    return [sum(row[i]*v[i] for i in range(ncols)) for row in m]

#def dotproduct(v1, v2, sum=sum, imap=itertools.imap, mul=operator.mul):
#    return sum(imap(mul,v1,v2))
#
##def rate_analyze(rate):
##    # calculates eigenvalues and eigenvectors from rate matrix
##    # calculate symmetrized matrix
##    kjisym = kji*(kji.transpose())
##    kjisym = sqrt(kjisym)
##    for j in arange(nstates):
##        kjisym[j,j] = -kjisym[j,j]
##    # calculate eigenvalues and eigenvectors
##    eigvalsym,eigvectsym = linalg.eig(kjisym)
##    # index the solutions
##    index = argsort(-eigvalsym)
##    ieq = index[0]
##    # equilibrium population
##    peq = eigvectsym[:,ieq]**2
##    # order eigenvalues and calculate left and right eigenvectors
##    eigval = zeros((nstates),float)
##    PsiR = zeros((nstates,nstates),float)
##    PsiL = zeros((nstates,nstates),float)
##    for i in arange(nstates):
##        eigval[i] = eigvalsym[index[i]]
##        PsiR[:,i] = eigvectsym[:,index[i]]*eigvectsym[:,ieq]
##        PsiL[:,i] = eigvectsym[:,index[i]]/eigvectsym[:,ieq]
##    return eigval,PsiR,PsiL,eigvectsym,peq
#
#def propagate(rate, t, pini):
#    # propagate dynamics using rate matrix exponential
#    expkt = spla.expm2(rate*t)
#    return mat_mul_v(expkt,pini)
#
#def propagate_eig(elist, rvecs, lvecs, t, pini):
#    # propagate dynamics using rate matrix exponential using eigenvalues and eigenvectors
#    nstates = len(pini)
#    p = np.zeros((nstates),float)
#    for n
in range(nstates): 233 | # #print np.exp(-elist[n][1]*t) 234 | # i,e = elist[n] 235 | # p = p + rvecs[:,i]*(np.dot(lvecs[:,i],pini)*\ 236 | # np.exp(-abs(e*t))) 237 | # return p 238 | # 239 | #def bootsfiles(traj_list_dt): 240 | # n = len(traj_list_dt) 241 | # traj_list_dt_new = [] 242 | # i = 0 243 | # while i < n: 244 | # k = int(np.random.random()*n) 245 | # traj_list_dt_new.append(traj_list_dt[k]) 246 | # i += 1 247 | # return traj_list_dt_new 248 | # 249 | #def boots_pick(filename, blocksize): 250 | # raw = open(filename).readlines() 251 | # lraw = len(raw) 252 | # nblocks = int(lraw/blocksize) 253 | # lblock = int(lraw/nblocks) 254 | # try: 255 | # ib = np.random.randint(nblocks-1) 256 | # except ValueError: 257 | # ib = 0 258 | # return raw[ib*lblock:(ib+1)*lblock] 259 | # 260 | #def onrate(states, target, K, peq): 261 | # # steady state rate 262 | # kon = 0. 263 | # for i in states: 264 | # if i != target: 265 | # if K[target,i] > 0: 266 | # kon += K[target,i]*peq[i] 267 | # return kon 268 | # 269 | def run_commit(states, K, peq, FF, UU): 270 | """ Calculate committors and reactive flux 271 | 272 | Parameters 273 | ---------- 274 | states : list 275 | States in the MSM. 276 | K : np.array 277 | Rate matrix. 278 | peq : np.array 279 | Equilibrium distribution. 280 | FF : list 281 | Definitely folded states. 282 | UU : list 283 | Definitely unfolded states. 284 | 285 | Returns 286 | ------- 287 | J : np.array 288 | Reactive flux matrix. 289 | pfold : np.array 290 | Values of the committor. 291 | sum_flux : float 292 | Sum of reactive fluxes. 293 | kf : float 294 | Folding rate from flux over population relationship. 
295 | 296 | """ 297 | nstates = len(states) 298 | # define end-states 299 | UUFF = UU + FF 300 | print (" definitely FF and UU states", UUFF) 301 | I = list(filter(lambda x: x not in UU+FF, states)) 302 | NI = len(I) 303 | 304 | # calculate committors 305 | b = np.zeros([NI], float) 306 | A = np.zeros([NI,NI], float) 307 | for j_ind in range(NI): 308 | j = I[j_ind] 309 | summ = 0. 310 | for i in FF: 311 | summ += K[i][j] 312 | b[j_ind] = -summ 313 | for i_ind in range(NI): 314 | i = I[i_ind] 315 | A[j_ind][i_ind] = K[i][j] 316 | # solve Ax=b 317 | Ainv = np.linalg.inv(A) 318 | x = np.dot(Ainv,b) 319 | #XX = np.dot(Ainv,A) 320 | 321 | pfold = np.zeros(nstates,float) 322 | for i in range(nstates): 323 | if i in UU: 324 | pfold[i] = 0.0 325 | elif i in FF: 326 | pfold[i] = 1.0 327 | else: 328 | ii = I.index(i) 329 | pfold[i] = x[ii] 330 | 331 | # stationary distribution 332 | pss = np.zeros(nstates,float) 333 | for i in range(nstates): 334 | pss[i] = (1-pfold[i])*peq[i] 335 | 336 | # flux matrix and reactive flux 337 | J = np.zeros([nstates,nstates],float) 338 | for i in range(nstates): 339 | for j in range(nstates): 340 | J[j][i] = K[j][i]*peq[i]*(pfold[j]-pfold[i]) 341 | 342 | # dividing line is committor = 0.5 343 | sum_flux = 0 344 | left = [x for x in range(nstates) if pfold[x] < 0.5] 345 | right = [x for x in range(nstates) if pfold[x] > 0.5] 346 | for i in left: 347 | for j in right: 348 | sum_flux += J[j][i] 349 | 350 | #sum of populations for all reactant states 351 | pU = np.sum([peq[x] for x in range(nstates) if pfold[x] < 0.5]) 352 | # pU = np.sum(peq[filter(lambda x: x in UU, range(nstates))]) 353 | kf = sum_flux/pU 354 | return J, pfold, sum_flux, kf 355 | 356 | def calc_count_worker(x): 357 | """ mp worker that calculates the count matrix from a trajectory 358 | 359 | Parameters 360 | ---------- 361 | x : list 362 | List containing input for each mp worker. 
Includes: 363 | distraj :the time series of states 364 | dt : the timestep for that trajectory 365 | keys : the keys used in the assignment 366 | lagt : the lag time for construction 367 | 368 | Returns 369 | ------- 370 | count : array 371 | 372 | """ 373 | # parse input from multiprocessing 374 | distraj = x[0] 375 | dt = x[1] 376 | keys = x[2] 377 | nkeys = len(keys) 378 | lagt = x[3] 379 | sliding = x[4] 380 | 381 | ltraj = len(distraj) 382 | lag = int(lagt/dt) # number of frames per lag time 383 | if sliding: 384 | slider = 1 # every state is initial state 385 | else: 386 | slider = lag 387 | 388 | count = np.zeros([nkeys,nkeys], np.int32) 389 | for i in range(0, ltraj-lag, slider): 390 | j = i + lag 391 | state_i = distraj[i] 392 | state_j = distraj[j] 393 | if state_i in keys: 394 | idx_i = keys.index(state_i) 395 | if state_j in keys: 396 | idx_j = keys.index(state_j) 397 | try: 398 | count[idx_j][idx_i] += 1 399 | except UnboundLocalError: 400 | pass 401 | return count 402 | 403 | def calc_lifetime(x): 404 | """ mp worker that calculates the count matrix from a trajectory 405 | 406 | Parameters 407 | ---------- 408 | x : list 409 | List containing input for each mp worker. 
Includes: 410 | distraj :the time series of states 411 | dt : the timestep for that trajectory 412 | keys : the keys used in the assignment 413 | 414 | Returns 415 | ------- 416 | life : dict 417 | 418 | """ 419 | # parse input from multiprocessing 420 | distraj = x[0] 421 | dt = x[1] 422 | keys = x[2] 423 | ltraj = len(distraj) 424 | 425 | life = {} 426 | l = 0 427 | for j in range(1, ltraj): 428 | i = j - 1 429 | state_i = distraj[i] 430 | state_j = distraj[j] 431 | if state_i == state_j: 432 | l += 1 433 | elif state_j not in keys: 434 | l += 1 435 | else: 436 | try: 437 | life[state_i].append(l*dt) 438 | except KeyError: 439 | life[state_i] = [l*dt] 440 | l = 1 441 | #try: 442 | # life[state_i].append(l*dt) 443 | #except KeyError: 444 | # life[state_i] = [l*dt] 445 | return life 446 | 447 | def traj_split(data=None, lagt=None, fdboots=None): 448 | """ Splits trajectories into fragments for bootstrapping 449 | 450 | Parameters 451 | ---------- 452 | data : list 453 | Set of trajectories used for building the MSM. 454 | lagt : float 455 | Lag time for building the MSM. 456 | 457 | Returns: 458 | ------- 459 | filetmp : file object 460 | Open file object with trajectory fragments. 461 | 462 | """ 463 | trajs = [[x.distraj, x.dt] for x in data] 464 | ltraj = [len(x[0])*x[1] for x in trajs] 465 | ltraj_median = np.median(ltraj) 466 | timetot = np.sum(ltraj) # total simulation time 467 | while ltraj_median > timetot/20. 
and ltraj_median > 10.*lagt: 468 | trajs_new = [] 469 | #cut trajectories in chunks 470 | for x in trajs: 471 | lx = len(x[0]) 472 | trajs_new.append([x[0][:int(lx/2)], x[1]]) 473 | trajs_new.append([x[0][int(lx/2):], x[1]]) 474 | trajs = trajs_new 475 | ltraj = [len(x[0])*x[1] for x in trajs] 476 | ltraj_median = np.median(ltraj) 477 | # save trajs 478 | fd, filetmp = tempfile.mkstemp() 479 | file = os.fdopen(fd, 'wb') 480 | pickle.dump(trajs, file, protocol=pickle.HIGHEST_PROTOCOL) 481 | file.close() 482 | return filetmp 483 | 484 | def do_boots_worker(x): 485 | """ Worker function for parallel bootstrapping. 486 | 487 | Parameters 488 | ---------- 489 | x : list 490 | A list containing the trajectory filename, the states, the lag time 491 | and the total number of transitions. 492 | 493 | """ 494 | 495 | #print "# Process %s running on input %s"%(mp.current_process(), x[0]) 496 | filetmp, keys, lagt, ncount, slider = x 497 | nkeys = len(keys) 498 | finp = open(filetmp, 'rb') 499 | trans = pickle.load(finp) 500 | finp.close() 501 | ltrans = len(trans) 502 | np.random.seed() 503 | ncount_boots = 0 504 | count = np.zeros([nkeys, nkeys], np.int32) 505 | while ncount_boots < ncount: 506 | itrans = np.random.randint(ltrans) 507 | count_inp = [trans[itrans][0], trans[itrans][1], keys, lagt, slider] 508 | c = calc_count_worker(count_inp) 509 | count += np.matrix(c) 510 | ncount_boots += np.sum(c) 511 | #print ncount_boots, "< %g"%ncount 512 | D = nx.DiGraph(count) 513 | #keep_states = sorted(nx.strongly_connected_components(D)[0]) 514 | keep_states = list(sorted(list(nx.strongly_connected_components(D)), 515 | key = len, reverse=True)[0]) 516 | keep_keys = list(map(lambda x: keys[x], keep_states)) 517 | nkeep = len(keep_keys) 518 | trans = np.zeros([nkeep, nkeep], float) 519 | for i in range(nkeep): 520 | ni = reduce(lambda x, y: x + y, map(lambda x: 521 | count[keep_states[x]][keep_states[i]], range(nkeep))) 522 | for j in range(nkeep): 523 | trans[j][i] = 
float(count[keep_states[j]][keep_states[i]])/float(ni) 524 | evalsT, rvecsT = spla.eig(trans, left=False) 525 | elistT = [] 526 | for i in range(nkeep): 527 | elistT.append([i,np.real(evalsT[i])]) 528 | elistT.sort(key=cmp_to_key(esort)) 529 | tauT = [] 530 | for i in range(1,nkeep): 531 | _, lamT = elistT[i] 532 | tauT.append(-lagt/np.log(lamT)) 533 | ieqT, _ = elistT[0] 534 | peqT_sum = reduce(lambda x,y: x + y, map(lambda x: rvecsT[x,ieqT], 535 | range(nkeep))) 536 | peqT = rvecsT[:,ieqT]/peqT_sum 537 | return tauT, peqT, trans, keep_keys 538 | 539 | def calc_trans(nkeep=None, keep_states=None, count=None): 540 | """ Calculates transition matrix. 541 | 542 | Uses the maximum likelihood expression by Prinz et al.[1]_ 543 | 544 | Parameters 545 | ---------- 546 | lagt : float 547 | Lag time for construction of MSM. 548 | 549 | Returns 550 | ------- 551 | trans : array 552 | The transition probability matrix. 553 | 554 | Notes 555 | ----- 556 | ..[1] J. H. Prinz, H. Wu, M. Sarich, B. Keller, M. Senne, M. Held, 557 | J. D. Chodera, C. Schutte and F. Noe, "Markov state models: 558 | Generation and validation", J. Chem. Phys. (2011). 559 | """ 560 | trans = np.zeros([nkeep, nkeep], float) 561 | for i in range(nkeep): 562 | ni = reduce(lambda x, y: x + y, map(lambda x: 563 | count[keep_states[x]][keep_states[i]], range(nkeep))) 564 | for j in range(nkeep): 565 | trans[j][i] = float(count[keep_states[j]][keep_states[i]])/float(ni) 566 | return trans 567 | 568 | def calc_rate(nkeep, trans, lagt): 569 | """ Calculate rate matrix from transition matrix. 570 | 571 | We use a method based on a Taylor expansion.[1]_ 572 | 573 | Parameters 574 | ---------- 575 | nkeep : int 576 | Number of states in transition matrix. 577 | trans: np.array 578 | Transition matrix. 579 | lagt : float 580 | The lag time. 581 | 582 | Returns 583 | ------- 584 | rate : np.array 585 | The rate matrix. 586 | 587 | Notes 588 | ----- 589 | ..[1] D. De Sancho, J. Mittal and R. B. 
Best, "Folding kinetics 590 | and unfolded state dynamics of the GB1 hairpin from molecular 591 | simulation", J. Chem. Theory Comput. (2013). 592 | 593 | """ 594 | rate = trans/lagt 595 | 596 | # enforce mass conservation 597 | for i in range(nkeep): 598 | rate[i][i] = -(np.sum(rate[:i,i]) + np.sum(rate[i+1:,i])) 599 | return rate 600 | 601 | def rand_rate(nkeep, count): 602 | """ Randomly generate initial matrix. 603 | 604 | Parameters 605 | ---------- 606 | nkeep : int 607 | Number of states in transition matrix. 608 | 609 | count : np.array 610 | Transition matrix. 611 | 612 | Returns 613 | ------- 614 | rand_rate : np.array 615 | The random rate matrix. 616 | 617 | """ 618 | nkeys = len(count) 619 | 620 | rand_rate = np.zeros((nkeys, nkeys), float) 621 | for i in range(nkeys): 622 | for j in range(nkeys): 623 | if i != j: 624 | if (count[i,j] !=0) and (count[j,i] != 0): 625 | rand_rate[j,i] = np.exp(np.random.randn()*-3) 626 | rand_rate[i,i] = -np.sum(rand_rate[:,i] ) 627 | return rand_rate 628 | 629 | def calc_mlrate(nkeep, count, lagt, rate_init): 630 | """ Calculate rate matrix using maximum likelihood Bayesian method. 631 | 632 | We use a the MLPB method described by Buchete and Hummer.[1]_ 633 | 634 | Parameters 635 | ---------- 636 | nkeep : int 637 | Number of states in transition matrix. 638 | count : np.array 639 | Transition matrix. 640 | lagt : float 641 | The lag time. 642 | 643 | Returns 644 | ------- 645 | rate : np.array 646 | The rate matrix. 647 | 648 | Notes 649 | ----- 650 | ..[1] N.-V. Buchete and G. Hummer, "Coarse master equations for 651 | peptide folding dynamics", J. Phys. Chem. B (2008). 
652 | 653 | """ 654 | # initialize rate matrix and equilibrium distribution enforcing detailed balance 655 | p_prev = np.sum(count, axis=0)/np.float(np.sum(count)) 656 | rate_prev = detailed_balance(nkeep, rate_init, p_prev) 657 | ml_prev = likelihood(nkeep, rate_prev, count, lagt) 658 | 659 | # initialize MC sampling 660 | print ("MLPB optimization of rate matrix:\n START") 661 | #print rate_prev,"\n", p_prev, ml_prev 662 | ml_ref = ml_prev 663 | ml_cum = [ml_prev] 664 | temp_cum = [1.] 665 | nstep = 0 666 | nsteps = 1000*nkeep**2 667 | k = -3./nsteps 668 | nfreq = 10 669 | ncycle = 0 670 | accept = 0 671 | rate_best = rate_prev 672 | ml_best = ml_prev 673 | while True: 674 | # random choice of MC move 675 | rate, p = mc_move(nkeep, rate_prev, p_prev) 676 | rate = detailed_balance(nkeep, rate, p) 677 | 678 | # calculate likelihood 679 | ml = likelihood(nkeep, rate, count, lagt) 680 | 681 | # Boltzmann acceptance / rejection 682 | if ml < ml_prev: 683 | #print " ACCEPT\n" 684 | rate_prev = rate 685 | p_prev = p 686 | ml_prev = ml 687 | accept +=1 688 | if ml < ml_best: 689 | ml_best = ml 690 | rate_best = rate 691 | else: 692 | delta_ml = ml - ml_prev 693 | beta = (1 - np.exp(k*nsteps))/(np.exp(k*nstep) - np.exp(k*nsteps)) if ncycle > 0 else 1 694 | weight = np.exp(-beta*delta_ml) 695 | if np.random.random() < weight: 696 | #print " ACCEPT BOLTZMANN\n" 697 | rate_prev = rate 698 | p_prev = p 699 | ml_prev = ml 700 | accept +=1 701 | nstep +=1 702 | 703 | if nstep > nsteps: 704 | ncycle +=1 705 | ml_cum.append(ml_prev) 706 | temp_cum.append(1./beta) 707 | print ("\n END of cycle %g"%ncycle) 708 | print (" acceptance :%g"%(np.float(accept)/nsteps)) 709 | accept = 0 710 | print (rate_prev) 711 | print (" L old =", ml_ref,"; L new:", ml_prev) 712 | improvement = (ml_ref - ml_cum[-1])/ml_ref 713 | print (" improvement :%g"%improvement) 714 | if improvement > 0.001 or ncycle < 3: 715 | nstep = 0 716 | ml_ref = np.mean(ml_cum[-nsteps:]) 717 | else: 718 | break 719 | elif 
nstep % nfreq == 0: 720 | ml_cum.append(ml_prev) 721 | temp_cum.append(1./beta) 722 | 723 | return rate_best, ml_cum, temp_cum 724 | 725 | def mc_move(nkeep, rate, peq): 726 | """ Make MC move in either rate or equilibrium probability. 727 | 728 | Changes in equilibrium probabilities are introduced so that the new value 729 | is drawn from a normal distribution centered at the current value. 730 | 731 | Parameters 732 | ---------- 733 | nkeep : int 734 | The number of states. 735 | rate : array 736 | The rate matrix obeying detailed balance. 737 | peq : array 738 | The equilibrium probability 739 | 740 | """ 741 | nparam = nkeep*(nkeep - 1)/2 + nkeep - 1 742 | npeq = nkeep - 1 743 | 744 | while True: 745 | i = np.random.randint(0, nparam) 746 | #print i 747 | rate_new = copy.deepcopy(rate) 748 | peq_new = copy.deepcopy(peq) 749 | if i < npeq: 750 | #print " Peq" 751 | scale = np.mean(peq)*0.1 752 | # peq_new[i] = np.random.normal(loc=peq[i], scale=scale) 753 | peq_new[i] = peq[i] + (np.random.random() - 0.5)*scale 754 | peq_new = peq_new/np.sum(peq_new) 755 | if np.all(peq_new > 0): 756 | break 757 | else: 758 | #print " Rate" 759 | i = np.random.randint(0, nkeep - 1) 760 | try: 761 | j = np.random.randint(i + 1, nkeep - 1) 762 | except ValueError: 763 | j = nkeep - 1 764 | try: 765 | scale = np.mean(np.abs(rate>0.))*0.1 766 | #rate_new[j,i] = np.random.normal(loc=rate[j,i], scale=scale) 767 | rate_new[j,i] = rate[j,i] + (np.random.random() - 0.5)*scale 768 | if np.all((rate_new - np.diag(np.diag(rate_new))) >= 0): 769 | break 770 | except ValueError: 771 | pass 772 | #else: 773 | # print rate_new - np.diag(np.diag(rate_new)) 774 | 775 | return rate_new, peq_new 776 | 777 | 778 | def detailed_balance(nkeep, rate, peq): 779 | """ Enforce detailed balance in rate matrix. 780 | 781 | Parameters 782 | ---------- 783 | nkeep : int 784 | The number of states. 785 | rate : array 786 | The rate matrix obeying detailed balance. 
def likelihood(nkeep, rate, count, lagt):
    """ Likelihood of a rate matrix given a count matrix

    We use the procedure described by Buchete and Hummer.[1]_
    The rate matrix is symmetrized, diagonalized, and the propagator at
    the lag time is rebuilt from the spectral decomposition. The
    negative log likelihood is then accumulated over observed transitions.

    Parameters
    ----------
    nkeep : int
        Number of states in transition matrix.
    rate : np.array
        The rate matrix, of shape (nkeep, nkeep).
    count : np.array
        Matrix of transition counts, indexed like the propagator.
    lagt : float
        The lag time.

    Returns
    -------
    mlog_like : float
        Minus the log likelihood

    Notes
    -----
    ..[1] N.-V. Buchete and G. Hummer, "Coarse master equations for
    peptide folding dynamics", J. Phys. Chem. B (2008).

    """
    # calculate symmetrized rate matrix; sqrt(K_ii**2) loses the sign of
    # the diagonal, so it is restored explicitly
    ratesym = np.sqrt(np.multiply(rate, rate.transpose()))
    diag = np.arange(nkeep)
    ratesym[diag, diag] = -ratesym[diag, diag]

    # ratesym is symmetric by construction: eigh guarantees real
    # eigenvalues and orthonormal eigenvectors, which eig does not
    evalsym, evectsym = np.linalg.eigh(ratesym)

    # equilibrium mode: largest eigenvalue
    ieq = np.argmax(evalsym)
    veq = evectsym[:, ieq]

    # right and left eigenvectors of the original rate matrix
    phiR = evectsym*veq[:, np.newaxis]
    phiL = evectsym/veq[:, np.newaxis]

    # propagator: prop[j,i] = sum_n phiR[j,n]*phiL[i,n]*exp(-|lambda_n|*lagt)
    decay = np.exp(-np.abs(evalsym)*lagt)
    prop = np.dot(phiR*decay, phiL.transpose())

    # only observed transitions contribute to the likelihood
    count = np.asarray(count)
    observed = count > 0
    log_like = np.sum(count[observed]*np.log(prop[observed]))

    return -log_like
def partial_flux(states, peq, K, pfold, d_peq, d_K, d_pfold, target):
    """ Calculates derivative of reactive flux

    The derivative of each flux element J[j][i] = K[j][i]*peq[i]*
    (pfold[j] - pfold[i]) is obtained with the product rule. Only the
    elements flowing into the target states through a positive rate are
    summed.

    Parameters
    ----------
    states : sequence
        The states; only its length is used.
    peq : sequence
        Equilibrium probabilities.
    K : 2D indexable
        The rate matrix, indexed as K[j][i].
    pfold : sequence
        Committor values.
    d_peq : sequence
        Derivative of the equilibrium probabilities.
    d_K : 2D indexable
        Derivative of the rate matrix.
    d_pfold : sequence
        Derivative of the committor.
    target : list
        Indexes of the states on the product side of the dividing line.

    Returns
    -------
    sum_d_flux : float
        Derivative of the reactive flux into the target states.

    """
    nstates = len(states)
    # Only rows belonging to the target contribute, so the full
    # nstates x nstates derivative matrix is never needed. Sorting keeps
    # the accumulation order of the original row-by-row scan.
    rows = sorted(j for j in set(target) if 0 <= j < nstates)

    sum_d_flux = 0.
    for i in range(nstates):
        for j in rows:
            if K[j][i] > 0:  # dividing line corresponds to I to F transitions
                sum_d_flux += d_K[j][i]*peq[i]*(pfold[j]-pfold[i]) + \
                        K[j][i]*d_peq[i]*(pfold[j]-pfold[i]) + \
                        K[j][i]*peq[i]*(d_pfold[j]-d_pfold[i])
    return sum_d_flux
trans_pow = np.linalg.matrix_power(trans,power) 999 | popul = mat_mul_v(trans_pow, pini) 1000 | return popul 1001 | 1002 | #def gen_path_lengths(keys, J, pfold, flux, FF, UU): 1003 | # """ use BHS prescription for defining path lenghts """ 1004 | # nkeys = len(keys) 1005 | # I = [x for x in range(nkeys) if x not in FF+UU] 1006 | # Jnode = [] 1007 | # # calculate flux going through nodes 1008 | # for i in range(nkeys): 1009 | # Jnode.append(np.sum([J[i,x] for x in range(nkeys) \ 1010 | # if pfold[x] < pfold[i]])) 1011 | # # define matrix with edge lengths 1012 | # Jpath = np.zeros((nkeys, nkeys), float) 1013 | # for i in UU: 1014 | # for j in I + FF: 1015 | # if J[j,i] > 0: 1016 | # Jpath[j,i] = np.log(flux/J[j,i]) + 1 1017 | # for i in I: 1018 | # for j in [x for x in FF+I if pfold[x] > pfold[i]]: 1019 | # if J[j,i] > 0: 1020 | # Jpath[j,i] = np.log(Jnode[j]/J[j,i]) + 1 1021 | # return Jnode, Jpath 1022 | 1023 | #def calc_acf(x): 1024 | # """ mp worker that calculates the ACF for a given mode 1025 | # 1026 | # Parameters 1027 | # ---------- 1028 | # x : list 1029 | # List containing input for each mp worker. Includes: 1030 | # distraj :the time series of states 1031 | # dt : the timestep for that trajectory 1032 | # keys : the keys used in the assignment 1033 | # lagt : the lag time for construction 1034 | # 1035 | # Returns 1036 | # ------- 1037 | # acf : array 1038 | # The autocorrelation function from that trajectory. 
def peq_averages(peq_boots, keep_keys_boots, keys):
    """ Return averages from bootstrap results

    For every key, the equilibrium probabilities are collected from the
    bootstrap samples in which that key was kept, and their mean and
    standard deviation are computed. A key that was not kept in any
    sample gets 0. for both, instead of the nan that averaging an empty
    list would produce.

    Parameters
    ----------
    peq_boots : list
        List of Peq arrays, one per bootstrap sample.
    keep_keys_boots : list
        List of key lists, one per bootstrap sample; entry n gives the
        keys that index peq_boots[n].
    keys : list
        List of keys

    Returns:
    -------
    peq_ave : list
        Peq averages, one per key.
    peq_std : list
        Peq std, one per key.

    """
    peq_ave = []
    peq_std = []
    for k in keys:
        # values of this key in the samples where it survived
        data = [peq[kept.index(k)] \
                for peq, kept in zip(peq_boots, keep_keys_boots) if k in kept]
        if data:
            peq_ave.append(np.mean(data))
            peq_std.append(np.std(data))
        else:
            peq_ave.append(0.)
            peq_std.append(0.)
    return peq_ave, peq_std
def download_test_data():
    """Download the test structure and trajectory if they are missing.

    The files are stored under ``test/data`` relative to the current
    working directory. The directory, including a missing ``test``
    parent, is created on demand. Files already present are not
    downloaded again.
    """
    base_url = "https://mastermsm.s3.eu-west-2.amazonaws.com/"
    gro = "test/data/alaTB.gro"
    xtc = "test/data/protein_only.xtc"
    cpath = os.getcwd()
    # makedirs also creates the parent and does not fail if the folder
    # already exists
    os.makedirs(os.path.join(cpath, "test", "data"), exist_ok=True)
    for fname in [gro, xtc]:
        local = os.path.join(cpath, fname)
        if not os.path.isfile(local):
            urlretrieve(base_url + fname, local)
class TestFewSM_Lib(unittest.TestCase):
    """Unit tests for the helper functions in ``fewsm_lib``."""

    def setUp(self):
        # no shared fixtures are required
        pass

    def test_sign(self):
        """A vector of zeros has no sign change; a mixed vector does."""
        all_zero = np.array([0] * 3)
        self.assertEqual(fewsm_lib.test_sign(all_zero), False)
        mixed = np.array([-1, 0, 1])
        self.assertEqual(fewsm_lib.test_sign(mixed), True)

    def test_metastability(self):
        """Metastability is compared against the trace of the matrix."""
        T_test = np.random.rand(10,10)
        meta = fewsm_lib.metastability(T_test)
        self.assertIsInstance(meta, float)
        trace = np.sum(np.diag(T_test))
        self.assertEqual(meta, trace)

    def test_metropolis(self):
        """The result is a bool and a negative delta is accepted."""
        accept = fewsm_lib.metropolis(np.random.random())
        self.assertIsInstance(accept, bool)
        accept = fewsm_lib.metropolis(-1.)
        self.assertTrue(accept)

    def test_beta(self):
        """The inverse temperature is returned as a float."""
        cases = [
            {"imc": 2, "mcsasteps": 10},
            {"imc":1, "mcsasteps":1},
        ]
        for case in cases:
            beta = fewsm_lib.beta(case["imc"], case["mcsasteps"])
            self.assertIsInstance(beta, float)

    def test_split_sign(self):
        """Splitting by sign returns a dict with no fewer macrostates."""
        macro = {i: [i * 10 + j for j in range(10)] for i in range(10)}
        lvec = np.random.rand(100)

        new_macro, vals = fewsm_lib.split_sign(macro, lvec)
        self.assertIsInstance(new_macro, dict)
        self.assertGreaterEqual(len(new_macro.keys()), len(macro.keys()))

    def test_split_sigma(self):
        """Splitting by sigma returns a dict with no fewer macrostates."""
        macro = {i: [i * 10 + j for j in range(10)] for i in range(10)}
        lvec = np.random.rand(100)

        new_macro, vals = fewsm_lib.split_sigma(macro, lvec)
        self.assertIsInstance(new_macro, dict)
        self.assertGreaterEqual(len(new_macro.keys()), len(macro.keys()))
'E']) 78 | self.tr.find_keys() 79 | self.msm = msm.SuperMSM([self.tr]) 80 | self.msm.do_msm(10) 81 | self.msm.msms[10].do_trans() 82 | 83 | def test_attributes(self): 84 | self.fewsm = fewsm.FEWSM(parent=self.msm.msms[10]) 85 | self.assertIsNotNone(self.fewsm.macros) 86 | self.assertEqual(len(self.fewsm.macros), 2) 87 | 88 | def test_map_trajectory(self): 89 | self.fewsm = fewsm.FEWSM(parent=self.msm.msms[10]) 90 | self.fewsm.map_trajectory() 91 | self.mapped = self.fewsm.mappedtraj[0] 92 | self.assertIsNotNone(self.mapped) 93 | self.assertIsInstance(self.mapped, traj.TimeSeries) 94 | self.assertTrue(hasattr(self.mapped, 'dt')) 95 | self.assertTrue(hasattr(self.mapped, 'distraj')) 96 | self.assertEqual(len(set(self.mapped.distraj)), 2) 97 | self.assertEqual(sorted(set(self.mapped.distraj)), [0, 1]) 98 | 99 | def test_eigen_group(self): 100 | self.fewsm = fewsm.FEWSM(parent=self.msm.msms[10]) 101 | macros = self.fewsm.eigen_group() 102 | print("MACROS! ", macros) 103 | self.assertIsInstance(macros, dict) 104 | -------------------------------------------------------------------------------- /mastermsm/test/test_msm.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import mdtraj as md 3 | import numpy as np 4 | from mastermsm.trajectory import traj_lib, traj 5 | from mastermsm.msm import msm, msm_lib 6 | from test.download_data import download_test_data 7 | import os, pickle 8 | 9 | # thermal energy (kJ/mol) 10 | beta = 1./(8.314e-3*300) 11 | 12 | class TestMSMLib(unittest.TestCase): 13 | def test_esort(self): 14 | self.assertTrue(hasattr(msm_lib, 'esort')) 15 | self.assertTrue(callable(msm_lib.esort)) 16 | self.esort = msm_lib.esort([0,float(1)], [1,float(2)]) 17 | self.assertEqual(self.esort, 1) 18 | self.esort = msm_lib.esort([0,float(100)], [1,float(2)]) 19 | self.assertEqual(self.esort, -1) 20 | self.esort = msm_lib.esort([100,float(1)], [1,float(1)]) 21 | self.assertEqual(self.esort, 0) 22 | 23 | def 
test_mat_mul_v(self): 24 | self.assertTrue(hasattr(msm_lib,'mat_mul_v')) 25 | self.assertTrue(callable(msm_lib.mat_mul_v)) 26 | self.matrix = np.array([ 27 | [1, 2, 3], 28 | [4, 5, 6] 29 | ]) 30 | self.vector = np.array( 31 | [1, 0, 1] 32 | ) 33 | self.assertEqual(msm_lib.mat_mul_v(self.matrix, self.vector), [4, 10]) 34 | self.matrix = np.array([ 35 | [-5, -4, 2], 36 | [1, 6, -3], 37 | [3, 5.5, -4] 38 | ]) 39 | self.vector = np.array( 40 | [1, 2, -3] 41 | ) 42 | self.assertEqual(msm_lib.mat_mul_v(self.matrix, self.vector), [-19, 22, 26]) 43 | 44 | def test_rand_rate(self): 45 | testT = np.array([ 46 | [10, 2, 1], 47 | [1, 1, 1], 48 | [0, 1, 0] 49 | ]) 50 | self.random1 = msm_lib.rand_rate(nkeep= 3, count= testT) 51 | self.random2 = msm_lib.rand_rate(nkeep= 3, count= testT) 52 | self.assertEqual(self.random1.shape, (3, 3)) 53 | self.assertFalse((self.random1 == self.random2).all()) 54 | 55 | def test_traj_split(self): 56 | traj1 = traj.TimeSeries(distraj=[1, 2, 3], dt=1.) 57 | traj2 = traj.TimeSeries(distraj=[3, 2, 1], dt=2.) 58 | trajs = [traj1, traj2] 59 | self.filepath = msm_lib.traj_split(data=trajs, lagt=10) 60 | self.assertIsInstance(self.filepath, str) 61 | self.assertTrue(os.path.exists(self.filepath)) 62 | os.remove(self.filepath) # clean temp file 63 | 64 | def calc_trans(self): 65 | self.testT = msm_lib.calc_trans(nkeep=10) 66 | self.assertIsInstance(self.testT, np.ndarray) 67 | self.assertEqual(self.testT.shape, (10,10)) 68 | 69 | def test_calc_rate(self): 70 | self.testT = np.array([ 71 | [1, 2, 3], 72 | [0, 0, 0], 73 | [10, 10, 10] 74 | 75 | ]) 76 | self.rate = msm_lib.calc_rate(nkeep=3, trans=self.testT, lagt=10) 77 | self.assertIsInstance(self.rate, np.ndarray) 78 | self.assertEqual(self.rate.shape, (3, 3)) 79 | 80 | def test_calc_lifetime(self): 81 | distraj = [1, 1, 1, 2] 82 | dt = 1. 
83 | keys = [1, 2] 84 | data = [distraj, dt, keys] 85 | self.life = msm_lib.calc_lifetime(data) 86 | self.assertIsInstance(self.life, dict) 87 | 88 | def test_partial_rate(self): 89 | test_nstates = 3 90 | test_K = np.random.rand(test_nstates,test_nstates) 91 | d_K_1 = msm_lib.partial_rate(test_K, 1) 92 | for i in range(test_nstates): 93 | if i != 1: 94 | self.assertAlmostEqual(d_K_1[i,1] / test_K[i,1], beta/2) 95 | self.assertAlmostEqual(d_K_1[1, i] / test_K[1, i], -beta / 2) 96 | self.assertEqual(d_K_1.shape, (test_nstates, test_nstates)) 97 | 98 | def test_partial_peq(self): 99 | test_nstates = 3 100 | test_peq = np.random.rand(3) 101 | d_peq_1 = msm_lib.partial_peq(test_peq,1) 102 | self.assertEqual(len(d_peq_1), test_nstates) 103 | for elem in range(test_nstates): 104 | d_peq_elem = msm_lib.partial_peq(test_peq, elem) 105 | for i in range(test_nstates): 106 | if i != elem: 107 | self.assertAlmostEqual(d_peq_elem[i] / (test_peq[elem] * test_peq[i]), beta) 108 | else: 109 | self.assertAlmostEqual(d_peq_elem[i] / (test_peq[i] * (1. 
- test_peq[i])), -beta) 110 | 111 | def test_partial_pfold(self): 112 | states = range(3) 113 | K = np.random.rand(2, 2) 114 | d_K = np.random.rand(2, 2) 115 | FF = [0] 116 | UU = [2] 117 | res_dpfold = msm_lib.partial_pfold(states, K, d_K, FF, UU, 118 | np.random.randint(0, 2)) # the last int parameter is not used 119 | self.assertEqual(len(res_dpfold), len(states)) 120 | self.assertIsInstance(res_dpfold, np.ndarray) 121 | self.assertIsInstance(res_dpfold[0], float) 122 | 123 | def test_partial_flux(self): 124 | nstates = np.random.randint(2,50) 125 | states = range(nstates) 126 | peq = np.random.rand(nstates) 127 | K = np.random.rand(nstates,nstates) 128 | pfold = np.random.rand(nstates) 129 | d_peq = np.random.rand(nstates) 130 | d_K = np.random.rand(nstates,nstates) 131 | d_pfold = np.random.rand(nstates) 132 | target = [0] 133 | 134 | sum_d_flux = 0 135 | d_J = np.zeros((nstates, nstates), float) 136 | for i in range(nstates): 137 | for j in range(nstates): 138 | d_J[j][i] = d_K[j][i] * peq[i] * (pfold[j] - pfold[i]) + \ 139 | K[j][i] * d_peq[i] * (pfold[j] - pfold[i]) + \ 140 | K[j][i] * peq[i] * (d_pfold[j] - d_pfold[i]) 141 | if j in target and K[j][i] > 0: # dividing line corresponds to I to F transitions 142 | sum_d_flux += d_J[j][i] 143 | res_sum_d_flux = msm_lib.partial_flux(states, peq, K, pfold,d_peq, d_K, d_pfold, target) 144 | 145 | self.assertIsNotNone(res_sum_d_flux) 146 | self.assertIsInstance(res_sum_d_flux, float) 147 | 148 | 149 | 150 | def test_tau_averages(self): 151 | tau_boots_test = np.random.rand(2, 2) 152 | keys_test = range(3) 153 | res_tau_ave, res_tau_std = msm_lib.tau_averages(tau_boots_test, keys_test) 154 | self.assertEqual(len(res_tau_ave),len(keys_test)-1) 155 | self.assertEqual(len(res_tau_std),len(keys_test)-1) 156 | self.assertIsInstance(res_tau_std, list) 157 | self.assertIsInstance(res_tau_ave, list) 158 | self.assertIsInstance(res_tau_ave[0],float) 159 | self.assertIsInstance(res_tau_std[0], float) 160 | 161 | def 
test_peq_averages(self): 162 | peq_boots_test = np.random.rand(2,3) 163 | keep_keys_boots_test = [['A','E','O'],['A','E','O']] 164 | keys = ['A','E','O'] 165 | res_peq_ave, res_peq_std = msm_lib.peq_averages(peq_boots_test, keep_keys_boots_test, keys) 166 | self.assertEqual(len(res_peq_ave),len(keys)) 167 | self.assertEqual(len(res_peq_std),len(keys)) 168 | self.assertIsInstance(res_peq_ave, list) 169 | self.assertIsInstance(res_peq_std, list) 170 | self.assertIsInstance(res_peq_ave[0], float) 171 | self.assertIsInstance(res_peq_std[0], float) 172 | 173 | def test_propagate_worker(self): 174 | t = 0 175 | rate = np.random.rand(2,2) 176 | pini = np.random.rand(2,2) 177 | x_test = [rate, t, pini] 178 | res_popul = msm_lib.propagate_worker(x_test) 179 | self.assertIsInstance(res_popul, list) 180 | self.assertIsInstance(res_popul[0], np.ndarray) 181 | self.assertIsInstance(res_popul[0][0], float) 182 | 183 | def test_propagateT_worker(self): 184 | t = 0 185 | rate = np.random.rand(2,2) 186 | pini = np.random.rand(2,2) 187 | x_test = [rate, t, pini] 188 | res_popul = msm_lib.propagateT_worker(x_test) 189 | self.assertIsInstance(res_popul, list) 190 | self.assertIsInstance(res_popul[0], np.ndarray) 191 | self.assertIsInstance(res_popul[0][0], float) 192 | 193 | def test_detailed_balance(self): 194 | nkeep_test = 2 195 | rate = np.array(np.random.rand(nkeep_test,nkeep_test)) 196 | peq = np.random.rand(nkeep_test) 197 | res_rate = msm_lib.detailed_balance(nkeep_test, rate, peq) 198 | self.assertEqual(res_rate.shape, (nkeep_test,nkeep_test)) 199 | self.assertIsInstance(res_rate,np.ndarray) 200 | self.assertIsInstance(res_rate[0][0],float) 201 | 202 | def test_likelihood(self): 203 | nkeep_test = 2 204 | rate = np.array(np.random.rand(nkeep_test,nkeep_test)) 205 | count = np.array(np.random.randint(0, 10**5, size=(nkeep_test,nkeep_test))) 206 | lagt = np.random.randint(1,1000) 207 | res_mlog_like = msm_lib.likelihood(nkeep_test,rate,count,lagt) 208 | 
self.assertIsInstance(res_mlog_like, float) 209 | self.assertIsNotNone(res_mlog_like) 210 | self.assertGreater(res_mlog_like, 0) 211 | 212 | def test_calc_mlrate(self): 213 | nkeep_test = 2 214 | rate_init = np.array(np.random.rand(nkeep_test, nkeep_test)) 215 | count = np.array(np.random.randint(0, 10 ** 5, size=(nkeep_test, nkeep_test))) 216 | lagt = np.random.randint(1, 1000) 217 | res_rate, res_ml, res_beta = msm_lib.calc_mlrate(nkeep_test, count, lagt, rate_init) 218 | self.assertIsInstance(res_rate, np.ndarray) 219 | self.assertIsNotNone(res_rate) 220 | self.assertIsNotNone(res_ml) 221 | self.assertIsNotNone(res_beta) 222 | 223 | def test_mc_move(self): 224 | nkeep_test = np.random.randint(2,100) 225 | rate = np.random.rand(nkeep_test,nkeep_test) 226 | peq_test = np.random.rand(nkeep_test) 227 | db_rate = msm_lib.detailed_balance(nkeep_test,rate,peq_test) 228 | new_rate, new_peq = msm_lib.mc_move(nkeep_test, db_rate, peq_test) 229 | self.assertFalse(np.array_equal(db_rate, new_rate)) 230 | self.assertEqual(db_rate.shape, new_rate.shape) 231 | self.assertEqual(peq_test.shape, new_peq.shape) 232 | 233 | def test_calc_eigsK(self): 234 | nstates = np.random.randint(2,100) 235 | rate_test = np.random.rand(nstates,nstates) 236 | res_tauK,res_peqK = msm_lib.calc_eigsK(rate_test) 237 | self.assertIsInstance(res_tauK, list) 238 | 239 | self.assertEqual(len(res_tauK), nstates) 240 | self.assertEqual(len(res_peqK), nstates) 241 | self.assertIsInstance(res_tauK[0], np.float) 242 | self.assertIsInstance(res_peqK[0], np.complex) 243 | 244 | res_tauK, res_peqK, res_rvecsK, res_lvecsK = msm_lib.calc_eigsK(rate_test, evecs=True) 245 | self.assertIsNotNone(res_rvecsK) 246 | self.assertIsNotNone(res_lvecsK) 247 | self.assertIsInstance(res_lvecsK, np.ndarray) 248 | self.assertIsInstance(res_rvecsK, np.ndarray) 249 | 250 | def test_run_commits(self): 251 | nstates = np.random.randint(2,100) 252 | states = range(nstates) 253 | K = np.random.rand(nstates, nstates) 254 | peq = 
class TestSuperMSM(unittest.TestCase):
    """Tests for ``msm.SuperMSM`` built from the downloaded test data."""

    def setUp(self):
        # fetch the test files and discretize the trajectory
        download_test_data()
        self.tr = traj.TimeSeries(top='test/data/alaTB.gro', \
                traj=['test/data/protein_only.xtc'])
        self.tr.discretize('rama', states=['A', 'E', 'O'])
        self.tr.find_keys()
        self.msm = msm.SuperMSM([self.tr])

    def test_init(self):
        """Data and time step are stored at construction."""
        self.assertIsNotNone(self.msm)
        self.assertTrue(hasattr(self.msm, 'data'))
        self.assertEqual(self.msm.data, [self.tr])
        self.assertEqual(self.msm.dt, 1.0)
        # testing with more than one trajectory
        self.msm = msm.SuperMSM([self.tr, self.tr])
        self.assertEqual(len(self.msm.data), 2)

    def test_merge_trajs(self):
        """Merging a trajectory with a new state adds one key."""
        # create fake trajectory to merge
        extra = traj.TimeSeries(distraj=['L', 'L', 'L', 'A'], dt = 2.0)
        extra.keys = ['L','A']
        old_keys = self.msm.keys
        self.msm.data = [self.tr, extra]
        new_keys = self.msm._merge_trajs()
        self.assertEqual(len(new_keys), len(old_keys) + 1)
        self.assertEqual(sorted(new_keys), ['A', 'E', 'L'])

    def test_max_dt(self):
        """The largest time step among the trajectories is returned."""
        extra = traj.TimeSeries(distraj=['L', 'L', 'L', 'A'], dt=2.0)
        old_dt = self.msm.dt
        self.msm.data = [self.tr, extra]
        new_dt = self.msm._max_dt()
        self.assertEqual(new_dt, 2.0)

    def test_do_msm(self):
        """do_msm stores an MSM under its lag time."""
        self.msm.do_msm(lagt=1)
        model = self.msm.msms[1]
        self.assertIsInstance(model, msm.MSM)
        self.assertEqual(self.msm.msms[1].lagt, 1)

    def test_convergence(self):
        """Every lag time gets bootstrap averages and errors."""
        lagtimes = np.array(range(10,100,10))
        self.msm.convergence_test(time=lagtimes)
        for lagt in lagtimes:
            for attr in ('tau_ave', 'tau_std', 'peq_ave', 'peq_std'):
                self.assertTrue(hasattr(self.msm.msms[lagt], attr))

    def test_do_boots(self):
        """Bootstrapping one MSM sets the averages and errors."""
        self.msm.do_msm(10)
        self.msm.msms[10].boots()

        for attr in ('tau_ave', 'tau_std', 'peq_ave', 'peq_std'):
            self.assertTrue(hasattr(self.msm.msms[10], attr))

    def test_ck_test(self):
        """Checks the sizes and types returned by ck_test."""
        init = ['A']
        time = np.array(range(50,210,25))
        pMSM, pMD, epMD = self.msm.ck_test(init=init, time=time)
        for result in (pMSM, pMD, epMD):
            self.assertIsNotNone(result)
        self.assertEqual(len(pMSM), len(time))
        self.assertEqual(len(epMD), 10)

        self.assertIsInstance(pMSM, list)
        self.assertIsInstance(pMSM[0], tuple)
        self.assertIsInstance(pMD, np.ndarray)
        self.assertIsInstance(epMD, np.ndarray)

    def test_do_pfold(self):
        """Committor and flux attributes are set for several lag times."""
        states = [
            ['A'],
            ['E']
        ]
        for lagt in [1,10,100]:
            self.msm.do_msm(lagt)
            model = self.msm.msms[lagt]
            model.boots()
            self.msm.msms[lagt].do_trans()
            self.msm.msms[lagt].do_rate()

            self.msm.msms[lagt].do_pfold(FF=states[0], UU=states[1])
            for attr in ('pfold', 'J', 'sum_flux', 'kf'):
                self.assertTrue(hasattr(self.msm.msms[lagt], attr))
            self.assertIsInstance(self.msm.msms[lagt].kf, np.float64)
            self.assertEqual(len(self.msm.msms[lagt].J), len(states))

    def test_lb_rate(self):
        """The lifetime-based rate sets eigenvalues and eigenvectors."""
        self.msm.do_lbrate()
        self.assertIsNotNone(self.msm.tauK)
        self.assertIsNotNone(self.msm.peqK)
        self.assertIsNotNone(self.msm.rvecsK)
        self.assertIsNotNone(self.msm.lvecsK)
        nkeys = len(self.msm.keys)
        self.assertEqual(len(self.msm.tauK), nkeys - 1)
        self.assertEqual(self.msm.rvecsK.shape, (len(self.msm.keys), len(self.msm.keys)))
407 | self.data = np.array([ 408 | traj_1, 409 | traj_2 410 | ]) 411 | self.lagt = 10 412 | self.keys = [i for i in range(1,self.nstates+1)] 413 | msm_obj = msm.MSM(data=self.data, lagt=self.lagt, keys=self.keys, sym=True) 414 | self.msm = msm_obj 415 | 416 | 417 | def test_init(self): 418 | self.msm_empty = msm.MSM() 419 | self.assertIsNotNone(self.msm_empty) 420 | self.assertIsNone(self.msm_empty.data) 421 | self.assertIsNone(self.msm_empty.lagt) 422 | self.assertIsNone(self.msm_empty.keys) 423 | self.assertFalse(self.msm_empty.sym) 424 | 425 | self.assertIsNotNone(self.msm) 426 | self.assertIsNotNone(self.msm.data) 427 | self.assertIsNotNone(self.msm.keys) 428 | self.assertIsNotNone(self.msm.lagt) 429 | self.assertTrue(self.msm.sym) 430 | self.assertTrue(np.array_equal(self.data, self.msm.data)) 431 | self.assertEqual(self.msm.lagt, self.lagt) 432 | self.assertTrue(np.array_equal(self.keys, self.msm.keys)) 433 | 434 | def test_do_count(self): 435 | self.msm.do_count() 436 | self.assertIsNotNone(self.msm.keep_states) 437 | self.assertIsNotNone(self.msm.keep_keys) 438 | 439 | def test_calc_count_multi(self): 440 | count = self.msm.calc_count_multi() 441 | self.assertIsNotNone(count) 442 | self.assertIsInstance(count, np.ndarray) 443 | self.assertEqual(count.shape, (self.nstates, self.nstates)) 444 | 445 | def test_check_connect(self): 446 | self.msm.do_count() 447 | keep_states, keep_keys = self.msm.check_connect() 448 | self.assertEqual(len(keep_keys), len(keep_states)) 449 | self.assertEqual(self.msm.keep_keys, self.keys) 450 | 451 | def test_do_trans(self): 452 | self.msm.do_count() 453 | self.msm.do_trans(evecs=False) 454 | self.assertIsNotNone(self.msm.tauT) 455 | self.assertIsNotNone(self.msm.trans) 456 | self.assertIsNotNone(self.msm.peqT) 457 | self.assertFalse(hasattr(self.msm, "rvecsT")) 458 | self.assertFalse(hasattr(self.msm, "lvecsT")) 459 | self.assertEqual(len(self.msm.tauT), self.nstates - 1) 460 | self.assertEqual(len(self.msm.peqT), self.nstates) 
461 | self.assertEqual(self.msm.trans.shape, (self.nstates, self.nstates)) 462 | self.msm.do_trans(evecs=True) 463 | self.assertTrue(hasattr(self.msm, "rvecsT")) 464 | self.assertTrue(hasattr(self.msm, "lvecsT")) 465 | self.assertEqual(len(self.msm.rvecsT), self.nstates) 466 | self.assertEqual(len(self.msm.lvecsT), self.nstates) 467 | 468 | def test_do_rate(self): 469 | self.msm.do_count() 470 | self.msm.do_trans() 471 | self.msm.do_rate(evecs=False) 472 | self.assertIsNotNone(self.msm.rate) 473 | self.assertIsNotNone(self.msm.tauK) 474 | self.assertIsNotNone(self.msm.peqK) 475 | self.assertEqual(len(self.msm.tauK), self.nstates - 1) 476 | self.assertEqual(len(self.msm.peqK), self.nstates) 477 | self.msm.do_rate(evecs=True) 478 | self.assertIsNotNone(self.msm.rvecsK) 479 | self.assertIsNotNone(self.msm.lvecsK) 480 | 481 | def test_calc_eigsT(self): 482 | self.msm.do_count() 483 | self.msm.do_trans() 484 | tauT, peqT, rvecsT_sorted, lvecsT_sorted = self.msm.calc_eigsT(evecs=True) 485 | self.assertIsNotNone(tauT) 486 | self.assertIsNotNone(peqT) 487 | self.assertEqual(len(tauT), self.nstates - 1) 488 | self.assertEqual(len(peqT), self.nstates) 489 | self.assertIsNotNone(rvecsT_sorted) 490 | self.assertIsNotNone(lvecsT_sorted) 491 | 492 | def test_calc_eigsK(self): 493 | self.msm.do_count() 494 | self.msm.do_trans() 495 | tauK, peqK, rvecsK_sorted, lvecsK_sorted = self.msm.calc_eigsT(evecs=True) 496 | self.assertIsNotNone(tauK) 497 | self.assertIsNotNone(peqK) 498 | self.assertEqual(len(tauK), self.nstates - 1) 499 | self.assertEqual(len(peqK), self.nstates) 500 | self.assertIsNotNone(rvecsK_sorted) 501 | self.assertIsNotNone(lvecsK_sorted) 502 | 503 | def test_boots(self): 504 | self.msm.do_count() 505 | self.msm.do_trans() 506 | self.msm.boots() 507 | self.assertIsNotNone(self.msm.tau_ave) 508 | self.assertIsNotNone(self.msm.tau_std) 509 | self.assertIsNotNone(self.msm.peq_ave) 510 | self.assertIsNotNone(self.msm.peq_std) 511 | 
def test_sensitivity(self):
    """Run the sensitivity analysis for one folded and one unfolded state.

    Checks that ``kf``, ``d_pu``, ``d_lnkf`` and ``dJ`` are populated with
    floats and that the per-state arrays have one entry per state.
    """
    self.msm.do_count()
    self.msm.do_trans()
    self.msm.do_rate()

    # Draw two *different* state keys. Two independent randint() draws can
    # put the same state in both FF and UU, which makes the test input
    # depend on the random seed.
    # NOTE(review): assumes the fixture has at least two states -- confirm.
    folded, unfolded = np.random.choice(
        np.arange(1, self.nstates + 1), size=2, replace=False)
    FF = [int(folded)]
    UU = [int(unfolded)]

    self.msm.sensitivity(FF=FF, UU=UU)
    self.assertIsNotNone(self.msm.kf)
    self.assertIsNotNone(self.msm.d_pu)
    self.assertIsNotNone(self.msm.d_lnkf)
    self.assertIsNotNone(self.msm.dJ)
    self.assertIsInstance(self.msm.kf, float)
    self.assertEqual(len(self.msm.d_pu), self.nstates)
    self.assertEqual(len(self.msm.d_lnkf), self.nstates)
    self.assertEqual(len(self.msm.dJ), self.nstates)
    self.assertIsInstance(self.msm.d_pu[0], float)
    self.assertIsInstance(self.msm.dJ[0], float)
    self.assertIsInstance(self.msm.d_lnkf[0], float)
def test_acf_mode(self):
    """Check the shape and element type of the per-mode autocorrelation."""
    model = self.msm
    model.do_count()
    model.do_trans(evecs=True)
    model.do_rate()

    acf_ave = model.acf_mode()
    self.assertIsInstance(acf_ave, dict)
    # One entry is expected per kept state minus one.
    self.assertEqual(len(acf_ave), len(model.keep_keys) - 1)

    first_mode = list(acf_ave)[0]
    self.assertIsInstance(acf_ave[first_mode][0], float)
def test_state(self):
    """Check the coarse-state assignment of individual (phi, psi) pairs.

    Each pair is expected to fall in the listed region, or in 'O' when it
    lies outside every region.
    """
    psi = [-30, 0, -40, 90, 140, 180]
    phi = [60., 0, -90, -90, -90, -180]
    states_test = ['L', 'O', 'A', 'O', 'E', 'O']
    bounds = {
        'A': [-100., -40., -50., -10.],
        'E': [-180., -40., 125., 165.],
        'L': [50., 100., -40., 70.0],
    }

    for f, y, expected in zip(phi, psi, states_test):
        state, _ = traj_lib._state(f, y, bounds)
        # The message used to print the two values the wrong way round.
        self.assertEqual(state, expected,
                         'expected state %s but got %s' % (expected, state))
def test_load_mdtraj(self):
    """Check that _load_mdtraj returns an object from mdtraj's trajectory module."""
    loaded = traj._load_mdtraj(traj=self.trajfn, top=self.topfn)
    self.assertIsNotNone(loaded)
    self.assertEqual(loaded.__module__, 'mdtraj.core.trajectory')
    self.assertTrue(hasattr(loaded, '__class__'))
def test_ts_find_keys(self):
    """Check that find_keys collects unique states and honours `exclude`.

    The random trajectories are drawn with a single vectorised call each,
    instead of one np.random.choice call per frame.
    """
    self.assertIsNotNone(self.tr.find_keys)
    alphabet = ['A', 'E', 'L', 'O']
    nframes = 50000

    # Default behaviour: the unassigned state 'O' is excluded.
    self.tr.distraj = np.random.choice(alphabet, size=nframes).tolist()
    self.tr.find_keys()
    keys = self.tr.keys
    self.assertEqual(len(set(keys)), len(keys))
    self.assertEqual(len(keys), 3)
    for key in keys:
        self.assertIn(key, ['A', 'E', 'L'])

    del self.tr.distraj

    # Excluding the alpha-helical state instead.
    self.tr.distraj = np.random.choice(alphabet, size=nframes).tolist()
    self.tr.find_keys(exclude=['A'])
    keys = self.tr.keys
    self.assertEqual(len(set(keys)), len(keys))
    self.assertEqual(len(keys), 3)
    for key in keys:
        self.assertIn(key, ['O', 'E', 'L'])
def test_esort(self):
    """Check esort on pairs whose second elements are smaller, larger, equal."""
    self.assertTrue(hasattr(msm_lib, 'esort'))
    self.assertTrue(callable(msm_lib.esort))

    # (first pair, second pair, expected comparison result)
    cases = (
        ([0, 1.0], [1, 2.0], 1),
        ([0, 100.0], [1, 2.0], -1),
        ([100, 1.0], [1, 1.0], 0),
    )
    for left, right, expected in cases:
        self.esort = msm_lib.esort(left, right)
        self.assertEqual(self.esort, expected)
def test_calc_rate(self):
    """Check that calc_rate gives a 3x3 ndarray for a three-state input."""
    nstates = 3
    self.testT = np.array([
        [1, 2, 3],
        [0, 0, 0],
        [10, 10, 10],
    ])
    self.rate = msm_lib.calc_rate(trans=self.testT, nkeep=nstates, lagt=10)
    self.assertIsInstance(self.rate, np.ndarray)
    self.assertEqual(self.rate.shape, (nstates, nstates))
def joint_discretize(self, method='backbone_torsions', mcs=None, ms=None, dPCA=False):
    """
    Discretize simultaneously all trajectories with HDBSCAN.

    The joint label sequence is cut into consecutive pieces, one per
    trajectory in ``self.traj_list``, and each piece is stored as that
    trajectory's ``distraj``.

    Parameters
    ----------
    method : str
        The method of choice for the discretization. Options are
        'backbone_torsions' and 'contacts'.
    mcs : int
        Minimum cluster size for HDBSCAN clustering.
    ms : int
        Minsamples parameter for HDBSCAN clustering.
    dPCA : bool
        Whether we are using the dihedral PCA method.

    Raises
    ------
    ValueError
        If `method` is not one of the supported options.

    """
    if method == 'backbone_torsions':
        labels = self.joint_discretize_backbone_torsions(mcs=mcs, ms=ms, dPCA=dPCA)
    elif method == 'contacts':
        labels = self.joint_discretize_contacts(mcs=mcs, ms=ms)
    else:
        # Previously this fell through and failed on the unbound `labels`.
        raise ValueError("Unknown discretization method %r; options are "
                         "'backbone_torsions' and 'contacts'" % (method,))

    # NOTE(review): the slicing assumes one label per frame, in trajectory
    # order -- confirm this holds for the 'contacts' path.
    start = 0
    for tr in self.traj_list:
        stop = start + tr.mdt.n_frames
        tr.distraj = list(labels[start:stop])
        start = stop
def joint_discretize_contacts(self, mcs=None, ms=None):
    """
    Analyze jointly pairwise contacts from all trajectories.

    Collects the mdtraj object of every trajectory in ``self.traj_list``
    and hands the whole list to the HDBSCAN contact discretization, so
    that all trajectories are labelled together.

    Parameters
    ----------
    mcs : int
        Minimum cluster size for HDBSCAN clustering.
    ms : int
        Minsamples parameter for HDBSCAN clustering.

    Returns
    -------
    labels :
        Whatever traj_lib.discrete_contacts_hdbscan returns for the
        collected trajectories.

    """
    trajectories = [tr.mdt for tr in self.traj_list]
    return traj_lib.discrete_contacts_hdbscan(mcs, ms, trajectories)
def _read_distraj(self, distraj=None, dt=None):
    """ Loads discrete trajectories directly.

    Parameters
    ----------
    distraj : str, list
        File or list with discrete trajectory. A file may have two
        whitespace-separated columns (time, state) or a single column
        (state). Blank lines are ignored.
    dt : float
        The time step. Used for lists and single-column files, where no
        time information is available; defaults to 1. For two-column
        files the time step is taken from the time column instead.

    Returns
    -------
    cstates : list
        The sequence of discrete states. For two-column files the states
        are converted to integers when all of them parse as integers.
    dt : float
        The time step.

    Raises
    ------
    ValueError
        If `distraj` is neither a list nor the path of an existing file.

    """
    if isinstance(distraj, list):
        return distraj, (1. if dt is None else dt)

    if not os.path.isfile(distraj):
        raise ValueError("distraj must be a list or the path of an "
                         "existing file: %r" % (distraj,))

    # Read everything up front so that the file handle is always closed.
    with open(distraj, "r") as fh:
        rows = [line.split() for line in fh if line.strip()]

    if rows and all(len(row) > 1 for row in rows):
        # Two columns: time, state.
        cstates = [row[1] for row in rows]
        if len(rows) > 2:
            step = float(rows[2][0]) - float(rows[1][0])
        elif len(rows) == 2:
            # Short files used to fall through to the single-column
            # branch, which returned the time column as the states.
            step = float(rows[1][0]) - float(rows[0][0])
        else:
            step = 1. if dt is None else dt
        try:  # make them integers if you can
            cstates = [int(x) for x in cstates]
        except ValueError:
            pass
        return cstates, step

    # Single column: states only, so the time step comes from the caller.
    cstates = [row[0] for row in rows]
    return cstates, (1. if dt is None else dt)
def discretize(self, method="rama", states=None, nbins=20,\
        mcs=100, ms=50):
    """ Discretize the simulation data.

    The assigned trajectory is stored in ``self.distraj``; nothing is
    returned.

    Parameters
    ----------
    method : str
        A method for doing the clustering. Options are
        "rama", "ramagrid", "rama_hdb", "contacts_hdb";
        where the latter two use HDBSCAN.
    states : list
        A list of states to be considered in the discretization.
        Only for method "rama". When None, the default set of
        traj_lib.discrete_rama is used.
    nbins : int
        Number of bins in the grid. Only for "ramagrid".
    mcs : int
        min_cluster_size for HDBSCAN
    ms : int
        min_samples for HDBSCAN

    Raises
    ------
    ValueError
        If `method` is not one of the supported options.

    """
    methods = ("rama", "ramagrid", "rama_hdb", "contacts_hdb")
    if method not in methods:
        # An unknown method used to be a silent no-op that left
        # self.distraj unset.
        raise ValueError("Unknown discretization method %r; options are %s"
                         % (method, ", ".join(methods)))

    if method == "contacts_hdb":
        self.distraj = traj_lib.discrete_contacts_hdbscan(mcs, ms, self.mdt)
        return

    # The three remaining methods all work on the backbone torsions.
    phi = md.compute_phi(self.mdt)
    psi = md.compute_psi(self.mdt)
    if method == "rama":
        if states is None:
            # Forwarding None would break the `in states` checks inside
            # discrete_rama; let it use its own default instead.
            self.distraj = traj_lib.discrete_rama(phi, psi)
        else:
            self.distraj = traj_lib.discrete_rama(phi, psi, states=states)
    elif method == "ramagrid":
        self.distraj = traj_lib.discrete_ramagrid(phi, psi, nbins)
    else:  # "rama_hdb"
        self.distraj = traj_lib.discrete_backbone_torsion(mcs, ms, phi=phi, psi=psi)
def discrete_rama(phi, psi, seq=None, bounds=None, states=['A', 'E', 'L']):
    """ Assign a set of phi, psi angles to coarse states.

    Parameters
    ----------
    phi : list
        A list of Phi Ramachandran angles. The first element lists the
        dihedrals, the second holds the per-frame angles in radians.
    psi : list
        A list of Psi Ramachandran angles, laid out like `phi`.
    seq : list
        Sequence of states. Currently unused.
    bounds : dict
        Alternative bounds for transition based assignment, mapping each
        state label to [phi_min, phi_max, psi_min, psi_max] in degrees.
        When given it is used as is and `states` is ignored.
    states : list
        The states that will be used in the assignment when the built-in
        bounds are used. None selects all of 'A', 'E' and 'L'.

    Returns
    -------
    cstates : list
        The sequence of coarse states, one string per frame with one
        label per dihedral pair.

    Raises
    ------
    ValueError
        If the numbers of phi and psi dihedrals differ.

    Notes
    -----
    Here we follow Buchete and Hummer for the assignment procedure [1]_ .

    .. [1] N. V. Buchete and G. Hummer, "Coarse master equations for peptide folding dynamics", J. Phys. Chem. B. (2008).

    """
    default_bounds = {
        'A': [-100., -40., -50., -10.],
        'E': [-180., -40., 125., 165.],
        'L': [50., 100., -40., 70.0],
    }
    if states is None:
        states = list(default_bounds)
    if bounds is None:
        TBA_bounds = {key: box for key, box in default_bounds.items()
                      if key in states}
    else:
        # User-supplied bounds used to leave TBA_bounds undefined.
        TBA_bounds = bounds

    if len(phi[0]) != len(psi[0]):
        # Raise rather than terminate the interpreter from library code.
        raise ValueError("Different number of phi and psi dihedrals")

    ndih = len(phi[0])
    cstates = []
    previous = []
    for f, y in zip(phi[1], psi[1]):
        current = []
        for n in range(ndih):
            s, _ = _state(f[n]*180/math.pi, y[n]*180/math.pi, TBA_bounds)
            if s == "O":
                # Transition-based assignment: an unassigned dihedral keeps
                # the label it had in the previous frame, if there was one.
                s = previous[n] if n < len(previous) else "O"
            current.append(s)
        cstates.append(''.join(current))
        previous = current
    return cstates
def _inrange(x, lo, hi):
    """ Returns 1 when x lies strictly between lo and hi, else 0. """
    if x > lo and x < hi:
        return 1
    else:
        return 0

def _inbounds(bounds, phi, psi):
    """ Checks whether a (phi, psi) pair falls in any box of `bounds`.

    Parameters
    ----------
    bounds : list
        Flat list of boxes, four values per box in the order
        [phi_min, phi_max, psi_min, psi_max]. Any number of boxes is
        accepted (the previous version looked at the first four only).
    phi : float
        Phi dihedral angle
    psi : float
        Psi dihedral angle

    Returns
    -------
    inside : int
        1 if the pair is strictly inside at least one box, 0 otherwise.

    """
    # Boxes are tested in order and the first hit wins, so a malformed
    # trailing box is only reached when no earlier box matched.
    for start in range(0, len(bounds), 4):
        phi_lo, phi_hi, psi_lo, psi_hi = bounds[start:start + 4]
        if _inrange(phi, phi_lo, phi_hi) and _inrange(psi, psi_lo, psi_hi):
            return 1
    return 0
def _stategrid(phi, psi, nbins):
    """ Finds the grid bin for a pair of phi-psi dihedrals

    The column is obtained from phi and the row from psi, both mapped
    from radians onto `nbins` equal bins, and combined as
    column + row*nbins.

    Parameters
    ----------
    phi : float
        Phi dihedral angle
    psi : float
        Psi dihedral angle
    nbins : int
        Number of bins in each dimension of the grid

    Returns
    -------
    k : int
        Index of bin

    """
    last = nbins - 1
    # An angle of exactly pi maps to index nbins, one past the grid, so
    # the upper edge is folded into the last bin. Only the upper edge is
    # capped; values below -pi are left as they were.
    column = min(int(0.5*nbins*(phi/math.pi + 1.)), last)
    row = min(int(0.5*nbins*(psi/math.pi + 1)), last)
    return column + row*nbins
227 | 228 | Parameters 229 | ---------- 230 | phi : list 231 | A list of Phi Ramachandran angles 232 | psi : list 233 | A list of Psi Ramachandran angles 234 | pcs : matrix 235 | Matrix containing principal components obtained 236 | from PCA of dihedral angles 237 | mcs : int 238 | min_cluster_size for HDBSCAN 239 | ms : int 240 | min_samples for HDBSCAN 241 | 242 | """ 243 | if dPCA: 244 | X = pcs 245 | else: 246 | # shift and combine dihedrals 247 | if len(phi[0]) != len(psi[0]): 248 | raise ValueError("Inconsistent dimensions for angles") 249 | 250 | ndih = len(phi[0]) 251 | phi_shift, psi_shift = [], [] 252 | for f, y in zip(phi[1], psi[1]): 253 | for n in range(ndih): 254 | phi_shift.append(f[n]) 255 | psi_shift.append(y[n]) 256 | np.savetxt("phi_psi.dat", np.column_stack((phi_shift, psi_shift))) 257 | psi_shift, phi_shift = _shift(psi_shift, phi_shift) 258 | data = np.column_stack((phi_shift, psi_shift)) 259 | np.savetxt("phi_psi_shifted.dat", data) 260 | X = StandardScaler().fit_transform(data) 261 | 262 | # Set values for clustering parameters 263 | if mcs is None: 264 | mcs = int(np.sqrt(len(X))) 265 | print("Setting minimum cluster size to: %g" % mcs) 266 | if ms is None: 267 | ms = mcs 268 | print("Setting min samples to: %g" % ms) 269 | 270 | hdb = hdbscan.HDBSCAN(min_cluster_size=mcs, min_samples=ms).fit(X) 271 | hdb.condensed_tree_.plot(select_clusters=True) 272 | 273 | #plt.savefig("alatb-hdbscan-tree.png",dpi=300,transparent=True) 274 | 275 | # n_micro_clusters = len(set(hb.labels_)) - (1 if -1 in hb.labels_ else 0 276 | # if n_micro_clusters > 0: 277 | # print("HDBSCAN mcs value set to %g"%mcs, n_micro_clusters,'clusters.') 278 | # break 279 | # elif mcs < 400: 280 | # mcs += 25 281 | # else: 282 | # sys.exit("Cannot find any valid HDBSCAN mcs value") 283 | # #n_noise = list(labels).count(-1) 284 | 285 | # ## plot clusters 286 | # colors = ['royalblue', 'maroon', 'forestgreen', 'mediumorchid', \ 287 | # 'tan', 'deeppink', 'olive', 'goldenrod', 
def dPCA(angles, variance=0.8):
    """
    Compute PCA of dihedral angles

    We follow the methods described in A. Altis et al.
    *J. Chem. Phys.* 244111 (2007)

    Every angle is replaced by its (cos, sin) pair, the resulting
    features are standardized and a PCA is run on them.

    Parameters
    ----------
    angles : angles ordered by columns
    variance : float
        Fraction of the variance that the returned components must
        retrieve. Defaults to 0.8.

    Returns
    -------
    X_transf : dPCA components to retrieve the requested fraction
        of variance, ordered by columns

    """
    angles = np.asarray(angles)
    nframes, nangles = angles.shape

    # Interleave cos and sin so that columns 2k and 2k+1 belong to angle k;
    # done on whole columns instead of element by element.
    X = np.empty((nframes, 2*nangles))
    X[:, 0::2] = np.cos(angles)
    X[:, 1::2] = np.sin(angles)

    X_std = StandardScaler().fit_transform(X)
    sklearn_pca = PCA(n_components=2*nangles)

    X_transf = sklearn_pca.fit_transform(X_std)
    expl = sklearn_pca.explained_variance_ratio_
    print("Ratio of variance retrieved by each component:", expl)

    # Smallest number of leading components whose cumulative ratio reaches
    # the threshold, capped at the number of components available.
    ncomp = int(np.searchsorted(np.cumsum(expl), variance)) + 1
    ncomp = min(ncomp, len(expl))

    return X_transf[:, :ncomp]
402 | hdb.condensed_tree_.plot(select_clusters=True) 403 | plt.savefig("hdbscan-tree.png",dpi=300,transparent=True) 404 | 405 | # In case not enough states are produced, exit 406 | if (len(np.unique(hdb.labels_))<=2): 407 | raise Exception("Cannot generate clusters from contacts") 408 | 409 | dtraj = _filter_states(hdb.labels_) 410 | return dtraj 411 | 412 | def _filter_states(states): 413 | """ 414 | Filters to remove not-assigned frames when using dbscan or hdbscan 415 | 416 | """ 417 | fs = [] 418 | for s in states: 419 | if s >= 0: 420 | fs.append(s) 421 | else: 422 | try: 423 | fs.append(fs[-1]) 424 | except IndexError: 425 | pass 426 | return fs 427 | 428 | def _shift(psi, phi): 429 | psi_s, phi_s = copy.deepcopy(phi), copy.deepcopy(psi) 430 | for i in range(len(phi_s)): 431 | if phi_s[i] < -2: 432 | phi_s[i] += 2*np.pi 433 | for i in range(len(psi_s)): 434 | if psi_s[i] > 2: 435 | psi_s[i] -= 2*np.pi 436 | return phi_s, psi_s 437 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name : MasterMSM 2 | theme : readthedocs 3 | repo_url : https://github.com/daviddesancho/MasterMSM 4 | site_author: David De Sancho 5 | pages : 6 | - ['about.md', 'About'] 7 | - ['index.md', 'Introduction'] 8 | - ['installation.md', 'Installation'] 9 | - ['discretize.md', 'User guide', 'Discretizing the data'] 10 | - ['trajectory.md', 'User guide', 'Parsing trajectories'] 11 | - ['msm.md', 'User guide', 'Constructing the MSM'] 12 | - ['fewsm.md', 'User guide', 'Clustering the MSM'] 13 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scipy 3 | matplotlib 4 | networkx 5 | mdtraj 6 | hdbscan 7 | scikit-learn 8 | -------------------------------------------------------------------------------- /setup.py: 
#!/usr/bin/env python

# Setup script for the MasterMSM package

import io
import os
from setuptools import setup, find_packages


def read(fname):
    """Return the text of `fname`, located next to this script."""
    path = os.path.join(os.path.dirname(__file__), fname)
    # io.open accepts `encoding` on both Python 2 and 3; the context
    # manager closes the handle, which the bare open().read() never did.
    with io.open(path, encoding='utf-8') as handle:
        return handle.read()


setup(
    name='MasterMSM',
    version='0.1dev',
    description='Algorithms to construct master equation / Markov state models',
    url='http://github.com/daviddesancho/MasterMSM',
    author='David De Sancho',
    author_email='daviddesancho.at.gmail.com',
    license='GPL',
    packages=find_packages(),
    keywords="markov state model",
    long_description=read('README.md'),
    # README.md is Markdown; declare it so that it is rendered as such
    long_description_content_type='text/markdown',
    # One string per classifier (they used to be packed into a single
    # multi-line string, which is not a valid classifier)
    classifiers=[
        "Development Status :: 1 - Planning",
        "Operating System :: POSIX :: Linux",
        "Operating System :: MacOS",
        "Programming Language :: Python :: 2.7",
        "Topic :: Scientific/Engineering :: Bio-Informatics",
        "Topic :: Scientific/Engineering :: Chemistry",
    ],
)