├── .circleci └── config.yml ├── .gitattributes ├── .gitignore ├── LICENSE ├── MANIFEST.in ├── README.md ├── binder ├── environment.yml └── postBuild ├── conftest.py ├── devtools ├── conda-recipe │ └── meta.yaml ├── create_bibliography_nb.py └── install_miniconda.sh ├── manuscript ├── .gitignore ├── README.md ├── figures │ ├── figure_1.pdf │ ├── figure_2.pdf │ ├── figure_3.pdf │ ├── figure_4.pdf │ ├── figure_5.pdf │ ├── figure_6.pdf │ ├── figure_7.pdf │ ├── tutorials-logo.png │ └── workflowchart.svg ├── literature.bib ├── livecoms.cls ├── manuscript.tex └── vancouver-livecoms.bst ├── notebooks ├── .gitignore ├── 00-pentapeptide-showcase.ipynb ├── 01-data-io-and-featurization.ipynb ├── 02-dimension-reduction-and-discretization.ipynb ├── 03-msm-estimation-and-validation.ipynb ├── 04-msm-analysis.ipynb ├── 05-pcca-tpt.ipynb ├── 06-expectations-and-observables.ipynb ├── 07-hidden-markov-state-models.ipynb ├── 08-common-problems.ipynb └── static │ ├── hmm-backbone-1-385x432.png │ ├── hmm-backbone-2-388x526.png │ ├── hmm-backbone-3-347x500.png │ ├── hmm-backbone-4-367x348.png │ ├── hmm-backbone-5-260x374.png │ ├── pentapeptide-states.png │ └── pentapeptide-structure.png ├── pyemma_tutorials ├── __init__.py ├── __main__.py ├── _version.py ├── cli.py ├── jupyter_notebook_config.json ├── jupyter_notebook_config.py └── util.py ├── releases ├── LiveCoMS_Article_ASAP_V1.pdf ├── LiveCoMS_Article_V1.pdf └── header_V1.0.jpg ├── setup.cfg ├── setup.py └── versioneer.py /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | jobs: 3 | build: 4 | docker: 5 | - image: continuumio/miniconda3 6 | environment: 7 | PYTHONHASHSEED: 0 8 | OMP_NUM_THREADS: 1 9 | PYEMMA_NJOBS: 1 10 | NBVAL_OUTPUT: /root/nbval 11 | parallelism: 4 12 | steps: 13 | - checkout 14 | - run: 15 | name: conda_config 16 | command: | 17 | conda config --set always_yes true 18 | conda config --set quiet true 19 | - run: conda install conda-build 20 | - 
run: mkdir $NBVAL_OUTPUT 21 | - run: 22 | name: build_test 23 | command: conda build -c conda-forge . 24 | no_output_timeout: 20m 25 | - store_test_results: 26 | path: ~/junit 27 | - store_artifacts: 28 | path: /root/nbval #$NBVAL_OUTPUT 29 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | pyemma_tutorials/_version.py export-subst 2 | 3 | *.ipynb diff=jupyternotebook 4 | 5 | *.ipynb merge=jupyternotebook 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | #copied stuff 2 | pyemma_tutorials/notebooks/* 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | env/ 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | local_settings.py 59 | 60 | # Flask stuff: 61 | instance/ 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # Sphinx documentation 68 | docs/_build/ 69 | 70 | # PyBuilder 71 | target/ 72 | 73 | # Jupyter Notebook 74 | .ipynb_checkpoints 75 | 76 | # pyenv 77 | .python-version 78 | 79 | # celery beat schedule file 80 | celerybeat-schedule 81 | 82 | # SageMath parsed files 83 | *.sage.py 84 | 85 | # dotenv 86 | .env 87 | 88 | # virtualenv 89 | .venv 90 | venv/ 91 | ENV/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | #OSX stuff 107 | *.DS_Store 108 | manuscript/manuscript.suppinfo 109 | manuscript/manuscript.pdf 110 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Attribution 4.0 International 2 | 3 | ======================================================================= 4 | 5 | Creative Commons Corporation ("Creative Commons") is not a law firm and 6 | does not provide legal services or legal advice. Distribution of 7 | Creative Commons public licenses does not create a lawyer-client or 8 | other relationship. Creative Commons makes its licenses and related 9 | information available on an "as-is" basis. Creative Commons gives no 10 | warranties regarding its licenses, any material licensed under their 11 | terms and conditions, or any related information. 
Creative Commons 12 | disclaims all liability for damages resulting from their use to the 13 | fullest extent possible. 14 | 15 | Using Creative Commons Public Licenses 16 | 17 | Creative Commons public licenses provide a standard set of terms and 18 | conditions that creators and other rights holders may use to share 19 | original works of authorship and other material subject to copyright 20 | and certain other rights specified in the public license below. The 21 | following considerations are for informational purposes only, are not 22 | exhaustive, and do not form part of our licenses. 23 | 24 | Considerations for licensors: Our public licenses are 25 | intended for use by those authorized to give the public 26 | permission to use material in ways otherwise restricted by 27 | copyright and certain other rights. Our licenses are 28 | irrevocable. Licensors should read and understand the terms 29 | and conditions of the license they choose before applying it. 30 | Licensors should also secure all rights necessary before 31 | applying our licenses so that the public can reuse the 32 | material as expected. Licensors should clearly mark any 33 | material not subject to the license. This includes other CC- 34 | licensed material, or material used under an exception or 35 | limitation to copyright. More considerations for licensors: 36 | wiki.creativecommons.org/Considerations_for_licensors 37 | 38 | Considerations for the public: By using one of our public 39 | licenses, a licensor grants the public permission to use the 40 | licensed material under specified terms and conditions. If 41 | the licensor's permission is not necessary for any reason--for 42 | example, because of any applicable exception or limitation to 43 | copyright--then that use is not regulated by the license. Our 44 | licenses grant only permissions under copyright and certain 45 | other rights that a licensor has authority to grant. 
Use of 46 | the licensed material may still be restricted for other 47 | reasons, including because others have copyright or other 48 | rights in the material. A licensor may make special requests, 49 | such as asking that all changes be marked or described. 50 | Although not required by our licenses, you are encouraged to 51 | respect those requests where reasonable. More considerations 52 | for the public: 53 | wiki.creativecommons.org/Considerations_for_licensees 54 | 55 | ======================================================================= 56 | 57 | Creative Commons Attribution 4.0 International Public License 58 | 59 | By exercising the Licensed Rights (defined below), You accept and agree 60 | to be bound by the terms and conditions of this Creative Commons 61 | Attribution 4.0 International Public License ("Public License"). To the 62 | extent this Public License may be interpreted as a contract, You are 63 | granted the Licensed Rights in consideration of Your acceptance of 64 | these terms and conditions, and the Licensor grants You such rights in 65 | consideration of benefits the Licensor receives from making the 66 | Licensed Material available under these terms and conditions. 67 | 68 | 69 | Section 1 -- Definitions. 70 | 71 | a. Adapted Material means material subject to Copyright and Similar 72 | Rights that is derived from or based upon the Licensed Material 73 | and in which the Licensed Material is translated, altered, 74 | arranged, transformed, or otherwise modified in a manner requiring 75 | permission under the Copyright and Similar Rights held by the 76 | Licensor. For purposes of this Public License, where the Licensed 77 | Material is a musical work, performance, or sound recording, 78 | Adapted Material is always produced where the Licensed Material is 79 | synched in timed relation with a moving image. 80 | 81 | b. 
Adapter's License means the license You apply to Your Copyright 82 | and Similar Rights in Your contributions to Adapted Material in 83 | accordance with the terms and conditions of this Public License. 84 | 85 | c. Copyright and Similar Rights means copyright and/or similar rights 86 | closely related to copyright including, without limitation, 87 | performance, broadcast, sound recording, and Sui Generis Database 88 | Rights, without regard to how the rights are labeled or 89 | categorized. For purposes of this Public License, the rights 90 | specified in Section 2(b)(1)-(2) are not Copyright and Similar 91 | Rights. 92 | 93 | d. Effective Technological Measures means those measures that, in the 94 | absence of proper authority, may not be circumvented under laws 95 | fulfilling obligations under Article 11 of the WIPO Copyright 96 | Treaty adopted on December 20, 1996, and/or similar international 97 | agreements. 98 | 99 | e. Exceptions and Limitations means fair use, fair dealing, and/or 100 | any other exception or limitation to Copyright and Similar Rights 101 | that applies to Your use of the Licensed Material. 102 | 103 | f. Licensed Material means the artistic or literary work, database, 104 | or other material to which the Licensor applied this Public 105 | License. 106 | 107 | g. Licensed Rights means the rights granted to You subject to the 108 | terms and conditions of this Public License, which are limited to 109 | all Copyright and Similar Rights that apply to Your use of the 110 | Licensed Material and that the Licensor has authority to license. 111 | 112 | h. Licensor means the individual(s) or entity(ies) granting rights 113 | under this Public License. 114 | 115 | i. 
Share means to provide material to the public by any means or 116 | process that requires permission under the Licensed Rights, such 117 | as reproduction, public display, public performance, distribution, 118 | dissemination, communication, or importation, and to make material 119 | available to the public including in ways that members of the 120 | public may access the material from a place and at a time 121 | individually chosen by them. 122 | 123 | j. Sui Generis Database Rights means rights other than copyright 124 | resulting from Directive 96/9/EC of the European Parliament and of 125 | the Council of 11 March 1996 on the legal protection of databases, 126 | as amended and/or succeeded, as well as other essentially 127 | equivalent rights anywhere in the world. 128 | 129 | k. You means the individual or entity exercising the Licensed Rights 130 | under this Public License. Your has a corresponding meaning. 131 | 132 | 133 | Section 2 -- Scope. 134 | 135 | a. License grant. 136 | 137 | 1. Subject to the terms and conditions of this Public License, 138 | the Licensor hereby grants You a worldwide, royalty-free, 139 | non-sublicensable, non-exclusive, irrevocable license to 140 | exercise the Licensed Rights in the Licensed Material to: 141 | 142 | a. reproduce and Share the Licensed Material, in whole or 143 | in part; and 144 | 145 | b. produce, reproduce, and Share Adapted Material. 146 | 147 | 2. Exceptions and Limitations. For the avoidance of doubt, where 148 | Exceptions and Limitations apply to Your use, this Public 149 | License does not apply, and You do not need to comply with 150 | its terms and conditions. 151 | 152 | 3. Term. The term of this Public License is specified in Section 153 | 6(a). 154 | 155 | 4. Media and formats; technical modifications allowed. 
The 156 | Licensor authorizes You to exercise the Licensed Rights in 157 | all media and formats whether now known or hereafter created, 158 | and to make technical modifications necessary to do so. The 159 | Licensor waives and/or agrees not to assert any right or 160 | authority to forbid You from making technical modifications 161 | necessary to exercise the Licensed Rights, including 162 | technical modifications necessary to circumvent Effective 163 | Technological Measures. For purposes of this Public License, 164 | simply making modifications authorized by this Section 2(a) 165 | (4) never produces Adapted Material. 166 | 167 | 5. Downstream recipients. 168 | 169 | a. Offer from the Licensor -- Licensed Material. Every 170 | recipient of the Licensed Material automatically 171 | receives an offer from the Licensor to exercise the 172 | Licensed Rights under the terms and conditions of this 173 | Public License. 174 | 175 | b. No downstream restrictions. You may not offer or impose 176 | any additional or different terms or conditions on, or 177 | apply any Effective Technological Measures to, the 178 | Licensed Material if doing so restricts exercise of the 179 | Licensed Rights by any recipient of the Licensed 180 | Material. 181 | 182 | 6. No endorsement. Nothing in this Public License constitutes or 183 | may be construed as permission to assert or imply that You 184 | are, or that Your use of the Licensed Material is, connected 185 | with, or sponsored, endorsed, or granted official status by, 186 | the Licensor or others designated to receive attribution as 187 | provided in Section 3(a)(1)(A)(i). 188 | 189 | b. Other rights. 190 | 191 | 1. 
Moral rights, such as the right of integrity, are not 192 | licensed under this Public License, nor are publicity, 193 | privacy, and/or other similar personality rights; however, to 194 | the extent possible, the Licensor waives and/or agrees not to 195 | assert any such rights held by the Licensor to the limited 196 | extent necessary to allow You to exercise the Licensed 197 | Rights, but not otherwise. 198 | 199 | 2. Patent and trademark rights are not licensed under this 200 | Public License. 201 | 202 | 3. To the extent possible, the Licensor waives any right to 203 | collect royalties from You for the exercise of the Licensed 204 | Rights, whether directly or through a collecting society 205 | under any voluntary or waivable statutory or compulsory 206 | licensing scheme. In all other cases the Licensor expressly 207 | reserves any right to collect such royalties. 208 | 209 | 210 | Section 3 -- License Conditions. 211 | 212 | Your exercise of the Licensed Rights is expressly made subject to the 213 | following conditions. 214 | 215 | a. Attribution. 216 | 217 | 1. If You Share the Licensed Material (including in modified 218 | form), You must: 219 | 220 | a. retain the following if it is supplied by the Licensor 221 | with the Licensed Material: 222 | 223 | i. identification of the creator(s) of the Licensed 224 | Material and any others designated to receive 225 | attribution, in any reasonable manner requested by 226 | the Licensor (including by pseudonym if 227 | designated); 228 | 229 | ii. a copyright notice; 230 | 231 | iii. a notice that refers to this Public License; 232 | 233 | iv. a notice that refers to the disclaimer of 234 | warranties; 235 | 236 | v. a URI or hyperlink to the Licensed Material to the 237 | extent reasonably practicable; 238 | 239 | b. indicate if You modified the Licensed Material and 240 | retain an indication of any previous modifications; and 241 | 242 | c. 
indicate the Licensed Material is licensed under this 243 | Public License, and include the text of, or the URI or 244 | hyperlink to, this Public License. 245 | 246 | 2. You may satisfy the conditions in Section 3(a)(1) in any 247 | reasonable manner based on the medium, means, and context in 248 | which You Share the Licensed Material. For example, it may be 249 | reasonable to satisfy the conditions by providing a URI or 250 | hyperlink to a resource that includes the required 251 | information. 252 | 253 | 3. If requested by the Licensor, You must remove any of the 254 | information required by Section 3(a)(1)(A) to the extent 255 | reasonably practicable. 256 | 257 | 4. If You Share Adapted Material You produce, the Adapter's 258 | License You apply must not prevent recipients of the Adapted 259 | Material from complying with this Public License. 260 | 261 | 262 | Section 4 -- Sui Generis Database Rights. 263 | 264 | Where the Licensed Rights include Sui Generis Database Rights that 265 | apply to Your use of the Licensed Material: 266 | 267 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right 268 | to extract, reuse, reproduce, and Share all or a substantial 269 | portion of the contents of the database; 270 | 271 | b. if You include all or a substantial portion of the database 272 | contents in a database in which You have Sui Generis Database 273 | Rights, then the database in which You have Sui Generis Database 274 | Rights (but not its individual contents) is Adapted Material; and 275 | 276 | c. You must comply with the conditions in Section 3(a) if You Share 277 | all or a substantial portion of the contents of the database. 278 | 279 | For the avoidance of doubt, this Section 4 supplements and does not 280 | replace Your obligations under this Public License where the Licensed 281 | Rights include other Copyright and Similar Rights. 282 | 283 | 284 | Section 5 -- Disclaimer of Warranties and Limitation of Liability. 285 | 286 | a. 
UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE 287 | EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS 288 | AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF 289 | ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, 290 | IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, 291 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR 292 | PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, 293 | ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT 294 | KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT 295 | ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. 296 | 297 | b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE 298 | TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, 299 | NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, 300 | INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, 301 | COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR 302 | USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN 303 | ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR 304 | DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR 305 | IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. 306 | 307 | c. The disclaimer of warranties and limitation of liability provided 308 | above shall be interpreted in a manner that, to the extent 309 | possible, most closely approximates an absolute disclaimer and 310 | waiver of all liability. 311 | 312 | 313 | Section 6 -- Term and Termination. 314 | 315 | a. This Public License applies for the term of the Copyright and 316 | Similar Rights licensed here. However, if You fail to comply with 317 | this Public License, then Your rights under this Public License 318 | terminate automatically. 319 | 320 | b. Where Your right to use the Licensed Material has terminated under 321 | Section 6(a), it reinstates: 322 | 323 | 1. 
automatically as of the date the violation is cured, provided 324 | it is cured within 30 days of Your discovery of the 325 | violation; or 326 | 327 | 2. upon express reinstatement by the Licensor. 328 | 329 | For the avoidance of doubt, this Section 6(b) does not affect any 330 | right the Licensor may have to seek remedies for Your violations 331 | of this Public License. 332 | 333 | c. For the avoidance of doubt, the Licensor may also offer the 334 | Licensed Material under separate terms or conditions or stop 335 | distributing the Licensed Material at any time; however, doing so 336 | will not terminate this Public License. 337 | 338 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public 339 | License. 340 | 341 | 342 | Section 7 -- Other Terms and Conditions. 343 | 344 | a. The Licensor shall not be bound by any additional or different 345 | terms or conditions communicated by You unless expressly agreed. 346 | 347 | b. Any arrangements, understandings, or agreements regarding the 348 | Licensed Material not stated herein are separate from and 349 | independent of the terms and conditions of this Public License. 350 | 351 | 352 | Section 8 -- Interpretation. 353 | 354 | a. For the avoidance of doubt, this Public License does not, and 355 | shall not be interpreted to, reduce, limit, restrict, or impose 356 | conditions on any use of the Licensed Material that could lawfully 357 | be made without permission under this Public License. 358 | 359 | b. To the extent possible, if any provision of this Public License is 360 | deemed unenforceable, it shall be automatically reformed to the 361 | minimum extent necessary to make it enforceable. If the provision 362 | cannot be reformed, it shall be severed from this Public License 363 | without affecting the enforceability of the remaining terms and 364 | conditions. 365 | 366 | c. 
No term or condition of this Public License will be waived and no 367 | failure to comply consented to unless expressly agreed to by the 368 | Licensor. 369 | 370 | d. Nothing in this Public License constitutes or may be interpreted 371 | as a limitation upon, or waiver of, any privileges and immunities 372 | that apply to the Licensor or You, including from the legal 373 | processes of any jurisdiction or authority. 374 | 375 | 376 | ======================================================================= 377 | 378 | Creative Commons is not a party to its public 379 | licenses. Notwithstanding, Creative Commons may elect to apply one of 380 | its public licenses to material it publishes and in those instances 381 | will be considered the “Licensor.” The text of the Creative Commons 382 | public licenses is dedicated to the public domain under the CC0 Public 383 | Domain Dedication. Except for the limited purpose of indicating that 384 | material is shared under a Creative Commons public license or as 385 | otherwise permitted by the Creative Commons policies published at 386 | creativecommons.org/policies, Creative Commons does not authorize the 387 | use of the trademark "Creative Commons" or any other trademark or logo 388 | of Creative Commons without its prior written consent including, 389 | without limitation, in connection with any unauthorized modifications 390 | to any of its public licenses or any other arrangements, 391 | understandings, or agreements concerning use of licensed material. For 392 | the avoidance of doubt, this paragraph does not form part of the 393 | public licenses. 394 | 395 | Creative Commons may be contacted at creativecommons.org. 
396 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include pyemma_tutorials/notebooks *.ipynb 2 | recursive-include pyemma_tutorials/notebooks/static * 3 | prune pyemma_tutorials/notebooks/.ipynb_checkpoints 4 | include versioneer.py 5 | include pyemma_tutorials/_version.py 6 | include pyemma_tutorials/jupyter_notebook_config.json 7 | 8 | include LICENSE 9 | 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Introduction to Markov state modeling with the PyEMMA software 2 | 3 | [![CircleCI](https://circleci.com/gh/markovmodel/pyemma_tutorials.svg?style=svg)](https://circleci.com/gh/markovmodel/pyemma_tutorials) 4 | [![Binder](https://mybinder.org/badge.svg)](https://mybinder.org/v2/gh/markovmodel/pyemma_tutorials/master) 5 | [![CCA](https://img.shields.io/github/license/markovmodel/pyemma_tutorials.svg)](http://creativecommons.org/licenses/by/4.0/) 6 | ![Conda](https://img.shields.io/conda/dn/conda-forge/pyemma_tutorials.svg) 7 | ![Conda (channel only)](https://img.shields.io/conda/vn/conda-forge/pyemma_tutorials.svg) 8 | 9 | This work is licensed under a Creative Commons Attribution 4.0 International License. 10 | 11 | [PyEMMA](http://pyemma.org) (EMMA = Emma's Markov Model Algorithms) is an open source Python/C package for analysis of extensive molecular dynamics (MD) simulations. 12 | 13 | ### Content 14 | 15 | The first [notebook 📓](notebooks/00-pentapeptide-showcase.ipynb) in this tutorial guides through the basic analysis workflow using real MD data of a pentapeptide: 16 | 17 | 18 | 19 | We keep the details minimal throughout the showcase but point to the more specialized notebooks which allow you to go in-depth on selected topics. 
We recommend installing the PyEMMA tutorials with conda.
If you intend to install with pip, for which we cannot give any support, feel free to run:
To uninstall, you can remove the whole environment, which will also remove the contained software:
        # Key on the notebook file path (report.location[0]) so that all cells
        # of one notebook accumulate into a single timing entry.
        key = report.location[0]
        timings[key] += report.duration


def pytest_terminal_summary(terminalreporter, exitstatus):
    """Print the accumulated per-notebook execution times, sorted ascending."""
    terminalreporter.section('Notebook timings')
    s = sorted(timings.items(), key=lambda x: x[1])
    for nb, total in s:
        terminalreporter.write_line('%s took %.1f seconds' % (nb, total))

###############################################################################
def cell_skipped(cell_metadata):
    """Return True if a notebook cell should be skipped during testing.

    A cell is skipped when it is an exercise-solution stub (marked by the
    ``solution2_first`` metadata key of the exercise2 nbextension) or when it
    is explicitly flagged with ``skip`` in its metadata.
    """
    excercise_2_cell = cell_metadata.get('solution2_first', False)
    skip = cell_metadata.get('skip', False)
    if excercise_2_cell or skip:
        return True
    return False

#### Circle CI parallel execution #############################################
def pytest_collection_modifyitems(session, config, items):
    """Skip-mark stub cells and distribute notebooks across CircleCI nodes.

    Each collected item is one notebook cell; items sharing a ``parent``
    belong to the same notebook file.
    """
    for i in items:
        if cell_skipped(i.cell['metadata']):
            i.add_marker(pytest.mark.skip('solution stub or metadata["skip"]=True'))

    circle_node_total, circle_node_index = read_circleci_env_variables()
    if circle_node_total > 1:
        # Group the collected cells by their parent, i.e. the notebook file.
        by_parents = defaultdict(list)
        for index, item in enumerate(items):
            by_parents[item.parent].append(item)

        # Merge grouped parents: notebooks listed together in
        # ``notebook_groups`` must run on the same CI node (they share state).
        for n in notebook_groups:
            items_to_group = []
            keys_to_merge = []
            for p in by_parents:
                for nb in n:
                    if nb in p.name:
                        items_to_group.extend(by_parents[p])
                        keys_to_merge.append(p)
            for k in keys_to_merge:
                del by_parents[k]
            by_parents[tuple(keys_to_merge)] = items_to_group

        deselected = []
        # Round robin: assign whole notebooks to nodes by position and node index.
        for i, p in enumerate(by_parents.keys()):
            if i % circle_node_total != circle_node_index:
                deselected.extend(by_parents[p])
        for d in deselected:
            items.remove(d)
        executed_notebooks = [nb.name for nb in
                              set(x.parent for x in set(items) - set(deselected))]
        print('Notebooks to execute:', executed_notebooks)
config.hook.pytest_deselected(items=deselected) 80 | 81 | 82 | def read_circleci_env_variables(): 83 | """Read and convert CIRCLE_* environment variables""" 84 | circle_node_total = int(os.environ.get( 85 | "CIRCLE_NODE_TOTAL", "1").strip() or "1") 86 | circle_node_index = int(os.environ.get( 87 | "CIRCLE_NODE_INDEX", "0").strip() or "0") 88 | 89 | if circle_node_index >= circle_node_total: 90 | raise RuntimeError("CIRCLE_NODE_INDEX={} >= CIRCLE_NODE_TOTAL={}, should be less".format( 91 | circle_node_index, circle_node_total)) 92 | 93 | return circle_node_total, circle_node_index 94 | 95 | 96 | def pytest_report_header(config): 97 | """Add CircleCI information to report""" 98 | circle_node_total, circle_node_index = read_circleci_env_variables() 99 | return "CircleCI total nodes: {}, this node index: {}".format(circle_node_total, circle_node_index) 100 | 101 | ############################################################################### 102 | 103 | cells_per_notebook = defaultdict(list) 104 | 105 | 106 | def pytest_runtest_call(item): 107 | cells_per_notebook[item.parent].append(item) 108 | 109 | 110 | def pytest_sessionfinish(session, exitstatus): 111 | """ we store all notebooks in variable 'executed_notebooks' to a given path and convert them to html """ 112 | import nbformat as nbf 113 | import tempfile 114 | out_dir = os.getenv('NBVAL_OUTPUT', tempfile.mkdtemp( 115 | prefix='pyemma_tut_test_output')) 116 | print('write html output to', os.path.abspath(out_dir)) 117 | out_files = [] 118 | ipynbfiles = set(i.parent for i in session.items) 119 | for ipynbfile in ipynbfiles: 120 | out_file = os.path.join(out_dir, os.path.basename(ipynbfile.name)) 121 | # map output cells 122 | cells_with_non_skipped_output = (c for c in ipynbfile.nb.cells if hasattr(c, 'outputs') and not cell_skipped(c.metadata)) 123 | for cell, ipynbcell in zip(cells_with_non_skipped_output, cells_per_notebook[ipynbfile]): 124 | print(cell, ipynbcell) 125 | cell.outputs = ipynbcell.test_outputs 
126 | 127 | with open(out_file, 'x') as fh: 128 | nbf.write(ipynbfile.nb, fh) 129 | out_files.append(out_file) 130 | 131 | import subprocess 132 | import sys 133 | 134 | cmd = [sys.executable, '-m', 'jupyter', 135 | 'nbconvert', '--to=html'] + out_files 136 | print('converting via cmd:', cmd) 137 | subprocess.check_output(cmd) 138 | 139 | # delete source output notebooks 140 | for f in out_files: 141 | os.unlink(f) 142 | -------------------------------------------------------------------------------- /devtools/conda-recipe/meta.yaml: -------------------------------------------------------------------------------- 1 | package: 2 | name: pyemma_tutorials 3 | version: dev 4 | 5 | source: 6 | - path: ../.. 7 | 8 | build: 9 | script: python -m pip install --no-deps --ignore-installed . -v 10 | script_env: 11 | - NBVAL_OUTPUT 12 | entry_points: 13 | - pyemma_tutorials = pyemma_tutorials.cli:main 14 | 15 | osx_is_app: True 16 | noarch: python 17 | 18 | 19 | requirements: 20 | build: 21 | - pip 22 | run: 23 | - pyemma 24 | - numpy >=1.13 25 | - mdshare 26 | - notebook 27 | - nglview 28 | - jupyter_contrib_nbextensions 29 | - nbexamples 30 | 31 | test: 32 | imports: 33 | - pyemma_tutorials 34 | commands: 35 | - which pyemma_tutorials # [osx or linux] 36 | source_files: 37 | - conftest.py 38 | requires: 39 | - nbval 40 | - nbconvert 41 | 42 | commands: mkdir ~/junit; py.test --nbval -v --junit-xml=$HOME/junit/test.xml --pyargs pyemma_tutorials --durations=30 43 | -------------------------------------------------------------------------------- /devtools/create_bibliography_nb.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | import nbformat 4 | import os 5 | import re 6 | 7 | def export(in_file, out_file): 8 | nb = nbformat.v4.new_notebook() 9 | with open(in_file, encoding='utf8') as fh: 10 | bibtex = fh.read() 11 | 12 | src = ''.format(bibtex=bibtex) 13 | 14 | # remove bibdesk comments 15 | src = 
re.sub(pattern='@comment.*\{.*\}\}', repl='', string=src, flags=re.DOTALL) 16 | src = re.sub(pattern=r"\{\\'\{e\}\}", repl='é', string=src) 17 | src = re.sub(pattern=r"\{\\'\{a\}\}", repl='á', string=src) 18 | src = re.sub(pattern=r'\\"\{o\}', repl='ö', string=src) 19 | src = re.sub(pattern=r'\\"\{u\}', repl='ü', string=src) 20 | src = re.sub(pattern='pcca\+\+', repl='pcca_plus_plus', string=src) 21 | cell = nbformat.v4.new_markdown_cell(src) 22 | nb.cells.append(cell) 23 | 24 | with open(out_file, 'w', encoding='utf-8') as fh: 25 | nbformat.write(nb, fh) 26 | 27 | 28 | if __name__ == '__main__': 29 | devtools_dir = os.path.abspath(os.path.dirname(__file__)) 30 | in_file = os.path.join(devtools_dir, '../manuscript/literature.bib') 31 | out_file = os.path.join(devtools_dir, '../notebooks/Bibliography.ipynb') 32 | export(in_file, out_file) 33 | -------------------------------------------------------------------------------- /devtools/install_miniconda.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # make TARGET overrideable with env 4 | : ${TARGET:=$HOME/miniconda} 5 | 6 | function install_miniconda { 7 | if [ -d $TARGET ]; then echo "file exists"; return; fi 8 | echo "installing miniconda to $TARGET" 9 | platform="Linux" 10 | wget http://repo.continuum.io/miniconda/Miniconda3-latest-$platform-x86_64.sh -O mc.sh -o /dev/null 11 | bash mc.sh -b -f -p $TARGET 12 | } 13 | 14 | install_miniconda 15 | -------------------------------------------------------------------------------- /manuscript/.gitignore: -------------------------------------------------------------------------------- 1 | ## Core latex/pdflatex auxiliary files: 2 | *.aux 3 | *.lof 4 | *.log 5 | *.lot 6 | *.fls 7 | *.out 8 | *.toc 9 | *.fmt 10 | *.fot 11 | *.cb 12 | *.cb2 13 | .*.lb 14 | 15 | ## Intermediate documents: 16 | *.dvi 17 | *.xdv 18 | *-converted-to.* 19 | # these rules might exclude image files for figures etc. 
20 | # *.ps 21 | # *.eps 22 | # *.pdf 23 | 24 | ## Generated if empty string is given at "Please type another file name for output:" 25 | .pdf 26 | 27 | ## Bibliography auxiliary files (bibtex/biblatex/biber): 28 | *.bbl 29 | *.bcf 30 | *.blg 31 | *-blx.aux 32 | *-blx.bib 33 | *.run.xml 34 | 35 | ## Build tool auxiliary files: 36 | *.fdb_latexmk 37 | *.synctex 38 | *.synctex(busy) 39 | *.synctex.gz 40 | *.synctex.gz(busy) 41 | *.pdfsync 42 | 43 | ## Build tool directories for auxiliary files 44 | # latexrun 45 | latex.out/ 46 | 47 | ## Auxiliary and intermediate files from other packages: 48 | # algorithms 49 | *.alg 50 | *.loa 51 | 52 | # achemso 53 | acs-*.bib 54 | 55 | # amsthm 56 | *.thm 57 | 58 | # beamer 59 | *.nav 60 | *.pre 61 | *.snm 62 | *.vrb 63 | 64 | # changes 65 | *.soc 66 | 67 | # cprotect 68 | *.cpt 69 | 70 | # elsarticle (documentclass of Elsevier journals) 71 | *.spl 72 | 73 | # endnotes 74 | *.ent 75 | 76 | # fixme 77 | *.lox 78 | 79 | # feynmf/feynmp 80 | *.mf 81 | *.mp 82 | *.t[1-9] 83 | *.t[1-9][0-9] 84 | *.tfm 85 | 86 | #(r)(e)ledmac/(r)(e)ledpar 87 | *.end 88 | *.?end 89 | *.[1-9] 90 | *.[1-9][0-9] 91 | *.[1-9][0-9][0-9] 92 | *.[1-9]R 93 | *.[1-9][0-9]R 94 | *.[1-9][0-9][0-9]R 95 | *.eledsec[1-9] 96 | *.eledsec[1-9]R 97 | *.eledsec[1-9][0-9] 98 | *.eledsec[1-9][0-9]R 99 | *.eledsec[1-9][0-9][0-9] 100 | *.eledsec[1-9][0-9][0-9]R 101 | 102 | # glossaries 103 | *.acn 104 | *.acr 105 | *.glg 106 | *.glo 107 | *.gls 108 | *.glsdefs 109 | 110 | # gnuplottex 111 | *-gnuplottex-* 112 | 113 | # gregoriotex 114 | *.gaux 115 | *.gtex 116 | 117 | # htlatex 118 | *.4ct 119 | *.4tc 120 | *.idv 121 | *.lg 122 | *.trc 123 | *.xref 124 | 125 | # hyperref 126 | *.brf 127 | 128 | # knitr 129 | *-concordance.tex 130 | # TODO Comment the next line if you want to keep your tikz graphics files 131 | *.tikz 132 | *-tikzDictionary 133 | 134 | # listings 135 | *.lol 136 | 137 | # makeidx 138 | *.idx 139 | *.ilg 140 | *.ind 141 | *.ist 142 | 143 | # minitoc 144 | 
*.maf 145 | *.mlf 146 | *.mlt 147 | *.mtc[0-9]* 148 | *.slf[0-9]* 149 | *.slt[0-9]* 150 | *.stc[0-9]* 151 | 152 | # minted 153 | _minted* 154 | *.pyg 155 | 156 | # morewrites 157 | *.mw 158 | 159 | # nomencl 160 | *.nlg 161 | *.nlo 162 | *.nls 163 | 164 | # pax 165 | *.pax 166 | 167 | # pdfpcnotes 168 | *.pdfpc 169 | 170 | # sagetex 171 | *.sagetex.sage 172 | *.sagetex.py 173 | *.sagetex.scmd 174 | 175 | # scrwfile 176 | *.wrt 177 | 178 | # sympy 179 | *.sout 180 | *.sympy 181 | sympy-plots-for-*.tex/ 182 | 183 | # pdfcomment 184 | *.upa 185 | *.upb 186 | 187 | # pythontex 188 | *.pytxcode 189 | pythontex-files-*/ 190 | 191 | # thmtools 192 | *.loe 193 | 194 | # TikZ & PGF 195 | *.dpth 196 | *.md5 197 | *.auxlock 198 | 199 | # todonotes 200 | *.tdo 201 | 202 | # easy-todo 203 | *.lod 204 | 205 | # xmpincl 206 | *.xmpi 207 | 208 | # xindy 209 | *.xdy 210 | 211 | # xypic precompiled matrices 212 | *.xyc 213 | 214 | # endfloat 215 | *.ttt 216 | *.fff 217 | 218 | # Latexian 219 | TSWLatexianTemp* 220 | 221 | ## Editors: 222 | # WinEdt 223 | *.bak 224 | *.sav 225 | 226 | # Texpad 227 | .texpadtmp 228 | 229 | # LyX 230 | *.lyx~ 231 | 232 | # Kile 233 | *.backup 234 | 235 | # KBibTeX 236 | *~[0-9]* 237 | 238 | # auto folder when using emacs and auctex 239 | ./auto/* 240 | *.el 241 | 242 | # expex forward references with \gathertags 243 | *-tags.tex 244 | 245 | # standalone packages 246 | *.sta 247 | -------------------------------------------------------------------------------- /manuscript/README.md: -------------------------------------------------------------------------------- 1 | ## PDF download 2 | A compiled version of the manuscript can be found [here](https://github.com/markovmodel/pyemma_tutorials/tree/master/releases). 
3 | -------------------------------------------------------------------------------- /manuscript/figures/figure_1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markovmodel/pyemma_tutorials/6b9183686d2238d4f60c752a73e9b710c667ec10/manuscript/figures/figure_1.pdf -------------------------------------------------------------------------------- /manuscript/figures/figure_2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markovmodel/pyemma_tutorials/6b9183686d2238d4f60c752a73e9b710c667ec10/manuscript/figures/figure_2.pdf -------------------------------------------------------------------------------- /manuscript/figures/figure_3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markovmodel/pyemma_tutorials/6b9183686d2238d4f60c752a73e9b710c667ec10/manuscript/figures/figure_3.pdf -------------------------------------------------------------------------------- /manuscript/figures/figure_4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markovmodel/pyemma_tutorials/6b9183686d2238d4f60c752a73e9b710c667ec10/manuscript/figures/figure_4.pdf -------------------------------------------------------------------------------- /manuscript/figures/figure_5.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markovmodel/pyemma_tutorials/6b9183686d2238d4f60c752a73e9b710c667ec10/manuscript/figures/figure_5.pdf -------------------------------------------------------------------------------- /manuscript/figures/figure_6.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markovmodel/pyemma_tutorials/6b9183686d2238d4f60c752a73e9b710c667ec10/manuscript/figures/figure_6.pdf 
-------------------------------------------------------------------------------- /manuscript/figures/figure_7.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markovmodel/pyemma_tutorials/6b9183686d2238d4f60c752a73e9b710c667ec10/manuscript/figures/figure_7.pdf -------------------------------------------------------------------------------- /manuscript/figures/tutorials-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markovmodel/pyemma_tutorials/6b9183686d2238d4f60c752a73e9b710c667ec10/manuscript/figures/tutorials-logo.png -------------------------------------------------------------------------------- /manuscript/literature.bib: -------------------------------------------------------------------------------- 1 | @article{hmm-baum-welch-alg, 2 | Author = {Leonard E. Baum and Ted Petrie and George Soules and Norman Weiss}, 3 | Title = {A Maximization Technique Occurring in the Statistical Analysis of Probabilistic Functions of Markov Chains}, 4 | Journal = {Ann. Math. Stat.}, 5 | Year = {1970}, 6 | Volume = {41}, 7 | Number = {1}, 8 | Pages = {164--171}, 9 | URL = {http://www.jstor.org/stable/2239727}, 10 | } 11 | @article{sasa-calculation, 12 | Author = {A. Shrake and J.A. Rupley}, 13 | Title = {Environment and exposure to solvent of protein atoms. Lysozyme and insulin}, 14 | Journal = {J. Mol. Biol.}, 15 | Year = {1973}, 16 | Volume = {79}, 17 | Number = {2}, 18 | Pages = {351--371}, 19 | Month = {sep}, 20 | URL = {https://doi.org/10.1016/0022-2836(73)90011-9}, 21 | DOI = {10.1016/0022-2836(73)90011-9} 22 | } 23 | @article{hmm-tutorial, 24 | Author = {L.R. Rabiner}, 25 | Title = {A tutorial on hidden Markov models and selected applications in speech recognition}, 26 | Journal = {Proc. 
IEEE}, 27 | Year = {1989}, 28 | Volume = {77}, 29 | Number = {2}, 30 | Pages = {257--286}, 31 | URL = {https://doi.org/10.1109/5.18626}, 32 | DOI = {10.1109/5.18626} 33 | } 34 | @article{tica2, 35 | Author = {Molgedey, L. and Schuster, H. G.}, 36 | Title = {Separation of a mixture of independent signals using time delayed correlations}, 37 | Journal = {Phys. Rev. Lett.}, 38 | Year = {1994}, 39 | Volume = {72}, 40 | Number = {23}, 41 | Pages = {3634--3637}, 42 | Month = {Jun}, 43 | URL = {http://dx.doi.org/10.1103/PhysRevLett.72.3634}, 44 | DOI = {10.1103/physrevlett.72.3634} 45 | } 46 | @article{vmd, 47 | Author = {William Humphrey and Andrew Dalke and Klaus Schulten}, 48 | Title = {{VMD}: Visual molecular dynamics}, 49 | Journal = {J. Mol. Graph.}, 50 | Year = {1996}, 51 | Volume = {14}, 52 | Number = {1}, 53 | Pages = {33--38}, 54 | Month = {feb}, 55 | URL = {https://doi.org/10.1016/0263-7855(96)00018-5}, 56 | DOI = {10.1016/0263-7855(96)00018-5} 57 | } 58 | @article{schuette-msm, 59 | Author = {Ch Sch\"{u}tte and A Fischer and W Huisinga and P Deuflhard}, 60 | Title = {A Direct Approach to Conformational Dynamics Based on Hybrid Monte Carlo}, 61 | Journal = {J. Comput. Phys.}, 62 | Year = {1999}, 63 | Volume = {151}, 64 | Number = {1}, 65 | Pages = {146--168}, 66 | Month = {may}, 67 | URL = {https://doi.org/10.1006/jcph.1999.6231}, 68 | DOI = {10.1006/jcph.1999.6231} 69 | } 70 | @misc{scipy, 71 | Title = {{SciPy}: Open source scientific tools for {Python}}, 72 | Author = {Eric Jones and Travis Oliphant and Pearu Peterson and others}, 73 | Year = {2001--}, 74 | URL = {http://www.scipy.org/}, 75 | } 76 | @inproceedings{aggarwal_surprising_2001, 77 | Author = {Aggarwal, Charu C. 
and Hinneburg, Alexander and Keim, Daniel A.}, 78 | Title = {On the {Surprising} {Behavior} of {Distance} {Metrics} in {High} {Dimensional} {Space}}, 79 | Booktitle = {Database {Theory} --- {ICDT} 2001}, 80 | Year = {2001}, 81 | Editor = {Van den Bussche, Jan and Vianu, Victor}, 82 | Pages = {420--434}, 83 | Series = {Lecture {Notes} in {Computer} {Science}}, 84 | Publisher = {Springer Berlin Heidelberg}, 85 | } 86 | @article{singhal-msm-naming, 87 | Author = {Nina Singhal and Christopher D. Snow and Vijay S. Pande}, 88 | Title = {Using path sampling to build better Markovian state models: Predicting the folding rate and mechanism of a tryptophan zipper beta hairpin}, 89 | Journal = {J. Chem. Phys.}, 90 | Year = {2004}, 91 | Volume = {121}, 92 | Number = {1}, 93 | Pages = {415}, 94 | URL = {https://doi.org/10.1063/1.1738647}, 95 | DOI = {10.1063/1.1738647} 96 | } 97 | @article{swope-its, 98 | Author = {William C. Swope and Jed W. Pitera and Frank Suits}, 99 | Title = {Describing Protein Folding Kinetics by Molecular Dynamics Simulations. 1. Theory{\textdagger}}, 100 | Journal = {J. Phys. Chem. B}, 101 | Year = {2004}, 102 | Volume = {108}, 103 | Number = {21}, 104 | Pages = {6571--6581}, 105 | Month = {may}, 106 | URL = {https://doi.org/10.1021/jp037421y}, 107 | DOI = {10.1021/jp037421y} 108 | } 109 | @article{Deuflhard2005-pcca, 110 | Author = {Peter Deuflhard and Marcus Weber}, 111 | Title = {Robust Perron cluster analysis in conformation dynamics}, 112 | Journal = {Linear Algebra Appl.}, 113 | Year = {2005}, 114 | Volume = {398}, 115 | Pages = {161--184}, 116 | Month = {mar}, 117 | URL = {https://doi.org/10.1016/j.laa.2004.10.026}, 118 | DOI = {10.1016/j.laa.2004.10.026} 119 | } 120 | @article{weinan-tpt, 121 | Author = {Weinan E. and Eric Vanden-Eijnden}, 122 | Title = {Towards a Theory of Transition Paths}, 123 | Journal = {J. Stat. 
Phys.}, 124 | Year = {2006}, 125 | Volume = {123}, 126 | Number = {3}, 127 | Pages = {503--523}, 128 | Month = {may}, 129 | URL = {https://doi.org/10.1007/s10955-005-9003-9}, 130 | DOI = {10.1007/s10955-005-9003-9} 131 | } 132 | @article{matplotlib, 133 | Author = {Hunter, J. D.}, 134 | Title = {Matplotlib: A 2D graphics environment}, 135 | Journal = {Comput. Sci. Eng.}, 136 | Year = 2007, 137 | Volume = {9}, 138 | Number = {3}, 139 | Pages = {90--95}, 140 | DOI = {10.1109/MCSE.2007.55} 141 | } 142 | @article{Kube2007-pcca+, 143 | Author = {Susanna Kube and Marcus Weber}, 144 | Title = {A coarse graining method for the identification of transition rates between molecular conformations}, 145 | Journal = {J. Chem. Phys.}, 146 | Year = {2007}, 147 | Volume = {126}, 148 | Number = {2}, 149 | Pages = {024103}, 150 | Month = {jan}, 151 | URL = {https://doi.org/10.1063/1.2404953}, 152 | DOI = {10.1063/1.2404953} 153 | } 154 | @article{noe2007jcp, 155 | Author = {Frank No{\'{e}} and Illia Horenko and Christof Sch\"{u}tte and Jeremy C. Smith}, 156 | Title = {Hierarchical analysis of conformational dynamics in biomolecules: Transition networks of metastable states}, 157 | Journal = {J. Chem. Phys.}, 158 | Year = {2007}, 159 | Volume = {126}, 160 | Number = {15}, 161 | Pages = {155102}, 162 | Month = {apr}, 163 | URL = {https://doi.org/10.1063/1.2714539}, 164 | DOI = {10.1063/1.2714539} 165 | } 166 | @article{chodera2007jcp, 167 | Author = {John D. Chodera and Nina Singhal and Vijay S. Pande and Ken A. Dill and William C. Swope}, 168 | Title = {Automatic discovery of metastable states for the construction of Markov models of macromolecular conformational dynamics}, 169 | Journal = {J. Chem. 
Phys.}, 170 | Year = {2007}, 171 | Volume = {126}, 172 | Number = {15}, 173 | Pages = {155101}, 174 | Month = {apr}, 175 | URL = {https://doi.org/10.1063/1.2714538}, 176 | DOI = {10.1063/1.2714538} 177 | } 178 | @article{buchete-msm-2008, 179 | Author = {Nicolae-Viorel Buchete and Gerhard Hummer}, 180 | Title = {Coarse Master Equations for Peptide Folding Dynamics{\textdagger}}, 181 | Journal = {J. Phys. Chem. B}, 182 | Year = {2008}, 183 | Volume = {112}, 184 | Number = {19}, 185 | Pages = {6057--6069}, 186 | Month = {may}, 187 | URL = {https://doi.org/10.1021/jp0761665}, 188 | DOI = {10.1021/jp0761665} 189 | } 190 | @article{noe-tmat-sampling, 191 | Author = {Frank No{\'{e}}}, 192 | Title = {Probability distributions of molecular observables computed from Markov models}, 193 | Journal = {J. Chem. Phys.}, 194 | Year = {2008}, 195 | Volume = {128}, 196 | Number = {24}, 197 | Pages = {244103}, 198 | Month = {jun}, 199 | URL = {https://doi.org/10.1063/1.2916718}, 200 | DOI = {10.1063/1.2916718} 201 | } 202 | @article{metzner-msm-tpt, 203 | Author = {Philipp Metzner and Christof Schütte and Eric Vanden-Eijnden}, 204 | Title = {Transition Path Theory for Markov Jump Processes}, 205 | Journal = {Multiscale Model. Simul.}, 206 | Year = {2009}, 207 | Volume = {7}, 208 | Number = {3}, 209 | Pages = {1192--1219}, 210 | Month = {jan}, 211 | URL = {https://doi.org/10.1137/070699500}, 212 | DOI = {10.1137/070699500} 213 | } 214 | @article{bowman-msm-2009, 215 | Author = {Gregory R. Bowman and Kyle A. Beauchamp and George Boxer and Vijay S. Pande}, 216 | Title = {Progress and challenges in the automated construction of Markov state models for full protein systems}, 217 | Journal = {J. Chem. 
Phys.}, 218 | Year = {2009}, 219 | Volume = {131}, 220 | Number = {12}, 221 | Pages = {124101}, 222 | Month = {sep}, 223 | URL = {https://doi.org/10.1063/1.3216567}, 224 | DOI = {10.1063/1.3216567} 225 | } 226 | @article{noe-folding-pathways, 227 | Author = {Frank No{\'{e}} and Christof Sch\"{u}tte and Eric Vanden-Eijnden and Lothar Reich and Thomas R. Weikl}, 228 | Title = {Constructing the equilibrium ensemble of folding pathways from short off-equilibrium simulations}, 229 | Journal = {Proc. Natl. Acad. Sci. USA}, 230 | Year = {2009}, 231 | Volume = {106}, 232 | Number = {45}, 233 | Pages = {19011--19016}, 234 | Month = {nov}, 235 | URL = {https://doi.org/10.1073/pnas.0905466106}, 236 | DOI = {10.1073/pnas.0905466106} 237 | } 238 | @article{sarich-msm-quality, 239 | Author = {Marco Sarich and Frank Noé and Christof Schütte}, 240 | Title = {On the Approximation Quality of Markov State Models}, 241 | Journal = {Multiscale Model. Simul.}, 242 | Year = {2010}, 243 | Volume = {8}, 244 | Number = {4}, 245 | Pages = {1154--1177}, 246 | Month = {jan}, 247 | URL = {https://doi.org/10.1137/090764049}, 248 | DOI = {10.1137/090764049} 249 | } 250 | @article{sklearn, 251 | Author = {Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V. and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P. and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.}, 252 | Title = {Scikit-learn: Machine Learning in {P}ython}, 253 | Journal = {J. Mach. Learn. Res.}, 254 | Year = {2011}, 255 | Volume = {12}, 256 | Pages = {2825--2830}, 257 | } 258 | @article{msm-jhp, 259 | Author = {Prinz, Jan-Hendrik and Wu, Hao and Sarich, Marco and Keller, Bettina and Senne, Martin and Held, Martin and Chodera, John D. and Schütte, Christof and Noé, Frank}, 260 | Title = {Markov models of molecular kinetics: Generation and validation}, 261 | Journal = {J. Chem. 
Phys.}, 262 | Year = {2011}, 263 | Volume = {134}, 264 | Number = {17}, 265 | Pages = {174105}, 266 | URL = {http://scitation.aip.org/content/aip/journal/jcp/134/17/10.1063/1.3565032}, 267 | DOI = {http://dx.doi.org/10.1063/1.3565032} 268 | } 269 | @article{noe-fingerprints, 270 | Author = {F. Noe and S. Doose and I. Daidone and M. Lollmann and M. Sauer and J. D. Chodera and J. C. Smith}, 271 | Title = {Dynamical fingerprints for probing individual relaxation processes in biomolecular dynamics with simulations and kinetic experiments}, 272 | Journal = {Proc. Natl. Acad. Sci. USA}, 273 | Year = {2011}, 274 | Volume = {108}, 275 | Number = {12}, 276 | Pages = {4822--4827}, 277 | Month = {mar}, 278 | URL = {https://doi.org/10.1073/pnas.1004646108}, 279 | DOI = {10.1073/pnas.1004646108} 280 | } 281 | @article{bhmm-preprint, 282 | Author = {{Chodera}, J.~D. and {Elms}, P. and {Noé}, F. and {Keller}, B. and {Kaiser}, C.~M. and {Ewall-Wice}, A. and {Marqusee}, S. and {Bustamante}, C. and {Singhal Hinrichs}, N.}, 283 | Title = {{Bayesian hidden Markov model analysis of single-molecule force spectroscopy: Characterizing kinetics under measurement uncertainty}}, 284 | Journal = {arXiv preprint arXiv:1108.1430}, 285 | Year = 2011, 286 | Month = aug, 287 | URL = {https://arxiv.org/pdf/1108.1430.pdf}, 288 | } 289 | @article{dror2012biomolecular, 290 | Author = {Ron O. Dror and Robert M. Dirks and J.P. Grossman and Huafeng Xu and David E. Shaw}, 291 | Title = {Biomolecular Simulation: A Computational Microscope for Molecular Biology}, 292 | Journal = {Annu. Rev. 
Biophys.}, 293 | Year = {2012}, 294 | Volume = {41}, 295 | Number = {1}, 296 | Pages = {429--452}, 297 | Month = {jun}, 298 | URL = {https://doi.org/10.1146/annurev-biophys-042910-155245}, 299 | DOI = {10.1146/annurev-biophys-042910-155245} 300 | } 301 | @book{schuette-sarich-book, 302 | Title = {Metastability and Markov State Models in Molecular Dynamics}, 303 | Publisher = {American Mathematical Society}, 304 | Year = {2013}, 305 | Author = {M. Sarich and C. Sch\"{u}tte}, 306 | Series = {Courant Lecture Notes}, 307 | } 308 | @article{noe-vac, 309 | Author = {Frank No{\'{e}} and Feliks N\"{u}ske}, 310 | Title = {A Variational Approach to Modeling Slow Processes in Stochastic Dynamical Systems}, 311 | Journal = {Multiscale Model. Simul.}, 312 | Year = {2013}, 313 | Volume = {11}, 314 | Number = {2}, 315 | Pages = {635--655}, 316 | Month = {jan}, 317 | URL = {https://doi.org/10.1137/110858616}, 318 | DOI = {10.1137/110858616} 319 | } 320 | @article{tica3, 321 | Author = {Schwantes, Christian R. and Pande, Vijay S.}, 322 | Title = {Improvements in Markov State Model Construction Reveal Many Non-Native Interactions in the Folding of NTL9}, 323 | Journal = {J. Chem. Theory Comput.}, 324 | Year = {2013}, 325 | Volume = {9}, 326 | Number = {4}, 327 | Pages = {2000--2009}, 328 | Month = {Apr}, 329 | URL = {http://dx.doi.org/10.1021/ct300878a}, 330 | DOI = {10.1021/ct300878a} 331 | } 332 | @article{pcca++, 333 | Author = {Susanna R\"{o}blitz and Marcus Weber}, 334 | Title = {Fuzzy spectral clustering by {PCCA}+: application to Markov state models and data classification}, 335 | Journal = {Adv. Data Anal. 
Classif.}, 336 | Year = {2013}, 337 | Volume = {7}, 338 | Number = {2}, 339 | Pages = {147--179}, 340 | Month = {may}, 341 | URL = {https://doi.org/10.1007/s11634-013-0134-6}, 342 | DOI = {10.1007/s11634-013-0134-6} 343 | } 344 | @article{tica, 345 | Author = {Guillermo P{\'{e}}rez-Hern{\'{a}}ndez and Fabian Paul and Toni Giorgino and Gianni De Fabritiis and Frank No{\'{e}}}, 346 | Title = {Identification of slow molecular order parameters for Markov model construction}, 347 | Journal = {J. Chem. Phys.}, 348 | Year = {2013}, 349 | Volume = {139}, 350 | Number = {1}, 351 | Pages = {015102}, 352 | Month = {jul}, 353 | URL = {https://doi.org/10.1063/1.4811489}, 354 | DOI = {10.1063/1.4811489} 355 | } 356 | @article{noe-proj-hid-msm, 357 | Author = {Frank No{\'{e}} and Hao Wu and Jan-Hendrik Prinz and Nuria Plattner}, 358 | Title = {Projected and hidden Markov models for calculating kinetics and metastable states of complex molecules}, 359 | Journal = {J. Chem. Phys.}, 360 | Year = {2013}, 361 | Volume = {139}, 362 | Number = {18}, 363 | Pages = {184114}, 364 | Month = {nov}, 365 | URL = {https://doi.org/10.1063/1.4828816}, 366 | DOI = {10.1063/1.4828816} 367 | } 368 | @article{noe-dy-neut-scatt, 369 | Author = {Benjamin Lindner and Zheng Yi and Jan-Hendrik Prinz and Jeremy C. Smith and Frank No{\'{e}}}, 370 | Title = {Dynamic neutron scattering from conformational dynamics. I. Theory and Markov models}, 371 | Journal = {J. Chem. Phys.}, 372 | Year = {2013}, 373 | Volume = {139}, 374 | Number = {17}, 375 | Pages = {175101}, 376 | Month = {nov}, 377 | URL = {https://doi.org/10.1063/1.4824070}, 378 | DOI = {10.1063/1.4824070} 379 | } 380 | @book{msm-book, 381 | Title = {An Introduction to Markov State Models and Their Application to Long Timescale Molecular Simulation}, 382 | Publisher = {Springer Netherlands}, 383 | Year = 2014, 384 | Author = {Gregory R. Bowman and Vijay S. Pande and Frank No{\'{e}}}, 385 | Editor = {Gregory R. Bowman and Vijay S. 
Pande and Frank No{\'{e}}}, 386 | URL = {https://doi.org/10.1007%2F978-94-007-7606-7}, 387 | DOI = {10.1007/978-94-007-7606-7} 388 | } 389 | @article{jhp-spectral-rate-theory, 390 | Author = {Jan-Hendrik Prinz and John D. Chodera and Frank No{\'{e}}}, 391 | Title = {Spectral Rate Theory for Two-State Kinetics}, 392 | Journal = {Phys. Rev. X}, 393 | Year = {2014}, 394 | Volume = {4}, 395 | Number = {1}, 396 | Month = {feb}, 397 | URL = {https://doi.org/10.1103/physrevx.4.011020}, 398 | DOI = {10.1103/physrevx.4.011020} 399 | } 400 | @article{nueske-vamk, 401 | Author = {Feliks N\"{u}ske and Bettina G. Keller and Guillermo P{\'{e}}rez-Hern{\'{a}}ndez and Antonia S. J. S. Mey and Frank No{\'{e}}}, 402 | Title = {Variational Approach to Molecular Kinetics}, 403 | Journal = {J. Chem. Theory Comput.}, 404 | Year = {2014}, 405 | Volume = {10}, 406 | Number = {4}, 407 | Pages = {1739--1752}, 408 | Month = {mar}, 409 | URL = {https://doi.org/10.1021/ct4009156}, 410 | DOI = {10.1021/ct4009156} 411 | } 412 | @article{Chodera2014, 413 | Author = {John D Chodera and Frank No{\'{e}}}, 414 | Title = {Markov state models of biomolecular conformational dynamics}, 415 | Journal = {Curr. Opin. Struct. Biol.}, 416 | Year = {2014}, 417 | Volume = {25}, 418 | Pages = {135--144}, 419 | Month = {apr}, 420 | URL = {https://doi.org/10.1016/j.sbi.2014.04.002}, 421 | DOI = {10.1016/j.sbi.2014.04.002} 422 | } 423 | @article{dtram, 424 | Author = {Hao Wu and Antonia S. J. S. Mey and Edina Rosta and Frank No{\'{e}}}, 425 | Title = {Statistically optimal analysis of state-discretized trajectory data from multiple thermodynamic states}, 426 | Journal = {J. Chem. 
Phys.}, 427 | Year = {2014}, 428 | Volume = {141}, 429 | Number = {21}, 430 | Pages = {214106}, 431 | Month = {dec}, 432 | URL = {https://doi.org/10.1063/1.4902240}, 433 | DOI = {10.1063/1.4902240} 434 | } 435 | @article{plattner_protein_2015, 436 | Author = {Plattner, Nuria and Noé, Frank}, 437 | Title = {Protein conformational plasticity and complex ligand-binding kinetics explored by atomistic simulations and {Markov} models}, 438 | Journal = {Nat. Commun.}, 439 | Year = {2015}, 440 | Volume = {6}, 441 | Pages = {7653}, 442 | URL = {http://www.nature.com/ncomms/2015/150702/ncomms8653/full/ncomms8653.html}, 443 | DOI = {10.1038/ncomms8653} 444 | } 445 | @article{mdtraj, 446 | Author = {McGibbon, Robert T. and Beauchamp, Kyle A. and Harrigan, Matthew P. and Klein, Christoph and Swails, Jason M. and Hernández, Carlos X. and Schwantes, Christian R. and Wang, Lee-Ping and Lane, Thomas J. and Pande, Vijay S.}, 447 | Title = {MDTraj: A Modern Open Library for the Analysis of Molecular Dynamics Trajectories}, 448 | Journal = {Biophys. J.}, 449 | Year = {2015}, 450 | Volume = {109}, 451 | Number = {8}, 452 | Pages = {1528 -- 1532}, 453 | DOI = {10.1016/j.bpj.2015.08.015} 454 | } 455 | @book{numpy, 456 | Title = {Guide to NumPy}, 457 | Publisher = {CreateSpace Independent Publishing Platform}, 458 | Year = {2015}, 459 | Author = {Oliphant, Travis E.}, 460 | Address = {USA}, 461 | Edition = {2nd}, 462 | } 463 | @article{gmrq, 464 | Author = {Robert T. McGibbon and Vijay S. Pande}, 465 | Title = {Variational cross-validation of slow dynamical modes in molecular kinetics}, 466 | Journal = {J. Chem. 
Phys.}, 467 | Year = {2015}, 468 | Volume = {142}, 469 | Number = {12}, 470 | Pages = {124105}, 471 | Month = {mar}, 472 | URL = {https://doi.org/10.1063/1.4916292}, 473 | DOI = {10.1063/1.4916292} 474 | } 475 | @article{kinetic-maps, 476 | Author = {Noé, Frank and Clementi, Cecilia}, 477 | Title = {Kinetic Distance and Kinetic Maps from Molecular Dynamics Simulation}, 478 | Journal = {J. Chem. Theory Comput.}, 479 | Year = {2015}, 480 | Volume = {11}, 481 | Number = {10}, 482 | Pages = {5002--5011}, 483 | Month = {Oct}, 484 | URL = {http://dx.doi.org/10.1021/acs.jctc.5b00553}, 485 | DOI = {10.1021/acs.jctc.5b00553} 486 | } 487 | @article{pyemma, 488 | Author = {Martin K. Scherer and Benjamin Trendelkamp-Schroer and Fabian Paul and Guillermo P{\'{e}}rez-Hern{\'{a}}ndez and Moritz Hoffmann and Nuria Plattner and Christoph Wehmeyer and Jan-Hendrik Prinz and Frank No{\'{e}}}, 489 | Title = {{PyEMMA} 2: A Software Package for Estimation, Validation, and Analysis of Markov Models}, 490 | Journal = {J. Chem. Theory Comput.}, 491 | Year = {2015}, 492 | Volume = {11}, 493 | Number = {11}, 494 | Pages = {5525--5542}, 495 | Month = {nov}, 496 | URL = {http://dx.doi.org/10.1021/acs.jctc.5b00743}, 497 | DOI = {10.1021/acs.jctc.5b00743} 498 | } 499 | @article{banushkina_nonparametric_2015, 500 | Author = {Polina V. Banushkina and Sergei V. Krivov}, 501 | Title = {Nonparametric variational optimization of reaction coordinates}, 502 | Journal = {J. Chem. Phys.}, 503 | Year = {2015}, 504 | Volume = {143}, 505 | Number = {18}, 506 | Pages = {184108}, 507 | Month = {nov}, 508 | URL = {https://doi.org/10.1063/1.4935180}, 509 | DOI = {10.1063/1.4935180} 510 | } 511 | @article{ben-rev-msm, 512 | Author = {Benjamin Trendelkamp-Schroer and Hao Wu and Fabian Paul and Frank No{\'{e}}}, 513 | Title = {Estimation and uncertainty of reversible Markov models}, 514 | Journal = {J. Chem. 
Phys.}, 515 | Year = {2015}, 516 | Volume = {143}, 517 | Number = {17}, 518 | Pages = {174101}, 519 | Month = {nov}, 520 | URL = {https://doi.org/10.1063/1.4934536}, 521 | DOI = {10.1063/1.4934536} 522 | } 523 | @conference{jupyter, 524 | Author = {Thomas Kluyver and Benjamin Ragan-Kelley and Fernando Pérez and Brian Granger and Matthias Bussonnier and Jonathan Frederic and Kyle Kelley and Jessica Hamrick and Jason Grout and Sylvain Corlay and Paul Ivanov and Damián Avila and Safia Abdalla and Carol Willing}, 525 | Title = {Jupyter Notebooks -- a publishing format for reproducible computational workflows}, 526 | Booktitle = {Positioning and Power in Academic Publishing: Players, Agents and Agendas}, 527 | Year = {2016}, 528 | Editor = {F. Loizides and B. Schmidt}, 529 | Pages = {87--90}, 530 | Organization = {IOS Press}, 531 | } 532 | @article{tram, 533 | Author = {Hao Wu and Fabian Paul and Christoph Wehmeyer and Frank No{\'{e}}}, 534 | Title = {Multiensemble Markov models of molecular thermodynamics and kinetics}, 535 | Journal = {Proc. Natl. Acad. Sci. USA}, 536 | Year = {2016}, 537 | Volume = {113}, 538 | Number = {23}, 539 | Pages = {E3221--E3230}, 540 | Month = {may}, 541 | URL = {https://doi.org/10.1073/pnas.1525092113}, 542 | DOI = {10.1073/pnas.1525092113} 543 | } 544 | @article{husic-optimized, 545 | Author = {Brooke E. Husic and Robert T. McGibbon and Mohammad M. Sultan and Vijay S. Pande}, 546 | Title = {Optimized parameter selection reveals trends in Markov state models for protein folding}, 547 | Journal = {J. Chem. Phys.}, 548 | Year = {2016}, 549 | Volume = {145}, 550 | Number = {19}, 551 | Pages = {194103}, 552 | Month = {nov}, 553 | URL = {https://doi.org/10.1063/1.4967809}, 554 | DOI = {10.1063/1.4967809} 555 | } 556 | @article{simon-mech-mod-nmr, 557 | Author = {Simon Olsson and Frank No{\'{e}}}, 558 | Title = {Mechanistic Models of Chemical Exchange Induced Relaxation in Protein {NMR}}, 559 | Journal = {J. Am. Chem. 
Soc.}, 560 | Year = {2016}, 561 | Volume = {139}, 562 | Number = {1}, 563 | Pages = {200--210}, 564 | Month = {dec}, 565 | URL = {https://doi.org/10.1021/jacs.6b09460}, 566 | DOI = {10.1021/jacs.6b09460} 567 | } 568 | @article{oom-feliks, 569 | Author = {Feliks N\"{u}ske and Hao Wu and Jan-Hendrik Prinz and Christoph Wehmeyer and Cecilia Clementi and Frank No{\'{e}}}, 570 | Title = {Markov state models from short non-equilibrium simulations{\textemdash}Analysis and correction of estimation bias}, 571 | Journal = {J. Chem. Phys.}, 572 | Year = {2017}, 573 | Volume = {146}, 574 | Number = {9}, 575 | Pages = {094104}, 576 | Month = {mar}, 577 | URL = {https://doi.org/10.1063/1.4976518}, 578 | DOI = {10.1063/1.4976518} 579 | } 580 | @article{hao-variational-koopman-models, 581 | Author = {Hao Wu and Feliks N\"{u}ske and Fabian Paul and Stefan Klus and P{\'{e}}ter Koltai and Frank No{\'{e}}}, 582 | Title = {Variational Koopman models: Slow collective variables and molecular kinetics from short off-equilibrium simulations}, 583 | Journal = {J. Chem. Phys.}, 584 | Year = {2017}, 585 | Volume = {146}, 586 | Number = {15}, 587 | Pages = {154104}, 588 | Month = {apr}, 589 | URL = {https://doi.org/10.1063/1.4979344}, 590 | DOI = {10.1063/1.4979344} 591 | } 592 | @article{NoeClementiReview, 593 | Author = {Frank No{\'{e}} and Cecilia Clementi}, 594 | Title = {Collective variables for the study of long-time kinetics from molecular trajectories: theory and methods}, 595 | Journal = {Curr. Opin. Struct. Biol.}, 596 | Year = {2017}, 597 | Volume = {43}, 598 | Pages = {141--147}, 599 | Month = {apr}, 600 | URL = {https://doi.org/10.1016/j.sbi.2017.02.006}, 601 | DOI = {10.1016/j.sbi.2017.02.006} 602 | } 603 | @article{vamp-preprint, 604 | Author = {{Wu}, H. 
and {Noé}, F.}, 605 | Title = {{Variational approach for learning Markov processes from time series data}}, 606 | Journal = {arXiv preprint arXiv:1707.04659}, 607 | Year = 2017, 608 | Month = jul, 609 | URL = {https://arxiv.org/pdf/1707.04659.pdf}, 610 | } 611 | @article{simon-amm, 612 | Author = {Simon Olsson and Hao Wu and Fabian Paul and Cecilia Clementi and Frank No{\'{e}}}, 613 | Title = {Combining experimental and simulation data of molecular processes via augmented Markov models}, 614 | Journal = {Proc. Natl. Acad. Sci. USA}, 615 | Year = {2017}, 616 | Volume = {114}, 617 | Number = {31}, 618 | Pages = {8265--8270}, 619 | Month = {jul}, 620 | URL = {https://doi.org/10.1073/pnas.1704803114}, 621 | DOI = {10.1073/pnas.1704803114} 622 | } 623 | @article{trammbar, 624 | Author = {Paul, Fabian and Wehmeyer, Christoph and Abualrous, Esam T. and Wu, Hao and Crabtree, Michael D. and Schöneberg, Johannes and Clarke, Jane and Freund, Christian and Weikl, Thomas R. and Noé, Frank}, 625 | Title = {Protein-peptide association kinetics beyond the seconds timescale from atomistic simulations}, 626 | Journal = {Nat. Commun.}, 627 | Year = {2017}, 628 | Volume = {8}, 629 | Number = {1}, 630 | Month = {Oct}, 631 | URL = {http://dx.doi.org/10.1038/s41467-017-01163-6}, 632 | DOI = {10.1038/s41467-017-01163-6} 633 | } 634 | @article{plattner_complete_2017, 635 | Author = {Plattner, Nuria and Doerr, Stefan and Fabritiis, Gianni De and Noé, Frank}, 636 | Title = {Complete protein--protein association kinetics in atomic detail revealed by molecular dynamics simulations and {Markov} modelling}, 637 | Journal = {Nat. Chem.}, 638 | Year = {2017}, 639 | Volume = {9}, 640 | Number = {10}, 641 | Pages = {1005}, 642 | Month = oct, 643 | URL = {https://www.nature.com/articles/nchem.2785}, 644 | DOI = {10.1038/nchem.2785} 645 | } 646 | @article{husic2017note, 647 | Author = {Brooke E. Husic and Vijay S. 
Pande}, 648 | Title = {Note: {MSM} lag time cannot be used for variational model selection}, 649 | Journal = {J. Chem. Phys.}, 650 | Year = {2017}, 651 | Volume = {147}, 652 | Number = {17}, 653 | Pages = {176101}, 654 | Month = {nov}, 655 | URL = {https://doi.org/10.1063/1.5002086}, 656 | DOI = {10.1063/1.5002086} 657 | } 658 | @article{nglview, 659 | Author = {Hai Nguyen and David A Case and Alexander S Rose}, 660 | Title = {{NGLview}{\textendash}interactive molecular graphics for Jupyter notebooks}, 661 | Journal = {Bioinformatics}, 662 | Year = {2018}, 663 | Volume = {34}, 664 | Number = {7}, 665 | Pages = {1241--1242}, 666 | Month = {apr}, 667 | URL = {https://doi.org/10.1093/bioinformatics/btx789}, 668 | DOI = {10.1093/bioinformatics/btx789} 669 | } 670 | @article{vampnet, 671 | Author = {Andreas Mardt and Luca Pasquali and Hao Wu and Frank Noé}, 672 | Title = {{VAMPnets} for deep learning of molecular kinetics}, 673 | Journal = {Nat. Commun.}, 674 | Year = {2018}, 675 | Volume = {9}, 676 | Number = {1}, 677 | Month = {jan}, 678 | URL = {https://doi.org/10.1038/s41467-017-02388-1}, 679 | DOI = {10.1038/s41467-017-02388-1} 680 | } 681 | @article{Koltai2018, 682 | Author = {P{\'{e}}ter Koltai and Hao Wu and Frank No{\'{e}} and Christof Sch\"{u}tte}, 683 | Title = {Optimal Data-Driven Estimation of Generalized Markov State Models for Non-Equilibrium Dynamics}, 684 | Journal = {Computation}, 685 | Year = {2018}, 686 | Volume = {6}, 687 | Number = {1}, 688 | Pages = {22}, 689 | Month = {feb}, 690 | URL = {https://doi.org/10.3390/computation6010022}, 691 | DOI = {10.3390/computation6010022} 692 | } 693 | @article{msm-brooke, 694 | Author = {Brooke E. Husic and Vijay S. Pande}, 695 | Title = {Markov State Models: From an Art to a Science}, 696 | Journal = {J. Am. Chem. 
Soc.}, 697 | Year = {2018}, 698 | Volume = {140}, 699 | Number = {7}, 700 | Pages = {2386--2396}, 701 | Month = {feb}, 702 | DOI = {10.1021/jacs.7b12191} 703 | } 704 | @article{Sultan2018-vde-enhanced-sampling, 705 | Author = {Mohammad M. Sultan and Hannah K. Wayment-Steele and Vijay S. Pande}, 706 | Title = {Transferable Neural Networks for Enhanced Sampling of Protein Dynamics}, 707 | Journal = {J. Chem. Theory Comput.}, 708 | Year = {2018}, 709 | Volume = {14}, 710 | Number = {4}, 711 | Pages = {1887--1894}, 712 | Month = {mar}, 713 | URL = {https://doi.org/10.1021/acs.jctc.8b00025}, 714 | DOI = {10.1021/acs.jctc.8b00025} 715 | } 716 | @article{deep-gen-msm-preprint, 717 | Author = {{Wu}, H. and {Mardt}, A. and {Pasquali}, L. and {Noé}, F.}, 718 | Title = {{Deep Generative Markov State Models}}, 719 | Journal = {arXiv preprint arXiv:1805.07601}, 720 | Year = 2018, 721 | Month = may, 722 | URL = {https://arxiv.org/pdf/1805.07601.pdf}, 723 | } 724 | @article{hernandez-vde, 725 | Author = {Carlos X. Hern{\'{a}}ndez and Hannah K. Wayment-Steele and Mohammad M. Sultan and Brooke E. Husic and Vijay S. Pande}, 726 | Title = {Variational encoding of complex dynamics}, 727 | Journal = {Phys. Rev. E}, 728 | Year = {2018}, 729 | Volume = {97}, 730 | Number = {6}, 731 | Month = {jun}, 732 | URL = {https://doi.org/10.1103/physreve.97.062412}, 733 | DOI = {10.1103/physreve.97.062412} 734 | } 735 | @article{tae, 736 | Author = {Christoph Wehmeyer and Frank No{\'{e}}}, 737 | Title = {Time-lagged autoencoders: Deep learning of slow collective variables for molecular kinetics}, 738 | Journal = {J. Chem. 
Phys.}, 739 | Year = {2018}, 740 | Volume = {148}, 741 | Number = {24}, 742 | Pages = {241703}, 743 | Month = {jun}, 744 | URL = {https://doi.org/10.1063/1.5011399}, 745 | DOI = {10.1063/1.5011399} 746 | } 747 | @article{Ribeiro2018-rave, 748 | Author = {Jo{\~{a}}o Marcelo Lamim Ribeiro and Pablo Bravo and Yihang Wang and Pratyush Tiwary}, 749 | Title = {Reweighted autoencoded variational Bayes for enhanced sampling ({RAVE})}, 750 | Journal = {J. Chem. Phys.}, 751 | Year = {2018}, 752 | Volume = {149}, 753 | Number = {7}, 754 | Pages = {072301}, 755 | Month = {aug}, 756 | URL = {https://doi.org/10.1063/1.5025487}, 757 | DOI = {10.1063/1.5025487} 758 | } 759 | 760 | @article{wu2015projected, 761 | title={Projected metastable Markov processes and their estimation with observable operator models}, 762 | author={Wu, Hao and Prinz, Jan-Hendrik and No{\'e}, Frank}, 763 | journal={J. Chem. Phys.}, 764 | volume={143}, 765 | number={14}, 766 | pages={10B610\_1}, 767 | year={2015}, 768 | publisher={AIP Publishing} 769 | } 770 | 771 | @Misc{mdtutorial, 772 | author = {Efrem Braun and Justin Gilmer and Heather B. Mayes and David L. Mobley and Jacob I. Monroe and Samarjeet Prasad and Daniel M. Zuckerman}, 773 | title = {Best Practices for Foundations in Molecular Simulations [Article v1.0]}, 774 | year = {2018}, 775 | url = "https://github.com/MobleyLab/basic_simulation_training", 776 | DOI = {10.33011/livecoms.1.1.5957} 777 | } 778 | -------------------------------------------------------------------------------- /manuscript/livecoms.cls: -------------------------------------------------------------------------------- 1 | % A template for LiveCoMS submissions. 2 | % 3 | % adapted from elife template, v1.4 4 | \NeedsTeXFormat{LaTeX2e} 5 | \ProvidesClass{livecoms}[2017/08/10, v0.5...] 
6 | 7 | \RequirePackage[english]{babel} 8 | 9 | \RequirePackage{calc} 10 | \RequirePackage{etoolbox} 11 | \RequirePackage{regexpatch} 12 | \RequirePackage{ifxetex,ifluatex} 13 | 14 | \newif\ifxetexorluatex 15 | \ifxetex 16 | \xetexorluatextrue 17 | \else 18 | \ifluatex 19 | \xetexorluatextrue 20 | \else 21 | \xetexorluatexfalse 22 | \fi 23 | \fi 24 | 25 | \newif\if@reqslineno 26 | \DeclareOption{lineno}{\@reqslinenotrue} 27 | 28 | %% the type of document this is. The current types: 29 | % bestpractices, editorial, tutorial, review, comparison, lessons 30 | \newif\if@bestpractices 31 | \DeclareOption{bestpractices}{\@bestpracticestrue} 32 | 33 | \newif\if@editorial 34 | \DeclareOption{editorial}{\@editorialtrue} 35 | 36 | \newif\if@tutorial 37 | \DeclareOption{tutorial}{\@tutorialtrue} 38 | 39 | \newif\if@review 40 | \DeclareOption{review}{\@reviewtrue} 41 | 42 | \newif\if@comparison 43 | \DeclareOption{comparison}{\@comparisontrue} 44 | 45 | \newif\if@lessons 46 | \DeclareOption{lessons}{\@lessonstrue} 47 | 48 | %Publication Information 49 | \newif\if@pubversion 50 | \DeclareOption{pubversion}{\@pubversiontrue} 51 | 52 | \newif\if@ASAPversion 53 | \DeclareOption{ASAPversion}{\@ASAPversiontrue} 54 | 55 | %% Linespacing. 
56 | \newif\if@onehalfspacing 57 | \newif\if@doublespacing 58 | \DeclareOption{onehalfspacing}{\@onehalfspacingtrue} 59 | \DeclareOption{doublespacing}{\@doublespacingtrue} 60 | 61 | \DeclareOption*{\PassOptionsToClass{\CurrentOption}{extarticle}} 62 | \ExecuteOptions{} 63 | \ProcessOptions\relax 64 | \LoadClass{extarticle} 65 | 66 | \RequirePackage{amsmath} 67 | \RequirePackage{amssymb} 68 | \RequirePackage{mdframed} 69 | 70 | \RequirePackage{lineno} 71 | \if@reqslineno\linenumbers\fi 72 | 73 | \ifxetexorluatex 74 | \RequirePackage[no-math]{fontspec} 75 | \setmainfont[Ligatures = TeX, 76 | Extension = .ttf, 77 | UprightFont = *-Regular, 78 | BoldFont = *-Bold, 79 | ItalicFont = *-Italic, 80 | BoldItalicFont = *-BoldItalic] 81 | {OpenSans} 82 | \else 83 | \RequirePackage[T1]{fontenc} 84 | \RequirePackage[utf8]{inputenc} 85 | \RequirePackage[default]{opensans} 86 | \renewcommand{\ttdefault}{lmtt} 87 | \fi 88 | 89 | \RequirePackage{microtype} 90 | 91 | % Trueno/Open Sans requires a bigger "single" linespread. 
92 | \linespread{1.2} 93 | \if@onehalfspacing\linespread{1.5}\fi 94 | \if@doublespacing\linespread{2.0}\fi 95 | 96 | \emergencystretch 3em 97 | 98 | \RequirePackage{graphicx,xcolor} 99 | \definecolor{LiveCoMSDarkBlue}{HTML}{273B81} 100 | \definecolor{LiveCoMSLightBlue}{HTML}{0A9DD9} 101 | \definecolor{LiveCoMSMediumGrey}{HTML}{6D6E70} 102 | \definecolor{LiveCoMSLightGrey}{HTML}{929497} 103 | 104 | \RequirePackage{booktabs} 105 | \RequirePackage{authblk} 106 | 107 | % Modified page geometry for LiveComs 108 | \RequirePackage[%left=6cm,% 109 | %marginparwidth=4cm,% 110 | %marginparsep=0.5cm,% 111 | left=2cm, 112 | right=1.3cm,% 113 | top=2cm,% 114 | bottom=2.5cm,% 115 | headheight=21pt,% 116 | headsep=2\baselineskip,% 117 | columnsep=2em,% 118 | letterpaper]{geometry}% 119 | \RequirePackage{changepage} 120 | 121 | \RequirePackage{silence} 122 | \WarningFilter{caption}{The option `hypcap=true' will be ignored} 123 | \WarningFilter{microtype}{Unknown slot} 124 | 125 | \RequirePackage[labelfont={bf},% 126 | labelsep=period,% 127 | justification=justified,% 128 | singlelinecheck=false,% 129 | tableposition=top,font=small] 130 | {caption} 131 | 132 | % \captionsetup*[table]{skip=\medskipamount} 133 | 134 | 135 | \RequirePackage[square,numbers,sort&compress]{natbib} 136 | \RequirePackage{natmove} 137 | \renewcommand{\bibfont}{\small} 138 | % modified from https://github.com/gbhutani/vancouver_authoryear_bibstyle/ 139 | \IfFileExists{vancouver-livecoms.bst} 140 | {\bibliographystyle{vancouver-livecoms}} 141 | {\PackageWarning{livecoms}{vancouver-livecoms.bst not found; falling back to apalike bibliography style.}\bibliographystyle{apalike}} 142 | % Make author in citation italic 143 | \renewcommand{\NAT@nmfmt}[1]{{\bfseries\itshape\color{LiveCoMSMediumGrey} #1}} 144 | 145 | % ...as well as the year 146 | \xpatchcmd{\NAT@citex} 147 | {\@citea\NAT@hyper@{\NAT@nmfmt{\NAT@nm}\NAT@date}} 148 | {\@citea\NAT@hyper@{\NAT@nmfmt{\NAT@nm}\NAT@nmfmt{\NAT@date}}} 149 | 
{}{\PackageWarning{LiveCoMS}{Failed to patch year format in citation}} 150 | 151 | \xpatchcmd{\NAT@citex} 152 | {\else\unskip\NAT@spacechar\NAT@hyper@{\NAT@date}} 153 | {\else\unskip\NAT@spacechar\NAT@hyper@{\NAT@nmfmt{\NAT@date}}} 154 | {}{\PackageWarning{LiveCoMS}{Failed to patch year format in citation}} 155 | 156 | \xpatchcmd{\NAT@citex} 157 | {\hyper@natlinkbreak{\NAT@aysep\NAT@spacechar}{\@citeb\@extra@b@citeb}\NAT@date} 158 | {\hyper@natlinkbreak{\NAT@nmfmt{\NAT@aysep\NAT@spacechar}}{\@citeb\@extra@b@citeb}\NAT@nmfmt{\NAT@date}} 159 | {}{\PackageWarning{LiveCoMS}{Failed to patch year format in citation}} 160 | 161 | \xpatchcmd{\NAT@citex} 162 | {\@citea\NAT@hyper@{\NAT@date}} 163 | {\@citea\NAT@hyper@{\NAT@nmfmt{\NAT@date}}} 164 | {}{\PackageWarning{LiveCoMS}{Failed to patch year format in citation}} 165 | 166 | \xpatchcmd{\NAT@citex} 167 | {{\@citeb\@extra@b@citeb}\NAT@date} 168 | {{\@citeb\@extra@b@citeb}\NAT@nmfmt{\NAT@date}} 169 | {}{\PackageWarning{LiveCoMS}{Failed to patch year format in citation}} 170 | %% There, we're finally done with patching the year in citations. 
171 | 172 | % 173 | % headers and footers 174 | % 175 | 176 | \RequirePackage{fancyhdr} % custom headers/footers 177 | \RequirePackage{lastpage} % Number of pages in the document 178 | \pagestyle{fancy} % Enables the custom headers/footers 179 | %% Next two lines unnecessary for LiveComs 180 | % \addtolength{\headwidth}{\marginparsep} 181 | % \addtolength{\headwidth}{\marginparwidth} 182 | 183 | %% different document types listed here 184 | 185 | \newif\ifdocumenttype 186 | \documenttypefalse 187 | 188 | \if@bestpractices 189 | \documenttypetrue 190 | \newcommand{\documenttype}{Best Practices Guide} 191 | \else 192 | % nothing 193 | \fi 194 | 195 | \if@editorial 196 | \documenttypetrue 197 | \newcommand{\documenttype}{Editorial} 198 | \else 199 | % nothing 200 | \fi 201 | 202 | \if@tutorial 203 | \documenttypetrue 204 | \newcommand{\documenttype}{Tutorial} 205 | \else 206 | % nothing 207 | \fi 208 | 209 | \if@review 210 | \documenttypetrue 211 | \newcommand{\documenttype}{Perpetual Review} 212 | \else 213 | % nothing 214 | \fi 215 | 216 | \if@comparison 217 | \documenttypetrue 218 | \newcommand{\documenttype}{Molecular Simulation Comparison} 219 | \else 220 | % nothing 221 | \fi 222 | 223 | \if@lessons 224 | \documenttypetrue 225 | \newcommand{\documenttype}{``Lessons Learned'' Document} 226 | \else 227 | % nothing 228 | \fi 229 | 230 | \fancyhf{} 231 | \ifdocumenttype 232 | \chead{% 233 | \setlength{\fboxsep}{3pt} 234 | \colorbox{LiveCoMSMediumGrey}{\begin{minipage}{\headwidth}\centering\color{white} A LiveCoMS \documenttype\end{minipage}}% 235 | } 236 | \fi 237 | 238 | % Publication information in document footer 239 | % *ONLY INCLUDED IF "pubversion" CLASS OPTION IS INVOKED* 240 | \def\@publishedDOI{} 241 | \def\@publishedvolume{} 242 | \def\@publishedyear{} 243 | \def\@publishedarticlenum{} 244 | \def\@publisheddatereceived{} 245 | \def\@publisheddateaccepted{} 246 | \def \DOIprefix{10.XXXX} %May be able to use this later 247 | \newcommand{\pubDOI}[1]{% 248 | 
\appto{\@publishedDOI}{#1}{}{} 249 | } 250 | \newcommand{\pubvolume}[1]{% 251 | \appto{\@publishedvolume}{#1}{}{} 252 | } 253 | \newcommand{\pubissue}[1]{% 254 | \appto{\@publishedissue}{#1}{}{} 255 | } 256 | \newcommand{\pubyear}[1]{% 257 | \appto{\@publishedyear}{#1}{}{} 258 | } 259 | \newcommand{\articlenum}[1]{% 260 | \appto{\@publishedarticlenum}{#1}{}{} 261 | } 262 | \newcommand{\datereceived}[1]{% 263 | \appto{\@publisheddatereceived}{#1}{}{} 264 | } 265 | \newcommand{\dateaccepted}[1]{% 266 | \appto{\@publisheddateaccepted}{#1}{}{} 267 | } 268 | 269 | %-------------------------------------------------------- 270 | % Footers 271 | % 1. Error Check for conflicting class options 272 | \if@pubversion 273 | \if@ASAPversion 274 | \ClassError{livecoms} 275 | {Nope nope nope, you cannot invoke 'pubversion' and 'ASAPversion' simultaneously. Please correct the class options.} 276 | \fi 277 | \fi 278 | % 2. Publication Version: put submission/acceptance dates in left footer and citation information in right footer 279 | %%% DWS NOTE: would be nice if the left footer was in an if A-or-B type logical statement 280 | \if@pubversion 281 | \lfoot{\ifthenelse{\value{page}=1} 282 | {\small\color{LiveCoMSMediumGrey}Received: \@publisheddatereceived \\ Accepted: \@publisheddateaccepted} 283 | {~\\~} 284 | }% 285 | \rfoot{\small\color{LiveCoMSMediumGrey}\href{https://doi.org/\@publishedDOI}{https://doi.org/\@publishedDOI}\\ 286 | {\it Living J. Comp. Mol. Sci.} \@publishedyear, \@publishedvolume\nobreak\hspace{.05em}(\@publishedissue), \@publishedarticlenum 287 | }% 288 | \fi 289 | % 3. 
ASAP Version: put submission/acceptance dates in left footer and "ASAP Version" in right footer 290 | \if@ASAPversion 291 | \lfoot{\ifthenelse{\value{page}=1} 292 | {\small\color{LiveCoMSMediumGrey}Received: \@publisheddatereceived \\ Accepted: \@publisheddateaccepted} 293 | {~\\~} 294 | }% 295 | \rfoot{\small\color{LiveCoMSMediumGrey}\href{https://doi.org/\@publishedDOI}{https://doi.org/\@publishedDOI}\\ 296 | {\it Living J. Comp. Mol. Sci.} ASAP Version 297 | }% 298 | \fi 299 | % 4. Page Number in center of footer 300 | \cfoot{\small\color{white} \vspace{\baselineskip} \small\color{LiveCoMSMediumGrey} \thepage\space of\space\pageref{LastPage}}% 301 | \preto{\footrule}{\color{LiveCoMSMediumGrey}} 302 | \renewcommand{\headrulewidth}{0pt}% % No header rule 303 | \renewcommand{\footrulewidth}{0.4pt}% % No footer rule 304 | %---------------------------------------------------------- 305 | 306 | % 307 | % section/subsection/paragraph set-up 308 | % Updated for LiveComs 309 | % \setcounter{secnumdepth}{0} 310 | \RequirePackage[explicit]{titlesec} 311 | \titleformat{\section} 312 | {\LARGE\bfseries\raggedright} 313 | {\thesection}{1em}{#1}[] 314 | \titleformat{\subsection} 315 | {\Large\bfseries\raggedright\color{LiveCoMSMediumGrey}} 316 | {\thesubsection}{1em}{#1}[] 317 | \titleformat{\subsubsection} 318 | {\large\raggedright\color{LiveCoMSMediumGrey}} 319 | {\thesubsubsection}{1em}{#1}[] 320 | \titleformat{\paragraph} 321 | {\large\raggedright\color{LiveCoMSMediumGrey}} 322 | {\theparagraph}{1em}{#1}[] 323 | \titlespacing*{\section}{0pc}{3ex \@plus4pt \@minus3pt}{0pt} 324 | \titlespacing*{\subsection}{0pc}{2.5ex \@plus3pt \@minus2pt}{0pt} 325 | \titlespacing*{\subsubsection}{0pc}{2ex \@plus2.5pt \@minus1.5pt}{0pt} 326 | \titlespacing*{\paragraph}{0pc}{1.5ex \@plus2pt \@minus1pt}{0pt} 327 | 328 | \RequirePackage{enumitem} 329 | \setlist{noitemsep} 330 | 331 | \RequirePackage{alphalph} 332 | \newalphalph{\fnsymbolmult}[mult]{\@fnsymbol}{5} 333 | 334 | 
\newcounter{authorfn} 335 | \setcounter{authorfn}{1} 336 | \newcommand{\authfn}[1]{% 337 | \fnsymbolmult{\numexpr\value{authorfn}+#1}% 338 | } 339 | 340 | \def\@correspondence{} 341 | \def\@contribution{} 342 | \def\@presentaddress{} 343 | \def\@deceased{} 344 | % Added blurb for LiveComs 345 | \def\@blurb{} 346 | \def\@orcidblock{} 347 | 348 | 349 | \newcommand{\corr}[2]{% 350 | \ifx\empty\@correspondence\else\appto{\@correspondence}{; }{}{}\fi 351 | \appto{\@correspondence}{% 352 | \url{#1}% 353 | \ifx\empty#2\else\space(#2)\fi 354 | }{}{}% 355 | } 356 | 357 | \newcommand{\contrib}[2][]{ 358 | \appto{\@contribution}{% 359 | \ifx\empty#1\else\textsuperscript{#1}\fi 360 | #2\\ 361 | }{}{} 362 | } 363 | 364 | \newcommand{\presentadd}[2][]{ 365 | \ifx\empty\@presentaddress\else\appto{\@presentaddress}{; }{}{}\fi 366 | \appto{\@presentaddress}{% 367 | \ifx\empty#1\else\textsuperscript{#1}\fi 368 | #2% 369 | }{}{} 370 | } 371 | 372 | \newcommand{\deceased}[1]{\def\@deceased{\textsuperscript{#1}Deceased}} 373 | 374 | % Added for LiveComs 375 | \newcommand{\blurb}[1]{\def\@blurb{#1}} 376 | 377 | \newcommand{\orcid}[2]{% 378 | \ifx\empty\@orcidblock\else\appto{\@orcidblock}{\\}{}{}\fi 379 | \appto{\@orcidblock}{% 380 | #1:\space% 381 | \ifx\empty#2\else\href{https://orcid.org/#2}{#2} \fi 382 | }{}{}% 383 | } 384 | 385 | 386 | 387 | \reversemarginpar 388 | 389 | % 390 | % custom title page 391 | % 392 | \renewcommand{\Authfont}{\bfseries\large\raggedright} 393 | \renewcommand{\Affilfont}{\mdseries\large\raggedright} 394 | \renewcommand{\Authands}{, } 395 | \setlength{\affilsep}{16pt} 396 | \renewcommand{\AB@affilsepx}{; \protect\Affilfont} 397 | 398 | \newcommand{\themetadata}{% 399 | \textbf{*For correspondence:\\} \@correspondence\par 400 | \ifx\empty\@contribution\else 401 | \bigskip\@contribution\par\fi 402 | \ifx\empty\@presentaddress\else 403 | \textbf{Present address: }\@presentaddress\par\fi 404 | \ifx\empty\@deceased\else\@deceased\par\fi 405 | } 406 | 407 | 
\patchcmd{\@author}{\AB@authlist\\[\affilsep]\AB@affillist}{\AB@authlist\\[\affilsep] 408 | %% Removed for LiveComs; will be placed after abstract in frontmatter 409 | % \marginpar{\raggedright\footnotesize\themetadata\par} 410 | \AB@affillist}{}{} 411 | 412 | %% Added for LiveComs 413 | \RequirePackage{environ} 414 | \RequirePackage{textpos} 415 | 416 | %% Abstract outside frontmatter will throw an error! 417 | \RenewEnviron{abstract}{% 418 | \ClassError{livecoms} 419 | {Nope nope nope, please put the abstract inside the frontmatter environment.} 420 | {Please put the abstract inside the frontmatter environment.} 421 | } 422 | 423 | \NewEnviron{frontmatter}{% 424 | %% Define abstract's behavior when placed in frontmatter 425 | \renewenvironment{abstract}{% 426 | \setlength{\parindent}{0pt} %\raggedright 427 | \raisebox{-16pt-\baselineskip}[0pt][0pt]{\makebox[0pt][r]{\parbox[t]{3cm}{% 428 | \raggedright\itshape\footnotesize\@blurb\par\medskip% 429 | This version dated \@date% 430 | }\hspace*{1cm}}}% 431 | \textcolor{LiveCoMSMediumGrey}{\rule{\textwidth}{2pt}} 432 | \vskip16pt 433 | \textcolor{LiveCoMSLightBlue}{\large\bfseries\abstractname\space} 434 | }{% 435 | \vskip8pt 436 | \textcolor{LiveCoMSMediumGrey}{\rule{\textwidth}{2pt}} 437 | \vskip16pt 438 | } 439 | \twocolumn[% 440 | \protecting{%\begin{minipage}[b]{3cm} 441 | % \small\itshape 442 | % \raggedright\@blurb 443 | % \end{minipage} 444 | \hfill 445 | \begin{minipage}[b]{\textwidth-4cm} 446 | \BODY 447 | \themetadata% 448 | \end{minipage}}\vspace*{2\baselineskip} 449 | ]% 450 | } 451 | 452 | \renewcommand{\maketitle}{% 453 | \vskip36pt% 454 | {\color{LiveCoMSDarkBlue}\raggedright\bfseries\fontsize{22}{27}\selectfont \@title\par}% 455 | \vskip16pt 456 | {\@author\par} 457 | \vskip8pt 458 | } 459 | 460 | \newcommand{\makeorcid}{% 461 | % \textbf{*For correspondence:\\} \@correspondence\par 462 | % \textbf{ORCID:\\} \@correspondence\par 463 | \textbf{ORCID:\\} \@orcidblock\par 464 | } 465 | 466 | %% Insert a 
grey line to separate floats from main text 467 | \newcommand{\topfigrule}{\vskip8pt\noindent{\rule{\linewidth}{1pt}}} 468 | \newcommand{\botfigrule}{\noindent{\rule{\linewidth}{1pt}}\vskip8pt} 469 | 470 | \RequirePackage{newfloat} 471 | \RequirePackage{wrapfig} 472 | \AtEndEnvironment{wrapfigure}{\vskip8pt\noindent{\rule{\hsize}{1pt}}} 473 | % \RequirePackage[lflt]{floatflt} 474 | % \AtEndEnvironment{floatingfigure}{\vskip8pt\noindent\textcolor{LiveCoMSMediumGrey}{\rule{\hsize}{2pt}}} 475 | 476 | \DeclareFloatingEnvironment[placement=hbt,name=Box]{featurebox} 477 | \captionsetup[featurebox]{font={Large,bf,color=LiveCoMSDarkBlue}} 478 | 479 | \newcounter{featurefigure} 480 | \newcounter{featuretable} 481 | \AtBeginEnvironment{featurebox}{% 482 | \setcounter{featurefigure}{0}% 483 | \setcounter{featuretable}{0}% 484 | \newcommand{\featurefig}[1]{% 485 | \refstepcounter{featurefigure}% 486 | \vskip\smallskipamount% 487 | {\small\textbf{\color{LiveCoMSDarkBlue}Box \arabic{featurebox} Figure \arabic{featurefigure}.}\space #1\par}\medskip} 488 | \newcommand{\featuretable}[1]{% 489 | \refstepcounter{featuretable}% 490 | \vskip\smallskipamount% 491 | {\small\textbf{\color{LiveCoMSDarkBlue}Box \arabic{featurebox} Table \arabic{featuretable}.}\space #1\par}\medskip} 492 | 493 | } 494 | \apptocmd{\featurebox}{% 495 | \begin{mdframed}[linewidth=0pt,backgroundcolor=LiveCoMSLightBlue!10,fontcolor=LiveCoMSDarkBlue] 496 | \if@reqslineno\addtolength{\linenumbersep}{1em}\internallinenumbers\fi% 497 | }{}{} 498 | \pretocmd{\endfeaturebox}{\end{mdframed}}{}{} 499 | 500 | %% Starred version for LiveComs two-column 501 | \AtBeginEnvironment{featurebox*}{% 502 | \setcounter{featurefigure}{0}% 503 | \setcounter{featuretable}{0}% 504 | \newcommand{\featurefig}[1]{% 505 | \refstepcounter{featurefigure}% 506 | \vskip\smallskipamount% 507 | {\small\textbf{\color{LiveCoMSDarkBlue}Box \arabic{featurebox} Figure \arabic{featurefigure}.}\space #1\par}\medskip} 508 | 
\newcommand{\featuretable}[1]{% 509 | \refstepcounter{featuretable}% 510 | \vskip\smallskipamount% 511 | {\small\textbf{\color{LiveCoMSDarkBlue}Box \arabic{featurebox} Table \arabic{featuretable}.}\space #1\par}\medskip} 512 | } 513 | \expandafter\apptocmd\csname featurebox*\endcsname{% 514 | \begin{mdframed}[linewidth=0pt,backgroundcolor=LiveCoMSLightBlue!10,fontcolor=LiveCoMSDarkBlue] 515 | \if@reqslineno\addtolength{\linenumbersep}{1em}\internallinenumbers\fi% 516 | }{}{} 517 | \expandafter\pretocmd\csname endfeaturebox*\endcsname{\end{mdframed}}{}{} 518 | 519 | %% Unnecessary for LiveComs 520 | % \newenvironment{fullwidth}{% 521 | % \begin{adjustwidth}{-4.5cm}{} 522 | % }{\end{adjustwidth}} 523 | 524 | %% Provide support for pseudocode and algorithms 525 | \RequirePackage{algorithm,algpseudocode} 526 | \captionsetup[algorithm]{% 527 | labelfont={bf},font=small,labelsep=period, 528 | justification=raggedright,singlelinecheck=false} 529 | \newcommand\fs@notopruled{\def\@fs@cfont{\bfseries}\let\@fs@capt\floatc@ruled 530 | \def\@fs@pre{}% \hrule height.8pt depth0pt \kern2pt}% 531 | \def\@fs@post{} %\kern2pt\hrule\relax}% 532 | \def\@fs@mid{\medskip\kern2pt\hrule\kern2pt}% 533 | \let\@fs@iftopcapt\iftrue} 534 | \floatstyle{notopruled} 535 | \restylefloat{algorithm} 536 | \newcommand{\algorithmautorefname}{Algorithm} 537 | \newcommand{\ALG}[1]{\autoref{alg:#1}} 538 | 539 | %% Update some appendix sectional styles 540 | \appto{\appendix}{% 541 | \@addtoreset{figure}{section} 542 | \@addtoreset{table}{section} 543 | \@addtoreset{featurebox}{section} 544 | \@addtoreset{algorithm}{section} 545 | % \numberwithin{figure}{section} 546 | % \numberwithin{table}{section} 547 | % \numberwithin{featurebox}{section} 548 | \titleformat{\section} 549 | {\LARGE\bfseries\color{LiveCoMSDarkBlue}} 550 | {\appendixname\ \thesection}{1em}{#1}[] 551 | 552 | \captionsetup*[figure]{name={Appendix \thesection\ Figure },font={color=LiveCoMSDarkBlue,small},skip=\smallskipamount}% 553 | 554 | 
\captionsetup*[table]{name={Appendix \thesection\ Table },font={color=LiveCoMSDarkBlue,small}}% 555 | } 556 | 557 | \newcounter{figsupp} 558 | \setcounter{figsupp}{0} 559 | \newcounter{data} 560 | \setcounter{data}{0} 561 | \def\supplist{} 562 | 563 | \RequirePackage{newfile} 564 | \newoutputstream{suppinfo} 565 | \openoutputfile{\jobname.suppinfo}{suppinfo} 566 | 567 | 568 | \AtBeginEnvironment{figure}{% 569 | \setcounter{figsupp}{0} 570 | \setcounter{data}{0} 571 | %% Updated 2017/06/30 to allow optional argument 572 | \newcommand{\figsupp}[3][]{% 573 | \refstepcounter{figsupp}% 574 | {% 575 | \ifstrequal{#1}{none}{}{% 576 | \small\textbf{Figure~\thefigure--Figure supplement \arabic{figsupp}.} \ifstrempty{#1}{#2}{#1}}\par} 577 | \addtostream{suppinfo}{% 578 | \noindent\protect\begin{minipage}{\linewidth} 579 | \protect #3\noexpand\par 580 | \textbf{Figure \thefigure--Figure supplement \arabic{figsupp}.} #2\noexpand\par 581 | \vskip8pt 582 | \protect\end{minipage} 583 | \vskip16pt 584 | } 585 | } 586 | \newcommand{\figdata}[1]{% 587 | \refstepcounter{data} 588 | {\small\textbf{Figure~\thefigure--source data \arabic{data}.} #1}\par 589 | } 590 | } 591 | 592 | %% Added for LiveComs (two columns) 593 | \AtBeginEnvironment{figure*}{% 594 | \setcounter{figsupp}{0} 595 | \setcounter{data}{0} 596 | %% Updated 2017/06/30 to allow optional argument 597 | \newcommand{\figsupp}[3][]{% 598 | \refstepcounter{figsupp}% 599 | {% 600 | \ifstrequal{#1}{none}{}{% 601 | \small\textbf{Figure~\thefigure--Figure supplement \arabic{figsupp}.} \ifstrempty{#1}{#2}{#1}}\par} 602 | \addtostream{suppinfo}{% 603 | \noindent\protect\begin{minipage}{\linewidth} 604 | \protect #3\noexpand\par 605 | \textbf{Figure \thefigure--Figure supplement \arabic{figsupp}.} #2\noexpand\par 606 | \vskip8pt 607 | \protect\end{minipage} 608 | \vskip16pt 609 | } 610 | } 611 | \newcommand{\figdata}[1]{% 612 | \refstepcounter{data} 613 | {\small\textbf{Figure~\thefigure--source data \arabic{data}.} #1}\par 614 | } 
615 | } 616 | 617 | \AtBeginEnvironment{table}{% 618 | \setcounter{data}{0} 619 | \newcommand{\tabledata}[1]{% 620 | \refstepcounter{data} 621 | {\small\textbf{Table~\thetable--source data \arabic{data}.} #1}\par 622 | } 623 | } 624 | 625 | %% Added for LiveComs (twocolumns) 626 | \AtBeginEnvironment{table*}{% 627 | \setcounter{data}{0} 628 | \newcommand{\tabledata}[1]{% 629 | \refstepcounter{data} 630 | {\small\textbf{Table~\thetable--source data \arabic{data}.} #1}\par 631 | } 632 | } 633 | 634 | %% Checklists as floats 635 | \RequirePackage{fontawesome} 636 | \DeclareFloatingEnvironment[placement=hbtp,name=Checklists]{Checklists} 637 | \newcounter{checklist} 638 | \AtBeginEnvironment{Checklists}{% 639 | \setcounter{checklist}{0} 640 | \mdfsetup{skipabove=0pt,skipbelow=0pt, 641 | frametitleaboveskip=12pt,innerbottommargin=12pt, 642 | hidealllines=true, 643 | frametitlefont=\Large\bfseries\color{LiveCoMSLightBlue}} 644 | }{}{} 645 | 646 | \AtBeginEnvironment{Checklists*}{% 647 | \setcounter{checklist}{0} 648 | \mdfsetup{skipabove=0pt,skipbelow=0pt, 649 | frametitleaboveskip=12pt,innerbottommargin=12pt, 650 | hidealllines=true, 651 | frametitlefont=\Large\bfseries\color{LiveCoMSLightBlue}} 652 | }{}{} 653 | 654 | \newenvironment{checklist}[1]{% 655 | \stepcounter{checklist} 656 | \ifnumodd{\thechecklist} 657 | {\def\cl@bgcolor{gray!12}} 658 | {\def\cl@bgcolor{gray!25}} 659 | \begin{mdframed}[ 660 | frametitle=\MakeUppercase{#1}, 661 | backgroundcolor=\cl@bgcolor] 662 | \setlist[itemize]{label=$\Box$,leftmargin=*} 663 | }{\end{mdframed}} 664 | 665 | \AtEndDocument{% 666 | \closeoutputstream{suppinfo} 667 | % \pagestyle{empty} 668 | \renewcommand{\footrule}{} 669 | \rfoot{} 670 | \input{\jobname.suppinfo} 671 | } 672 | 673 | %% Use more traditional Appendix section approach 674 | % \newcounter{appendix} 675 | % \setcounter{appendix}{0} 676 | % \newenvironment{appendixbox}{% 677 | % \setcounter{figure}{0} 678 | % \setcounter{table}{0} 679 | % 
\refstepcounter{appendix}% 680 | % \clearpage% 681 | % \patchcmd{\ttlf@section}{LiveCoMSMediumGrey}{LiveCoMSDarkBlue}{}{} 682 | % \noindent{\bfseries\Large\color{LiveCoMSMediumGrey}Appendix \arabic{appendix}\par} 683 | % \nolinenumbers% 684 | % %% Remove box colours for LiveComs 685 | % \begin{mdframed}[hidealllines=true, 686 | % % backgroundcolor=LiveCoMSLightBlue!10, 687 | % fontcolor=LiveCoMSDarkBlue, 688 | % % leftline=true,linecolor=LiveCoMSLightBlue,linewidth=1em 689 | % ] 690 | % \if@reqslineno\addtolength{\linenumbersep}{2em}\internallinenumbers\fi 691 | % }{% 692 | % \end{mdframed} 693 | % } 694 | 695 | \RequirePackage[colorlinks=true,allcolors=black,citecolor=LiveCoMSLightBlue,linkcolor=LiveCoMSMediumGrey,urlcolor=LiveCoMSLightBlue]{hyperref} 696 | \urlstyle{sf} 697 | 698 | % Other desired commands 699 | \renewcommand{\equationautorefname}{Eq.} 700 | \newcommand{\FIG}[1]{\autoref{fig:#1}} 701 | \newcommand{\TABLE}[1]{\autoref{tab:#1}} 702 | \newcommand{\EQ}[1]{\autoref{eq:#1}} 703 | \newcommand{\BOX}[1]{\autoref{box:#1}} 704 | \let\oldautoref\autoref 705 | \renewcommand{\autoref}[1]{\emph{\textbf{\oldautoref{#1}}}} 706 | 707 | \endinput 708 | -------------------------------------------------------------------------------- /notebooks/.gitignore: -------------------------------------------------------------------------------- 1 | data/* 2 | *.pyemma 3 | -------------------------------------------------------------------------------- /notebooks/03-msm-estimation-and-validation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 03 - MSM estimation and validation\n", 8 | "\n", 9 | "\"Creative\n", 10 | "\n", 11 | "In this notebook, we will cover how to estimate a Markov state model (MSM) and do model validation;\n", 12 | "we also show how to save and restore model and estimator objects.\n", 13 | "For this notebook, you need to 
know how to do data loading/visualization\n", 14 | "([Notebook 01 ➜ 📓](01-data-io-and-featurization.ipynb))\n", 15 | "as well as dimension reduction ([Notebook 02 ➜ 📓](02-dimension-reduction-and-discretization.ipynb)).\n", 16 | "\n", 17 | "We further recommend to have a look at the literature, if you are new to the concept of Markov state models:\n", 18 | "- prinz-11\n", 19 | "- bowman-14\n", 20 | "- husic-18\n", 21 | "\n", 22 | "Maintainers: [@cwehmeyer](https://github.com/cwehmeyer), [@marscher](https://github.com/marscher), [@thempel](https://github.com/thempel), [@psolsson](https://github.com/psolsson)\n", 23 | "\n", 24 | "**Remember**:\n", 25 | "- to run the currently highlighted cell, hold ⇧ Shift and press ⏎ Enter;\n", 26 | "- to get help for a specific function, place the cursor within the function's brackets, hold ⇧ Shift, and press ⇥ Tab;\n", 27 | "- you can find the full documentation at [PyEMMA.org](http://www.pyemma.org)." 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "%matplotlib inline\n", 37 | "import matplotlib.pyplot as plt\n", 38 | "import numpy as np\n", 39 | "import mdshare\n", 40 | "import pyemma" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "## Case 1: preprocessed, two-dimensional data (toy model)\n", 48 | "We load the two-dimensional trajectory from an archive using numpy and directly discretize the full space using $k$-means clustering:" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "file = mdshare.fetch('hmm-doublewell-2d-100k.npz', working_directory='data')\n", 58 | "with np.load(file) as fh:\n", 59 | " data = fh['trajectory']\n", 60 | "\n", 61 | "cluster = pyemma.coordinates.cluster_kmeans(data, k=50, max_iter=50)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": 
[ 68 | "To start with, we visualize the marginal and joint distributions of both components as well as the cluster centers:" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "fig, axes = plt.subplots(1, 2, figsize=(10, 4))\n", 78 | "pyemma.plots.plot_feature_histograms(data, feature_labels=['$x$', '$y$'], ax=axes[0])\n", 79 | "pyemma.plots.plot_density(*data.T, ax=axes[1], cbar=False, alpha=0.1)\n", 80 | "axes[1].scatter(*cluster.clustercenters.T, s=15, c='C1')\n", 81 | "axes[1].set_xlabel('$x$')\n", 82 | "axes[1].set_ylabel('$y$')\n", 83 | "axes[1].set_xlim(-4, 4)\n", 84 | "axes[1].set_ylim(-4, 4)\n", 85 | "axes[1].set_aspect('equal')\n", 86 | "fig.tight_layout()" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": {}, 92 | "source": [ 93 | "The first step after obtaining the discretized dynamics is finding a suitable lag time.\n", 94 | "The systematic approach is to estimate MSMs at various lag times and observe how the implied timescales (ITSs) of these models behave.\n", 95 | "In particular, we are looking for lag time ranges in which the implied timescales are constant\n", 96 | "(i.e., lag time independent as described in the manuscript in Section 2.1).\n", 97 | "To this aim, PyEMMA provides the `its()` function which we use to track the first three (`nits=3`) implied timescales:" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "its = pyemma.msm.its(cluster.dtrajs, lags=[1, 2, 3, 5, 7, 10], nits=3, errors='bayes')" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "We can pass the returned `its` object to the `pyemma.plots.plot_implied_timescales()` function:" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 
122 |     "pyemma.plots.plot_implied_timescales(its, ylog=False);" 123 |    ] 124 |   }, 125 |   { 126 |    "cell_type": "markdown", 127 |    "metadata": {}, 128 |    "source": [ 129 |     "The above plot tells us that there is one resolved process with an ITS of approximately $8.5$ steps (blue) which is largely invariant to the MSM lag time.\n", 130 |     "The other two ITSs (green, red) are smaller than the lag time (black line, grey-shaded area);\n", 131 |     "they correspond to processes which are faster than the lag time and, thus, are not resolved.\n", 132 |     "Since the implied timescales are, like the corresponding eigenvalues, sorted in decreasing order,\n", 133 |     "we know that all other remaining processes must be even faster.\n", 134 |     "\n", 135 |     "As MSMs tend to underestimate the true ITSs, we are looking for a converged maximum in the ITS plot.\n", 136 |     "In our case, any lag time before the slow process (blue line) crosses the lag time threshold (black line) would work.\n", 137 |     "To maximize the kinetic resolution, we choose the lag time $1$ step.\n", 138 |     "\n", 139 |     "To see whether our model satisfies Markovianity, we perform (and visualize) a Chapman-Kolmogorov (CK) test.\n", 140 |     "Since we aim at modeling the dynamics between metastable states rather than between microstates, this will be conducted in the space of metastable states.\n", 141 |     "The latter are identified automatically using PCCA++ (which is explained in [Notebook 05 📓](05-pcca-tpt.ipynb)).\n", 142 |     "We usually choose the number of metastable states according to the implied timescales plot by identifying a gap between the ITS.\n", 143 |     "For a single process, we can assume that there are two metastable states between which the process occurs."
144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "msm = pyemma.msm.estimate_markov_model(cluster.dtrajs, lag=1)\n", 153 | "pyemma.plots.plot_cktest(msm.cktest(2));" 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "metadata": {}, 159 | "source": [ 160 | "We can see a perfect agreement between models estimated at higher lag times and predictions of the model at lag time $1$ step.\n", 161 | "Thus, we have estimated a valid MSM according to basic model validation.\n", 162 | "\n", 163 | "Should a CK test fail, it means that the dynamics in the space of metastable states is not Markovian.\n", 164 | "This can have multiple causes since it is the result of the combination of all steps in the pipeline.\n", 165 | "In practice, one would attempt to find a better model by tuning hyper-parameters such as the number of metastable states, the MSM lag time or the number of cluster centers.\n", 166 | "Back-tracking the error by following the pipeline in an upstream direction,\n", 167 | "i.e., by starting with the number of metastable states, is usually advised. \n", 168 | "\n", 169 | "A failing CK test might further hint at poor sampling.\n", 170 | "This case is explained in more detail in [Notebook 08 📓](08-common-problems.ipynb#poorly_sampled_dw).\n", 171 | "\n", 172 | "## Case 2: low-dimensional molecular dynamics data (alanine dipeptide)\n", 173 | "We fetch the alanine dipeptide data set, load the backbone torsions into memory and directly discretize the full space using $k$-means clustering.\n", 174 | "In order to demonstrate how to adjust the MSM lag time,\n", 175 | "we will first set the number of cluster centers to $200$ and justify this choice later." 
176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "pdb = mdshare.fetch('alanine-dipeptide-nowater.pdb', working_directory='data')\n", 185 | "files = mdshare.fetch('alanine-dipeptide-*-250ns-nowater.xtc', working_directory='data')\n", 186 | "\n", 187 | "feat = pyemma.coordinates.featurizer(pdb)\n", 188 | "feat.add_backbone_torsions(periodic=False)\n", 189 | "data = pyemma.coordinates.load(files, features=feat)\n", 190 | "data_concatenated = np.concatenate(data)\n", 191 | "\n", 192 | "cluster = pyemma.coordinates.cluster_kmeans(data, k=200, max_iter=50, stride=10)" 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": {}, 198 | "source": [ 199 | "From the discrete trajectories, implied timescales can be estimated:" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "its = pyemma.msm.its(cluster.dtrajs, lags=[1, 2, 5, 10, 20, 50], nits=4, errors='bayes')" 209 | ] 210 | }, 211 | { 212 | "cell_type": "markdown", 213 | "metadata": {}, 214 | "source": [ 215 | "We visualize the marginal and joint distributions of both components as well as the cluster centers,\n", 216 | "and show the ITS convergence to help selecting a suitable lag time:" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": {}, 223 | "outputs": [], 224 | "source": [ 225 | "fig, axes = plt.subplots(1, 3, figsize=(12, 3))\n", 226 | "pyemma.plots.plot_feature_histograms(data_concatenated, feature_labels=['$\\Phi$', '$\\Psi$'], ax=axes[0])\n", 227 | "pyemma.plots.plot_density(*data_concatenated.T, ax=axes[1], cbar=False, alpha=0.1)\n", 228 | "axes[1].scatter(*cluster.clustercenters.T, s=15, c='C1')\n", 229 | "axes[1].set_xlabel('$\\Phi$')\n", 230 | "axes[1].set_ylabel('$\\Psi$')\n", 231 | "pyemma.plots.plot_implied_timescales(its, ax=axes[2], 
units='ps')\n", 232 | "fig.tight_layout()" 233 | ] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "metadata": {}, 238 | "source": [ 239 | "We observe three resolved processes with flat ITS for a lag time of approximately $10$ ps.\n", 240 | "\n", 241 | "Please note though that this ITS convergence analysis is based on the assumption that $200$ $k$-means centers are sufficient to discretize the dynamics.\n", 242 | "In order to study the influence of the clustering on the ITS convergence,\n", 243 | "we repeat the clustering and ITS convergence analysis for various number of cluster centers.\n", 244 | "For the sake of simplicity, we will restrict ourselves to the $k$-means algorithm; alternative clustering methods are presented in [Notebook 02 ➜ 📓](02-dimension-reduction-and-discretization.ipynb)." 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": null, 250 | "metadata": {}, 251 | "outputs": [], 252 | "source": [ 253 | "fig, axes = plt.subplots(2, 3, figsize=(12, 6))\n", 254 | "for i, k in enumerate([20, 50, 100]):\n", 255 | " cluster = pyemma.coordinates.cluster_kmeans(data, k=k, max_iter=50, stride=10)\n", 256 | " pyemma.plots.plot_density(*data_concatenated.T, ax=axes[0, i], cbar=False, alpha=0.1)\n", 257 | " axes[0, i].scatter(*cluster.clustercenters.T, s=15, c='C1')\n", 258 | " axes[0, i].set_xlabel('$\\Phi$')\n", 259 | " axes[0, i].set_ylabel('$\\Psi$')\n", 260 | " axes[0, i].set_title('k = {} centers'.format(k))\n", 261 | " pyemma.plots.plot_implied_timescales(\n", 262 | " pyemma.msm.its(cluster.dtrajs, lags=[1, 2, 5, 10, 20, 50], nits=4, errors='bayes'),\n", 263 | " ax=axes[1, i], units='ps')\n", 264 | " axes[1, i].set_ylim(1, 2000)\n", 265 | "fig.tight_layout()" 266 | ] 267 | }, 268 | { 269 | "cell_type": "markdown", 270 | "metadata": {}, 271 | "source": [ 272 | "We can see from this analysis that the ITS curves indeed converge towards the $200$ centers case and we can continue with estimating/validating an MSM.\n", 273 | 
"\n", 274 | "Before we continue with MSM estimation, let us discuss implied timescales convergence for large systems.\n", 275 | "Given sufficient sampling, the task is often to find a discretization that captures the process of interest well enough to obtain implied timescales that converge within the trajectory length. \n", 276 | "\n", 277 | "As we see in the above example with $k=20$ cluster centers,\n", 278 | "increasing the MSM lag time compensates for poor discretization to a certain extent.\n", 279 | "In a more realistic system, however, trajectories have a finite length that limits the choice of our MSM lag time.\n", 280 | "Furthermore, our clustering might be worse than the one presented above,\n", 281 | "so convergence might not be reached at all.\n", 282 | "Thus, we aim to converge the implied timescales at a low lag time by fine-tuning not only the number of cluster centers,\n", 283 | "but also feature selection and dimension reduction measures.\n", 284 | "This additionally ensures that our model has the maximum achievable temporal resolution.\n", 285 | "\n", 286 | "Please note that choosing an appropriate MSM lag time variationally\n", 287 | "(e.g., using VAMP scoring) is as far as we know not possible.\n", 288 | "\n", 289 | "Further details on how to account for poor discretization can be found in our notebook about hidden Markov models [Notebook 07 📓](07-hidden-markov-state-models.ipynb).\n", 290 | "An example on how implied timescales behave in the limit of poor sampling is shown in [Notebook 08 📓](08-common-problems.ipynb).\n", 291 | "\n", 292 | "Now, let's continue with the alanine dipeptide system.\n", 293 | "We estimate an MSM at lag time $10$ ps and, given that we have three slow processes, perform a CK test for four metastable states.\n", 294 | "\n", 295 | "⚠️ In general, the number of metastable states is a modeler's choice and will be explained in more detail in [Notebook 04 ➜ 📓](04-msm-analysis.ipynb) and [Notebook 07 ➜ 
📓](07-hidden-markov-state-models.ipynb)." 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": null, 301 | "metadata": {}, 302 | "outputs": [], 303 | "source": [ 304 | "msm = pyemma.msm.estimate_markov_model(cluster.dtrajs, lag=10, dt_traj='1 ps')\n", 305 | "pyemma.plots.plot_cktest(msm.cktest(4), units='ps');" 306 | ] 307 | }, 308 | { 309 | "cell_type": "markdown", 310 | "metadata": {}, 311 | "source": [ 312 | "The model prediction and re-estimation are in quite good agreement but we do see some small deviations in the first row.\n", 313 | "\n", 314 | "To obtain error bars for the model prediction,\n", 315 | "we estimate a Bayesian MSM under the same conditions as the regular MSM and repeat the CK test for the Bayesian model:" 316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": null, 321 | "metadata": {}, 322 | "outputs": [], 323 | "source": [ 324 | "bayesian_msm = pyemma.msm.bayesian_markov_model(cluster.dtrajs, lag=10, dt_traj='1 ps', conf=0.95)\n", 325 | "pyemma.plots.plot_cktest(bayesian_msm.cktest(4), units='ps');" 326 | ] 327 | }, 328 | { 329 | "cell_type": "markdown", 330 | "metadata": {}, 331 | "source": [ 332 | "Bayesian MSMs are an extension of regular maximum likelihood (ML) MSMs that represent a sample of (reversible) transition matrices.\n", 333 | "As presented here, they are usually used to compute confidence intervals.\n", 334 | "\n", 335 | "A regular MSM estimates a single transition matrix which maximizes the likelihood of the data given the model.\n", 336 | "Thus, all derived quantities are based on this ML estimation.\n", 337 | "A Bayesian MSM, in comparison, starts with a ML-MSM and samples transition matrices using a Monte Carlo scheme.\n", 338 | "Hence, target property posterior distributions can be estimated by computing these properties from each individual transition matrix in the sample. 
\n", 339 | "\n", 340 | "The initial ML-MSM used for the transition matrix sampling is contained in the `BayesianMSM` object with its properties accessible to the user.\n", 341 | "Please note that different default estimation parameters might yield results that numerically differ from a directly estimated ML-MSM.\n", 342 | "\n", 343 | "In the case of the low dimensional molecular dynamics data, we thus observe that the deviations are within a $95\\%$ confidence interval.\n", 344 | "\n", 345 | "### Persisting and restoring estimators\n", 346 | "\n", 347 | "Because some estimations we have performed so far require more computational effort (e.g., TICA or kmeans with lots of centers),\n", 348 | "it could be desirable to persist the resulting models in a file.\n", 349 | "Luckily, PyEMMA provides a convenience method for this.\n", 350 | "Just try it out:" 351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "execution_count": null, 356 | "metadata": {}, 357 | "outputs": [], 358 | "source": [ 359 | "cluster.save('nb3.pyemma', model_name='kmeans_k200')" 360 | ] 361 | }, 362 | { 363 | "cell_type": "markdown", 364 | "metadata": {}, 365 | "source": [ 366 | "Now we have stored the current state of the clustering estimator to disk.\n", 367 | "A file can contain multiple models, this is why we have used the `model_name` argument to specify the name.\n", 368 | "If omitted, the estimator will be saved under the name `default_model`.\n", 369 | "\n", 370 | "Assume that we have restarted our Python session and do not want to re-compute everything.\n", 371 | "We can now restore the previously saved estimator via" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": null, 377 | "metadata": {}, 378 | "outputs": [], 379 | "source": [ 380 | "cluster_restored = pyemma.load('nb3.pyemma', model_name='kmeans_k200')\n", 381 | "\n", 382 | "# check that nothing has changed\n", 383 | "np.testing.assert_allclose(cluster_restored.clustercenters, cluster.clustercenters, 
atol=1e-15)" 384 |    ] 385 |   }, 386 |   { 387 |    "cell_type": "markdown", 388 |    "metadata": {}, 389 |    "source": [ 390 |     "To check the contents of a file, you can utilize the list_models function of PyEMMA:" 391 |    ] 392 |   }, 393 |   { 394 |    "cell_type": "code", 395 |    "execution_count": null, 396 |    "metadata": {}, 397 |    "outputs": [], 398 |    "source": [ 399 |     "pyemma.list_models('nb3.pyemma')" 400 |    ] 401 |   }, 402 |   { 403 |    "cell_type": "code", 404 |    "execution_count": null, 405 |    "metadata": {}, 406 |    "outputs": [], 407 |    "source": [ 408 |     "# we now remove this file again\n", 409 |     "import os\n", 410 |     "os.unlink('nb3.pyemma')" 411 |    ] 412 |   }, 413 |   { 414 |    "cell_type": "markdown", 415 |    "metadata": {}, 416 |    "source": [ 417 |     "As you see, all important attributes of an estimator will be stored.\n", 418 |     "PyEMMA provides future compatibility of stored estimators,\n", 419 |     "which means that you can always load your files in a new version, but are then restricted to not using older ones.\n", 420 |     "\n", 421 |     "#### Exercise 1\n", 422 |     "\n", 423 |     "Load the heavy atom distances into memory, perform PCA and TICA (`lag=3`) with `dim=2`,\n", 424 |     "then discretize with $100$ $k$-means centers and a stride of $10$." 425 |    ] 426 |   }, 427 |   { 428 |    "cell_type": "code", 429 |    "execution_count": null, 430 |    "metadata": { 431 |     "solution2": "hidden", 432 |     "solution2_first": true 433 |    }, 434 |    "outputs": [], 435 |    "source": [ 436 |     "feat = #FIXME\n", 437 |     "feat. 
#FIXME\n", 438 | "data = #FIXME\n", 439 | "\n", 440 | "pca = pyemma.coordinates.pca(data, dim=2)\n", 441 | "tica = #FIXME\n", 442 | "\n", 443 | "pca_concatenated = np.concatenate(pca.get_output())\n", 444 | "tica_concatenated = #FIXME\n", 445 | "\n", 446 | "cls_pca = pyemma.coordinates.cluster_kmeans(pca, k=100, max_iter=50, stride=10)\n", 447 | "cls_tica = #FIXME\n", 448 | "\n", 449 | "its_pca = pyemma.msm.its(\n", 450 | " cls_pca.dtrajs, lags=[1, 2, 5, 10, 20, 50], nits=4, errors='bayes')\n", 451 | "its_tica = #FIXME" 452 | ] 453 | }, 454 | { 455 | "cell_type": "markdown", 456 | "metadata": { 457 | "solution2": "hidden" 458 | }, 459 | "source": [ 460 | "###### Solution" 461 | ] 462 | }, 463 | { 464 | "cell_type": "code", 465 | "execution_count": null, 466 | "metadata": { 467 | "solution2": "hidden" 468 | }, 469 | "outputs": [], 470 | "source": [ 471 | "feat = pyemma.coordinates.featurizer(pdb)\n", 472 | "pairs = feat.pairs(feat.select_Heavy())\n", 473 | "feat.add_distances(pairs, periodic=False)\n", 474 | "data = pyemma.coordinates.load(files, features=feat)\n", 475 | "\n", 476 | "pca = pyemma.coordinates.pca(data, dim=2)\n", 477 | "tica = pyemma.coordinates.tica(data, lag=3, dim=2)\n", 478 | "\n", 479 | "pca_concatenated = np.concatenate(pca.get_output())\n", 480 | "tica_concatenated = np.concatenate(tica.get_output())\n", 481 | "\n", 482 | "cls_pca = pyemma.coordinates.cluster_kmeans(pca, k=100, max_iter=50, stride=10)\n", 483 | "cls_tica = pyemma.coordinates.cluster_kmeans(tica, k=100, max_iter=50, stride=10)\n", 484 | "\n", 485 | "its_pca = pyemma.msm.its(\n", 486 | " cls_pca.dtrajs, lags=[1, 2, 5, 10, 20, 50], nits=4, errors='bayes')\n", 487 | "its_tica = pyemma.msm.its(\n", 488 | " cls_tica.dtrajs, lags=[1, 2, 5, 10, 20, 50], nits=4, errors='bayes')" 489 | ] 490 | }, 491 | { 492 | "cell_type": "markdown", 493 | "metadata": {}, 494 | "source": [ 495 | "Let's visualize the ITS convergence for both projections:" 496 | ] 497 | }, 498 | { 499 | "cell_type": 
"code", 500 | "execution_count": null, 501 | "metadata": {}, 502 | "outputs": [], 503 | "source": [ 504 | "fig, axes = plt.subplots(2, 3, figsize=(12, 6))\n", 505 | "pyemma.plots.plot_feature_histograms(pca_concatenated, ax=axes[0, 0])\n", 506 | "pyemma.plots.plot_feature_histograms(tica_concatenated, ax=axes[1, 0])\n", 507 | "axes[0, 0].set_title('PCA')\n", 508 | "axes[1, 0].set_title('TICA')\n", 509 | "pyemma.plots.plot_density(*pca_concatenated.T, ax=axes[0, 1], cbar=False, alpha=0.1)\n", 510 | "axes[0, 1].scatter(*cls_pca.clustercenters.T, s=15, c='C1')\n", 511 | "axes[0, 1].set_xlabel('PC 1')\n", 512 | "axes[0, 1].set_ylabel('PC 2')\n", 513 | "pyemma.plots.plot_density(*tica_concatenated.T, ax=axes[1, 1], cbar=False, alpha=0.1)\n", 514 | "axes[1, 1].scatter(*cls_tica.clustercenters.T, s=15, c='C1')\n", 515 | "axes[1, 1].set_xlabel('IC 1')\n", 516 | "axes[1, 1].set_ylabel('IC 2')\n", 517 | "pyemma.plots.plot_implied_timescales(its_pca, ax=axes[0, 2], units='ps')\n", 518 | "pyemma.plots.plot_implied_timescales(its_tica, ax=axes[1, 2], units='ps')\n", 519 | "axes[0, 2].set_ylim(1, 2000)\n", 520 | "axes[1, 2].set_ylim(1, 2000)\n", 521 | "fig.tight_layout()" 522 | ] 523 | }, 524 | { 525 | "cell_type": "markdown", 526 | "metadata": {}, 527 | "source": [ 528 | "Despite the fact that PCA yields a projection with some defined basins,\n", 529 | "the ITS plot shows that only one \"slow\" process is resolved which is more than one order of magnitude too fast.\n", 530 | "\n", 531 | "TICA does find three slow processes which agree (in terms of the implied timescales) with the backbone torsions example above.\n", 532 | "\n", 533 | "We conclude that this PCA projection is not suitable to resolve the slow dynamics of alanine dipeptide and we will continue to estimate/validate the TICA-based projection.\n", 534 | "\n", 535 | "#### Exercise 2\n", 536 | "\n", 537 | "Estimate a Bayesian MSM at lag time $10$ ps and perform/show a CK test for four metastable states." 
538 |    ] 539 |   }, 540 |   { 541 |    "cell_type": "code", 542 |    "execution_count": null, 543 |    "metadata": { 544 |     "solution2": "hidden", 545 |     "solution2_first": true 546 |    }, 547 |    "outputs": [], 548 |    "source": [ 549 |     "bayesian_msm = pyemma.msm.bayesian_markov_model(cls_tica.dtrajs, lag=10, dt_traj='1 ps')\n", 550 |     "pyemma.plots. #FIXME" 551 |    ] 552 |   }, 553 |   { 554 |    "cell_type": "markdown", 555 |    "metadata": { 556 |     "solution2": "hidden" 557 |    }, 558 |    "source": [ 559 |     "###### Solution" 560 |    ] 561 |   }, 562 |   { 563 |    "cell_type": "code", 564 |    "execution_count": null, 565 |    "metadata": { 566 |     "solution2": "hidden" 567 |    }, 568 |    "outputs": [], 569 |    "source": [ 570 |     "bayesian_msm = pyemma.msm.bayesian_markov_model(cls_tica.dtrajs, lag=10, dt_traj='1 ps')\n", 571 |     "pyemma.plots.plot_cktest(bayesian_msm.cktest(4), units='ps');" 572 |    ] 573 |   }, 574 |   { 575 |    "cell_type": "markdown", 576 |    "metadata": {}, 577 |    "source": [ 578 |     "We again see a good agreement between model prediction and re-estimation.\n", 579 |     "\n", 580 |     "## Wrapping up\n", 581 |     "In this notebook, we have learned how to estimate a regular or Bayesian MSM from discretized molecular simulation data with `pyemma` and how to perform basic model validation.\n", 582 |     "In detail, we have selected a suitable lag time by using\n", 583 |     "- `pyemma.msm.its()` to obtain an implied timescale object and\n", 584 |     "- `pyemma.plots.plot_implied_timescales()` to visualize the convergence of the implied timescales.\n", 585 |     "\n", 586 |     "We then have used\n", 587 |     "- `pyemma.msm.estimate_markov_model()` to estimate a regular MSM,\n", 588 |     "- `pyemma.msm.bayesian_markov_model()` to estimate a Bayesian MSM,\n", 589 |     "- the `timescales()` method of an estimated MSM object to access its implied timescales,\n", 590 |     "- the `cktest()` method of an estimated MSM object to perform a Chapman-Kolmogorov test, and\n", 591 |     "- `pyemma.plots.plot_cktest()` to visualize the latter."
592 | ] 593 | }, 594 | { 595 | "cell_type": "markdown", 596 | "metadata": {}, 597 | "source": [ 598 | "## References\n", 599 | "\n", 600 | "[^]Prinz, Jan-Hendrik and Wu, Hao and Sarich, Marco and Keller, Bettina and Senne, Martin and Held, Martin and Chodera, John D. and Schütte, Christof and Noé, Frank. 2011. _Markov models of molecular kinetics: Generation and validation_. [URL](http://scitation.aip.org/content/aip/journal/jcp/134/17/10.1063/1.3565032)\n", 601 | "\n", 602 | "[^]Gregory R. Bowman and Vijay S. Pande and Frank Noé. 2014. _An Introduction to Markov State Models and Their Application to Long Timescale Molecular Simulation_. [URL](https://doi.org/10.1007%2F978-94-007-7606-7)\n", 603 | "\n", 604 | "[^]Brooke E. Husic and Vijay S. Pande. 2018. _Markov State Models: From an Art to a Science_.\n", 605 | "\n" 606 | ] 607 | } 608 | ], 609 | "metadata": { 610 | "kernelspec": { 611 | "display_name": "Python 3", 612 | "language": "python", 613 | "name": "python3" 614 | }, 615 | "language_info": { 616 | "codemirror_mode": { 617 | "name": "ipython", 618 | "version": 3 619 | }, 620 | "file_extension": ".py", 621 | "mimetype": "text/x-python", 622 | "name": "python", 623 | "nbconvert_exporter": "python", 624 | "pygments_lexer": "ipython3", 625 | "version": "3.6.5" 626 | }, 627 | "toc": { 628 | "base_numbering": 1, 629 | "nav_menu": {}, 630 | "number_sections": false, 631 | "sideBar": true, 632 | "skip_h1_title": true, 633 | "title_cell": "Table of Contents", 634 | "title_sidebar": "Contents", 635 | "toc_cell": false, 636 | "toc_position": {}, 637 | "toc_section_display": true, 638 | "toc_window_display": true 639 | } 640 | }, 641 | "nbformat": 4, 642 | "nbformat_minor": 2 643 | } 644 | -------------------------------------------------------------------------------- /notebooks/04-msm-analysis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | 
"source": [ 7 | "# 04 - MSM analysis\n", 8 | "\n", 9 | "\"Creative\n", 10 | "\n", 11 | "In this notebook, we will cover how to analyze an MSM and how the modeled processes correspond to MSM spectral properties.\n", 12 | "We assume that you are familiar with data loading/visualization\n", 13 | "([Notebook 01 ➜ 📓](01-data-io-and-featurization.ipynb)),\n", 14 | "dimension reduction ([Notebook 02 ➜ 📓](02-dimension-reduction-and-discretization.ipynb)), and\n", 15 | "the estimation and validation process ([Notebook 03 ➜ 📓](03-msm-estimation-and-validation.ipynb)).\n", 16 | "\n", 17 | "Maintainers: [@cwehmeyer](https://github.com/cwehmeyer), [@marscher](https://github.com/marscher), [@thempel](https://github.com/thempel), [@psolsson](https://github.com/psolsson)\n", 18 | "\n", 19 | "**Remember**:\n", 20 | "- to run the currently highlighted cell, hold ⇧ Shift and press ⏎ Enter;\n", 21 | "- to get help for a specific function, place the cursor within the function's brackets, hold ⇧ Shift, and press ⇥ Tab;\n", 22 | "- you can find the full documentation at [PyEMMA.org](http://www.pyemma.org)." 
23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "%matplotlib inline\n", 32 | "import matplotlib.pyplot as plt\n", 33 | "import matplotlib as mpl\n", 34 | "import numpy as np\n", 35 | "import mdshare\n", 36 | "import pyemma" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "## Case 1: preprocessed, two-dimensional data (toy model)\n", 44 | "We load the two-dimensional trajectory from an archive using numpy,\n", 45 | "directly discretize the full space using $k$-means clustering,\n", 46 | "visualize the marginal and joint distributions of both components as well as the cluster centers,\n", 47 | "and show the implied timescale (ITS) convergence:" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "file = mdshare.fetch('hmm-doublewell-2d-100k.npz', working_directory='data')\n", 57 | "with np.load(file) as fh:\n", 58 | " data = fh['trajectory']\n", 59 | "\n", 60 | "cluster = pyemma.coordinates.cluster_kmeans(data, k=50, max_iter=50)\n", 61 | "its = pyemma.msm.its(\n", 62 | " cluster.dtrajs, lags=[1, 2, 3, 5, 7, 10], nits=3, errors='bayes')\n", 63 | "\n", 64 | "fig, axes = plt.subplots(1, 3, figsize=(12, 3))\n", 65 | "pyemma.plots.plot_feature_histograms(data, feature_labels=['$x$', '$y$'], ax=axes[0])\n", 66 | "pyemma.plots.plot_density(*data.T, ax=axes[1], cbar=False, alpha=0.1)\n", 67 | "axes[1].scatter(*cluster.clustercenters.T, s=15, c='C1')\n", 68 | "axes[1].set_xlabel('$x$')\n", 69 | "axes[1].set_ylabel('$y$')\n", 70 | "axes[1].set_xlim(-4, 4)\n", 71 | "axes[1].set_ylim(-4, 4)\n", 72 | "axes[1].set_aspect('equal')\n", 73 | "pyemma.plots.plot_implied_timescales(its, ylog=False, ax=axes[2])\n", 74 | "fig.tight_layout()" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "The plots show us the 
marginal (left panel) and joint distributions along with the cluster centers (middle panel).\n", 82 | "The implied timescales are converged (right panel). \n", 83 | "\n", 84 | "Before we proceed, let's have a look at the implied timescales error bars.\n", 85 | "They were computed from a Bayesian MSM, as requested by the `errors='bayes'` argument of the `pyemma.msm.its()` function.\n", 86 | "As mentioned before, Bayesian MSMs incorporate a sample of transition matrices.\n", 87 | "Target properties such as implied timescales can now simply be computed from the individual matrices.\n", 88 | "Thereby, the posterior distributions of these properties can be estimated.\n", 89 | "The ITS plot shows a confidence interval that contains $95\\%$ of the Bayesian samples." 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "bayesian_msm = pyemma.msm.bayesian_markov_model(cluster.dtrajs, lag=1, conf=0.95)" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "For any PyEMMA method that derives target properties from MSMs, sample mean and confidence intervals (as defined by the function argument above) are directly accessible with `sample_mean()` and `sample_conf()`.\n", 106 | "Further, `sample_std()` is available for computing the standard deviation.\n", 107 | "In the more general case, it might be interesting to extract the full sample of a function evaluation with `sample_f()`.\n", 108 | "The syntax is equivalent for all those functions." 
109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "sample_mean = bayesian_msm.sample_mean('timescales', k=1)\n", 118 | "sample_conf_l, sample_conf_r = bayesian_msm.sample_conf('timescales', k=1)\n", 119 | "\n", 120 | "print('Mean of first ITS: {:f}'.format(sample_mean[0]))\n", 121 | "print('Confidence interval: [{:f}, {:f}]'.format(sample_conf_l[0], sample_conf_r[0]))" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "Please note that sample mean and maximum likelihood estimates are not identical and generally do not provide numerically identical results.\n", 129 | "\n", 130 | "Now, for the sake of simplicity we proceed with the analysis of a maximum likelihood MSM.\n", 131 | "We estimate it at lag time $1$ step..." 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "msm = pyemma.msm.estimate_markov_model(cluster.dtrajs, lag=1)" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": {}, 146 | "source": [ 147 | "... 
and check for disconnectivity.\n", 148 | "The MSM is constructed on the largest set of discrete states that are (reversibly) connected.\n", 149 | "The `active_state_fraction` and `active_count_fraction` show us the fraction of discrete states and transition counts from our data which are part of this largest set and, thus, used for the model:" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": {}, 156 | "outputs": [], 157 | "source": [ 158 | "print('fraction of states used = {:f}'.format(msm.active_state_fraction))\n", 159 | "print('fraction of counts used = {:f}'.format(msm.active_count_fraction))" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "The fraction is, in both cases, $1$ and, thus, we have no disconnected states (which we would have to exclude from our analysis).\n", 167 | "\n", 168 | "If there were any disconnectivities in our data (fractions $<1$),\n", 169 | "we could access the indices of the **active states** (members of the largest connected set) via the `active_set` attribute:" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": null, 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": [ 178 | "print(msm.active_set)" 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": {}, 184 | "source": [ 185 | "With this potential issue out of the way, we can extract our first (stationary/thermodynamic) property,\n", 186 | "the `stationary_distribution` or, as a shortcut, `pi`:" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": null, 192 | "metadata": {}, 193 | "outputs": [], 194 | "source": [ 195 | "print(msm.stationary_distribution)\n", 196 | "print('sum of weights = {:f}'.format(msm.pi.sum()))" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": {}, 202 | "source": [ 203 | "The attribute `msm.pi` tells us, for each discrete state, the absolute probability 
"Please note that the $\\pi$ is fundamentally different from a normalized histogram of states:\n", 209 | "for the histogram of states to accurately describe the stationary distribution, the data needs to be sampled from global equilibrium, i.e., the data points need to be statistically independent.\n", 210 | "The MSM approach, on the other hand, only requires local equilibrium, i.e., statistical independence of state transitions.\n", 211 | "Thus, the MSM approach requires a much weaker and, in practice, much easier to satisfy condition than simply counting state visits.\n", 212 | "\n", 213 | "We can use the stationary distribution to, e.g., visualize the weight of the discrete states and, thus, to highlight which areas of our feature space are most probable.\n",
used to visualize this dataset in [Notebook 01 ➜ 📓](01-data-io-and-featurization.ipynb).\n", 241 | "This might be necessary if the data points are not sampled from global equilibrium.\n", 242 | "\n", 243 | "In this case, we assign the weight of the corresponding discrete state to each data point and pass this information to the plotting function via its `weights` parameter:" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "metadata": {}, 250 | "outputs": [], 251 | "source": [ 252 | "fig, ax, misc = pyemma.plots.plot_free_energy(\n", 253 | " *data.T,\n", 254 | " weights=np.concatenate(msm.trajectory_weights()),\n", 255 | " legacy=False)\n", 256 | "ax.set_xlabel('$x$')\n", 257 | "ax.set_ylabel('$y$')\n", 258 | "ax.set_xlim(-4, 4)\n", 259 | "ax.set_ylim(-4, 4)\n", 260 | "ax.set_aspect('equal')\n", 261 | "fig.tight_layout()" 262 | ] 263 | }, 264 | { 265 | "cell_type": "markdown", 266 | "metadata": {}, 267 | "source": [ 268 | "We will see further uses of the stationary distribution later.\n", 269 | "But for now, we continue the analysis of our model by visualizing its (right) eigenvectors which encode the dynamical processes.\n", 270 | "First, we notice that the first right eigenvector is a constant $1$." 
271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": null, 276 | "metadata": {}, 277 | "outputs": [], 278 | "source": [ 279 | "eigvec = msm.eigenvectors_right()\n", 280 | "print('first eigenvector is one: {} (min={}, max={})'.format(\n", 281 | " np.allclose(eigvec[:, 0], 1, atol=1e-15), eigvec[:, 0].min(), eigvec[:, 0].max()))" 282 | ] 283 | }, 284 | { 285 | "cell_type": "markdown", 286 | "metadata": {}, 287 | "source": [ 288 | "Second, the higher eigenvectors can be visualized as follows:" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": null, 294 | "metadata": {}, 295 | "outputs": [], 296 | "source": [ 297 | "fig, axes = plt.subplots(1, 3, figsize=(12, 3))\n", 298 | "for i, ax in enumerate(axes.flat):\n", 299 | " pyemma.plots.plot_contour(\n", 300 | " *data.T, eigvec[cluster.dtrajs[0], i + 1], ax=ax, cmap='PiYG',\n", 301 | " cbar_label='{}. right eigenvector'.format(i + 2), mask=True)\n", 302 | " ax.scatter(*cluster.clustercenters.T, s=15, c='C1')\n", 303 | " ax.set_xlabel('$x$')\n", 304 | " ax.set_xlim(-4, 4)\n", 305 | " ax.set_ylim(-4, 4)\n", 306 | " ax.set_aspect('equal')\n", 307 | "axes[0].set_ylabel('$y$')\n", 308 | "fig.tight_layout()" 309 | ] 310 | }, 311 | { 312 | "cell_type": "markdown", 313 | "metadata": {}, 314 | "source": [ 315 | "The right eigenvectors can be used to visualize the processes governed by the corresponding implied timescales.\n", 316 | "The first right eigenvector (always) is $(1,\\dots,1)^\\top$ for an MSM transition matrix and it corresponds to the stationary process (infinite implied timescale).\n", 317 | "\n", 318 | "The second right eigenvector corresponds to the slowest process;\n", 319 | "its entries are negative for one group of discrete states and positive for the other group.\n", 320 | "This tells us that the slowest process happens between these two groups and that the process relaxes on the slowest ITS ($\\approx 8.5$ steps).\n", 321 | "\n", 322 | "The third and fourth 
"visualize the marginal and joint distributions of both components as well as the cluster centers,\n",
mdshare.fetch('alanine-dipeptide-nowater.pdb', working_directory='data')\n", 377 | "files = mdshare.fetch('alanine-dipeptide-*-250ns-nowater.xtc', working_directory='data')\n", 378 | "\n", 379 | "feat = pyemma.coordinates.featurizer(pdb)\n", 380 | "feat.add_backbone_torsions(periodic=False)\n", 381 | "data = pyemma.coordinates.load(files, features=feat)\n", 382 | "data_concatenated = np.concatenate(data)\n", 383 | "\n", 384 | "cluster = pyemma.coordinates.cluster_kmeans(data, k=100, max_iter=50, stride=10)\n", 385 | "dtrajs_concatenated = np.concatenate(cluster.dtrajs)\n", 386 | "\n", 387 | "its = pyemma.msm.its(\n", 388 | " cluster.dtrajs, lags=[1, 2, 5, 10, 20, 50], nits=4, errors='bayes')\n", 389 | "\n", 390 | "fig, axes = plt.subplots(1, 3, figsize=(12, 3))\n", 391 | "pyemma.plots.plot_feature_histograms(\n", 392 | " np.concatenate(data), feature_labels=['$\\Phi$', '$\\Psi$'], ax=axes[0])\n", 393 | "pyemma.plots.plot_density(*data_concatenated.T, ax=axes[1], cbar=False, alpha=0.1)\n", 394 | "axes[1].scatter(*cluster.clustercenters.T, s=15, c='C1')\n", 395 | "axes[1].set_xlabel('$\\Phi$')\n", 396 | "axes[1].set_ylabel('$\\Psi$')\n", 397 | "pyemma.plots.plot_implied_timescales(its, ax=axes[2], units='ps')\n", 398 | "fig.tight_layout()" 399 | ] 400 | }, 401 | { 402 | "cell_type": "markdown", 403 | "metadata": {}, 404 | "source": [ 405 | "The plots show us the marginal (left panel) and joint distributions along with the cluster centers (middle panel).\n", 406 | "The implied timescales are converged (right panel). 
\n", 407 | "\n", 408 | "We then estimate an MSM at lag time $10$ ps and visualize the stationary distribution by coloring all data points according to the stationary weight of the discrete state they belong to:" 409 | ] 410 | }, 411 | { 412 | "cell_type": "code", 413 | "execution_count": null, 414 | "metadata": {}, 415 | "outputs": [], 416 | "source": [ 417 | "msm = pyemma.msm.estimate_markov_model(cluster.dtrajs, lag=10, dt_traj='1 ps')\n", 418 | "\n", 419 | "print('fraction of states used = {:f}'.format(msm.active_state_fraction))\n", 420 | "print('fraction of counts used = {:f}'.format(msm.active_count_fraction))\n", 421 | "\n", 422 | "fig, ax, misc = pyemma.plots.plot_contour(\n", 423 | " *data_concatenated.T, msm.pi[dtrajs_concatenated],\n", 424 | " cbar_label='stationary_distribution',\n", 425 | " method='nearest', mask=True)\n", 426 | "ax.scatter(*cluster.clustercenters.T, s=15, c='C1')\n", 427 | "ax.set_xlabel('$\\Phi$')\n", 428 | "ax.set_ylabel('$\\Psi$')\n", 429 | "fig.tight_layout()" 430 | ] 431 | }, 432 | { 433 | "cell_type": "markdown", 434 | "metadata": {}, 435 | "source": [ 436 | "Next, we visualize the first six right eigenvectors:" 437 | ] 438 | }, 439 | { 440 | "cell_type": "code", 441 | "execution_count": null, 442 | "metadata": {}, 443 | "outputs": [], 444 | "source": [ 445 | "eigvec = msm.eigenvectors_right()\n", 446 | "print('first eigenvector is one: {} (min={}, max={})'.format(\n", 447 | " np.allclose(eigvec[:, 0], 1, atol=1e-15), eigvec[:, 0].min(), eigvec[:, 0].max()))\n", 448 | "\n", 449 | "fig, axes = plt.subplots(2, 3, figsize=(12, 6))\n", 450 | "for i, ax in enumerate(axes.flat):\n", 451 | " pyemma.plots.plot_contour(\n", 452 | " *data_concatenated.T, eigvec[dtrajs_concatenated, i + 1], ax=ax, cmap='PiYG',\n", 453 | " cbar_label='{}. 
"We now proceed with our validation process using a Bayesian MSM with four metastable states:"
"A failed Chapman-Kolmogorov test can thus also hint at a bad choice of the metastable state number.\n",
#FIXME\n", 532 | "data = #FIXME\n", 533 | "\n", 534 | "tica = #FIXME\n", 535 | "tica_concatenated = #FIXME\n", 536 | "\n", 537 | "cluster = #FIXME\n", 538 | "dtrajs_concatenated = #FIXME\n", 539 | "\n", 540 | "its = #FIXME\n", 541 | "\n", 542 | "fig, axes = plt.subplots(1, 3, figsize=(12, 3))\n", 543 | "pyemma.plots.plot_feature_histograms(tica_concatenated, feature_labels=['IC 1', 'IC 2'], ax=axes[0])\n", 544 | "pyemma.plots.plot_density(*tica_concatenated.T, ax=axes[1], cbar=False, alpha=0.3)\n", 545 | "axes[1].scatter(*cluster.clustercenters.T, s=15, c='C1')\n", 546 | "axes[1].set_xlabel('IC 1')\n", 547 | "axes[1].set_ylabel('IC 2')\n", 548 | "pyemma.plots.plot_implied_timescales(its, ax=axes[2], units='ps')\n", 549 | "fig.tight_layout()" 550 | ] 551 | }, 552 | { 553 | "cell_type": "markdown", 554 | "metadata": { 555 | "solution2": "hidden" 556 | }, 557 | "source": [ 558 | "###### Solution" 559 | ] 560 | }, 561 | { 562 | "cell_type": "code", 563 | "execution_count": null, 564 | "metadata": { 565 | "solution2": "hidden" 566 | }, 567 | "outputs": [], 568 | "source": [ 569 | "feat = pyemma.coordinates.featurizer(pdb)\n", 570 | "pairs = feat.pairs(feat.select_Heavy())\n", 571 | "feat.add_distances(pairs, periodic=False)\n", 572 | "data = pyemma.coordinates.load(files, features=feat)\n", 573 | "\n", 574 | "tica = pyemma.coordinates.tica(data, lag=3, dim=2)\n", 575 | "tica_concatenated = np.concatenate(tica.get_output())\n", 576 | "\n", 577 | "cluster = pyemma.coordinates.cluster_kmeans(tica, k=100, max_iter=50, stride=10)\n", 578 | "dtrajs_concatenated = np.concatenate(cluster.dtrajs)\n", 579 | "\n", 580 | "its = pyemma.msm.its(\n", 581 | " cluster.dtrajs, lags=[1, 2, 5, 10, 20, 50], nits=4, errors='bayes')\n", 582 | "\n", 583 | "fig, axes = plt.subplots(1, 3, figsize=(12, 3))\n", 584 | "pyemma.plots.plot_feature_histograms(tica_concatenated, feature_labels=['IC 1', 'IC 2'], ax=axes[0])\n", 585 | "pyemma.plots.plot_density(*tica_concatenated.T, ax=axes[1], 
cbar=False, alpha=0.3)\n", 586 | "axes[1].scatter(*cluster.clustercenters.T, s=15, c='C1')\n", 587 | "axes[1].set_xlabel('IC 1')\n", 588 | "axes[1].set_ylabel('IC 2')\n", 589 | "pyemma.plots.plot_implied_timescales(its, ax=axes[2], units='ps')\n", 590 | "fig.tight_layout()" 591 | ] 592 | }, 593 | { 594 | "cell_type": "markdown", 595 | "metadata": {}, 596 | "source": [ 597 | "#### Exercise 2\n", 598 | "Estimate an MSM at lag time $10$ ps with `dt_traj='1 ps'` and visualize the stationary distribution using a two-dimensional colored scatter plot of all data points in TICA space." 599 | ] 600 | }, 601 | { 602 | "cell_type": "code", 603 | "execution_count": null, 604 | "metadata": { 605 | "solution2": "hidden", 606 | "solution2_first": true 607 | }, 608 | "outputs": [], 609 | "source": [ 610 | "msm = #FIXME\n", 611 | "\n", 612 | "print('fraction of states used = {:f}'. #FIXME\n", 613 | "print('fraction of counts used = {:f}'. #FIXME\n", 614 | "\n", 615 | "fig, ax, misc = pyemma.plots.plot_contour(\n", 616 | " *tica_concatenated.T, msm.pi[dtrajs_concatenated],\n", 617 | " cbar_label='stationary_distribution',\n", 618 | " method='nearest', mask=True)\n", 619 | "ax.scatter(*cluster.clustercenters.T, s=15, c='C1')\n", 620 | "ax.set_xlabel('IC 1')\n", 621 | "ax.set_ylabel('IC 2')\n", 622 | "fig.tight_layout()" 623 | ] 624 | }, 625 | { 626 | "cell_type": "markdown", 627 | "metadata": { 628 | "solution2": "hidden" 629 | }, 630 | "source": [ 631 | "###### Solution" 632 | ] 633 | }, 634 | { 635 | "cell_type": "code", 636 | "execution_count": null, 637 | "metadata": { 638 | "solution2": "hidden" 639 | }, 640 | "outputs": [], 641 | "source": [ 642 | "msm = pyemma.msm.estimate_markov_model(cluster.dtrajs, lag=10, dt_traj='1 ps')\n", 643 | "\n", 644 | "print('fraction of states used = {:f}'.format(msm.active_state_fraction))\n", 645 | "print('fraction of counts used = {:f}'.format(msm.active_count_fraction))\n", 646 | "\n", 647 | "fig, ax, misc = pyemma.plots.plot_contour(\n", 648 
| " *tica_concatenated.T, msm.pi[dtrajs_concatenated],\n", 649 | " cbar_label='stationary_distribution',\n", 650 | " method='nearest', mask=True)\n", 651 | "ax.scatter(*cluster.clustercenters.T, s=15, c='C1')\n", 652 | "ax.set_xlabel('IC 1')\n", 653 | "ax.set_ylabel('IC 2')\n", 654 | "fig.tight_layout()" 655 | ] 656 | }, 657 | { 658 | "cell_type": "markdown", 659 | "metadata": {}, 660 | "source": [ 661 | "#### Exercise 3\n", 662 | "Visualize the first six right eigenvectors." 663 | ] 664 | }, 665 | { 666 | "cell_type": "code", 667 | "execution_count": null, 668 | "metadata": { 669 | "solution2": "hidden", 670 | "solution2_first": true 671 | }, 672 | "outputs": [], 673 | "source": [ 674 | "eigvec = #FIXME\n", 675 | "print('first eigenvector is one: {} (min={}, max={})'.format( #FIXME\n", 676 | "\n", 677 | "fig, axes = plt.subplots(2, 3, figsize=(12, 6))\n", 678 | "for i, ax in enumerate(axes.flat):\n", 679 | " pyemma.plots.plot_contour( #FIXME )\n", 680 | " ax.scatter(*cluster.clustercenters.T, s=15, c='C1')\n", 681 | " ax.set_xlabel('IC 1')\n", 682 | " ax.set_ylabel('IC 2')\n", 683 | "fig.tight_layout()" 684 | ] 685 | }, 686 | { 687 | "cell_type": "markdown", 688 | "metadata": { 689 | "solution2": "hidden" 690 | }, 691 | "source": [ 692 | "###### Solution" 693 | ] 694 | }, 695 | { 696 | "cell_type": "code", 697 | "execution_count": null, 698 | "metadata": { 699 | "solution2": "hidden" 700 | }, 701 | "outputs": [], 702 | "source": [ 703 | "eigvec = msm.eigenvectors_right()\n", 704 | "print('first eigenvector is one: {} (min={}, max={})'.format(\n", 705 | " np.allclose(eigvec[:, 0], 1, atol=1e-15), eigvec[:, 0].min(), eigvec[:, 0].max()))\n", 706 | "\n", 707 | "fig, axes = plt.subplots(2, 3, figsize=(12, 6))\n", 708 | "for i, ax in enumerate(axes.flat):\n", 709 | " pyemma.plots.plot_contour(\n", 710 | " *tica_concatenated.T, eigvec[dtrajs_concatenated, i + 1], ax=ax, cmap='PiYG',\n", 711 | " cbar_label='{}. 
right eigenvector'.format(i + 2), mask=True)\n", 712 | " ax.scatter(*cluster.clustercenters.T, s=15, c='C1')\n", 713 | " ax.set_xlabel('IC 1')\n", 714 | " ax.set_ylabel('IC 2')\n", 715 | "fig.tight_layout()" 716 | ] 717 | }, 718 | { 719 | "cell_type": "markdown", 720 | "metadata": {}, 721 | "source": [ 722 | "Can you already guess from eigenvectors two to four which the metastable states are?\n", 723 | "\n", 724 | "#### Exercise 4\n", 725 | "Estimate a Bayesian MSM at lag time $10$ ps and perform/show a CK test for four metastable states." 726 | ] 727 | }, 728 | { 729 | "cell_type": "code", 730 | "execution_count": null, 731 | "metadata": { 732 | "solution2": "hidden", 733 | "solution2_first": true 734 | }, 735 | "outputs": [], 736 | "source": [ 737 | "bayesian_msm = #FIXME\n", 738 | "\n", 739 | "nstates = 4\n", 740 | "pyemma.plots. #FIXME" 741 | ] 742 | }, 743 | { 744 | "cell_type": "markdown", 745 | "metadata": { 746 | "solution2": "hidden" 747 | }, 748 | "source": [ 749 | "###### Solution" 750 | ] 751 | }, 752 | { 753 | "cell_type": "code", 754 | "execution_count": null, 755 | "metadata": { 756 | "solution2": "hidden" 757 | }, 758 | "outputs": [], 759 | "source": [ 760 | "bayesian_msm = pyemma.msm.bayesian_markov_model(cluster.dtrajs, lag=10, dt_traj='1 ps')\n", 761 | "\n", 762 | "nstates = 4\n", 763 | "pyemma.plots.plot_cktest(bayesian_msm.cktest(nstates), units='ps');" 764 | ] 765 | }, 766 | { 767 | "cell_type": "markdown", 768 | "metadata": {}, 769 | "source": [ 770 | "#### Exercise 5\n", 771 | "Save the MSM, Bayesian MSM and Cluster objects to the same file as before.\n", 772 | "Use the model names `ala2tica_msm`, `ala2tica_bayesian_msm` and `ala2tica_cluster`, respectively.\n", 773 | "Further, include the TICA object with model name `ala2tica_tica`." 
774 | ] 775 | }, 776 | { 777 | "cell_type": "code", 778 | "execution_count": null, 779 | "metadata": { 780 | "solution2": "hidden", 781 | "solution2_first": true 782 | }, 783 | "outputs": [], 784 | "source": [ 785 | "#FIXME " 786 | ] 787 | }, 788 | { 789 | "cell_type": "markdown", 790 | "metadata": { 791 | "solution2": "hidden" 792 | }, 793 | "source": [ 794 | "###### Solution" 795 | ] 796 | }, 797 | { 798 | "cell_type": "code", 799 | "execution_count": null, 800 | "metadata": { 801 | "solution2": "hidden" 802 | }, 803 | "outputs": [], 804 | "source": [ 805 | "cluster.save('nb4.pyemma', model_name='ala2tica_cluster', overwrite=True)\n", 806 | "msm.save('nb4.pyemma', model_name='ala2tica_msm', overwrite=True)\n", 807 | "bayesian_msm.save('nb4.pyemma', model_name='ala2tica_bayesian_msm', overwrite=True)\n", 808 | "tica.save('nb4.pyemma', model_name='ala2tica_tica', overwrite=True)" 809 | ] 810 | }, 811 | { 812 | "cell_type": "markdown", 813 | "metadata": {}, 814 | "source": [ 815 | "## Wrapping up\n", 816 | "In this notebook, we have learned how to analyze an MSM and how to extract kinetic information from the model. In detail, we have used\n", 817 | "- the `active_state_fraction`, `active_count_fraction`, and `active_set` attributes of an MSM object to see how much (and which parts) of our data form the largest connected set represented by the MSM,\n", 818 | "- the `stationary_distribution` (or `pi`) attribute of an MSM object to access its stationary vector,\n", 819 | "- the `eigenvectors_right()` method of an MSM object to access its (right) eigenvectors,\n", 820 | "\n", 821 | "For visualizing MSMs or kinetic networks we used\n", 822 | "- `pyemma.plots.plot_density()`\n", 823 | "- `pyemma.plots.plot_contour()` and\n", 824 | "- `pyemma.plots.plot_cktest()`." 
825 | ] 826 | } 827 | ], 828 | "metadata": { 829 | "kernelspec": { 830 | "display_name": "Python 3", 831 | "language": "python", 832 | "name": "python3" 833 | }, 834 | "language_info": { 835 | "codemirror_mode": { 836 | "name": "ipython", 837 | "version": 3 838 | }, 839 | "file_extension": ".py", 840 | "mimetype": "text/x-python", 841 | "name": "python", 842 | "nbconvert_exporter": "python", 843 | "pygments_lexer": "ipython3", 844 | "version": "3.6.5" 845 | }, 846 | "toc": { 847 | "base_numbering": 1, 848 | "nav_menu": {}, 849 | "number_sections": false, 850 | "sideBar": true, 851 | "skip_h1_title": true, 852 | "title_cell": "Table of Contents", 853 | "title_sidebar": "Contents", 854 | "toc_cell": false, 855 | "toc_position": {}, 856 | "toc_section_display": true, 857 | "toc_window_display": true 858 | } 859 | }, 860 | "nbformat": 4, 861 | "nbformat_minor": 2 862 | } 863 | -------------------------------------------------------------------------------- /notebooks/static/hmm-backbone-1-385x432.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markovmodel/pyemma_tutorials/6b9183686d2238d4f60c752a73e9b710c667ec10/notebooks/static/hmm-backbone-1-385x432.png -------------------------------------------------------------------------------- /notebooks/static/hmm-backbone-2-388x526.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markovmodel/pyemma_tutorials/6b9183686d2238d4f60c752a73e9b710c667ec10/notebooks/static/hmm-backbone-2-388x526.png -------------------------------------------------------------------------------- /notebooks/static/hmm-backbone-3-347x500.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markovmodel/pyemma_tutorials/6b9183686d2238d4f60c752a73e9b710c667ec10/notebooks/static/hmm-backbone-3-347x500.png 
-------------------------------------------------------------------------------- /notebooks/static/hmm-backbone-4-367x348.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markovmodel/pyemma_tutorials/6b9183686d2238d4f60c752a73e9b710c667ec10/notebooks/static/hmm-backbone-4-367x348.png -------------------------------------------------------------------------------- /notebooks/static/hmm-backbone-5-260x374.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markovmodel/pyemma_tutorials/6b9183686d2238d4f60c752a73e9b710c667ec10/notebooks/static/hmm-backbone-5-260x374.png -------------------------------------------------------------------------------- /notebooks/static/pentapeptide-states.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markovmodel/pyemma_tutorials/6b9183686d2238d4f60c752a73e9b710c667ec10/notebooks/static/pentapeptide-states.png -------------------------------------------------------------------------------- /notebooks/static/pentapeptide-structure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markovmodel/pyemma_tutorials/6b9183686d2238d4f60c752a73e9b710c667ec10/notebooks/static/pentapeptide-structure.png -------------------------------------------------------------------------------- /pyemma_tutorials/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Jupyter notebook launcher for PyEMMA's tutorials series. 
# python -m pyemma_tutorials
def get_keywords():
    """Get the keywords needed to look up the version information."""
    # These strings are substituted by git during 'git archive'.
    # NOTE(review): this definition begins before the visible chunk; the
    # header here is the canonical Versioneer one -- confirm against file top.
    git_refnames = " (HEAD -> master)"
    git_full = "6b9183686d2238d4f60c752a73e9b710c667ec10"
    git_date = "2019-05-29 16:02:41 +0200"
    keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
    return keywords


class VersioneerConfig:
    """Container for Versioneer configuration parameters."""


def get_config():
    """Create, populate and return the VersioneerConfig() object."""
    # these strings are filled in when 'setup.py versioneer' creates
    # _version.py
    cfg = VersioneerConfig()
    cfg.VCS = "git"
    cfg.style = "pep440"
    cfg.tag_prefix = "v"
    cfg.parentdir_prefix = "None"
    cfg.versionfile_source = "pyemma_tutorials/_version.py"
    cfg.verbose = False
    return cfg


class NotThisMethod(Exception):
    """Exception raised if a method is not valid for the current scenario."""


LONG_VERSION_PY = {}
HANDLERS = {}


def register_vcs_handler(vcs, method):  # decorator
    """Return a decorator that files the function under HANDLERS[vcs][method]."""
    def decorate(func):
        """Store func in HANDLERS[vcs][method] and hand it back unchanged."""
        HANDLERS.setdefault(vcs, {})[method] = func
        return func
    return decorate


def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
                env=None):
    """Try each executable name in *commands* until one launches.

    Runs ``candidate + args`` and returns a ``(stdout, returncode)`` pair.
    Both elements are None when no candidate could be started; stdout is
    None (with the real return code) when the command ran but failed.
    """
    assert isinstance(commands, list)
    process = None
    for candidate in commands:
        display = str([candidate] + args)
        try:
            # remember shell=False, so use git.cmd on windows, not just git
            process = subprocess.Popen([candidate] + args, cwd=cwd, env=env,
                                       stdout=subprocess.PIPE,
                                       stderr=(subprocess.PIPE if hide_stderr
                                               else None))
            break
        except EnvironmentError:
            err = sys.exc_info()[1]
            if err.errno == errno.ENOENT:
                # this name does not exist -- try the next candidate
                continue
            if verbose:
                print("unable to run %s" % display)
                print(err)
            return None, None
    else:
        # loop exhausted without a successful Popen
        if verbose:
            print("unable to find command, tried %s" % (commands,))
        return None, None
    stdout = process.communicate()[0].strip()
    if sys.version_info[0] >= 3:
        stdout = stdout.decode()
    if process.returncode != 0:
        if verbose:
            print("unable to run %s (error)" % display)
            print("stdout was %s" % stdout)
        return None, process.returncode
    return stdout, process.returncode


def versions_from_parentdir(parentdir_prefix, root, verbose):
    """Try to determine the version from the parent directory name.

    Source tarballs conventionally unpack into a directory that includes both
    the project name and a version string. We will also support searching up
    two directory levels for an appropriately named parent directory
    """
    tried = []
    for _ in range(3):
        dirname = os.path.basename(root)
        if dirname.startswith(parentdir_prefix):
            return {"version": dirname[len(parentdir_prefix):],
                    "full-revisionid": None,
                    "dirty": False, "error": None, "date": None}
        tried.append(root)
        root = os.path.dirname(root)  # up a level
    if verbose:
        print("Tried directories %s but none started with prefix %s" %
              (str(tried), parentdir_prefix))
    raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
@register_vcs_handler("git", "get_keywords")
def git_get_keywords(versionfile_abs):
    """Extract version information from the given file.

    The code embedded in _version.py can just fetch the value of these
    keywords. When used from setup.py, we don't want to import _version.py,
    so we do it with a regexp instead. This function is not used from
    _version.py.
    """
    keywords = {}
    # file-variable prefix -> key under which we report it
    wanted = (("git_refnames =", "refnames"),
              ("git_full =", "full"),
              ("git_date =", "date"))
    try:
        with open(versionfile_abs, "r") as handle:
            for line in handle:
                stripped = line.strip()
                for prefix, key in wanted:
                    if stripped.startswith(prefix):
                        match = re.search(r'=\s*"(.*)"', line)
                        if match:
                            keywords[key] = match.group(1)
    except EnvironmentError:
        # unreadable or missing file: return whatever was collected
        pass
    return keywords


@register_vcs_handler("git", "keywords")
def git_versions_from_keywords(keywords, tag_prefix, verbose):
    """Get version information from git keywords."""
    if not keywords:
        raise NotThisMethod("no keywords at all, weird")
    date = keywords.get("date")
    if date is not None:
        # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant
        # datestamp. However we prefer "%ci" (which expands to an "ISO-8601
        # -like" string, which we must then edit to make compliant), because
        # it's been around since git-1.5.3, and it's too difficult to
        # discover which version we're using, or to work around using an
        # older one.
        date = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
    refnames = keywords["refnames"].strip()
    if refnames.startswith("$Format"):
        if verbose:
            print("keywords are unexpanded, not using")
        raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
    refs = {ref.strip() for ref in refnames.strip("()").split(",")}
    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
    # just "foo-1.0". If we see a "tag: " prefix, prefer those.
    TAG = "tag: "
    tags = {ref[len(TAG):] for ref in refs if ref.startswith(TAG)}
    if not tags:
        # Either we're using git < 1.8.3, or there really are no tags. We use
        # a heuristic: assume all version tags have a digit. The old git %d
        # expansion behaves like git log --decorate=short and strips out the
        # refs/heads/ and refs/tags/ prefixes that would let us distinguish
        # between branches and tags. By ignoring refnames without digits, we
        # filter out many common branch names like "release" and
        # "stabilization", as well as "HEAD" and "master".
        tags = {ref for ref in refs if re.search(r'\d', ref)}
        if verbose:
            print("discarding '%s', no digits" % ",".join(refs - tags))
    if verbose:
        print("likely tags: %s" % ",".join(sorted(tags)))
    for ref in sorted(tags):
        # sorting will prefer e.g. "2.0" over "2.0rc1"
        if ref.startswith(tag_prefix):
            version = ref[len(tag_prefix):]
            if verbose:
                print("picking %s" % version)
            return {"version": version,
                    "full-revisionid": keywords["full"].strip(),
                    "dirty": False, "error": None,
                    "date": date}
    # no suitable tags, so version is "0+unknown", but full hex is still there
    if verbose:
        print("no suitable tags, using unknown + full revision id")
    return {"version": "0+unknown",
            "full-revisionid": keywords["full"].strip(),
            "dirty": False, "error": "no suitable tags", "date": None}
223 | """ 224 | GITS = ["git"] 225 | if sys.platform == "win32": 226 | GITS = ["git.cmd", "git.exe"] 227 | 228 | out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, 229 | hide_stderr=True) 230 | if rc != 0: 231 | if verbose: 232 | print("Directory %s not under git control" % root) 233 | raise NotThisMethod("'git rev-parse --git-dir' returned error") 234 | 235 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] 236 | # if there isn't one, this yields HEX[-dirty] (no NUM) 237 | describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", 238 | "--always", "--long", 239 | "--match", "%s*" % tag_prefix], 240 | cwd=root) 241 | # --long was added in git-1.5.5 242 | if describe_out is None: 243 | raise NotThisMethod("'git describe' failed") 244 | describe_out = describe_out.strip() 245 | full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root) 246 | if full_out is None: 247 | raise NotThisMethod("'git rev-parse' failed") 248 | full_out = full_out.strip() 249 | 250 | pieces = {} 251 | pieces["long"] = full_out 252 | pieces["short"] = full_out[:7] # maybe improved later 253 | pieces["error"] = None 254 | 255 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 256 | # TAG might have hyphens. 257 | git_describe = describe_out 258 | 259 | # look for -dirty suffix 260 | dirty = git_describe.endswith("-dirty") 261 | pieces["dirty"] = dirty 262 | if dirty: 263 | git_describe = git_describe[:git_describe.rindex("-dirty")] 264 | 265 | # now we have TAG-NUM-gHEX or HEX 266 | 267 | if "-" in git_describe: 268 | # TAG-NUM-gHEX 269 | mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) 270 | if not mo: 271 | # unparseable. Maybe git-describe is misbehaving? 
def plus_or_dot(pieces):
    """Return a + if we don't already have one, else return a .

    Fix: the original ``pieces.get("closest-tag", "")`` still yields None
    when the key is present with value None (the no-tags case), so the
    ``in`` test could raise TypeError. Coerce None to "" instead, matching
    the fix adopted by later Versioneer releases.
    """
    if "+" in (pieces.get("closest-tag") or ""):
        return "."
    return "+"


def render_pep440(pieces):
    """Build up version string, with post-release "local version identifier".

    Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
    get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty

    Exceptions:
    1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"] or pieces["dirty"]:
            rendered += plus_or_dot(pieces)
            rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
            if pieces["dirty"]:
                rendered += ".dirty"
    else:
        # exception #1
        rendered = "0+untagged.%d.g%s" % (pieces["distance"],
                                          pieces["short"])
        if pieces["dirty"]:
            rendered += ".dirty"
    return rendered


def render_pep440_pre(pieces):
    """TAG[.post.devDISTANCE] -- No -dirty.

    Exceptions:
    1: no tags. 0.post.devDISTANCE
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"]:
            rendered += ".post.dev%d" % pieces["distance"]
    else:
        # exception #1
        rendered = "0.post.dev%d" % pieces["distance"]
    return rendered


def render_pep440_post(pieces):
    """TAG[.postDISTANCE[.dev0]+gHEX] .

    The ".dev0" means dirty. Note that .dev0 sorts backwards
    (a dirty tree will appear "older" than the corresponding clean one),
    but you shouldn't be releasing software with -dirty anyways.

    Exceptions:
    1: no tags. 0.postDISTANCE[.dev0]
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"] or pieces["dirty"]:
            rendered += ".post%d" % pieces["distance"]
            if pieces["dirty"]:
                rendered += ".dev0"
            rendered += plus_or_dot(pieces)
            rendered += "g%s" % pieces["short"]
    else:
        # exception #1
        rendered = "0.post%d" % pieces["distance"]
        if pieces["dirty"]:
            rendered += ".dev0"
        rendered += "+g%s" % pieces["short"]
    return rendered


def render_pep440_old(pieces):
    """TAG[.postDISTANCE[.dev0]] .

    The ".dev0" means dirty.

    Exceptions:
    1: no tags. 0.postDISTANCE[.dev0]
    """
    # (also fixes the "Eexceptions" typo in the original docstring)
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"] or pieces["dirty"]:
            rendered += ".post%d" % pieces["distance"]
            if pieces["dirty"]:
                rendered += ".dev0"
    else:
        # exception #1
        rendered = "0.post%d" % pieces["distance"]
        if pieces["dirty"]:
            rendered += ".dev0"
    return rendered


def render_git_describe(pieces):
    """TAG[-DISTANCE-gHEX][-dirty].

    Like 'git describe --tags --dirty --always'.

    Exceptions:
    1: no tags. HEX[-dirty] (note: no 'g' prefix)
    """
    if pieces["closest-tag"]:
        rendered = pieces["closest-tag"]
        if pieces["distance"]:
            rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
    else:
        # exception #1
        rendered = pieces["short"]
    if pieces["dirty"]:
        rendered += "-dirty"
    return rendered
def render_git_describe_long(pieces):
    """TAG-DISTANCE-gHEX[-dirty].

    Like 'git describe --tags --dirty --always -long'.
    The distance/hash is unconditional.

    Exceptions:
    1: no tags. HEX[-dirty] (note: no 'g' prefix)
    """
    if pieces["closest-tag"]:
        rendered = "%s-%d-g%s" % (pieces["closest-tag"],
                                  pieces["distance"], pieces["short"])
    else:
        # exception #1
        rendered = pieces["short"]
    if pieces["dirty"]:
        rendered += "-dirty"
    return rendered


def render(pieces, style):
    """Render the given version pieces into the requested style."""
    if pieces["error"]:
        return {"version": "unknown",
                "full-revisionid": pieces.get("long"),
                "dirty": None,
                "error": pieces["error"],
                "date": None}

    if not style or style == "default":
        style = "pep440"  # the default

    # dispatch table instead of an if/elif chain; same styles, same error
    renderers = {
        "pep440": render_pep440,
        "pep440-pre": render_pep440_pre,
        "pep440-post": render_pep440_post,
        "pep440-old": render_pep440_old,
        "git-describe": render_git_describe,
        "git-describe-long": render_git_describe_long,
    }
    renderer = renderers.get(style)
    if renderer is None:
        raise ValueError("unknown style '%s'" % style)
    rendered = renderer(pieces)

    return {"version": rendered, "full-revisionid": pieces["long"],
            "dirty": pieces["dirty"], "error": None,
            "date": pieces.get("date")}


def get_versions():
    """Get version information or return default if unable to do so."""
    # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
    # __file__, we can work backwards from there to the root. Some
    # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
    # case we can only use expanded keywords.
    cfg = get_config()
    verbose = cfg.verbose

    try:
        return git_versions_from_keywords(get_keywords(), cfg.tag_prefix,
                                          verbose)
    except NotThisMethod:
        pass

    try:
        root = os.path.realpath(__file__)
        # versionfile_source is the relative path from the top of the source
        # tree (where the .git directory might live) to this file. Invert
        # this to find the root from __file__.
        for _ in cfg.versionfile_source.split('/'):
            root = os.path.dirname(root)
    except NameError:
        return {"version": "0+unknown", "full-revisionid": None,
                "dirty": None,
                "error": "unable to find root of source tree",
                "date": None}

    try:
        pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose)
        return render(pieces, cfg.style)
    except NotThisMethod:
        pass

    try:
        if cfg.parentdir_prefix:
            return versions_from_parentdir(cfg.parentdir_prefix, root,
                                           verbose)
    except NotThisMethod:
        pass

    return {"version": "0+unknown", "full-revisionid": None,
            "dirty": None,
            "error": "unable to compute version", "date": None}
5 | # Further info: 6 | # https://jupyter.readthedocs.io/en/latest/migrating.html?highlight=data-dir#finding-the-location-of-important-files 7 | # https://github.com/arose/nglview/issues/696#issuecomment-332850270 8 | # https://github.com/arose/nglview/issues/718#issuecomment-346041897 9 | import os 10 | os.environ['JUPYTER_DATA_DIR'] = 'non-sense' 11 | assert os.getenv('JUPYTER_DATA_DIR', False) 12 | 13 | 14 | def main(): 15 | from notebook.notebookapp import main as main_ 16 | from .util import configs_location 17 | 18 | # main eats, argv list and kwargs 19 | notebook_cfg, notebook_cfg_json = configs_location() 20 | 21 | _nglview_pip_installed_workaround() 22 | 23 | # extend passed arguments with our config files 24 | import sys 25 | argv = sys.argv[1:] + ['--config=%s' % notebook_cfg, '--config=%s' % notebook_cfg_json] 26 | print('invoking notebook server with arguments:', argv) 27 | main_(argv=argv) 28 | 29 | 30 | if __name__ == '__main__': 31 | main() 32 | -------------------------------------------------------------------------------- /pyemma_tutorials/jupyter_notebook_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "load_extensions": { 3 | "jupyter-matplotlib/extension": true, 4 | "jupyter-js-widgets/extension": true, 5 | "nbextensions_configurator/config_menu/main": true, 6 | "contrib_nbextensions_help_item/main": true, 7 | "nglview-js-widgets/extension": true, 8 | "nglview_main": false, 9 | "exercise2/main": true, 10 | "toc2/main": true 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /pyemma_tutorials/jupyter_notebook_config.py: -------------------------------------------------------------------------------- 1 | # Configuration file for jupyter-notebook. 
2 | 3 | #------------------------------------------------------------------------------ 4 | # Application(SingletonConfigurable) configuration 5 | #------------------------------------------------------------------------------ 6 | 7 | ## This is an application. 8 | 9 | ## The date format used by logging formatters for %(asctime)s 10 | #c.Application.log_datefmt = '%Y-%m-%d %H:%M:%S' 11 | 12 | ## The Logging format template 13 | #c.Application.log_format = '[%(name)s]%(highlevel)s %(message)s' 14 | 15 | ## Set the log level by value or name. 16 | #c.Application.log_level = 30 17 | 18 | #------------------------------------------------------------------------------ 19 | # JupyterApp(Application) configuration 20 | #------------------------------------------------------------------------------ 21 | ## Base class for Jupyter applications 22 | 23 | ## Answer yes to any prompts. 24 | #c.JupyterApp.answer_yes = False 25 | 26 | ## Full path of a config file. 27 | #c.JupyterApp.config_file = '' 28 | 29 | ## Specify a config file to load. 30 | #c.JupyterApp.config_file_name = '' 31 | 32 | ## Generate default config file. 33 | #c.JupyterApp.generate_config = False 34 | 35 | #------------------------------------------------------------------------------ 36 | # NotebookApp(JupyterApp) configuration 37 | #------------------------------------------------------------------------------ 38 | 39 | ## Set the Access-Control-Allow-Credentials: true header 40 | #c.NotebookApp.allow_credentials = False 41 | 42 | ## Set the Access-Control-Allow-Origin header 43 | # 44 | # Use '*' to allow any origin to access your server. 45 | # 46 | # Takes precedence over allow_origin_pat. 
47 | c.NotebookApp.allow_origin = 'localhost' 48 | 49 | ## Use a regular expression for the Access-Control-Allow-Origin header 50 | # 51 | # Requests from an origin matching the expression will get replies with: 52 | # 53 | # Access-Control-Allow-Origin: origin 54 | # 55 | # where `origin` is the origin of the request. 56 | # 57 | # Ignored if allow_origin is set. 58 | #c.NotebookApp.allow_origin_pat = '' 59 | 60 | ## Allow password to be changed at login for the notebook server. 61 | # 62 | # While loggin in with a token, the notebook server UI will give the opportunity 63 | # to the user to enter a new password at the same time that will replace the 64 | # token login mechanism. 65 | # 66 | # This can be set to false to prevent changing password from the UI/API. 67 | #c.NotebookApp.allow_password_change = True 68 | 69 | ## Whether to allow the user to run the notebook as root. 70 | #c.NotebookApp.allow_root = False 71 | 72 | ## DEPRECATED use base_url 73 | #c.NotebookApp.base_project_url = '/' 74 | 75 | ## The base URL for the notebook server. 76 | # 77 | # Leading and trailing slashes can be omitted, and will automatically be added. 78 | #c.NotebookApp.base_url = '/' 79 | 80 | ## Specify what command to use to invoke a web browser when opening the notebook. 81 | # If not specified, the default browser will be determined by the `webbrowser` 82 | # standard library module, which allows setting of the BROWSER environment 83 | # variable to override it. 84 | #c.NotebookApp.browser = '' 85 | 86 | ## The full path to an SSL/TLS certificate file. 87 | #c.NotebookApp.certfile = '' 88 | 89 | ## The full path to a certificate authority certificate for SSL/TLS client 90 | # authentication. 91 | #c.NotebookApp.client_ca = '' 92 | 93 | ## The config manager class to use 94 | #c.NotebookApp.config_manager_class = 'notebook.services.config.manager.ConfigManager' 95 | 96 | ## The notebook manager class to use. 
97 | #c.NotebookApp.contents_manager_class = 'notebook.services.contents.largefilemanager.LargeFileManager' 98 | 99 | ## Extra keyword arguments to pass to `set_secure_cookie`. See tornado's 100 | # set_secure_cookie docs for details. 101 | #c.NotebookApp.cookie_options = {} 102 | 103 | ## The random bytes used to secure cookies. By default this is a new random 104 | # number every time you start the Notebook. Set it to a value in a config file 105 | # to enable logins to persist across server sessions. 106 | # 107 | # Note: Cookie secrets should be kept private, do not share config files with 108 | # cookie_secret stored in plaintext (you can read the value from a file). 109 | #c.NotebookApp.cookie_secret = b'' 110 | 111 | ## The file where the cookie secret is stored. 112 | #c.NotebookApp.cookie_secret_file = '' 113 | 114 | ## The default URL to redirect to from `/` 115 | c.NotebookApp.default_url = '/tree#examples' 116 | 117 | ## Disable cross-site-request-forgery protection 118 | # 119 | # Jupyter notebook 4.3.1 introduces protection from cross-site request 120 | # forgeries, requiring API requests to either: 121 | # 122 | # - originate from pages served by this server (validated with XSRF cookie and 123 | # token), or - authenticate with a token 124 | # 125 | # Some anonymous compute resources still desire the ability to run code, 126 | # completely without authentication. These services can disable all 127 | # authentication and security checks, with the full knowledge of what that 128 | # implies. 129 | #c.NotebookApp.disable_check_xsrf = False 130 | 131 | ## Whether to enable MathJax for typesetting math/TeX 132 | # 133 | # MathJax is the javascript library Jupyter uses to render math/LaTeX. It is 134 | # very large, so you may want to disable it if you have a slow internet 135 | # connection, or for offline use of the notebook. 136 | # 137 | # When disabled, equations etc. will appear as their untransformed TeX source. 
138 | #c.NotebookApp.enable_mathjax = True 139 | 140 | ## extra paths to look for Javascript notebook extensions 141 | #c.NotebookApp.extra_nbextensions_path = [] 142 | 143 | ## handlers that should be loaded at higher priority than the default services 144 | #c.NotebookApp.extra_services = [] 145 | 146 | ## Extra paths to search for serving static files. 147 | # 148 | # This allows adding javascript/css to be available from the notebook server 149 | # machine, or overriding individual files in the IPython 150 | #c.NotebookApp.extra_static_paths = [] 151 | 152 | ## Extra paths to search for serving jinja templates. 153 | # 154 | # Can be used to override templates from notebook.templates. 155 | #c.NotebookApp.extra_template_paths = [] 156 | 157 | ## 158 | #c.NotebookApp.file_to_run = '' 159 | 160 | ## Deprecated: Use minified JS file or not, mainly use during dev to avoid JS 161 | # recompilation 162 | #c.NotebookApp.ignore_minified_js = False 163 | 164 | ## (bytes/sec) Maximum rate at which stream output can be sent on iopub before 165 | # they are limited. 166 | #c.NotebookApp.iopub_data_rate_limit = 1000000 167 | 168 | ## (msgs/sec) Maximum rate at which messages can be sent on iopub before they are 169 | # limited. 170 | #c.NotebookApp.iopub_msg_rate_limit = 1000 171 | 172 | ## The IP address the notebook server will listen on. 173 | c.NotebookApp.ip = 'localhost' 174 | 175 | ## Supply extra arguments that will be passed to Jinja environment. 176 | #c.NotebookApp.jinja_environment_options = {} 177 | 178 | ## Extra variables to supply to jinja templates when rendering. 179 | #c.NotebookApp.jinja_template_vars = {} 180 | 181 | ## The kernel manager class to use. 182 | #c.NotebookApp.kernel_manager_class = 'notebook.services.kernels.kernelmanager.MappingKernelManager' 183 | 184 | ## The kernel spec manager class to use. Should be a subclass of 185 | # `jupyter_client.kernelspec.KernelSpecManager`. 
186 | # 187 | # The Api of KernelSpecManager is provisional and might change without warning 188 | # between this version of Jupyter and the next stable one. 189 | #c.NotebookApp.kernel_spec_manager_class = 'jupyter_client.kernelspec.KernelSpecManager' 190 | 191 | ## The full path to a private key file for usage with SSL/TLS. 192 | #c.NotebookApp.keyfile = '' 193 | 194 | ## The login handler class to use. 195 | #c.NotebookApp.login_handler_class = 'notebook.auth.login.LoginHandler' 196 | 197 | ## The logout handler class to use. 198 | #c.NotebookApp.logout_handler_class = 'notebook.auth.logout.LogoutHandler' 199 | 200 | ## The MathJax.js configuration file that is to be used. 201 | #c.NotebookApp.mathjax_config = 'TeX-AMS-MML_HTMLorMML-full,Safe' 202 | 203 | ## A custom url for MathJax.js. Should be in the form of a case-sensitive url to 204 | # MathJax, for example: /static/components/MathJax/MathJax.js 205 | #c.NotebookApp.mathjax_url = '' 206 | 207 | ## Dict of Python modules to load as notebook server extensions.Entry values can 208 | # be used to enable and disable the loading ofthe extensions. The extensions 209 | # will be loaded in alphabetical order. 210 | # notebook examples enables us to have an unmodified copy of the tutorials in the pkg dir and create a working copy 211 | c.NotebookApp.nbserver_extensions = {'nbexamples.handlers': True} 212 | import pyemma_tutorials 213 | c.Examples.reviewed_example_dir = pyemma_tutorials.notebook_location() 214 | c.Examples.unreviewed_example_dir = '' 215 | 216 | ## The directory to use for notebooks and kernels. 217 | run_dir = pyemma_tutorials.run_dir() 218 | c.NotebookApp.notebook_dir = run_dir 219 | 220 | ## Whether to open in a browser after starting. The specific browser used is 221 | # platform dependent and determined by the python standard library `webbrowser` 222 | # module, unless it is overridden using the --browser (NotebookApp.browser) 223 | # configuration option. 
224 | #c.NotebookApp.open_browser = True 225 | 226 | ## Hashed password to use for web authentication. 227 | # 228 | # To generate, type in a python/IPython shell: 229 | # 230 | # from notebook.auth import passwd; passwd() 231 | # 232 | # The string should be of the form type:salt:hashed-password. 233 | #c.NotebookApp.password = '' 234 | 235 | ## Forces users to use a password for the Notebook server. This is useful in a 236 | # multi user environment, for instance when everybody in the LAN can access each 237 | # other's machine through ssh. 238 | # 239 | # In such a case, server the notebook server on localhost is not secure since 240 | # any user can connect to the notebook server via ssh. 241 | #c.NotebookApp.password_required = False 242 | 243 | ## The port the notebook server will listen on. 244 | #c.NotebookApp.port = 8888 245 | 246 | ## The number of additional ports to try if the specified port is not available. 247 | #c.NotebookApp.port_retries = 50 248 | 249 | ## DISABLED: use %pylab or %matplotlib in the notebook to enable matplotlib. 250 | #c.NotebookApp.pylab = 'disabled' 251 | 252 | ## If True, display a button in the dashboard to quit (shutdown the notebook 253 | # server). 254 | #c.NotebookApp.quit_button = True 255 | 256 | ## (sec) Time window used to check the message and data rate limits. 257 | #c.NotebookApp.rate_limit_window = 3 258 | 259 | ## Reraise exceptions encountered loading server extensions? 260 | #c.NotebookApp.reraise_server_extension_failures = False 261 | 262 | ## DEPRECATED use the nbserver_extensions dict instead 263 | #c.NotebookApp.server_extensions = [] 264 | 265 | ## The session manager class to use. 266 | #c.NotebookApp.session_manager_class = 'notebook.services.sessions.sessionmanager.SessionManager' 267 | 268 | ## Shut down the server after N seconds with no kernels or terminals running and 269 | # no activity. 
This can be used together with culling idle kernels 270 | # (MappingKernelManager.cull_idle_timeout) to shutdown the notebook server when 271 | # it's not in use. This is not precisely timed: it may shut down up to a minute 272 | # later. 0 (the default) disables this automatic shutdown. 273 | #c.NotebookApp.shutdown_no_activity_timeout = 0 274 | 275 | ## Supply SSL options for the tornado HTTPServer. See the tornado docs for 276 | # details. 277 | #c.NotebookApp.ssl_options = {} 278 | 279 | ## Supply overrides for terminado. Currently only supports "shell_command". 280 | #c.NotebookApp.terminado_settings = {} 281 | 282 | ## Set to False to disable terminals. 283 | # 284 | # This does *not* make the notebook server more secure by itself. Anything the 285 | # user can in a terminal, they can also do in a notebook. 286 | # 287 | # Terminals may also be automatically disabled if the terminado package is not 288 | # available. 289 | #c.NotebookApp.terminals_enabled = True 290 | 291 | ## Token used for authenticating first-time connections to the server. 292 | # 293 | # When no password is enabled, the default is to generate a new, random token. 294 | # 295 | # Setting to an empty string disables authentication altogether, which is NOT 296 | # RECOMMENDED. 297 | #c.NotebookApp.token = '' 298 | 299 | ## Supply overrides for the tornado.web.Application that the Jupyter notebook 300 | # uses. 301 | #c.NotebookApp.tornado_settings = {} 302 | 303 | ## Whether to trust or not X-Scheme/X-Forwarded-Proto and X-Real-Ip/X-Forwarded- 304 | # For headerssent by the upstream reverse proxy. Necessary if the proxy handles 305 | # SSL 306 | #c.NotebookApp.trust_xheaders = False 307 | 308 | ## DEPRECATED, use tornado_settings 309 | #c.NotebookApp.webapp_settings = {} 310 | 311 | ## Specify Where to open the notebook on startup. This is the `new` argument 312 | # passed to the standard library method `webbrowser.open`. 
The behaviour is not 313 | # guaranteed, but depends on browser support. Valid values are: 314 | # 315 | # - 2 opens a new tab, 316 | # - 1 opens a new window, 317 | # - 0 opens in an existing window. 318 | # 319 | # See the `webbrowser.open` documentation for details. 320 | #c.NotebookApp.webbrowser_open_new = 2 321 | 322 | ## Set the tornado compression options for websocket connections. 323 | # 324 | # This value will be returned from 325 | # :meth:`WebSocketHandler.get_compression_options`. None (default) will disable 326 | # compression. A dict (even an empty one) will enable compression. 327 | # 328 | # See the tornado docs for WebSocketHandler.get_compression_options for details. 329 | #c.NotebookApp.websocket_compression_options = None 330 | 331 | ## The base URL for websockets, if it differs from the HTTP server (hint: it 332 | # almost certainly doesn't). 333 | # 334 | # Should be in the form of an HTTP origin: ws[s]://hostname[:port] 335 | #c.NotebookApp.websocket_url = '' 336 | 337 | #------------------------------------------------------------------------------ 338 | # ConnectionFileMixin(LoggingConfigurable) configuration 339 | #------------------------------------------------------------------------------ 340 | 341 | ## Mixin for configurable classes that work with connection files 342 | 343 | ## JSON file in which to store connection info [default: kernel-.json] 344 | # 345 | # This file will contain the IP, ports, and authentication key needed to connect 346 | # clients to this kernel. By default, this file will be created in the security 347 | # dir of the current profile, but can be specified by absolute path. 
348 | #c.ConnectionFileMixin.connection_file = '' 349 | 350 | ## set the control (ROUTER) port [default: random] 351 | #c.ConnectionFileMixin.control_port = 0 352 | 353 | ## set the heartbeat port [default: random] 354 | #c.ConnectionFileMixin.hb_port = 0 355 | 356 | ## set the iopub (PUB) port [default: random] 357 | #c.ConnectionFileMixin.iopub_port = 0 358 | 359 | ## Set the kernel's IP address [default localhost]. If the IP address is 360 | # something other than localhost, then Consoles on other machines will be able 361 | # to connect to the Kernel, so be careful! 362 | #c.ConnectionFileMixin.ip = '' 363 | 364 | ## set the shell (ROUTER) port [default: random] 365 | #c.ConnectionFileMixin.shell_port = 0 366 | 367 | ## set the stdin (ROUTER) port [default: random] 368 | #c.ConnectionFileMixin.stdin_port = 0 369 | 370 | ## 371 | #c.ConnectionFileMixin.transport = 'tcp' 372 | 373 | #------------------------------------------------------------------------------ 374 | # KernelManager(ConnectionFileMixin) configuration 375 | #------------------------------------------------------------------------------ 376 | 377 | ## Manages a single kernel in a subprocess on this host. 378 | # 379 | # This version starts kernels with Popen. 380 | 381 | ## Should we autorestart the kernel if it dies. 382 | #c.KernelManager.autorestart = True 383 | 384 | ## DEPRECATED: Use kernel_name instead. 385 | # 386 | # The Popen Command to launch the kernel. Override this if you have a custom 387 | # kernel. If kernel_cmd is specified in a configuration file, Jupyter does not 388 | # pass any arguments to the kernel, because it cannot make any assumptions about 389 | # the arguments that the kernel understands. In particular, this means that the 390 | # kernel does not receive the option --debug if it given on the Jupyter command 391 | # line. 392 | #c.KernelManager.kernel_cmd = ['python3'] 393 | 394 | ## Time to wait for a kernel to terminate before killing it, in seconds. 
395 | #c.KernelManager.shutdown_wait_time = 5.0 396 | 397 | #------------------------------------------------------------------------------ 398 | # Session(Configurable) configuration 399 | #------------------------------------------------------------------------------ 400 | 401 | ## Object for handling serialization and sending of messages. 402 | # 403 | # The Session object handles building messages and sending them with ZMQ sockets 404 | # or ZMQStream objects. Objects can communicate with each other over the 405 | # network via Session objects, and only need to work with the dict-based IPython 406 | # message spec. The Session will handle serialization/deserialization, security, 407 | # and metadata. 408 | # 409 | # Sessions support configurable serialization via packer/unpacker traits, and 410 | # signing with HMAC digests via the key/keyfile traits. 411 | # 412 | # Parameters ---------- 413 | # 414 | # debug : bool 415 | # whether to trigger extra debugging statements 416 | # packer/unpacker : str : 'json', 'pickle' or import_string 417 | # importstrings for methods to serialize message parts. If just 418 | # 'json' or 'pickle', predefined JSON and pickle packers will be used. 419 | # Otherwise, the entire importstring must be used. 420 | # 421 | # The functions must accept at least valid JSON input, and output *bytes*. 422 | # 423 | # For example, to use msgpack: 424 | # packer = 'msgpack.packb', unpacker='msgpack.unpackb' 425 | # pack/unpack : callables 426 | # You can also set the pack/unpack callables for serialization directly. 427 | # session : bytes 428 | # the ID of this Session object. The default is to generate a new UUID. 429 | # username : unicode 430 | # username added to message headers. The default is to ask the OS. 431 | # key : bytes 432 | # The key used to initialize an HMAC signature. If unset, messages 433 | # will not be signed or checked. 434 | # keyfile : filepath 435 | # The file containing a key. 
If this is set, `key` will be initialized 436 | # to the contents of the file. 437 | 438 | ## Threshold (in bytes) beyond which an object's buffer should be extracted to 439 | # avoid pickling. 440 | #c.Session.buffer_threshold = 1024 441 | 442 | ## Whether to check PID to protect against calls after fork. 443 | # 444 | # This check can be disabled if fork-safety is handled elsewhere. 445 | #c.Session.check_pid = True 446 | 447 | ## Threshold (in bytes) beyond which a buffer should be sent without copying. 448 | #c.Session.copy_threshold = 65536 449 | 450 | ## Debug output in the Session 451 | #c.Session.debug = False 452 | 453 | ## The maximum number of digests to remember. 454 | # 455 | # The digest history will be culled when it exceeds this value. 456 | #c.Session.digest_history_size = 65536 457 | 458 | ## The maximum number of items for a container to be introspected for custom 459 | # serialization. Containers larger than this are pickled outright. 460 | #c.Session.item_threshold = 64 461 | 462 | ## execution key, for signing messages. 463 | #c.Session.key = b'' 464 | 465 | ## path to file containing execution key. 466 | #c.Session.keyfile = '' 467 | 468 | ## Metadata dictionary, which serves as the default top-level metadata dict for 469 | # each message. 470 | #c.Session.metadata = {} 471 | 472 | ## The name of the packer for serializing messages. Should be one of 'json', 473 | # 'pickle', or an import name for a custom callable serializer. 474 | #c.Session.packer = 'json' 475 | 476 | ## The UUID identifying this session. 477 | #c.Session.session = '' 478 | 479 | ## The digest scheme used to construct the message signatures. Must have the form 480 | # 'hmac-HASH'. 481 | #c.Session.signature_scheme = 'hmac-sha256' 482 | 483 | ## The name of the unpacker for unserializing messages. Only used with custom 484 | # functions for `packer`. 485 | #c.Session.unpacker = 'json' 486 | 487 | ## Username for the Session. Default is your system username. 
488 | #c.Session.username = 'marscher' 489 | 490 | #------------------------------------------------------------------------------ 491 | # MultiKernelManager(LoggingConfigurable) configuration 492 | #------------------------------------------------------------------------------ 493 | 494 | ## A class for managing multiple kernels. 495 | 496 | ## The name of the default kernel to start 497 | #c.MultiKernelManager.default_kernel_name = 'python3' 498 | 499 | ## The kernel manager class. This is configurable to allow subclassing of the 500 | # KernelManager for customized behavior. 501 | #c.MultiKernelManager.kernel_manager_class = 'jupyter_client.ioloop.IOLoopKernelManager' 502 | 503 | #------------------------------------------------------------------------------ 504 | # MappingKernelManager(MultiKernelManager) configuration 505 | #------------------------------------------------------------------------------ 506 | 507 | ## A KernelManager that handles notebook mapping and HTTP error handling 508 | 509 | ## Whether messages from kernels whose frontends have disconnected should be 510 | # buffered in-memory. 511 | # 512 | # When True (default), messages are buffered and replayed on reconnect, avoiding 513 | # lost messages due to interrupted connectivity. 514 | # 515 | # Disable if long-running kernels will produce too much output while no 516 | # frontends are connected. 517 | #c.MappingKernelManager.buffer_offline_messages = True 518 | 519 | ## Whether to consider culling kernels which are busy. Only effective if 520 | # cull_idle_timeout > 0. 521 | #c.MappingKernelManager.cull_busy = False 522 | 523 | ## Whether to consider culling kernels which have one or more connections. Only 524 | # effective if cull_idle_timeout > 0. 525 | #c.MappingKernelManager.cull_connected = False 526 | 527 | ## Timeout (in seconds) after which a kernel is considered idle and ready to be 528 | # culled. Values of 0 or lower disable culling. 
Very short timeouts may result 529 | # in kernels being culled for users with poor network connections. 530 | #c.MappingKernelManager.cull_idle_timeout = 0 531 | 532 | ## The interval (in seconds) on which to check for idle kernels exceeding the 533 | # cull timeout value. 534 | #c.MappingKernelManager.cull_interval = 300 535 | 536 | ## 537 | #c.MappingKernelManager.root_dir = '' 538 | 539 | #------------------------------------------------------------------------------ 540 | # ContentsManager(LoggingConfigurable) configuration 541 | #------------------------------------------------------------------------------ 542 | 543 | ## Base class for serving files and directories. 544 | # 545 | # This serves any text or binary file, as well as directories, with special 546 | # handling for JSON notebook documents. 547 | # 548 | # Most APIs take a path argument, which is always an API-style unicode path, and 549 | # always refers to a directory. 550 | # 551 | # - unicode, not url-escaped 552 | # - '/'-separated 553 | # - leading and trailing '/' will be stripped 554 | # - if unspecified, path defaults to '', 555 | # indicating the root path. 556 | 557 | ## Allow access to hidden files 558 | #c.ContentsManager.allow_hidden = False 559 | 560 | ## 561 | #c.ContentsManager.checkpoints = None 562 | 563 | ## 564 | #c.ContentsManager.checkpoints_class = 'notebook.services.contents.checkpoints.Checkpoints' 565 | 566 | ## 567 | #c.ContentsManager.checkpoints_kwargs = {} 568 | 569 | ## handler class to use when serving raw file requests. 570 | # 571 | # Default is a fallback that talks to the ContentsManager API, which may be 572 | # inefficient, especially for large files. 573 | # 574 | # Local files-based ContentsManagers can use a StaticFileHandler subclass, which 575 | # will be much more efficient. 576 | # 577 | # Access to these files should be Authenticated. 
578 | #c.ContentsManager.files_handler_class = 'notebook.files.handlers.FilesHandler' 579 | 580 | ## Extra parameters to pass to files_handler_class. 581 | # 582 | # For example, StaticFileHandlers generally expect a `path` argument specifying 583 | # the root directory from which to serve files. 584 | #c.ContentsManager.files_handler_params = {} 585 | 586 | ## Glob patterns to hide in file and directory listings. 587 | #c.ContentsManager.hide_globs = ['__pycache__', '*.pyc', '*.pyo', '.DS_Store', '*.so', '*.dylib', '*~'] 588 | 589 | ## Python callable or importstring thereof 590 | # 591 | # To be called on a contents model prior to save. 592 | # 593 | # This can be used to process the structure, such as removing notebook outputs 594 | # or other side effects that should not be saved. 595 | # 596 | # It will be called as (all arguments passed by keyword):: 597 | # 598 | # hook(path=path, model=model, contents_manager=self) 599 | # 600 | # - model: the model to be saved. Includes file contents. 601 | # Modifying this dict will affect the file that is stored. 602 | # - path: the API path of the save destination 603 | # - contents_manager: this ContentsManager instance 604 | #c.ContentsManager.pre_save_hook = None 605 | 606 | ## 607 | #c.ContentsManager.root_dir = '/' 608 | 609 | ## The base name used when creating untitled directories. 610 | #c.ContentsManager.untitled_directory = 'Untitled Folder' 611 | 612 | ## The base name used when creating untitled files. 613 | #c.ContentsManager.untitled_file = 'untitled' 614 | 615 | ## The base name used when creating untitled notebooks. 616 | #c.ContentsManager.untitled_notebook = 'Untitled' 617 | 618 | #------------------------------------------------------------------------------ 619 | # FileManagerMixin(Configurable) configuration 620 | #------------------------------------------------------------------------------ 621 | 622 | ## Mixin for ContentsAPI classes that interact with the filesystem. 
623 | # 624 | # Provides facilities for reading, writing, and copying both notebooks and 625 | # generic files. 626 | # 627 | # Shared by FileContentsManager and FileCheckpoints. 628 | # 629 | # Note ---- Classes using this mixin must provide the following attributes: 630 | # 631 | # root_dir : unicode 632 | # A directory against against which API-style paths are to be resolved. 633 | # 634 | # log : logging.Logger 635 | 636 | ## By default notebooks are saved on disk on a temporary file and then if 637 | # succefully written, it replaces the old ones. This procedure, namely 638 | # 'atomic_writing', causes some bugs on file system whitout operation order 639 | # enforcement (like some networked fs). If set to False, the new notebook is 640 | # written directly on the old one which could fail (eg: full filesystem or quota 641 | # ) 642 | #c.FileManagerMixin.use_atomic_writing = True 643 | 644 | #------------------------------------------------------------------------------ 645 | # FileContentsManager(FileManagerMixin,ContentsManager) configuration 646 | #------------------------------------------------------------------------------ 647 | 648 | ## If True (default), deleting files will send them to the platform's 649 | # trash/recycle bin, where they can be recovered. If False, deleting files 650 | # really deletes them. 651 | #c.FileContentsManager.delete_to_trash = True 652 | 653 | ## Python callable or importstring thereof 654 | # 655 | # to be called on the path of a file just saved. 656 | # 657 | # This can be used to process the file on disk, such as converting the notebook 658 | # to a script or HTML via nbconvert. 
659 | # 660 | # It will be called as (all arguments passed by keyword):: 661 | # 662 | # hook(os_path=os_path, model=model, contents_manager=instance) 663 | # 664 | # - path: the filesystem path to the file just written - model: the model 665 | # representing the file - contents_manager: this ContentsManager instance 666 | #c.FileContentsManager.post_save_hook = None 667 | 668 | ## 669 | #c.FileContentsManager.root_dir = '' 670 | 671 | ## DEPRECATED, use post_save_hook. Will be removed in Notebook 5.0 672 | #c.FileContentsManager.save_script = False 673 | 674 | #------------------------------------------------------------------------------ 675 | # NotebookNotary(LoggingConfigurable) configuration 676 | #------------------------------------------------------------------------------ 677 | 678 | ## A class for computing and verifying notebook signatures. 679 | 680 | ## The hashing algorithm used to sign notebooks. 681 | #c.NotebookNotary.algorithm = 'sha256' 682 | 683 | ## The sqlite file in which to store notebook signatures. By default, this will 684 | # be in your Jupyter data directory. You can set it to ':memory:' to disable 685 | # sqlite writing to the filesystem. 686 | #c.NotebookNotary.db_file = '' 687 | 688 | ## The secret key with which notebooks are signed. 689 | #c.NotebookNotary.secret = b'' 690 | 691 | ## The file where the secret key is stored. 692 | #c.NotebookNotary.secret_file = '' 693 | 694 | ## A callable returning the storage backend for notebook signatures. The default 695 | # uses an SQLite database. 696 | #c.NotebookNotary.store_factory = traitlets.Undefined 697 | 698 | #------------------------------------------------------------------------------ 699 | # KernelSpecManager(LoggingConfigurable) configuration 700 | #------------------------------------------------------------------------------ 701 | 702 | ## If there is no Python kernelspec registered and the IPython kernel is 703 | # available, ensure it is added to the spec list. 
704 | #c.KernelSpecManager.ensure_native_kernel = True 705 | 706 | ## The kernel spec class. This is configurable to allow subclassing of the 707 | # KernelSpecManager for customized behavior. 708 | #c.KernelSpecManager.kernel_spec_class = 'jupyter_client.kernelspec.KernelSpec' 709 | 710 | ## Whitelist of allowed kernel names. 711 | # 712 | # By default, all installed kernels are allowed. 713 | #c.KernelSpecManager.whitelist = set() 714 | -------------------------------------------------------------------------------- /pyemma_tutorials/util.py: -------------------------------------------------------------------------------- 1 | import pkg_resources 2 | import os 3 | 4 | 5 | def notebook_location(): 6 | d = pkg_resources.resource_filename('pyemma_tutorials', 'notebooks') 7 | assert os.path.isdir(d) 8 | return d 9 | 10 | 11 | def configs_location(): 12 | notebook_cfg = pkg_resources.resource_filename('pyemma_tutorials', 'jupyter_notebook_config.py') 13 | notebook_cfg_json = pkg_resources.resource_filename('pyemma_tutorials', 'jupyter_notebook_config.json') 14 | 15 | assert os.path.exists(notebook_cfg) 16 | assert os.path.exists(notebook_cfg_json) 17 | 18 | return notebook_cfg, notebook_cfg_json 19 | 20 | 21 | def run_dir(): 22 | """ directory in which the user copies of the notebooks will reside. """ 23 | import os 24 | target = os.path.expanduser('~/pyemma_tutorials') 25 | os.makedirs(target, exist_ok=True) 26 | 27 | # copy static data into run dir 28 | src = os.path.join(notebook_location(), 'static') 29 | 30 | def copytree(src, dst, symlinks=False, ignore=None): 31 | # shutil.copytree fails for existing target dirs... 
32 | import shutil 33 | for item in os.listdir(src): 34 | s = os.path.join(src, item) 35 | d = os.path.join(dst, item) 36 | if os.path.isdir(s): 37 | shutil.copytree(s, d, symlinks, ignore) 38 | else: 39 | shutil.copy2(s, d) 40 | 41 | copytree(src, os.path.join(target, 'static')) 42 | 43 | return target 44 | -------------------------------------------------------------------------------- /releases/LiveCoMS_Article_ASAP_V1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markovmodel/pyemma_tutorials/6b9183686d2238d4f60c752a73e9b710c667ec10/releases/LiveCoMS_Article_ASAP_V1.pdf -------------------------------------------------------------------------------- /releases/LiveCoMS_Article_V1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markovmodel/pyemma_tutorials/6b9183686d2238d4f60c752a73e9b710c667ec10/releases/LiveCoMS_Article_V1.pdf -------------------------------------------------------------------------------- /releases/header_V1.0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markovmodel/pyemma_tutorials/6b9183686d2238d4f60c752a73e9b710c667ec10/releases/header_V1.0.jpg -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | 2 | # See the docstring in versioneer.py for instructions. Note that you must 3 | # re-run 'versioneer.py setup' after changing this section, and commit the 4 | # resulting files. 
import os
import versioneer

from setuptools import setup


def copy_notebooks():
    """Mirror the top-level ``notebooks/`` tree into the package directory.

    setuptools can only ship files that live inside the package, so the
    notebooks are copied into ``pyemma_tutorials/notebooks`` before each
    build. Any stale copy is removed first. A missing source tree (e.g. when
    building from an sdist that already contains the copies) is tolerated,
    but the failure is now reported instead of silently swallowed, so a
    build without notebooks is at least diagnosable.
    """
    import shutil
    dest = os.path.join('pyemma_tutorials', 'notebooks')
    try:
        shutil.rmtree(dest, ignore_errors=True)
        shutil.copytree('notebooks', dest)
        # original message said "moved", but the tree is copied, not moved
        print('copied notebooks into pkg')
    except OSError as e:
        # best-effort: keep the build going, but leave a trace of why the
        # notebooks are absent instead of passing silently.
        print('could not copy notebooks into pkg: %s' % e)


metadata = dict(
    name='pyemma_tutorials',
    version=versioneer.get_version(),
    cmdclass=versioneer.get_cmdclass(),
    packages=['pyemma_tutorials'],
    package_data={'pyemma_tutorials': ['notebooks/*',
                                       'notebooks/static/*',
                                       'jupyter_notebook_config.py',
                                       'jupyter_notebook_config.json',
                                       ]},
    include_package_data=True,
    entry_points={'console_scripts': ['pyemma_tutorials = pyemma_tutorials.cli:main'], },
    install_requires=['pyemma',
                      'mdshare',
                      'nbexamples',
                      'nglview',
                      'notebook',
                      'jupyter_contrib_nbextensions',
                      ],
    zip_safe=False,
)

if __name__ == '__main__':
    copy_notebooks()
    setup(**metadata)