├── .circleci
│   └── config.yml
├── .gitattributes
├── .gitignore
├── LICENSE
├── MANIFEST.in
├── README.md
├── binder
│   ├── environment.yml
│   └── postBuild
├── conftest.py
├── devtools
│   ├── conda-recipe
│   │   └── meta.yaml
│   ├── create_bibliography_nb.py
│   └── install_miniconda.sh
├── manuscript
│   ├── .gitignore
│   ├── README.md
│   ├── figures
│   │   ├── figure_1.pdf
│   │   ├── figure_2.pdf
│   │   ├── figure_3.pdf
│   │   ├── figure_4.pdf
│   │   ├── figure_5.pdf
│   │   ├── figure_6.pdf
│   │   ├── figure_7.pdf
│   │   ├── tutorials-logo.png
│   │   └── workflowchart.svg
│   ├── literature.bib
│   ├── livecoms.cls
│   ├── manuscript.tex
│   └── vancouver-livecoms.bst
├── notebooks
│   ├── .gitignore
│   ├── 00-pentapeptide-showcase.ipynb
│   ├── 01-data-io-and-featurization.ipynb
│   ├── 02-dimension-reduction-and-discretization.ipynb
│   ├── 03-msm-estimation-and-validation.ipynb
│   ├── 04-msm-analysis.ipynb
│   ├── 05-pcca-tpt.ipynb
│   ├── 06-expectations-and-observables.ipynb
│   ├── 07-hidden-markov-state-models.ipynb
│   ├── 08-common-problems.ipynb
│   └── static
│       ├── hmm-backbone-1-385x432.png
│       ├── hmm-backbone-2-388x526.png
│       ├── hmm-backbone-3-347x500.png
│       ├── hmm-backbone-4-367x348.png
│       ├── hmm-backbone-5-260x374.png
│       ├── pentapeptide-states.png
│       └── pentapeptide-structure.png
├── pyemma_tutorials
│   ├── __init__.py
│   ├── __main__.py
│   ├── _version.py
│   ├── cli.py
│   ├── jupyter_notebook_config.json
│   ├── jupyter_notebook_config.py
│   └── util.py
├── releases
│   ├── LiveCoMS_Article_ASAP_V1.pdf
│   ├── LiveCoMS_Article_V1.pdf
│   └── header_V1.0.jpg
├── setup.cfg
├── setup.py
└── versioneer.py
/.circleci/config.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | jobs:
3 |   build:
4 |     docker:
5 |       - image: continuumio/miniconda3
6 |     environment:
7 |       PYTHONHASHSEED: 0
8 |       OMP_NUM_THREADS: 1
9 |       PYEMMA_NJOBS: 1
10 |       NBVAL_OUTPUT: /root/nbval
11 |     parallelism: 4
12 |     steps:
13 |       - checkout
14 |       - run:
15 |           name: conda_config
16 |           command: |
17 |             conda config --set always_yes true
18 |             conda config --set quiet true
19 |       - run: conda install conda-build
20 |       - run: mkdir $NBVAL_OUTPUT
21 |       - run:
22 |           name: build_test
23 |           command: conda build -c conda-forge .
24 |           no_output_timeout: 20m
25 |       - store_test_results:
26 |           path: ~/junit
27 |       - store_artifacts:
28 |           path: /root/nbval #$NBVAL_OUTPUT
29 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | pyemma_tutorials/_version.py export-subst
2 |
3 | *.ipynb diff=jupyternotebook
4 |
5 | *.ipynb merge=jupyternotebook
6 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | #copied stuff
2 | pyemma_tutorials/notebooks/*
3 |
4 | # Byte-compiled / optimized / DLL files
5 | __pycache__/
6 | *.py[cod]
7 | *$py.class
8 |
9 | # C extensions
10 | *.so
11 |
12 | # Distribution / packaging
13 | .Python
14 | env/
15 | build/
16 | develop-eggs/
17 | dist/
18 | downloads/
19 | eggs/
20 | .eggs/
21 | lib/
22 | lib64/
23 | parts/
24 | sdist/
25 | var/
26 | wheels/
27 | *.egg-info/
28 | .installed.cfg
29 | *.egg
30 |
31 | # PyInstaller
32 | # Usually these files are written by a python script from a template
33 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
34 | *.manifest
35 | *.spec
36 |
37 | # Installer logs
38 | pip-log.txt
39 | pip-delete-this-directory.txt
40 |
41 | # Unit test / coverage reports
42 | htmlcov/
43 | .tox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | .hypothesis/
51 |
52 | # Translations
53 | *.mo
54 | *.pot
55 |
56 | # Django stuff:
57 | *.log
58 | local_settings.py
59 |
60 | # Flask stuff:
61 | instance/
62 | .webassets-cache
63 |
64 | # Scrapy stuff:
65 | .scrapy
66 |
67 | # Sphinx documentation
68 | docs/_build/
69 |
70 | # PyBuilder
71 | target/
72 |
73 | # Jupyter Notebook
74 | .ipynb_checkpoints
75 |
76 | # pyenv
77 | .python-version
78 |
79 | # celery beat schedule file
80 | celerybeat-schedule
81 |
82 | # SageMath parsed files
83 | *.sage.py
84 |
85 | # dotenv
86 | .env
87 |
88 | # virtualenv
89 | .venv
90 | venv/
91 | ENV/
92 |
93 | # Spyder project settings
94 | .spyderproject
95 | .spyproject
96 |
97 | # Rope project settings
98 | .ropeproject
99 |
100 | # mkdocs documentation
101 | /site
102 |
103 | # mypy
104 | .mypy_cache/
105 |
106 | #OSX stuff
107 | *.DS_Store
108 | manuscript/manuscript.suppinfo
109 | manuscript/manuscript.pdf
110 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Attribution 4.0 International
2 |
3 | =======================================================================
4 |
5 | Creative Commons Corporation ("Creative Commons") is not a law firm and
6 | does not provide legal services or legal advice. Distribution of
7 | Creative Commons public licenses does not create a lawyer-client or
8 | other relationship. Creative Commons makes its licenses and related
9 | information available on an "as-is" basis. Creative Commons gives no
10 | warranties regarding its licenses, any material licensed under their
11 | terms and conditions, or any related information. Creative Commons
12 | disclaims all liability for damages resulting from their use to the
13 | fullest extent possible.
14 |
15 | Using Creative Commons Public Licenses
16 |
17 | Creative Commons public licenses provide a standard set of terms and
18 | conditions that creators and other rights holders may use to share
19 | original works of authorship and other material subject to copyright
20 | and certain other rights specified in the public license below. The
21 | following considerations are for informational purposes only, are not
22 | exhaustive, and do not form part of our licenses.
23 |
24 | Considerations for licensors: Our public licenses are
25 | intended for use by those authorized to give the public
26 | permission to use material in ways otherwise restricted by
27 | copyright and certain other rights. Our licenses are
28 | irrevocable. Licensors should read and understand the terms
29 | and conditions of the license they choose before applying it.
30 | Licensors should also secure all rights necessary before
31 | applying our licenses so that the public can reuse the
32 | material as expected. Licensors should clearly mark any
33 | material not subject to the license. This includes other CC-
34 | licensed material, or material used under an exception or
35 | limitation to copyright. More considerations for licensors:
36 | wiki.creativecommons.org/Considerations_for_licensors
37 |
38 | Considerations for the public: By using one of our public
39 | licenses, a licensor grants the public permission to use the
40 | licensed material under specified terms and conditions. If
41 | the licensor's permission is not necessary for any reason--for
42 | example, because of any applicable exception or limitation to
43 | copyright--then that use is not regulated by the license. Our
44 | licenses grant only permissions under copyright and certain
45 | other rights that a licensor has authority to grant. Use of
46 | the licensed material may still be restricted for other
47 | reasons, including because others have copyright or other
48 | rights in the material. A licensor may make special requests,
49 | such as asking that all changes be marked or described.
50 | Although not required by our licenses, you are encouraged to
51 | respect those requests where reasonable. More considerations
52 | for the public:
53 | wiki.creativecommons.org/Considerations_for_licensees
54 |
55 | =======================================================================
56 |
57 | Creative Commons Attribution 4.0 International Public License
58 |
59 | By exercising the Licensed Rights (defined below), You accept and agree
60 | to be bound by the terms and conditions of this Creative Commons
61 | Attribution 4.0 International Public License ("Public License"). To the
62 | extent this Public License may be interpreted as a contract, You are
63 | granted the Licensed Rights in consideration of Your acceptance of
64 | these terms and conditions, and the Licensor grants You such rights in
65 | consideration of benefits the Licensor receives from making the
66 | Licensed Material available under these terms and conditions.
67 |
68 |
69 | Section 1 -- Definitions.
70 |
71 | a. Adapted Material means material subject to Copyright and Similar
72 | Rights that is derived from or based upon the Licensed Material
73 | and in which the Licensed Material is translated, altered,
74 | arranged, transformed, or otherwise modified in a manner requiring
75 | permission under the Copyright and Similar Rights held by the
76 | Licensor. For purposes of this Public License, where the Licensed
77 | Material is a musical work, performance, or sound recording,
78 | Adapted Material is always produced where the Licensed Material is
79 | synched in timed relation with a moving image.
80 |
81 | b. Adapter's License means the license You apply to Your Copyright
82 | and Similar Rights in Your contributions to Adapted Material in
83 | accordance with the terms and conditions of this Public License.
84 |
85 | c. Copyright and Similar Rights means copyright and/or similar rights
86 | closely related to copyright including, without limitation,
87 | performance, broadcast, sound recording, and Sui Generis Database
88 | Rights, without regard to how the rights are labeled or
89 | categorized. For purposes of this Public License, the rights
90 | specified in Section 2(b)(1)-(2) are not Copyright and Similar
91 | Rights.
92 |
93 | d. Effective Technological Measures means those measures that, in the
94 | absence of proper authority, may not be circumvented under laws
95 | fulfilling obligations under Article 11 of the WIPO Copyright
96 | Treaty adopted on December 20, 1996, and/or similar international
97 | agreements.
98 |
99 | e. Exceptions and Limitations means fair use, fair dealing, and/or
100 | any other exception or limitation to Copyright and Similar Rights
101 | that applies to Your use of the Licensed Material.
102 |
103 | f. Licensed Material means the artistic or literary work, database,
104 | or other material to which the Licensor applied this Public
105 | License.
106 |
107 | g. Licensed Rights means the rights granted to You subject to the
108 | terms and conditions of this Public License, which are limited to
109 | all Copyright and Similar Rights that apply to Your use of the
110 | Licensed Material and that the Licensor has authority to license.
111 |
112 | h. Licensor means the individual(s) or entity(ies) granting rights
113 | under this Public License.
114 |
115 | i. Share means to provide material to the public by any means or
116 | process that requires permission under the Licensed Rights, such
117 | as reproduction, public display, public performance, distribution,
118 | dissemination, communication, or importation, and to make material
119 | available to the public including in ways that members of the
120 | public may access the material from a place and at a time
121 | individually chosen by them.
122 |
123 | j. Sui Generis Database Rights means rights other than copyright
124 | resulting from Directive 96/9/EC of the European Parliament and of
125 | the Council of 11 March 1996 on the legal protection of databases,
126 | as amended and/or succeeded, as well as other essentially
127 | equivalent rights anywhere in the world.
128 |
129 | k. You means the individual or entity exercising the Licensed Rights
130 | under this Public License. Your has a corresponding meaning.
131 |
132 |
133 | Section 2 -- Scope.
134 |
135 | a. License grant.
136 |
137 | 1. Subject to the terms and conditions of this Public License,
138 | the Licensor hereby grants You a worldwide, royalty-free,
139 | non-sublicensable, non-exclusive, irrevocable license to
140 | exercise the Licensed Rights in the Licensed Material to:
141 |
142 | a. reproduce and Share the Licensed Material, in whole or
143 | in part; and
144 |
145 | b. produce, reproduce, and Share Adapted Material.
146 |
147 | 2. Exceptions and Limitations. For the avoidance of doubt, where
148 | Exceptions and Limitations apply to Your use, this Public
149 | License does not apply, and You do not need to comply with
150 | its terms and conditions.
151 |
152 | 3. Term. The term of this Public License is specified in Section
153 | 6(a).
154 |
155 | 4. Media and formats; technical modifications allowed. The
156 | Licensor authorizes You to exercise the Licensed Rights in
157 | all media and formats whether now known or hereafter created,
158 | and to make technical modifications necessary to do so. The
159 | Licensor waives and/or agrees not to assert any right or
160 | authority to forbid You from making technical modifications
161 | necessary to exercise the Licensed Rights, including
162 | technical modifications necessary to circumvent Effective
163 | Technological Measures. For purposes of this Public License,
164 | simply making modifications authorized by this Section 2(a)
165 | (4) never produces Adapted Material.
166 |
167 | 5. Downstream recipients.
168 |
169 | a. Offer from the Licensor -- Licensed Material. Every
170 | recipient of the Licensed Material automatically
171 | receives an offer from the Licensor to exercise the
172 | Licensed Rights under the terms and conditions of this
173 | Public License.
174 |
175 | b. No downstream restrictions. You may not offer or impose
176 | any additional or different terms or conditions on, or
177 | apply any Effective Technological Measures to, the
178 | Licensed Material if doing so restricts exercise of the
179 | Licensed Rights by any recipient of the Licensed
180 | Material.
181 |
182 | 6. No endorsement. Nothing in this Public License constitutes or
183 | may be construed as permission to assert or imply that You
184 | are, or that Your use of the Licensed Material is, connected
185 | with, or sponsored, endorsed, or granted official status by,
186 | the Licensor or others designated to receive attribution as
187 | provided in Section 3(a)(1)(A)(i).
188 |
189 | b. Other rights.
190 |
191 | 1. Moral rights, such as the right of integrity, are not
192 | licensed under this Public License, nor are publicity,
193 | privacy, and/or other similar personality rights; however, to
194 | the extent possible, the Licensor waives and/or agrees not to
195 | assert any such rights held by the Licensor to the limited
196 | extent necessary to allow You to exercise the Licensed
197 | Rights, but not otherwise.
198 |
199 | 2. Patent and trademark rights are not licensed under this
200 | Public License.
201 |
202 | 3. To the extent possible, the Licensor waives any right to
203 | collect royalties from You for the exercise of the Licensed
204 | Rights, whether directly or through a collecting society
205 | under any voluntary or waivable statutory or compulsory
206 | licensing scheme. In all other cases the Licensor expressly
207 | reserves any right to collect such royalties.
208 |
209 |
210 | Section 3 -- License Conditions.
211 |
212 | Your exercise of the Licensed Rights is expressly made subject to the
213 | following conditions.
214 |
215 | a. Attribution.
216 |
217 | 1. If You Share the Licensed Material (including in modified
218 | form), You must:
219 |
220 | a. retain the following if it is supplied by the Licensor
221 | with the Licensed Material:
222 |
223 | i. identification of the creator(s) of the Licensed
224 | Material and any others designated to receive
225 | attribution, in any reasonable manner requested by
226 | the Licensor (including by pseudonym if
227 | designated);
228 |
229 | ii. a copyright notice;
230 |
231 | iii. a notice that refers to this Public License;
232 |
233 | iv. a notice that refers to the disclaimer of
234 | warranties;
235 |
236 | v. a URI or hyperlink to the Licensed Material to the
237 | extent reasonably practicable;
238 |
239 | b. indicate if You modified the Licensed Material and
240 | retain an indication of any previous modifications; and
241 |
242 | c. indicate the Licensed Material is licensed under this
243 | Public License, and include the text of, or the URI or
244 | hyperlink to, this Public License.
245 |
246 | 2. You may satisfy the conditions in Section 3(a)(1) in any
247 | reasonable manner based on the medium, means, and context in
248 | which You Share the Licensed Material. For example, it may be
249 | reasonable to satisfy the conditions by providing a URI or
250 | hyperlink to a resource that includes the required
251 | information.
252 |
253 | 3. If requested by the Licensor, You must remove any of the
254 | information required by Section 3(a)(1)(A) to the extent
255 | reasonably practicable.
256 |
257 | 4. If You Share Adapted Material You produce, the Adapter's
258 | License You apply must not prevent recipients of the Adapted
259 | Material from complying with this Public License.
260 |
261 |
262 | Section 4 -- Sui Generis Database Rights.
263 |
264 | Where the Licensed Rights include Sui Generis Database Rights that
265 | apply to Your use of the Licensed Material:
266 |
267 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right
268 | to extract, reuse, reproduce, and Share all or a substantial
269 | portion of the contents of the database;
270 |
271 | b. if You include all or a substantial portion of the database
272 | contents in a database in which You have Sui Generis Database
273 | Rights, then the database in which You have Sui Generis Database
274 | Rights (but not its individual contents) is Adapted Material; and
275 |
276 | c. You must comply with the conditions in Section 3(a) if You Share
277 | all or a substantial portion of the contents of the database.
278 |
279 | For the avoidance of doubt, this Section 4 supplements and does not
280 | replace Your obligations under this Public License where the Licensed
281 | Rights include other Copyright and Similar Rights.
282 |
283 |
284 | Section 5 -- Disclaimer of Warranties and Limitation of Liability.
285 |
286 | a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
287 | EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
288 | AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
289 | ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
290 | IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
291 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
292 | PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
293 | ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
294 | KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
295 | ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
296 |
297 | b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
298 | TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
299 | NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
300 | INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
301 | COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
302 | USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
303 | ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
304 | DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
305 | IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
306 |
307 | c. The disclaimer of warranties and limitation of liability provided
308 | above shall be interpreted in a manner that, to the extent
309 | possible, most closely approximates an absolute disclaimer and
310 | waiver of all liability.
311 |
312 |
313 | Section 6 -- Term and Termination.
314 |
315 | a. This Public License applies for the term of the Copyright and
316 | Similar Rights licensed here. However, if You fail to comply with
317 | this Public License, then Your rights under this Public License
318 | terminate automatically.
319 |
320 | b. Where Your right to use the Licensed Material has terminated under
321 | Section 6(a), it reinstates:
322 |
323 | 1. automatically as of the date the violation is cured, provided
324 | it is cured within 30 days of Your discovery of the
325 | violation; or
326 |
327 | 2. upon express reinstatement by the Licensor.
328 |
329 | For the avoidance of doubt, this Section 6(b) does not affect any
330 | right the Licensor may have to seek remedies for Your violations
331 | of this Public License.
332 |
333 | c. For the avoidance of doubt, the Licensor may also offer the
334 | Licensed Material under separate terms or conditions or stop
335 | distributing the Licensed Material at any time; however, doing so
336 | will not terminate this Public License.
337 |
338 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
339 | License.
340 |
341 |
342 | Section 7 -- Other Terms and Conditions.
343 |
344 | a. The Licensor shall not be bound by any additional or different
345 | terms or conditions communicated by You unless expressly agreed.
346 |
347 | b. Any arrangements, understandings, or agreements regarding the
348 | Licensed Material not stated herein are separate from and
349 | independent of the terms and conditions of this Public License.
350 |
351 |
352 | Section 8 -- Interpretation.
353 |
354 | a. For the avoidance of doubt, this Public License does not, and
355 | shall not be interpreted to, reduce, limit, restrict, or impose
356 | conditions on any use of the Licensed Material that could lawfully
357 | be made without permission under this Public License.
358 |
359 | b. To the extent possible, if any provision of this Public License is
360 | deemed unenforceable, it shall be automatically reformed to the
361 | minimum extent necessary to make it enforceable. If the provision
362 | cannot be reformed, it shall be severed from this Public License
363 | without affecting the enforceability of the remaining terms and
364 | conditions.
365 |
366 | c. No term or condition of this Public License will be waived and no
367 | failure to comply consented to unless expressly agreed to by the
368 | Licensor.
369 |
370 | d. Nothing in this Public License constitutes or may be interpreted
371 | as a limitation upon, or waiver of, any privileges and immunities
372 | that apply to the Licensor or You, including from the legal
373 | processes of any jurisdiction or authority.
374 |
375 |
376 | =======================================================================
377 |
378 | Creative Commons is not a party to its public
379 | licenses. Notwithstanding, Creative Commons may elect to apply one of
380 | its public licenses to material it publishes and in those instances
381 | will be considered the “Licensor.” The text of the Creative Commons
382 | public licenses is dedicated to the public domain under the CC0 Public
383 | Domain Dedication. Except for the limited purpose of indicating that
384 | material is shared under a Creative Commons public license or as
385 | otherwise permitted by the Creative Commons policies published at
386 | creativecommons.org/policies, Creative Commons does not authorize the
387 | use of the trademark "Creative Commons" or any other trademark or logo
388 | of Creative Commons without its prior written consent including,
389 | without limitation, in connection with any unauthorized modifications
390 | to any of its public licenses or any other arrangements,
391 | understandings, or agreements concerning use of licensed material. For
392 | the avoidance of doubt, this paragraph does not form part of the
393 | public licenses.
394 |
395 | Creative Commons may be contacted at creativecommons.org.
396 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include pyemma_tutorials/notebooks *.ipynb
2 | recursive-include pyemma_tutorials/notebooks/static *
3 | prune pyemma_tutorials/notebooks/.ipynb_checkpoints
4 | include versioneer.py
5 | include pyemma_tutorials/_version.py
6 | include pyemma_tutorials/jupyter_notebook_config.json
7 |
8 | include LICENSE
9 |
10 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Introduction to Markov state modeling with the PyEMMA software
2 |
3 | [](https://circleci.com/gh/markovmodel/pyemma_tutorials)
4 | [](https://mybinder.org/v2/gh/markovmodel/pyemma_tutorials/master)
5 | [](http://creativecommons.org/licenses/by/4.0/)
6 | 
7 | 
8 |
9 | This work is licensed under a Creative Commons Attribution 4.0 International License.
10 |
11 | [PyEMMA](http://pyemma.org) (EMMA = Emma's Markov Model Algorithms) is an open source Python/C package for analysis of extensive molecular dynamics (MD) simulations.
12 |
13 | ### Content
14 |
15 | The first [notebook 📓](notebooks/00-pentapeptide-showcase.ipynb) in this tutorial guides you through the basic analysis workflow using real MD data of a pentapeptide:
16 |
17 |
18 |
19 | We keep the details minimal throughout the showcase but point to the more specialized notebooks, which let you explore selected topics in depth.
20 |
21 | In detail, the remaining eight notebooks revisit all aspects shown in the showcase, provide additional details and variants, and contain exercises (and solutions) to self-check your learning progress:
22 |
23 | 1. Data-I/O and featurization [➜ 📓](notebooks/01-data-io-and-featurization.ipynb)
24 | 2. Dimension reduction and discretization [➜ 📓](notebooks/02-dimension-reduction-and-discretization.ipynb)
25 | 3. MSM estimation and validation [➜ 📓](notebooks/03-msm-estimation-and-validation.ipynb)
26 | 4. MSM analysis [➜ 📓](notebooks/04-msm-analysis.ipynb)
27 | 5. PCCA and TPT analysis [➜ 📓](notebooks/05-pcca-tpt.ipynb)
28 | 6. Expectations and observables [➜ 📓](notebooks/06-expectations-and-observables.ipynb)
29 | 7. Hidden Markov state models (HMMs) [➜ 📓](notebooks/07-hidden-markov-state-models.ipynb)
30 | 8. Common problems & bad data situations [➜ 📓](notebooks/08-common-problems.ipynb)
31 |
32 | **Please note that this is a work in progress and we value any kind of feedback that helps us improve this tutorial.**
33 |
34 | ### Installation
35 | We recommend installing the PyEMMA tutorials with conda. The following command will create a new environment that comes with all the dependencies of the tutorial.
36 |
37 | If you do not have conda, please follow the instructions here:
38 |
39 | https://conda.io/miniconda.html
40 |
41 | #### Installing the tutorials as a package
42 |
43 | After installing miniconda, you can install the tutorial either via
44 |
45 | ``` bash
46 | conda create -n pyemma_tutorials -c conda-forge pyemma_tutorials
47 | ```
48 |
49 | ... or you can install the tutorial into an existing environment with
50 |
51 | ``` bash
52 | conda install -c conda-forge pyemma_tutorials
53 | ```
54 |
55 | If you intend to install with pip, for which we cannot give any support, feel free to run:
56 |
57 | ``` bash
58 | pip install git+https://github.com/markovmodel/pyemma_tutorials
59 | ```
60 |
61 | #### Manual installation
62 |
63 | If you wish to install the tutorial manually, you will need the following packages (including all their dependencies):
64 |
65 | - `pyemma`
66 | - `mdshare`
67 | - `nglview`
68 | - `nbexamples`
69 | - `jupyter_contrib_nbextensions`
70 |
71 | This can be done, for example, with conda:
72 |
73 | ```bash
74 | conda install -c conda-forge pyemma mdshare nglview nbexamples jupyter_contrib_nbextensions
75 | ```
76 |
77 | After installing `jupyter_contrib_nbextensions`, you need to activate the `toc2` and `exercise2` extensions:
78 |
79 | ```bash
80 | jupyter nbextension enable toc2/main
81 | jupyter nbextension enable exercise2/main
82 | ```
83 |
84 | Now all that remains is to clone the repository to get the tutorial notebooks:
85 |
86 | ```bash
87 | git clone https://github.com/markovmodel/pyemma_tutorials.git
88 | ```
89 |
90 | ### Usage
91 | We now have a fresh conda environment containing the notebooks and the software to run them. Activate the environment and start the notebook server by invoking:
92 |
93 | ``` bash
94 | conda activate pyemma_tutorials # skip this, if you have installed in your root environment or used pip to install.
95 | pyemma_tutorials
96 | ```
97 |
98 | The last command starts the notebook server, and your browser should open with a list of the notebooks. You can choose either to preview a notebook or to create your own copy of it. The latter places a copy of the chosen notebook in your home directory, so your changes will not be lost after shutting down the notebook server.
99 |
100 | If you have a manual installation, move to the repository's notebook directory...
101 |
102 | ```bash
103 | cd path_to_pyemma_tutorials/notebooks
104 | ```
105 |
106 | ... and start the notebook server there:
107 |
108 | ```bash
109 | jupyter notebook
110 | ```
111 |
112 | ### Uninstallation
113 |
114 | To uninstall, you can remove the whole environment, which also removes all of the contained software:
115 | ``` bash
116 | conda env remove -n pyemma_tutorials
117 | ```
118 |
119 | or, if you have installed the package into an existing environment:
120 |
121 | ``` bash
122 | conda remove pyemma_tutorials
123 | ```
124 |
--------------------------------------------------------------------------------
/binder/environment.yml:
--------------------------------------------------------------------------------
1 | # this file is used by mybinder.org
2 | channels:
3 |   - conda-forge
4 |   - defaults
5 | dependencies:
6 |   - pyemma_tutorials
7 |   - nomkl
8 |
9 |
--------------------------------------------------------------------------------
/binder/postBuild:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | jupyter nbextension enable toc2/main
4 | jupyter nbextension enable exercise2/main
5 |
6 |
--------------------------------------------------------------------------------
/conftest.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import os
3 |
4 |
5 | notebook_groups = [
6 |     ('msm-analysis',
7 |      'pcca-tpt',
8 |      ),
9 | ]
10 |
11 | @pytest.fixture(scope='session')
12 | def no_progress_bars():
13 |     """ disables progress bars during testing """
14 |     import pyemma
15 |     pyemma.config.show_progress_bars = False
16 |     yield
17 |
18 |
19 | ### execution timing ##########################################################
20 | from collections import defaultdict
21 | timings = defaultdict(int)
22 |
23 |
24 | def pytest_runtest_logreport(report):
25 |     if report.when == "call":
26 |         key = report.location[0]
27 |         timings[key] += report.duration
28 |
29 |
30 | def pytest_terminal_summary(terminalreporter, exitstatus):
31 |     terminalreporter.section('Notebook timings')
32 |     s = sorted(timings.items(), key=lambda x: x[1])
33 |     for nb, total in s:
34 |         terminalreporter.write_line('%s took %.1f seconds' % (nb, total))
35 |
36 | ###############################################################################
37 | def cell_skipped(cell_metadata):
38 |     exercise_2_cell = cell_metadata.get('solution2_first', False)
39 |     skip = cell_metadata.get('skip', False)
40 |     if exercise_2_cell or skip:
41 |         return True
42 |     return False
43 |
44 | #### Circle CI parallel execution #############################################
45 | def pytest_collection_modifyitems(session, config, items):
46 |     for i in items:
47 |         if cell_skipped(i.cell['metadata']):
48 |             i.add_marker(pytest.mark.skip('solution stub or metadata["skip"]=True'))
49 |
50 |     circle_node_total, circle_node_index = read_circleci_env_variables()
51 |     if circle_node_total > 1:
52 |         by_parents = defaultdict(list)
53 |         for index, item in enumerate(items):
54 |             by_parents[item.parent].append(item)
55 |
56 |         # merge grouped parents
57 |         for n in notebook_groups:
58 |             items_to_group = []
59 |             keys_to_merge = []
60 |             for p in by_parents:
61 |                 for nb in n:
62 |                     if nb in p.name:
63 |                         items_to_group.extend(by_parents[p])
64 |                         keys_to_merge.append(p)
65 |             for k in keys_to_merge:
66 |                 del by_parents[k]
67 |             by_parents[tuple(keys_to_merge)] = items_to_group
68 |
69 |         deselected = []
70 |         # round robin: by notebook file and ci node index
71 |         for i, p in enumerate(by_parents.keys()):
72 |             if i % circle_node_total != circle_node_index:
73 |                 deselected.extend(by_parents[p])
74 |         for d in deselected:
75 |             items.remove(d)
76 |         executed_notebooks = [nb.name for nb in
77 |                               set(x.parent for x in set(items) - set(deselected))]
78 |         print('Notebooks to execute:', executed_notebooks)
79 |         config.hook.pytest_deselected(items=deselected)
80 |
81 |
82 | def read_circleci_env_variables():
83 |     """Read and convert CIRCLE_* environment variables"""
84 |     circle_node_total = int(os.environ.get(
85 |         "CIRCLE_NODE_TOTAL", "1").strip() or "1")
86 |     circle_node_index = int(os.environ.get(
87 |         "CIRCLE_NODE_INDEX", "0").strip() or "0")
88 |
89 |     if circle_node_index >= circle_node_total:
90 |         raise RuntimeError("CIRCLE_NODE_INDEX={} >= CIRCLE_NODE_TOTAL={}, should be less".format(
91 |             circle_node_index, circle_node_total))
92 |
93 |     return circle_node_total, circle_node_index
94 |
95 |
96 | def pytest_report_header(config):
97 |     """Add CircleCI information to report"""
98 |     circle_node_total, circle_node_index = read_circleci_env_variables()
99 |     return "CircleCI total nodes: {}, this node index: {}".format(circle_node_total, circle_node_index)
100 |
101 | ###############################################################################
102 |
103 | cells_per_notebook = defaultdict(list)
104 |
105 |
106 | def pytest_runtest_call(item):
107 |     cells_per_notebook[item.parent].append(item)
108 |
109 |
110 | def pytest_sessionfinish(session, exitstatus):
111 |     """ write all executed notebooks to a given path and convert them to html """
112 |     import nbformat as nbf
113 |     import tempfile
114 |     out_dir = os.getenv('NBVAL_OUTPUT', tempfile.mkdtemp(
115 |         prefix='pyemma_tut_test_output'))
116 |     print('write html output to', os.path.abspath(out_dir))
117 |     out_files = []
118 |     ipynbfiles = set(i.parent for i in session.items)
119 |     for ipynbfile in ipynbfiles:
120 |         out_file = os.path.join(out_dir, os.path.basename(ipynbfile.name))
121 |         # map output cells
122 |         cells_with_non_skipped_output = (c for c in ipynbfile.nb.cells if hasattr(c, 'outputs') and not cell_skipped(c.metadata))
123 |         for cell, ipynbcell in zip(cells_with_non_skipped_output, cells_per_notebook[ipynbfile]):
124 |             print(cell, ipynbcell)
125 |             cell.outputs = ipynbcell.test_outputs
126 |
127 |         with open(out_file, 'x') as fh:
128 |             nbf.write(ipynbfile.nb, fh)
129 |         out_files.append(out_file)
130 |
131 |     import subprocess
132 |     import sys
133 |
134 |     cmd = [sys.executable, '-m', 'jupyter',
135 |            'nbconvert', '--to=html'] + out_files
136 |     print('converting via cmd:', cmd)
137 |     subprocess.check_output(cmd)
138 |
139 |     # delete source output notebooks
140 |     for f in out_files:
141 |         os.unlink(f)
142 |
--------------------------------------------------------------------------------
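Note on the CircleCI sharding in conftest.py above: the round-robin selection in `pytest_collection_modifyitems` is the core of the parallel notebook execution. A minimal, self-contained sketch of that logic (the notebook names here are hypothetical stand-ins, not files from this repository):

```python
import os

# Hypothetical stand-ins for the collected notebook files.
notebooks = ['01-data-io.ipynb', '02-dim-reduction.ipynb',
             '03-estimation.ipynb', '04-analysis.ipynb']

# CircleCI sets these for each of the `parallelism: 4` containers.
total = int(os.environ.get('CIRCLE_NODE_TOTAL', '1'))
index = int(os.environ.get('CIRCLE_NODE_INDEX', '0'))

# Same modulo rule as in conftest.py: node `index` keeps every
# `total`-th notebook; everything else is deselected on this node.
mine = [nb for i, nb in enumerate(notebooks) if i % total == index]
print('Notebooks to execute on this node:', mine)
```
--------------------------------------------------------------------------------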
/devtools/conda-recipe/meta.yaml:
--------------------------------------------------------------------------------
1 | package:
2 |   name: pyemma_tutorials
3 |   version: dev
4 |
5 | source:
6 |   - path: ../..
7 |
8 | build:
9 |   script: python -m pip install --no-deps --ignore-installed . -v
10 |   script_env:
11 |     - NBVAL_OUTPUT
12 |   entry_points:
13 |     - pyemma_tutorials = pyemma_tutorials.cli:main
14 |
15 |   osx_is_app: True
16 |   noarch: python
17 |
18 |
19 | requirements:
20 |   build:
21 |     - pip
22 |   run:
23 |     - pyemma
24 |     - numpy >=1.13
25 |     - mdshare
26 |     - notebook
27 |     - nglview
28 |     - jupyter_contrib_nbextensions
29 |     - nbexamples
30 |
31 | test:
32 |   imports:
33 |     - pyemma_tutorials
34 |   commands:
35 |     - which pyemma_tutorials  # [osx or linux]
36 |     - mkdir ~/junit; py.test --nbval -v --junit-xml=$HOME/junit/test.xml --pyargs pyemma_tutorials --durations=30
37 |   source_files:
38 |     - conftest.py
39 |   requires:
40 |     - nbval
41 |     - nbconvert
42 |
43 |
--------------------------------------------------------------------------------
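Note on the recipe above: the test command can be approximated outside conda-build with pytest's Python entry point. A sketch, assuming the tutorial package and the `nbval` plugin are installed (the JUnit XML path from the recipe is omitted here):

```python
import pytest

# Mirror the recipe's `commands:` line: validate the notebooks bundled
# with the installed package using nbval and report per-item timings.
exit_code = pytest.main(['--nbval', '-v', '--pyargs', 'pyemma_tutorials',
                         '--durations=30'])
raise SystemExit(exit_code)
```
--------------------------------------------------------------------------------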
/devtools/create_bibliography_nb.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 |
3 | import nbformat
4 | import os
5 | import re
6 |
7 | def export(in_file, out_file):
8 |     nb = nbformat.v4.new_notebook()
9 |     with open(in_file, encoding='utf8') as fh:
10 |         bibtex = fh.read()
11 |     # NOTE: the original markdown template was lost in extraction; '{bibtex}' is a minimal stand-in.
12 |     src = '{bibtex}'.format(bibtex=bibtex)
13 |
14 |     # remove bibdesk comments
15 |     src = re.sub(pattern=r'@comment.*\{.*\}\}', repl='', string=src, flags=re.DOTALL)
16 |     src = re.sub(pattern=r"\{\\'\{e\}\}", repl='é', string=src)
17 |     src = re.sub(pattern=r"\{\\'\{a\}\}", repl='á', string=src)
18 |     src = re.sub(pattern=r'\\"\{o\}', repl='ö', string=src)
19 |     src = re.sub(pattern=r'\\"\{u\}', repl='ü', string=src)
20 |     src = re.sub(pattern=r'pcca\+\+', repl='pcca_plus_plus', string=src)
21 |     cell = nbformat.v4.new_markdown_cell(src)
22 |     nb.cells.append(cell)
23 |
24 |     with open(out_file, 'w', encoding='utf-8') as fh:
25 |         nbformat.write(nb, fh)
26 |
27 |
28 | if __name__ == '__main__':
29 |     devtools_dir = os.path.abspath(os.path.dirname(__file__))
30 |     in_file = os.path.join(devtools_dir, '../manuscript/literature.bib')
31 |     out_file = os.path.join(devtools_dir, '../notebooks/Bibliography.ipynb')
32 |     export(in_file, out_file)
33 |
--------------------------------------------------------------------------------
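Note on the script above: the generated notebook can be sanity-checked by reading it back with nbformat's standard reader. A sketch (the path matches `out_file` when run from the repository root):

```python
import nbformat

# Load the notebook written by export() and inspect the single markdown
# cell that carries the converted bibliography.
nb = nbformat.read('notebooks/Bibliography.ipynb', as_version=4)
print(nb.cells[0].cell_type)       # expected: 'markdown'
print(nb.cells[0].source[:200])    # start of the bibliography text
```
--------------------------------------------------------------------------------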
/devtools/install_miniconda.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # make TARGET overrideable with env
4 | : ${TARGET:=$HOME/miniconda}
5 |
6 | function install_miniconda {
7 |     if [ -d "$TARGET" ]; then echo "$TARGET already exists"; return; fi
8 |     echo "installing miniconda to $TARGET"
9 |     platform="Linux"
10 |     wget http://repo.continuum.io/miniconda/Miniconda3-latest-$platform-x86_64.sh -O mc.sh -o /dev/null
11 |     bash mc.sh -b -f -p "$TARGET"
12 | }
13 |
14 | install_miniconda
15 |
--------------------------------------------------------------------------------
/manuscript/.gitignore:
--------------------------------------------------------------------------------
1 | ## Core latex/pdflatex auxiliary files:
2 | *.aux
3 | *.lof
4 | *.log
5 | *.lot
6 | *.fls
7 | *.out
8 | *.toc
9 | *.fmt
10 | *.fot
11 | *.cb
12 | *.cb2
13 | .*.lb
14 |
15 | ## Intermediate documents:
16 | *.dvi
17 | *.xdv
18 | *-converted-to.*
19 | # these rules might exclude image files for figures etc.
20 | # *.ps
21 | # *.eps
22 | # *.pdf
23 |
24 | ## Generated if empty string is given at "Please type another file name for output:"
25 | .pdf
26 |
27 | ## Bibliography auxiliary files (bibtex/biblatex/biber):
28 | *.bbl
29 | *.bcf
30 | *.blg
31 | *-blx.aux
32 | *-blx.bib
33 | *.run.xml
34 |
35 | ## Build tool auxiliary files:
36 | *.fdb_latexmk
37 | *.synctex
38 | *.synctex(busy)
39 | *.synctex.gz
40 | *.synctex.gz(busy)
41 | *.pdfsync
42 |
43 | ## Build tool directories for auxiliary files
44 | # latexrun
45 | latex.out/
46 |
47 | ## Auxiliary and intermediate files from other packages:
48 | # algorithms
49 | *.alg
50 | *.loa
51 |
52 | # achemso
53 | acs-*.bib
54 |
55 | # amsthm
56 | *.thm
57 |
58 | # beamer
59 | *.nav
60 | *.pre
61 | *.snm
62 | *.vrb
63 |
64 | # changes
65 | *.soc
66 |
67 | # cprotect
68 | *.cpt
69 |
70 | # elsarticle (documentclass of Elsevier journals)
71 | *.spl
72 |
73 | # endnotes
74 | *.ent
75 |
76 | # fixme
77 | *.lox
78 |
79 | # feynmf/feynmp
80 | *.mf
81 | *.mp
82 | *.t[1-9]
83 | *.t[1-9][0-9]
84 | *.tfm
85 |
86 | #(r)(e)ledmac/(r)(e)ledpar
87 | *.end
88 | *.?end
89 | *.[1-9]
90 | *.[1-9][0-9]
91 | *.[1-9][0-9][0-9]
92 | *.[1-9]R
93 | *.[1-9][0-9]R
94 | *.[1-9][0-9][0-9]R
95 | *.eledsec[1-9]
96 | *.eledsec[1-9]R
97 | *.eledsec[1-9][0-9]
98 | *.eledsec[1-9][0-9]R
99 | *.eledsec[1-9][0-9][0-9]
100 | *.eledsec[1-9][0-9][0-9]R
101 |
102 | # glossaries
103 | *.acn
104 | *.acr
105 | *.glg
106 | *.glo
107 | *.gls
108 | *.glsdefs
109 |
110 | # gnuplottex
111 | *-gnuplottex-*
112 |
113 | # gregoriotex
114 | *.gaux
115 | *.gtex
116 |
117 | # htlatex
118 | *.4ct
119 | *.4tc
120 | *.idv
121 | *.lg
122 | *.trc
123 | *.xref
124 |
125 | # hyperref
126 | *.brf
127 |
128 | # knitr
129 | *-concordance.tex
130 | # TODO Comment the next line if you want to keep your tikz graphics files
131 | *.tikz
132 | *-tikzDictionary
133 |
134 | # listings
135 | *.lol
136 |
137 | # makeidx
138 | *.idx
139 | *.ilg
140 | *.ind
141 | *.ist
142 |
143 | # minitoc
144 | *.maf
145 | *.mlf
146 | *.mlt
147 | *.mtc[0-9]*
148 | *.slf[0-9]*
149 | *.slt[0-9]*
150 | *.stc[0-9]*
151 |
152 | # minted
153 | _minted*
154 | *.pyg
155 |
156 | # morewrites
157 | *.mw
158 |
159 | # nomencl
160 | *.nlg
161 | *.nlo
162 | *.nls
163 |
164 | # pax
165 | *.pax
166 |
167 | # pdfpcnotes
168 | *.pdfpc
169 |
170 | # sagetex
171 | *.sagetex.sage
172 | *.sagetex.py
173 | *.sagetex.scmd
174 |
175 | # scrwfile
176 | *.wrt
177 |
178 | # sympy
179 | *.sout
180 | *.sympy
181 | sympy-plots-for-*.tex/
182 |
183 | # pdfcomment
184 | *.upa
185 | *.upb
186 |
187 | # pythontex
188 | *.pytxcode
189 | pythontex-files-*/
190 |
191 | # thmtools
192 | *.loe
193 |
194 | # TikZ & PGF
195 | *.dpth
196 | *.md5
197 | *.auxlock
198 |
199 | # todonotes
200 | *.tdo
201 |
202 | # easy-todo
203 | *.lod
204 |
205 | # xmpincl
206 | *.xmpi
207 |
208 | # xindy
209 | *.xdy
210 |
211 | # xypic precompiled matrices
212 | *.xyc
213 |
214 | # endfloat
215 | *.ttt
216 | *.fff
217 |
218 | # Latexian
219 | TSWLatexianTemp*
220 |
221 | ## Editors:
222 | # WinEdt
223 | *.bak
224 | *.sav
225 |
226 | # Texpad
227 | .texpadtmp
228 |
229 | # LyX
230 | *.lyx~
231 |
232 | # Kile
233 | *.backup
234 |
235 | # KBibTeX
236 | *~[0-9]*
237 |
238 | # auto folder when using emacs and auctex
239 | ./auto/*
240 | *.el
241 |
242 | # expex forward references with \gathertags
243 | *-tags.tex
244 |
245 | # standalone packages
246 | *.sta
247 |
--------------------------------------------------------------------------------
/manuscript/README.md:
--------------------------------------------------------------------------------
1 | ## PDF download
2 | A compiled version of the manuscript can be found [here](https://github.com/markovmodel/pyemma_tutorials/tree/master/releases).
3 |
--------------------------------------------------------------------------------
/manuscript/figures/figure_1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markovmodel/pyemma_tutorials/6b9183686d2238d4f60c752a73e9b710c667ec10/manuscript/figures/figure_1.pdf
--------------------------------------------------------------------------------
/manuscript/figures/figure_2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markovmodel/pyemma_tutorials/6b9183686d2238d4f60c752a73e9b710c667ec10/manuscript/figures/figure_2.pdf
--------------------------------------------------------------------------------
/manuscript/figures/figure_3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markovmodel/pyemma_tutorials/6b9183686d2238d4f60c752a73e9b710c667ec10/manuscript/figures/figure_3.pdf
--------------------------------------------------------------------------------
/manuscript/figures/figure_4.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markovmodel/pyemma_tutorials/6b9183686d2238d4f60c752a73e9b710c667ec10/manuscript/figures/figure_4.pdf
--------------------------------------------------------------------------------
/manuscript/figures/figure_5.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markovmodel/pyemma_tutorials/6b9183686d2238d4f60c752a73e9b710c667ec10/manuscript/figures/figure_5.pdf
--------------------------------------------------------------------------------
/manuscript/figures/figure_6.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markovmodel/pyemma_tutorials/6b9183686d2238d4f60c752a73e9b710c667ec10/manuscript/figures/figure_6.pdf
--------------------------------------------------------------------------------
/manuscript/figures/figure_7.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markovmodel/pyemma_tutorials/6b9183686d2238d4f60c752a73e9b710c667ec10/manuscript/figures/figure_7.pdf
--------------------------------------------------------------------------------
/manuscript/figures/tutorials-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markovmodel/pyemma_tutorials/6b9183686d2238d4f60c752a73e9b710c667ec10/manuscript/figures/tutorials-logo.png
--------------------------------------------------------------------------------
/manuscript/literature.bib:
--------------------------------------------------------------------------------
1 | @article{hmm-baum-welch-alg,
2 | Author = {Leonard E. Baum and Ted Petrie and George Soules and Norman Weiss},
3 | Title = {A Maximization Technique Occurring in the Statistical Analysis of Probabilistic Functions of Markov Chains},
4 | Journal = {Ann. Math. Stat.},
5 | Year = {1970},
6 | Volume = {41},
7 | Number = {1},
8 | Pages = {164--171},
9 | URL = {http://www.jstor.org/stable/2239727},
10 | }
11 | @article{sasa-calculation,
12 | Author = {A. Shrake and J.A. Rupley},
13 | Title = {Environment and exposure to solvent of protein atoms. Lysozyme and insulin},
14 | Journal = {J. Mol. Biol.},
15 | Year = {1973},
16 | Volume = {79},
17 | Number = {2},
18 | Pages = {351--371},
19 | Month = {sep},
20 | URL = {https://doi.org/10.1016/0022-2836(73)90011-9},
21 | DOI = {10.1016/0022-2836(73)90011-9}
22 | }
23 | @article{hmm-tutorial,
24 | Author = {L.R. Rabiner},
25 | Title = {A tutorial on hidden Markov models and selected applications in speech recognition},
26 | Journal = {Proc. IEEE},
27 | Year = {1989},
28 | Volume = {77},
29 | Number = {2},
30 | Pages = {257--286},
31 | URL = {https://doi.org/10.1109/5.18626},
32 | DOI = {10.1109/5.18626}
33 | }
34 | @article{tica2,
35 | Author = {Molgedey, L. and Schuster, H. G.},
36 | Title = {Separation of a mixture of independent signals using time delayed correlations},
37 | Journal = {Phys. Rev. Lett.},
38 | Year = {1994},
39 | Volume = {72},
40 | Number = {23},
41 | Pages = {3634--3637},
42 | Month = {Jun},
43 | URL = {http://dx.doi.org/10.1103/PhysRevLett.72.3634},
44 | DOI = {10.1103/physrevlett.72.3634}
45 | }
46 | @article{vmd,
47 | Author = {William Humphrey and Andrew Dalke and Klaus Schulten},
48 | Title = {{VMD}: Visual molecular dynamics},
49 | Journal = {J. Mol. Graph.},
50 | Year = {1996},
51 | Volume = {14},
52 | Number = {1},
53 | Pages = {33--38},
54 | Month = {feb},
55 | URL = {https://doi.org/10.1016/0263-7855(96)00018-5},
56 | DOI = {10.1016/0263-7855(96)00018-5}
57 | }
58 | @article{schuette-msm,
59 | Author = {Ch Sch\"{u}tte and A Fischer and W Huisinga and P Deuflhard},
60 | Title = {A Direct Approach to Conformational Dynamics Based on Hybrid Monte Carlo},
61 | Journal = {J. Comput. Phys.},
62 | Year = {1999},
63 | Volume = {151},
64 | Number = {1},
65 | Pages = {146--168},
66 | Month = {may},
67 | URL = {https://doi.org/10.1006/jcph.1999.6231},
68 | DOI = {10.1006/jcph.1999.6231}
69 | }
70 | @misc{scipy,
71 | Title = {{SciPy}: Open source scientific tools for {Python}},
72 | Author = {Eric Jones and Travis Oliphant and Pearu Peterson and others},
73 | Year = {2001--},
74 | URL = {http://www.scipy.org/},
75 | }
76 | @inproceedings{aggarwal_surprising_2001,
77 | Author = {Aggarwal, Charu C. and Hinneburg, Alexander and Keim, Daniel A.},
78 | Title = {On the {Surprising} {Behavior} of {Distance} {Metrics} in {High} {Dimensional} {Space}},
79 | Booktitle = {Database {Theory} --- {ICDT} 2001},
80 | Year = {2001},
81 | Editor = {Van den Bussche, Jan and Vianu, Victor},
82 | Pages = {420--434},
83 | Series = {Lecture {Notes} in {Computer} {Science}},
84 | Publisher = {Springer Berlin Heidelberg},
85 | }
86 | @article{singhal-msm-naming,
87 | Author = {Nina Singhal and Christopher D. Snow and Vijay S. Pande},
88 | Title = {Using path sampling to build better Markovian state models: Predicting the folding rate and mechanism of a tryptophan zipper beta hairpin},
89 | Journal = {J. Chem. Phys.},
90 | Year = {2004},
91 | Volume = {121},
92 | Number = {1},
93 | Pages = {415},
94 | URL = {https://doi.org/10.1063/1.1738647},
95 | DOI = {10.1063/1.1738647}
96 | }
97 | @article{swope-its,
98 | Author = {William C. Swope and Jed W. Pitera and Frank Suits},
99 | Title = {Describing Protein Folding Kinetics by Molecular Dynamics Simulations. 1. Theory{\textdagger}},
100 | Journal = {J. Phys. Chem. B},
101 | Year = {2004},
102 | Volume = {108},
103 | Number = {21},
104 | Pages = {6571--6581},
105 | Month = {may},
106 | URL = {https://doi.org/10.1021/jp037421y},
107 | DOI = {10.1021/jp037421y}
108 | }
109 | @article{Deuflhard2005-pcca,
110 | Author = {Peter Deuflhard and Marcus Weber},
111 | Title = {Robust Perron cluster analysis in conformation dynamics},
112 | Journal = {Linear Algebra Appl.},
113 | Year = {2005},
114 | Volume = {398},
115 | Pages = {161--184},
116 | Month = {mar},
117 | URL = {https://doi.org/10.1016/j.laa.2004.10.026},
118 | DOI = {10.1016/j.laa.2004.10.026}
119 | }
120 | @article{weinan-tpt,
121 | Author = {Weinan E. and Eric Vanden-Eijnden},
122 | Title = {Towards a Theory of Transition Paths},
123 | Journal = {J. Stat. Phys.},
124 | Year = {2006},
125 | Volume = {123},
126 | Number = {3},
127 | Pages = {503--523},
128 | Month = {may},
129 | URL = {https://doi.org/10.1007/s10955-005-9003-9},
130 | DOI = {10.1007/s10955-005-9003-9}
131 | }
132 | @article{matplotlib,
133 | Author = {Hunter, J. D.},
134 | Title = {Matplotlib: A 2D graphics environment},
135 | Journal = {Comput. Sci. Eng.},
136 | Year = 2007,
137 | Volume = {9},
138 | Number = {3},
139 | Pages = {90--95},
140 | DOI = {10.1109/MCSE.2007.55}
141 | }
142 | @article{Kube2007-pcca+,
143 | Author = {Susanna Kube and Marcus Weber},
144 | Title = {A coarse graining method for the identification of transition rates between molecular conformations},
145 | Journal = {J. Chem. Phys.},
146 | Year = {2007},
147 | Volume = {126},
148 | Number = {2},
149 | Pages = {024103},
150 | Month = {jan},
151 | URL = {https://doi.org/10.1063/1.2404953},
152 | DOI = {10.1063/1.2404953}
153 | }
154 | @article{noe2007jcp,
155 | Author = {Frank No{\'{e}} and Illia Horenko and Christof Sch\"{u}tte and Jeremy C. Smith},
156 | Title = {Hierarchical analysis of conformational dynamics in biomolecules: Transition networks of metastable states},
157 | Journal = {J. Chem. Phys.},
158 | Year = {2007},
159 | Volume = {126},
160 | Number = {15},
161 | Pages = {155102},
162 | Month = {apr},
163 | URL = {https://doi.org/10.1063/1.2714539},
164 | DOI = {10.1063/1.2714539}
165 | }
166 | @article{chodera2007jcp,
167 | Author = {John D. Chodera and Nina Singhal and Vijay S. Pande and Ken A. Dill and William C. Swope},
168 | Title = {Automatic discovery of metastable states for the construction of Markov models of macromolecular conformational dynamics},
169 | Journal = {J. Chem. Phys.},
170 | Year = {2007},
171 | Volume = {126},
172 | Number = {15},
173 | Pages = {155101},
174 | Month = {apr},
175 | URL = {https://doi.org/10.1063/1.2714538},
176 | DOI = {10.1063/1.2714538}
177 | }
178 | @article{buchete-msm-2008,
179 | Author = {Nicolae-Viorel Buchete and Gerhard Hummer},
180 | Title = {Coarse Master Equations for Peptide Folding Dynamics{\textdagger}},
181 | Journal = {J. Phys. Chem. B},
182 | Year = {2008},
183 | Volume = {112},
184 | Number = {19},
185 | Pages = {6057--6069},
186 | Month = {may},
187 | URL = {https://doi.org/10.1021/jp0761665},
188 | DOI = {10.1021/jp0761665}
189 | }
190 | @article{noe-tmat-sampling,
191 | Author = {Frank No{\'{e}}},
192 | Title = {Probability distributions of molecular observables computed from Markov models},
193 | Journal = {J. Chem. Phys.},
194 | Year = {2008},
195 | Volume = {128},
196 | Number = {24},
197 | Pages = {244103},
198 | Month = {jun},
199 | URL = {https://doi.org/10.1063/1.2916718},
200 | DOI = {10.1063/1.2916718}
201 | }
202 | @article{metzner-msm-tpt,
203 | Author = {Philipp Metzner and Christof Schütte and Eric Vanden-Eijnden},
204 | Title = {Transition Path Theory for Markov Jump Processes},
205 | Journal = {Multiscale Model. Simul.},
206 | Year = {2009},
207 | Volume = {7},
208 | Number = {3},
209 | Pages = {1192--1219},
210 | Month = {jan},
211 | URL = {https://doi.org/10.1137/070699500},
212 | DOI = {10.1137/070699500}
213 | }
214 | @article{bowman-msm-2009,
215 | Author = {Gregory R. Bowman and Kyle A. Beauchamp and George Boxer and Vijay S. Pande},
216 | Title = {Progress and challenges in the automated construction of Markov state models for full protein systems},
217 | Journal = {J. Chem. Phys.},
218 | Year = {2009},
219 | Volume = {131},
220 | Number = {12},
221 | Pages = {124101},
222 | Month = {sep},
223 | URL = {https://doi.org/10.1063/1.3216567},
224 | DOI = {10.1063/1.3216567}
225 | }
226 | @article{noe-folding-pathways,
227 | Author = {Frank No{\'{e}} and Christof Sch\"{u}tte and Eric Vanden-Eijnden and Lothar Reich and Thomas R. Weikl},
228 | Title = {Constructing the equilibrium ensemble of folding pathways from short off-equilibrium simulations},
229 | Journal = {Proc. Natl. Acad. Sci. USA},
230 | Year = {2009},
231 | Volume = {106},
232 | Number = {45},
233 | Pages = {19011--19016},
234 | Month = {nov},
235 | URL = {https://doi.org/10.1073/pnas.0905466106},
236 | DOI = {10.1073/pnas.0905466106}
237 | }
238 | @article{sarich-msm-quality,
239 | Author = {Marco Sarich and Frank Noé and Christof Schütte},
240 | Title = {On the Approximation Quality of Markov State Models},
241 | Journal = {Multiscale Model. Simul.},
242 | Year = {2010},
243 | Volume = {8},
244 | Number = {4},
245 | Pages = {1154--1177},
246 | Month = {jan},
247 | URL = {https://doi.org/10.1137/090764049},
248 | DOI = {10.1137/090764049}
249 | }
250 | @article{sklearn,
251 | Author = {Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V. and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P. and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
252 | Title = {Scikit-learn: Machine Learning in {P}ython},
253 | Journal = {J. Mach. Learn. Res.},
254 | Year = {2011},
255 | Volume = {12},
256 | Pages = {2825--2830},
257 | }
258 | @article{msm-jhp,
259 | Author = {Prinz, Jan-Hendrik and Wu, Hao and Sarich, Marco and Keller, Bettina and Senne, Martin and Held, Martin and Chodera, John D. and Schütte, Christof and Noé, Frank},
260 | Title = {Markov models of molecular kinetics: Generation and validation},
261 | Journal = {J. Chem. Phys.},
262 | Year = {2011},
263 | Volume = {134},
264 | Number = {17},
265 | Pages = {174105},
266 | URL = {http://scitation.aip.org/content/aip/journal/jcp/134/17/10.1063/1.3565032},
267 | DOI = {http://dx.doi.org/10.1063/1.3565032}
268 | }
269 | @article{noe-fingerprints,
270 | Author = {F. Noe and S. Doose and I. Daidone and M. Lollmann and M. Sauer and J. D. Chodera and J. C. Smith},
271 | Title = {Dynamical fingerprints for probing individual relaxation processes in biomolecular dynamics with simulations and kinetic experiments},
272 | Journal = {Proc. Natl. Acad. Sci. USA},
273 | Year = {2011},
274 | Volume = {108},
275 | Number = {12},
276 | Pages = {4822--4827},
277 | Month = {mar},
278 | URL = {https://doi.org/10.1073/pnas.1004646108},
279 | DOI = {10.1073/pnas.1004646108}
280 | }
281 | @article{bhmm-preprint,
282 | Author = {{Chodera}, J.~D. and {Elms}, P. and {Noé}, F. and {Keller}, B. and {Kaiser}, C.~M. and {Ewall-Wice}, A. and {Marqusee}, S. and {Bustamante}, C. and {Singhal Hinrichs}, N.},
283 | Title = {{Bayesian hidden Markov model analysis of single-molecule force spectroscopy: Characterizing kinetics under measurement uncertainty}},
284 | Journal = {arXiv preprint arXiv:1108.1430},
285 | Year = 2011,
286 | Month = aug,
287 | URL = {https://arxiv.org/pdf/1108.1430.pdf},
288 | }
289 | @article{dror2012biomolecular,
290 | Author = {Ron O. Dror and Robert M. Dirks and J.P. Grossman and Huafeng Xu and David E. Shaw},
291 | Title = {Biomolecular Simulation: A Computational Microscope for Molecular Biology},
292 | Journal = {Annu. Rev. Biophys.},
293 | Year = {2012},
294 | Volume = {41},
295 | Number = {1},
296 | Pages = {429--452},
297 | Month = {jun},
298 | URL = {https://doi.org/10.1146/annurev-biophys-042910-155245},
299 | DOI = {10.1146/annurev-biophys-042910-155245}
300 | }
301 | @book{schuette-sarich-book,
302 | Title = {Metastability and Markov State Models in Molecular Dynamics},
303 | Publisher = {American Mathematical Society},
304 | Year = {2013},
305 | Author = {M. Sarich and C. Sch\"{u}tte},
306 | Series = {Courant Lecture Notes},
307 | }
308 | @article{noe-vac,
309 | Author = {Frank No{\'{e}} and Feliks N\"{u}ske},
310 | Title = {A Variational Approach to Modeling Slow Processes in Stochastic Dynamical Systems},
311 | Journal = {Multiscale Model. Simul.},
312 | Year = {2013},
313 | Volume = {11},
314 | Number = {2},
315 | Pages = {635--655},
316 | Month = {jan},
317 | URL = {https://doi.org/10.1137/110858616},
318 | DOI = {10.1137/110858616}
319 | }
320 | @article{tica3,
321 | Author = {Schwantes, Christian R. and Pande, Vijay S.},
322 | Title = {Improvements in Markov State Model Construction Reveal Many Non-Native Interactions in the Folding of NTL9},
323 | Journal = {J. Chem. Theory Comput.},
324 | Year = {2013},
325 | Volume = {9},
326 | Number = {4},
327 | Pages = {2000--2009},
328 | Month = {apr},
329 | URL = {https://doi.org/10.1021/ct300878a},
330 | DOI = {10.1021/ct300878a}
331 | }
332 | @article{pcca++,
333 | Author = {Susanna R\"{o}blitz and Marcus Weber},
334 | Title = {Fuzzy spectral clustering by {PCCA}+: application to Markov state models and data classification},
335 | Journal = {Adv. Data Anal. Classif.},
336 | Year = {2013},
337 | Volume = {7},
338 | Number = {2},
339 | Pages = {147--179},
340 | Month = {may},
341 | URL = {https://doi.org/10.1007/s11634-013-0134-6},
342 | DOI = {10.1007/s11634-013-0134-6}
343 | }
344 | @article{tica,
345 | Author = {Guillermo P{\'{e}}rez-Hern{\'{a}}ndez and Fabian Paul and Toni Giorgino and Gianni De Fabritiis and Frank No{\'{e}}},
346 | Title = {Identification of slow molecular order parameters for Markov model construction},
347 | Journal = {J. Chem. Phys.},
348 | Year = {2013},
349 | Volume = {139},
350 | Number = {1},
351 | Pages = {015102},
352 | Month = {jul},
353 | URL = {https://doi.org/10.1063/1.4811489},
354 | DOI = {10.1063/1.4811489}
355 | }
356 | @article{noe-proj-hid-msm,
357 | Author = {Frank No{\'{e}} and Hao Wu and Jan-Hendrik Prinz and Nuria Plattner},
358 | Title = {Projected and hidden Markov models for calculating kinetics and metastable states of complex molecules},
359 | Journal = {J. Chem. Phys.},
360 | Year = {2013},
361 | Volume = {139},
362 | Number = {18},
363 | Pages = {184114},
364 | Month = {nov},
365 | URL = {https://doi.org/10.1063/1.4828816},
366 | DOI = {10.1063/1.4828816}
367 | }
368 | @article{noe-dy-neut-scatt,
369 | Author = {Benjamin Lindner and Zheng Yi and Jan-Hendrik Prinz and Jeremy C. Smith and Frank No{\'{e}}},
370 | Title = {Dynamic neutron scattering from conformational dynamics. I. Theory and Markov models},
371 | Journal = {J. Chem. Phys.},
372 | Year = {2013},
373 | Volume = {139},
374 | Number = {17},
375 | Pages = {175101},
376 | Month = {nov},
377 | URL = {https://doi.org/10.1063/1.4824070},
378 | DOI = {10.1063/1.4824070}
379 | }
380 | @book{msm-book,
381 | Title = {An Introduction to Markov State Models and Their Application to Long Timescale Molecular Simulation},
382 | Publisher = {Springer Netherlands},
383 | Year = {2014},
384 | Author = {Gregory R. Bowman and Vijay S. Pande and Frank No{\'{e}}},
385 | Editor = {Gregory R. Bowman and Vijay S. Pande and Frank No{\'{e}}},
386 | URL = {https://doi.org/10.1007%2F978-94-007-7606-7},
387 | DOI = {10.1007/978-94-007-7606-7}
388 | }
389 | @article{jhp-spectral-rate-theory,
390 | Author = {Jan-Hendrik Prinz and John D. Chodera and Frank No{\'{e}}},
391 | Title = {Spectral Rate Theory for Two-State Kinetics},
392 | Journal = {Phys. Rev. X},
393 | Year = {2014},
394 | Volume = {4},
395 | Number = {1},
396 | Month = {feb},
397 | URL = {https://doi.org/10.1103/physrevx.4.011020},
398 | DOI = {10.1103/physrevx.4.011020}
399 | }
400 | @article{nueske-vamk,
401 | Author = {Feliks N\"{u}ske and Bettina G. Keller and Guillermo P{\'{e}}rez-Hern{\'{a}}ndez and Antonia S. J. S. Mey and Frank No{\'{e}}},
402 | Title = {Variational Approach to Molecular Kinetics},
403 | Journal = {J. Chem. Theory Comput.},
404 | Year = {2014},
405 | Volume = {10},
406 | Number = {4},
407 | Pages = {1739--1752},
408 | Month = {mar},
409 | URL = {https://doi.org/10.1021/ct4009156},
410 | DOI = {10.1021/ct4009156}
411 | }
412 | @article{Chodera2014,
413 | Author = {John D Chodera and Frank No{\'{e}}},
414 | Title = {Markov state models of biomolecular conformational dynamics},
415 | Journal = {Curr. Opin. Struct. Biol.},
416 | Year = {2014},
417 | Volume = {25},
418 | Pages = {135--144},
419 | Month = {apr},
420 | URL = {https://doi.org/10.1016/j.sbi.2014.04.002},
421 | DOI = {10.1016/j.sbi.2014.04.002}
422 | }
423 | @article{dtram,
424 | Author = {Hao Wu and Antonia S. J. S. Mey and Edina Rosta and Frank No{\'{e}}},
425 | Title = {Statistically optimal analysis of state-discretized trajectory data from multiple thermodynamic states},
426 | Journal = {J. Chem. Phys.},
427 | Year = {2014},
428 | Volume = {141},
429 | Number = {21},
430 | Pages = {214106},
431 | Month = {dec},
432 | URL = {https://doi.org/10.1063/1.4902240},
433 | DOI = {10.1063/1.4902240}
434 | }
435 | @article{plattner_protein_2015,
436 | Author = {Plattner, Nuria and Noé, Frank},
437 | Title = {Protein conformational plasticity and complex ligand-binding kinetics explored by atomistic simulations and {Markov} models},
438 | Journal = {Nat. Commun.},
439 | Year = {2015},
440 | Volume = {6},
441 | Pages = {7653},
442 | URL = {http://www.nature.com/ncomms/2015/150702/ncomms8653/full/ncomms8653.html},
443 | DOI = {10.1038/ncomms8653}
444 | }
445 | @article{mdtraj,
446 | Author = {McGibbon, Robert T. and Beauchamp, Kyle A. and Harrigan, Matthew P. and Klein, Christoph and Swails, Jason M. and Hernández, Carlos X. and Schwantes, Christian R. and Wang, Lee-Ping and Lane, Thomas J. and Pande, Vijay S.},
447 | Title = {MDTraj: A Modern Open Library for the Analysis of Molecular Dynamics Trajectories},
448 | Journal = {Biophys. J.},
449 | Year = {2015},
450 | Volume = {109},
451 | Number = {8},
452 | Pages = {1528 -- 1532},
453 | DOI = {10.1016/j.bpj.2015.08.015}
454 | }
455 | @book{numpy,
456 | Title = {Guide to NumPy},
457 | Publisher = {CreateSpace Independent Publishing Platform},
458 | Year = {2015},
459 | Author = {Oliphant, Travis E.},
460 | Address = {USA},
461 | Edition = {2nd},
462 | }
463 | @article{gmrq,
464 | Author = {Robert T. McGibbon and Vijay S. Pande},
465 | Title = {Variational cross-validation of slow dynamical modes in molecular kinetics},
466 | Journal = {J. Chem. Phys.},
467 | Year = {2015},
468 | Volume = {142},
469 | Number = {12},
470 | Pages = {124105},
471 | Month = {mar},
472 | URL = {https://doi.org/10.1063/1.4916292},
473 | DOI = {10.1063/1.4916292}
474 | }
475 | @article{kinetic-maps,
476 | Author = {Noé, Frank and Clementi, Cecilia},
477 | Title = {Kinetic Distance and Kinetic Maps from Molecular Dynamics Simulation},
478 | Journal = {J. Chem. Theory Comput.},
479 | Year = {2015},
480 | Volume = {11},
481 | Number = {10},
482 | Pages = {5002--5011},
483 | Month = {oct},
484 | URL = {https://doi.org/10.1021/acs.jctc.5b00553},
485 | DOI = {10.1021/acs.jctc.5b00553}
486 | }
487 | @article{pyemma,
488 | Author = {Martin K. Scherer and Benjamin Trendelkamp-Schroer and Fabian Paul and Guillermo P{\'{e}}rez-Hern{\'{a}}ndez and Moritz Hoffmann and Nuria Plattner and Christoph Wehmeyer and Jan-Hendrik Prinz and Frank No{\'{e}}},
489 | Title = {{PyEMMA} 2: A Software Package for Estimation, Validation, and Analysis of Markov Models},
490 | Journal = {J. Chem. Theory Comput.},
491 | Year = {2015},
492 | Volume = {11},
493 | Number = {11},
494 | Pages = {5525--5542},
495 | Month = {nov},
496 | URL = {https://doi.org/10.1021/acs.jctc.5b00743},
497 | DOI = {10.1021/acs.jctc.5b00743}
498 | }
499 | @article{banushkina_nonparametric_2015,
500 | Author = {Polina V. Banushkina and Sergei V. Krivov},
501 | Title = {Nonparametric variational optimization of reaction coordinates},
502 | Journal = {J. Chem. Phys.},
503 | Year = {2015},
504 | Volume = {143},
505 | Number = {18},
506 | Pages = {184108},
507 | Month = {nov},
508 | URL = {https://doi.org/10.1063/1.4935180},
509 | DOI = {10.1063/1.4935180}
510 | }
511 | @article{ben-rev-msm,
512 | Author = {Benjamin Trendelkamp-Schroer and Hao Wu and Fabian Paul and Frank No{\'{e}}},
513 | Title = {Estimation and uncertainty of reversible Markov models},
514 | Journal = {J. Chem. Phys.},
515 | Year = {2015},
516 | Volume = {143},
517 | Number = {17},
518 | Pages = {174101},
519 | Month = {nov},
520 | URL = {https://doi.org/10.1063/1.4934536},
521 | DOI = {10.1063/1.4934536}
522 | }
523 | @conference{jupyter,
524 | Author = {Thomas Kluyver and Benjamin Ragan-Kelley and Fernando Pérez and Brian Granger and Matthias Bussonnier and Jonathan Frederic and Kyle Kelley and Jessica Hamrick and Jason Grout and Sylvain Corlay and Paul Ivanov and Damián Avila and Safia Abdalla and Carol Willing},
525 | Title = {Jupyter Notebooks -- a publishing format for reproducible computational workflows},
526 | Booktitle = {Positioning and Power in Academic Publishing: Players, Agents and Agendas},
527 | Year = {2016},
528 | Editor = {F. Loizides and B. Schmidt},
529 | Pages = {87--90},
530 | Organization = {IOS Press},
531 | }
532 | @article{tram,
533 | Author = {Hao Wu and Fabian Paul and Christoph Wehmeyer and Frank No{\'{e}}},
534 | Title = {Multiensemble Markov models of molecular thermodynamics and kinetics},
535 | Journal = {Proc. Natl. Acad. Sci. USA},
536 | Year = {2016},
537 | Volume = {113},
538 | Number = {23},
539 | Pages = {E3221--E3230},
540 | Month = {may},
541 | URL = {https://doi.org/10.1073/pnas.1525092113},
542 | DOI = {10.1073/pnas.1525092113}
543 | }
544 | @article{husic-optimized,
545 | Author = {Brooke E. Husic and Robert T. McGibbon and Mohammad M. Sultan and Vijay S. Pande},
546 | Title = {Optimized parameter selection reveals trends in Markov state models for protein folding},
547 | Journal = {J. Chem. Phys.},
548 | Year = {2016},
549 | Volume = {145},
550 | Number = {19},
551 | Pages = {194103},
552 | Month = {nov},
553 | URL = {https://doi.org/10.1063/1.4967809},
554 | DOI = {10.1063/1.4967809}
555 | }
556 | @article{simon-mech-mod-nmr,
557 | Author = {Simon Olsson and Frank No{\'{e}}},
558 | Title = {Mechanistic Models of Chemical Exchange Induced Relaxation in Protein {NMR}},
559 | Journal = {J. Am. Chem. Soc.},
560 | Year = {2016},
561 | Volume = {139},
562 | Number = {1},
563 | Pages = {200--210},
564 | Month = {dec},
565 | URL = {https://doi.org/10.1021/jacs.6b09460},
566 | DOI = {10.1021/jacs.6b09460}
567 | }
568 | @article{oom-feliks,
569 | Author = {Feliks N\"{u}ske and Hao Wu and Jan-Hendrik Prinz and Christoph Wehmeyer and Cecilia Clementi and Frank No{\'{e}}},
570 | Title = {Markov state models from short non-equilibrium simulations{\textemdash}Analysis and correction of estimation bias},
571 | Journal = {J. Chem. Phys.},
572 | Year = {2017},
573 | Volume = {146},
574 | Number = {9},
575 | Pages = {094104},
576 | Month = {mar},
577 | URL = {https://doi.org/10.1063/1.4976518},
578 | DOI = {10.1063/1.4976518}
579 | }
580 | @article{hao-variational-koopman-models,
581 | Author = {Hao Wu and Feliks N\"{u}ske and Fabian Paul and Stefan Klus and P{\'{e}}ter Koltai and Frank No{\'{e}}},
582 | Title = {Variational Koopman models: Slow collective variables and molecular kinetics from short off-equilibrium simulations},
583 | Journal = {J. Chem. Phys.},
584 | Year = {2017},
585 | Volume = {146},
586 | Number = {15},
587 | Pages = {154104},
588 | Month = {apr},
589 | URL = {https://doi.org/10.1063/1.4979344},
590 | DOI = {10.1063/1.4979344}
591 | }
592 | @article{NoeClementiReview,
593 | Author = {Frank No{\'{e}} and Cecilia Clementi},
594 | Title = {Collective variables for the study of long-time kinetics from molecular trajectories: theory and methods},
595 | Journal = {Curr. Opin. Struct. Biol.},
596 | Year = {2017},
597 | Volume = {43},
598 | Pages = {141--147},
599 | Month = {apr},
600 | URL = {https://doi.org/10.1016/j.sbi.2017.02.006},
601 | DOI = {10.1016/j.sbi.2017.02.006}
602 | }
603 | @article{vamp-preprint,
604 | Author = {{Wu}, H. and {Noé}, F.},
605 | Title = {{Variational approach for learning Markov processes from time series data}},
606 | Journal = {arXiv preprint arXiv:1707.04659},
607 | Year = {2017},
608 | Month = {jul},
609 | URL = {https://arxiv.org/pdf/1707.04659.pdf},
610 | }
611 | @article{simon-amm,
612 | Author = {Simon Olsson and Hao Wu and Fabian Paul and Cecilia Clementi and Frank No{\'{e}}},
613 | Title = {Combining experimental and simulation data of molecular processes via augmented Markov models},
614 | Journal = {Proc. Natl. Acad. Sci. USA},
615 | Year = {2017},
616 | Volume = {114},
617 | Number = {31},
618 | Pages = {8265--8270},
619 | Month = {jul},
620 | URL = {https://doi.org/10.1073/pnas.1704803114},
621 | DOI = {10.1073/pnas.1704803114}
622 | }
623 | @article{trammbar,
624 | Author = {Paul, Fabian and Wehmeyer, Christoph and Abualrous, Esam T. and Wu, Hao and Crabtree, Michael D. and Schöneberg, Johannes and Clarke, Jane and Freund, Christian and Weikl, Thomas R. and Noé, Frank},
625 | Title = {Protein-peptide association kinetics beyond the seconds timescale from atomistic simulations},
626 | Journal = {Nat. Commun.},
627 | Year = {2017},
628 | Volume = {8},
629 | Number = {1},
630 | Month = {oct},
631 | URL = {https://doi.org/10.1038/s41467-017-01163-6},
632 | DOI = {10.1038/s41467-017-01163-6}
633 | }
634 | @article{plattner_complete_2017,
635 | Author = {Plattner, Nuria and Doerr, Stefan and Fabritiis, Gianni De and Noé, Frank},
636 | Title = {Complete protein--protein association kinetics in atomic detail revealed by molecular dynamics simulations and {Markov} modelling},
637 | Journal = {Nat. Chem.},
638 | Year = {2017},
639 | Volume = {9},
640 | Number = {10},
641 | Pages = {1005},
642 | Month = {oct},
643 | URL = {https://www.nature.com/articles/nchem.2785},
644 | DOI = {10.1038/nchem.2785}
645 | }
646 | @article{husic2017note,
647 | Author = {Brooke E. Husic and Vijay S. Pande},
648 | Title = {Note: {MSM} lag time cannot be used for variational model selection},
649 | Journal = {J. Chem. Phys.},
650 | Year = {2017},
651 | Volume = {147},
652 | Number = {17},
653 | Pages = {176101},
654 | Month = {nov},
655 | URL = {https://doi.org/10.1063/1.5002086},
656 | DOI = {10.1063/1.5002086}
657 | }
658 | @article{nglview,
659 | Author = {Hai Nguyen and David A Case and Alexander S Rose},
660 | Title = {{NGLview}{\textendash}interactive molecular graphics for Jupyter notebooks},
661 | Journal = {Bioinformatics},
662 | Year = {2017},
663 | Volume = {34},
664 | Number = {7},
665 | Pages = {1241--1242},
666 | Month = {dec},
667 | URL = {https://doi.org/10.1093/bioinformatics/btx789},
668 | DOI = {10.1093/bioinformatics/btx789}
669 | }
670 | @article{vampnet,
671 | Author = {Andreas Mardt and Luca Pasquali and Hao Wu and Frank Noé},
672 | Title = {{VAMPnets} for deep learning of molecular kinetics},
673 | Journal = {Nat. Commun.},
674 | Year = {2018},
675 | Volume = {9},
676 | Number = {1},
677 | Month = {jan},
678 | URL = {https://doi.org/10.1038/s41467-017-02388-1},
679 | DOI = {10.1038/s41467-017-02388-1}
680 | }
681 | @article{Koltai2018,
682 | Author = {P{\'{e}}ter Koltai and Hao Wu and Frank No{\'{e}} and Christof Sch\"{u}tte},
683 | Title = {Optimal Data-Driven Estimation of Generalized Markov State Models for Non-Equilibrium Dynamics},
684 | Journal = {Computation},
685 | Year = {2018},
686 | Volume = {6},
687 | Number = {1},
688 | Pages = {22},
689 | Month = {feb},
690 | URL = {https://doi.org/10.3390/computation6010022},
691 | DOI = {10.3390/computation6010022}
692 | }
693 | @article{msm-brooke,
694 | Author = {Brooke E. Husic and Vijay S. Pande},
695 | Title = {Markov State Models: From an Art to a Science},
696 | Journal = {J. Am. Chem. Soc.},
697 | Year = {2018},
698 | Volume = {140},
699 | Number = {7},
700 | Pages = {2386--2396},
701 | Month = {feb},
702 | DOI = {10.1021/jacs.7b12191}
703 | }
704 | @article{Sultan2018-vde-enhanced-sampling,
705 | Author = {Mohammad M. Sultan and Hannah K. Wayment-Steele and Vijay S. Pande},
706 | Title = {Transferable Neural Networks for Enhanced Sampling of Protein Dynamics},
707 | Journal = {J. Chem. Theory Comput.},
708 | Year = {2018},
709 | Volume = {14},
710 | Number = {4},
711 | Pages = {1887--1894},
712 | Month = {mar},
713 | URL = {https://doi.org/10.1021/acs.jctc.8b00025},
714 | DOI = {10.1021/acs.jctc.8b00025}
715 | }
716 | @article{deep-gen-msm-preprint,
717 | Author = {{Wu}, H. and {Mardt}, A. and {Pasquali}, L. and {Noe}, F.},
718 | Title = {{Deep Generative Markov State Models}},
719 | Journal = {arXiv preprint arXiv:1805.07601},
720 | Year = {2018},
721 | Month = {may},
722 | URL = {https://arxiv.org/pdf/1805.07601.pdf},
723 | }
724 | @article{hernandez-vde,
725 | Author = {Carlos X. Hern{\'{a}}ndez and Hannah K. Wayment-Steele and Mohammad M. Sultan and Brooke E. Husic and Vijay S. Pande},
726 | Title = {Variational encoding of complex dynamics},
727 | Journal = {Phys. Rev. E},
728 | Year = {2018},
729 | Volume = {97},
730 | Number = {6},
731 | Month = {jun},
732 | URL = {https://doi.org/10.1103/physreve.97.062412},
733 | DOI = {10.1103/physreve.97.062412}
734 | }
735 | @article{tae,
736 | Author = {Christoph Wehmeyer and Frank No{\'{e}}},
737 | Title = {Time-lagged autoencoders: Deep learning of slow collective variables for molecular kinetics},
738 | Journal = {J. Chem. Phys.},
739 | Year = {2018},
740 | Volume = {148},
741 | Number = {24},
742 | Pages = {241703},
743 | Month = {jun},
744 | URL = {https://doi.org/10.1063/1.5011399},
745 | DOI = {10.1063/1.5011399}
746 | }
747 | @article{Ribeiro2018-rave,
748 | Author = {Jo{\~{a}}o Marcelo Lamim Ribeiro and Pablo Bravo and Yihang Wang and Pratyush Tiwary},
749 | Title = {Reweighted autoencoded variational Bayes for enhanced sampling ({RAVE})},
750 | Journal = {J. Chem. Phys.},
751 | Year = {2018},
752 | Volume = {149},
753 | Number = {7},
754 | Pages = {072301},
755 | Month = {aug},
756 | URL = {https://doi.org/10.1063/1.5025487},
757 | DOI = {10.1063/1.5025487}
758 | }
759 |
760 | @article{wu2015projected,
761 | Author = {Wu, Hao and Prinz, Jan-Hendrik and No{\'e}, Frank},
762 | Title = {Projected metastable Markov processes and their estimation with observable operator models},
763 | Journal = {J. Chem. Phys.},
764 | Year = {2015},
765 | Volume = {143},
766 | Number = {14},
767 | Pages = {144101},
768 | Publisher = {AIP Publishing}
769 | }
770 |
771 | @Misc{mdtutorial,
772 | Author = {Efrem Braun and Justin Gilmer and Heather B. Mayes and David L. Mobley and Jacob I. Monroe and Samarjeet Prasad and Daniel M. Zuckerman},
773 | Title = {Best Practices for Foundations in Molecular Simulations [Article v1.0]},
774 | Year = {2018},
775 | URL = {https://github.com/MobleyLab/basic_simulation_training},
776 | DOI = {10.33011/livecoms.1.1.5957}
777 | }
778 |
--------------------------------------------------------------------------------
/manuscript/livecoms.cls:
--------------------------------------------------------------------------------
1 | % A template for LiveCoMS submissions.
2 | %
3 | % adapted from elife template, v1.4
4 | \NeedsTeXFormat{LaTeX2e}
5 | \ProvidesClass{livecoms}[2017/08/10, v0.5]
6 |
7 | \RequirePackage[english]{babel}
8 |
9 | \RequirePackage{calc}
10 | \RequirePackage{etoolbox}
11 | \RequirePackage{regexpatch}
12 | \RequirePackage{ifxetex,ifluatex}
13 |
14 | \newif\ifxetexorluatex
15 | \ifxetex
16 | \xetexorluatextrue
17 | \else
18 | \ifluatex
19 | \xetexorluatextrue
20 | \else
21 | \xetexorluatexfalse
22 | \fi
23 | \fi
24 |
25 | \newif\if@reqslineno
26 | \DeclareOption{lineno}{\@reqslinenotrue}
27 |
28 | %% the type of document this is. The current types:
29 | % bestpractices, editorial, tutorial, review, comparison, lessons
30 | \newif\if@bestpractices
31 | \DeclareOption{bestpractices}{\@bestpracticestrue}
32 |
33 | \newif\if@editorial
34 | \DeclareOption{editorial}{\@editorialtrue}
35 |
36 | \newif\if@tutorial
37 | \DeclareOption{tutorial}{\@tutorialtrue}
38 |
39 | \newif\if@review
40 | \DeclareOption{review}{\@reviewtrue}
41 |
42 | \newif\if@comparison
43 | \DeclareOption{comparison}{\@comparisontrue}
44 |
45 | \newif\if@lessons
46 | \DeclareOption{lessons}{\@lessonstrue}
47 |
48 | %Publication Information
49 | \newif\if@pubversion
50 | \DeclareOption{pubversion}{\@pubversiontrue}
51 |
52 | \newif\if@ASAPversion
53 | \DeclareOption{ASAPversion}{\@ASAPversiontrue}
54 |
55 | %% Linespacing.
56 | \newif\if@onehalfspacing
57 | \newif\if@doublespacing
58 | \DeclareOption{onehalfspacing}{\@onehalfspacingtrue}
59 | \DeclareOption{doublespacing}{\@doublespacingtrue}
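  | %
  | % Example invocation (hypothetical preamble, for illustration only): one
  | % document-type option, optionally combined with a publication-stage option
  | % and a spacing option, e.g.
  | %   \documentclass[tutorial,pubversion,onehalfspacing]{livecoms}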
60 |
61 | \DeclareOption*{\PassOptionsToClass{\CurrentOption}{extarticle}}
62 | \ExecuteOptions{}
63 | \ProcessOptions\relax
64 | \LoadClass{extarticle}
65 |
66 | \RequirePackage{amsmath}
67 | \RequirePackage{amssymb}
68 | \RequirePackage{mdframed}
69 |
70 | \RequirePackage{lineno}
71 | \if@reqslineno\linenumbers\fi
72 |
73 | \ifxetexorluatex
74 | \RequirePackage[no-math]{fontspec}
75 | \setmainfont[Ligatures = TeX,
76 | Extension = .ttf,
77 | UprightFont = *-Regular,
78 | BoldFont = *-Bold,
79 | ItalicFont = *-Italic,
80 | BoldItalicFont = *-BoldItalic]
81 | {OpenSans}
82 | \else
83 | \RequirePackage[T1]{fontenc}
84 | \RequirePackage[utf8]{inputenc}
85 | \RequirePackage[default]{opensans}
86 | \renewcommand{\ttdefault}{lmtt}
87 | \fi
88 |
89 | \RequirePackage{microtype}
90 |
91 | % Trueno/Open Sans requires a bigger "single" linespread.
92 | \linespread{1.2}
93 | \if@onehalfspacing\linespread{1.5}\fi
94 | \if@doublespacing\linespread{2.0}\fi
95 |
96 | \emergencystretch 3em
97 |
98 | \RequirePackage{graphicx,xcolor}
99 | \definecolor{LiveCoMSDarkBlue}{HTML}{273B81}
100 | \definecolor{LiveCoMSLightBlue}{HTML}{0A9DD9}
101 | \definecolor{LiveCoMSMediumGrey}{HTML}{6D6E70}
102 | \definecolor{LiveCoMSLightGrey}{HTML}{929497}
103 |
104 | \RequirePackage{booktabs}
105 | \RequirePackage{authblk}
106 |
107 | % Modified page geometry for LiveComs
108 | \RequirePackage[%left=6cm,%
109 | %marginparwidth=4cm,%
110 | %marginparsep=0.5cm,%
111 | left=2cm,
112 | right=1.3cm,%
113 | top=2cm,%
114 | bottom=2.5cm,%
115 | headheight=21pt,%
116 | headsep=2\baselineskip,%
117 | columnsep=2em,%
118 | letterpaper]{geometry}%
119 | \RequirePackage{changepage}
120 |
121 | \RequirePackage{silence}
122 | \WarningFilter{caption}{The option `hypcap=true' will be ignored}
123 | \WarningFilter{microtype}{Unknown slot}
124 |
125 | \RequirePackage[labelfont={bf},%
126 | labelsep=period,%
127 | justification=justified,%
128 | singlelinecheck=false,%
129 | tableposition=top,font=small]
130 | {caption}
131 |
132 | % \captionsetup*[table]{skip=\medskipamount}
133 |
134 |
135 | \RequirePackage[square,numbers,sort&compress]{natbib}
136 | \RequirePackage{natmove}
137 | \renewcommand{\bibfont}{\small}
138 | % modifed from https://github.com/gbhutani/vancouver_authoryear_bibstyle/
139 | \IfFileExists{vancouver-livecoms.bst}
140 | {\bibliographystyle{vancouver-livecoms}}
141 | {\PackageWarning{livecoms}{vancouver-livecoms.bst not found; falling back to apalike bibliography style.}\bibliographystyle{apalike}}
142 | % Make author in citation italic
143 | \renewcommand{\NAT@nmfmt}[1]{{\bfseries\itshape\color{LiveCoMSMediumGrey} #1}}
144 |
145 | % ...as well as the year
146 | \xpatchcmd{\NAT@citex}
147 | {\@citea\NAT@hyper@{\NAT@nmfmt{\NAT@nm}\NAT@date}}
148 | {\@citea\NAT@hyper@{\NAT@nmfmt{\NAT@nm}\NAT@nmfmt{\NAT@date}}}
149 | {}{\PackageWarning{LiveCoMS}{Failed to patch year format in citation}}
150 |
151 | \xpatchcmd{\NAT@citex}
152 | {\else\unskip\NAT@spacechar\NAT@hyper@{\NAT@date}}
153 | {\else\unskip\NAT@spacechar\NAT@hyper@{\NAT@nmfmt{\NAT@date}}}
154 | {}{\PackageWarning{LiveCoMS}{Failed to patch year format in citation}}
155 |
156 | \xpatchcmd{\NAT@citex}
157 | {\hyper@natlinkbreak{\NAT@aysep\NAT@spacechar}{\@citeb\@extra@b@citeb}\NAT@date}
158 | {\hyper@natlinkbreak{\NAT@nmfmt{\NAT@aysep\NAT@spacechar}}{\@citeb\@extra@b@citeb}\NAT@nmfmt{\NAT@date}}
159 | {}{\PackageWarning{LiveCoMS}{Failed to patch year format in citation}}
160 |
161 | \xpatchcmd{\NAT@citex}
162 | {\@citea\NAT@hyper@{\NAT@date}}
163 | {\@citea\NAT@hyper@{\NAT@nmfmt{\NAT@date}}}
164 | {}{\PackageWarning{LiveCoMS}{Failed to patch year format in citation}}
165 |
166 | \xpatchcmd{\NAT@citex}
167 | {{\@citeb\@extra@b@citeb}\NAT@date}
168 | {{\@citeb\@extra@b@citeb}\NAT@nmfmt{\NAT@date}}
169 | {}{\PackageWarning{LiveCoMS}{Failed to patch year format in citation}}
170 | %% There, we're finally done with patching the year in citations.
171 |
172 | %
173 | % headers and footers
174 | %
175 |
176 | \RequirePackage{fancyhdr} % custom headers/footers
177 | \RequirePackage{lastpage} % Number of pages in the document
178 | \pagestyle{fancy} % Enables the custom headers/footers
179 | %% Next two lines unnecessary for LiveComs
180 | % \addtolength{\headwidth}{\marginparsep}
181 | % \addtolength{\headwidth}{\marginparwidth}
182 |
183 | %% different document types listed here
184 |
185 | \newif\ifdocumenttype
186 | \documenttypefalse
187 |
188 | \if@bestpractices
189 | \documenttypetrue
190 | \newcommand{\documenttype}{Best Practices Guide}
191 | \else
192 | % nothing
193 | \fi
194 |
195 | \if@editorial
196 | \documenttypetrue
197 | \newcommand{\documenttype}{Editorial}
198 | \else
199 | % nothing
200 | \fi
201 |
202 | \if@tutorial
203 | \documenttypetrue
204 | \newcommand{\documenttype}{Tutorial}
205 | \else
206 | % nothing
207 | \fi
208 |
209 | \if@review
210 | \documenttypetrue
211 | \newcommand{\documenttype}{Perpetual Review}
212 | \else
213 | % nothing
214 | \fi
215 |
216 | \if@comparison
217 | \documenttypetrue
218 | \newcommand{\documenttype}{Molecular Simulation Comparison}
219 | \else
220 | % nothing
221 | \fi
222 |
223 | \if@lessons
224 | \documenttypetrue
225 | \newcommand{\documenttype}{``Lessons Learned'' Document}
226 | \else
227 | % nothing
228 | \fi
229 |
230 | \fancyhf{}
231 | \ifdocumenttype
232 | \chead{%
233 | \setlength{\fboxsep}{3pt}
234 | \colorbox{LiveCoMSMediumGrey}{\begin{minipage}{\headwidth}\centering\color{white} A LiveCoMS \documenttype\end{minipage}}%
235 | }
236 | \fi
237 |
238 | % Publication information in document footer
239 | % *ONLY INCLUDED IF "pubversion" CLASS OPTION IS INVOKED*
240 | \def\@publishedDOI{}
241 | \def\@publishedvolume{}
242 | \def\@publishedyear{}
243 | \def\@publishedarticlenum{}
244 | \def\@publisheddatereceived{}
245 | \def\@publisheddateaccepted{}
246 | \def \DOIprefix{10.XXXX} %May be able to use this later
247 | \newcommand{\pubDOI}[1]{%
248 | \appto{\@publishedDOI}{#1}{}{}
249 | }
250 | \newcommand{\pubvolume}[1]{%
251 | \appto{\@publishedvolume}{#1}{}{}
252 | }
253 | \newcommand{\pubissue}[1]{%
254 | \appto{\@publishedissue}{#1}{}{}
255 | }
256 | \newcommand{\pubyear}[1]{%
257 | \appto{\@publishedyear}{#1}{}{}
258 | }
259 | \newcommand{\articlenum}[1]{%
260 | \appto{\@publishedarticlenum}{#1}{}{}
261 | }
262 | \newcommand{\datereceived}[1]{%
263 | \appto{\@publisheddatereceived}{#1}{}{}
264 | }
265 | \newcommand{\dateaccepted}[1]{%
266 | \appto{\@publisheddateaccepted}{#1}{}{}
267 | }
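  | % Example (hypothetical values, for illustration only): a published article
  | % would set the footer metadata in its preamble, e.g.
  | %   \pubDOI{10.33011/livecoms.1.1.0000}\pubvolume{1}\pubissue{1}\pubyear{2018}
  | %   \articlenum{0000}\datereceived{1 Jan 2018}\dateaccepted{1 Feb 2018}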
268 |
269 | %--------------------------------------------------------
270 | % Footers
271 | % 1. Error Check for conflicting class options
272 | \if@pubversion
273 | \if@ASAPversion
274 | \ClassError{livecoms}
275 | {Nope nope nope, you cannot invoke 'pubversion' and 'ASAPversion' simultaneously. Please correct the class options.}
  | {Remove either the 'pubversion' or the 'ASAPversion' class option.}
276 | \fi
277 | \fi
278 | % 2. Publication Version: put submission/acceptance dates in left footer and citation information in right footer
279 | %%% DWS NOTE: would be nice if the left footer was in an if A-or-B type logical statement
280 | \if@pubversion
281 | \lfoot{\ifthenelse{\value{page}=1}
282 | {\small\color{LiveCoMSMediumGrey}Received: \@publisheddatereceived \\ Accepted: \@publisheddateaccepted}
283 | {~\\~}
284 | }%
285 | \rfoot{\small\color{LiveCoMSMediumGrey}\href{https://doi.org/\@publishedDOI}{https://doi.org/\@publishedDOI}\\
286 | {\it Living J. Comp. Mol. Sci.} \@publishedyear, \@publishedvolume\nobreak\hspace{.05em}(\@publishedissue), \@publishedarticlenum
287 | }%
288 | \fi
289 | % 3. ASAP Version: put submission/acceptance dates in left footer and "ASAP Version" in right footer
290 | \if@ASAPversion
291 | \lfoot{\ifthenelse{\value{page}=1}
292 | {\small\color{LiveCoMSMediumGrey}Received: \@publisheddatereceived \\ Accepted: \@publisheddateaccepted}
293 | {~\\~}
294 | }%
295 | \rfoot{\small\color{LiveCoMSMediumGrey}\href{https://doi.org/\@publishedDOI}{https://doi.org/\@publishedDOI}\\
296 | {\it Living J. Comp. Mol. Sci.} ASAP Version
297 | }%
298 | \fi
299 | % 4. Page Number in center of footer
300 | \cfoot{\small\color{white} \vspace{\baselineskip} \small\color{LiveCoMSMediumGrey} \thepage\space of\space\pageref{LastPage}}%
301 | \preto{\footrule}{\color{LiveCoMSMediumGrey}}
302 | \renewcommand{\headrulewidth}{0pt}% % No header rule
303 | \renewcommand{\footrulewidth}{0.4pt}% % Thin footer rule
304 | %----------------------------------------------------------
305 |
306 | %
307 | % section/subsection/paragraph set-up
308 | % Updated for LiveComs
309 | % \setcounter{secnumdepth}{0}
310 | \RequirePackage[explicit]{titlesec}
311 | \titleformat{\section}
312 | {\LARGE\bfseries\raggedright}
313 | {\thesection}{1em}{#1}[]
314 | \titleformat{\subsection}
315 | {\Large\bfseries\raggedright\color{LiveCoMSMediumGrey}}
316 | {\thesubsection}{1em}{#1}[]
317 | \titleformat{\subsubsection}
318 | {\large\raggedright\color{LiveCoMSMediumGrey}}
319 | {\thesubsubsection}{1em}{#1}[]
320 | \titleformat{\paragraph}
321 | {\large\raggedright\color{LiveCoMSMediumGrey}}
322 | {\theparagraph}{1em}{#1}[]
323 | \titlespacing*{\section}{0pc}{3ex \@plus4pt \@minus3pt}{0pt}
324 | \titlespacing*{\subsection}{0pc}{2.5ex \@plus3pt \@minus2pt}{0pt}
325 | \titlespacing*{\subsubsection}{0pc}{2ex \@plus2.5pt \@minus1.5pt}{0pt}
326 | \titlespacing*{\paragraph}{0pc}{1.5ex \@plus2pt \@minus1pt}{0pt}
327 |
328 | \RequirePackage{enumitem}
329 | \setlist{noitemsep}
330 |
331 | \RequirePackage{alphalph}
332 | \newalphalph{\fnsymbolmult}[mult]{\@fnsymbol}{5}
333 |
334 | \newcounter{authorfn}
335 | \setcounter{authorfn}{1}
336 | \newcommand{\authfn}[1]{%
337 | \fnsymbolmult{\numexpr\value{authorfn}+#1}%
338 | }
339 |
340 | \def\@correspondence{}
341 | \def\@contribution{}
342 | \def\@presentaddress{}
343 | \def\@deceased{}
344 | % Added blurb for LiveComs
345 | \def\@blurb{}
346 | \def\@orcidblock{}
347 |
348 |
349 | \newcommand{\corr}[2]{%
350 | \ifx\empty\@correspondence\else\appto{\@correspondence}{; }{}{}\fi
351 | \appto{\@correspondence}{%
352 | \url{#1}%
353 | \ifx\empty#2\else\space(#2)\fi
354 | }{}{}%
355 | }
356 |
357 | \newcommand{\contrib}[2][]{
358 | \appto{\@contribution}{%
359 | \ifx\empty#1\else\textsuperscript{#1}\fi
360 | #2\\
361 | }{}{}
362 | }
363 |
364 | \newcommand{\presentadd}[2][]{
365 | \ifx\empty\@presentaddress\else\appto{\@presentaddress}{; }{}{}\fi
366 | \appto{\@presentaddress}{%
367 | \ifx\empty#1\else\textsuperscript{#1}\fi
368 | #2%
369 | }{}{}
370 | }
371 |
372 | \newcommand{\deceased}[1]{\def\@deceased{\textsuperscript{#1}Deceased}}
373 |
374 | % Added for LiveComs
375 | \newcommand{\blurb}[1]{\def\@blurb{#1}}
376 |
377 | \newcommand{\orcid}[2]{%
378 | \ifx\empty\@orcidblock\else\appto{\@orcidblock}{\\}{}{}\fi
379 | \appto{\@orcidblock}{%
380 | #1:\space%
381 | \ifx\empty#2\else\href{https://orcid.org/#2}{#2} \fi
382 | }{}{}%
383 | }
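  | % Example (hypothetical name/iD): \orcid{Jane Doe}{0000-0002-1825-0097}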
384 |
385 |
386 |
387 | \reversemarginpar
388 |
389 | %
390 | % custom title page
391 | %
392 | \renewcommand{\Authfont}{\bfseries\large\raggedright}
393 | \renewcommand{\Affilfont}{\mdseries\large\raggedright}
394 | \renewcommand{\Authands}{, }
395 | \setlength{\affilsep}{16pt}
396 | \renewcommand{\AB@affilsepx}{; \protect\Affilfont}
397 |
398 | \newcommand{\themetadata}{%
399 | \textbf{*For correspondence:\\} \@correspondence\par
400 | \ifx\empty\@contribution\else
401 | \bigskip\@contribution\par\fi
402 | \ifx\empty\@presentaddress\else
403 | \textbf{Present address: }\@presentaddress\par\fi
404 | \ifx\empty\@deceased\else\@deceased\par\fi
405 | }
406 |
407 | \patchcmd{\@author}{\AB@authlist\\[\affilsep]\AB@affillist}{\AB@authlist\\[\affilsep]
408 | %% Removed for LiveComs; will be placed after abstract in frontmatter
409 | % \marginpar{\raggedright\footnotesize\themetadata\par}
410 | \AB@affillist}{}{}
411 |
412 | %% Added for LiveComs
413 | \RequirePackage{environ}
414 | \RequirePackage{textpos}
415 |
416 | %% Abstract outside frontmatter will throw an error!
417 | \RenewEnviron{abstract}{%
418 | \ClassError{livecoms}
419 | {Nope nope nope, please put the abstract inside the frontmatter environment.}
420 | {Please put the abstract inside the frontmatter environment.}
421 | }
422 |
423 | \NewEnviron{frontmatter}{%
424 | %% Define abstract's behavior when placed in frontmatter
425 | \renewenvironment{abstract}{%
426 | \setlength{\parindent}{0pt} %\raggedright
427 | \raisebox{-16pt-\baselineskip}[0pt][0pt]{\makebox[0pt][r]{\parbox[t]{3cm}{%
428 | \raggedright\itshape\footnotesize\@blurb\par\medskip%
429 | This version dated \@date%
430 | }\hspace*{1cm}}}%
431 | \textcolor{LiveCoMSMediumGrey}{\rule{\textwidth}{2pt}}
432 | \vskip16pt
433 | \textcolor{LiveCoMSLightBlue}{\large\bfseries\abstractname\space}
434 | }{%
435 | \vskip8pt
436 | \textcolor{LiveCoMSMediumGrey}{\rule{\textwidth}{2pt}}
437 | \vskip16pt
438 | }
439 | \twocolumn[%
440 | \protecting{%\begin{minipage}[b]{3cm}
441 | % \small\itshape
442 | % \raggedright\@blurb
443 | % \end{minipage}
444 | \hfill
445 | \begin{minipage}[b]{\textwidth-4cm}
446 | \BODY
447 | \themetadata%
448 | \end{minipage}}\vspace*{2\baselineskip}
449 | ]%
450 | }
451 |
452 | \renewcommand{\maketitle}{%
453 | \vskip36pt%
454 | {\color{LiveCoMSDarkBlue}\raggedright\bfseries\fontsize{22}{27}\selectfont \@title\par}%
455 | \vskip16pt
456 | {\@author\par}
457 | \vskip8pt
458 | }
459 |
460 | \newcommand{\makeorcid}{%
461 | % \textbf{*For correspondence:\\} \@correspondence\par
462 | % \textbf{ORCID:\\} \@correspondence\par
463 | \textbf{ORCID:\\} \@orcidblock\par
464 | }
465 |
466 | %% Insert a grey line to separate floats from main text
467 | \newcommand{\topfigrule}{\vskip8pt\noindent{\rule{\linewidth}{1pt}}}
468 | \newcommand{\botfigrule}{\noindent{\rule{\linewidth}{1pt}}\vskip8pt}
469 |
470 | \RequirePackage{newfloat}
471 | \RequirePackage{wrapfig}
472 | \AtEndEnvironment{wrapfigure}{\vskip8pt\noindent{\rule{\hsize}{1pt}}}
473 | % \RequirePackage[lflt]{floatflt}
474 | % \AtEndEnvironment{floatingfigure}{\vskip8pt\noindent\textcolor{LiveCoMSMediumGrey}{\rule{\hsize}{2pt}}}
475 |
476 | \DeclareFloatingEnvironment[placement=hbt,name=Box]{featurebox}
477 | \captionsetup[featurebox]{font={Large,bf,color=LiveCoMSDarkBlue}}
478 |
479 | \newcounter{featurefigure}
480 | \newcounter{featuretable}
481 | \AtBeginEnvironment{featurebox}{%
482 | \setcounter{featurefigure}{0}%
483 | \setcounter{featuretable}{0}%
484 | \newcommand{\featurefig}[1]{%
485 | \refstepcounter{featurefigure}%
486 | \vskip\smallskipamount%
487 | {\small\textbf{\color{LiveCoMSDarkBlue}Box \arabic{featurebox} Figure \arabic{featurefigure}.}\space #1\par}\medskip}
488 | \newcommand{\featuretable}[1]{%
489 | \refstepcounter{featuretable}%
490 | \vskip\smallskipamount%
491 | {\small\textbf{\color{LiveCoMSDarkBlue}Box \arabic{featurebox} Table \arabic{featuretable}.}\space #1\par}\medskip}
492 |
493 | }
494 | \apptocmd{\featurebox}{%
495 | \begin{mdframed}[linewidth=0pt,backgroundcolor=LiveCoMSLightBlue!10,fontcolor=LiveCoMSDarkBlue]
496 | \if@reqslineno\addtolength{\linenumbersep}{1em}\internallinenumbers\fi%
497 | }{}{}
498 | \pretocmd{\endfeaturebox}{\end{mdframed}}{}{}
499 |
500 | %% Starred version for LiveComs two-column
501 | \AtBeginEnvironment{featurebox*}{%
502 | \setcounter{featurefigure}{0}%
503 | \setcounter{featuretable}{0}%
504 | \newcommand{\featurefig}[1]{%
505 | \refstepcounter{featurefigure}%
506 | \vskip\smallskipamount%
507 | {\small\textbf{\color{LiveCoMSDarkBlue}Box \arabic{featurebox} Figure \arabic{featurefigure}.}\space #1\par}\medskip}
508 | \newcommand{\featuretable}[1]{%
509 | \refstepcounter{featuretable}%
510 | \vskip\smallskipamount%
511 | {\small\textbf{\color{LiveCoMSDarkBlue}Box \arabic{featurebox} Table \arabic{featuretable}.}\space #1\par}\medskip}
512 | }
513 | \expandafter\apptocmd\csname featurebox*\endcsname{%
514 | \begin{mdframed}[linewidth=0pt,backgroundcolor=LiveCoMSLightBlue!10,fontcolor=LiveCoMSDarkBlue]
515 | \if@reqslineno\addtolength{\linenumbersep}{1em}\internallinenumbers\fi%
516 | }{}{}
517 | \expandafter\pretocmd\csname endfeaturebox*\endcsname{\end{mdframed}}{}{}
518 |
519 | %% Unnecessary for LiveComs
520 | % \newenvironment{fullwidth}{%
521 | % \begin{adjustwidth}{-4.5cm}{}
522 | % }{\end{adjustwidth}}
523 |
524 | %% Provide support for pseudocode and algorithms
525 | \RequirePackage{algorithm,algpseudocode}
526 | \captionsetup[algorithm]{%
527 | labelfont={bf},font=small,labelsep=period,
528 | justification=raggedright,singlelinecheck=false}
529 | \newcommand\fs@notopruled{\def\@fs@cfont{\bfseries}\let\@fs@capt\floatc@ruled
530 | \def\@fs@pre{}% \hrule height.8pt depth0pt \kern2pt}%
531 | \def\@fs@post{} %\kern2pt\hrule\relax}%
532 | \def\@fs@mid{\medskip\kern2pt\hrule\kern2pt}%
533 | \let\@fs@iftopcapt\iftrue}
534 | \floatstyle{notopruled}
535 | \restylefloat{algorithm}
536 | \newcommand{\algorithmautorefname}{Algorithm}
537 | \newcommand{\ALG}[1]{\autoref{alg:#1}}
538 |
539 | %% Update some appendix sectional styles
540 | \appto{\appendix}{%
541 | \@addtoreset{figure}{section}
542 | \@addtoreset{table}{section}
543 | \@addtoreset{featurebox}{section}
544 | \@addtoreset{algorithm}{section}
545 | % \numberwithin{figure}{section}
546 | % \numberwithin{table}{section}
547 | % \numberwithin{featurebox}{section}
548 | \titleformat{\section}
549 | {\LARGE\bfseries\color{LiveCoMSDarkBlue}}
550 | {\appendixname\ \thesection}{1em}{#1}[]
551 |
552 | \captionsetup*[figure]{name={Appendix \thesection\ Figure },font={color=LiveCoMSDarkBlue,small},skip=\smallskipamount}%
553 |
554 | \captionsetup*[table]{name={Appendix \thesection\ Table },font={color=LiveCoMSDarkBlue,small}}%
555 | }
556 |
557 | \newcounter{figsupp}
558 | \setcounter{figsupp}{0}
559 | \newcounter{data}
560 | \setcounter{data}{0}
561 | \def\supplist{}
562 |
563 | \RequirePackage{newfile}
564 | \newoutputstream{suppinfo}
565 | \openoutputfile{\jobname.suppinfo}{suppinfo}
566 |
567 |
568 | \AtBeginEnvironment{figure}{%
569 | \setcounter{figsupp}{0}
570 | \setcounter{data}{0}
571 | %% Updated 2017/06/30 to allow optional argument
572 | \newcommand{\figsupp}[3][]{%
573 | \refstepcounter{figsupp}%
574 | {%
575 | \ifstrequal{#1}{none}{}{%
576 | \small\textbf{Figure~\thefigure--Figure supplement \arabic{figsupp}.} \ifstrempty{#1}{#2}{#1}}\par}
577 | \addtostream{suppinfo}{%
578 | \noindent\protect\begin{minipage}{\linewidth}
579 | \protect #3\noexpand\par
580 | \textbf{Figure \thefigure--Figure supplement \arabic{figsupp}.} #2\noexpand\par
581 | \vskip8pt
582 | \protect\end{minipage}
583 | \vskip16pt
584 | }
585 | }
586 | \newcommand{\figdata}[1]{%
587 | \refstepcounter{data}
588 | {\small\textbf{Figure~\thefigure--source data \arabic{data}.} #1}\par
589 | }
590 | }
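  | % Example (hypothetical file name), inside a figure environment:
  | %   \figsupp[Short in-text caption]{Full supplement caption}{\includegraphics[width=\linewidth]{fig1-supp}}
  | % prints the short caption in the text and writes the full supplement to the suppinfo stream.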
591 |
592 | %% Added for LiveComs (two columns)
593 | \AtBeginEnvironment{figure*}{%
594 | \setcounter{figsupp}{0}
595 | \setcounter{data}{0}
596 | %% Updated 2017/06/30 to allow optional argument
597 | \newcommand{\figsupp}[3][]{%
598 | \refstepcounter{figsupp}%
599 | {%
600 | \ifstrequal{#1}{none}{}{%
601 | \small\textbf{Figure~\thefigure--Figure supplement \arabic{figsupp}.} \ifstrempty{#1}{#2}{#1}}\par}
602 | \addtostream{suppinfo}{%
603 | \noindent\protect\begin{minipage}{\linewidth}
604 | \protect #3\noexpand\par
605 | \textbf{Figure \thefigure--Figure supplement \arabic{figsupp}.} #2\noexpand\par
606 | \vskip8pt
607 | \protect\end{minipage}
608 | \vskip16pt
609 | }
610 | }
611 | \newcommand{\figdata}[1]{%
612 | \refstepcounter{data}
613 | {\small\textbf{Figure~\thefigure--source data \arabic{data}.} #1}\par
614 | }
615 | }
616 |
617 | \AtBeginEnvironment{table}{%
618 | \setcounter{data}{0}
619 | \newcommand{\tabledata}[1]{%
620 | \refstepcounter{data}
621 | {\small\textbf{Table~\thetable--source data \arabic{data}.} #1}\par
622 | }
623 | }
624 |
625 | %% Added for LiveComs (twocolumns)
626 | \AtBeginEnvironment{table*}{%
627 | \setcounter{data}{0}
628 | \newcommand{\tabledata}[1]{%
629 | \refstepcounter{data}
630 | {\small\textbf{Table~\thetable--source data \arabic{data}.} #1}\par
631 | }
632 | }
633 |
634 | %% Checklists as floats
635 | \RequirePackage{fontawesome}
636 | \DeclareFloatingEnvironment[placement=hbtp,name=Checklists]{Checklists}
637 | \newcounter{checklist}
638 | \AtBeginEnvironment{Checklists}{%
639 | \setcounter{checklist}{0}
640 | \mdfsetup{skipabove=0pt,skipbelow=0pt,
641 | frametitleaboveskip=12pt,innerbottommargin=12pt,
642 | hidealllines=true,
643 | frametitlefont=\Large\bfseries\color{LiveCoMSLightBlue}}
644 | }{}{}
645 |
646 | \AtBeginEnvironment{Checklists*}{%
647 | \setcounter{checklist}{0}
648 | \mdfsetup{skipabove=0pt,skipbelow=0pt,
649 | frametitleaboveskip=12pt,innerbottommargin=12pt,
650 | hidealllines=true,
651 | frametitlefont=\Large\bfseries\color{LiveCoMSLightBlue}}
652 | }{}{}
653 |
654 | \newenvironment{checklist}[1]{%
655 | \stepcounter{checklist}
656 | \ifnumodd{\thechecklist}
657 | {\def\cl@bgcolor{gray!12}}
658 | {\def\cl@bgcolor{gray!25}}
659 | \begin{mdframed}[
660 | frametitle=\MakeUppercase{#1},
661 | backgroundcolor=\cl@bgcolor]
662 | \setlist[itemize]{label=$\Box$,leftmargin=*}
663 | }{\end{mdframed}}
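  | % Example (hypothetical content; consecutive checklists alternate background shades):
  | %   \begin{checklist}{Data availability}
  | %   \begin{itemize}\item Trajectories archived? \item Analysis scripts deposited?\end{itemize}
  | %   \end{checklist}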
664 |
665 | \AtEndDocument{%
666 | \closeoutputstream{suppinfo}
667 | % \pagestyle{empty}
668 | \renewcommand{\footrule}{}
669 | \rfoot{}
670 | \input{\jobname.suppinfo}
671 | }
672 |
673 | %% Use more traditional Appendix section approach
674 | % \newcounter{appendix}
675 | % \setcounter{appendix}{0}
676 | % \newenvironment{appendixbox}{%
677 | % \setcounter{figure}{0}
678 | % \setcounter{table}{0}
679 | % \refstepcounter{appendix}%
680 | % \clearpage%
681 | % \patchcmd{\ttlf@section}{LiveCoMSMediumGrey}{LiveCoMSDarkBlue}{}{}
682 | % \noindent{\bfseries\Large\color{LiveCoMSMediumGrey}Appendix \arabic{appendix}\par}
683 | % \nolinenumbers%
684 | % %% Remove box colours for LiveComs
685 | % \begin{mdframed}[hidealllines=true,
686 | % % backgroundcolor=LiveCoMSLightBlue!10,
687 | % fontcolor=LiveCoMSDarkBlue,
688 | % % leftline=true,linecolor=LiveCoMSLightBlue,linewidth=1em
689 | % ]
690 | % \if@reqslineno\addtolength{\linenumbersep}{2em}\internallinenumbers\fi
691 | % }{%
692 | % \end{mdframed}
693 | % }
694 |
695 | \RequirePackage[colorlinks=true,allcolors=black,citecolor=LiveCoMSLightBlue,linkcolor=LiveCoMSMediumGrey,urlcolor=LiveCoMSLightBlue]{hyperref}
696 | \urlstyle{sf}
697 |
698 | % Other desired commands
699 | \renewcommand{\equationautorefname}{Eq.}
700 | \newcommand{\FIG}[1]{\autoref{fig:#1}}
701 | \newcommand{\TABLE}[1]{\autoref{tab:#1}}
702 | \newcommand{\EQ}[1]{\autoref{eq:#1}}
703 | \newcommand{\BOX}[1]{\autoref{box:#1}}
704 | \let\oldautoref\autoref
705 | \renewcommand{\autoref}[1]{\emph{\textbf{\oldautoref{#1}}}}
706 |
707 | \endinput
708 |
--------------------------------------------------------------------------------
/notebooks/.gitignore:
--------------------------------------------------------------------------------
1 | data/*
2 | *.pyemma
3 |
--------------------------------------------------------------------------------
/notebooks/03-msm-estimation-and-validation.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# 03 - MSM estimation and validation\n",
8 | "\n",
9 | "
\n",
10 | "\n",
11 | "In this notebook, we will cover how to estimate a Markov state model (MSM) and do model validation;\n",
12 | "we also show how to save and restore model and estimator objects.\n",
13 | "For this notebook, you need to know how to do data loading/visualization\n",
14 | "([Notebook 01 ➜ 📓](01-data-io-and-featurization.ipynb))\n",
15 | "as well as dimension reduction ([Notebook 02 ➜ 📓](02-dimension-reduction-and-discretization.ipynb)).\n",
16 | "\n",
17 | "We further recommend to have a look at the literature, if you are new to the concept of Markov state models:\n",
18 | "- prinz-11\n",
19 | "- bowman-14\n",
20 | "- husic-18\n",
21 | "\n",
22 | "Maintainers: [@cwehmeyer](https://github.com/cwehmeyer), [@marscher](https://github.com/marscher), [@thempel](https://github.com/thempel), [@psolsson](https://github.com/psolsson)\n",
23 | "\n",
24 | "**Remember**:\n",
25 | "- to run the currently highlighted cell, hold ⇧ Shift and press ⏎ Enter;\n",
26 | "- to get help for a specific function, place the cursor within the function's brackets, hold ⇧ Shift, and press ⇥ Tab;\n",
27 | "- you can find the full documentation at [PyEMMA.org](http://www.pyemma.org)."
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": null,
33 | "metadata": {},
34 | "outputs": [],
35 | "source": [
36 | "%matplotlib inline\n",
37 | "import matplotlib.pyplot as plt\n",
38 | "import numpy as np\n",
39 | "import mdshare\n",
40 | "import pyemma"
41 | ]
42 | },
43 | {
44 | "cell_type": "markdown",
45 | "metadata": {},
46 | "source": [
47 | "## Case 1: preprocessed, two-dimensional data (toy model)\n",
48 | "We load the two-dimensional trajectory from an archive using numpy and directly discretize the full space using $k$-means clustering:"
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": null,
54 | "metadata": {},
55 | "outputs": [],
56 | "source": [
57 | "file = mdshare.fetch('hmm-doublewell-2d-100k.npz', working_directory='data')\n",
58 | "with np.load(file) as fh:\n",
59 | " data = fh['trajectory']\n",
60 | "\n",
61 | "cluster = pyemma.coordinates.cluster_kmeans(data, k=50, max_iter=50)"
62 | ]
63 | },
64 | {
65 | "cell_type": "markdown",
66 | "metadata": {},
67 | "source": [
68 | "To start with, we visualize the marginal and joint distributions of both components as well as the cluster centers:"
69 | ]
70 | },
71 | {
72 | "cell_type": "code",
73 | "execution_count": null,
74 | "metadata": {},
75 | "outputs": [],
76 | "source": [
77 | "fig, axes = plt.subplots(1, 2, figsize=(10, 4))\n",
78 | "pyemma.plots.plot_feature_histograms(data, feature_labels=['$x$', '$y$'], ax=axes[0])\n",
79 | "pyemma.plots.plot_density(*data.T, ax=axes[1], cbar=False, alpha=0.1)\n",
80 | "axes[1].scatter(*cluster.clustercenters.T, s=15, c='C1')\n",
81 | "axes[1].set_xlabel('$x$')\n",
82 | "axes[1].set_ylabel('$y$')\n",
83 | "axes[1].set_xlim(-4, 4)\n",
84 | "axes[1].set_ylim(-4, 4)\n",
85 | "axes[1].set_aspect('equal')\n",
86 | "fig.tight_layout()"
87 | ]
88 | },
89 | {
90 | "cell_type": "markdown",
91 | "metadata": {},
92 | "source": [
93 | "The first step after obtaining the discretized dynamics is finding a suitable lag time.\n",
94 | "The systematic approach is to estimate MSMs at various lag times and observe how the implied timescales (ITSs) of these models behave.\n",
95 | "In particular, we are looking for lag time ranges in which the implied timescales are constant\n",
96 | "(i.e., lag time independent as described in the manuscript in Section 2.1).\n",
97 | "To this aim, PyEMMA provides the `its()` function which we use to track the first three (`nits=3`) implied timescales:"
98 | ]
99 | },
100 | {
101 | "cell_type": "code",
102 | "execution_count": null,
103 | "metadata": {},
104 | "outputs": [],
105 | "source": [
106 | "its = pyemma.msm.its(cluster.dtrajs, lags=[1, 2, 3, 5, 7, 10], nits=3, errors='bayes')"
107 | ]
108 | },
109 | {
110 | "cell_type": "markdown",
111 | "metadata": {},
112 | "source": [
113 | "We can pass the returned `its` object to the `pyemma.plots.plot_implied_timescales()` function:"
114 | ]
115 | },
116 | {
117 | "cell_type": "code",
118 | "execution_count": null,
119 | "metadata": {},
120 | "outputs": [],
121 | "source": [
122 | "pyemma.plots.plot_implied_timescales(its, ylog=False);"
123 | ]
124 | },
125 | {
126 | "cell_type": "markdown",
127 | "metadata": {},
128 | "source": [
129 | "The above plot tells us that there is one resolved process with an ITS of approximately $8.5$ steps (blue) which is largely invariant to the MSM lag time.\n",
130 | "The other two ITSs (green, red) are smaller than the lag time (black line, grey-shaded area);\n",
131 | "they correspond to processes which are faster than the lag time and, thus, are not resolved.\n",
132 | "Since the implied timescales are, like the corresponding eigenvalues, sorted in decreasing order,\n",
133 | "we know that all other remaining processes must be even faster.\n",
134 | "\n",
135 | "As MSMs tend to underestimate the true ITSs, we are looking for a converged maximum in the ITS plot.\n",
136 | "In our case, any lag time before the slow process (blue line) crosses the lag time threshold (black line) would work.\n",
137 | "To maximize the kinetic resolution, we choose the lag time $1$ step.\n",
138 | "\n",
139 | "To see whether our model satisfies Markovianity, we perform (and visualize) a Chapman-Kolmogorow (CK) test.\n",
140 | "Since we aim at modeling the dynamics between metastable states rather than between microstates, this will be conducted in the space of metastable states.\n",
141 | "The latter are identified automatically using PCCA++ (which is explained in [Notebook 05 📓](05-pcca-tpt.ipynb)).\n",
142 | "We usually choose the number of metastable states according to the implied timescales plot by identifying a gap between the ITS.\n",
143 | "For a single process, we can assume that there are two metastable states between which the process occurs."
144 | ]
145 | },
146 | {
147 | "cell_type": "code",
148 | "execution_count": null,
149 | "metadata": {},
150 | "outputs": [],
151 | "source": [
152 | "msm = pyemma.msm.estimate_markov_model(cluster.dtrajs, lag=1)\n",
153 | "pyemma.plots.plot_cktest(msm.cktest(2));"
154 | ]
155 | },
156 | {
157 | "cell_type": "markdown",
158 | "metadata": {},
159 | "source": [
160 | "We can see a perfect agreement between models estimated at higher lag times and predictions of the model at lag time $1$ step.\n",
161 | "Thus, we have estimated a valid MSM according to basic model validation.\n",
162 | "\n",
163 | "Should a CK test fail, it means that the dynamics in the space of metastable states is not Markovian.\n",
164 | "This can have multiple causes since it is the result of the combination of all steps in the pipeline.\n",
165 | "In practice, one would attempt to find a better model by tuning hyper-parameters such as the number of metastable states, the MSM lag time or the number of cluster centers.\n",
166 | "Back-tracking the error by following the pipeline in an upstream direction,\n",
167 | "i.e., by starting with the number of metastable states, is usually advised. \n",
168 | "\n",
169 | "A failing CK test might further hint at poor sampling.\n",
170 | "This case is explained in more detail in [Notebook 08 📓](08-common-problems.ipynb#poorly_sampled_dw).\n",
171 | "\n",
172 | "## Case 2: low-dimensional molecular dynamics data (alanine dipeptide)\n",
173 | "We fetch the alanine dipeptide data set, load the backbone torsions into memory and directly discretize the full space using $k$-means clustering.\n",
174 | "In order to demonstrate how to adjust the MSM lag time,\n",
175 | "we will first set the number of cluster centers to $200$ and justify this choice later."
176 | ]
177 | },
178 | {
179 | "cell_type": "code",
180 | "execution_count": null,
181 | "metadata": {},
182 | "outputs": [],
183 | "source": [
184 | "pdb = mdshare.fetch('alanine-dipeptide-nowater.pdb', working_directory='data')\n",
185 | "files = mdshare.fetch('alanine-dipeptide-*-250ns-nowater.xtc', working_directory='data')\n",
186 | "\n",
187 | "feat = pyemma.coordinates.featurizer(pdb)\n",
188 | "feat.add_backbone_torsions(periodic=False)\n",
189 | "data = pyemma.coordinates.load(files, features=feat)\n",
190 | "data_concatenated = np.concatenate(data)\n",
191 | "\n",
192 | "cluster = pyemma.coordinates.cluster_kmeans(data, k=200, max_iter=50, stride=10)"
193 | ]
194 | },
195 | {
196 | "cell_type": "markdown",
197 | "metadata": {},
198 | "source": [
199 | "From the discrete trajectories, implied timescales can be estimated:"
200 | ]
201 | },
202 | {
203 | "cell_type": "code",
204 | "execution_count": null,
205 | "metadata": {},
206 | "outputs": [],
207 | "source": [
208 | "its = pyemma.msm.its(cluster.dtrajs, lags=[1, 2, 5, 10, 20, 50], nits=4, errors='bayes')"
209 | ]
210 | },
211 | {
212 | "cell_type": "markdown",
213 | "metadata": {},
214 | "source": [
215 | "We visualize the marginal and joint distributions of both components as well as the cluster centers,\n",
216 | "and show the ITS convergence to help selecting a suitable lag time:"
217 | ]
218 | },
219 | {
220 | "cell_type": "code",
221 | "execution_count": null,
222 | "metadata": {},
223 | "outputs": [],
224 | "source": [
225 | "fig, axes = plt.subplots(1, 3, figsize=(12, 3))\n",
226 | "pyemma.plots.plot_feature_histograms(data_concatenated, feature_labels=['$\\Phi$', '$\\Psi$'], ax=axes[0])\n",
227 | "pyemma.plots.plot_density(*data_concatenated.T, ax=axes[1], cbar=False, alpha=0.1)\n",
228 | "axes[1].scatter(*cluster.clustercenters.T, s=15, c='C1')\n",
229 | "axes[1].set_xlabel('$\\Phi$')\n",
230 | "axes[1].set_ylabel('$\\Psi$')\n",
231 | "pyemma.plots.plot_implied_timescales(its, ax=axes[2], units='ps')\n",
232 | "fig.tight_layout()"
233 | ]
234 | },
235 | {
236 | "cell_type": "markdown",
237 | "metadata": {},
238 | "source": [
239 | "We observe three resolved processes with flat ITS for a lag time of approximately $10$ ps.\n",
240 | "\n",
241 | "Please note though that this ITS convergence analysis is based on the assumption that $200$ $k$-means centers are sufficient to discretize the dynamics.\n",
242 | "In order to study the influence of the clustering on the ITS convergence,\n",
243 | "we repeat the clustering and ITS convergence analysis for various number of cluster centers.\n",
244 | "For the sake of simplicity, we will restrict ourselves to the $k$-means algorithm; alternative clustering methods are presented in [Notebook 02 ➜ 📓](02-dimension-reduction-and-discretization.ipynb)."
245 | ]
246 | },
247 | {
248 | "cell_type": "code",
249 | "execution_count": null,
250 | "metadata": {},
251 | "outputs": [],
252 | "source": [
253 | "fig, axes = plt.subplots(2, 3, figsize=(12, 6))\n",
254 | "for i, k in enumerate([20, 50, 100]):\n",
255 | " cluster = pyemma.coordinates.cluster_kmeans(data, k=k, max_iter=50, stride=10)\n",
256 | " pyemma.plots.plot_density(*data_concatenated.T, ax=axes[0, i], cbar=False, alpha=0.1)\n",
257 | " axes[0, i].scatter(*cluster.clustercenters.T, s=15, c='C1')\n",
258 | " axes[0, i].set_xlabel('$\\Phi$')\n",
259 | " axes[0, i].set_ylabel('$\\Psi$')\n",
260 | " axes[0, i].set_title('k = {} centers'.format(k))\n",
261 | " pyemma.plots.plot_implied_timescales(\n",
262 | " pyemma.msm.its(cluster.dtrajs, lags=[1, 2, 5, 10, 20, 50], nits=4, errors='bayes'),\n",
263 | " ax=axes[1, i], units='ps')\n",
264 | " axes[1, i].set_ylim(1, 2000)\n",
265 | "fig.tight_layout()"
266 | ]
267 | },
268 | {
269 | "cell_type": "markdown",
270 | "metadata": {},
271 | "source": [
272 | "We can see from this analysis that the ITS curves indeed converge towards the $200$ centers case and we can continue with estimating/validating an MSM.\n",
273 | "\n",
274 | "Before we continue with MSM estimation, let us discuss implied timescales convergence for large systems.\n",
275 | "Given sufficient sampling, the task is often to find a discretization that captures the process of interest well enough to obtain implied timescales that converge within the trajectory length. \n",
276 | "\n",
277 | "As we see in the above example with $k=20$ cluster centers,\n",
278 | "increasing the MSM lag time compensates for poor discretization to a certain extent.\n",
279 | "In a more realistic system, however, trajectories have a finite length that limits the choice of our MSM lag time.\n",
280 | "Furthermore, our clustering might be worse than the one presented above,\n",
281 | "so convergence might not be reached at all.\n",
282 | "Thus, we aim to converge the implied timescales at a low lag time by fine-tuning not only the number of cluster centers,\n",
283 | "but also feature selection and dimension reduction measures.\n",
284 | "This additionally ensures that our model has the maximum achievable temporal resolution.\n",
285 | "\n",
286 | "Please note that choosing an appropriate MSM lag time variationally\n",
287 | "(e.g., using VAMP scoring) is, to our knowledge, not possible.\n",
288 | "\n",
289 | "Further details on how to account for poor discretization can be found in our notebook about hidden Markov models [Notebook 07 📓](07-hidden-markov-state-models.ipynb).\n",
290 | "An example on how implied timescales behave in the limit of poor sampling is shown in [Notebook 08 📓](08-common-problems.ipynb).\n",
291 | "\n",
292 | "Now, let's continue with the alanine dipeptide system.\n",
293 | "We estimate an MSM at lag time $10$ ps and, given that we have three slow processes, perform a CK test for four metastable states.\n",
294 | "\n",
295 | "⚠️ In general, the number of metastable states is a modeler's choice and will be explained in more detail in [Notebook 04 ➜ 📓](04-msm-analysis.ipynb) and [Notebook 07 ➜ 📓](07-hidden-markov-state-models.ipynb)."
296 | ]
297 | },
298 | {
299 | "cell_type": "code",
300 | "execution_count": null,
301 | "metadata": {},
302 | "outputs": [],
303 | "source": [
304 | "msm = pyemma.msm.estimate_markov_model(cluster.dtrajs, lag=10, dt_traj='1 ps')\n",
305 | "pyemma.plots.plot_cktest(msm.cktest(4), units='ps');"
306 | ]
307 | },
308 | {
309 | "cell_type": "markdown",
310 | "metadata": {},
311 | "source": [
312 | "The model prediction and re-estimation are in quite good agreement, but we do see some small deviations in the first row.\n",
313 | "\n",
314 | "To obtain error bars for the model prediction,\n",
315 | "we estimate a Bayesian MSM under the same conditions as the regular MSM and repeat the CK test for the Bayesian model:"
316 | ]
317 | },
318 | {
319 | "cell_type": "code",
320 | "execution_count": null,
321 | "metadata": {},
322 | "outputs": [],
323 | "source": [
324 | "bayesian_msm = pyemma.msm.bayesian_markov_model(cluster.dtrajs, lag=10, dt_traj='1 ps', conf=0.95)\n",
325 | "pyemma.plots.plot_cktest(bayesian_msm.cktest(4), units='ps');"
326 | ]
327 | },
328 | {
329 | "cell_type": "markdown",
330 | "metadata": {},
331 | "source": [
332 | "Bayesian MSMs are an extension of regular maximum likelihood (ML) MSMs that represent a sample of (reversible) transition matrices.\n",
333 | "As presented here, they are usually used to compute confidence intervals.\n",
334 | "\n",
335 | "A regular MSM estimates a single transition matrix which maximizes the likelihood of the data given the model.\n",
336 | "Thus, all derived quantities are based on this ML estimation.\n",
337 | "A Bayesian MSM, in comparison, starts with a ML-MSM and samples transition matrices using a Monte Carlo scheme.\n",
338 | "Hence, posterior distributions of target properties can be estimated by computing these properties from each individual transition matrix in the sample.\n",
339 | "\n",
340 | "The initial ML-MSM used for the transition matrix sampling is contained in the `BayesianMSM` object with its properties accessible to the user.\n",
341 | "Please note that different default estimation parameters might yield results that numerically differ from a directly estimated ML-MSM.\n",
342 | "\n",
343 | "In the case of the low-dimensional molecular dynamics data, we thus observe that the deviations are within a $95\\%$ confidence interval.\n",
344 | "\n",
345 | "### Persisting and restoring estimators\n",
346 | "\n",
347 | "Because some of the estimations performed so far require considerable computational effort (e.g., TICA or $k$-means with many centers),\n",
348 | "it can be desirable to persist the resulting models to a file.\n",
349 | "Luckily, PyEMMA provides a convenience method for this.\n",
350 | "Just try it out:"
351 | ]
352 | },
353 | {
354 | "cell_type": "code",
355 | "execution_count": null,
356 | "metadata": {},
357 | "outputs": [],
358 | "source": [
359 | "cluster.save('nb3.pyemma', model_name='kmeans_k200')"
360 | ]
361 | },
362 | {
363 | "cell_type": "markdown",
364 | "metadata": {},
365 | "source": [
366 | "Now we have stored the current state of the clustering estimator to disk.\n",
367 | "A file can contain multiple models; this is why we used the `model_name` argument to specify a name.\n",
368 | "If omitted, the estimator will be saved under the name `default_model`.\n",
369 | "\n",
370 | "Assume that we have restarted our Python session and do not want to re-compute everything.\n",
371 | "We can now restore the previously saved estimator via"
372 | ]
373 | },
374 | {
375 | "cell_type": "code",
376 | "execution_count": null,
377 | "metadata": {},
378 | "outputs": [],
379 | "source": [
380 | "cluster_restored = pyemma.load('nb3.pyemma', model_name='kmeans_k200')\n",
381 | "\n",
382 | "# check that nothing has changed\n",
383 | "np.testing.assert_allclose(cluster_restored.clustercenters, cluster.clustercenters, atol=1e-15)"
384 | ]
385 | },
386 | {
387 | "cell_type": "markdown",
388 | "metadata": {},
389 | "source": [
390 | "To check the contents of a file, you can use PyEMMA's `pyemma.list_models()` function:"
391 | ]
392 | },
393 | {
394 | "cell_type": "code",
395 | "execution_count": null,
396 | "metadata": {},
397 | "outputs": [],
398 | "source": [
399 | "pyemma.list_models('nb3.pyemma')"
400 | ]
401 | },
402 | {
403 | "cell_type": "code",
404 | "execution_count": null,
405 | "metadata": {},
406 | "outputs": [],
407 | "source": [
408 | "# we now remove this file again\n",
409 | "import os\n",
410 | "os.unlink('nb3.pyemma')"
411 | ]
412 | },
413 | {
414 | "cell_type": "markdown",
415 | "metadata": {},
416 | "source": [
417 | "As you can see, all important attributes of an estimator are stored.\n",
418 | "PyEMMA provides forward compatibility of stored estimators:\n",
419 | "you can always load your files with a newer PyEMMA version, but files saved by a newer version cannot be read by older ones.\n",
420 | "\n",
421 | "#### Exercise 1\n",
422 | "\n",
423 | "Load the heavy atom distances into memory, perform PCA and TICA (`lag=3`) with `dim=2`,\n",
424 | "then discretize with $100$ $k$-means centers and a stride of $10$."
425 | ]
426 | },
427 | {
428 | "cell_type": "code",
429 | "execution_count": null,
430 | "metadata": {
431 | "solution2": "hidden",
432 | "solution2_first": true
433 | },
434 | "outputs": [],
435 | "source": [
436 | "feat = #FIXME\n",
437 | "feat. #FIXME\n",
438 | "data = #FIXME\n",
439 | "\n",
440 | "pca = pyemma.coordinates.pca(data, dim=2)\n",
441 | "tica = #FIXME\n",
442 | "\n",
443 | "pca_concatenated = np.concatenate(pca.get_output())\n",
444 | "tica_concatenated = #FIXME\n",
445 | "\n",
446 | "cls_pca = pyemma.coordinates.cluster_kmeans(pca, k=100, max_iter=50, stride=10)\n",
447 | "cls_tica = #FIXME\n",
448 | "\n",
449 | "its_pca = pyemma.msm.its(\n",
450 | " cls_pca.dtrajs, lags=[1, 2, 5, 10, 20, 50], nits=4, errors='bayes')\n",
451 | "its_tica = #FIXME"
452 | ]
453 | },
454 | {
455 | "cell_type": "markdown",
456 | "metadata": {
457 | "solution2": "hidden"
458 | },
459 | "source": [
460 | "###### Solution"
461 | ]
462 | },
463 | {
464 | "cell_type": "code",
465 | "execution_count": null,
466 | "metadata": {
467 | "solution2": "hidden"
468 | },
469 | "outputs": [],
470 | "source": [
471 | "feat = pyemma.coordinates.featurizer(pdb)\n",
472 | "pairs = feat.pairs(feat.select_Heavy())\n",
473 | "feat.add_distances(pairs, periodic=False)\n",
474 | "data = pyemma.coordinates.load(files, features=feat)\n",
475 | "\n",
476 | "pca = pyemma.coordinates.pca(data, dim=2)\n",
477 | "tica = pyemma.coordinates.tica(data, lag=3, dim=2)\n",
478 | "\n",
479 | "pca_concatenated = np.concatenate(pca.get_output())\n",
480 | "tica_concatenated = np.concatenate(tica.get_output())\n",
481 | "\n",
482 | "cls_pca = pyemma.coordinates.cluster_kmeans(pca, k=100, max_iter=50, stride=10)\n",
483 | "cls_tica = pyemma.coordinates.cluster_kmeans(tica, k=100, max_iter=50, stride=10)\n",
484 | "\n",
485 | "its_pca = pyemma.msm.its(\n",
486 | " cls_pca.dtrajs, lags=[1, 2, 5, 10, 20, 50], nits=4, errors='bayes')\n",
487 | "its_tica = pyemma.msm.its(\n",
488 | " cls_tica.dtrajs, lags=[1, 2, 5, 10, 20, 50], nits=4, errors='bayes')"
489 | ]
490 | },
491 | {
492 | "cell_type": "markdown",
493 | "metadata": {},
494 | "source": [
495 | "Let's visualize the ITS convergence for both projections:"
496 | ]
497 | },
498 | {
499 | "cell_type": "code",
500 | "execution_count": null,
501 | "metadata": {},
502 | "outputs": [],
503 | "source": [
504 | "fig, axes = plt.subplots(2, 3, figsize=(12, 6))\n",
505 | "pyemma.plots.plot_feature_histograms(pca_concatenated, ax=axes[0, 0])\n",
506 | "pyemma.plots.plot_feature_histograms(tica_concatenated, ax=axes[1, 0])\n",
507 | "axes[0, 0].set_title('PCA')\n",
508 | "axes[1, 0].set_title('TICA')\n",
509 | "pyemma.plots.plot_density(*pca_concatenated.T, ax=axes[0, 1], cbar=False, alpha=0.1)\n",
510 | "axes[0, 1].scatter(*cls_pca.clustercenters.T, s=15, c='C1')\n",
511 | "axes[0, 1].set_xlabel('PC 1')\n",
512 | "axes[0, 1].set_ylabel('PC 2')\n",
513 | "pyemma.plots.plot_density(*tica_concatenated.T, ax=axes[1, 1], cbar=False, alpha=0.1)\n",
514 | "axes[1, 1].scatter(*cls_tica.clustercenters.T, s=15, c='C1')\n",
515 | "axes[1, 1].set_xlabel('IC 1')\n",
516 | "axes[1, 1].set_ylabel('IC 2')\n",
517 | "pyemma.plots.plot_implied_timescales(its_pca, ax=axes[0, 2], units='ps')\n",
518 | "pyemma.plots.plot_implied_timescales(its_tica, ax=axes[1, 2], units='ps')\n",
519 | "axes[0, 2].set_ylim(1, 2000)\n",
520 | "axes[1, 2].set_ylim(1, 2000)\n",
521 | "fig.tight_layout()"
522 | ]
523 | },
524 | {
525 | "cell_type": "markdown",
526 | "metadata": {},
527 | "source": [
528 | "Despite the fact that PCA yields a projection with some defined basins,\n",
529 | "the ITS plot shows that only one \"slow\" process is resolved, and it is more than one order of magnitude too fast.\n",
530 | "\n",
531 | "TICA does find three slow processes which agree (in terms of the implied timescales) with the backbone torsions example above.\n",
532 | "\n",
533 | "We conclude that this PCA projection is not suitable to resolve the slow dynamics of alanine dipeptide, and we will continue to estimate/validate the TICA-based projection.\n",
534 | "\n",
535 | "#### Exercise 2\n",
536 | "\n",
537 | "Estimate a Bayesian MSM at lag time $10$ ps and perform/show a CK test for four metastable states."
538 | ]
539 | },
540 | {
541 | "cell_type": "code",
542 | "execution_count": null,
543 | "metadata": {
544 | "solution2": "hidden",
545 | "solution2_first": true
546 | },
547 | "outputs": [],
548 | "source": [
549 | "bayesian_msm = pyemma.msm.bayesian_markov_model(cls_tica.dtrajs, lag=10, dt_traj='1 ps')\n",
550 | "pyemma.plots. #FIXME"
551 | ]
552 | },
553 | {
554 | "cell_type": "markdown",
555 | "metadata": {
556 | "solution2": "hidden"
557 | },
558 | "source": [
559 | "###### Solution"
560 | ]
561 | },
562 | {
563 | "cell_type": "code",
564 | "execution_count": null,
565 | "metadata": {
566 | "solution2": "hidden"
567 | },
568 | "outputs": [],
569 | "source": [
570 | "bayesian_msm = pyemma.msm.bayesian_markov_model(cls_tica.dtrajs, lag=10, dt_traj='1 ps')\n",
571 | "pyemma.plots.plot_cktest(bayesian_msm.cktest(4), units='ps');"
572 | ]
573 | },
574 | {
575 | "cell_type": "markdown",
576 | "metadata": {},
577 | "source": [
578 | "We again see a good agreement between model prediction and re-estimation.\n",
579 | "\n",
580 | "## Wrapping up\n",
581 | "In this notebook, we have learned how to estimate a regular or Bayesian MSM from discretized molecular simulation data with `pyemma` and how to perform basic model validation.\n",
582 | "In detail, we have selected a suitable lag time by using\n",
583 | "- `pyemma.msm.its()` to obtain an implied timescale object and\n",
584 | "- `pyemma.plots.plot_implied_timescales()` to visualize the convergence of the implied timescales.\n",
585 | "\n",
586 | "We then have used\n",
587 | "- `pyemma.msm.estimate_markov_model()` to estimate a regular MSM,\n",
588 | "- `pyemma.msm.bayesian_markov_model()` to estimate a Bayesian MSM,\n",
589 | "- the `timescales()` method of an estimated MSM object to access its implied timescales,\n",
590 | "- the `cktest()` method of an estimated MSM object to perform a Chapman-Kolmogorov test, and\n",
591 | "- `pyemma.plots.plot_cktest()` to visualize the latter."
592 | ]
593 | },
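  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As a final sketch (this cell is an addition to the original tutorial), the `timescales()` method listed in the summary can be called directly on the MSM we estimated above:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# sketch: first three implied timescales of the MSM estimated above\n",
    "# (in ps, since the MSM was estimated with dt_traj='1 ps')\n",
    "print(msm.timescales(3))"
   ]
  },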
594 | {
595 | "cell_type": "markdown",
596 | "metadata": {},
597 | "source": [
598 | "## References\n",
599 | "\n",
600 | "[^]Prinz, Jan-Hendrik and Wu, Hao and Sarich, Marco and Keller, Bettina and Senne, Martin and Held, Martin and Chodera, John D. and Schütte, Christof and Noé, Frank. 2011. _Markov models of molecular kinetics: Generation and validation_. [URL](http://scitation.aip.org/content/aip/journal/jcp/134/17/10.1063/1.3565032)\n",
601 | "\n",
602 | "[^]Gregory R. Bowman and Vijay S. Pande and Frank Noé. 2014. _An Introduction to Markov State Models and Their Application to Long Timescale Molecular Simulation_. [URL](https://doi.org/10.1007%2F978-94-007-7606-7)\n",
603 | "\n",
604 | "[^]Brooke E. Husic and Vijay S. Pande. 2018. _Markov State Models: From an Art to a Science_.\n",
605 | "\n"
606 | ]
607 | }
608 | ],
609 | "metadata": {
610 | "kernelspec": {
611 | "display_name": "Python 3",
612 | "language": "python",
613 | "name": "python3"
614 | },
615 | "language_info": {
616 | "codemirror_mode": {
617 | "name": "ipython",
618 | "version": 3
619 | },
620 | "file_extension": ".py",
621 | "mimetype": "text/x-python",
622 | "name": "python",
623 | "nbconvert_exporter": "python",
624 | "pygments_lexer": "ipython3",
625 | "version": "3.6.5"
626 | },
627 | "toc": {
628 | "base_numbering": 1,
629 | "nav_menu": {},
630 | "number_sections": false,
631 | "sideBar": true,
632 | "skip_h1_title": true,
633 | "title_cell": "Table of Contents",
634 | "title_sidebar": "Contents",
635 | "toc_cell": false,
636 | "toc_position": {},
637 | "toc_section_display": true,
638 | "toc_window_display": true
639 | }
640 | },
641 | "nbformat": 4,
642 | "nbformat_minor": 2
643 | }
644 |
--------------------------------------------------------------------------------
/notebooks/04-msm-analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# 04 - MSM analysis\n",
8 | "\n",
9 | "\n",
10 | "\n",
11 | "In this notebook, we will cover how to analyze an MSM and how the modeled processes correspond to MSM spectral properties.\n",
12 | "We assume that you are familiar with data loading/visualization\n",
13 | "([Notebook 01 ➜ 📓](01-data-io-and-featurization.ipynb)),\n",
14 | "dimension reduction ([Notebook 02 ➜ 📓](02-dimension-reduction-and-discretization.ipynb)), and\n",
15 | "the estimation and validation process ([Notebook 03 ➜ 📓](03-msm-estimation-and-validation.ipynb)).\n",
16 | "\n",
17 | "Maintainers: [@cwehmeyer](https://github.com/cwehmeyer), [@marscher](https://github.com/marscher), [@thempel](https://github.com/thempel), [@psolsson](https://github.com/psolsson)\n",
18 | "\n",
19 | "**Remember**:\n",
20 | "- to run the currently highlighted cell, hold ⇧ Shift and press ⏎ Enter;\n",
21 | "- to get help for a specific function, place the cursor within the function's brackets, hold ⇧ Shift, and press ⇥ Tab;\n",
22 | "- you can find the full documentation at [PyEMMA.org](http://www.pyemma.org)."
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": null,
28 | "metadata": {},
29 | "outputs": [],
30 | "source": [
31 | "%matplotlib inline\n",
32 | "import matplotlib.pyplot as plt\n",
33 | "import matplotlib as mpl\n",
34 | "import numpy as np\n",
35 | "import mdshare\n",
36 | "import pyemma"
37 | ]
38 | },
39 | {
40 | "cell_type": "markdown",
41 | "metadata": {},
42 | "source": [
43 | "## Case 1: preprocessed, two-dimensional data (toy model)\n",
44 | "We load the two-dimensional trajectory from an archive using numpy,\n",
45 | "directly discretize the full space using $k$-means clustering,\n",
46 | "visualize the marginal and joint distributions of both components as well as the cluster centers,\n",
47 | "and show the implied timescale (ITS) convergence:"
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": null,
53 | "metadata": {},
54 | "outputs": [],
55 | "source": [
56 | "file = mdshare.fetch('hmm-doublewell-2d-100k.npz', working_directory='data')\n",
57 | "with np.load(file) as fh:\n",
58 | " data = fh['trajectory']\n",
59 | "\n",
60 | "cluster = pyemma.coordinates.cluster_kmeans(data, k=50, max_iter=50)\n",
61 | "its = pyemma.msm.its(\n",
62 | " cluster.dtrajs, lags=[1, 2, 3, 5, 7, 10], nits=3, errors='bayes')\n",
63 | "\n",
64 | "fig, axes = plt.subplots(1, 3, figsize=(12, 3))\n",
65 | "pyemma.plots.plot_feature_histograms(data, feature_labels=['$x$', '$y$'], ax=axes[0])\n",
66 | "pyemma.plots.plot_density(*data.T, ax=axes[1], cbar=False, alpha=0.1)\n",
67 | "axes[1].scatter(*cluster.clustercenters.T, s=15, c='C1')\n",
68 | "axes[1].set_xlabel('$x$')\n",
69 | "axes[1].set_ylabel('$y$')\n",
70 | "axes[1].set_xlim(-4, 4)\n",
71 | "axes[1].set_ylim(-4, 4)\n",
72 | "axes[1].set_aspect('equal')\n",
73 | "pyemma.plots.plot_implied_timescales(its, ylog=False, ax=axes[2])\n",
74 | "fig.tight_layout()"
75 | ]
76 | },
77 | {
78 | "cell_type": "markdown",
79 | "metadata": {},
80 | "source": [
81 | "The plots show us the marginal (left panel) and joint distributions along with the cluster centers (middle panel).\n",
82 | "The implied timescales are converged (right panel). \n",
83 | "\n",
84 | "Before we proceed, let's have a look at the implied timescales error bars.\n",
85 | "They were computed from a Bayesian MSM, as requested by the `errors='bayes'` argument of the `pyemma.msm.its()` function.\n",
86 | "As mentioned before, Bayesian MSMs incorporate a sample of transition matrices.\n",
87 | "Target properties such as implied timescales can now simply be computed from the individual matrices.\n",
88 | "In this way, the posterior distributions of these properties can be estimated.\n",
89 | "The ITS plot shows a confidence interval that contains $95\\%$ of the Bayesian samples."
90 | ]
91 | },
92 | {
93 | "cell_type": "code",
94 | "execution_count": null,
95 | "metadata": {},
96 | "outputs": [],
97 | "source": [
98 | "bayesian_msm = pyemma.msm.bayesian_markov_model(cluster.dtrajs, lag=1, conf=0.95)"
99 | ]
100 | },
101 | {
102 | "cell_type": "markdown",
103 | "metadata": {},
104 | "source": [
105 | "For any PyEMMA method that derives target properties from MSMs, sample mean and confidence intervals (as defined by the function argument above) are directly accessible with `sample_mean()` and `sample_conf()`.\n",
106 | "Further, `sample_std()` is available for computing the standard deviation.\n",
107 | "In the more general case, it might be interesting to extract the full sample of a function evaluation with `sample_f()`.\n",
108 | "The syntax is equivalent for all those functions."
109 | ]
110 | },
111 | {
112 | "cell_type": "code",
113 | "execution_count": null,
114 | "metadata": {},
115 | "outputs": [],
116 | "source": [
117 | "sample_mean = bayesian_msm.sample_mean('timescales', k=1)\n",
118 | "sample_conf_l, sample_conf_r = bayesian_msm.sample_conf('timescales', k=1)\n",
119 | "\n",
120 | "print('Mean of first ITS: {:f}'.format(sample_mean[0]))\n",
121 | "print('Confidence interval: [{:f}, {:f}]'.format(sample_conf_l[0], sample_conf_r[0]))"
122 | ]
123 | },
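  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As a brief illustration (this cell is an addition to the original tutorial), the same syntax yields the standard deviation and the full posterior sample of the first implied timescale via the `sample_std()` and `sample_f()` methods mentioned above:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# sketch: standard deviation and full sample of the first ITS\n",
    "sample_std = bayesian_msm.sample_std('timescales', k=1)\n",
    "samples = bayesian_msm.sample_f('timescales', k=1)\n",
    "\n",
    "print('Std. dev. of first ITS: {:f}'.format(sample_std[0]))\n",
    "print('Number of sampled transition matrices: {}'.format(len(samples)))"
   ]
  },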
124 | {
125 | "cell_type": "markdown",
126 | "metadata": {},
127 | "source": [
128 | "Please note that the sample mean and the maximum likelihood estimate are different estimators and generally do not yield numerically identical results.\n",
129 | "\n",
130 | "Now, for the sake of simplicity we proceed with the analysis of a maximum likelihood MSM.\n",
131 | "We estimate it at lag time $1$ step..."
132 | ]
133 | },
134 | {
135 | "cell_type": "code",
136 | "execution_count": null,
137 | "metadata": {},
138 | "outputs": [],
139 | "source": [
140 | "msm = pyemma.msm.estimate_markov_model(cluster.dtrajs, lag=1)"
141 | ]
142 | },
143 | {
144 | "cell_type": "markdown",
145 | "metadata": {},
146 | "source": [
147 | "... and check for disconnectivity.\n",
148 | "The MSM is constructed on the largest set of discrete states that are (reversibly) connected.\n",
149 | "The `active_state_fraction` and `active_count_fraction` show us the fraction of discrete states and transition counts from our data which are part of this largest set and, thus, used for the model:"
150 | ]
151 | },
152 | {
153 | "cell_type": "code",
154 | "execution_count": null,
155 | "metadata": {},
156 | "outputs": [],
157 | "source": [
158 | "print('fraction of states used = {:f}'.format(msm.active_state_fraction))\n",
159 | "print('fraction of counts used = {:f}'.format(msm.active_count_fraction))"
160 | ]
161 | },
162 | {
163 | "cell_type": "markdown",
164 | "metadata": {},
165 | "source": [
166 | "The fraction is, in both cases, $1$ and, thus, we have no disconnected states (which we would have to exclude from our analysis).\n",
167 | "\n",
168 | "If there were any disconnectivities in our data (fractions $<1$),\n",
169 | "we could access the indices of the **active states** (members of the largest connected set) via the `active_set` attribute:"
170 | ]
171 | },
172 | {
173 | "cell_type": "code",
174 | "execution_count": null,
175 | "metadata": {},
176 | "outputs": [],
177 | "source": [
178 | "print(msm.active_set)"
179 | ]
180 | },
181 | {
182 | "cell_type": "markdown",
183 | "metadata": {},
184 | "source": [
185 | "With this potential issue out of the way, we can extract our first (stationary/thermodynamic) property,\n",
186 | "the `stationary_distribution` or, as a shortcut, `pi`:"
187 | ]
188 | },
189 | {
190 | "cell_type": "code",
191 | "execution_count": null,
192 | "metadata": {},
193 | "outputs": [],
194 | "source": [
195 | "print(msm.stationary_distribution)\n",
196 | "print('sum of weights = {:f}'.format(msm.pi.sum()))"
197 | ]
198 | },
199 | {
200 | "cell_type": "markdown",
201 | "metadata": {},
202 | "source": [
203 | "The attribute `msm.pi` tells us, for each discrete state, the absolute probability of observing said state in global equilibrium.\n",
204 | "Mathematically speaking, the stationary distribution $\\pi$ is the left eigenvector of the transition matrix $\\mathbf{P}$ to the eigenvalue $1$:\n",
205 | "\n",
206 | "$$\\pi^\\top \\mathbf{P} = \\pi^\\top.$$\n",
207 | "\n",
208 | "Please note that $\\pi$ is fundamentally different from a normalized histogram of states:\n",
209 | "for the histogram of states to accurately describe the stationary distribution, the data needs to be sampled from global equilibrium, i.e., the data points need to be statistically independent.\n",
210 | "The MSM approach, on the other hand, only requires local equilibrium, i.e., statistical independence of state transitions.\n",
211 | "Thus, the MSM approach imposes a much weaker and, in practice, much easier to satisfy condition than simply counting state visits.\n",
212 | "\n",
213 | "We can use the stationary distribution to, e.g., visualize the weight of the discrete states and, thus, to highlight which areas of our feature space are most probable.\n",
214 | "Here, we show all data points in a two-dimensional scatter plot and color/weight them according to their discrete state membership:"
215 | ]
216 | },
217 | {
218 | "cell_type": "code",
219 | "execution_count": null,
220 | "metadata": {},
221 | "outputs": [],
222 | "source": [
223 | "fig, ax, misc = pyemma.plots.plot_contour(\n",
224 | " *data.T, msm.pi[cluster.dtrajs[0]],\n",
225 | " cbar_label='stationary distribution',\n",
226 | " method='nearest', mask=True)\n",
227 | "ax.scatter(*cluster.clustercenters.T, s=15, c='C1')\n",
228 | "ax.set_xlabel('$x$')\n",
229 | "ax.set_ylabel('$y$')\n",
230 | "ax.set_xlim(-4, 4)\n",
231 | "ax.set_ylim(-4, 4)\n",
232 | "ax.set_aspect('equal')\n",
233 | "fig.tight_layout()"
234 | ]
235 | },
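  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As a quick sanity check (this cell is an addition to the original tutorial), we can verify numerically that $\\pi$ is indeed a left eigenvector of the transition matrix with eigenvalue $1$, and compare it to the naive normalized histogram of state visits, which should be similar for this well-sampled toy dataset:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# sketch: verify pi^T P = pi^T for the estimated transition matrix\n",
    "print('pi is a left eigenvector with eigenvalue 1:',\n",
    "      np.allclose(msm.pi.dot(msm.transition_matrix), msm.pi))\n",
    "\n",
    "# naive normalized histogram of discrete state visits; valid comparison here\n",
    "# because the active set covers all states (both fractions were 1 above)\n",
    "hist = np.bincount(cluster.dtrajs[0], minlength=msm.nstates) / len(cluster.dtrajs[0])\n",
    "print('max. deviation from histogram: {:f}'.format(np.abs(msm.pi - hist).max()))"
   ]
  },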
236 | {
237 | "cell_type": "markdown",
238 | "metadata": {},
239 | "source": [
240 | "The stationary distribution can also be used to correct the `pyemma.plots.plot_free_energy()` function that we used to visualize this dataset in [Notebook 01 ➜ 📓](01-data-io-and-featurization.ipynb).\n",
241 | "This might be necessary if the data points are not sampled from global equilibrium.\n",
242 | "\n",
243 | "In this case, we assign the weight of the corresponding discrete state to each data point and pass this information to the plotting function via its `weights` parameter:"
244 | ]
245 | },
246 | {
247 | "cell_type": "code",
248 | "execution_count": null,
249 | "metadata": {},
250 | "outputs": [],
251 | "source": [
252 | "fig, ax, misc = pyemma.plots.plot_free_energy(\n",
253 | " *data.T,\n",
254 | " weights=np.concatenate(msm.trajectory_weights()),\n",
255 | " legacy=False)\n",
256 | "ax.set_xlabel('$x$')\n",
257 | "ax.set_ylabel('$y$')\n",
258 | "ax.set_xlim(-4, 4)\n",
259 | "ax.set_ylim(-4, 4)\n",
260 | "ax.set_aspect('equal')\n",
261 | "fig.tight_layout()"
262 | ]
263 | },
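  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As a brief check (this cell is an addition to the original tutorial), `trajectory_weights()` returns one weight per frame, and the weights are normalized over all trajectories:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# sketch: frame weights from the MSM are normalized over all trajectories\n",
    "weights = np.concatenate(msm.trajectory_weights())\n",
    "print('number of frames: {}'.format(len(weights)))\n",
    "print('sum of weights = {:f}'.format(weights.sum()))"
   ]
  },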
264 | {
265 | "cell_type": "markdown",
266 | "metadata": {},
267 | "source": [
268 | "We will see further uses of the stationary distribution later.\n",
269 | "But for now, we continue the analysis of our model by visualizing its (right) eigenvectors, which encode the dynamical processes.\n",
270 | "First, we notice that the first right eigenvector is a constant $1$."
271 | ]
272 | },
273 | {
274 | "cell_type": "code",
275 | "execution_count": null,
276 | "metadata": {},
277 | "outputs": [],
278 | "source": [
279 | "eigvec = msm.eigenvectors_right()\n",
280 | "print('first eigenvector is one: {} (min={}, max={})'.format(\n",
281 | " np.allclose(eigvec[:, 0], 1, atol=1e-15), eigvec[:, 0].min(), eigvec[:, 0].max()))"
282 | ]
283 | },
284 | {
285 | "cell_type": "markdown",
286 | "metadata": {},
287 | "source": [
288 | "Second, the higher eigenvectors can be visualized as follows:"
289 | ]
290 | },
291 | {
292 | "cell_type": "code",
293 | "execution_count": null,
294 | "metadata": {},
295 | "outputs": [],
296 | "source": [
297 | "fig, axes = plt.subplots(1, 3, figsize=(12, 3))\n",
298 | "for i, ax in enumerate(axes.flat):\n",
299 | " pyemma.plots.plot_contour(\n",
300 | " *data.T, eigvec[cluster.dtrajs[0], i + 1], ax=ax, cmap='PiYG',\n",
301 | " cbar_label='{}. right eigenvector'.format(i + 2), mask=True)\n",
302 | " ax.scatter(*cluster.clustercenters.T, s=15, c='C1')\n",
303 | " ax.set_xlabel('$x$')\n",
304 | " ax.set_xlim(-4, 4)\n",
305 | " ax.set_ylim(-4, 4)\n",
306 | " ax.set_aspect('equal')\n",
307 | "axes[0].set_ylabel('$y$')\n",
308 | "fig.tight_layout()"
309 | ]
310 | },
311 | {
312 | "cell_type": "markdown",
313 | "metadata": {},
314 | "source": [
315 | "The right eigenvectors can be used to visualize the processes governed by the corresponding implied timescales.\n",
316 | "The first right eigenvector (always) is $(1,\\dots,1)^\\top$ for an MSM transition matrix and it corresponds to the stationary process (infinite implied timescale).\n",
317 | "\n",
318 | "The second right eigenvector corresponds to the slowest process;\n",
319 | "its entries are negative for one group of discrete states and positive for the other group.\n",
320 | "This tells us that the slowest process happens between these two groups and that the process relaxes on the slowest ITS ($\\approx 8.5$ steps).\n",
321 | "\n",
322 | "The third and fourth eigenvectors show a larger spread of values and no clear grouping.\n",
323 | "In combination with the ITS convergence plot, we can safely assume that these eigenvectors contain just noise and do not indicate any resolved processes.\n",
324 | "\n",
325 | "We then continue to validate our MSM with a CK test for $2$ metastable states, which are already indicated by the second right eigenvector; before running the test, the short sketch below relates the MSM eigenvalues to these implied timescales."
326 | ]
327 | },
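  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This cell is an addition to the original tutorial: the implied timescales follow from the transition matrix eigenvalues $\\lambda_i$ via $t_i = -\\tau / \\ln|\\lambda_i|$, here with lag time $\\tau = 1$ step as used above."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# sketch: implied timescales from eigenvalues, t_i = -tau / ln|lambda_i| with tau = 1\n",
    "lambdas = msm.eigenvalues(4)[1:]  # skip the stationary eigenvalue 1\n",
    "print('from eigenvalues:', -1 / np.log(np.abs(lambdas)))\n",
    "print('timescales():    ', msm.timescales(3))  # should agree"
   ]
  },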
328 | {
329 | "cell_type": "code",
330 | "execution_count": null,
331 | "metadata": {},
332 | "outputs": [],
333 | "source": [
334 | "nstates = 2\n",
335 | "pyemma.plots.plot_cktest(msm.cktest(nstates));"
336 | ]
337 | },
338 | {
339 | "cell_type": "markdown",
340 | "metadata": {},
341 | "source": [
342 | "We now save the model to do more analyses with PCCA++ and TPT in [Notebook 05 ➜ 📓](05-pcca-tpt.ipynb):"
343 | ]
344 | },
345 | {
346 | "cell_type": "code",
347 | "execution_count": null,
348 | "metadata": {},
349 | "outputs": [],
350 | "source": [
351 | "cluster.save('nb4.pyemma', model_name='doublewell_cluster', overwrite=True)\n",
352 | "msm.save('nb4.pyemma', model_name='doublewell_msm', overwrite=True)\n",
353 | "bayesian_msm.save('nb4.pyemma', model_name='doublewell_bayesian_msm', overwrite=True)"
354 | ]
355 | },
356 | {
357 | "cell_type": "markdown",
358 | "metadata": {},
359 | "source": [
360 | "## Case 2: low-dimensional molecular dynamics data (alanine dipeptide)\n",
361 | "\n",
362 | "We fetch the alanine dipeptide data set, load the backbone torsions into memory,\n",
363 | "directly discretize the full space using $k$-means clustering,\n",
364 | "visualize the marginal and joint distributions of both components as well as the cluster centers,\n",
365 | "and show the ITS convergence to help selecting a suitable lag time:"
366 | ]
367 | },
368 | {
369 | "cell_type": "code",
370 | "execution_count": null,
371 | "metadata": {
372 | "scrolled": true
373 | },
374 | "outputs": [],
375 | "source": [
376 | "pdb = mdshare.fetch('alanine-dipeptide-nowater.pdb', working_directory='data')\n",
377 | "files = mdshare.fetch('alanine-dipeptide-*-250ns-nowater.xtc', working_directory='data')\n",
378 | "\n",
379 | "feat = pyemma.coordinates.featurizer(pdb)\n",
380 | "feat.add_backbone_torsions(periodic=False)\n",
381 | "data = pyemma.coordinates.load(files, features=feat)\n",
382 | "data_concatenated = np.concatenate(data)\n",
383 | "\n",
384 | "cluster = pyemma.coordinates.cluster_kmeans(data, k=100, max_iter=50, stride=10)\n",
385 | "dtrajs_concatenated = np.concatenate(cluster.dtrajs)\n",
386 | "\n",
387 | "its = pyemma.msm.its(\n",
388 | " cluster.dtrajs, lags=[1, 2, 5, 10, 20, 50], nits=4, errors='bayes')\n",
389 | "\n",
390 | "fig, axes = plt.subplots(1, 3, figsize=(12, 3))\n",
391 | "pyemma.plots.plot_feature_histograms(\n",
392 | " np.concatenate(data), feature_labels=['$\\Phi$', '$\\Psi$'], ax=axes[0])\n",
393 | "pyemma.plots.plot_density(*data_concatenated.T, ax=axes[1], cbar=False, alpha=0.1)\n",
394 | "axes[1].scatter(*cluster.clustercenters.T, s=15, c='C1')\n",
395 | "axes[1].set_xlabel('$\\Phi$')\n",
396 | "axes[1].set_ylabel('$\\Psi$')\n",
397 | "pyemma.plots.plot_implied_timescales(its, ax=axes[2], units='ps')\n",
398 | "fig.tight_layout()"
399 | ]
400 | },
401 | {
402 | "cell_type": "markdown",
403 | "metadata": {},
404 | "source": [
405 | "The plots show us the marginal (left panel) and joint distributions along with the cluster centers (middle panel).\n",
406 | "The implied timescales are converged (right panel). \n",
407 | "\n",
408 | "We then estimate an MSM at lag time $10$ ps and visualize the stationary distribution by coloring all data points according to the stationary weight of the discrete state they belong to:"
409 | ]
410 | },
411 | {
412 | "cell_type": "code",
413 | "execution_count": null,
414 | "metadata": {},
415 | "outputs": [],
416 | "source": [
417 | "msm = pyemma.msm.estimate_markov_model(cluster.dtrajs, lag=10, dt_traj='1 ps')\n",
418 | "\n",
419 | "print('fraction of states used = {:f}'.format(msm.active_state_fraction))\n",
420 | "print('fraction of counts used = {:f}'.format(msm.active_count_fraction))\n",
421 | "\n",
422 | "fig, ax, misc = pyemma.plots.plot_contour(\n",
423 | " *data_concatenated.T, msm.pi[dtrajs_concatenated],\n",
424 | " cbar_label='stationary_distribution',\n",
425 | " method='nearest', mask=True)\n",
426 | "ax.scatter(*cluster.clustercenters.T, s=15, c='C1')\n",
427 | "ax.set_xlabel('$\\Phi$')\n",
428 | "ax.set_ylabel('$\\Psi$')\n",
429 | "fig.tight_layout()"
430 | ]
431 | },
432 | {
433 | "cell_type": "markdown",
434 | "metadata": {},
435 | "source": [
436 | "Next, we visualize the first six right eigenvectors:"
437 | ]
438 | },
439 | {
440 | "cell_type": "code",
441 | "execution_count": null,
442 | "metadata": {},
443 | "outputs": [],
444 | "source": [
445 | "eigvec = msm.eigenvectors_right()\n",
446 | "print('first eigenvector is one: {} (min={}, max={})'.format(\n",
447 | " np.allclose(eigvec[:, 0], 1, atol=1e-15), eigvec[:, 0].min(), eigvec[:, 0].max()))\n",
448 | "\n",
449 | "fig, axes = plt.subplots(2, 3, figsize=(12, 6))\n",
450 | "for i, ax in enumerate(axes.flat):\n",
451 | " pyemma.plots.plot_contour(\n",
452 | " *data_concatenated.T, eigvec[dtrajs_concatenated, i + 1], ax=ax, cmap='PiYG',\n",
453 | " cbar_label='{}. right eigenvector'.format(i + 2), mask=True)\n",
454 | " ax.scatter(*cluster.clustercenters.T, s=15, c='C1')\n",
455 | " ax.set_xlabel('$\\Phi$')\n",
456 | " ax.set_ylabel('$\\Psi$')\n",
457 | "fig.tight_layout()"
458 | ]
459 | },
460 | {
461 | "cell_type": "markdown",
462 | "metadata": {},
463 | "source": [
464 | "Again, we have the $(1,\\dots,1)^\\top$ first right eigenvector of the stationary process.\n",
465 | "\n",
466 | "The second to fourth right eigenvectors illustrate the three slowest processes which are (in that order):\n",
467 | "\n",
468 | "- rotation of the $\\Phi$ dihedral,\n",
469 | "- rotation of the $\\Psi$ dihedral when $\\Phi\\approx-2$ rad, and\n",
470 | "- rotation of the $\\Psi$ dihedral when $\\Phi\\approx1$ rad.\n",
471 | "\n",
472 | "Eigenvectors five, six, and seven indicate further processes which, however, relax faster than the lag time and cannot be resolved clearly.\n",
473 | "\n",
474 | "We now proceed with our validation process using a Bayesian MSM with four metastable states:"
475 | ]
476 | },
477 | {
478 | "cell_type": "code",
479 | "execution_count": null,
480 | "metadata": {},
481 | "outputs": [],
482 | "source": [
483 | "nstates = 4\n",
484 | "bayesian_msm = pyemma.msm.bayesian_markov_model(cluster.dtrajs, lag=10, dt_traj='1 ps')\n",
485 | "pyemma.plots.plot_cktest(bayesian_msm.cktest(nstates), units='ps');"
486 | ]
487 | },
488 | {
489 | "cell_type": "markdown",
490 | "metadata": {},
491 | "source": [
492 | "We note that four metastable states are a reasonable choice for our MSM.\n",
493 | "\n",
494 | "In general, the number of metastable states is a modeler's choice; it is adjusted to map the kinetics to be modeled.\n",
495 | "In the current example, increasing the resolution with a higher number of metastable states or resolving only the slowest process between $2$ states would be possible.\n",
496 | "However, the number of states is not arbitrary, as the observed processes in metastable state space need not be Markovian in general.\n",
497 | "A failed Chapman-Kolmogorov test can thus also hint at a bad choice of the number of metastable states.\n",
498 | "\n",
499 | "In order to perform further analysis, we save the model to disk:"
500 | ]
501 | },
502 | {
503 | "cell_type": "code",
504 | "execution_count": null,
505 | "metadata": {},
506 | "outputs": [],
507 | "source": [
508 | "cluster.save('nb4.pyemma', model_name='ala2_cluster', overwrite=True)\n",
509 | "msm.save('nb4.pyemma', model_name='ala2_msm', overwrite=True)\n",
510 | "bayesian_msm.save('nb4.pyemma', model_name='ala2_bayesian_msm', overwrite=True)"
511 | ]
512 | },
513 | {
514 | "cell_type": "markdown",
515 | "metadata": {},
516 | "source": [
517 | "#### Exercise 1\n",
518 | "Load the heavy atom distances into memory, perform TICA (`lag=3` and `dim=2`), discretize with $100$ $k$-means centers and a stride of $10$, and show the ITS convergence."
519 | ]
520 | },
521 | {
522 | "cell_type": "code",
523 | "execution_count": null,
524 | "metadata": {
525 | "solution2": "hidden",
526 | "solution2_first": true
527 | },
528 | "outputs": [],
529 | "source": [
530 | "feat = #FIXME\n",
531 | "feat. #FIXME\n",
532 | "data = #FIXME\n",
533 | "\n",
534 | "tica = #FIXME\n",
535 | "tica_concatenated = #FIXME\n",
536 | "\n",
537 | "cluster = #FIXME\n",
538 | "dtrajs_concatenated = #FIXME\n",
539 | "\n",
540 | "its = #FIXME\n",
541 | "\n",
542 | "fig, axes = plt.subplots(1, 3, figsize=(12, 3))\n",
543 | "pyemma.plots.plot_feature_histograms(tica_concatenated, feature_labels=['IC 1', 'IC 2'], ax=axes[0])\n",
544 | "pyemma.plots.plot_density(*tica_concatenated.T, ax=axes[1], cbar=False, alpha=0.3)\n",
545 | "axes[1].scatter(*cluster.clustercenters.T, s=15, c='C1')\n",
546 | "axes[1].set_xlabel('IC 1')\n",
547 | "axes[1].set_ylabel('IC 2')\n",
548 | "pyemma.plots.plot_implied_timescales(its, ax=axes[2], units='ps')\n",
549 | "fig.tight_layout()"
550 | ]
551 | },
552 | {
553 | "cell_type": "markdown",
554 | "metadata": {
555 | "solution2": "hidden"
556 | },
557 | "source": [
558 | "###### Solution"
559 | ]
560 | },
561 | {
562 | "cell_type": "code",
563 | "execution_count": null,
564 | "metadata": {
565 | "solution2": "hidden"
566 | },
567 | "outputs": [],
568 | "source": [
569 | "feat = pyemma.coordinates.featurizer(pdb)\n",
570 | "pairs = feat.pairs(feat.select_Heavy())\n",
571 | "feat.add_distances(pairs, periodic=False)\n",
572 | "data = pyemma.coordinates.load(files, features=feat)\n",
573 | "\n",
574 | "tica = pyemma.coordinates.tica(data, lag=3, dim=2)\n",
575 | "tica_concatenated = np.concatenate(tica.get_output())\n",
576 | "\n",
577 | "cluster = pyemma.coordinates.cluster_kmeans(tica, k=100, max_iter=50, stride=10)\n",
578 | "dtrajs_concatenated = np.concatenate(cluster.dtrajs)\n",
579 | "\n",
580 | "its = pyemma.msm.its(\n",
581 | " cluster.dtrajs, lags=[1, 2, 5, 10, 20, 50], nits=4, errors='bayes')\n",
582 | "\n",
583 | "fig, axes = plt.subplots(1, 3, figsize=(12, 3))\n",
584 | "pyemma.plots.plot_feature_histograms(tica_concatenated, feature_labels=['IC 1', 'IC 2'], ax=axes[0])\n",
585 | "pyemma.plots.plot_density(*tica_concatenated.T, ax=axes[1], cbar=False, alpha=0.3)\n",
586 | "axes[1].scatter(*cluster.clustercenters.T, s=15, c='C1')\n",
587 | "axes[1].set_xlabel('IC 1')\n",
588 | "axes[1].set_ylabel('IC 2')\n",
589 | "pyemma.plots.plot_implied_timescales(its, ax=axes[2], units='ps')\n",
590 | "fig.tight_layout()"
591 | ]
592 | },
593 | {
594 | "cell_type": "markdown",
595 | "metadata": {},
596 | "source": [
597 | "#### Exercise 2\n",
598 | "Estimate an MSM at lag time $10$ ps with `dt_traj='1 ps'` and visualize the stationary distribution using a two-dimensional colored scatter plot of all data points in TICA space."
599 | ]
600 | },
601 | {
602 | "cell_type": "code",
603 | "execution_count": null,
604 | "metadata": {
605 | "solution2": "hidden",
606 | "solution2_first": true
607 | },
608 | "outputs": [],
609 | "source": [
610 | "msm = #FIXME\n",
611 | "\n",
612 | "print('fraction of states used = {:f}'. #FIXME\n",
613 | "print('fraction of counts used = {:f}'. #FIXME\n",
614 | "\n",
615 | "fig, ax, misc = pyemma.plots.plot_contour(\n",
616 | " *tica_concatenated.T, msm.pi[dtrajs_concatenated],\n",
617 | " cbar_label='stationary_distribution',\n",
618 | " method='nearest', mask=True)\n",
619 | "ax.scatter(*cluster.clustercenters.T, s=15, c='C1')\n",
620 | "ax.set_xlabel('IC 1')\n",
621 | "ax.set_ylabel('IC 2')\n",
622 | "fig.tight_layout()"
623 | ]
624 | },
625 | {
626 | "cell_type": "markdown",
627 | "metadata": {
628 | "solution2": "hidden"
629 | },
630 | "source": [
631 | "###### Solution"
632 | ]
633 | },
634 | {
635 | "cell_type": "code",
636 | "execution_count": null,
637 | "metadata": {
638 | "solution2": "hidden"
639 | },
640 | "outputs": [],
641 | "source": [
642 | "msm = pyemma.msm.estimate_markov_model(cluster.dtrajs, lag=10, dt_traj='1 ps')\n",
643 | "\n",
644 | "print('fraction of states used = {:f}'.format(msm.active_state_fraction))\n",
645 | "print('fraction of counts used = {:f}'.format(msm.active_count_fraction))\n",
646 | "\n",
647 | "fig, ax, misc = pyemma.plots.plot_contour(\n",
648 | " *tica_concatenated.T, msm.pi[dtrajs_concatenated],\n",
649 | " cbar_label='stationary_distribution',\n",
650 | " method='nearest', mask=True)\n",
651 | "ax.scatter(*cluster.clustercenters.T, s=15, c='C1')\n",
652 | "ax.set_xlabel('IC 1')\n",
653 | "ax.set_ylabel('IC 2')\n",
654 | "fig.tight_layout()"
655 | ]
656 | },
657 | {
658 | "cell_type": "markdown",
659 | "metadata": {},
660 | "source": [
661 | "#### Exercise 3\n",
662 | "Visualize the first six right eigenvectors."
663 | ]
664 | },
665 | {
666 | "cell_type": "code",
667 | "execution_count": null,
668 | "metadata": {
669 | "solution2": "hidden",
670 | "solution2_first": true
671 | },
672 | "outputs": [],
673 | "source": [
674 | "eigvec = #FIXME\n",
675 | "print('first eigenvector is one: {} (min={}, max={})'.format( #FIXME\n",
676 | "\n",
677 | "fig, axes = plt.subplots(2, 3, figsize=(12, 6))\n",
678 | "for i, ax in enumerate(axes.flat):\n",
679 | " pyemma.plots.plot_contour( #FIXME )\n",
680 | " ax.scatter(*cluster.clustercenters.T, s=15, c='C1')\n",
681 | " ax.set_xlabel('IC 1')\n",
682 | " ax.set_ylabel('IC 2')\n",
683 | "fig.tight_layout()"
684 | ]
685 | },
686 | {
687 | "cell_type": "markdown",
688 | "metadata": {
689 | "solution2": "hidden"
690 | },
691 | "source": [
692 | "###### Solution"
693 | ]
694 | },
695 | {
696 | "cell_type": "code",
697 | "execution_count": null,
698 | "metadata": {
699 | "solution2": "hidden"
700 | },
701 | "outputs": [],
702 | "source": [
703 | "eigvec = msm.eigenvectors_right()\n",
704 | "print('first eigenvector is one: {} (min={}, max={})'.format(\n",
705 | " np.allclose(eigvec[:, 0], 1, atol=1e-15), eigvec[:, 0].min(), eigvec[:, 0].max()))\n",
706 | "\n",
707 | "fig, axes = plt.subplots(2, 3, figsize=(12, 6))\n",
708 | "for i, ax in enumerate(axes.flat):\n",
709 | " pyemma.plots.plot_contour(\n",
710 | " *tica_concatenated.T, eigvec[dtrajs_concatenated, i + 1], ax=ax, cmap='PiYG',\n",
711 | " cbar_label='{}. right eigenvector'.format(i + 2), mask=True)\n",
712 | " ax.scatter(*cluster.clustercenters.T, s=15, c='C1')\n",
713 | " ax.set_xlabel('IC 1')\n",
714 | " ax.set_ylabel('IC 2')\n",
715 | "fig.tight_layout()"
716 | ]
717 | },
718 | {
719 | "cell_type": "markdown",
720 | "metadata": {},
721 | "source": [
722 | "Can you already guess from eigenvectors two to four what the metastable states are?\n",
723 | "\n",
724 | "#### Exercise 4\n",
725 | "Estimate a Bayesian MSM at lag time $10$ ps and perform/show a CK test for four metastable states."
726 | ]
727 | },
728 | {
729 | "cell_type": "code",
730 | "execution_count": null,
731 | "metadata": {
732 | "solution2": "hidden",
733 | "solution2_first": true
734 | },
735 | "outputs": [],
736 | "source": [
737 | "bayesian_msm = #FIXME\n",
738 | "\n",
739 | "nstates = 4\n",
740 | "pyemma.plots. #FIXME"
741 | ]
742 | },
743 | {
744 | "cell_type": "markdown",
745 | "metadata": {
746 | "solution2": "hidden"
747 | },
748 | "source": [
749 | "###### Solution"
750 | ]
751 | },
752 | {
753 | "cell_type": "code",
754 | "execution_count": null,
755 | "metadata": {
756 | "solution2": "hidden"
757 | },
758 | "outputs": [],
759 | "source": [
760 | "bayesian_msm = pyemma.msm.bayesian_markov_model(cluster.dtrajs, lag=10, dt_traj='1 ps')\n",
761 | "\n",
762 | "nstates = 4\n",
763 | "pyemma.plots.plot_cktest(bayesian_msm.cktest(nstates), units='ps');"
764 | ]
765 | },
766 | {
767 | "cell_type": "markdown",
768 | "metadata": {},
769 | "source": [
770 | "#### Exercise 5\n",
771 | "Save the MSM, Bayesian MSM and Cluster objects to the same file as before.\n",
772 | "Use the model names `ala2tica_msm`, `ala2tica_bayesian_msm` and `ala2tica_cluster`, respectively.\n",
773 | "Further, include the TICA object with model name `ala2tica_tica`."
774 | ]
775 | },
776 | {
777 | "cell_type": "code",
778 | "execution_count": null,
779 | "metadata": {
780 | "solution2": "hidden",
781 | "solution2_first": true
782 | },
783 | "outputs": [],
784 | "source": [
785 | "#FIXME "
786 | ]
787 | },
788 | {
789 | "cell_type": "markdown",
790 | "metadata": {
791 | "solution2": "hidden"
792 | },
793 | "source": [
794 | "###### Solution"
795 | ]
796 | },
797 | {
798 | "cell_type": "code",
799 | "execution_count": null,
800 | "metadata": {
801 | "solution2": "hidden"
802 | },
803 | "outputs": [],
804 | "source": [
805 | "cluster.save('nb4.pyemma', model_name='ala2tica_cluster', overwrite=True)\n",
806 | "msm.save('nb4.pyemma', model_name='ala2tica_msm', overwrite=True)\n",
807 | "bayesian_msm.save('nb4.pyemma', model_name='ala2tica_bayesian_msm', overwrite=True)\n",
808 | "tica.save('nb4.pyemma', model_name='ala2tica_tica', overwrite=True)"
809 | ]
810 | },
811 | {
812 | "cell_type": "markdown",
813 | "metadata": {},
814 | "source": [
815 | "## Wrapping up\n",
816 | "In this notebook, we have learned how to analyze an MSM and how to extract kinetic information from the model. In detail, we have used\n",
817 | "- the `active_state_fraction`, `active_count_fraction`, and `active_set` attributes of an MSM object to see how much (and which parts) of our data form the largest connected set represented by the MSM,\n",
818 | "- the `stationary_distribution` (or `pi`) attribute of an MSM object to access its stationary vector,\n",
819 | "- the `eigenvectors_right()` method of an MSM object to access its (right) eigenvectors.\n",
820 | "\n",
821 | "For visualizing MSMs or kinetic networks we used\n",
822 | "- `pyemma.plots.plot_density()`,\n",
823 | "- `pyemma.plots.plot_contour()`, and\n",
824 | "- `pyemma.plots.plot_cktest()`."
825 | ]
826 | }
827 | ],
828 | "metadata": {
829 | "kernelspec": {
830 | "display_name": "Python 3",
831 | "language": "python",
832 | "name": "python3"
833 | },
834 | "language_info": {
835 | "codemirror_mode": {
836 | "name": "ipython",
837 | "version": 3
838 | },
839 | "file_extension": ".py",
840 | "mimetype": "text/x-python",
841 | "name": "python",
842 | "nbconvert_exporter": "python",
843 | "pygments_lexer": "ipython3",
844 | "version": "3.6.5"
845 | },
846 | "toc": {
847 | "base_numbering": 1,
848 | "nav_menu": {},
849 | "number_sections": false,
850 | "sideBar": true,
851 | "skip_h1_title": true,
852 | "title_cell": "Table of Contents",
853 | "title_sidebar": "Contents",
854 | "toc_cell": false,
855 | "toc_position": {},
856 | "toc_section_display": true,
857 | "toc_window_display": true
858 | }
859 | },
860 | "nbformat": 4,
861 | "nbformat_minor": 2
862 | }
863 |
--------------------------------------------------------------------------------
/notebooks/static/hmm-backbone-1-385x432.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markovmodel/pyemma_tutorials/6b9183686d2238d4f60c752a73e9b710c667ec10/notebooks/static/hmm-backbone-1-385x432.png
--------------------------------------------------------------------------------
/notebooks/static/hmm-backbone-2-388x526.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markovmodel/pyemma_tutorials/6b9183686d2238d4f60c752a73e9b710c667ec10/notebooks/static/hmm-backbone-2-388x526.png
--------------------------------------------------------------------------------
/notebooks/static/hmm-backbone-3-347x500.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markovmodel/pyemma_tutorials/6b9183686d2238d4f60c752a73e9b710c667ec10/notebooks/static/hmm-backbone-3-347x500.png
--------------------------------------------------------------------------------
/notebooks/static/hmm-backbone-4-367x348.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markovmodel/pyemma_tutorials/6b9183686d2238d4f60c752a73e9b710c667ec10/notebooks/static/hmm-backbone-4-367x348.png
--------------------------------------------------------------------------------
/notebooks/static/hmm-backbone-5-260x374.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markovmodel/pyemma_tutorials/6b9183686d2238d4f60c752a73e9b710c667ec10/notebooks/static/hmm-backbone-5-260x374.png
--------------------------------------------------------------------------------
/notebooks/static/pentapeptide-states.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markovmodel/pyemma_tutorials/6b9183686d2238d4f60c752a73e9b710c667ec10/notebooks/static/pentapeptide-states.png
--------------------------------------------------------------------------------
/notebooks/static/pentapeptide-structure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markovmodel/pyemma_tutorials/6b9183686d2238d4f60c752a73e9b710c667ec10/notebooks/static/pentapeptide-structure.png
--------------------------------------------------------------------------------
/pyemma_tutorials/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Jupyter notebook launcher for PyEMMA's tutorials series.
3 | """
4 |
5 | from ._version import get_versions
6 | __version__ = get_versions()['version']
7 | del get_versions
8 |
9 | from .util import notebook_location, configs_location, run_dir
10 |
--------------------------------------------------------------------------------
/pyemma_tutorials/__main__.py:
--------------------------------------------------------------------------------
1 |
2 | # this file is here for executing the package like this:
3 | # python -m pyemma_tutorials
4 |
5 |
6 | if __name__ == '__main__':
7 | from .cli import main
8 | main()
9 |
--------------------------------------------------------------------------------
/pyemma_tutorials/_version.py:
--------------------------------------------------------------------------------
1 |
2 | # This file helps to compute a version number in source trees obtained from
3 | # git-archive tarball (such as those provided by githubs download-from-tag
4 | # feature). Distribution tarballs (built by setup.py sdist) and build
5 | # directories (produced by setup.py build) will contain a much shorter file
6 | # that just contains the computed version number.
7 |
8 | # This file is released into the public domain. Generated by
9 | # versioneer-0.18 (https://github.com/warner/python-versioneer)
10 |
11 | """Git implementation of _version.py."""
12 |
13 | import errno
14 | import os
15 | import re
16 | import subprocess
17 | import sys
18 |
19 |
20 | def get_keywords():
21 | """Get the keywords needed to look up the version information."""
22 | # these strings will be replaced by git during git-archive.
23 | # setup.py/versioneer.py will grep for the variable names, so they must
24 | # each be defined on a line of their own. _version.py will just call
25 | # get_keywords().
26 | git_refnames = " (HEAD -> master)"
27 | git_full = "6b9183686d2238d4f60c752a73e9b710c667ec10"
28 | git_date = "2019-05-29 16:02:41 +0200"
29 | keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
30 | return keywords
31 |
32 |
33 | class VersioneerConfig:
34 | """Container for Versioneer configuration parameters."""
35 |
36 |
37 | def get_config():
38 | """Create, populate and return the VersioneerConfig() object."""
39 | # these strings are filled in when 'setup.py versioneer' creates
40 | # _version.py
41 | cfg = VersioneerConfig()
42 | cfg.VCS = "git"
43 | cfg.style = "pep440"
44 | cfg.tag_prefix = "v"
45 | cfg.parentdir_prefix = "None"
46 | cfg.versionfile_source = "pyemma_tutorials/_version.py"
47 | cfg.verbose = False
48 | return cfg
49 |
50 |
51 | class NotThisMethod(Exception):
52 | """Exception raised if a method is not valid for the current scenario."""
53 |
54 |
55 | LONG_VERSION_PY = {}
56 | HANDLERS = {}
57 |
58 |
59 | def register_vcs_handler(vcs, method): # decorator
60 | """Decorator to mark a method as the handler for a particular VCS."""
61 | def decorate(f):
62 | """Store f in HANDLERS[vcs][method]."""
63 | if vcs not in HANDLERS:
64 | HANDLERS[vcs] = {}
65 | HANDLERS[vcs][method] = f
66 | return f
67 | return decorate
68 |
69 |
70 | def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
71 | env=None):
72 | """Call the given command(s)."""
73 | assert isinstance(commands, list)
74 | p = None
75 | for c in commands:
76 | try:
77 | dispcmd = str([c] + args)
78 | # remember shell=False, so use git.cmd on windows, not just git
79 | p = subprocess.Popen([c] + args, cwd=cwd, env=env,
80 | stdout=subprocess.PIPE,
81 | stderr=(subprocess.PIPE if hide_stderr
82 | else None))
83 | break
84 | except EnvironmentError:
85 | e = sys.exc_info()[1]
86 | if e.errno == errno.ENOENT:
87 | continue
88 | if verbose:
89 | print("unable to run %s" % dispcmd)
90 | print(e)
91 | return None, None
92 | else:
93 | if verbose:
94 | print("unable to find command, tried %s" % (commands,))
95 | return None, None
96 | stdout = p.communicate()[0].strip()
97 | if sys.version_info[0] >= 3:
98 | stdout = stdout.decode()
99 | if p.returncode != 0:
100 | if verbose:
101 | print("unable to run %s (error)" % dispcmd)
102 | print("stdout was %s" % stdout)
103 | return None, p.returncode
104 | return stdout, p.returncode
105 |
106 |
107 | def versions_from_parentdir(parentdir_prefix, root, verbose):
108 | """Try to determine the version from the parent directory name.
109 |
110 | Source tarballs conventionally unpack into a directory that includes both
111 | the project name and a version string. We will also support searching up
112 | two directory levels for an appropriately named parent directory
113 | """
114 | rootdirs = []
115 |
116 | for i in range(3):
117 | dirname = os.path.basename(root)
118 | if dirname.startswith(parentdir_prefix):
119 | return {"version": dirname[len(parentdir_prefix):],
120 | "full-revisionid": None,
121 | "dirty": False, "error": None, "date": None}
122 | else:
123 | rootdirs.append(root)
124 | root = os.path.dirname(root) # up a level
125 |
126 | if verbose:
127 | print("Tried directories %s but none started with prefix %s" %
128 | (str(rootdirs), parentdir_prefix))
129 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
130 |
131 |
132 | @register_vcs_handler("git", "get_keywords")
133 | def git_get_keywords(versionfile_abs):
134 | """Extract version information from the given file."""
135 | # the code embedded in _version.py can just fetch the value of these
136 | # keywords. When used from setup.py, we don't want to import _version.py,
137 | # so we do it with a regexp instead. This function is not used from
138 | # _version.py.
139 | keywords = {}
140 | try:
141 | f = open(versionfile_abs, "r")
142 | for line in f.readlines():
143 | if line.strip().startswith("git_refnames ="):
144 | mo = re.search(r'=\s*"(.*)"', line)
145 | if mo:
146 | keywords["refnames"] = mo.group(1)
147 | if line.strip().startswith("git_full ="):
148 | mo = re.search(r'=\s*"(.*)"', line)
149 | if mo:
150 | keywords["full"] = mo.group(1)
151 | if line.strip().startswith("git_date ="):
152 | mo = re.search(r'=\s*"(.*)"', line)
153 | if mo:
154 | keywords["date"] = mo.group(1)
155 | f.close()
156 | except EnvironmentError:
157 | pass
158 | return keywords
159 |
160 |
161 | @register_vcs_handler("git", "keywords")
162 | def git_versions_from_keywords(keywords, tag_prefix, verbose):
163 | """Get version information from git keywords."""
164 | if not keywords:
165 | raise NotThisMethod("no keywords at all, weird")
166 | date = keywords.get("date")
167 | if date is not None:
168 | # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant
169 | # datestamp. However we prefer "%ci" (which expands to an "ISO-8601
170 | # -like" string, which we must then edit to make compliant), because
171 | # it's been around since git-1.5.3, and it's too difficult to
172 | # discover which version we're using, or to work around using an
173 | # older one.
174 | date = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
175 | refnames = keywords["refnames"].strip()
176 | if refnames.startswith("$Format"):
177 | if verbose:
178 | print("keywords are unexpanded, not using")
179 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
180 | refs = set([r.strip() for r in refnames.strip("()").split(",")])
181 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
182 | # just "foo-1.0". If we see a "tag: " prefix, prefer those.
183 | TAG = "tag: "
184 | tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
185 | if not tags:
186 | # Either we're using git < 1.8.3, or there really are no tags. We use
187 | # a heuristic: assume all version tags have a digit. The old git %d
188 | # expansion behaves like git log --decorate=short and strips out the
189 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish
190 | # between branches and tags. By ignoring refnames without digits, we
191 | # filter out many common branch names like "release" and
192 | # "stabilization", as well as "HEAD" and "master".
193 | tags = set([r for r in refs if re.search(r'\d', r)])
194 | if verbose:
195 | print("discarding '%s', no digits" % ",".join(refs - tags))
196 | if verbose:
197 | print("likely tags: %s" % ",".join(sorted(tags)))
198 | for ref in sorted(tags):
199 | # sorting will prefer e.g. "2.0" over "2.0rc1"
200 | if ref.startswith(tag_prefix):
201 | r = ref[len(tag_prefix):]
202 | if verbose:
203 | print("picking %s" % r)
204 | return {"version": r,
205 | "full-revisionid": keywords["full"].strip(),
206 | "dirty": False, "error": None,
207 | "date": date}
208 | # no suitable tags, so version is "0+unknown", but full hex is still there
209 | if verbose:
210 | print("no suitable tags, using unknown + full revision id")
211 | return {"version": "0+unknown",
212 | "full-revisionid": keywords["full"].strip(),
213 | "dirty": False, "error": "no suitable tags", "date": None}
214 |
215 |
216 | @register_vcs_handler("git", "pieces_from_vcs")
217 | def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
218 | """Get version from 'git describe' in the root of the source tree.
219 |
220 | This only gets called if the git-archive 'subst' keywords were *not*
221 | expanded, and _version.py hasn't already been rewritten with a short
222 | version string, meaning we're inside a checked out source tree.
223 | """
224 | GITS = ["git"]
225 | if sys.platform == "win32":
226 | GITS = ["git.cmd", "git.exe"]
227 |
228 | out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root,
229 | hide_stderr=True)
230 | if rc != 0:
231 | if verbose:
232 | print("Directory %s not under git control" % root)
233 | raise NotThisMethod("'git rev-parse --git-dir' returned error")
234 |
235 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
236 | # if there isn't one, this yields HEX[-dirty] (no NUM)
237 | describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty",
238 | "--always", "--long",
239 | "--match", "%s*" % tag_prefix],
240 | cwd=root)
241 | # --long was added in git-1.5.5
242 | if describe_out is None:
243 | raise NotThisMethod("'git describe' failed")
244 | describe_out = describe_out.strip()
245 | full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
246 | if full_out is None:
247 | raise NotThisMethod("'git rev-parse' failed")
248 | full_out = full_out.strip()
249 |
250 | pieces = {}
251 | pieces["long"] = full_out
252 | pieces["short"] = full_out[:7] # maybe improved later
253 | pieces["error"] = None
254 |
255 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
256 | # TAG might have hyphens.
257 | git_describe = describe_out
258 |
259 | # look for -dirty suffix
260 | dirty = git_describe.endswith("-dirty")
261 | pieces["dirty"] = dirty
262 | if dirty:
263 | git_describe = git_describe[:git_describe.rindex("-dirty")]
264 |
265 | # now we have TAG-NUM-gHEX or HEX
266 |
267 | if "-" in git_describe:
268 | # TAG-NUM-gHEX
269 | mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
270 | if not mo:
271 | # unparseable. Maybe git-describe is misbehaving?
272 | pieces["error"] = ("unable to parse git-describe output: '%s'"
273 | % describe_out)
274 | return pieces
275 |
276 | # tag
277 | full_tag = mo.group(1)
278 | if not full_tag.startswith(tag_prefix):
279 | if verbose:
280 | fmt = "tag '%s' doesn't start with prefix '%s'"
281 | print(fmt % (full_tag, tag_prefix))
282 | pieces["error"] = ("tag '%s' doesn't start with prefix '%s'"
283 | % (full_tag, tag_prefix))
284 | return pieces
285 | pieces["closest-tag"] = full_tag[len(tag_prefix):]
286 |
287 | # distance: number of commits since tag
288 | pieces["distance"] = int(mo.group(2))
289 |
290 | # commit: short hex revision ID
291 | pieces["short"] = mo.group(3)
292 |
293 | else:
294 | # HEX: no tags
295 | pieces["closest-tag"] = None
296 | count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"],
297 | cwd=root)
298 | pieces["distance"] = int(count_out) # total number of commits
299 |
300 | # commit date: see ISO-8601 comment in git_versions_from_keywords()
301 | date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"],
302 | cwd=root)[0].strip()
303 | pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
304 |
305 | return pieces
306 |
307 |
308 | def plus_or_dot(pieces):
309 | """Return a + if we don't already have one, else return a ."""
310 | if "+" in pieces.get("closest-tag", ""):
311 | return "."
312 | return "+"
313 |
314 |
315 | def render_pep440(pieces):
316 | """Build up version string, with post-release "local version identifier".
317 |
318 | Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
319 | get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty
320 |
321 | Exceptions:
322 | 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
323 | """
324 | if pieces["closest-tag"]:
325 | rendered = pieces["closest-tag"]
326 | if pieces["distance"] or pieces["dirty"]:
327 | rendered += plus_or_dot(pieces)
328 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
329 | if pieces["dirty"]:
330 | rendered += ".dirty"
331 | else:
332 | # exception #1
333 | rendered = "0+untagged.%d.g%s" % (pieces["distance"],
334 | pieces["short"])
335 | if pieces["dirty"]:
336 | rendered += ".dirty"
337 | return rendered
338 |
339 |
340 | def render_pep440_pre(pieces):
341 | """TAG[.post.devDISTANCE] -- No -dirty.
342 |
343 | Exceptions:
344 | 1: no tags. 0.post.devDISTANCE
345 | """
346 | if pieces["closest-tag"]:
347 | rendered = pieces["closest-tag"]
348 | if pieces["distance"]:
349 | rendered += ".post.dev%d" % pieces["distance"]
350 | else:
351 | # exception #1
352 | rendered = "0.post.dev%d" % pieces["distance"]
353 | return rendered
354 |
355 |
356 | def render_pep440_post(pieces):
357 | """TAG[.postDISTANCE[.dev0]+gHEX] .
358 |
359 | The ".dev0" means dirty. Note that .dev0 sorts backwards
360 | (a dirty tree will appear "older" than the corresponding clean one),
361 | but you shouldn't be releasing software with -dirty anyway.
362 |
363 | Exceptions:
364 | 1: no tags. 0.postDISTANCE[.dev0]
365 | """
366 | if pieces["closest-tag"]:
367 | rendered = pieces["closest-tag"]
368 | if pieces["distance"] or pieces["dirty"]:
369 | rendered += ".post%d" % pieces["distance"]
370 | if pieces["dirty"]:
371 | rendered += ".dev0"
372 | rendered += plus_or_dot(pieces)
373 | rendered += "g%s" % pieces["short"]
374 | else:
375 | # exception #1
376 | rendered = "0.post%d" % pieces["distance"]
377 | if pieces["dirty"]:
378 | rendered += ".dev0"
379 | rendered += "+g%s" % pieces["short"]
380 | return rendered
381 |
382 |
383 | def render_pep440_old(pieces):
384 | """TAG[.postDISTANCE[.dev0]] .
385 |
386 | The ".dev0" means dirty.
387 |
388 | Exceptions:
389 | 1: no tags. 0.postDISTANCE[.dev0]
390 | """
391 | if pieces["closest-tag"]:
392 | rendered = pieces["closest-tag"]
393 | if pieces["distance"] or pieces["dirty"]:
394 | rendered += ".post%d" % pieces["distance"]
395 | if pieces["dirty"]:
396 | rendered += ".dev0"
397 | else:
398 | # exception #1
399 | rendered = "0.post%d" % pieces["distance"]
400 | if pieces["dirty"]:
401 | rendered += ".dev0"
402 | return rendered
403 |
404 |
405 | def render_git_describe(pieces):
406 | """TAG[-DISTANCE-gHEX][-dirty].
407 |
408 | Like 'git describe --tags --dirty --always'.
409 |
410 | Exceptions:
411 | 1: no tags. HEX[-dirty] (note: no 'g' prefix)
412 | """
413 | if pieces["closest-tag"]:
414 | rendered = pieces["closest-tag"]
415 | if pieces["distance"]:
416 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
417 | else:
418 | # exception #1
419 | rendered = pieces["short"]
420 | if pieces["dirty"]:
421 | rendered += "-dirty"
422 | return rendered
423 |
424 |
425 | def render_git_describe_long(pieces):
426 | """TAG-DISTANCE-gHEX[-dirty].
427 |
428 | Like 'git describe --tags --dirty --always --long'.
429 | The distance/hash is unconditional.
430 |
431 | Exceptions:
432 | 1: no tags. HEX[-dirty] (note: no 'g' prefix)
433 | """
434 | if pieces["closest-tag"]:
435 | rendered = pieces["closest-tag"]
436 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
437 | else:
438 | # exception #1
439 | rendered = pieces["short"]
440 | if pieces["dirty"]:
441 | rendered += "-dirty"
442 | return rendered
443 |
444 |
445 | def render(pieces, style):
446 | """Render the given version pieces into the requested style."""
447 | if pieces["error"]:
448 | return {"version": "unknown",
449 | "full-revisionid": pieces.get("long"),
450 | "dirty": None,
451 | "error": pieces["error"],
452 | "date": None}
453 |
454 | if not style or style == "default":
455 | style = "pep440" # the default
456 |
457 | if style == "pep440":
458 | rendered = render_pep440(pieces)
459 | elif style == "pep440-pre":
460 | rendered = render_pep440_pre(pieces)
461 | elif style == "pep440-post":
462 | rendered = render_pep440_post(pieces)
463 | elif style == "pep440-old":
464 | rendered = render_pep440_old(pieces)
465 | elif style == "git-describe":
466 | rendered = render_git_describe(pieces)
467 | elif style == "git-describe-long":
468 | rendered = render_git_describe_long(pieces)
469 | else:
470 | raise ValueError("unknown style '%s'" % style)
471 |
472 | return {"version": rendered, "full-revisionid": pieces["long"],
473 | "dirty": pieces["dirty"], "error": None,
474 | "date": pieces.get("date")}
475 |
476 |
477 | def get_versions():
478 | """Get version information or return default if unable to do so."""
479 | # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
480 | # __file__, we can work backwards from there to the root. Some
481 | # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
482 | # case we can only use expanded keywords.
483 |
484 | cfg = get_config()
485 | verbose = cfg.verbose
486 |
487 | try:
488 | return git_versions_from_keywords(get_keywords(), cfg.tag_prefix,
489 | verbose)
490 | except NotThisMethod:
491 | pass
492 |
493 | try:
494 | root = os.path.realpath(__file__)
495 | # versionfile_source is the relative path from the top of the source
496 | # tree (where the .git directory might live) to this file. Invert
497 | # this to find the root from __file__.
498 | for i in cfg.versionfile_source.split('/'):
499 | root = os.path.dirname(root)
500 | except NameError:
501 | return {"version": "0+unknown", "full-revisionid": None,
502 | "dirty": None,
503 | "error": "unable to find root of source tree",
504 | "date": None}
505 |
506 | try:
507 | pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose)
508 | return render(pieces, cfg.style)
509 | except NotThisMethod:
510 | pass
511 |
512 | try:
513 | if cfg.parentdir_prefix:
514 | return versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
515 | except NotThisMethod:
516 | pass
517 |
518 | return {"version": "0+unknown", "full-revisionid": None,
519 | "dirty": None,
520 | "error": "unable to compute version", "date": None}
521 |
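Note: for orientation, here is a condensed re-statement of the pep440 style that `render_pep440()` above implements. This is an illustrative sketch, not the vendored code; the tags and hashes are invented.

    # Condensed illustration of the pep440 rendering rules (not the vendored
    # implementation); `pieces` mimics the dict built by git_pieces_from_vcs().
    def pep440(pieces):
        tag, dist = pieces["closest-tag"], pieces["distance"]
        sha, dirty = pieces["short"], pieces["dirty"]
        if tag:
            out = tag
            if dist or dirty:
                # plus_or_dot(): '.' if the tag already carries a '+' local part
                out += ("." if "+" in tag else "+") + "%d.g%s" % (dist, sha)
        else:
            out = "0+untagged.%d.g%s" % (dist, sha)
        if dirty:
            out += ".dirty"
        return out

    print(pep440({"closest-tag": "2.0", "distance": 0,
                  "short": "1234abc", "dirty": False}))  # 2.0
    print(pep440({"closest-tag": "2.0", "distance": 3,
                  "short": "1234abc", "dirty": True}))   # 2.0+3.g1234abc.dirty
    print(pep440({"closest-tag": None, "distance": 42,
                  "short": "1234abc", "dirty": False}))  # 0+untagged.42.g1234abc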
--------------------------------------------------------------------------------
/pyemma_tutorials/cli.py:
--------------------------------------------------------------------------------
1 |
2 | def _nglview_pip_installed_workaround():
3 | # This is a workaround for users who installed nglview via pip. The XDG_DATA_DIR would otherwise be searched for the
4 | # javascript nbextensions, which could mix up different versions of the widget.
5 | # Further info:
6 | # https://jupyter.readthedocs.io/en/latest/migrating.html?highlight=data-dir#finding-the-location-of-important-files
7 | # https://github.com/arose/nglview/issues/696#issuecomment-332850270
8 | # https://github.com/arose/nglview/issues/718#issuecomment-346041897
9 | import os
10 | os.environ['JUPYTER_DATA_DIR'] = 'non-sense'
11 | assert os.getenv('JUPYTER_DATA_DIR', False)
12 |
13 |
14 | def main():
15 | from notebook.notebookapp import main as main_
16 | from .util import configs_location
17 |
18 | # main_ takes an argv list and kwargs
19 | notebook_cfg, notebook_cfg_json = configs_location()
20 |
21 | _nglview_pip_installed_workaround()
22 |
23 | # extend passed arguments with our config files
24 | import sys
25 | argv = sys.argv[1:] + ['--config=%s' % notebook_cfg, '--config=%s' % notebook_cfg_json]
26 | print('invoking notebook server with arguments:', argv)
27 | main_(argv=argv)
28 |
29 |
30 | if __name__ == '__main__':
31 | main()
32 |
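Usage sketch: `main()` forwards everything on the command line to the Jupyter notebook server, plus the two packaged config files. A hypothetical programmatic launch (the `--no-browser` and `--port` flags are standard notebook options, shown only as examples):

    # Mirrors the `pyemma_tutorials` console script registered in setup.py;
    # extra argv entries are passed straight through to the notebook server.
    import sys
    from pyemma_tutorials.cli import main

    sys.argv = ["pyemma_tutorials", "--no-browser", "--port=8890"]
    main()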
--------------------------------------------------------------------------------
/pyemma_tutorials/jupyter_notebook_config.json:
--------------------------------------------------------------------------------
1 | {
2 | "load_extensions": {
3 | "jupyter-matplotlib/extension": true,
4 | "jupyter-js-widgets/extension": true,
5 | "nbextensions_configurator/config_menu/main": true,
6 | "contrib_nbextensions_help_item/main": true,
7 | "nglview-js-widgets/extension": true,
8 | "nglview_main": false,
9 | "exercise2/main": true,
10 | "toc2/main": true
11 | }
12 | }
13 |
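This file is handed to the notebook server via the second `--config` argument built in cli.py. A quick sanity-check sketch that the packaged JSON parses and lists the enabled extensions (uses `configs_location()` from util.py below):

    import json
    from pyemma_tutorials.util import configs_location

    _, cfg_json = configs_location()
    with open(cfg_json) as fh:
        enabled = json.load(fh)["load_extensions"]
    # prints the extension ids that are switched on, e.g. exercise2/main
    print(sorted(name for name, on in enabled.items() if on))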
--------------------------------------------------------------------------------
/pyemma_tutorials/jupyter_notebook_config.py:
--------------------------------------------------------------------------------
1 | # Configuration file for jupyter-notebook.
2 |
3 | #------------------------------------------------------------------------------
4 | # Application(SingletonConfigurable) configuration
5 | #------------------------------------------------------------------------------
6 |
7 | ## This is an application.
8 |
9 | ## The date format used by logging formatters for %(asctime)s
10 | #c.Application.log_datefmt = '%Y-%m-%d %H:%M:%S'
11 |
12 | ## The Logging format template
13 | #c.Application.log_format = '[%(name)s]%(highlevel)s %(message)s'
14 |
15 | ## Set the log level by value or name.
16 | #c.Application.log_level = 30
17 |
18 | #------------------------------------------------------------------------------
19 | # JupyterApp(Application) configuration
20 | #------------------------------------------------------------------------------
21 | ## Base class for Jupyter applications
22 |
23 | ## Answer yes to any prompts.
24 | #c.JupyterApp.answer_yes = False
25 |
26 | ## Full path of a config file.
27 | #c.JupyterApp.config_file = ''
28 |
29 | ## Specify a config file to load.
30 | #c.JupyterApp.config_file_name = ''
31 |
32 | ## Generate default config file.
33 | #c.JupyterApp.generate_config = False
34 |
35 | #------------------------------------------------------------------------------
36 | # NotebookApp(JupyterApp) configuration
37 | #------------------------------------------------------------------------------
38 |
39 | ## Set the Access-Control-Allow-Credentials: true header
40 | #c.NotebookApp.allow_credentials = False
41 |
42 | ## Set the Access-Control-Allow-Origin header
43 | #
44 | # Use '*' to allow any origin to access your server.
45 | #
46 | # Takes precedence over allow_origin_pat.
47 | c.NotebookApp.allow_origin = 'localhost'
48 |
49 | ## Use a regular expression for the Access-Control-Allow-Origin header
50 | #
51 | # Requests from an origin matching the expression will get replies with:
52 | #
53 | # Access-Control-Allow-Origin: origin
54 | #
55 | # where `origin` is the origin of the request.
56 | #
57 | # Ignored if allow_origin is set.
58 | #c.NotebookApp.allow_origin_pat = ''
59 |
60 | ## Allow password to be changed at login for the notebook server.
61 | #
62 | # While logging in with a token, the notebook server UI will give the user the
63 | # opportunity to enter a new password at the same time, which will replace the
64 | # token login mechanism.
65 | #
66 | # This can be set to false to prevent changing password from the UI/API.
67 | #c.NotebookApp.allow_password_change = True
68 |
69 | ## Whether to allow the user to run the notebook as root.
70 | #c.NotebookApp.allow_root = False
71 |
72 | ## DEPRECATED use base_url
73 | #c.NotebookApp.base_project_url = '/'
74 |
75 | ## The base URL for the notebook server.
76 | #
77 | # Leading and trailing slashes can be omitted, and will automatically be added.
78 | #c.NotebookApp.base_url = '/'
79 |
80 | ## Specify what command to use to invoke a web browser when opening the notebook.
81 | # If not specified, the default browser will be determined by the `webbrowser`
82 | # standard library module, which allows setting of the BROWSER environment
83 | # variable to override it.
84 | #c.NotebookApp.browser = ''
85 |
86 | ## The full path to an SSL/TLS certificate file.
87 | #c.NotebookApp.certfile = ''
88 |
89 | ## The full path to a certificate authority certificate for SSL/TLS client
90 | # authentication.
91 | #c.NotebookApp.client_ca = ''
92 |
93 | ## The config manager class to use
94 | #c.NotebookApp.config_manager_class = 'notebook.services.config.manager.ConfigManager'
95 |
96 | ## The notebook manager class to use.
97 | #c.NotebookApp.contents_manager_class = 'notebook.services.contents.largefilemanager.LargeFileManager'
98 |
99 | ## Extra keyword arguments to pass to `set_secure_cookie`. See tornado's
100 | # set_secure_cookie docs for details.
101 | #c.NotebookApp.cookie_options = {}
102 |
103 | ## The random bytes used to secure cookies. By default this is a new random
104 | # number every time you start the Notebook. Set it to a value in a config file
105 | # to enable logins to persist across server sessions.
106 | #
107 | # Note: Cookie secrets should be kept private, do not share config files with
108 | # cookie_secret stored in plaintext (you can read the value from a file).
109 | #c.NotebookApp.cookie_secret = b''
110 |
111 | ## The file where the cookie secret is stored.
112 | #c.NotebookApp.cookie_secret_file = ''
113 |
114 | ## The default URL to redirect to from `/`
115 | c.NotebookApp.default_url = '/tree#examples'
116 |
117 | ## Disable cross-site-request-forgery protection
118 | #
119 | # Jupyter notebook 4.3.1 introduces protection from cross-site request
120 | # forgeries, requiring API requests to either:
121 | #
122 | # - originate from pages served by this server (validated with XSRF cookie and
123 | # token), or - authenticate with a token
124 | #
125 | # Some anonymous compute resources still desire the ability to run code,
126 | # completely without authentication. These services can disable all
127 | # authentication and security checks, with the full knowledge of what that
128 | # implies.
129 | #c.NotebookApp.disable_check_xsrf = False
130 |
131 | ## Whether to enable MathJax for typesetting math/TeX
132 | #
133 | # MathJax is the javascript library Jupyter uses to render math/LaTeX. It is
134 | # very large, so you may want to disable it if you have a slow internet
135 | # connection, or for offline use of the notebook.
136 | #
137 | # When disabled, equations etc. will appear as their untransformed TeX source.
138 | #c.NotebookApp.enable_mathjax = True
139 |
140 | ## extra paths to look for Javascript notebook extensions
141 | #c.NotebookApp.extra_nbextensions_path = []
142 |
143 | ## handlers that should be loaded at higher priority than the default services
144 | #c.NotebookApp.extra_services = []
145 |
146 | ## Extra paths to search for serving static files.
147 | #
148 | # This allows adding javascript/css to be available from the notebook server
149 | # machine, or overriding individual files in the IPython
150 | #c.NotebookApp.extra_static_paths = []
151 |
152 | ## Extra paths to search for serving jinja templates.
153 | #
154 | # Can be used to override templates from notebook.templates.
155 | #c.NotebookApp.extra_template_paths = []
156 |
157 | ##
158 | #c.NotebookApp.file_to_run = ''
159 |
160 | ## Deprecated: Use minified JS file or not, mainly use during dev to avoid JS
161 | # recompilation
162 | #c.NotebookApp.ignore_minified_js = False
163 |
164 | ## (bytes/sec) Maximum rate at which stream output can be sent on iopub before
165 | # they are limited.
166 | #c.NotebookApp.iopub_data_rate_limit = 1000000
167 |
168 | ## (msgs/sec) Maximum rate at which messages can be sent on iopub before they are
169 | # limited.
170 | #c.NotebookApp.iopub_msg_rate_limit = 1000
171 |
172 | ## The IP address the notebook server will listen on.
173 | c.NotebookApp.ip = 'localhost'
174 |
175 | ## Supply extra arguments that will be passed to Jinja environment.
176 | #c.NotebookApp.jinja_environment_options = {}
177 |
178 | ## Extra variables to supply to jinja templates when rendering.
179 | #c.NotebookApp.jinja_template_vars = {}
180 |
181 | ## The kernel manager class to use.
182 | #c.NotebookApp.kernel_manager_class = 'notebook.services.kernels.kernelmanager.MappingKernelManager'
183 |
184 | ## The kernel spec manager class to use. Should be a subclass of
185 | # `jupyter_client.kernelspec.KernelSpecManager`.
186 | #
187 | # The Api of KernelSpecManager is provisional and might change without warning
188 | # between this version of Jupyter and the next stable one.
189 | #c.NotebookApp.kernel_spec_manager_class = 'jupyter_client.kernelspec.KernelSpecManager'
190 |
191 | ## The full path to a private key file for usage with SSL/TLS.
192 | #c.NotebookApp.keyfile = ''
193 |
194 | ## The login handler class to use.
195 | #c.NotebookApp.login_handler_class = 'notebook.auth.login.LoginHandler'
196 |
197 | ## The logout handler class to use.
198 | #c.NotebookApp.logout_handler_class = 'notebook.auth.logout.LogoutHandler'
199 |
200 | ## The MathJax.js configuration file that is to be used.
201 | #c.NotebookApp.mathjax_config = 'TeX-AMS-MML_HTMLorMML-full,Safe'
202 |
203 | ## A custom url for MathJax.js. Should be in the form of a case-sensitive url to
204 | # MathJax, for example: /static/components/MathJax/MathJax.js
205 | #c.NotebookApp.mathjax_url = ''
206 |
207 | ## Dict of Python modules to load as notebook server extensions. Entry values can
208 | # be used to enable and disable the loading of the extensions. The extensions
209 | # will be loaded in alphabetical order.
210 | # The nbexamples extension lets us keep an unmodified copy of the tutorials in the pkg dir and hand users a working copy
211 | c.NotebookApp.nbserver_extensions = {'nbexamples.handlers': True}
212 | import pyemma_tutorials
213 | c.Examples.reviewed_example_dir = pyemma_tutorials.notebook_location()
214 | c.Examples.unreviewed_example_dir = ''
215 |
216 | ## The directory to use for notebooks and kernels.
217 | run_dir = pyemma_tutorials.run_dir()
218 | c.NotebookApp.notebook_dir = run_dir
219 |
220 | ## Whether to open in a browser after starting. The specific browser used is
221 | # platform dependent and determined by the python standard library `webbrowser`
222 | # module, unless it is overridden using the --browser (NotebookApp.browser)
223 | # configuration option.
224 | #c.NotebookApp.open_browser = True
225 |
226 | ## Hashed password to use for web authentication.
227 | #
228 | # To generate, type in a python/IPython shell:
229 | #
230 | # from notebook.auth import passwd; passwd()
231 | #
232 | # The string should be of the form type:salt:hashed-password.
233 | #c.NotebookApp.password = ''
234 |
235 | ## Forces users to use a password for the Notebook server. This is useful in a
236 | # multi user environment, for instance when everybody in the LAN can access each
237 | # other's machine through ssh.
238 | #
239 | # In such a case, serving the notebook server on localhost is not secure since
240 | # any user can connect to the notebook server via ssh.
241 | #c.NotebookApp.password_required = False
242 |
243 | ## The port the notebook server will listen on.
244 | #c.NotebookApp.port = 8888
245 |
246 | ## The number of additional ports to try if the specified port is not available.
247 | #c.NotebookApp.port_retries = 50
248 |
249 | ## DISABLED: use %pylab or %matplotlib in the notebook to enable matplotlib.
250 | #c.NotebookApp.pylab = 'disabled'
251 |
252 | ## If True, display a button in the dashboard to quit (shutdown the notebook
253 | # server).
254 | #c.NotebookApp.quit_button = True
255 |
256 | ## (sec) Time window used to check the message and data rate limits.
257 | #c.NotebookApp.rate_limit_window = 3
258 |
259 | ## Reraise exceptions encountered loading server extensions?
260 | #c.NotebookApp.reraise_server_extension_failures = False
261 |
262 | ## DEPRECATED use the nbserver_extensions dict instead
263 | #c.NotebookApp.server_extensions = []
264 |
265 | ## The session manager class to use.
266 | #c.NotebookApp.session_manager_class = 'notebook.services.sessions.sessionmanager.SessionManager'
267 |
268 | ## Shut down the server after N seconds with no kernels or terminals running and
269 | # no activity. This can be used together with culling idle kernels
270 | # (MappingKernelManager.cull_idle_timeout) to shutdown the notebook server when
271 | # it's not in use. This is not precisely timed: it may shut down up to a minute
272 | # later. 0 (the default) disables this automatic shutdown.
273 | #c.NotebookApp.shutdown_no_activity_timeout = 0
274 |
275 | ## Supply SSL options for the tornado HTTPServer. See the tornado docs for
276 | # details.
277 | #c.NotebookApp.ssl_options = {}
278 |
279 | ## Supply overrides for terminado. Currently only supports "shell_command".
280 | #c.NotebookApp.terminado_settings = {}
281 |
282 | ## Set to False to disable terminals.
283 | #
284 | # This does *not* make the notebook server more secure by itself. Anything the
285 | # user can do in a terminal, they can also do in a notebook.
286 | #
287 | # Terminals may also be automatically disabled if the terminado package is not
288 | # available.
289 | #c.NotebookApp.terminals_enabled = True
290 |
291 | ## Token used for authenticating first-time connections to the server.
292 | #
293 | # When no password is enabled, the default is to generate a new, random token.
294 | #
295 | # Setting to an empty string disables authentication altogether, which is NOT
296 | # RECOMMENDED.
297 | #c.NotebookApp.token = ''
298 |
299 | ## Supply overrides for the tornado.web.Application that the Jupyter notebook
300 | # uses.
301 | #c.NotebookApp.tornado_settings = {}
302 |
303 | ## Whether or not to trust X-Scheme/X-Forwarded-Proto and X-Real-Ip/X-Forwarded-
304 | # For headers sent by the upstream reverse proxy. Necessary if the proxy handles
305 | # SSL.
306 | #c.NotebookApp.trust_xheaders = False
307 |
308 | ## DEPRECATED, use tornado_settings
309 | #c.NotebookApp.webapp_settings = {}
310 |
311 | ## Specify where to open the notebook on startup. This is the `new` argument
312 | # passed to the standard library method `webbrowser.open`. The behaviour is not
313 | # guaranteed, but depends on browser support. Valid values are:
314 | #
315 | # - 2 opens a new tab,
316 | # - 1 opens a new window,
317 | # - 0 opens in an existing window.
318 | #
319 | # See the `webbrowser.open` documentation for details.
320 | #c.NotebookApp.webbrowser_open_new = 2
321 |
322 | ## Set the tornado compression options for websocket connections.
323 | #
324 | # This value will be returned from
325 | # :meth:`WebSocketHandler.get_compression_options`. None (default) will disable
326 | # compression. A dict (even an empty one) will enable compression.
327 | #
328 | # See the tornado docs for WebSocketHandler.get_compression_options for details.
329 | #c.NotebookApp.websocket_compression_options = None
330 |
331 | ## The base URL for websockets, if it differs from the HTTP server (hint: it
332 | # almost certainly doesn't).
333 | #
334 | # Should be in the form of an HTTP origin: ws[s]://hostname[:port]
335 | #c.NotebookApp.websocket_url = ''
336 |
337 | #------------------------------------------------------------------------------
338 | # ConnectionFileMixin(LoggingConfigurable) configuration
339 | #------------------------------------------------------------------------------
340 |
341 | ## Mixin for configurable classes that work with connection files
342 |
343 | ## JSON file in which to store connection info [default: kernel-.json]
344 | #
345 | # This file will contain the IP, ports, and authentication key needed to connect
346 | # clients to this kernel. By default, this file will be created in the security
347 | # dir of the current profile, but can be specified by absolute path.
348 | #c.ConnectionFileMixin.connection_file = ''
349 |
350 | ## set the control (ROUTER) port [default: random]
351 | #c.ConnectionFileMixin.control_port = 0
352 |
353 | ## set the heartbeat port [default: random]
354 | #c.ConnectionFileMixin.hb_port = 0
355 |
356 | ## set the iopub (PUB) port [default: random]
357 | #c.ConnectionFileMixin.iopub_port = 0
358 |
359 | ## Set the kernel's IP address [default localhost]. If the IP address is
360 | # something other than localhost, then Consoles on other machines will be able
361 | # to connect to the Kernel, so be careful!
362 | #c.ConnectionFileMixin.ip = ''
363 |
364 | ## set the shell (ROUTER) port [default: random]
365 | #c.ConnectionFileMixin.shell_port = 0
366 |
367 | ## set the stdin (ROUTER) port [default: random]
368 | #c.ConnectionFileMixin.stdin_port = 0
369 |
370 | ##
371 | #c.ConnectionFileMixin.transport = 'tcp'
372 |
373 | #------------------------------------------------------------------------------
374 | # KernelManager(ConnectionFileMixin) configuration
375 | #------------------------------------------------------------------------------
376 |
377 | ## Manages a single kernel in a subprocess on this host.
378 | #
379 | # This version starts kernels with Popen.
380 |
381 | ## Should we autorestart the kernel if it dies.
382 | #c.KernelManager.autorestart = True
383 |
384 | ## DEPRECATED: Use kernel_name instead.
385 | #
386 | # The Popen Command to launch the kernel. Override this if you have a custom
387 | # kernel. If kernel_cmd is specified in a configuration file, Jupyter does not
388 | # pass any arguments to the kernel, because it cannot make any assumptions about
389 | # the arguments that the kernel understands. In particular, this means that the
390 | # kernel does not receive the option --debug if it given on the Jupyter command
391 | # line.
392 | #c.KernelManager.kernel_cmd = ['python3']
393 |
394 | ## Time to wait for a kernel to terminate before killing it, in seconds.
395 | #c.KernelManager.shutdown_wait_time = 5.0
396 |
397 | #------------------------------------------------------------------------------
398 | # Session(Configurable) configuration
399 | #------------------------------------------------------------------------------
400 |
401 | ## Object for handling serialization and sending of messages.
402 | #
403 | # The Session object handles building messages and sending them with ZMQ sockets
404 | # or ZMQStream objects. Objects can communicate with each other over the
405 | # network via Session objects, and only need to work with the dict-based IPython
406 | # message spec. The Session will handle serialization/deserialization, security,
407 | # and metadata.
408 | #
409 | # Sessions support configurable serialization via packer/unpacker traits, and
410 | # signing with HMAC digests via the key/keyfile traits.
411 | #
412 | # Parameters:
413 | #
414 | # debug : bool
415 | # whether to trigger extra debugging statements
416 | # packer/unpacker : str : 'json', 'pickle' or import_string
417 | # importstrings for methods to serialize message parts. If just
418 | # 'json' or 'pickle', predefined JSON and pickle packers will be used.
419 | # Otherwise, the entire importstring must be used.
420 | #
421 | # The functions must accept at least valid JSON input, and output *bytes*.
422 | #
423 | # For example, to use msgpack:
424 | # packer = 'msgpack.packb', unpacker='msgpack.unpackb'
425 | # pack/unpack : callables
426 | # You can also set the pack/unpack callables for serialization directly.
427 | # session : bytes
428 | # the ID of this Session object. The default is to generate a new UUID.
429 | # username : unicode
430 | # username added to message headers. The default is to ask the OS.
431 | # key : bytes
432 | # The key used to initialize an HMAC signature. If unset, messages
433 | # will not be signed or checked.
434 | # keyfile : filepath
435 | # The file containing a key. If this is set, `key` will be initialized
436 | # to the contents of the file.
437 |
438 | ## Threshold (in bytes) beyond which an object's buffer should be extracted to
439 | # avoid pickling.
440 | #c.Session.buffer_threshold = 1024
441 |
442 | ## Whether to check PID to protect against calls after fork.
443 | #
444 | # This check can be disabled if fork-safety is handled elsewhere.
445 | #c.Session.check_pid = True
446 |
447 | ## Threshold (in bytes) beyond which a buffer should be sent without copying.
448 | #c.Session.copy_threshold = 65536
449 |
450 | ## Debug output in the Session
451 | #c.Session.debug = False
452 |
453 | ## The maximum number of digests to remember.
454 | #
455 | # The digest history will be culled when it exceeds this value.
456 | #c.Session.digest_history_size = 65536
457 |
458 | ## The maximum number of items for a container to be introspected for custom
459 | # serialization. Containers larger than this are pickled outright.
460 | #c.Session.item_threshold = 64
461 |
462 | ## execution key, for signing messages.
463 | #c.Session.key = b''
464 |
465 | ## path to file containing execution key.
466 | #c.Session.keyfile = ''
467 |
468 | ## Metadata dictionary, which serves as the default top-level metadata dict for
469 | # each message.
470 | #c.Session.metadata = {}
471 |
472 | ## The name of the packer for serializing messages. Should be one of 'json',
473 | # 'pickle', or an import name for a custom callable serializer.
474 | #c.Session.packer = 'json'
475 |
476 | ## The UUID identifying this session.
477 | #c.Session.session = ''
478 |
479 | ## The digest scheme used to construct the message signatures. Must have the form
480 | # 'hmac-HASH'.
481 | #c.Session.signature_scheme = 'hmac-sha256'
482 |
483 | ## The name of the unpacker for unserializing messages. Only used with custom
484 | # functions for `packer`.
485 | #c.Session.unpacker = 'json'
486 |
487 | ## Username for the Session. Default is your system username.
488 | #c.Session.username = 'marscher'
489 |
490 | #------------------------------------------------------------------------------
491 | # MultiKernelManager(LoggingConfigurable) configuration
492 | #------------------------------------------------------------------------------
493 |
494 | ## A class for managing multiple kernels.
495 |
496 | ## The name of the default kernel to start
497 | #c.MultiKernelManager.default_kernel_name = 'python3'
498 |
499 | ## The kernel manager class. This is configurable to allow subclassing of the
500 | # KernelManager for customized behavior.
501 | #c.MultiKernelManager.kernel_manager_class = 'jupyter_client.ioloop.IOLoopKernelManager'
502 |
503 | #------------------------------------------------------------------------------
504 | # MappingKernelManager(MultiKernelManager) configuration
505 | #------------------------------------------------------------------------------
506 |
507 | ## A KernelManager that handles notebook mapping and HTTP error handling
508 |
509 | ## Whether messages from kernels whose frontends have disconnected should be
510 | # buffered in-memory.
511 | #
512 | # When True (default), messages are buffered and replayed on reconnect, avoiding
513 | # lost messages due to interrupted connectivity.
514 | #
515 | # Disable if long-running kernels will produce too much output while no
516 | # frontends are connected.
517 | #c.MappingKernelManager.buffer_offline_messages = True
518 |
519 | ## Whether to consider culling kernels which are busy. Only effective if
520 | # cull_idle_timeout > 0.
521 | #c.MappingKernelManager.cull_busy = False
522 |
523 | ## Whether to consider culling kernels which have one or more connections. Only
524 | # effective if cull_idle_timeout > 0.
525 | #c.MappingKernelManager.cull_connected = False
526 |
527 | ## Timeout (in seconds) after which a kernel is considered idle and ready to be
528 | # culled. Values of 0 or lower disable culling. Very short timeouts may result
529 | # in kernels being culled for users with poor network connections.
530 | #c.MappingKernelManager.cull_idle_timeout = 0
531 |
532 | ## The interval (in seconds) on which to check for idle kernels exceeding the
533 | # cull timeout value.
534 | #c.MappingKernelManager.cull_interval = 300
535 |
536 | ##
537 | #c.MappingKernelManager.root_dir = ''
538 |
539 | #------------------------------------------------------------------------------
540 | # ContentsManager(LoggingConfigurable) configuration
541 | #------------------------------------------------------------------------------
542 |
543 | ## Base class for serving files and directories.
544 | #
545 | # This serves any text or binary file, as well as directories, with special
546 | # handling for JSON notebook documents.
547 | #
548 | # Most APIs take a path argument, which is always an API-style unicode path, and
549 | # always refers to a directory.
550 | #
551 | # - unicode, not url-escaped
552 | # - '/'-separated
553 | # - leading and trailing '/' will be stripped
554 | # - if unspecified, path defaults to '',
555 | # indicating the root path.
556 |
557 | ## Allow access to hidden files
558 | #c.ContentsManager.allow_hidden = False
559 |
560 | ##
561 | #c.ContentsManager.checkpoints = None
562 |
563 | ##
564 | #c.ContentsManager.checkpoints_class = 'notebook.services.contents.checkpoints.Checkpoints'
565 |
566 | ##
567 | #c.ContentsManager.checkpoints_kwargs = {}
568 |
569 | ## handler class to use when serving raw file requests.
570 | #
571 | # Default is a fallback that talks to the ContentsManager API, which may be
572 | # inefficient, especially for large files.
573 | #
574 | # Local files-based ContentsManagers can use a StaticFileHandler subclass, which
575 | # will be much more efficient.
576 | #
577 | # Access to these files should be Authenticated.
578 | #c.ContentsManager.files_handler_class = 'notebook.files.handlers.FilesHandler'
579 |
580 | ## Extra parameters to pass to files_handler_class.
581 | #
582 | # For example, StaticFileHandlers generally expect a `path` argument specifying
583 | # the root directory from which to serve files.
584 | #c.ContentsManager.files_handler_params = {}
585 |
586 | ## Glob patterns to hide in file and directory listings.
587 | #c.ContentsManager.hide_globs = ['__pycache__', '*.pyc', '*.pyo', '.DS_Store', '*.so', '*.dylib', '*~']
588 |
589 | ## Python callable or importstring thereof
590 | #
591 | # To be called on a contents model prior to save.
592 | #
593 | # This can be used to process the structure, such as removing notebook outputs
594 | # or other side effects that should not be saved.
595 | #
596 | # It will be called as (all arguments passed by keyword)::
597 | #
598 | # hook(path=path, model=model, contents_manager=self)
599 | #
600 | # - model: the model to be saved. Includes file contents.
601 | # Modifying this dict will affect the file that is stored.
602 | # - path: the API path of the save destination
603 | # - contents_manager: this ContentsManager instance
604 | #c.ContentsManager.pre_save_hook = None
605 |
606 | ##
607 | #c.ContentsManager.root_dir = '/'
608 |
609 | ## The base name used when creating untitled directories.
610 | #c.ContentsManager.untitled_directory = 'Untitled Folder'
611 |
612 | ## The base name used when creating untitled files.
613 | #c.ContentsManager.untitled_file = 'untitled'
614 |
615 | ## The base name used when creating untitled notebooks.
616 | #c.ContentsManager.untitled_notebook = 'Untitled'
617 |
618 | #------------------------------------------------------------------------------
619 | # FileManagerMixin(Configurable) configuration
620 | #------------------------------------------------------------------------------
621 |
622 | ## Mixin for ContentsAPI classes that interact with the filesystem.
623 | #
624 | # Provides facilities for reading, writing, and copying both notebooks and
625 | # generic files.
626 | #
627 | # Shared by FileContentsManager and FileCheckpoints.
628 | #
629 | # Note: Classes using this mixin must provide the following attributes:
630 | #
631 | # root_dir : unicode
632 | # A directory against which API-style paths are to be resolved.
633 | #
634 | # log : logging.Logger
635 |
636 | ## By default, notebooks are saved to disk via a temporary file which, if
637 | # successfully written, replaces the old one. This procedure, namely
638 | # 'atomic_writing', causes some bugs on file systems without operation order
639 | # enforcement (like some networked file systems). If set to False, the new
640 | # notebook is written directly over the old one, which could fail (e.g. full
641 | # filesystem or quota).
642 | #c.FileManagerMixin.use_atomic_writing = True
643 |
644 | #------------------------------------------------------------------------------
645 | # FileContentsManager(FileManagerMixin,ContentsManager) configuration
646 | #------------------------------------------------------------------------------
647 |
648 | ## If True (default), deleting files will send them to the platform's
649 | # trash/recycle bin, where they can be recovered. If False, deleting files
650 | # really deletes them.
651 | #c.FileContentsManager.delete_to_trash = True
652 |
653 | ## Python callable or importstring thereof
654 | #
655 | # to be called on the path of a file just saved.
656 | #
657 | # This can be used to process the file on disk, such as converting the notebook
658 | # to a script or HTML via nbconvert.
659 | #
660 | # It will be called as (all arguments passed by keyword)::
661 | #
662 | # hook(os_path=os_path, model=model, contents_manager=instance)
663 | #
664 | # - path: the filesystem path to the file just written - model: the model
665 | # representing the file - contents_manager: this ContentsManager instance
666 | #c.FileContentsManager.post_save_hook = None
667 |
668 | ##
669 | #c.FileContentsManager.root_dir = ''
670 |
671 | ## DEPRECATED, use post_save_hook. Will be removed in Notebook 5.0
672 | #c.FileContentsManager.save_script = False
673 |
674 | #------------------------------------------------------------------------------
675 | # NotebookNotary(LoggingConfigurable) configuration
676 | #------------------------------------------------------------------------------
677 |
678 | ## A class for computing and verifying notebook signatures.
679 |
680 | ## The hashing algorithm used to sign notebooks.
681 | #c.NotebookNotary.algorithm = 'sha256'
682 |
683 | ## The sqlite file in which to store notebook signatures. By default, this will
684 | # be in your Jupyter data directory. You can set it to ':memory:' to disable
685 | # sqlite writing to the filesystem.
686 | #c.NotebookNotary.db_file = ''
687 |
688 | ## The secret key with which notebooks are signed.
689 | #c.NotebookNotary.secret = b''
690 |
691 | ## The file where the secret key is stored.
692 | #c.NotebookNotary.secret_file = ''
693 |
694 | ## A callable returning the storage backend for notebook signatures. The default
695 | # uses an SQLite database.
696 | #c.NotebookNotary.store_factory = traitlets.Undefined
697 |
698 | #------------------------------------------------------------------------------
699 | # KernelSpecManager(LoggingConfigurable) configuration
700 | #------------------------------------------------------------------------------
701 |
702 | ## If there is no Python kernelspec registered and the IPython kernel is
703 | # available, ensure it is added to the spec list.
704 | #c.KernelSpecManager.ensure_native_kernel = True
705 |
706 | ## The kernel spec class. This is configurable to allow subclassing of the
707 | # KernelSpecManager for customized behavior.
708 | #c.KernelSpecManager.kernel_spec_class = 'jupyter_client.kernelspec.KernelSpec'
709 |
710 | ## Whitelist of allowed kernel names.
711 | #
712 | # By default, all installed kernels are allowed.
713 | #c.KernelSpecManager.whitelist = set()
714 |
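Almost everything in this file is a commented-out Jupyter default. Condensed, the active configuration reduces to the following summary of the uncommented lines above (only meaningful inside a Jupyter config context, where `c` is provided):

    # Effective overrides, condensed for readability.
    import pyemma_tutorials

    c.NotebookApp.allow_origin = 'localhost'
    c.NotebookApp.default_url = '/tree#examples'
    c.NotebookApp.ip = 'localhost'
    c.NotebookApp.nbserver_extensions = {'nbexamples.handlers': True}
    c.Examples.reviewed_example_dir = pyemma_tutorials.notebook_location()
    c.Examples.unreviewed_example_dir = ''
    c.NotebookApp.notebook_dir = pyemma_tutorials.run_dir()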
--------------------------------------------------------------------------------
/pyemma_tutorials/util.py:
--------------------------------------------------------------------------------
1 | import pkg_resources
2 | import os
3 |
4 |
5 | def notebook_location():
6 | d = pkg_resources.resource_filename('pyemma_tutorials', 'notebooks')
7 | assert os.path.isdir(d)
8 | return d
9 |
10 |
11 | def configs_location():
12 | notebook_cfg = pkg_resources.resource_filename('pyemma_tutorials', 'jupyter_notebook_config.py')
13 | notebook_cfg_json = pkg_resources.resource_filename('pyemma_tutorials', 'jupyter_notebook_config.json')
14 |
15 | assert os.path.exists(notebook_cfg)
16 | assert os.path.exists(notebook_cfg_json)
17 |
18 | return notebook_cfg, notebook_cfg_json
19 |
20 |
21 | def run_dir():
22 | """ directory in which the user copies of the notebooks will reside. """
23 | import os
24 | target = os.path.expanduser('~/pyemma_tutorials')
25 | os.makedirs(target, exist_ok=True)
26 |
27 | # copy static data into run dir
28 | src = os.path.join(notebook_location(), 'static')
29 |
30 | def copytree(src, dst, symlinks=False, ignore=None):
31 | # shutil.copytree fails for existing target dirs...
32 | import shutil
33 | for item in os.listdir(src):
34 | s = os.path.join(src, item)
35 | d = os.path.join(dst, item)
36 | if os.path.isdir(s):
37 | shutil.copytree(s, d, symlinks, ignore)
38 | else:
39 | shutil.copy2(s, d)
40 |
41 | copytree(src, os.path.join(target, 'static'))
42 |
43 | return target
44 |
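Usage sketch: the two path helpers separate the pristine, packaged notebooks from the user's working copies; `run_dir()` also seeds the `static/` assets on every call. Output paths depend on the installation:

    from pyemma_tutorials.util import notebook_location, run_dir

    print(notebook_location())  # packaged, read-only master copies
    print(run_dir())            # ~/pyemma_tutorials, user working directory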
--------------------------------------------------------------------------------
/releases/LiveCoMS_Article_ASAP_V1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markovmodel/pyemma_tutorials/6b9183686d2238d4f60c752a73e9b710c667ec10/releases/LiveCoMS_Article_ASAP_V1.pdf
--------------------------------------------------------------------------------
/releases/LiveCoMS_Article_V1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markovmodel/pyemma_tutorials/6b9183686d2238d4f60c752a73e9b710c667ec10/releases/LiveCoMS_Article_V1.pdf
--------------------------------------------------------------------------------
/releases/header_V1.0.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/markovmodel/pyemma_tutorials/6b9183686d2238d4f60c752a73e9b710c667ec10/releases/header_V1.0.jpg
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 |
2 | # See the docstring in versioneer.py for instructions. Note that you must
3 | # re-run 'versioneer.py setup' after changing this section, and commit the
4 | # resulting files.
5 |
6 | [versioneer]
7 | VCS = git
8 | style = pep440
9 | versionfile_source = pyemma_tutorials/_version.py
10 | versionfile_build = pyemma_tutorials/_version.py
11 | tag_prefix = v
12 | #parentdir_prefix =
13 |
14 |
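With `tag_prefix = v` and `style = pep440`, versioneer strips the leading `v` from git tags and renders versions as in `render_pep440()` from versioneer.py above. A sketch of the mapping (tags and hashes invented):

    # Hypothetical checkout states -> rendered versions under this config.
    mapping = {
        "checkout at tag v1.1, clean": "1.1",
        "v1.1 plus 2 commits":         "1.1+2.g1234abc",
        "v1.1 plus 2 commits, dirty":  "1.1+2.g1234abc.dirty",
    }
    for situation, version in mapping.items():
        print("%-30s -> %s" % (situation, version))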
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 | import versioneer
3 |
4 | from setuptools import setup
5 |
6 |
7 | def copy_notebooks():
8 | import shutil
9 | dest = os.path.join('pyemma_tutorials', 'notebooks')
10 | try:
11 | shutil.rmtree(dest, ignore_errors=True)
12 | shutil.copytree('notebooks', dest)
13 |             print('copied notebooks into pkg')
14 | except OSError:
15 | pass
16 |
17 |
18 | metadata = dict(
19 | name='pyemma_tutorials',
20 | version=versioneer.get_version(),
21 | cmdclass=versioneer.get_cmdclass(),
22 | packages=['pyemma_tutorials'],
23 | package_data={'pyemma_tutorials': ['notebooks/*',
24 | 'notebooks/static/*',
25 | 'jupyter_notebook_config.py',
26 | 'jupyter_notebook_config.json',
27 | ]},
28 | include_package_data=True,
29 | entry_points={'console_scripts': ['pyemma_tutorials = pyemma_tutorials.cli:main'],},
30 | install_requires=['pyemma',
31 | 'mdshare',
32 | 'nbexamples',
33 | 'nglview',
34 | 'notebook',
35 | 'jupyter_contrib_nbextensions',
36 | ],
37 | zip_safe=False,
38 | )
39 |
40 | if __name__ == '__main__':
41 | copy_notebooks()
42 | setup(**metadata)
43 |
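Design note: `copy_notebooks()` runs before `setup()`, so the top-level notebooks/ tree is snapshotted into the package and shipped via `package_data`. A hedged post-install sanity check (assumes a completed `pip install -e .`):

    # Verify the versioneer version and the console entry point are in place.
    import pkg_resources

    dist = pkg_resources.get_distribution("pyemma_tutorials")
    print(dist.version)  # rendered by versioneer, e.g. "1.0+5.g1234abc"
    print(dist.get_entry_info("console_scripts", "pyemma_tutorials"))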
--------------------------------------------------------------------------------