├── .github
└── PULL_REQUEST_TEMPLATE.md
├── .gitignore
├── .travis.yml
├── LICENSE
├── MANIFEST.in
├── README.md
├── appveyor.yml
├── basesetup.py
├── devtools
├── README.md
├── conda-recipe
│ ├── bld.bat
│ ├── build.sh
│ └── meta.yaml
└── travis-ci
│ ├── build_docs.sh
│ ├── install_miniconda.sh
│ ├── set_doc_version.py
│ └── update_versions_json.py
├── docs
├── .gitignore
├── LICENSE
├── Makefile
├── _static
│ ├── flow-chart.png
│ ├── fspeptide.png
│ ├── kde-vs-histogram.png
│ ├── lengths-hist.png
│ ├── logo-200px.png
│ ├── logo.png
│ ├── msm-microstates.png
│ ├── tica-heatmap.png
│ ├── tica-movie.gif
│ └── tica_vs_pca.png
├── _templates
│ └── class.rst
├── advanced_examples
│ ├── bayesian-msm.rst
│ ├── gmrq-model-selection.rst
│ ├── hmm-and-msm.rst
│ ├── implied-timescales.rst
│ ├── index.rst
│ ├── plot-tica-heatmap.rst
│ ├── quadwell-n-states.rst
│ ├── quadwell.rst
│ ├── tica-1.rst
│ └── uncertainty.rst
├── apipatterns.rst
├── background.rst
├── bibparse.py
├── changelog.rst
├── cluster.rst
├── conf.py
├── contributing.rst
├── datasets.rst
├── decomposition.rst
├── examples
│ ├── Clustering-Comparison.rst
│ ├── Fs-Peptide-command-line.rst
│ ├── Fs-Peptide-in-RAM.rst
│ ├── Fs-Peptide-with-dataset.rst
│ ├── GMRQ-Model-Selection.rst
│ ├── Ligand-Featurization.rst
│ ├── Ward-Clustering.rst
│ ├── index.rst
│ └── tICA-vs-PCA.rst
├── faq.rst
├── feature_selection.rst
├── featurization.rst
├── figures
│ └── kde-vs-histogram.py
├── gmrq.rst
├── hmm.rst
├── index.rst
├── installation.rst
├── io.rst
├── make.bat
├── msm.rst
├── plugins.rst
├── preprocessing.rst
├── publications.bib
├── publications_templ.rst
├── ratematrix.rst
├── requirements.txt
├── sphinxext
│ ├── embed.tpl
│ └── notebook_sphinxext.py
├── tpt.rst
└── tutorial.rst
├── examples
├── .gitignore
├── Clustering-Comparison.ipynb
├── Coarse-graining-with-MVCA.ipynb
├── Fs-Peptide-command-line.ipynb
├── Fs-Peptide-in-RAM.ipynb
├── Fs-Peptide-with-Pipeline.ipynb
├── Fs-Peptide-with-dataset.ipynb
├── GMRQ-Model-Selection.ipynb
├── LICENSE.md
├── Ligand-Featurization.ipynb
├── Ward-Clustering.ipynb
├── advanced
│ ├── bayesian-msm.ipynb
│ ├── hmm-and-msm.ipynb
│ ├── implied-timescales.ipynb
│ ├── plot-tica-heatmap.ipynb
│ ├── quadwell-n-states.ipynb
│ ├── quadwell.ipynb
│ └── uncertainty.ipynb
└── tICA-vs-PCA.ipynb
├── msmbuilder
├── __init__.py
├── base.py
├── cluster
│ ├── .gitignore
│ ├── __init__.py
│ ├── _kmedoids.pyx
│ ├── agglomerative.py
│ ├── apm.py
│ ├── base.py
│ ├── kcenters.py
│ ├── kmedoids.py
│ ├── minibatchkmedoids.py
│ ├── ndgrid.py
│ ├── regularspatial.py
│ └── src
│ │ ├── kmedoids.cc
│ │ └── kmedoids.h
├── cmdline.py
├── commands
│ ├── __init__.py
│ ├── atom_indices.py
│ ├── convert_chunked_project.py
│ ├── example_datasets.py
│ ├── featurizer.py
│ ├── fit.py
│ ├── fit_transform.py
│ ├── implied_timescales.py
│ ├── template_project.py
│ └── transform.py
├── dataset.py
├── decomposition
│ ├── .gitignore
│ ├── __init__.py
│ ├── _speigh.pyx
│ ├── base.py
│ ├── kernel_approximation.py
│ ├── ksparsetica.py
│ ├── ktica.py
│ ├── pca.py
│ ├── sparsetica.py
│ ├── tica.py
│ └── utils.py
├── example_datasets
│ ├── .gitignore
│ ├── __init__.py
│ ├── _muller.pyx
│ ├── alanine_dipeptide.py
│ ├── base.py
│ ├── brownian1d.py
│ ├── fs_peptide.py
│ ├── met_enkephalin.py
│ └── muller.py
├── feature_extraction
│ └── __init__.py
├── feature_selection
│ ├── __init__.py
│ ├── base.py
│ └── featureselector.py
├── featurizer
│ ├── __init__.py
│ ├── feature_union.py
│ ├── featurizer.py
│ ├── indices.py
│ ├── multichain.py
│ ├── multiseq_featuizer.py
│ └── subset.py
├── hmm
│ ├── .gitignore
│ ├── __init__.py
│ ├── cephes
│ │ ├── README.md
│ │ ├── cephes.h
│ │ ├── cephes_names.h
│ │ ├── chbevl.c
│ │ ├── gamma.c
│ │ ├── i0.c
│ │ ├── i1.c
│ │ ├── mconf.h
│ │ ├── mtherr.c
│ │ ├── polevl.c
│ │ ├── psi.c
│ │ └── zeta.c
│ ├── discrete_approx.py
│ ├── gaussian.pyx
│ ├── src
│ │ ├── GaussianHMMFitter.cpp
│ │ ├── VonMisesHMMFitter.cpp
│ │ ├── include
│ │ │ ├── GaussianHMMFitter.h
│ │ │ ├── HMMFitter.h
│ │ │ ├── Trajectory.h
│ │ │ ├── VonMisesHMMFitter.h
│ │ │ └── sse_mathfun.h
│ │ └── logsumexp.hpp
│ └── vonmises.pyx
├── io
│ ├── __init__.py
│ ├── gather_metadata.py
│ ├── io.py
│ ├── project_template.py
│ └── sampling
│ │ ├── __init__.py
│ │ └── sampling.py
├── io_templates
│ └── twitter-bootstrap.html
├── libdistance
│ ├── .gitignore
│ ├── libdistance.pyx
│ └── src
│ │ ├── assign.hpp
│ │ ├── cdist.hpp
│ │ ├── dist.hpp
│ │ ├── distance_kernels.h
│ │ ├── pdist.hpp
│ │ └── sumdist.hpp
├── lumping
│ ├── __init__.py
│ ├── bace.py
│ ├── mvca.py
│ ├── pcca.py
│ └── pcca_plus.py
├── msm
│ ├── .gitignore
│ ├── __init__.py
│ ├── _markovstatemodel.pyx
│ ├── _metzner_mcmc_fast.pyx
│ ├── _metzner_mcmc_slow.py
│ ├── _ratematrix.pyx
│ ├── _ratematrix_priors.pyx
│ ├── _ratematrix_support.pyx
│ ├── bayes_ratematrix.py
│ ├── bayesmsm.py
│ ├── core.py
│ ├── implied_timescales.py
│ ├── markov_appreciation.py
│ ├── msm.py
│ ├── ratematrix.py
│ ├── src
│ │ ├── metzner_mcmc.c
│ │ ├── metzner_mcmc.h
│ │ ├── transmat_mle_prinz.c
│ │ └── transmat_mle_prinz.h
│ └── validation
│ │ ├── __init__.py
│ │ ├── bootstrapmsm.py
│ │ └── transmat_errorbar.py
├── preprocessing
│ ├── __init__.py
│ ├── base.py
│ └── timeseries.py
├── project_templates
│ ├── 0-test-install.py
│ ├── 1-get-example-data.py
│ ├── LICENSE.md
│ ├── README.md
│ ├── analysis
│ │ ├── gather-metadata-plot.py
│ │ └── gather-metadata.py
│ ├── cluster
│ │ ├── cluster-plot.py
│ │ ├── cluster.py
│ │ ├── sample-clusters-plot.py
│ │ └── sample-clusters.py
│ ├── dihedrals
│ │ ├── featurize-plot.py
│ │ └── featurize.py
│ ├── landmarks
│ │ ├── featurize-plot.py
│ │ ├── featurize.py
│ │ └── find-landmarks.py
│ ├── msm
│ │ ├── microstate-plot.py
│ │ ├── microstate-traj.py
│ │ ├── microstate.py
│ │ ├── timescales-plot.py
│ │ └── timescales.py
│ ├── plot_header.template
│ ├── plot_macros.template
│ ├── rmsd
│ │ ├── rmsd-plot.py
│ │ └── rmsd.py
│ └── tica
│ │ ├── tica-plot.py
│ │ ├── tica-sample-coordinate-plot.py
│ │ ├── tica-sample-coordinate.py
│ │ └── tica.py
├── scripts
│ ├── __init__.py
│ └── msmb.py
├── src
│ ├── cy_blas.pyx
│ ├── f2py
│ │ └── f2pyptr.h
│ ├── scipy_lapack.h
│ └── triu_utils.pyx
├── tests
│ ├── .gitignore
│ ├── __init__.py
│ ├── native.pdb
│ ├── test_agglomerative.py
│ ├── test_alphaanglefeaturizer.py
│ ├── test_apm.py
│ ├── test_bayes_ratematrix.py
│ ├── test_bootstrap_msm.py
│ ├── test_build_counts.py
│ ├── test_clustering.py
│ ├── test_commands.py
│ ├── test_commands_exist.py
│ ├── test_contactfeaturizers.py
│ ├── test_convenience.py
│ ├── test_cyblas.pyx
│ ├── test_cyblas_wrapper.py
│ ├── test_dataset.py
│ ├── test_decomposition.py
│ ├── test_dependencies.py
│ ├── test_divergence.py
│ ├── test_estimator_subclassing.py
│ ├── test_feature_descriptor.py
│ ├── test_feature_selection.py
│ ├── test_featureunion.py
│ ├── test_featurizer.py
│ ├── test_featurizer_subset.py
│ ├── test_gather_metadata.py
│ ├── test_ghmm.py
│ ├── test_kcenters.py
│ ├── test_kernel_approximation.py
│ ├── test_kmedoids.py
│ ├── test_ksparsetica.py
│ ├── test_libdistance.py
│ ├── test_ligandfeaturizers.py
│ ├── test_lumping.py
│ ├── test_metzner_mcmc.py
│ ├── test_msm.py
│ ├── test_msm_uncertainty.py
│ ├── test_muller.py
│ ├── test_ndgrid.py
│ ├── test_nearest.py
│ ├── test_param_sweep.py
│ ├── test_preprocessing.py
│ ├── test_ratematrix.py
│ ├── test_rmsdfeaturizer.py
│ ├── test_sampling.py
│ ├── test_sasa_featurizer.py
│ ├── test_sparsetica.py
│ ├── test_speigh.py
│ ├── test_strongly_connected_subgraph.py
│ ├── test_template_project.py
│ ├── test_tpt.py
│ ├── test_transition_counts.py
│ ├── test_transmat_errorbar.py
│ ├── test_transmat_mle_prinz.py
│ ├── test_utils.py
│ ├── test_vmhmm.py
│ ├── test_workflows.py
│ └── workflows
│ │ ├── basic.sh
│ │ ├── ghmm.sh
│ │ └── rmsd.sh
├── tpt
│ ├── __init__.py
│ ├── committor.py
│ ├── flux.py
│ ├── hub.py
│ ├── mfpt.py
│ └── path.py
└── utils
│ ├── __init__.py
│ ├── compat.py
│ ├── convenience.py
│ ├── divergence.py
│ ├── draw_samples.py
│ ├── io.py
│ ├── nearest.py
│ ├── param_sweep.py
│ ├── probability.py
│ ├── progressbar
│ ├── __init__.py
│ ├── compat.py
│ ├── progressbar.py
│ └── widgets.py
│ ├── subsampler.py
│ └── validation.py
├── runtests.py
└── setup.py
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | - [x] Implement feature / fix bug
2 | - [ ] Add tests
3 | - [ ] Update changelog
4 |
5 | [Describe changes here]
6 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.py[cod]
2 |
3 | # C extensions
4 | *.so
5 |
6 | # Packages
7 | *.egg
8 | *.egg-info
9 | dist
10 | build
11 | eggs
12 | parts
13 | bin
14 | var
15 | sdist
16 | develop-eggs
17 | .installed.cfg
18 | lib
19 | lib64
20 | __pycache__
21 |
22 | # Installer logs
23 | pip-log.txt
24 |
25 | # Unit test / coverage reports
26 | .coverage
27 | .tox
28 | nosetests.xml
29 |
30 | # Translations
31 | *.mo
32 |
33 | # IDEs
34 | .mr.developer.cfg
35 | .project
36 | .pydevproject
37 | .idea/
38 |
39 | # Autogenerated during setup.py
40 | msmbuilder/src/config.pxi
41 | msmbuilder/version.py
42 |
43 | # Vim temp files
44 | *.swp
45 | *.swo
46 |
47 | build.log
48 |
49 | # Other files
50 | .DS_Store
51 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: c
2 | sudo: false
3 |
4 | addons:
5 | apt:
6 | packages:
7 | - pandoc
8 |
9 | branches:
10 | only:
11 | - master
12 |
13 | install:
14 | - source devtools/travis-ci/install_miniconda.sh
15 | - conda config --add channels omnia
16 | - conda config --add channels conda-forge
17 |
18 | script:
 19 |   # Run tests. If they succeed, build docs only on 3.6
 20 |   - conda build --quiet devtools/conda-recipe
 21 |     && if [[ $CONDA_PY = 3.6 ]]; then devtools/travis-ci/build_docs.sh; fi
22 |
23 | env:
24 | matrix:
25 | - CONDA_PY=2.7 CONDA_NPY=1.12
26 | - CONDA_PY=3.6 CONDA_NPY=1.12
27 | - CONDA_PY=3.7 CONDA_NPY=1.14
28 |
29 | deploy:
30 | - provider: s3
31 | access_key_id:
32 | secure: "av04wLV7wRmFjPRkDPE0FXNtvL51F597+DzUmrycLnI+Ltg5rxrxEUv2JMr7K1WrTTR1STFNhJBp6aQUwD3zzaA7N/1c0zY9ri35ML75LC/10IDb6UNbY6uPNqbP1co451OSz7tpGbu3JBL/TRL7MkReFbZxPLHPPP1ad/4O6nA="
33 | secret_access_key:
34 | secure: "c4b2fliqot9ZnI5cyTqEXSHQnCao+GoxmR+SJAcSURv381O/z3frlJX7pKf0qai2OrZSSdqX/wa2KdcWNeoDTKrTiCeKgFikc6x839tmjeQYVV0Y3hmSvZCzCFOAXyMf9GfJJ7gLBOBHSzTTJWeZDLZB6nuoi4Xw9Blgid6QxIs="
35 | bucket: "msmbuilder.org"
36 | skip_cleanup: true
37 | local_dir: docs/_deploy/
38 | on:
39 | branch: master
40 | condition: "$CONDA_PY = 3.6"
41 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | graft examples
2 | graft msmbuilder
3 | include basesetup.py
4 | include LICENSE
5 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | MSMBuilder
2 | ==========
3 |
 4 | [Build Status](https://travis-ci.org/msmbuilder/msmbuilder)
 5 | [PyPI version](https://pypi.python.org/pypi/msmbuilder/)
 6 | [PyPI downloads](https://pypi.python.org/pypi/msmbuilder/)
 7 | [Documentation](http://msmbuilder.org)
8 |
 9 | MSMBuilder is a Python package that implements a series of statistical
10 | models for high-dimensional time-series. It is particularly focused on the
11 | analysis of atomistic simulations of biomolecular dynamics. For example,
12 | MSMBuilder has been used to model protein folding and conformational change
13 | from molecular dynamics (MD) simulations. MSMBuilder is available under the
14 | LGPL (v2.1 or later).
15 |
16 | Capabilities include:
17 |
18 | - Feature extraction into dihedrals, contact maps, and more
19 | - Geometric clustering with a variety of algorithms
20 | - Dimensionality reduction using time-structure independent component
21 |   analysis (tICA) and principal component analysis (PCA)
22 | - Markov state model (MSM) construction
23 | - Rate-matrix MSM construction
24 | - Hidden Markov model (HMM) construction
25 | - Timescale and transition path analysis
26 |
27 | Check out the documentation at [msmbuilder.org](http://msmbuilder.org) and
28 | join the [mailing list](https://mailman.stanford.edu/mailman/listinfo/msmbuilder-user).
29 | For a broader overview of MSMBuilder, take a look at our [slide deck](http://rawgit.com/msmbuilder/talk/master/index.html).
30 |
31 | Installation
32 | ------------
33 |
34 | The preferred installation mechanism for `msmbuilder` is with `conda`:
35 |
36 | ```bash
37 | $ conda install -c omnia msmbuilder
38 | ```
39 |
40 | If you don't have conda, or are new to scientific python, we recommend that
41 | you download the [Anaconda scientific python distribution](https://store.continuum.io/cshop/anaconda/).
42 |
43 |
44 | Workflow
45 | --------
46 |
47 | An example workflow might be as follows:
48 |
49 | 1. Set up a system for molecular dynamics, and run one or more simulations
50 | for as long as you can on as many CPUs or GPUs as you have access to.
51 | There are a lot of great software packages for running MD, e.g.
52 | [OpenMM](https://simtk.org/home/openmm), [Gromacs](http://www.gromacs.org/),
53 | [Amber](http://ambermd.org/), [CHARMM](http://www.charmm.org/), and
54 | many others. MSMBuilder is not one of them.
55 |
56 | 2. Transform your MD coordinates into an appropriate set of features.
57 |
58 | 3. Perform some sort of dimensionality reduction with tICA or PCA.
59 | Reduce your data into discrete states by using clustering.
60 |
61 | 4. Fit an MSM, rate matrix MSM, or HMM. Perform model selection using
62 | cross-validation with the [generalized matrix Rayleigh quotient](http://arxiv.org/abs/1407.8083).
63 |
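A minimal sketch of steps 2-4 with the Python API, using the bundled Fs
peptide example dataset (downloaded on first use); the hyperparameter values
below are arbitrary placeholders:

```python
from msmbuilder.example_datasets import FsPeptide
from msmbuilder.featurizer import DihedralFeaturizer
from msmbuilder.decomposition import tICA
from msmbuilder.cluster import MiniBatchKMeans
from msmbuilder.msm import MarkovStateModel

trajectories = FsPeptide().get().trajectories  # list of mdtraj.Trajectory objects

# 2. Featurize: backbone dihedral angles
features = DihedralFeaturizer(types=['phi', 'psi']).fit_transform(trajectories)

# 3. Reduce dimensionality with tICA, then cluster into microstates
reduced = tICA(n_components=4, lag_time=10).fit_transform(features)
assignments = MiniBatchKMeans(n_clusters=100).fit_predict(reduced)

# 4. Estimate a Markov state model from the discrete trajectories
msm = MarkovStateModel(lag_time=10, n_timescales=5)
msm.fit(assignments)
print(msm.timescales_)
```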
--------------------------------------------------------------------------------
/appveyor.yml:
--------------------------------------------------------------------------------
1 | environment:
2 |
3 | global:
4 | PYTHONUNBUFFERED: on
5 |
6 | matrix:
7 | - PYDIR: "C:\\Miniconda35"
8 | CONDA_PY: "35"
9 | CONDA_NPY: "1.10"
10 |
11 | - PYDIR: "C:\\Miniconda35-x64"
12 | CONDA_PY: "35"
13 | CONDA_NPY: "1.10"
14 |
15 | install:
16 | - set PATH=%PYDIR%;%PYDIR%\\Scripts;%PATH%
17 | - conda config --add channels omnia
18 | - conda config --add channels conda-forge
19 | - conda update -yq --all
20 | - conda install -yq conda-build jinja2
21 |
22 | build: false
23 |
24 | test_script:
25 | - conda build -q devtools\conda-recipe
26 |
--------------------------------------------------------------------------------
/devtools/conda-recipe/bld.bat:
--------------------------------------------------------------------------------
1 | python setup.py install
2 | if errorlevel 1 exit 1
3 |
--------------------------------------------------------------------------------
/devtools/conda-recipe/build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | python setup.py install
3 |
--------------------------------------------------------------------------------
/devtools/conda-recipe/meta.yaml:
--------------------------------------------------------------------------------
1 | package:
2 | name: msmbuilder
3 | version: {{ GIT_DESCRIBE_TAG }}
4 |
5 | source:
6 | path: ../../
7 |
8 | build:
9 | preserve_egg_dir: True
10 | number: {{ GIT_DESCRIBE_NUMBER }}
11 | entry_points:
12 | - msmb = msmbuilder.scripts.msmb:main
13 |
14 |
15 | requirements:
16 | build:
17 | - python
18 | - setuptools
19 | - cython <=0.28
20 | - numpy x.x
21 | - mdtraj <=1.8
22 | run:
23 | - python
24 | - setuptools
25 | - numpy x.x
26 | - scipy
27 | - pandas <0.20
28 | - six
29 | - mdtraj <=1.8
30 | - scikit-learn
31 | - numpydoc
32 | - pytables
33 | - pyhmc
34 | - pyyaml
35 | - jinja2
36 | - fastcluster
37 |
38 |
39 | test:
40 | requires:
41 | - nose
42 | - nose-timer
43 | - munkres
44 | - numdifftools
45 | - statsmodels
46 | - hmmlearn=0.2.1
47 | - cvxpy # [not win]
48 | - msmb_data
49 | imports:
50 | - msmbuilder
51 | commands:
52 | - msmb -h
53 | - nosetests msmbuilder -v --with-timer --timer-ok 2 --timer-warning 10 --timer-filter error
54 |
55 |
56 | about:
57 | home: https://github.com/msmbuilder/msmbuilder
58 | license: LGPLv2.1+
59 | summary: 'MSMBuilder: Statistical models for biomolecular dynamics'
60 |
--------------------------------------------------------------------------------
/devtools/travis-ci/build_docs.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Print each line, exit on error
4 | set -ev
5 |
6 | # Install the built package
7 | conda create --yes -n docenv python=$CONDA_PY
8 | source activate docenv
9 | conda install -yq --use-local msmbuilder
10 |
11 | # Install doc requirements
12 | conda install -yq --file docs/requirements.txt
13 |
14 | # We don't use conda for these:
15 | # sphinx_rtd_theme's latest releases are not available
16 | # neither is msmb_theme
17 | # neither is sphinx > 1.3.1 (fix #1892 autodoc problem)
18 | pip install -I sphinx
19 | pip install -I sphinx_rtd_theme==0.1.9 msmb_theme==1.2.0
20 |
21 | # Make docs
22 | cd docs && make html && cd -
23 |
24 | # Move the docs into a versioned subdirectory
25 | python devtools/travis-ci/set_doc_version.py
26 |
27 | # Prepare versions.json
28 | python devtools/travis-ci/update_versions_json.py
29 |
--------------------------------------------------------------------------------
/devtools/travis-ci/install_miniconda.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | MINICONDA=Miniconda3-latest-Linux-x86_64.sh
 3 | MINICONDA_MD5=$(curl -s https://repo.continuum.io/miniconda/ | grep -A3 $MINICONDA | sed -n '4p' | sed -n 's/ *<td>\(.*\)<\/td> */\1/p')
4 | wget https://repo.continuum.io/miniconda/$MINICONDA
5 | if [[ $MINICONDA_MD5 != $(md5sum $MINICONDA | cut -d ' ' -f 1) ]]; then
6 | echo "Miniconda MD5 mismatch"
7 | exit 1
8 | fi
9 | bash $MINICONDA -b
10 | rm -f $MINICONDA
11 |
12 | export PATH=$HOME/miniconda3/bin:$PATH
13 |
14 | conda update -yq conda
15 | conda install -yq conda-build jinja2 conda-verify
16 |
--------------------------------------------------------------------------------
/devtools/travis-ci/set_doc_version.py:
--------------------------------------------------------------------------------
1 | import os
2 | import shutil
3 | from msmbuilder import version
4 |
5 | if version.release:
6 | docversion = version.version
7 | else:
8 | docversion = 'development'
9 |
10 | os.mkdir("docs/_deploy")
11 | shutil.copytree("docs/_build/html", "docs/_deploy/{docversion}"
12 | .format(docversion=docversion))
13 |
--------------------------------------------------------------------------------
/devtools/travis-ci/update_versions_json.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 | try:
4 | from urllib.request import urlopen
5 | except ImportError:
6 | from urllib2 import urlopen
7 | from msmbuilder import version
8 |
9 | if not version.release:
10 | print("This is not a release.")
11 | exit(0)
12 |
13 | URL = 'http://www.msmbuilder.org'
14 | data = urlopen(URL + '/versions.json').read().decode()
15 | versions = json.loads(data)
16 |
17 | # new release so all the others are now old
18 | for i in range(len(versions)):
19 | versions[i]['latest'] = False
20 |
21 | versions.append({
22 | 'version': version.version,
23 | 'display': version.short_version,
24 | 'url': "{base}/{version}".format(base=URL, version=version.version),
25 | 'latest': True,
26 | })
27 |
28 | with open("docs/_deploy/versions.json", 'w') as versionf:
29 | json.dump(versions, versionf, indent=2)
30 |
31 |
--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
1 | _build/
2 |
3 | # autosummary generated files
4 | _cluster/
5 | _msm/
6 | _hmm/
7 | _ratematrix/
8 | _decomposition/
9 | _preprocessing/
10 | _feature_selection/
11 | _featurization/
12 | _tpt/
13 | _io/
14 | _gmrq/
15 |
16 | # autogenerated (see conf.py)
17 | publications.rst
18 |
--------------------------------------------------------------------------------
/docs/LICENSE:
--------------------------------------------------------------------------------
1 | The MSMBuilder documentation is licensed under a Creative Commons
2 | Attribution 4.0 International License.
3 |
4 | https://creativecommons.org/licenses/by/4.0/
5 |
--------------------------------------------------------------------------------
/docs/_static/flow-chart.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/msmbuilder/msmbuilder/515fd5c27836c797692d600216b5eb224dfc1c5d/docs/_static/flow-chart.png
--------------------------------------------------------------------------------
/docs/_static/fspeptide.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/msmbuilder/msmbuilder/515fd5c27836c797692d600216b5eb224dfc1c5d/docs/_static/fspeptide.png
--------------------------------------------------------------------------------
/docs/_static/kde-vs-histogram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/msmbuilder/msmbuilder/515fd5c27836c797692d600216b5eb224dfc1c5d/docs/_static/kde-vs-histogram.png
--------------------------------------------------------------------------------
/docs/_static/lengths-hist.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/msmbuilder/msmbuilder/515fd5c27836c797692d600216b5eb224dfc1c5d/docs/_static/lengths-hist.png
--------------------------------------------------------------------------------
/docs/_static/logo-200px.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/msmbuilder/msmbuilder/515fd5c27836c797692d600216b5eb224dfc1c5d/docs/_static/logo-200px.png
--------------------------------------------------------------------------------
/docs/_static/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/msmbuilder/msmbuilder/515fd5c27836c797692d600216b5eb224dfc1c5d/docs/_static/logo.png
--------------------------------------------------------------------------------
/docs/_static/msm-microstates.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/msmbuilder/msmbuilder/515fd5c27836c797692d600216b5eb224dfc1c5d/docs/_static/msm-microstates.png
--------------------------------------------------------------------------------
/docs/_static/tica-heatmap.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/msmbuilder/msmbuilder/515fd5c27836c797692d600216b5eb224dfc1c5d/docs/_static/tica-heatmap.png
--------------------------------------------------------------------------------
/docs/_static/tica-movie.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/msmbuilder/msmbuilder/515fd5c27836c797692d600216b5eb224dfc1c5d/docs/_static/tica-movie.gif
--------------------------------------------------------------------------------
/docs/_static/tica_vs_pca.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/msmbuilder/msmbuilder/515fd5c27836c797692d600216b5eb224dfc1c5d/docs/_static/tica_vs_pca.png
--------------------------------------------------------------------------------
/docs/_templates/class.rst:
--------------------------------------------------------------------------------
1 | {{ fullname }}
2 | {{ underline }}
3 |
4 | .. currentmodule:: {{ module }}
5 |
6 | .. autoclass:: {{ objname }}
7 |
8 |
--------------------------------------------------------------------------------
/docs/advanced_examples/bayesian-msm.rst:
--------------------------------------------------------------------------------
1 | Bayesian Estimation of MSMs
2 | ===========================
3 |
4 | .. notebook:: examples/bayesian-msm.ipynb
5 |
6 |
--------------------------------------------------------------------------------
/docs/advanced_examples/gmrq-model-selection.rst:
--------------------------------------------------------------------------------
1 | GMRQ hyperparameter selection
2 | =============================
3 |
4 | .. notebook:: examples/gmrq-model-selection.ipynb
--------------------------------------------------------------------------------
/docs/advanced_examples/hmm-and-msm.rst:
--------------------------------------------------------------------------------
1 | HMM and MSM Timescales for Ala2
2 | ===============================
3 |
4 | .. notebook:: examples/hmm-and-msm.ipynb
5 |
6 |
--------------------------------------------------------------------------------
/docs/advanced_examples/implied-timescales.rst:
--------------------------------------------------------------------------------
1 | Implied Timescales
2 | ==================
3 |
4 | .. notebook:: examples/implied-timescales.ipynb
5 |
6 |
--------------------------------------------------------------------------------
/docs/advanced_examples/index.rst:
--------------------------------------------------------------------------------
1 | Examples
2 | ========
3 |
4 | This page provides a series of examples, tutorials and recipes for using
5 | MSMBuilder.
6 |
7 | Each subsection is a notebook. To open these notebooks in a "live" IPython
8 | session and execute the documentation interactively, you need to download
9 | the repository and start IPython notebook.
10 |
11 | If you installed `MSMBuilder` from source, you will need to navigate to
12 | :code:`./examples`. The notebook files for these examples are also
13 | available in the top-level `examples folder
14 | <https://github.com/msmbuilder/msmbuilder/tree/master/examples>`_ on GitHub.
15 | In the directory with the notebook files, start an IPython notebook
16 | server:
17 |
18 | .. code-block:: bash
19 |
20 | $ ipython notebook
21 |
22 |
23 | .. toctree::
24 | :maxdepth: 2
25 | :titlesonly:
26 | :glob:
27 |
28 | *
29 |
30 | .. vim: tw=75
31 |
--------------------------------------------------------------------------------
/docs/advanced_examples/plot-tica-heatmap.rst:
--------------------------------------------------------------------------------
1 | Visualization with tICA
2 | =======================
3 |
4 | .. notebook:: examples/plot-tica-heatmap.ipynb
5 |
6 |
--------------------------------------------------------------------------------
/docs/advanced_examples/quadwell-n-states.rst:
--------------------------------------------------------------------------------
1 | Model selection with Randomized CV
2 | ==================================
3 |
4 | .. notebook:: examples/quadwell-n-states.ipynb
5 |
--------------------------------------------------------------------------------
/docs/advanced_examples/quadwell.rst:
--------------------------------------------------------------------------------
 1 | Bootstrapped MSM CIs
 2 | ====================
3 |
4 | .. notebook:: examples/quadwell.ipynb
5 |
6 |
--------------------------------------------------------------------------------
/docs/advanced_examples/tica-1.rst:
--------------------------------------------------------------------------------
1 | tICA and PCA
2 | ============
3 |
4 | .. notebook:: examples/tica-example.ipynb
5 |
6 |
--------------------------------------------------------------------------------
/docs/advanced_examples/uncertainty.rst:
--------------------------------------------------------------------------------
1 | Estimating uncertainty in Markov state models
2 | =============================================
3 |
4 | .. notebook:: examples/uncertainty.ipynb
5 |
--------------------------------------------------------------------------------
/docs/background.rst:
--------------------------------------------------------------------------------
1 | .. _background:
2 |
3 | Motivation
4 | ==========
5 |
6 | The aim of this package is to provide software tools for predictive
7 | modeling of the long timescale dynamics of biomolecular systems using
8 | statistical modeling to analyze physical simulations.
9 |
10 | Given a dataset of one or more stochastic trajectories tracking the
11 | coordinates of every (10,000+) atom in a molecular system at a discrete
12 | time interval, how do we understand the slow dynamical processes and make
13 | quantitative predictions about the system?
14 |
15 |
16 | Workflow
17 | --------
18 |
19 | To build a dynamical model, we apply (stepwise) a series of dimensionality
20 | reductions. The basic set of steps is outlined below. Note that most steps
21 | are optional under certain circumstances. The particulars should become
22 | clear as you continue reading the documentation.
23 |
24 | 1. Set up a system for molecular dynamics, and run one or more simulations
25 |    for as long as you can on as many CPUs or GPUs as you have access to.
26 |    There are a lot of great software packages for running MD, e.g. `OpenMM
27 |    <https://simtk.org/home/openmm>`_, `Gromacs <http://www.gromacs.org/>`_,
28 |    `Amber <http://ambermd.org/>`_, `CHARMM <http://www.charmm.org/>`_, and
29 | many others. MSMBuilder is not one of them.
30 |
31 | 2. :ref:`Featurize <featurization>` trajectories into an appropriate vector
32 | of features. The full :math:`3N` set of atomic coordinates is
33 | potentially unwieldy and redundant. It likely does not respect the
34 | rotational or translational symmetry of your system either. We commonly
35 | use backbone dihedral angles as our features, although this depends
36 | highly on the system being modeled.
37 |
38 | 3. :ref:`Decompose <decomposition>` your features into a new basis that
39 | preserves the relevant information in your data with fewer dimensions.
40 | We typically use tICA, which finds linear combinations of input degrees
41 | of freedom that maximize autocorrelation or "slowness".
42 |
43 | 4. :ref:`Cluster <cluster>` your data to define (micro-)states by grouping
44 | similar input data points. At this stage, we've reduced the
45 | dimensionality of the problem from potentially thousands of :math:`xyz`
46 | coordinates to a single cluster (state) index.
47 |
48 | 5. :ref:`Estimate a model <msm>` from the clustered data. We typically build
49 | an MSM, which models the important dynamics of the system.
50 |
51 | 6. Use :ref:`GMRQ cross-validation <gmrq>` to select the best model. There
52 | are many hyperparameters (knobs to tweak) in the workflow. This scoring
53 | function can help us pick the best values.
54 |
55 |
56 | .. figure:: _static/flow-chart.png
57 | :align: center
58 | :width: 80%
59 |
60 | A diagram of potential workflows.
61 |
62 | .. vim: tw=75
63 |
--------------------------------------------------------------------------------
/docs/bibparse.py:
--------------------------------------------------------------------------------
1 | """Very simple bibtex parser for use in MSMBuilder doc generation
2 |
3 | Matthew Harrigan
4 | (c)2016, MIT License
5 | """
6 |
7 | from pyparsing import CaselessKeyword as kwd
8 | from pyparsing import QuotedString, Word, alphanums, Suppress, OneOrMore, nums, \
9 | Group, Optional, ZeroOrMore, alphas, alphas8bit, delimitedList
10 |
11 | # Change these if you need more flexibility:
12 | entry_type = kwd("article") | kwd("unpublished")
13 | cite_key = Word(alphanums + ":/._")
14 |
15 | LCURLY = Suppress('{')
16 | RCURLY = Suppress('}')
17 | COMMA = Suppress(',')
18 | AT = Suppress('@')
19 | EQUALS = Suppress('=')
20 |
21 | field_val = Word(nums) | QuotedString('{', endQuoteChar='}', multiline=True,
22 | convertWhitespaceEscapes=False)
23 | title_field = Group(kwd('title') + EQUALS + field_val)
24 | journal_field = Group(kwd('journal') + EQUALS + field_val)
25 | year_field = Group(kwd('year') + EQUALS + field_val)
26 | volume_field = Group(kwd('volume') + EQUALS + field_val)
27 | pages_field = Group(kwd('pages') + EQUALS + field_val)
28 | abstract_field = Group(kwd('abstract') + EQUALS + field_val)
29 | doi_field = Group(kwd('doi') + EQUALS + field_val)
30 | other_field = Group(Word(alphanums) + EQUALS + field_val)
31 |
32 | author = OneOrMore(~kwd('and') + Word(alphas + alphas8bit + '.,-'))
33 | author.setParseAction(lambda xx: ' '.join(str(x) for x in xx))
34 | author_list = LCURLY + delimitedList(author, 'and') + RCURLY
35 | author_field = Group(kwd('author') + EQUALS + Group(author_list))
36 |
37 | entry_item = (title_field | author_field | journal_field | year_field
38 | | volume_field | pages_field | abstract_field | doi_field
39 | | Suppress(other_field))
40 |
41 |
42 | class BibEntry(object):
43 | def __init__(self, type, cite_key, fields):
44 | self.type = type
45 | self.cite_key = cite_key
46 | self.fields = fields
47 | self.__dict__.update(**fields)
48 |
49 |
50 | def to_BibEntry(toks):
51 | return BibEntry(toks[0], toks[1], dict(toks[2:]))
52 |
53 |
54 | entry = (AT + entry_type + LCURLY + cite_key + COMMA
55 | + ZeroOrMore(entry_item + COMMA) + Optional(entry_item) + RCURLY)
56 | entry.setParseAction(to_BibEntry)
57 | entries = OneOrMore(entry)
58 |
--------------------------------------------------------------------------------
/docs/cluster.rst:
--------------------------------------------------------------------------------
1 | .. _cluster:
2 | .. currentmodule:: msmbuilder.cluster
3 |
4 | Clustering
5 | ==========
6 |
7 | Clustering MD trajectories groups the data [#f1]_ into a set of
8 | clusters such that conformations in the same cluster are structurally
9 | similar to one another, and conformations in different clusters are
10 | structurally distinct. The questions that arise are
11 |
12 | #. How should "structurally similar" be defined? What distance metric
13 | should be used?
14 |
15 | #. Given the distance metric, what algorithm should be used to actually
16 | cluster the data?
17 |
18 | On point 1, there is no consensus in the protein MD literature. Popular
19 | distance metrics include cartesian root-mean-squared deviation of atomic
20 | positions (RMSD) [#f3]_, distances based on the number of native contacts
21 | formed, distances based on the difference in backbone dihedral angles, and
22 | probably others.
23 |
24 | On point 2, "Optimal" clustering is NP-hard [#f2]_, so there's usually a
25 | tradeoff between clustering quality and computational cost. For that reason,
26 | MSMBuilder has a variety of different clustering algorithms implemented.
27 |
28 | Algorithms
29 | ----------
30 |
31 | All clustering algorithms in MSMBuilder follow the following basic API.
32 | Hyperparameters, including the number of clusters, random seeds, the
33 | distance metric (if applicable) are passed to the class constructor.
34 | Then, the computation is done by calling ``fit(sequences)``. The argument
35 | to ``fit`` should be a *list* of molecular dynamics trajectories or a list
36 | of 2D numpy arrays, each of shape ``(length_of_trajectory, n_features)``.
37 |
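For instance, a minimal sketch of this calling convention (random arrays
stand in for featurized trajectories, and the hyperparameter values are
arbitrary):

.. code-block:: python

    import numpy as np
    from msmbuilder.cluster import KCenters

    # Two "trajectories" of featurized data: a list of (n_frames, n_features) arrays
    sequences = [np.random.randn(500, 3), np.random.randn(1000, 3)]

    cluster = KCenters(n_clusters=50, metric='euclidean', random_state=0)
    cluster.fit(sequences)

    # labels_ is a list of integer arrays, one per input sequence
    print(len(cluster.labels_), cluster.labels_[0].shape)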
38 |
39 | .. autosummary::
40 | :toctree: _cluster/
41 | :nosignatures:
42 |
43 | KCenters
44 | KMeans
45 | KMedoids
46 | MiniBatchKMedoids
47 | RegularSpatial
48 | LandmarkAgglomerative
49 | AffinityPropagation
50 | GMM
51 | MeanShift
52 | MiniBatchKMeans
53 | SpectralClustering
54 | Ward
55 |
56 |
57 | .. todo: Example of clustering
58 |
59 | References
60 | ----------
61 |
62 | .. [#f1] The "data", for MD, refers to snapshots of the structure of a molecular system at a given time point -- i.e. the set of Cartesian coordinates for all the atoms, or some mathematical transformation thereof.
63 | .. [#f2] Aloise, Daniel, et al. `NP-hardness of Euclidean sum-of-squares clustering. `_ Machine Learning 75.2 (2009): 245-248.
64 | .. [#f3] http://en.wikipedia.org/wiki/Root-mean-square_deviation_of_atomic_positions
65 |
66 | .. vim: tw=75
67 |
--------------------------------------------------------------------------------
/docs/examples/Clustering-Comparison.rst:
--------------------------------------------------------------------------------
1 | Clustering Comparison
2 | =====================
3 |
4 | .. notebook:: Clustering-Comparison
5 |
--------------------------------------------------------------------------------
/docs/examples/Fs-Peptide-command-line.rst:
--------------------------------------------------------------------------------
1 | Fs Peptide (command line)
2 | =========================
3 |
4 | .. notebook:: Fs-Peptide-command-line
5 |
--------------------------------------------------------------------------------
/docs/examples/Fs-Peptide-in-RAM.rst:
--------------------------------------------------------------------------------
1 | Fs Peptide (in RAM)
2 | ===================
3 |
4 | .. notebook:: Fs-Peptide-in-RAM
5 |
--------------------------------------------------------------------------------
/docs/examples/Fs-Peptide-with-dataset.rst:
--------------------------------------------------------------------------------
1 | Fs Peptide (using ``dataset``)
2 | ==============================
3 |
4 | .. notebook:: Fs-Peptide-with-dataset
5 |
--------------------------------------------------------------------------------
/docs/examples/GMRQ-Model-Selection.rst:
--------------------------------------------------------------------------------
1 | GMRQ Model Selection
2 | ====================
3 |
4 | .. notebook:: GMRQ-Model-Selection
5 |
--------------------------------------------------------------------------------
/docs/examples/Ligand-Featurization.rst:
--------------------------------------------------------------------------------
1 | Ligand Featurization
2 | ====================
3 |
4 | .. notebook:: Ligand-Featurization
5 |
--------------------------------------------------------------------------------
/docs/examples/Ward-Clustering.rst:
--------------------------------------------------------------------------------
1 | Ward Clustering
2 | ===============
3 |
4 | .. notebook:: Ward-Clustering
5 |
--------------------------------------------------------------------------------
/docs/examples/index.rst:
--------------------------------------------------------------------------------
1 | .. _examples:
2 |
3 | Examples
4 | ========
5 |
6 | The following examples show off various aspects or capabilities of
7 | MSMBuilder. They can be run interactively in Jupyter (IPython) notebook.
8 | Download the `notebook files
 9 | <https://github.com/msmbuilder/msmbuilder/tree/master/examples>`_ and open
10 | them in Jupyter::
11 |
12 | $ jupyter notebook
13 |
14 | .. To make the ipython rendered images show up, each rst file must be
15 | in its own directory.
16 |
17 | .. toctree::
18 | :maxdepth: 2
19 | :titlesonly:
20 |
21 | Fs-Peptide-in-RAM
22 | Fs-Peptide-with-dataset
23 | Fs-Peptide-command-line
24 | tICA-vs-PCA
25 | Clustering-Comparison
26 | GMRQ-Model-Selection
27 | Ward-Clustering
28 | Ligand-Featurization
29 |
30 |
31 | Contributing examples
32 | ---------------------
33 |
34 | Do you have a neat example of using MSMBuilder? Format your code
35 | into an IPython notebook and submit a pull request!
36 |
37 | .. vim: tw=75
38 |
--------------------------------------------------------------------------------
/docs/examples/tICA-vs-PCA.rst:
--------------------------------------------------------------------------------
1 | tICA vs. PCA
2 | ============
3 |
4 | .. notebook:: tICA-vs-PCA
5 |
--------------------------------------------------------------------------------
/docs/feature_selection.rst:
--------------------------------------------------------------------------------
1 | .. _feature_selection:
2 | .. currentmodule:: msmbuilder.feature_selection
3 |
4 |
5 | Feature Selection
6 | =================
7 |
8 | Feature selection can be used to reduce the dimensionality of data sets,
9 | either to improve estimators’ accuracy or to boost their performance on very
10 | high-dimensional datasets.
11 |
12 | Feature Selectors
13 | -----------------
14 |
15 | .. autosummary::
16 | :toctree: _feature_selection/
17 |
18 | FeatureSelector
19 | VarianceThreshold
20 |
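As a minimal sketch, ``VarianceThreshold`` applied to featurized sequences
(random arrays stand in for real features):

.. code-block:: python

    import numpy as np
    from msmbuilder.feature_selection import VarianceThreshold

    # Two featurized "trajectories"; the third feature column is constant
    # and should be dropped by the default (zero-variance) threshold
    sequences = [np.hstack([np.random.randn(100, 2), np.zeros((100, 1))])
                 for _ in range(2)]

    selector = VarianceThreshold()
    reduced = selector.fit_transform(sequences)
    print(reduced[0].shape)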
21 |
22 | .. vim: tw=75
23 |
--------------------------------------------------------------------------------
/docs/featurization.rst:
--------------------------------------------------------------------------------
1 | .. _featurization:
2 | .. currentmodule:: msmbuilder.featurizer
3 |
4 |
5 | Featurization
6 | =============
7 |
8 | Many algorithms require that the input data be vectors in a (euclidean)
9 | vector space. This includes :class:`~msmbuilder.cluster.KMeans` clustering,
10 | :class:`~msmbuilder.decomposition.tICA`, and others.
11 |
12 | Since there's usually no special rotational or translational reference
13 | frame in an MD simulation, it's often desirable to remove rotational and
14 | translational motion via featurization that is insensitive to rotations and
15 | translations.
16 |
17 | Featurizations
18 | --------------
19 |
20 | .. autosummary::
21 | :toctree: _featurization/
22 |
23 | AtomPairsFeaturizer
24 | ContactFeaturizer
25 | DRIDFeaturizer
26 | DihedralFeaturizer
27 | GaussianSolventFeaturizer
28 | RMSDFeaturizer
29 | RawPositionsFeaturizer
30 | SuperposeFeaturizer
31 |
32 |
33 | Alternative to Featurization
34 | ----------------------------
35 |
36 | Many algorithms require vectorizable data. Other algorithms only require a
37 | pairwise distance metric, e.g. RMSD between two protein conformations. In
38 | general, you can define a pairwise distance among vectorized data, but you
39 | cannot embed data into a vector space from pairwise distances alone.
40 |
41 | Some :ref:`clustering ` methods let you use an arbitrary distance
42 | metric, including RMSD. In this case, the input to ``fit()`` may be a list
43 | of MD trajectories instead of a list of numpy arrays. Clustering methods
44 | that allow this currently include :class:`~msmbuilder.cluster.KCenters` and
45 | :class:`~msmbuilder.cluster.KMedoids`.
46 |
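For example, a sketch of RMSD-based clustering that skips vector
featurization entirely (it assumes the Fs peptide example dataset, which is
downloaded on first use; the number of clusters is arbitrary):

.. code-block:: python

    from msmbuilder.example_datasets import FsPeptide
    from msmbuilder.cluster import KCenters

    trajectories = FsPeptide().get().trajectories  # list of mdtraj.Trajectory objects

    # Cluster directly on the trajectories with the RMSD distance metric
    cluster = KCenters(n_clusters=25, metric='rmsd', random_state=0)
    cluster.fit(trajectories)
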
47 | .. vim: tw=75
48 |
--------------------------------------------------------------------------------
/docs/figures/kde-vs-histogram.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as pp
3 | from scipy.stats import norm
4 | from sklearn.neighbors import KernelDensity
5 |
6 |
7 | #----------------------------------------------------------------------
8 | # Plot the progression of histograms to kernels
9 | N = 100
10 | np.random.seed(1)
11 | X = np.concatenate((np.random.normal(0, 1, int(0.3 * N)),
12 |                     np.random.normal(5, 1, int(0.7 * N))))[:, np.newaxis]
13 |
14 | X_plot = np.linspace(-5, 10, 1000)[:, np.newaxis]
15 |
16 | true_dens = (0.3 * norm(0, 1).pdf(X_plot[:, 0])
17 | + 0.7 * norm(5, 1).pdf(X_plot[:, 0]))
18 |
19 |
20 | ax = pp.subplot(facecolor='w')  # 'axisbg' was removed in matplotlib 2.x
21 | ax.fill(X_plot[:, 0], true_dens, fc='black', alpha=0.2)
22 | pp.plot(X_plot[:, 0], true_dens, 'k-', lw=2, label='input distribution')
23 |
24 | kde = KernelDensity(kernel='gaussian', bandwidth=0.5).fit(X)
25 | log_dens = kde.score_samples(X_plot)
26 | ax.plot(X_plot[:, 0], np.exp(log_dens), '-', lw=2, c='r', label='Gaussian KDE')
27 | pp.twinx().hist(X, bins=20, alpha=0.5, label='Histogram')
28 |
29 |
30 | ax.text(6, 0.38, "N={0} points".format(N))
31 |
32 | ax.legend(loc='upper left')
33 |
34 | ax.set_xlim(-4, 9)
35 | ax.set_ylim(0, 0.4)
36 | pp.savefig('_static/kde-vs-histogram.png')
37 |
38 |
--------------------------------------------------------------------------------
/docs/gmrq.rst:
--------------------------------------------------------------------------------
1 | .. _gmrq:
2 | .. currentmodule:: msmbuilder
3 |
4 | Model Selection using GMRQ
5 | ==========================
6 |
7 | The generalized matrix Rayleigh quotient (GMRQ) is a specific application of
8 | the variational principle (adapted from `quantum mechanics
9 | `_)
10 | for Markov state models and a useful tool for model parameter selection.
11 |
12 | The variational principle yields a rigorous way of comparing two different
13 | Markov models for the same underlying stochastic process when using different
14 | state decompositions. Even under the assumption that you have access to
15 | infinite sampling, there is still some error associated with approximating the
16 | true continuous eigenfunctions of your modeled process with the indicator
17 | functions, as is the case with Markov state models. If we interpret the
18 | variational theorem as the measure of the quality of this approximation, the
19 | state decomposition that leads to a Markov model with larger leading dynamical
20 | eigenvalues is consequently the better state decomposition. If you wish to see
21 | the full derivation of this quantity, please refer to [#f1]_.
22 |
23 | Using this method, we can generate single scalar-valued scores for a proposed
24 | model given a supplied data set. This allows for the use of separate testing
25 | and training data sets to quantify and avoid statistical overfitting.
26 | This method extends these tools, making it possible to score trained models on
27 | new datasets and to perform hyperparameter selection. **PLEASE NOTE**: You cannot
28 | use GMRQ to optimize the MSM lag time. Changing the lag time fundamentally
29 | alters the model's eigenfunctions, so scores computed at different lag times
30 | are not comparable. The number of timescales used to score the model must
31 | also be held constant and chosen by the user.
32 |
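A minimal sketch of the scoring pattern, using synthetic discrete
trajectories in place of a real state decomposition:

.. code-block:: python

    import numpy as np
    from msmbuilder.msm import MarkovStateModel

    # Synthetic discrete trajectories stand in for featurized + clustered data
    assignments = [np.random.randint(0, 5, size=2000) for _ in range(4)]
    train, test = assignments[:2], assignments[2:]

    # Keep lag_time and n_timescales fixed while varying upstream
    # hyperparameters (featurization, number of clusters, ...)
    msm = MarkovStateModel(lag_time=10, n_timescales=3, verbose=False)
    msm.fit(train)
    print('train GMRQ:', msm.score(train))
    print('test GMRQ:', msm.score(test))
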
33 | Algorithms
34 | ----------
35 |
36 | .. autosummary::
37 | :toctree: _gmrq/
38 |
39 | decomposition.tICA.score
40 | msm.MarkovStateModel.score
41 | msm.ContinuousTimeMSM.score
42 |
43 |
44 |
45 |
46 | References
47 | ----------
48 |
49 | .. [#f1] McGibbon, Robert T., and Vijay S. Pande. `Variational cross-validation of slow dynamical modes in molecular kinetics `_ J. Chem. Phys. 142, 124105 (2015).
50 |
51 | .. vim: tw=75
52 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. _msmbuilder:
2 |
3 | MSMBuilder
4 | ==========
5 |
6 |
7 | .. raw:: html
8 |
9 |
10 | Statistical models for Biomolecular Dynamics
11 |
12 | MSMBuilder is an application and python library. It builds
13 | statistical models for high-dimensional time-series. The particular focus
14 | of the package is on the analysis of atomistic simulations of biomolecular
15 | dynamics such as protein folding and conformational change.
16 |
17 | To get started via `Anaconda Python `_,
18 | use::
19 |
20 | conda install -c omnia msmbuilder
21 |
22 | MSMBuilder includes algorithms for constructing dynamical models:
23 |
24 | - :ref:`featurization`
25 | - :ref:`feature_selection`
26 | - :ref:`preprocessing`
27 | - :ref:`decomposition`
28 | - :ref:`cluster`
29 | - :ref:`msm`
30 | - :ref:`hmm`
31 | - :ref:`ratematrix`
32 |
33 | As well as methods for analysis and validation of the models:
34 |
35 | - :ref:`gmrq`
36 | - :ref:`tpt`
37 |
38 | New users should check out:
39 |
40 | - :ref:`background`
41 | - :ref:`installation`
42 | - :ref:`tutorial`
43 | - :ref:`examples`
44 | - :ref:`faq`
45 |
46 | MSMBuilder is most effective as a library. Intermediate users should
47 | familiarize themselves with:
48 |
49 | - :ref:`apipatterns`
50 | - :ref:`datasets`
51 | - :ref:`changelog`
52 |
53 |
54 | MSMBuilder is developed primarily by researchers at Stanford University,
55 | and we welcome contributions. Development takes place on `GitHub
56 | <https://github.com/msmbuilder/msmbuilder>`_. MSMBuilder is licensed under
57 | the GNU LGPL (v2.1 or later).
58 |
59 |
60 |
61 | .. toctree::
62 | :maxdepth: 2
63 | :hidden:
64 |
65 | background
66 | installation
67 | tutorial
68 | examples/index
69 | featurization
70 | feature_selection
71 | preprocessing
72 | decomposition
73 | cluster
74 | msm
75 | gmrq
76 | tpt
77 | ratematrix
78 | hmm
79 | datasets
80 | io
81 | apipatterns
82 | plugins
83 | faq
84 | changelog
85 | publications
86 | contributing
87 |
88 | .. vim: tw=75
89 |
--------------------------------------------------------------------------------
/docs/installation.rst:
--------------------------------------------------------------------------------
1 | .. _installation:
2 |
3 | Installation
4 | ============
5 |
6 | The preferred installation mechanism for ``msmbuilder`` is with ``conda``.
7 |
8 | .. code-block:: bash
9 |
10 | $ conda install -c omnia msmbuilder
11 |
12 |
13 | If you don't have conda, or are new to scientific python, we recommend that
14 | you download the `Anaconda scientific python distribution
15 | <https://store.continuum.io/cshop/anaconda/>`_.
16 |
17 |
18 | From Source
19 | -----------
20 |
21 | MSMBuilder is a python package that heavily leans on other components of the
22 | scientific python ecosystem. See ``devtools/conda-recipe/meta.yaml`` for a
23 | complete and up-to-date list of build, run, and test dependencies. When you
24 | are sure the dependencies are satisfied you can install from PyPI
25 |
26 | .. code-block:: bash
27 |
28 | $ pip install msmbuilder
29 |
30 | or from source
31 |
32 | .. code-block:: bash
33 |
34 | $ git clone git@github.com:msmbuilder/msmbuilder
35 | $ cd msmbuilder/
36 | $ pip install .
37 | $ # (or: python setup.py install)
38 |
39 | Frequently Asked Questions
40 | --------------------------
41 |
42 | **Do I need Anaconda python? Can't I use the python that comes with my
43 | operating system, like /usr/bin/python?**
44 |
45 | You can have multiple ``python`` installations on your computer which do
46 | not interact with one another at all. The system python interpreter is used
47 | by your operating system for some of its own programs but is not the best
48 | choice for data analysis or science.
49 |
50 | We strongly recommend that you install Anaconda or Miniconda python
51 | distribution and that you have the ``conda`` package manager available.
52 |
53 | If you're interested in some of the details about packaging and scientific
54 | python, see `this blog post by Travis Oliphant
55 | `_.
56 |
57 | .. vim: tw=75
58 |
--------------------------------------------------------------------------------
/docs/plugins.rst:
--------------------------------------------------------------------------------
1 | .. _plugins:
2 |
3 | Writing Plugins
4 | ===============
5 |
6 | You can easily extend MSMBuilder by subclassing ``BaseEstimator`` or any of
7 | its children. You can even build your plugin to work with the ``msmb``
8 | command-line interface.
9 |
10 | 1. Subclass ``cmdline.Command`` or any of its children. For example,
11 | if you want to expose a new Featurizer from the command line.
12 |
13 | .. code-block:: python
14 |
15 | from msmbuilder.commands.featurizer import FeaturizerCommand
16 | class MyNiftyFeaturizerCommand(FeaturizerCommand):
17 | klass = MyNiftyFeaturizer
18 | _concrete = True
19 |
20 | 2. Provide your command as an "entry point" with ``setuptools``.
21 | Use ``"msmbuilder.commands"`` as the entry point.
22 | For example, in your ``setup.py``.
23 |
24 | .. code-block:: python
25 |
26 | setup(
27 | ...
28 | entry_points={'msmbuilder.commands':
29 |               'niftyfeat = niftyfeat:MyNiftyFeaturizerCommand'},
30 | )
31 |
32 | See the
33 | `setuptools documentation `_
34 | for more information.
35 |
36 | .. vim: tw=75
37 |
--------------------------------------------------------------------------------
/docs/preprocessing.rst:
--------------------------------------------------------------------------------
1 | .. _preprocessing:
2 | .. currentmodule:: msmbuilder.preprocessing
3 |
4 |
5 | Preprocessing
6 | =============
7 |
8 | Preprocessing of a dataset is a common requirement for many machine learning
9 | estimators and may involve scaling, centering, normalization, smoothing,
10 | binarization, and imputation methods.
11 |
12 | Preprocessors
13 | -------------
14 |
15 | .. autosummary::
16 | :toctree: _preprocessing/
17 |
18 | Binarizer
19 | Butterworth
20 | EWMA
21 | DoubleEWMA
22 | Imputer
23 | KernelCenterer
24 | LabelBinarizer
25 | MultiLabelBinarizer
26 | MinMaxScaler
27 | MaxAbsScaler
28 | Normalizer
29 | RobustScaler
30 | StandardScaler
31 | PolynomialFeatures
32 |
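As elsewhere in MSMBuilder, these estimators operate on a list of
per-trajectory arrays. A minimal sketch (random arrays stand in for real
features):

.. code-block:: python

    import numpy as np
    from msmbuilder.preprocessing import RobustScaler

    sequences = [10 * np.random.randn(200, 3) + 5 for _ in range(3)]

    scaler = RobustScaler()
    scaled = scaler.fit_transform(sequences)  # a list of scaled arrays
    print(len(scaled), scaled[0].shape)
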
33 | .. vim: tw=75
34 |
--------------------------------------------------------------------------------
/docs/publications_templ.rst:
--------------------------------------------------------------------------------
1 | .. _publications:
2 |
3 | Publications
4 | ============
5 |
6 | The following published works use MSMBuilder. To add your publication
7 | to the list, open an issue on GitHub with the relevant information or
8 | edit ``docs/publications.bib`` and submit a pull request.
9 |
10 | .. publications.bib lists the relevant publications
11 | .. publications_templ.rst defines how the publications will be displayed
12 | .. publications.rst is generated during sphinx build (see conf.py)
13 | and should not be edited directly!
14 |
15 | {% for pub in publications %}
16 | {{pub.title}}
17 | --------------------------------------------------------------------------------
18 |
19 | * {{pub.author | join('; ')}}
20 | * *{{pub.journal}}* **{{pub.year}}**, {{pub.volume}} {{pub.pages}}
21 | * `doi: {{pub.doi}} `_
22 |
23 | {{pub.abstract | wordwrap }}
24 |
25 | {% endfor %}
26 |
27 |
--------------------------------------------------------------------------------
/docs/ratematrix.rst:
--------------------------------------------------------------------------------
1 | .. _ratematrix:
2 | .. currentmodule:: msmbuilder.msm
3 |
4 | Continuous-time MSMs
5 | ====================
6 |
7 | :class:`MarkovStateModel` estimates a series of
8 | transition *probabilities* among states that depend on the discrete
9 | lag-time. Physically, we are probably more interested in a sparse set of
10 | transition *rates* in and out of states, estimated by
11 | :class:`ContinuousTimeMSM`.
12 |
13 |
14 | Theory
15 | ------
16 |
17 | Consider an `n`-state time-homogeneous Markov process, :math:`X(t)`. At
18 | time :math:`t`, the :math:`n`-vector :math:`P(t)`, where :math:`P_i(t) = Pr[ X(t) = i ]`, gives the
19 | probability that the system is in each of the :math:`n` states. These
20 | probabilities evolve forward in time, governed by an :math:`n \times n`
21 | transition rate matrix :math:`K`
22 |
23 | .. math ::
24 | dP(t)/dt = P(t) \cdot K
25 |
26 | The solution is
27 |
28 | .. math ::
29 |     P(t) = P(0) \cdot \exp(tK)
30 |
31 | Where :math:`\exp(tK)` is the matrix exponential. Written differently, the
32 | state-to-state lag-:math:`\tau` transition probabilities are
33 |
34 | .. math ::
35 | Pr[ X(t+\tau) = j \;|\; X(t) = i ] = \exp(\tau K)_{ij}
36 |
37 | For this model, we observe the evolution of one or more chains,
38 | :math:`X(t)` at a regular interval, :math:`\tau`. Let :math:`C_{ij}` be the
39 | number of times the chain was observed at state :math:`i` at time :math:`t`
40 | and at state :math:`j` at time :math:`t+\tau` (the number of observed
41 | transition counts). Suppose that :math:`K` depends on a parameter vector,
42 | :math:`\theta`. The log-likelihood is
43 |
44 | .. math ::
45 | \mathcal{L}(\theta) = \sum_{ij} \left[
46 | C_{ij} \log\left(\left[\exp(\tau K(\theta))\right]_{ij}\right)\right]
47 |
48 | The :class:`ContinuousTimeMSM` model finds a rate matrix that fits the data
49 | by maximizing this likelihood expression. Specifically, it uses L-BFGS-B
50 | to find a maximum likelihood estimate (MLE) rate matrix,
51 | :math:`\hat{\theta}` and :math:`K(\hat{\theta})`.
52 |
53 | Uncertainties
54 | ~~~~~~~~~~~~~
55 |
56 | Analytical estimates of the asymptotic standard deviation in estimated
57 | parameters like the stationary distribution, rate matrix, eigenvalues, and
58 | relaxation timescales can be computed by calling methods on the
59 | :class:`ContinuousTimeMSM` object. See [1] for more detail.
60 |
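A small usage sketch (synthetic discrete trajectories stand in for
clustered data; check the attribute and method names against your
installed version):

.. code-block:: python

    import numpy as np
    from msmbuilder.msm import ContinuousTimeMSM

    # Synthetic discrete trajectories observed at a regular interval
    sequences = [np.random.randint(0, 4, size=5000) for _ in range(2)]

    model = ContinuousTimeMSM(lag_time=1)
    model.fit(sequences)

    print(model.ratemat_)                  # maximum likelihood rate matrix K
    print(model.timescales_)               # relaxation timescales
    print(model.uncertainty_timescales())  # asymptotic standard errors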
61 |
62 | Algorithms
63 | ----------
64 |
65 | .. autosummary::
66 | :toctree: _ratematrix/
67 |
68 | ContinuousTimeMSM
69 |
70 |
71 | References
72 | ----------
73 | .. [1] McGibbon, R. T. and V. S. Pande, "Efficient maximum likelihood parameterization
74 | of continuous-time Markov processes." J. Chem. Phys. 143 034109 (2015) http://dx.doi.org/10.1063/1.4926516
75 | .. [2] Kalbfleisch, J. D., and Jerald F. Lawless. "The analysis of panel data
76 | under a Markov assumption." J. Am. Stat. Assoc. 80.392 (1985): 863-871.
77 |
78 | .. vim: tw=75
79 |
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | numpydoc
2 | matplotlib
3 | jupyter
4 | notebook
5 | jinja2
6 | openmm
7 | nbconvert
8 | msmb_data
9 | pyparsing
10 | msmexplorer
11 |
--------------------------------------------------------------------------------
/docs/tpt.rst:
--------------------------------------------------------------------------------
1 | .. _tpt:
2 | .. currentmodule:: msmbuilder.tpt
3 |
4 | Transition Path Theory
5 | ======================
6 |
7 |
8 | Transition path theory (TPT) is a way to extract the highest-flux pathways
9 | between chosen source and sink states of your system from an estimated MSM.
10 |
11 | .. todo: more
12 |
13 | .. todo: example
14 |
15 |
16 | References
17 | ----------
18 |
19 | These are some canonical references for TPT. Note that TPT is really a
20 | specialization of ideas long familiar in the mathematical study of Markov
21 | chains, and there are many books and manuscripts in the mathematical
22 | literature that cover the same concepts.
23 |
24 | .. [1] E, Weinan and Vanden-Eijnden, Eric. "Towards a theory of transition
25 |     paths." J. Stat. Phys. 123, 503-523 (2006).
26 | .. [2] Metzner, P., Schutte, C. & Vanden-Eijnden, E. Transition path theory
27 | for Markov jump processes. Multiscale Model. Simul. 7, 1192-1219
28 | (2009).
29 | .. [3] Berezhkovskii, A., Hummer, G. & Szabo, A. Reactive flux and folding
30 | pathways in network models of coarse-grained protein dynamics. J.
31 | Chem. Phys. 130, 205102 (2009).
32 | .. [4] Noé, Frank, et al. "Constructing the equilibrium ensemble of folding
33 | pathways from short off-equilibrium simulations." PNAS 106.45 (2009):
34 | 19011-19016.
35 |
36 | Functions
37 | ---------
38 |
39 | .. autosummary::
40 | :toctree: _tpt/
41 |
42 | fluxes
43 | net_fluxes
44 | fraction_visited
45 | hub_scores
46 | paths
47 | top_path
48 | committors
49 | conditional_committors
50 | mfpts
51 |
52 | .. vim: tw=75
53 |
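For orientation, here is a minimal usage sketch that is not part of the
documented examples: it assumes an already-fit
:class:`~msmbuilder.msm.MarkovStateModel` named ``msm``, toy source/sink
state indices, and a ``(sources, sinks, ...)`` argument order for the
functions listed above.

.. code-block:: python

    from msmbuilder import tpt

    sources, sinks = [0], [4]   # e.g. unfolded / folded macrostates (toy indices)

    q_plus = tpt.committors(sources, sinks, msm)    # forward committor per state
    net_flux = tpt.net_fluxes(sources, sinks, msm)  # net reactive flux matrix
    path, flux = tpt.top_path(sources, sinks, net_flux)
    print(path, flux)                               # highest-flux pathway and its flux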
--------------------------------------------------------------------------------
/examples/.gitignore:
--------------------------------------------------------------------------------
1 | .ipynb_checkpoints/
2 |
--------------------------------------------------------------------------------
/examples/LICENSE.md:
--------------------------------------------------------------------------------
1 | These example scripts are released under the MIT license. MSMBuilder
2 | is LGPL. Please consider citing MSMBuilder if you use it in your work.
3 |
4 | The MIT License (MIT)
5 |
6 | Copyright (c) 2016 Stanford University and the Authors
7 |
8 | Permission is hereby granted, free of charge, to any person obtaining a
9 | copy of this software and associated documentation files (the "Software"),
10 | to deal in the Software without restriction, including without limitation
11 | the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 | and/or sell copies of the Software, and to permit persons to whom the
13 | Software is furnished to do so, subject to the following conditions:
14 |
15 | The above copyright notice and this permission notice shall be included in
16 | all copies or substantial portions of the Software.
17 |
18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 | DEALINGS IN THE SOFTWARE.
25 |
--------------------------------------------------------------------------------
/examples/advanced/quadwell.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {
7 | "collapsed": false
8 | },
9 | "outputs": [],
10 | "source": [
11 | "%matplotlib inline\n",
12 | "import numpy as np\n",
13 | "from matplotlib import pyplot as plt\n",
14 | "from msmbuilder.example_datasets import QuadWell, quadwell_eigs\n",
15 | "from msmbuilder.cluster import NDGrid\n",
16 | "from msmbuilder.msm import MarkovStateModel\n",
17 | "from sklearn.pipeline import Pipeline"
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": null,
23 | "metadata": {
24 | "collapsed": false
25 | },
26 | "outputs": [],
27 | "source": [
28 | "dataset = QuadWell(random_state=0).get()\n",
29 | "true_eigenvalues = quadwell_eigs(200)[0]\n",
30 | "true_timescales = -1 / np.log(true_eigenvalues[1:])\n",
31 | "print(QuadWell.description())"
32 | ]
33 | },
34 | {
35 | "cell_type": "code",
36 | "execution_count": null,
37 | "metadata": {
38 | "collapsed": false
39 | },
40 | "outputs": [],
41 | "source": [
42 | "def msm_timescales(trajectories, n_states):\n",
43 | " pipeline = Pipeline([\n",
44 | " ('grid', NDGrid(min=-1.2, max=1.2)),\n",
45 | " ('msm', MarkovStateModel(n_timescales=4, reversible_type='transpose', verbose=False))\n",
46 | " ])\n",
47 | " pipeline.set_params(grid__n_bins_per_feature=n_states)\n",
48 | " pipeline.fit(trajectories)\n",
49 | " return pipeline.named_steps['msm'].timescales_\n",
50 | "\n",
51 | "n_states = [5, 10, 50, 100]\n",
52 | "ts = np.array([msm_timescales(dataset.trajectories, n) for n in n_states])"
53 | ]
54 | },
55 | {
56 | "cell_type": "code",
57 | "execution_count": null,
58 | "metadata": {
59 | "collapsed": false
60 | },
61 | "outputs": [],
62 | "source": [
63 | "for i, c in enumerate(['b', 'r', 'm']):\n",
64 | " plt.plot(n_states, ts[:, i], c=c, marker='x')\n",
65 | " plt.axhline(true_timescales[i], ls='--', c=c, lw=2)\n",
66 | "\n",
67 | "plt.xlabel('Number of states')\n",
68 | "plt.ylabel('Timescale (steps)')\n",
69 | "plt.show()"
70 | ]
71 | }
72 | ],
73 | "metadata": {
74 | "kernelspec": {
75 | "display_name": "Python 3",
76 | "language": "python",
77 | "name": "python3"
78 | },
79 | "language_info": {
80 | "codemirror_mode": {
81 | "name": "ipython",
82 | "version": 3
83 | },
84 | "file_extension": ".py",
85 | "mimetype": "text/x-python",
86 | "name": "python",
87 | "nbconvert_exporter": "python",
88 | "pygments_lexer": "ipython3",
89 | "version": "3.4.3"
90 | }
91 | },
92 | "nbformat": 4,
93 | "nbformat_minor": 0
94 | }
95 |
--------------------------------------------------------------------------------
/msmbuilder/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/msmbuilder/msmbuilder/515fd5c27836c797692d600216b5eb224dfc1c5d/msmbuilder/__init__.py
--------------------------------------------------------------------------------
/msmbuilder/base.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function, absolute_import, division
2 |
3 | from sklearn.base import BaseEstimator as SklearnBaseEstimator
4 |
5 |
6 | class BaseEstimator(SklearnBaseEstimator):
7 | # http://msmbuilder.org/development/apipatterns.html
8 |
9 | def summarize(self):
10 | """Return some diagnostic summary statistics about this Markov model"""
11 | return 'NotImplemented'
12 |
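A minimal sketch (not part of the library) of how a custom estimator follows
this pattern, overriding summarize() to report fit diagnostics:

    from msmbuilder.base import BaseEstimator

    class TrivialCounter(BaseEstimator):
        """Toy estimator: records how many sequences it was fit on."""

        def fit(self, sequences, y=None):
            self.n_sequences_ = len(sequences)
            return self

        def summarize(self):
            return ('TrivialCounter\n'
                    '--------------\n'
                    'n_sequences: %d' % self.n_sequences_)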
--------------------------------------------------------------------------------
/msmbuilder/cluster/.gitignore:
--------------------------------------------------------------------------------
1 | _kmedoids.cpp
--------------------------------------------------------------------------------
/msmbuilder/cluster/src/kmedoids.h:
--------------------------------------------------------------------------------
1 | /******************************************************************************/
2 | /* The C Clustering Library.
3 | * Copyright (C) 2002 Michiel Jan Laurens de Hoon.
4 | *
5 | * This library was written at the Laboratory of DNA Information Analysis,
6 | * Human Genome Center, Institute of Medical Science, University of Tokyo,
7 | * 4-6-1 Shirokanedai, Minato-ku, Tokyo 108-8639, Japan.
8 | * Contact: mdehoon 'AT' gsc.riken.jp
9 | *
10 | * Permission to use, copy, modify, and distribute this software and its
11 | * documentation with or without modifications and for any purpose and
12 | * without fee is hereby granted, provided that any copyright notices
13 | * appear in all copies and that both those copyright notices and this
14 | * permission notice appear in supporting documentation, and that the
15 | * names of the contributors or copyright holders not be used in
16 | * advertising or publicity pertaining to distribution of the software
17 | * without specific prior permission.
18 | *
19 | * THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL
20 | * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED
21 | * WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE
22 | * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT
23 | * OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
24 | * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
25 | * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
26 | * OR PERFORMANCE OF THIS SOFTWARE.
27 | *
28 | */
29 |
30 | #ifndef MIXTAPE_CLUSTER_KMEDOIDS_H
31 | #define MIXTAPE_CLUSTER_KMEDOIDS_H
32 | #include <map>
33 | #include <Python.h>
34 | #include <numpy/arrayobject.h>
35 |
36 | void kmedoids(npy_intp nclusters, npy_intp nelements, double* distmatrix,
37 | npy_intp npass, npy_intp clusterid[], PyObject* random,
38 | double* error, npy_intp* ifound);
39 |
40 |
41 | /*
42 | Renumber cluster ids to go from 0 to n_clusters - 1.
43 | This function modifies the array inplace, and returns
44 | the mapping from the old values to new values.
45 | */
46 | std::map<npy_intp, npy_intp> contigify_ids(npy_intp* ids, npy_intp length);
47 |
48 |
49 | #endif
50 |
--------------------------------------------------------------------------------
/msmbuilder/commands/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 |
3 | from .atom_indices import AtomIndices
4 | from .convert_chunked_project import ConvertChunkedProject
5 | from .example_datasets import AlanineDipeptideDatasetCommand
6 | from .featurizer import (AtomPairsFeaturizerCommand, ContactFeaturizerCommand,
7 | DihedralFeaturizerCommand, DRIDFeaturizerCommand,
8 | SuperposeFeaturizerCommand,
9 | KappaAngleFeaturizerCommand,
10 | AlphaAngleFeaturizerCommand, RMSDFeaturizerCommand,
11 | LandMarkRMSDFeaturizerCommand,
12 | BinaryContactFeaturizerCommand,
13 | LogisticContactFeaturizerCommand,
14 | VonMisesFeaturizerCommand,
15 | RawPositionsFeaturizerCommand, SASAFeaturizerCommand,
16 | LigandContactFeaturizerCommand,
17 | BinaryLigandContactFeaturizerCommand,
18 | LigandRMSDFeaturizerCommand)
19 | from .fit import (GaussianHMMCommand, MarkovStateModelCommand,
20 | BayesianMarkovStateModelCommand, ContinuousTimeMSMCommand,
21 | BayesianContinuousTimeMSMCommand)
22 |
23 | try:
24 | from .fit_transform import RobustScalerCommand, StandardScalerCommand
25 | except ImportError:
26 | pass
27 |
28 | from .fit_transform import (tICACommand, ButterworthCommand, DoubleEWMACommand,
29 | SparseTICACommand, FastICACommand,
30 | FactorAnalysisCommand, KernelTICACommand,
31 | PCACommand, SparsePCACommand,
32 | MiniBatchSparsePCACommand,
33 | KMeansCommand, MiniBatchKMeansCommand,
34 | KCentersCommand, KMedoidsCommand,
35 | MiniBatchKMedoidsCommand, RegularSpatialCommand,
36 | LandmarkAgglomerativeCommand, GMMCommand,
37 | MeanShiftCommand, NDGridCommand,
38 | SpectralClusteringCommand,
39 | AffinityPropagationCommand, APMCommand,
40 | AgglomerativeClusteringCommand, KSparseTICACommand)
41 | from .transform import TransformCommand
42 | from .example_datasets import (AlanineDipeptideDatasetCommand,
43 | FsPeptideDatasetCommand)
44 | from .atom_indices import AtomIndices
45 | from .implied_timescales import ImpliedTimescales
46 | from .template_project import TemplateProjectCommand
47 | from .transform import TransformCommand
48 |
--------------------------------------------------------------------------------
/msmbuilder/commands/example_datasets.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function, absolute_import
2 |
3 | from ..cmdline import NumpydocClassCommand
4 | from ..example_datasets import (AlanineDipeptide, DoubleWell, QuadWell, FsPeptide,
5 | MetEnkephalin, MullerPotential)
6 |
7 |
8 | class DatasetCommand(NumpydocClassCommand):
9 | _group = 'Dataset'
10 | def start(self):
11 | self.instance.cache()
12 | print('Example dataset saved: %s' % self.instance.data_dir)
13 |
14 |
15 | class AlanineDipeptideDatasetCommand(DatasetCommand):
16 | _concrete = True
17 | klass = AlanineDipeptide
18 | description = 'Download example alanine dipeptide dataset.'
19 |
20 |
21 | class _NWellDatasetCommand(DatasetCommand):
22 | def _random_state_type(self, s):
23 | if s is not None:
24 | return int(s)
25 | else:
26 | return s
27 |
28 |
29 | class DoubleWellDatasetCommand(_NWellDatasetCommand):
30 | _concrete = True
31 | klass = DoubleWell
32 | description = ('Generate example double well potential dataset.\n\n' +
33 | DoubleWell.description())
34 |
35 |
36 | class QuadWellDatasetCommand(_NWellDatasetCommand):
37 | _concrete = True
38 | klass = QuadWell
39 | description = ('Generate example quad-well potential dataset.\n\n' +
40 | QuadWell.description())
41 |
42 |
43 | class MullerPotentialDatasetCommand(_NWellDatasetCommand):
44 | _concrete = True
45 | klass = MullerPotential
46 | description = ('Generate example Muller potential dataset.\n\n'
47 | + MullerPotential.description())
48 |
49 |
50 | class FsPeptideDatasetCommand(DatasetCommand):
51 | _concrete = True
52 | klass = FsPeptide
53 | description = 'Download example Fs-peptide dataset.'
54 |
55 |
56 | class MetEnkephalinDatasetCommand(DatasetCommand):
57 | _concrete = True
58 | klass = MetEnkephalin
59 | description = 'Download example Met-Enkephalin dataset.'
60 |
--------------------------------------------------------------------------------
/msmbuilder/commands/fit.py:
--------------------------------------------------------------------------------
1 | # Author: Robert McGibbon
2 | # Contributors: Brooke Husic
3 | # Copyright (c) 2014, Stanford University
4 | # All rights reserved.
5 |
6 | # -----------------------------------------------------------------------------
7 | # Imports
8 | # -----------------------------------------------------------------------------
9 |
10 | from __future__ import print_function, absolute_import
11 |
12 | import os
13 |
14 | from ..dataset import dataset
15 | from ..utils import verbosedump
16 | from ..hmm import GaussianHMM
17 | from ..msm import (MarkovStateModel, BayesianMarkovStateModel, ContinuousTimeMSM,
18 | BayesianContinuousTimeMSM)
19 | from ..cmdline import NumpydocClassCommand, argument, exttype
20 |
21 |
22 | class FitCommand(NumpydocClassCommand):
23 | inp = argument(
24 | '-i', '--inp', help='''Input dataset. This should be serialized
25 | list of numpy arrays.''', required=True, type=os.path.expanduser)
26 | model = argument(
27 | '-o', '--out', help='''Output (fit) model. This will be a
28 | serialized instance of the fit model object.''', required=True,
29 | type=exttype('.pkl'))
30 |
31 | def start(self):
32 | if not os.path.exists(self.inp):
33 | self.error('File does not exist: %s' % self.inp)
34 |
35 | print(self.instance)
36 | inp_ds = dataset(self.inp, mode='r')
37 | self.instance.fit(inp_ds)
38 |
39 | print("*********\n*RESULTS*\n*********")
40 | print(self.instance.summarize())
41 | print('-' * 80)
42 |
43 | verbosedump(self.instance, self.out)
44 | print("To load this %s object interactively inside an IPython\n"
45 | "shell or notebook, run: \n" % self.klass.__name__)
46 | print(" $ ipython")
47 | print(" >>> from msmbuilder.utils import load")
48 | print(" >>> model = load('%s')\n" % self.out)
49 |
50 | inp_ds.close()
51 |
52 | class GaussianHMMCommand(FitCommand):
53 | klass = GaussianHMM
54 | _concrete = True
55 | _group = 'MSM'
56 |
57 |
58 | class MarkovStateModelCommand(FitCommand):
59 | klass = MarkovStateModel
60 | _concrete = True
61 | _group = 'MSM'
62 |
63 | def _ergodic_cutoff_type(self, erg):
64 | if erg.lower() in ['on', 'off']:
65 | return erg
66 | else:
67 | return float(erg)
68 |
69 |
70 | class BayesianMarkovStateModelCommand(FitCommand):
71 | klass = BayesianMarkovStateModel
72 | _concrete = True
73 | _group = 'MSM'
74 |
75 |
76 | class ContinuousTimeMSMCommand(FitCommand):
77 | klass = ContinuousTimeMSM
78 | _concrete = True
79 | _group = 'MSM'
80 |
81 |
82 | class BayesianContinuousTimeMSMCommand(FitCommand):
83 | klass = BayesianContinuousTimeMSM
84 | _concrete = True
85 | _group = 'MSM'
86 |
--------------------------------------------------------------------------------
/msmbuilder/commands/template_project.py:
--------------------------------------------------------------------------------
1 | """Set up a new MSMBuilder project
2 |
3 | """
4 | # Author: Matthew Harrigan
5 | # Contributors:
6 | # Copyright (c) 2016, Stanford University
7 | # All rights reserved.
8 |
9 | from __future__ import print_function, division, absolute_import
10 |
11 | import os
12 | import stat
13 | import textwrap
14 |
15 | from ..cmdline import NumpydocClassCommand, argument
16 | from ..io import TemplateProject
17 |
18 |
19 | def chmod_plus_x(fn):
20 | st = os.stat(fn)
21 | os.chmod(fn, st.st_mode | stat.S_IEXEC)
22 |
23 |
24 | class TemplateProjectCommand(NumpydocClassCommand):
25 | _group = '0-Support'
26 | _concrete = True
27 | description = __doc__
28 | klass = TemplateProject
29 |
30 | disclaimer = argument('--disclaimer', default=False, action='store_true',
31 | help="Print a disclaimer about using these templates.")
32 |
33 | def print_disclaimer(self):
34 | print('\n'.join(textwrap.wrap(
35 | "This writes a bunch of Python files that can guide you "
36 | "through analyzing a system with MSMBuilder. I implore you to "
37 | "look at the scripts before you start blindly running them. "
38 | "You will likely have to change some (hyper-)parameters or "
39 | "filenames to match your particular project."
40 | )))
41 | print()
42 | print('\n'.join(textwrap.wrap(
43 | "More than that, however, it is important that you understand "
44 | "exactly what the scripts are doing. Each protein system is "
45 | "different, and it is up to you (the researcher) to hone in on "
46 | "interesting aspects. This very generic pipeline may not give "
47 | "you any new insight for anything but the simplest systems."
48 | )))
49 |
50 | def start(self):
51 | if self.disclaimer:
52 | self.print_disclaimer()
53 | print()
54 | print("Run again without --disclaimer to actually write tempaltes.")
55 | return
56 |
57 | self.instance.do()
58 |
--------------------------------------------------------------------------------
/msmbuilder/decomposition/.gitignore:
--------------------------------------------------------------------------------
1 | _speigh.cpp
--------------------------------------------------------------------------------
/msmbuilder/decomposition/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 |
3 | from sklearn import decomposition as _decomposition
4 |
5 | from .base import MultiSequenceDecompositionMixin
6 | from .ktica import KernelTICA
7 | from .pca import PCA, SparsePCA, MiniBatchSparsePCA
8 | from .sparsetica import SparseTICA
9 | from .ksparsetica import KSparseTICA
10 | from .tica import tICA
11 |
12 |
13 | class FastICA(MultiSequenceDecompositionMixin, _decomposition.FastICA):
14 | __doc__ = _decomposition.FastICA.__doc__
15 |
16 | def summarize(self):
17 | return '\n'.join([
18 | "Independent Component Analysis (ICA)",
19 | "----------",
20 | "Number of components: {n_components}",
21 | "Number of iterations: {n_iter_}",
22 | ]).format(**self.__dict__)
23 |
24 |
25 | class FactorAnalysis(MultiSequenceDecompositionMixin,
26 | _decomposition.FactorAnalysis):
27 | __doc__ = _decomposition.FactorAnalysis.__doc__
28 |
29 | def summarize(self):
30 | return '\n'.join([
31 | "FactorAnalysis (FA)",
32 | "----------",
33 | "Number of components: {n_components}",
34 | "Log likelihood: {loglike_}",
35 | "Noise variance: {noise_variance_}",
36 | "Number of iterations: {n_iter_}",
37 | ]).format(**self.__dict__)
38 |
--------------------------------------------------------------------------------
/msmbuilder/decomposition/pca.py:
--------------------------------------------------------------------------------
1 | # Author: Matthew Harrigan
2 | # Contributors:
3 | # Copyright (c) 2016, Stanford University and the Authors
4 | # All rights reserved.
5 |
6 | from __future__ import print_function, division, absolute_import
7 |
8 | from sklearn import decomposition
9 |
10 | from .base import MultiSequenceDecompositionMixin
11 |
12 | __all__ = ['PCA', 'SparsePCA', 'MiniBatchSparsePCA', 'KernelPCA']
13 |
14 |
15 | class PCA(MultiSequenceDecompositionMixin, decomposition.PCA):
16 | __doc__ = decomposition.PCA.__doc__
17 |
18 | def summarize(self):
19 | return '\n'.join([
20 | "Principal Component Analysis (PCA)",
21 | "----------",
22 | "Number of components: {n_components}",
23 | "explained variance raio: {explained_variance_ratio_}",
24 | "Noise variance: {noise_variance_}",
25 | ]).format(**self.__dict__)
26 |
27 |
28 | class SparsePCA(MultiSequenceDecompositionMixin, decomposition.SparsePCA):
29 | __doc__ = decomposition.SparsePCA.__doc__
30 |
31 | def summarize(self):
32 | return '\n'.join([
33 | "Sparse PCA",
34 | "----------",
35 | "Number of components: {n_components}",
36 | ]).format(**self.__dict__)
37 |
38 |
39 | class MiniBatchSparsePCA(MultiSequenceDecompositionMixin,
40 | decomposition.MiniBatchSparsePCA):
41 | __doc__ = decomposition.MiniBatchSparsePCA.__doc__
42 |
43 | def summarize(self):
44 | return '\n'.join([
45 | "MiniBatch Sparse PCA",
46 | "--------------------",
47 | "Number of components: {n_components}",
48 | "Batch size: {batch_size}"
49 | ]).format(**self.__dict__)
50 |
51 |
52 | class KernelPCA(MultiSequenceDecompositionMixin, decomposition.KernelPCA):
53 | __doc__ = decomposition.KernelPCA.__doc__
54 |
55 | def summarize(self):
56 | return '\n'.join([
57 | "Kernel PCA",
58 | "--------------------",
59 | "Number of components: {n_components}",
60 | "Kernel: {kernel}",
61 | ]).format(**self.__dict__)
62 |
--------------------------------------------------------------------------------
/msmbuilder/decomposition/utils.py:
--------------------------------------------------------------------------------
1 | import hashlib
2 | import itertools
3 | import numpy as np
4 | from six.moves import xrange
5 |
6 |
7 | def iterate_tracker(maxiter, max_nc, verbose=False):
8 | """Generator that breaks after maxiter, or after the same
9 | array has been sent in more max_nc times in a row.
10 | """
11 | last_hash = None
12 | last_hash_count = 0
13 | arr = yield
14 |
15 | for i in xrange(maxiter):
16 | arr = yield i
17 | if arr is not None:
18 | hsh = hashlib.sha1(arr.view(np.uint8)).hexdigest()
19 | if last_hash == hsh:
20 | last_hash_count += 1
21 | else:
22 | last_hash = hsh
23 | last_hash_count = 1
24 |
25 | if last_hash_count >= max_nc:
26 | if verbose:
27 | print('Termination. Over %d iterations without '
28 | 'change.' % max_nc)
29 | break
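A usage sketch for iterate_tracker (hypothetical caller, not from the
library): prime the generator with next(), then send() the current array each
iteration; once maxiter is reached, or the same array arrives max_nc times in
a row, the generator finishes and send() raises StopIteration.

    import numpy as np
    from msmbuilder.decomposition.utils import iterate_tracker

    coefs = np.zeros(5)                       # hypothetical quantity being optimized
    tracker = iterate_tracker(maxiter=100, max_nc=3, verbose=True)
    next(tracker)                             # prime the generator
    try:
        while True:
            # ... update `coefs` in place here ...
            i = tracker.send(coefs)           # i is the current iteration index
    except StopIteration:
        pass                                  # converged or hit maxiter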
--------------------------------------------------------------------------------
/msmbuilder/example_datasets/.gitignore:
--------------------------------------------------------------------------------
1 | _muller.c
2 |
--------------------------------------------------------------------------------
/msmbuilder/example_datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from .base import get_data_home, clear_data_home, has_msmb_data
3 | from .brownian1d import DoubleWell, QuadWell
4 | from .brownian1d import load_doublewell, load_quadwell
5 | from .brownian1d import doublewell_eigs, quadwell_eigs
6 | from .alanine_dipeptide import fetch_alanine_dipeptide, AlanineDipeptide
7 | from .met_enkephalin import fetch_met_enkephalin, MetEnkephalin
8 | from .fs_peptide import fetch_fs_peptide, FsPeptide, MinimalFsPeptide
9 | from .muller import MullerPotential, load_muller
10 |
11 | __all__ = [
12 | 'get_data_home',
13 | 'clear_data_home',
14 | 'has_msmb_data',
15 | 'load_doublewell',
16 | 'load_quadwell',
17 | 'doublewell_eigs',
18 | 'quadwell_eigs',
19 | 'fetch_alanine_dipeptide',
20 | 'fetch_met_enkephalin',
21 | 'fetch_fs_peptide',
22 | 'AlanineDipeptide',
23 | 'MetEnkephalin',
24 | 'FsPeptide',
25 | 'DoubleWell',
26 | 'QuadWell',
27 | 'MullerPotential',
28 | 'load_muller',
29 | ]
30 |
--------------------------------------------------------------------------------
/msmbuilder/example_datasets/alanine_dipeptide.py:
--------------------------------------------------------------------------------
1 | # Author: Robert McGibbon
2 | # Contributors: Matthew Harrigan
3 | # Copyright (c) 2016, Stanford University and the Authors
4 | # All rights reserved.
5 |
6 | # -----------------------------------------------------------------------------
7 | # Imports
8 | # -----------------------------------------------------------------------------
9 | from __future__ import print_function, absolute_import, division
10 |
11 | from glob import glob
12 | from os.path import join
13 |
14 | import mdtraj as md
15 |
16 | from .base import Bunch, _MDDataset
17 |
18 | DATA_URL = "https://ndownloader.figshare.com/articles/1026131/versions/8"
19 | TARGET_DIRECTORY = "alanine_dipeptide"
20 |
21 |
22 | class AlanineDipeptide(_MDDataset):
23 | """Alanine dipeptide dataset
24 |
25 | Parameters
26 | ----------
27 | data_home : optional, default: None
28 | Specify another download and cache folder for the datasets. By default
29 | all MSMBuilder data is stored in '~/msmbuilder_data' subfolders.
30 |
31 |
32 | Notes
33 | -----
34 | The dataset consists of ten 10 ns trajectories of alanine dipeptide,
35 | simulated using OpenMM 6.0.1 (CUDA platform, NVIDIA GTX660) with the
36 | AMBER99SB-ILDN force field at 300K (langevin dynamics, friction coefficient
37 | of 91/ps, timestep of 2fs) with GBSA implicit solvent. The coordinates are
38 | saved every 1ps. Each trajectory contains 9,999 snapshots.
39 |
40 | The dataset, including the script used to generate it,
41 | is available on figshare at
42 |
43 | http://dx.doi.org/10.6084/m9.figshare.1026131
44 | """
45 | target_directory = TARGET_DIRECTORY
46 | data_url = DATA_URL
47 |
48 | def get_cached(self):
49 | top = md.load(join(self.data_dir, 'ala2.pdb'))
50 | trajectories = []
51 | for fn in glob(join(self.data_dir, 'trajectory*.dcd')):
52 | trajectories.append(md.load(fn, top=top))
53 |
54 | return Bunch(trajectories=trajectories, DESCR=self.description())
55 |
56 |
57 | def fetch_alanine_dipeptide(data_home=None):
58 | return AlanineDipeptide(data_home).get()
59 |
60 |
61 | fetch_alanine_dipeptide.__doc__ = AlanineDipeptide.__doc__
62 |
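A short usage sketch (not part of the module): load the cached dataset,
downloading it on first use, and inspect the returned Bunch.

    from msmbuilder.example_datasets import AlanineDipeptide

    bunch = AlanineDipeptide().get()   # fetches on first call, then reads the cache
    print(len(bunch.trajectories), 'trajectories')
    print(bunch.DESCR[:120])           # the class docstring doubles as the description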
--------------------------------------------------------------------------------
/msmbuilder/example_datasets/met_enkephalin.py:
--------------------------------------------------------------------------------
1 | # Author: Robert McGibbon
2 | # Contributors:
3 | # Copyright (c) 2014, Stanford University and the Authors
4 | # All rights reserved.
5 |
6 | # -----------------------------------------------------------------------------
7 | # Imports
8 | # -----------------------------------------------------------------------------
9 | from __future__ import print_function, absolute_import, division
10 |
11 | from glob import glob
12 | from os.path import join
13 |
14 | import mdtraj as md
15 |
16 | from .base import Bunch, _MDDataset
17 |
18 | DATA_URL = "https://ndownloader.figshare.com/articles/1026324/versions/1"
19 | TARGET_DIRECTORY = "met_enkephalin"
20 |
21 |
22 | class MetEnkephalin(_MDDataset):
23 | """Loader for the met-enkephalin dataset
24 |
25 | Parameters
26 | ----------
27 | data_home : optional, default: None
28 | Specify another download and cache folder for the datasets. By default
29 | all MSMBuilder data is stored in '~/msmbuilder_data' subfolders.
30 |
31 | download_if_missing: optional, True by default
32 | If False, raise an IOError if the data is not locally available
33 | instead of trying to download the data from the source site.
34 |
35 | Notes
36 | -----
37 | The dataset consists of ten ~50 ns molecular dynamics (MD) simulation
38 | trajectories of the 5 residue Met-enkaphalin peptide. The aggregate
39 | sampling is 499.58 ns. Simulations were performed starting from the 1st
40 | model in the 1PLX PDB file, solvated with 832 TIP3P water molecules using
41 | OpenMM 6.0. The coordinates (protein only -- the water was stripped)
42 | are saved every 5 picoseconds. Each of the ten trajectories is roughly
43 | 50 ns long and contains about 10,000 snapshots.
44 |
45 | Forcefield: amber99sb-ildn; water: tip3p; nonbonded method: PME; cutoffs:
46 | 1nm; bonds to hydrogen were constrained; integrator: langevin dynamics;
47 | temperature: 300K; friction coefficient: 1.0/ps; pressure control: Monte
48 | Carlo barostat (interval of 25 steps); timestep 2 fs.
49 |
50 | The dataset is available on figshare at
51 |
52 | http://dx.doi.org/10.6084/m9.figshare.1026324
53 | """
54 |
55 | data_url = DATA_URL
56 | target_directory = TARGET_DIRECTORY
57 |
58 | def get_cached(self):
59 | top = md.load(join(self.data_dir, '1plx.pdb'))
60 | trajectories = []
61 | for fn in glob(join(self.data_dir, 'trajectory*.dcd')):
62 | trajectories.append(md.load(fn, top=top))
63 |
64 | return Bunch(trajectories=trajectories, DESCR=self.description())
65 |
66 |
67 | def fetch_met_enkephalin(data_home=None):
68 | return MetEnkephalin(data_home).get()
69 |
70 |
71 | fetch_met_enkephalin.__doc__ = MetEnkephalin.__doc__
72 |
--------------------------------------------------------------------------------
/msmbuilder/feature_extraction/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 |
3 | from ..featurizer import *
4 | from ..featurizer import subset
5 |
--------------------------------------------------------------------------------
/msmbuilder/feature_selection/__init__.py:
--------------------------------------------------------------------------------
1 | # Author: Carlos Xavier Hernandez
2 | # Contributors:
3 | # Copyright (c) 2016, Stanford University and the Authors
4 | # All rights reserved.
5 |
6 | from __future__ import absolute_import
7 |
8 | from .base import MultiSequenceFeatureSelectionMixin
9 | from .featureselector import FeatureSlicer, FeatureSelector
10 |
11 | from sklearn import feature_selection
12 |
13 | class VarianceThreshold(MultiSequenceFeatureSelectionMixin,
14 | feature_selection.VarianceThreshold):
15 | __doc__ = feature_selection.VarianceThreshold.__doc__
16 |
--------------------------------------------------------------------------------
/msmbuilder/feature_selection/base.py:
--------------------------------------------------------------------------------
1 | # Author: Carlos Xavier Hernandez
2 | # Contributors:
3 | # Copyright (c) 2016, Stanford University and the Authors
4 | # All rights reserved.
5 |
6 | from __future__ import absolute_import
7 |
8 | from ..decomposition.base import MultiSequenceDecompositionMixin
9 |
10 |
11 | class MultiSequenceFeatureSelectionMixin(MultiSequenceDecompositionMixin):
12 | __doc__ = MultiSequenceDecompositionMixin.__doc__
13 |
--------------------------------------------------------------------------------
/msmbuilder/featurizer/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 |
3 | from .feature_union import FeatureUnion
4 | from .featurizer import *
5 | from .indices import get_atompair_indices
6 | from .multiseq_featuizer import *
7 | from .multichain import *
8 |
--------------------------------------------------------------------------------
/msmbuilder/featurizer/indices.py:
--------------------------------------------------------------------------------
1 | import itertools
2 |
3 | import numpy as np
4 |
5 | ATOM_NAMES = ["N", "CA", "CB", "C", "O", "H"]
6 |
7 |
8 | def get_atompair_indices(reference_traj, keep_atoms=None,
9 | exclude_atoms=None, reject_bonded=True):
10 | """Get a list of acceptable atom pairs.
11 |
12 | Parameters
13 | ----------
14 | reference_traj : mdtraj.Trajectory
15 | Trajectory to grab atom pairs from
16 | keep_atoms : np.ndarray, dtype=string, optional
17 | Select only these atom names. Defaults to N, CA, CB, C, O, H
18 | exclude_atoms : np.ndarray, dtype=string, optional
19 | Exclude these atom names
20 | reject_bonded : bool, default=True
21 | If True, exclude bonded atompairs.
22 |
23 | Returns
24 | -------
25 | atom_indices : np.ndarray, dtype=int
26 | The atom indices that pass your criteria
27 | pair_indices : np.ndarray, dtype=int, shape=(N, 2)
28 | Pairs of atom indices that pass your criteria.
29 |
30 | Notes
31 | -----
32 | This function has been optimized for speed. A naive implementation
33 | can be slow (~minutes) for large proteins.
34 | """
35 | if keep_atoms is None:
36 | keep_atoms = ATOM_NAMES
37 |
38 | top, bonds = reference_traj.top.to_dataframe()
39 |
40 |     atom_indices = top[top.name.isin(keep_atoms)].index.values
41 |
42 |     if exclude_atoms is not None:
43 |         excluded = top[top.name.isin(exclude_atoms)].index.values
44 |         atom_indices = np.setdiff1d(atom_indices, excluded)
45 |
46 | pair_indices = np.array(list(itertools.combinations(atom_indices, 2)))
47 |
48 | if reject_bonded:
49 | a_list = bonds.min(1)
50 | b_list = bonds.max(1)
51 |
52 | n = atom_indices.max() + 1
53 |
54 | bond_hashes = a_list + b_list * n
55 | pair_hashes = pair_indices[:, 0] + pair_indices[:, 1] * n
56 |
57 | not_bonds = ~np.in1d(pair_hashes, bond_hashes)
58 |
59 | pair_indices = np.array([(a, b) for k, (a, b)
60 | in enumerate(pair_indices)
61 | if not_bonds[k]])
62 |
63 | return atom_indices, pair_indices
64 |
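A usage sketch with a hypothetical input file and an assumed
AtomPairsFeaturizer(pair_indices) signature: pick non-bonded heavy-atom pairs
from a reference structure and hand them to a pair-distance featurizer.

    import mdtraj as md
    from msmbuilder.featurizer import AtomPairsFeaturizer, get_atompair_indices

    ref = md.load('protein.pdb')    # hypothetical reference structure
    atom_indices, pair_indices = get_atompair_indices(ref, exclude_atoms=['H'])
    featurizer = AtomPairsFeaturizer(pair_indices)   # distances between selected pairs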
--------------------------------------------------------------------------------
/msmbuilder/hmm/.gitignore:
--------------------------------------------------------------------------------
1 | gaussian.cpp
2 | gaussian.h
3 | vonmises.cpp
4 | vonmises.h
5 |
--------------------------------------------------------------------------------
/msmbuilder/hmm/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from .gaussian import GaussianHMM
3 | from .vonmises import VonMisesHMM
4 |
--------------------------------------------------------------------------------
/msmbuilder/hmm/cephes/README.md:
--------------------------------------------------------------------------------
1 | This code is from Cephes, downloaded directly from netlib:
2 |
3 | http://www.netlib.no/netlib/cephes/
4 |
5 |
6 | The original copyright, from the readme file of that distribution:
7 |
8 | Some software in this archive may be from the book _Methods and
9 | Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster
10 | International, 1989) or from the Cephes Mathematical Library, a
11 | commercial product. In either event, it is copyrighted by the author.
12 | What you see here may be used freely but it comes with no support or
13 | guarantee.
14 |
15 | The two known misprints in the book are repaired here in the
16 | source listings for the gamma function and the incomplete beta
17 | integral.
18 |
19 |
20 | Stephen L. Moshier
21 | moshier@na-net.ornl.gov
22 |
--------------------------------------------------------------------------------
/msmbuilder/hmm/cephes/cephes.h:
--------------------------------------------------------------------------------
1 | #ifndef _CEPHES_H_
2 | #define _CEPHES_H_
3 |
4 | #include "cephes_names.h"
5 | int mtherr(char *name, int code);
6 | double i0(double x);
7 | double i1(double x);
8 | double zeta(double x, double q);
9 | double psi(double x);
10 | double lgam(double x);
11 | double p1evl(double x, double coef[], int N);
12 | double polevl(double x, double coef[], int N);
13 | double chbevl(double x, double array[], int n);
14 |
15 | #endif
16 |
--------------------------------------------------------------------------------
/msmbuilder/hmm/cephes/cephes_names.h:
--------------------------------------------------------------------------------
1 | #ifndef CEPHES_NAMES_H
2 | #define CEPHES_NAMES_H
3 |
4 | #define airy cephes_airy
5 | #define bdtrc cephes_bdtrc
6 | #define bdtr cephes_bdtr
7 | #define bdtri cephes_bdtri
8 | #define beta cephes_beta
9 | #define lbeta cephes_lbeta
10 | #define btdtr cephes_btdtr
11 | #define cbrt cephes_cbrt
12 | #define chdtrc cephes_chdtrc
13 | #define chdtr cephes_chdtr
14 | #define chdtri cephes_chdtri
15 | #define dawsn cephes_dawsn
16 | #define ellie cephes_ellie
17 | #define ellik cephes_ellik
18 | #define ellpe cephes_ellpe
19 | #define ellpj cephes_ellpj
20 | #define ellpk cephes_ellpk
21 | #define exp10 cephes_exp10
22 | #define exp1m cephes_exp1m
23 | #define exp2 cephes_exp2
24 | #define expn cephes_expn
25 | // #define fabs cephes_fabs
26 | #define fdtrc cephes_fdtrc
27 | #define fdtr cephes_fdtr
28 | #define fdtri cephes_fdtri
29 | #define fresnl cephes_fresnl
30 | #define Gamma cephes_Gamma
31 | #define lgam cephes_lgam
32 | #define gdtr cephes_gdtr
33 | #define gdtrc cephes_gdtrc
34 | #define gdtri cephes_gdtri
35 | #define hyp2f1 cephes_hyp2f1
36 | #define hyperg cephes_hyperg
37 | #define hyp2f0 cephes_hyp2f0
38 | #define onef2 cephes_onef2
39 | #define threef0 cephes_threef0
40 | #define i0 cephes_i0
41 | #define i0e cephes_i0e
42 | #define i1 cephes_i1
43 | #define i1e cephes_i1e
44 | #define igamc cephes_igamc
45 | #define igam cephes_igam
46 | #define igami cephes_igami
47 | #define incbet cephes_incbet
48 | #define incbi cephes_incbi
49 | #define iv cephes_iv
50 | #define j0 cephes_j0
51 | #define y0 cephes_y0
52 | #define j1 cephes_j1
53 | #define y1 cephes_y1
54 | #define jn cephes_jn
55 | #define jv cephes_jv
56 | #define k0 cephes_k0
57 | #define k0e cephes_k0e
58 | #define k1 cephes_k1
59 | #define k1e cephes_k1e
60 | #define kn cephes_kn
61 | #define nbdtrc cephes_nbdtrc
62 | #define nbdtr cephes_nbdtr
63 | #define nbdtri cephes_nbdtri
64 | #define ndtr cephes_ndtr
65 | #define erfc cephes_erfc
66 | #define erf cephes_erf
67 | #define ndtri cephes_ndtri
68 | #define pdtrc cephes_pdtrc
69 | #define pdtr cephes_pdtr
70 | #define pdtri cephes_pdtri
71 | #define psi cephes_psi
72 | #define rgamma cephes_rgamma
73 | #define round cephes_round
74 | #define shichi cephes_shichi
75 | #define sici cephes_sici
76 | #define radian cephes_radian
77 | #define sindg cephes_sindg
78 | #define cosdg cephes_cosdg
79 | #define sincos cephes_sincos
80 | #define spence cephes_spence
81 | #define stdtr cephes_stdtr
82 | #define stdtri cephes_stdtri
83 | #define struve cephes_struve
84 | #define yv cephes_yv
85 | #define tandg cephes_tandg
86 | #define cotdg cephes_cotdg
87 | #define log1p cephes_log1p
88 | #define expm1 cephes_expm1
89 | #define cosm1 cephes_cosm1
90 | #define yn cephes_yn
91 | #define zeta cephes_zeta
92 | #define zetac cephes_zetac
93 | #define smirnov cephes_smirnov
94 | #define smirnovi cephes_smirnovi
95 | #define kolmogorov cephes_kolmogorov
96 | #define kolmogi cephes_kolmogi
97 |
98 | #endif
99 |
--------------------------------------------------------------------------------
/msmbuilder/hmm/cephes/chbevl.c:
--------------------------------------------------------------------------------
1 | /* chbevl.c
2 | *
3 | * Evaluate Chebyshev series
4 | *
5 | *
6 | *
7 | * SYNOPSIS:
8 | *
9 | * int N;
10 | * double x, y, coef[N], chebevl();
11 | *
12 | * y = chbevl( x, coef, N );
13 | *
14 | *
15 | *
16 | * DESCRIPTION:
17 | *
18 | * Evaluates the series
19 | *
20 | * N-1
21 | * - '
22 | * y = > coef[i] T (x/2)
23 | * - i
24 | * i=0
25 | *
26 | * of Chebyshev polynomials Ti at argument x/2.
27 | *
28 | * Coefficients are stored in reverse order, i.e. the zero
29 | * order term is last in the array. Note N is the number of
30 | * coefficients, not the order.
31 | *
32 | * If coefficients are for the interval a to b, x must
33 | * have been transformed to x -> 2(2x - b - a)/(b-a) before
34 | * entering the routine. This maps x from (a, b) to (-1, 1),
35 | * over which the Chebyshev polynomials are defined.
36 | *
37 | * If the coefficients are for the inverted interval, in
38 | * which (a, b) is mapped to (1/b, 1/a), the transformation
39 | * required is x -> 2(2ab/x - b - a)/(b-a). If b is infinity,
40 | * this becomes x -> 4a/x - 1.
41 | *
42 | *
43 | *
44 | * SPEED:
45 | *
46 | * Taking advantage of the recurrence properties of the
47 | * Chebyshev polynomials, the routine requires one more
48 | * addition per loop than evaluating a nested polynomial of
49 | * the same degree.
50 | *
51 | */
52 | /* chbevl.c */
53 |
54 | /*
55 | Cephes Math Library Release 2.0: April, 1987
56 | Copyright 1985, 1987 by Stephen L. Moshier
57 | Direct inquiries to 30 Frost Street, Cambridge, MA 02140
58 | */
59 |
60 | double chbevl(double x, double array[] , int n ) {
61 | double b0, b1, b2, *p;
62 | int i;
63 |
64 | p = array;
65 | b0 = *p++;
66 | b1 = 0.0;
67 | i = n - 1;
68 |
69 | do {
70 | b2 = b1;
71 | b1 = b0;
72 | b0 = x * b1 - b2 + *p++;
73 | } while( --i );
74 |
75 | return( 0.5*(b0-b2) );
76 | }
77 |
--------------------------------------------------------------------------------
/msmbuilder/hmm/cephes/mtherr.c:
--------------------------------------------------------------------------------
1 | /* mtherr.c
2 | *
3 | * Library common error handling routine
4 | *
5 | *
6 | *
7 | * SYNOPSIS:
8 | *
9 | * char *fctnam;
10 | * int code;
11 | * int mtherr();
12 | *
13 | * mtherr( fctnam, code );
14 | *
15 | *
16 | *
17 | * DESCRIPTION:
18 | *
19 | * This routine may be called to report one of the following
20 | * error conditions (in the include file mconf.h).
21 | *
22 | * Mnemonic Value Significance
23 | *
24 | * DOMAIN 1 argument domain error
25 | * SING 2 function singularity
26 | * OVERFLOW 3 overflow range error
27 | * UNDERFLOW 4 underflow range error
28 | * TLOSS 5 total loss of precision
29 | * PLOSS 6 partial loss of precision
30 | * EDOM 33 Unix domain error code
31 | * ERANGE 34 Unix range error code
32 | *
33 | * The default version of the file prints the function name,
34 | * passed to it by the pointer fctnam, followed by the
35 | * error condition. The display is directed to the standard
36 | * output device. The routine then returns to the calling
37 | * program. Users may wish to modify the program to abort by
38 | * calling exit() under severe error conditions such as domain
39 | * errors.
40 | *
41 | * Since all error conditions pass control to this function,
42 | * the display may be easily changed, eliminated, or directed
43 | * to an error logging device.
44 | *
45 | * SEE ALSO:
46 | *
47 | * mconf.h
48 | *
49 | */
50 |
51 | /*
52 | Cephes Math Library Release 2.0: April, 1987
53 | Copyright 1984, 1987 by Stephen L. Moshier
54 | Direct inquiries to 30 Frost Street, Cambridge, MA 02140
55 | */
56 |
57 | #include <stdio.h>
58 | #include "mconf.h"
59 |
60 | int merror = 0;
61 |
62 | /* Notice: the order of appearance of the following
63 | * messages is bound to the error codes defined
64 | * in mconf.h.
65 | */
66 | static char *ermsg[7] = {
67 | "unknown", /* error code 0 */
68 | "domain", /* error code 1 */
69 | "singularity", /* et seq. */
70 | "overflow",
71 | "underflow",
72 | "total loss of precision",
73 | "partial loss of precision"
74 | };
75 |
76 |
77 | int mtherr(char* name, int code)
78 | {
79 |
80 | /* Display string passed by calling program,
81 | * which is supposed to be the name of the
82 | * function in which the error occurred:
83 | */
84 | printf( "\n%s ", name );
85 |
86 | /* Set global error message word */
87 | merror = code;
88 |
89 | /* Display error message defined
90 | * by the code argument.
91 | */
92 | if( (code <= 0) || (code >= 7) )
93 | code = 0;
94 | printf( "%s error\n", ermsg[code] );
95 |
96 | /* Return to calling
97 | * program
98 | */
99 | return( 0 );
100 | }
101 |
--------------------------------------------------------------------------------
/msmbuilder/hmm/cephes/polevl.c:
--------------------------------------------------------------------------------
1 | /* polevl.c
2 | * p1evl.c
3 | *
4 | * Evaluate polynomial
5 | *
6 | *
7 | *
8 | * SYNOPSIS:
9 | *
10 | * int N;
11 | * double x, y, coef[N+1], polevl[];
12 | *
13 | * y = polevl( x, coef, N );
14 | *
15 | *
16 | *
17 | * DESCRIPTION:
18 | *
19 | * Evaluates polynomial of degree N:
20 | *
21 | * 2 N
22 | * y = C + C x + C x +...+ C x
23 | * 0 1 2 N
24 | *
25 | * Coefficients are stored in reverse order:
26 | *
27 | * coef[0] = C , ..., coef[N] = C .
28 | * N 0
29 | *
30 | * The function p1evl() assumes that coef[N] = 1.0 and is
31 | * omitted from the array. Its calling arguments are
32 | * otherwise the same as polevl().
33 | *
34 | *
35 | * SPEED:
36 | *
37 | * In the interest of speed, there are no checks for out
38 | * of bounds arithmetic. This routine is used by most of
39 | * the functions in the library. Depending on available
40 | * equipment features, the user may wish to rewrite the
41 | * program in microcode or assembly language.
42 | *
43 | */
44 |
45 |
46 | /*
47 | Cephes Math Library Release 2.1: December, 1988
48 | Copyright 1984, 1987, 1988 by Stephen L. Moshier
49 | Direct inquiries to 30 Frost Street, Cambridge, MA 02140
50 | */
51 |
52 |
53 | double polevl(double x, double coef[], int N)
54 | {
55 | double ans;
56 | int i;
57 | double *p;
58 |
59 | p = coef;
60 | ans = *p++;
61 | i = N;
62 |
63 | do
64 | ans = ans * x + *p++;
65 | while( --i );
66 |
67 | return( ans );
68 | }
69 |
70 | /* p1evl() */
71 | /* N
72 | * Evaluate polynomial when coefficient of x is 1.0.
73 | * Otherwise same as polevl.
74 | */
75 |
76 | double p1evl(double x, double coef[], int N)
77 | {
78 | double ans;
79 | double *p;
80 | int i;
81 |
82 | p = coef;
83 | ans = x + *p++;
84 | i = N-1;
85 |
86 | do
87 | ans = ans * x + *p++;
88 | while( --i );
89 |
90 | return( ans );
91 | }
92 |
--------------------------------------------------------------------------------
/msmbuilder/hmm/src/include/GaussianHMMFitter.h:
--------------------------------------------------------------------------------
1 | #ifndef MIXTAPE_GAUSSIAN_HMM_FITTER_H
2 | #define MIXTAPE_GAUSSIAN_HMM_FITTER_H
3 |
4 | #include "HMMFitter.h"
5 |
6 | namespace msmbuilder {
7 |
8 | /**
9 | * This subclass of HMMFitter computes Gaussian HMMs.
10 | */
11 | template <class T>
12 | class GaussianHMMFitter : public HMMFitter<T> {
13 | public:
14 | GaussianHMMFitter(void* owner, int n_states, int n_features, int n_iter, const double* log_startprob);
15 |
16 | ~GaussianHMMFitter();
17 |
18 | void set_means_and_variances(const double* means, const double* variances);
19 |
20 | void initialize_sufficient_statistics();
21 |
22 | void compute_log_likelihood(const Trajectory& trajectory,
23 | std::vector<std::vector<double> >& frame_log_probability) const;
24 |
25 | void accumulate_sufficient_statistics(const Trajectory& trajectory,
26 | const std::vector<std::vector<double> >& frame_log_probability,
27 | const std::vector<std::vector<double> >& posteriors,
28 | const std::vector<std::vector<double> >& fwdlattice,
29 | const std::vector<std::vector<double> >& bwdlattice);
30 |
31 | void get_obs(double* output);
32 |
33 | void get_obs2(double* output);
34 |
35 | void do_mstep();
36 | private:
37 | void* owner;
38 | std::vector<double> obs, obs2, a0, a1, a2;
39 | };
40 |
41 | } // namespace msmbuilder
42 |
43 | #endif
--------------------------------------------------------------------------------
/msmbuilder/hmm/src/include/VonMisesHMMFitter.h:
--------------------------------------------------------------------------------
1 | #ifndef MIXTAPE_VONMISES_HMM_FITTER_H
2 | #define MIXTAPE_VONMISES_HMM_FITTER_H
3 |
4 | #include "HMMFitter.h"
5 |
6 | namespace msmbuilder {
7 |
8 | /**
9 | * This subclass of HMMFitter computes von Mises HMMs.
10 | */
11 | template <class T>
12 | class VonMisesHMMFitter : public HMMFitter<T> {
13 | public:
14 | VonMisesHMMFitter(void* owner, int n_states, int n_features, int n_iter, const double* log_startprob);
15 |
16 | ~VonMisesHMMFitter();
17 |
18 | void set_means_and_kappas(const double* means, const double* kappas);
19 |
20 | void initialize_sufficient_statistics();
21 |
22 | void compute_log_likelihood(const Trajectory& trajectory,
23 | std::vector<std::vector<double> >& frame_log_probability) const;
24 |
25 | void accumulate_sufficient_statistics(const Trajectory& trajectory,
26 | const std::vector<std::vector<double> >& frame_log_probability,
27 | const std::vector<std::vector<double> >& posteriors,
28 | const std::vector<std::vector<double> >& fwdlattice,
29 | const std::vector<std::vector<double> >& bwdlattice);
30 |
31 | void get_cosobs(double* output);
32 |
33 | void get_sinobs(double* output);
34 |
35 | void do_mstep();
36 | private:
37 | void* owner;
38 | std::vector<double> cosobs, sinobs, means, kappas;
39 | };
40 |
41 | } // namespace msmbuilder
42 |
43 | #endif
--------------------------------------------------------------------------------
/msmbuilder/io/__init__.py:
--------------------------------------------------------------------------------
1 | from .gather_metadata import (gather_metadata, GenericParser,
2 | NumberedRunsParser, HierarchyParser, ParseWarning)
3 | from .io import (backup, preload_top, preload_tops, load_meta, load_generic,
4 | load_trajs, save_meta, render_meta, save_generic, save_trajs,
5 | itertrajs)
6 | from .project_template import TemplateProject
--------------------------------------------------------------------------------
/msmbuilder/io/sampling/__init__.py:
--------------------------------------------------------------------------------
1 | from .sampling import sample_dimension, sample_states, sample_msm
--------------------------------------------------------------------------------
/msmbuilder/io_templates/twitter-bootstrap.html:
--------------------------------------------------------------------------------
(HTML markup lost in extraction: a Bootstrap-styled Jinja page template that puts {{title}} in the document head and renders {{content}} inside the body container.)
--------------------------------------------------------------------------------
/msmbuilder/libdistance/.gitignore:
--------------------------------------------------------------------------------
1 | libdistance.cpp
--------------------------------------------------------------------------------
/msmbuilder/libdistance/src/cdist.hpp:
--------------------------------------------------------------------------------
1 | #include "distance_kernels.h"
2 |
3 |
4 | void cdist_double(const double* XA, const double* XB, const char* metric,
5 | npy_intp na, npy_intp nb, npy_intp m, double* out)
6 |
7 | {
8 | npy_intp i, j, k;
9 | const double *u, *v;
10 | double (*metricfunc) (const double *u, const double *v, npy_intp n) = \
11 | metric_double(metric);
12 | if (metricfunc == NULL) {
13 | fprintf(stderr, "Error");
14 | return;
15 | }
16 |
17 | k = 0;
18 | for (i = 0; i < na; i++) {
19 | for (j = 0; j < nb; j++) {
20 | u = XA + m * i;
21 | v = XB + m * j;
22 | out[k++] = metricfunc(u, v, m);
23 | }
24 | }
25 | }
26 |
27 |
28 | void cdist_float(const float* XA, const float* XB, const char* metric,
29 | npy_intp na, npy_intp nb, npy_intp m, double* out)
30 |
31 | {
32 | npy_intp i, j, k;
33 | const float *u, *v;
34 | double (*metricfunc) (const float *u, const float *v, npy_intp n) = \
35 | metric_float(metric);
36 | if (metricfunc == NULL) {
37 | fprintf(stderr, "Error");
38 | return;
39 | }
40 |
41 | k = 0;
42 | for (i = 0; i < na; i++) {
43 | for (j = 0; j < nb; j++) {
44 | u = XA + m * i;
45 | v = XB + m * j;
46 | out[k++] = metricfunc(u, v, m);
47 | }
48 | }
49 | }
--------------------------------------------------------------------------------
/msmbuilder/libdistance/src/dist.hpp:
--------------------------------------------------------------------------------
1 | #include "distance_kernels.h"
2 |
3 |
4 | void dist_double(const double* X, const double* y, const char* metric, npy_intp n,
5 | npy_intp m, double* out)
6 | {
7 | npy_intp i;
8 | const double *u;
9 | double (*metricfunc) (const double *u, const double *v, npy_intp n) = \
10 | metric_double(metric);
11 | if (metricfunc == NULL) {
12 | fprintf(stderr, "Error");
13 | return;
14 | }
15 |
16 | for (i = 0; i < n; i++) {
17 | u = X + m * i;
18 | out[i] = metricfunc(u, y, m);
19 | }
20 | }
21 |
22 |
23 | void dist_double_X_indices(const double* X, const double* y, const char* metric,
24 | npy_intp n, npy_intp m, const npy_intp* X_indices,
25 | npy_intp n_X_indices, double* out)
26 | {
27 | npy_intp i, ii;
28 | const double *u;
29 | double (*metricfunc) (const double *u, const double *v, npy_intp n) = \
30 | metric_double(metric);
31 | if (metricfunc == NULL) {
32 | fprintf(stderr, "Error");
33 | return;
34 | }
35 |
36 | for (ii = 0; ii < n_X_indices; ii++) {
37 | i = X_indices[ii];
38 | u = X + m * i;
39 | out[ii] = metricfunc(u, y, m);
40 | }
41 | }
42 |
43 |
44 | void dist_float(const float* X, const float* y, const char* metric, npy_intp n,
45 | npy_intp m, double* out)
46 | {
47 | npy_intp i;
48 | const float *u;
49 | double (*metricfunc) (const float *u, const float *v, npy_intp n) = \
50 | metric_float(metric);
51 | if (metricfunc == NULL) {
52 | fprintf(stderr, "Error");
53 | return;
54 | }
55 |
56 | for (i = 0; i < n; i++) {
57 | u = X + m * i;
58 | out[i] = metricfunc(u, y, m);
59 | }
60 | }
61 |
62 | void dist_float_X_indices(const float* X, const float* y, const char* metric,
63 | npy_intp n, npy_intp m, const npy_intp* X_indices,
64 | npy_intp n_X_indices, double* out)
65 | {
66 | npy_intp i, ii;
67 | const float *u;
68 | double (*metricfunc) (const float *u, const float *v, npy_intp n) = \
69 | metric_float(metric);
70 | if (metricfunc == NULL) {
71 | fprintf(stderr, "Error");
72 | return;
73 | }
74 |
75 | for (ii = 0; ii < n_X_indices; ii++) {
76 | i = X_indices[ii];
77 | u = X + m * i;
78 | out[ii] = metricfunc(u, y, m);
79 | }
80 | }
81 |
--------------------------------------------------------------------------------
/msmbuilder/libdistance/src/pdist.hpp:
--------------------------------------------------------------------------------
1 | #include "distance_kernels.h"
2 |
3 |
4 | void pdist_double(const double* X, const char* metric, npy_intp n, npy_intp m,
5 | double* out)
6 | {
7 | npy_intp i, j, k;
8 | const double *u, *v;
9 | double (*metricfunc) (const double *u, const double *v, npy_intp n) = \
10 | metric_double(metric);
11 | if (metricfunc == NULL) {
12 | fprintf(stderr, "Error");
13 | return;
14 | }
15 |
16 | k = 0;
17 | for (i = 0; i < n; i++) {
18 | for (j = i+1; j < n; j++) {
19 | u = X + m * i;
20 | v = X + m * j;
21 | out[k++] = metricfunc(u, v, m);
22 | }
23 | }
24 | }
25 |
26 | void pdist_double_X_indices(const double* X, const char* metric, npy_intp n,
27 | npy_intp m, const npy_intp* X_indices,
28 | npy_intp n_X_indices, double* out)
29 | {
30 | npy_intp i, ii, j, jj, k;
31 | const double *u, *v;
32 | double (*metricfunc) (const double *u, const double *v, npy_intp n) = \
33 | metric_double(metric);
34 | if (metricfunc == NULL) {
35 | fprintf(stderr, "Error");
36 | return;
37 | }
38 |
39 | k = 0;
40 | for (ii = 0; ii < n_X_indices; ii++) {
41 | i = X_indices[ii];
42 | for (jj = ii+1; jj < n_X_indices; jj++) {
43 | j = X_indices[jj];
44 | u = X + m * i;
45 | v = X + m * j;
46 | out[k++] = metricfunc(u, v, m);
47 | }
48 | }
49 | }
50 |
51 |
52 | void pdist_float(const float* X, const char* metric, npy_intp n, npy_intp m,
53 | double* out)
54 | {
55 | npy_intp i, j, k;
56 | const float *u, *v;
57 | double (*metricfunc) (const float *u, const float *v, npy_intp n) = \
58 | metric_float(metric);
59 | if (metricfunc == NULL) {
60 | fprintf(stderr, "Error");
61 | return;
62 | }
63 |
64 | k = 0;
65 | for (i = 0; i < n; i++) {
66 | for (j = i+1; j < n; j++) {
67 | u = X + m * i;
68 | v = X + m * j;
69 | out[k++] = metricfunc(u, v, m);
70 | }
71 | }
72 | }
73 | void pdist_float_X_indices(const float* X, const char* metric, npy_intp n,
74 | npy_intp m, const npy_intp* X_indices,
75 | npy_intp n_X_indices, double* out)
76 | {
77 | npy_intp i, ii, j, jj, k;
78 | const float *u, *v;
79 | double (*metricfunc) (const float *u, const float *v, npy_intp n) = \
80 | metric_float(metric);
81 | if (metricfunc == NULL) {
82 | fprintf(stderr, "Error");
83 | return;
84 | }
85 |
86 | k = 0;
87 | for (ii = 0; ii < n_X_indices; ii++) {
88 | i = X_indices[ii];
89 | for (jj = ii+1; jj < n_X_indices; jj++) {
90 | j = X_indices[jj];
91 | u = X + m * i;
92 | v = X + m * j;
93 | out[k++] = metricfunc(u, v, m);
94 | }
95 | }
96 | }
--------------------------------------------------------------------------------
/msmbuilder/libdistance/src/sumdist.hpp:
--------------------------------------------------------------------------------
1 | #include "distance_kernels.h"
2 |
3 | double sumdist_double(const double* X, const char* metric, npy_intp n, npy_intp m,
4 | const npy_intp* pairs, npy_intp p)
5 | {
6 | npy_intp i;
7 | double s = 0;
8 | const double *u, *v;
9 | double (*metricfunc) (const double *u, const double *v, npy_intp n) = \
10 | metric_double(metric);
11 | if (metricfunc == NULL) {
12 | fprintf(stderr, "Error");
13 | return -1;
14 | }
15 |
16 | for (i = 0; i < p; i++) {
17 | u = X + m * pairs[2*i];
18 | v = X + m * pairs[2*i+1];
19 | s += metricfunc(u, v, m);
20 | }
21 |
22 | return s;
23 | }
24 |
25 |
26 | double sumdist_float(const float* X, const char* metric, npy_intp n, npy_intp m,
27 | const npy_intp* pairs, npy_intp p)
28 | {
29 | npy_intp i;
30 | double s = 0;
31 | const float *u, *v;
32 | double (*metricfunc) (const float *u, const float *v, npy_intp n) = \
33 | metric_float(metric);
34 | if (metricfunc == NULL) {
35 | fprintf(stderr, "Error");
36 | return -1;
37 | }
38 | for (i = 0; i < p; i++) {
39 | u = X + m * pairs[2*i];
40 | v = X + m * pairs[2*i+1];
41 | s += metricfunc(u, v, m);
42 | }
43 |
44 | return s;
45 | }
46 |
--------------------------------------------------------------------------------
/msmbuilder/lumping/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import, print_function, division
2 |
3 | from .pcca import PCCA
4 | from .pcca_plus import PCCAPlus
5 | from .mvca import MVCA
6 | from .bace import BACE
7 |
8 | __all__ = ["PCCA", "PCCAPlus", "MVCA", "BACE"]
9 |
--------------------------------------------------------------------------------
/msmbuilder/msm/.gitignore:
--------------------------------------------------------------------------------
1 | _markovstatemodel.c
2 | _metzner_mcmc_fast.c
3 | _ratematrix.c*
4 |
--------------------------------------------------------------------------------
/msmbuilder/msm/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from .core import *
3 | from .msm import MarkovStateModel
4 | from .ratematrix import ContinuousTimeMSM
5 | from .bayesmsm import BayesianMarkovStateModel
6 | from .implied_timescales import implied_timescales
7 | from .bayes_ratematrix import BayesianContinuousTimeMSM
8 |
--------------------------------------------------------------------------------
/msmbuilder/msm/_markovstatemodel.pyx:
--------------------------------------------------------------------------------
1 | # Author: Robert McGibbon
2 | # Contributors:
3 | # Copyright (c) 2014, Stanford University
4 | # All rights reserved.
5 |
6 | import numpy as np
7 |
8 | cdef extern from "transmat_mle_prinz.h":
9 | int transmat_mle_prinz(const double* C, int n_states,
10 | double tol, double* T, double* pi)
11 |
12 | def _transmat_mle_prinz(double[:, ::1] C, double tol=1e-10):
13 | """Compute a maximum likelihood reversible transition matrix, given
14 | a set of directed transition counts.
15 |
16 | Algorithm 1 of Prinz et al. [1]
17 |
18 | Parameters
19 | ----------
20 | C : (input) 2d array of shape=(n_states, n_states)
21 | The directed transition counts, in C (row-major) order.
22 | tol : (input) float
23 | Convergence tolerance. The algorithm will iterate until the
24 | change in the log-likelihood is less than `tol`.
25 |
26 | Returns
27 | -------
28 | T : (output) 2d array of shape=(n_states, n_states)
29 | Once the algorithm has converged, the resulting transition
30 | matrix is written to `T`.
31 | populations : array, shape = (n_states_,)
32 | The equilibrium population (stationary left eigenvector) of T
33 |
34 | References
35 | ----------
36 | .. [1] Prinz, Jan-Hendrik, et al. "Markov models of molecular kinetics:
37 | Generation and validation." J Chem. Phys. 134.17 (2011): 174105.
38 | """
39 |
40 | cdef int n_states = len(C)
41 | if n_states == 0:
42 | return np.zeros((0, 0)), np.zeros(0)
43 |
44 | if len(C[0]) != n_states:
45 | raise ValueError('C must be square')
46 | cdef double[:, ::1] T = np.zeros((n_states, n_states))
47 | cdef double[::1] pi = np.zeros(n_states)
48 | cdef int n_iter
49 |
50 | n_iter = transmat_mle_prinz(&C[0,0], n_states, tol, &T[0,0], &pi[0])
51 | if n_iter < 0:
52 | # diagnose the error
53 | msg = ' Error code=%d' % n_iter
54 | if np.any(np.less(C, 0)):
55 | msg = 'Domain error. C must be non-negative.' + msg
56 | if np.any(np.sum(C, axis=1) == 0):
57 | msg = 'Row-sums of C must be positive.' + msg
58 | if n_iter == -3:
59 | msg = 'Likelihood not converged.' + msg
60 | raise ValueError(msg)
61 |
62 | return np.array(T), np.array(pi)
63 |
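
A minimal usage sketch for the wrapper above (the import path follows this file's location; the count matrix is a made-up toy example):

import numpy as np
from msmbuilder.msm._markovstatemodel import _transmat_mle_prinz

# Toy directed transition counts: square, with positive row sums
C = np.array([[90., 10.,  0.],
              [10., 80., 10.],
              [ 0., 10., 90.]])

T, pi = _transmat_mle_prinz(C, tol=1e-10)

assert np.allclose(T.sum(axis=1), 1.0)                     # rows are probabilities
assert np.allclose(pi.dot(T), pi)                          # pi is stationary
assert np.allclose(pi[:, None] * T, (pi[:, None] * T).T)   # detailed balance (reversibility)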
--------------------------------------------------------------------------------
/msmbuilder/msm/implied_timescales.py:
--------------------------------------------------------------------------------
1 | # Author: Christian Schwantes
2 | # Contributors:
3 | # Copyright (c) 2014, Stanford University
4 | # All rights reserved.
5 |
6 |
7 | import numpy as np
8 | from ..utils import param_sweep
9 | from . import MarkovStateModel
10 |
11 |
12 | def implied_timescales(sequences, lag_times, n_timescales=10,
13 | msm=None, n_jobs=1, verbose=0):
14 | """
15 | Calculate the implied timescales for a given MSM.
16 |
17 | Parameters
18 | ----------
19 | sequences : list of array-like
20 | List of sequences, or a single sequence. Each
21 | sequence should be a 1D iterable of state
22 | labels. Labels can be integers, strings, or
23 | other orderable objects.
24 | lag_times : array-like
25 | Lag times to calculate implied timescales at.
26 | n_timescales : int, optional
27 | Number of timescales to calculate.
28 | msm : msmbuilder.msm.MarkovStateModel, optional
29 | Instance of an MSM to specify parameters other
30 | than the lag time. If None, then the default
31 | parameters (as implemented by msmbuilder.msm.MarkovStateModel)
32 | will be used.
33 | n_jobs : int, optional
34 | Number of jobs to run in parallel
35 |
36 | Returns
37 | -------
38 | timescales : np.ndarray, shape = [n_models, n_timescales]
39 | The slowest timescales (in units of lag times) for each
40 | model.
41 | """
42 |
43 | if msm is None:
44 | msm = MarkovStateModel()
45 |
46 | param_grid = {'lag_time' : lag_times}
47 | models = param_sweep(msm, sequences, param_grid, n_jobs=n_jobs,
48 | verbose=verbose)
49 | timescales = [m.timescales_ for m in models]
50 | n_timescales = min(n_timescales, min(len(ts) for ts in timescales))
51 | timescales = np.array([ts[:n_timescales] for ts in timescales])
52 | return timescales
53 |
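
A short usage sketch under assumed inputs (the discrete-state sequences here are synthetic, purely for illustration):

import numpy as np
from msmbuilder.msm import implied_timescales

# Two synthetic discrete-state trajectories over 4 states
sequences = [np.random.randint(4, size=2000) for _ in range(2)]
lag_times = [1, 2, 5, 10, 20]

its = implied_timescales(sequences, lag_times, n_timescales=3)
print(its.shape)  # (len(lag_times), n_timescales), in units of the lag time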
--------------------------------------------------------------------------------
/msmbuilder/msm/markov_appreciation.py:
--------------------------------------------------------------------------------
1 | # Author: Muneeb Sultan
2 | # Contributors: Matthew Harrigan
3 | # Copyright (c) 2016, Stanford University
4 | # All rights reserved.
5 |
6 |
7 | def show_markov_appreciation():
8 | from PIL import Image
9 | import requests
10 | from io import BytesIO
11 | response = requests.get("https://upload.wikimedia.org/wikipedia/commons/"
12 | "thumb/7/70/AAMarkov.jpg/330px-AAMarkov.jpg")
13 | img = Image.open(BytesIO(response.content))
14 | img.show()
15 |
--------------------------------------------------------------------------------
/msmbuilder/msm/src/metzner_mcmc.h:
--------------------------------------------------------------------------------
1 | #ifndef METZNER_MCMC_STEP_H
2 | #define METZNER_MCMC_STEP_H
3 |
4 | #ifdef __cplusplus
5 | extern "C" {
6 | #endif
7 |
8 | void
9 | metzner_mcmc_step(const double* Z, const double* N, double* K,
10 | double* Q, const double* random, double* sc, int n_states,
11 | int n_steps);
12 |
13 | #ifdef __cplusplus
14 | }
15 | #endif
16 |
17 | #endif
18 |
--------------------------------------------------------------------------------
/msmbuilder/msm/src/transmat_mle_prinz.h:
--------------------------------------------------------------------------------
1 | #ifndef TRANSMAT_MLE_PRINZ_H
2 | #define TRANSMAT_MLE_PRINZ_H
3 |
4 | #ifdef __cplusplus
5 | extern "C" {
6 | #endif
7 |
8 | int transmat_mle_prinz(const double* C, int n_states, double tol,
9 | double* T, double* pi);
10 | #ifdef __cplusplus
11 | }
12 | #endif
13 |
14 | #endif
15 |
--------------------------------------------------------------------------------
/msmbuilder/msm/validation/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from .bootstrapmsm import BootStrapMarkovStateModel
--------------------------------------------------------------------------------
/msmbuilder/msm/validation/transmat_errorbar.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | def create_perturb_params(countsmat, transmat=None):
5 | '''
6 | Computes transition probabilities and standard errors of the transition probabilities due to
7 | finite sampling using the MSM counts matrix. First, the transition probabilities are computed
8 | by dividing each element c_ij by the row-summed counts of row i. The standard errors are then
9 | obtained by treating each count as a Bernoulli process with p = t_ij, giving a standard
10 | deviation of (t_ij - t_ij^2)^0.5. This is then divided by the
11 | square root of the row-summed counts of row i to obtain the standard error.
12 |
13 | Parameters:
14 | ----------
15 | countsmat: np.ndarray
16 | The msm counts matrix
17 | transmat: np.ndarray
18 | If you have a transition matrix you want to use (e.g. MLE symmetrized), you can supply that here. This
19 | function will use the transition probabilities from this matrix to calculate the Bernoulli standard deviations,
20 | which will be divided by the row-summed counts in the original supplied counts matrix.
21 |
22 | Returns:
23 | -----------
24 | transmat, np.ndarray:
25 | The MSM transition matrix
26 | scale, np.ndarray:
27 | The matrix of standard errors for each transition probability
28 | '''
29 | norm = np.sum(countsmat, axis=1)
30 | if transmat is None:
31 | transmat = (countsmat.transpose() / norm).transpose()
32 | counts = (np.ones((len(transmat), len(transmat))) * norm).transpose()
33 | scale = ((transmat - transmat ** 2) ** 0.5 / counts ** 0.5) + 10 ** -15
34 | return transmat, scale
35 |
36 |
37 | def perturb_tmat(transmat, scale):
38 | '''
39 | Perturbs each nonzero entry in the MSM transition matrix by treating it as a Gaussian random variable
40 | with mean t_ij and standard deviation equal to the standard error computed using "create_perturb_params".
41 | Returns a sampled transition matrix that takes into consideration errors due to finite sampling
42 | (useful for bootstrapping, etc.)
43 |
44 | Parameters:
45 | ----------
46 | transmat: np.ndarray:
47 | The transition matrix, whose elements serve as the means of the Gaussian random variables
48 | scale: np.ndarray:
49 | The matrix of standard errors. For transition probability t_ij, this is assumed to be the standard
50 | error of the mean of a binomial distribution with p = transition probability and number of observations
51 | equal to the summed counts in row i.
52 |
53 | '''
54 | output = np.vectorize(np.random.normal)(transmat, scale)
55 | output[np.where(output < 0)] = 0
56 | return (output.transpose() / np.sum(output, axis=1)).transpose()
57 |
58 |
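
A minimal sketch of how the two helpers above compose into a resampling loop (the counts matrix is synthetic; the import path follows this file's location):

import numpy as np
from msmbuilder.msm.validation.transmat_errorbar import (
    create_perturb_params, perturb_tmat)

# Synthetic counts matrix with reasonably good sampling
countsmat = np.array([[900.,  50.,  50.],
                      [ 40., 910.,  50.],
                      [ 60.,  40., 900.]])

transmat, scale = create_perturb_params(countsmat)

# Draw 100 perturbed transition matrices and inspect the spread of one entry
samples = np.array([perturb_tmat(transmat, scale) for _ in range(100)])
print(transmat[0, 1], samples[:, 0, 1].std())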
--------------------------------------------------------------------------------
/msmbuilder/project_templates/0-test-install.py:
--------------------------------------------------------------------------------
1 | """This script tests your python installation as it pertains to running project templates.
2 |
3 | MSMBuilder supports Python 2.7 and 3.3+ and has some necessary dependencies
4 | like numpy, scipy, and scikit-learn. This templated project enforces
5 | some more stringent requirements to make sure all the users are more-or-less
6 | on the same page and to allow developers to exploit more helper libraries.
7 |
8 | You can modify the template scripts to work for your particular set-up,
9 | but it's probably easier to install `conda` and get the packages we
10 | recommend.
11 |
12 | {{header}}
13 | """
14 |
15 | import textwrap
16 |
17 | # Show intro text
18 | paragraphs = __doc__.split('\n\n')
19 | for p in paragraphs:
20 | print(textwrap.fill(p))
21 | print()
22 |
23 | warnings = 0
24 |
25 | ## Test for python 3.5
26 | import sys
27 |
28 | if sys.version_info < (3, 5):
29 | print(textwrap.fill(
30 | "These scripts were all developed on Python 3.5, "
31 | "which is the current, stable release of Python. "
32 | "In particular, we use subprocess.run "
33 | "(and probably some other new features). "
34 | "You can easily modify the scripts to work on older versions "
35 | "of Python, but why not just upgrade? We like Continuum's "
36 | "Anaconda Python distribution for a simple install (without root)."
37 | ))
38 | print()
39 | warnings += 1
40 |
41 | ## Test for matplotlib
42 | try:
43 | import matplotlib
44 | except ImportError:
45 | print(textwrap.fill(
46 | "These scripts try to make some mildly intesting plots. "
47 | "That requires `matplotlib`."
48 | ))
49 | print()
50 | warnings += 1
51 |
52 | ## Test for seaborn
53 | try:
54 | import seaborn as sns
55 | except ImportError:
56 | print(textwrap.fill(
57 | "The default matplotlib styling is a little ugly. "
58 | "By default, these scripts try to use `seaborn` to make prettier "
59 | "plots. You can remove all the seaborn imports if you don't want "
60 | "to install this library, but why not just install it? Try "
61 | "`conda install seaborn`"
62 | ))
63 | print()
64 | warnings += 1
65 |
66 | ## Test for xdg-open
67 | try:
68 | import subprocess
69 |
70 | subprocess.check_call(['xdg-open', '--version'])
71 | except Exception:
72 | print(textwrap.fill(
73 | "For convenience, the plotting scripts can try to use `xdg-open` "
74 | "to pop up the result of the plot. Use the --display flag on "
75 | "msmb TemplateProject to enable this behavior."
76 | ))
77 | warnings += 1
78 |
79 | ## Report results
80 | if warnings == 0:
81 | print("I didn't find any problems with your installation! Good job.")
82 | print()
83 | else:
84 | print("I found {} warnings, see above. Good luck!".format(warnings))
85 | print()
86 |
--------------------------------------------------------------------------------
/msmbuilder/project_templates/1-get-example-data.py:
--------------------------------------------------------------------------------
1 | """Get sample data for testing and experimenting
2 |
3 | {{header}}
4 | """
5 | import os
6 |
7 | from msmbuilder.example_datasets import FsPeptide
8 |
9 | FsPeptide("./").cache()
10 | if not os.path.exists("trajs"):
11 | os.symlink("fs_peptide", "trajs")
12 | if not os.path.exists("top.pdb"):
13 | os.symlink("fs_peptide/fs-peptide.pdb", "top.pdb")
14 |
--------------------------------------------------------------------------------
/msmbuilder/project_templates/LICENSE.md:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2016 Stanford University and the Authors
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a
6 | copy of this software and associated documentation files (the "Software"),
7 | to deal in the Software without restriction, including without limitation
8 | the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 | and/or sell copies of the Software, and to permit persons to whom the
10 | Software is furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 | DEALINGS IN THE SOFTWARE.
22 |
--------------------------------------------------------------------------------
/msmbuilder/project_templates/README.md:
--------------------------------------------------------------------------------
1 | My msmb Project
2 | ===============
3 |
4 | Initialized with `msmb TemplateProject` on {{date}}
5 |
6 | Keep notes about your project here.
7 |
8 | ## Folder layout
9 |
10 | Each new step in MSM construction is in a new folder with symlinks
11 | to the files on which it depends from previous steps.
12 |
13 | ## Variable names convention
14 |
15 | variable | filename | description
16 | ------------|-------------------|-----------------------------------------------
17 | meta | meta.pandas.pickl | pandas dataframe of trajectory metadata
18 | ftrajs | ftrajs/ | trajectories of feature vectors (dihedrals, ...)
19 | dihed_feat | featurizer.pickl | featurizer object
20 | ttrajs | ttrajs/ | dimensionality-reduced, tica trajectories
21 | tica | tica.pickl | tica object
22 | ktrajs | ktrajs/ | trajectories of cluster indices
23 | kmeans | clusterer.pickl | clusterer object
24 | microktrajs | microktrajs/ | trimmed cluster indices
25 | macroktrajs | macroktrajs/ | macrostate indices
26 |
27 | ## License
28 |
29 | These templates are licensed under the MIT license. Do whatever
30 | you want with them.
31 |
--------------------------------------------------------------------------------
/msmbuilder/project_templates/analysis/gather-metadata-plot.py:
--------------------------------------------------------------------------------
1 | """Plot metadata info
2 |
3 | {{header}}
4 | """
5 |
6 | # ? include "plot_header.template"
7 | # ? from "plot_macros.template" import xdg_open with context
8 |
9 | import numpy as np
10 | import seaborn as sns
11 | from matplotlib import pyplot as plt
12 |
13 | from msmbuilder.io import load_meta, render_meta
14 |
15 | sns.set_style('ticks')
16 | colors = sns.color_palette()
17 |
18 | ## Load
19 | meta = load_meta()
20 |
21 |
22 | ## Histogram of trajectory lengths
23 | def plot_lengths(ax):
24 | lengths_ns = meta['nframes'] * (meta['step_ps'] / 1000)
25 | ax.hist(lengths_ns)
26 | ax.set_xlabel("Lenths / ns", fontsize=16)
27 | ax.set_ylabel("Count", fontsize=16)
28 |
29 | total_label = ("Total length: {us:.2f}"
30 | .format(us=np.sum(lengths_ns) / 1000))
31 | total_label += r" / $\mathrm{\mu s}$"
32 | ax.annotate(total_label,
33 | xy=(0.05, 0.95),
34 | xycoords='axes fraction',
35 | fontsize=18,
36 | va='top',
37 | )
38 |
39 |
40 | ## Pie graph
41 | def plot_pie(ax):
42 | lengths_ns = meta['nframes'] * (meta['step_ps'] / 1000)
43 | sampling = lengths_ns.groupby(level=0).sum()
44 |
45 | ax.pie(sampling,
46 | shadow=True,
47 | labels=sampling.index,
48 | colors=sns.color_palette(),
49 | )
50 | ax.axis('equal')
51 |
52 |
53 | ## Box plot
54 | def plot_boxplot(ax):
55 | meta2 = meta.copy()
56 | meta2['ns'] = meta['nframes'] * (meta['step_ps'] / 1000)
57 | sns.boxplot(
58 | x=meta2.index.names[0],
59 | y='ns',
60 | data=meta2.reset_index(),
61 | ax=ax,
62 | )
63 |
64 |
65 | ## Plot hist
66 | fig, ax = plt.subplots(figsize=(7, 5))
67 | plot_lengths(ax)
68 | fig.tight_layout()
69 | fig.savefig("lengths-hist.pdf")
70 | # {{xdg_open('lengths-hist.pdf')}}
71 |
72 | ## Plot pie
73 | fig, ax = plt.subplots(figsize=(7, 5))
74 | plot_pie(ax)
75 | fig.tight_layout()
76 | fig.savefig("lengths-pie.pdf")
77 | # {{xdg_open('lengths-pie.pdf')}}
78 |
79 | ## Plot box
80 | fig, ax = plt.subplots(figsize=(7, 5))
81 | plot_boxplot(ax)
82 | fig.tight_layout()
83 | fig.savefig("lengths-boxplot.pdf")
84 | # {{xdg_open('lengths-boxplot.pdf')}}
85 |
86 | ## Save metadata as html table
87 | render_meta(meta, 'meta.pandas.html')
88 |
--------------------------------------------------------------------------------
/msmbuilder/project_templates/analysis/gather-metadata.py:
--------------------------------------------------------------------------------
1 | """Find trajectories and associated metadata
2 |
3 | {{header}}
4 |
5 | Meta
6 | ----
7 | depends:
8 | - trajs
9 | - top.pdb
10 | """
11 |
12 | from msmbuilder.io import gather_metadata, save_meta, NumberedRunsParser
13 |
14 | ## Construct and save the dataframe
15 | parser = NumberedRunsParser(
16 | traj_fmt="trajectory-{run}.xtc",
17 | top_fn="top.pdb",
18 | step_ps=50,
19 | )
20 | meta = gather_metadata("trajs/*.xtc", parser)
21 | save_meta(meta)
22 |
--------------------------------------------------------------------------------
/msmbuilder/project_templates/cluster/cluster-plot.py:
--------------------------------------------------------------------------------
1 | """Plot cluster centers on tICA coordinates
2 |
3 | {{header}}
4 | """
5 |
6 | # ? include "plot_header.template"
7 | # ? from "plot_macros.template" import xdg_open with context
8 |
9 | import numpy as np
10 | import seaborn as sns
11 | from matplotlib import pyplot as plt
12 |
13 | from msmbuilder.io import load_trajs, load_generic
14 |
15 | sns.set_style('ticks')
16 | colors = sns.color_palette()
17 |
18 | ## Load
19 | kmeans = load_generic('kmeans.pickl')
20 | meta, ktrajs = load_trajs('ktrajs')
21 | meta, ttrajs = load_trajs('ttrajs', meta)
22 | txx = np.concatenate(list(ttrajs.values()))
23 |
24 |
25 | def plot_cluster_centers(ax):
26 | ax.hexbin(txx[:, 0], txx[:, 1],
27 | cmap=sns.cubehelix_palette(as_cmap=True),
28 | mincnt=1,
29 | bins='log',
30 | )
31 | ax.scatter(kmeans.cluster_centers_[:, 0],
32 | kmeans.cluster_centers_[:, 1],
33 | s=40, c=colors[0],
34 | )
35 | ax.set_xlabel("tIC 1", fontsize=16)
36 | ax.set_ylabel("tIC 2", fontsize=16)
37 |
38 |
39 | ## Plot 1
40 | fig, ax = plt.subplots(figsize=(7, 5))
41 | plot_cluster_centers(ax)
42 | fig.tight_layout()
43 | fig.savefig('kmeans-centers.pdf')
44 | # {{xdg_open('kmeans-centers.pdf')}}
45 |
--------------------------------------------------------------------------------
/msmbuilder/project_templates/cluster/cluster.py:
--------------------------------------------------------------------------------
1 | """Cluster tICA results
2 |
3 | {{header}}
4 |
5 | Meta
6 | ----
7 | depends:
8 | - ttrajs
9 | - meta.pandas.pickl
10 | """
11 | from msmbuilder.io import load_trajs, save_trajs, save_generic
12 | from msmbuilder.cluster import MiniBatchKMeans
13 |
14 | ## Load
15 | meta, ttrajs = load_trajs('ttrajs')
16 |
17 | ## Fit
18 | dim = 5
19 | kmeans = MiniBatchKMeans(n_clusters=500)
20 | kmeans.fit([traj[:, :dim] for traj in ttrajs.values()])
21 |
22 | ## Transform
23 | ktrajs = {}
24 | for k, v in ttrajs.items():
25 | ktrajs[k] = kmeans.partial_transform(v[:, :dim])
26 |
27 | ## Save
28 | print(kmeans.summarize())
29 | save_trajs(ktrajs, 'ktrajs', meta)
30 | save_generic(kmeans, 'kmeans.pickl')
31 |
--------------------------------------------------------------------------------
/msmbuilder/project_templates/cluster/sample-clusters-plot.py:
--------------------------------------------------------------------------------
1 | """Plot the result of sampling clusters
2 |
3 | {{header}}
4 | """
5 |
6 | # ? include "plot_header.template"
7 | # ? from "plot_macros.template" import xdg_open with context
8 |
9 | import numpy as np
10 | import seaborn as sns
11 | from matplotlib import pyplot as plt
12 |
13 | from msmbuilder.io import load_trajs, load_generic
14 |
15 | sns.set_style('ticks')
16 | colors = sns.color_palette()
17 |
18 | ## Load
19 | meta, ttrajs = load_trajs('ttrajs')
20 | txx = np.concatenate(list(ttrajs.values()))
21 | kmeans = load_generic('kmeans.pickl')
22 |
23 | inds = load_generic("cluster-sample-inds.pickl")
24 | coordinates = [
25 | np.asarray([ttrajs[traj_i][frame_i, :] for traj_i, frame_i in state_inds])
26 | for state_inds in inds
27 | ]
28 |
29 |
30 | ## Overlay sampled states on histogram
31 | def plot_sampled_states(ax):
32 | ax.hexbin(txx[:, 0], txx[:, 1],
33 | cmap='magma_r',
34 | mincnt=1,
35 | bins='log',
36 | alpha=0.8,
37 | )
38 |
39 | # Show sampled points as scatter
40 | # Annotate cluster index
41 | for i, coo in enumerate(coordinates):
42 | ax.scatter(coo[:, 0], coo[:, 1], c=colors[i % 6], s=40)
43 | ax.text(kmeans.cluster_centers_[i, 0],
44 | kmeans.cluster_centers_[i, 1],
45 | "{}".format(i),
46 | ha='center',
47 | va='center',
48 | size=16,
49 | bbox=dict(
50 | boxstyle='round',
51 | fc='w',
52 | ec="0.5",
53 | alpha=0.9,
54 | ),
55 | zorder=10,
56 | )
57 |
58 | ax.set_xlabel("tIC 1", fontsize=16)
59 | ax.set_ylabel("tIC 2", fontsize=16)
60 |
61 |
62 | ## Render a script for loading in vmd
63 | def load_in_vmd(dirname='cluster_samples'):
64 | k = len(inds[0])
65 | templ = [
66 | '# autogenerated by msmbuilder',
67 | '# open with `vmd -e load-cluster-samples.tcl`',
68 | '',
69 | '# Defaults',
70 | 'mol default material Transparent',
71 | 'mol default representation NewCartoon',
72 | '',
73 | ]
74 | for i in range(len(inds)):
75 | templ += [
76 | '# State {}'.format(i),
77 | 'mol new top.pdb',
78 | 'mol addfile {}/{}.xtc waitfor all'.format(dirname, i),
79 | 'animate delete beg 0 end 0 top',
80 | 'mol rename top State-{}'.format(i),
81 | 'mol modcolor 0 top ColorID {}'.format(i),
82 | 'mol drawframes top 0 0:{k}'.format(k=k),
83 | '',
84 | ]
85 | return '\n'.join(templ)
86 |
87 |
88 | ## Plot
89 | fig, ax = plt.subplots(figsize=(7, 5))
90 | plot_sampled_states(ax)
91 | fig.tight_layout()
92 | fig.savefig('cluster-samples.pdf')
93 | # {{xdg_open('cluster-samples.pdf')}}
94 |
95 | ## Render vmd
96 | with open('load-cluster-samples.tcl', 'w') as f:
97 | f.write(load_in_vmd())
98 |
--------------------------------------------------------------------------------
/msmbuilder/project_templates/cluster/sample-clusters.py:
--------------------------------------------------------------------------------
1 | """Sample conformations from clusters
2 |
3 | {{header}}
4 |
5 | Meta
6 | ----
7 | depends:
8 | - ../../top.pdb
9 | - ../../trajs
10 | """
11 |
12 | import mdtraj as md
13 | import os
14 |
15 | from msmbuilder.io.sampling import sample_states
16 | from msmbuilder.io import load_trajs, save_generic, preload_top, backup, load_generic
17 |
18 | ## Load
19 | meta, ttrajs = load_trajs('ttrajs')
20 | kmeans = load_generic("kmeans.pickl")
21 |
22 | ## Sample
23 | inds = sample_states(ttrajs,
24 | kmeans.cluster_centers_,
25 | k=10)
26 |
27 | save_generic(inds, "cluster-sample-inds.pickl")
28 |
29 | ## Make trajectories
30 | top = preload_top(meta)
31 | out_folder = "cluster_samples"
32 | backup(out_folder)
33 | os.mkdir(out_folder)
34 |
35 | for state_i, state_inds in enumerate(inds):
36 | traj = md.join(
37 | md.load_frame(meta.loc[traj_i]['traj_fn'], index=frame_i, top=top)
38 | for traj_i, frame_i in state_inds
39 | )
40 | traj.save("{}/{}.xtc".format(out_folder, state_i))
41 |
--------------------------------------------------------------------------------
/msmbuilder/project_templates/dihedrals/featurize-plot.py:
--------------------------------------------------------------------------------
1 | """Plot diagnostic feature info
2 |
3 | {{header}}
4 | """
5 |
6 | # ? include "plot_header.template"
7 | # ? from "plot_macros.template" import xdg_open with context
8 |
9 | import numpy as np
10 | from matplotlib import pyplot as plt
11 | import seaborn as sns
12 | from msmbuilder.io import load_trajs
13 |
14 | sns.set_style('ticks')
15 | colors = sns.color_palette()
16 |
17 | ## Load
18 | meta, ftrajs = load_trajs('ftrajs')
19 | # (stride by 100 for memory concerns)
20 | fxx = np.concatenate([fx[::100] for fx in ftrajs.values()])
21 |
22 |
23 | ## Box and whisker plot
24 | def plot_box(ax):
25 | n_feats_plot = min(fxx.shape[1], 100)
26 | ax.boxplot(fxx[:, :100],
27 | boxprops={'color': colors[0]},
28 | whiskerprops={'color': colors[0]},
29 | capprops={'color': colors[0]},
30 | medianprops={'color': colors[2]},
31 | )
32 |
33 | if fxx.shape[1] > 100:
34 | ax.annotate("(Only showing the first 100 features)",
35 | xy=(0.05, 0.95),
36 | xycoords='axes fraction',
37 | fontsize=14,
38 | va='top',
39 | )
40 |
41 | ax.set_xlabel("Feature Index", fontsize=16)
42 | xx = np.arange(0, n_feats_plot, 10)
43 | ax.set_xticks(xx)
44 | ax.set_xticklabels([str(x) for x in xx])
45 | ax.set_xlim((0, n_feats_plot + 1))
46 | ax.set_ylabel("Feature Value", fontsize=16)
47 |
48 |
49 | ## Plot
50 | fig, ax = plt.subplots(figsize=(15, 5))
51 | plot_box(ax)
52 | fig.tight_layout()
53 | fig.savefig("ftrajs-box.pdf")
54 | # {{ xdg_open('ftrajs-box.pdf') }}
55 |
--------------------------------------------------------------------------------
/msmbuilder/project_templates/dihedrals/featurize.py:
--------------------------------------------------------------------------------
1 | """Turn trajectories into dihedral features
2 |
3 | {{header}}
4 |
5 | Meta
6 | ----
7 | depends:
8 | - meta.pandas.pickl
9 | - trajs
10 | - top.pdb
11 | """
12 | import mdtraj as md
13 |
14 | from msmbuilder.featurizer import DihedralFeaturizer
15 | from msmbuilder.io import load_meta, preload_tops, save_trajs, save_generic
16 | from multiprocessing import Pool
17 |
18 | ## Load
19 | meta = load_meta()
20 | tops = preload_tops(meta)
21 | dihed_feat = DihedralFeaturizer()
22 |
23 |
24 | ## Featurize logic
25 | def feat(irow):
26 | i, row = irow
27 | traj = md.load(row['traj_fn'], top=tops[row['top_fn']])
28 | feat_traj = dihed_feat.partial_transform(traj)
29 | return i, feat_traj
30 |
31 |
32 | ## Do it in parallel
33 | with Pool() as pool:
34 | dihed_trajs = dict(pool.imap_unordered(feat, meta.iterrows()))
35 |
36 | ## Save
37 | save_trajs(dihed_trajs, 'ftrajs', meta)
38 | save_generic(dihed_feat, 'featurizer.pickl')
39 |
--------------------------------------------------------------------------------
/msmbuilder/project_templates/landmarks/featurize-plot.py:
--------------------------------------------------------------------------------
1 | """Plot statistics from RMSD clustering
2 |
3 | {{header}}
4 | """
5 |
6 | # ? include "plot_header.template"
7 | # ? from "plot_macros.template" import xdg_open with context
8 |
9 | import numpy as np
10 | import seaborn as sns
11 | from matplotlib import pyplot as plt
12 |
13 | from msmbuilder.io import load_trajs
14 |
15 | sns.set_style('ticks')
16 | colors = sns.color_palette()
17 |
18 | ## Load
19 | meta, ftrajs = load_trajs('ftrajs')
20 | # (stride by 100 for memory concerns)
21 | fxx = np.concatenate([fx[::100] for fx in ftrajs.values()])
22 |
23 |
24 | ## Box and whisker plot
25 | def plot_box(ax):
26 | n_feats_plot = min(fxx.shape[1], 100)
27 | ax.boxplot(fxx[:, :100],
28 | boxprops={'color': colors[0]},
29 | whiskerprops={'color': colors[0]},
30 | capprops={'color': colors[0]},
31 | medianprops={'color': colors[2]},
32 | )
33 |
34 | if fxx.shape[1] > 100:
35 | ax.annotate("(Only showing the first 100 features)",
36 | xy=(0.05, 0.95),
37 | xycoords='axes fraction',
38 | fontsize=14,
39 | va='top',
40 | )
41 |
42 | ax.set_xlabel("Feature Index", fontsize=16)
43 | xx = np.arange(0, n_feats_plot, 10)
44 | ax.set_xticks(xx)
45 | ax.set_xticklabels([str(x) for x in xx])
46 | ax.set_xlim((0, n_feats_plot + 1))
47 | ax.set_ylabel("Feature Value", fontsize=16)
48 |
49 |
50 | ## Plot
51 | fig, ax = plt.subplots(figsize=(15, 5))
52 | plot_box(ax)
53 | fig.tight_layout()
54 | fig.savefig("ftrajs-box.pdf")
55 | # {{ xdg_open('ftrajs-box.pdf') }}
56 |
--------------------------------------------------------------------------------
/msmbuilder/project_templates/landmarks/featurize.py:
--------------------------------------------------------------------------------
1 | """Cluster based on RMSD between conformations
2 |
3 | {{header}}
4 |
5 | Meta
6 | ----
7 | depends:
8 | - meta.pandas.pickl
9 | - trajs
10 | - top.pdb
11 | """
12 | import mdtraj as md
13 |
14 | from msmbuilder.io import load_meta, itertrajs, save_trajs, preload_top
15 |
16 | ## Load
17 | meta = load_meta()
18 | centroids = md.load("centroids.xtc", top=preload_top(meta))
19 |
20 | ## Kernel
21 | SIGMA = 0.3 # nm
22 | from msmbuilder.featurizer import RMSDFeaturizer
23 | import numpy as np
24 |
25 | featurizer = RMSDFeaturizer(centroids)
26 | lfeats = {}
27 | for i, traj in itertrajs(meta):
28 | lfeat = featurizer.partial_transform(traj)
29 | lfeat = np.exp(-lfeat ** 2 / (2 * (SIGMA ** 2)))
30 | lfeats[i] = lfeat
31 | save_trajs(lfeats, 'ftrajs', meta)
32 |
--------------------------------------------------------------------------------
/msmbuilder/project_templates/landmarks/find-landmarks.py:
--------------------------------------------------------------------------------
1 | """Cluster based on RMSD between conformations
2 |
3 | {{header}}
4 |
5 | Meta
6 | ----
7 | depends:
8 | - meta.pandas.pickl
9 | - trajs
10 | - top.pdb
11 | """
12 | import mdtraj as md
13 |
14 | from msmbuilder.cluster import MiniBatchKMedoids
15 | from msmbuilder.io import load_meta, itertrajs, save_generic, backup
16 |
17 | ## Set up parameters
18 | kmed = MiniBatchKMedoids(
19 | n_clusters=500,
20 | metric='rmsd',
21 | )
22 |
23 | ## Load
24 | meta = load_meta()
25 |
26 |
27 | ## Try to limit RAM usage
28 | def guestimate_stride():
29 | total_data = meta['nframes'].sum()
30 | want = kmed.n_clusters * 10
31 | stride = max(1, total_data // want)
32 | print("Since we have", total_data, "frames, we're going to stride by",
33 | stride, "during fitting, because this is probably adequate for",
34 | kmed.n_clusters, "clusters")
35 | return stride
36 |
37 |
38 | ## Fit
39 | kmed.fit([traj for _, traj in itertrajs(meta, stride=guestimate_stride())])
40 | print(kmed.summarize())
41 |
42 | ## Save
43 | save_generic(kmed, 'clusterer.pickl')
44 |
45 |
46 | ## Save centroids
47 | def frame(traj_i, frame_i):
48 | # Note: kmedoids does 0-based, contiguous integers so we use .iloc
49 | row = meta.iloc[traj_i]
50 | return md.load_frame(row['traj_fn'], frame_i, top=row['top_fn'])
51 |
52 |
53 | centroids = md.join((frame(ti, fi) for ti, fi in kmed.cluster_ids_),
54 | check_topology=False)
55 | centroids_fn = 'centroids.xtc'
56 | backup(centroids_fn)
57 | centroids.save(centroids_fn)
58 |
--------------------------------------------------------------------------------
/msmbuilder/project_templates/msm/microstate-plot.py:
--------------------------------------------------------------------------------
1 | """Plot populations and eigvectors from microstate MSM
2 |
3 | {{header}}
4 | Meta
5 | ----
6 | depends:
7 | - kmeans.pickl
8 | - ../ttrajs
9 | """
10 |
11 | # ? include "plot_header.template"
12 | # ? from "plot_macros.template" import xdg_open with context
13 |
14 | import numpy as np
15 | import seaborn as sns
16 | from matplotlib import pyplot as plt
17 |
18 | from msmbuilder.io import load_trajs, load_generic
19 |
20 | sns.set_style('ticks')
21 | colors = sns.color_palette()
22 |
23 | ## Load
24 | kmeans = load_generic('kmeans.pickl')
25 | msm = load_generic('msm.pickl')
26 | meta, ttrajs = load_trajs('ttrajs')
27 | txx = np.concatenate(list(ttrajs.values()))
28 |
29 |
30 | ## Plot microstates
31 | def plot_microstates(ax):
32 | ax.hexbin(txx[:, 0], txx[:, 1],
33 | cmap='Greys',
34 | mincnt=1,
35 | bins='log',
36 | )
37 |
38 | scale = 100 / np.max(msm.populations_)
39 | add_a_bit = 5
40 | ax.scatter(kmeans.cluster_centers_[msm.state_labels_, 0],
41 | kmeans.cluster_centers_[msm.state_labels_, 1],
42 | s=scale * msm.populations_ + add_a_bit,
43 | c=msm.left_eigenvectors_[:, 1],
44 | cmap='RdBu'
45 | )
46 | ax.set_xlabel("tIC 1", fontsize=16)
47 | ax.set_ylabel("tIC 2", fontsize=16)
48 | # ax.colorbar(label='First Dynamical Eigenvector', fontsize=16)
49 |
50 |
51 | ## Plot
52 | fig, ax = plt.subplots(figsize=(7, 5))
53 | plot_microstates(ax)
54 | fig.tight_layout()
55 | fig.savefig('msm-microstates.pdf')
56 | # {{xdg_open('msm-microstates.pdf')}}
57 |
--------------------------------------------------------------------------------
/msmbuilder/project_templates/msm/microstate-traj.py:
--------------------------------------------------------------------------------
1 | """Sample a trajectory from microstate MSM
2 |
3 | {{header}}
4 |
5 | Meta
6 | ----
7 | depends:
8 | - top.pdb
9 | - trajs
10 | """
11 |
12 | import mdtraj as md
13 |
14 | from msmbuilder.io import load_trajs, save_generic, preload_top, backup, load_generic
15 | from msmbuilder.io.sampling import sample_msm
16 |
17 | ## Load
18 | meta, ttrajs = load_trajs('ttrajs')
19 | msm = load_generic('msm.pickl')
20 | kmeans = load_generic('kmeans.pickl')
21 |
22 | ## Sample
23 | # Warning: make sure ttrajs and kmeans centers have
24 | # the same number of dimensions
25 | inds = sample_msm(ttrajs, kmeans.cluster_centers_, msm, n_steps=200, stride=1)
26 | save_generic(inds, "msm-traj-inds.pickl")
27 |
28 | ## Make trajectory
29 | top = preload_top(meta)
30 | traj = md.join(
31 | md.load_frame(meta.loc[traj_i]['traj_fn'], index=frame_i, top=top)
32 | for traj_i, frame_i in inds
33 | )
34 |
35 | ## Save
36 | traj_fn = "msm-traj.xtc"
37 | backup(traj_fn)
38 | traj.save(traj_fn)
39 |
--------------------------------------------------------------------------------
/msmbuilder/project_templates/msm/microstate.py:
--------------------------------------------------------------------------------
1 | """Make a microstate MSM
2 |
3 | {{header}}
4 | """
5 |
6 | from msmbuilder.io import load_trajs, save_trajs, save_generic
7 | from msmbuilder.msm import MarkovStateModel
8 |
9 | ## Load
10 | meta, ktrajs = load_trajs('ktrajs')
11 |
12 | ## Fit
13 | msm = MarkovStateModel(lag_time=2, n_timescales=10, verbose=False)
14 | msm.fit(list(ktrajs.values()))
15 |
16 | ## Transform
17 | microktrajs = {}
18 | for k, v in ktrajs.items():
19 | microktrajs[k] = msm.partial_transform(v)
20 |
21 | ## Save
22 | print(msm.summarize())
23 | save_generic(msm, 'msm.pickl')
24 | save_trajs(microktrajs, 'microktrajs', meta)
25 |
--------------------------------------------------------------------------------
/msmbuilder/project_templates/msm/timescales-plot.py:
--------------------------------------------------------------------------------
1 | """Plot implied timescales vs lagtime
2 |
3 | {{header}}
4 | """
5 |
6 | # ? include "plot_header.template"
7 | # ? from "plot_macros.template" import xdg_open with context
8 |
9 | import numpy as np
10 | import pandas as pd
11 | import seaborn as sns
12 | from matplotlib import pyplot as plt
13 |
14 | sns.set_style('ticks')
15 | colors = sns.color_palette()
16 |
17 | ## Load
18 | timescales = pd.read_pickle('timescales.pandas.pickl')
19 | n_timescales = len([x for x in timescales.columns
20 | if x.startswith('timescale_')])
21 |
22 |
23 | ## Implied timescales vs lagtime
24 | def plot_timescales(ax):
25 | for i in range(n_timescales):
26 | ax.scatter(timescales['lag_time'],
27 | timescales['timescale_{}'.format(i)],
28 | s=50, c=colors[0],
29 | label=None, # pandas be interfering
30 | )
31 |
32 | xmin, xmax = ax.get_xlim()
33 | xx = np.linspace(xmin, xmax)
34 | ax.plot(xx, xx, color=colors[2], label='$y=x$')
35 | ax.legend(loc='best', fontsize=14)
36 | ax.set_xlabel('Lag Time / todo:units', fontsize=18)
37 | ax.set_ylabel('Implied Timescales / todo:units', fontsize=18)
38 | ax.set_xscale('log')
39 | ax.set_yscale('log')
40 |
41 | ## Percent trimmed vs lagtime
42 | def plot_trimmed(ax):
43 | ax.plot(timescales['lag_time'],
44 | timescales['percent_retained'],
45 | 'o-',
46 | label=None, # pandas be interfering
47 | )
48 | ax.axhline(100, color='k', ls='--', label='100%')
49 | ax.legend(loc='best', fontsize=14)
50 | ax.set_xlabel('Lag Time / todo:units', fontsize=18)
51 | ax.set_ylabel('Retained / %', fontsize=18)
52 | ax.set_xscale('log')
53 | ax.set_ylim((0, 110))
54 |
55 | ## Plot timescales
56 | fig, ax = plt.subplots(figsize=(7, 5))
57 | plot_timescales(ax)
58 | fig.tight_layout()
59 | fig.savefig('implied-timescales.pdf')
60 | # {{xdg_open('implied-timescales.pdf')}}
61 |
62 | ## Plot trimmed
63 | fig, ax = plt.subplots(figsize=(7,5))
64 | plot_trimmed(ax)
65 | fig.tight_layout()
66 | fig.savefig('percent-trimmed.pdf')
67 | # {{xdg_open('percent-trimmed.pdf')}}
68 |
--------------------------------------------------------------------------------
/msmbuilder/project_templates/msm/timescales.py:
--------------------------------------------------------------------------------
1 | """Calculate implied timescales vs. lagtime
2 |
3 | {{header}}
4 |
5 | Meta
6 | ----
7 | depends:
8 | - meta.pandas.pickl
9 | - ktrajs
10 | """
11 | from multiprocessing import Pool
12 |
13 | import pandas as pd
14 |
15 | from msmbuilder.io import load_trajs
16 | from msmbuilder.msm import MarkovStateModel
17 |
18 | ## Load
19 | meta, ktrajs = load_trajs('ktrajs')
20 |
21 | ## Parameters
22 | lagtimes = [2 ** i for i in range(8)]
23 |
24 |
25 | ## Define what to do for parallel execution
26 | def at_lagtime(lt):
27 | msm = MarkovStateModel(lag_time=lt, n_timescales=10, verbose=False)
28 | msm.fit(list(ktrajs.values()))
29 | ret = {
30 | 'lag_time': lt,
31 | 'percent_retained': msm.percent_retained_,
32 | }
33 | for i in range(msm.n_timescales):
34 | ret['timescale_{}'.format(i)] = msm.timescales_[i]
35 | return ret
36 |
37 |
38 | ## Do the calculation
39 | with Pool() as p:
40 | results = p.map(at_lagtime, lagtimes)
41 |
42 | lt_df = pd.DataFrame(results)
43 |
44 | ## Save
45 | print(lt_df.head())
46 | lt_df.to_pickle('timescales.pandas.pickl')
47 |
--------------------------------------------------------------------------------
/msmbuilder/project_templates/plot_header.template:
--------------------------------------------------------------------------------
1 | # ? if use_xdgopen
2 | from subprocess import run
3 | # ? endif
4 | # ? if use_agg
5 | import matplotlib
6 | matplotlib.use('Agg')
7 | # ? endif
8 | # ? if ipynb
9 | %matplotlib inline
10 | # ? endif
11 |
12 |
--------------------------------------------------------------------------------
/msmbuilder/project_templates/plot_macros.template:
--------------------------------------------------------------------------------
1 | {% macro xdg_open(fn) -%}
2 | {% if use_xdgopen -%}
3 | Launch with default pdf viewer:
4 | run(['xdg-open', '{{fn}}'])
5 | {%- endif %}
6 | {%- endmacro %}
7 |
--------------------------------------------------------------------------------
/msmbuilder/project_templates/rmsd/rmsd-plot.py:
--------------------------------------------------------------------------------
1 | """Plot RMSD results
2 |
3 | {{header}}
4 | """
5 |
6 | # ? include "plot_header.template"
7 | # ? from "plot_macros.template" import xdg_open with context
8 |
9 | import numpy as np
10 | import seaborn as sns
11 | from matplotlib import pyplot as plt
12 |
13 | from msmbuilder.io import load_trajs
14 |
15 | sns.set_style('ticks')
16 | colors = sns.color_palette()
17 |
18 | ## Load
19 | meta, rmsds = load_trajs('rmsds')
20 |
21 |
22 | ## Plot box plot
23 | def plot_boxplot(ax):
24 | catted = np.concatenate([rmsds[k] for k in meta.index])
25 | sns.boxplot(catted * 10, ax=ax)
26 | ax.set_xlabel(r'RMSD / $\mathrm{\AA}$', fontsize=18)
27 | ax.set_yticks([])
28 | # ax.set_xticks(fontsize=16) #TODO: fontsize
29 |
30 |
31 | ## Report bad trajectories
32 | def bad_trajs(cutoff=0.7):
33 | bad = {}
34 | for k in meta.index:
35 | arr = rmsds[k]
36 | wh = np.where(np.asarray(arr) > cutoff)[0]
37 | if len(wh) > 0:
38 | bad[k] = wh
39 | return bad
40 |
41 |
42 | ## Plot
43 | fig, ax = plt.subplots(figsize=(6, 3))
44 | plot_boxplot(ax)
45 | fig.tight_layout()
46 | fig.savefig("rmsd-boxplot.pdf")
47 | # {{xdg_open('rmsd-boxplot.pdf')}}
48 |
49 | ## Bad trajectories
50 | for k, frame_is in bad_trajs().items():
51 | print("Trajectory", k)
52 | print("Frames:", frame_is)
53 |
--------------------------------------------------------------------------------
/msmbuilder/project_templates/rmsd/rmsd.py:
--------------------------------------------------------------------------------
1 | """Check for abnormally high rmsd values to a reference structure
2 |
3 | {{header}}
4 |
5 | Meta
6 | ----
7 | depends:
8 | - meta.pandas.pickl
9 | - trajs
10 | - top.pdb
11 |
12 | """
13 |
14 | import mdtraj as md
15 |
16 | from msmbuilder.io import load_meta, itertrajs, save_trajs
17 |
18 | ## Load reference structure
19 | ref = md.load("top.pdb")
20 | meta = load_meta()
21 |
22 | ## Do calculation and save
23 | rmsds = {k: md.rmsd(traj, ref) for k, traj in itertrajs(meta)}
24 | save_trajs(rmsds, 'rmsds', meta)
25 |
--------------------------------------------------------------------------------
/msmbuilder/project_templates/tica/tica-plot.py:
--------------------------------------------------------------------------------
1 | """Plot tICA-transformed coordinates
2 |
3 | {{header}}
4 | """
5 |
6 | # ? include "plot_header.template"
7 | # ? from "plot_macros.template" import xdg_open with context
8 |
9 | import numpy as np
10 | import seaborn as sns
11 | from matplotlib import pyplot as plt
12 |
13 | from msmbuilder.io import load_trajs, load_generic
14 |
15 | sns.set_style('ticks')
16 | colors = sns.color_palette()
17 |
18 | ## Load
19 | tica = load_generic('tica.pickl')
20 | meta, ttrajs = load_trajs('ttrajs')
21 | txx = np.concatenate(list(ttrajs.values()))
22 |
23 |
24 | ## Heatmap
25 | def plot_heatmap(ax):
26 | ax.hexbin(txx[:, 0], txx[:, 1],
27 | cmap=sns.cubehelix_palette(as_cmap=True),
28 | mincnt=1,
29 | bins='log'
30 | )
31 | ax.set_xlabel("tIC 1", fontsize=16)
32 | ax.set_ylabel("tIC 2", fontsize=16)
33 |
34 |
35 | ## Timescales
36 | def plot_timescales(ax):
37 | timestep = meta['step_ps'].unique()
38 | assert len(timestep) == 1, timestep
39 | timestep = float(timestep[0]) # ps
40 | to_us = (
41 | (1.0 / 1000) # ps -> ns
42 | * (1.0 / 1000) # ns -> us
43 | * (timestep / 1) # steps -> ps
44 | )
45 | ax.hlines(tica.timescales_ * to_us,
46 | 0, 1,
47 | color=colors[0])
48 | ax.set_ylabel(r'Timescales / $\mathrm{\mu s}$', fontsize=18)
49 | ax.set_xticks([])
50 | ax.set_xlim((0, 1))
51 |
52 |
53 | ## Plot 1
54 | fig, ax = plt.subplots(figsize=(7, 5))
55 | plot_heatmap(ax)
56 | fig.tight_layout()
57 | fig.savefig('tica-heatmap.pdf')
58 | # {{xdg_open('tica-heatmap.pdf')}}
59 |
60 | ## Plot 2
61 | fig, ax = plt.subplots(figsize=(3, 5))
62 | plot_timescales(ax)
63 | fig.tight_layout()
64 | fig.savefig('tica-timescales.pdf')
65 | # {{xdg_open('tica-timescales.pdf')}}
66 |
--------------------------------------------------------------------------------
/msmbuilder/project_templates/tica/tica-sample-coordinate-plot.py:
--------------------------------------------------------------------------------
1 | """Plot the result of sampling a tICA coordinate
2 |
3 | {{header}}
4 | """
5 |
6 | # ? include "plot_header.template"
7 | # ? from "plot_macros.template" import xdg_open with context
8 |
9 | import numpy as np
10 | import seaborn as sns
11 | from matplotlib import pyplot as plt
12 |
13 | from msmbuilder.io import load_trajs, load_generic
14 |
15 | sns.set_style('ticks')
16 | colors = sns.color_palette()
17 |
18 | ## Load
19 | meta, ttrajs = load_trajs('ttrajs')
20 | txx = np.concatenate(list(ttrajs.values()))
21 |
22 | inds = load_generic("tica-dimension-0-inds.pickl")
23 | straj = []
24 | for traj_i, frame_i in inds:
25 | straj += [ttrajs[traj_i][frame_i, :]]
26 | straj = np.asarray(straj)
27 |
28 |
29 | ## Overlay sampled trajectory on histogram
30 | def plot_sampled_traj(ax):
31 | ax.hexbin(txx[:, 0], txx[:, 1],
32 | cmap='magma_r',
33 | mincnt=1,
34 | bins='log',
35 | alpha=0.8,
36 | )
37 |
38 | ax.plot(straj[:, 0], straj[:, 1], 'o-', label='Sampled')
39 |
40 | ax.set_xlabel("tIC 1", fontsize=16)
41 | ax.set_ylabel("tIC 2", fontsize=16)
42 | ax.legend(loc='best')
43 |
44 |
45 | ## Plot
46 | fig, ax = plt.subplots(figsize=(7, 5))
47 | plot_sampled_traj(ax)
48 | fig.tight_layout()
49 | fig.savefig('tica-dimension-0-heatmap.pdf')
50 | # {{xdg_open('tica-dimension-0-heatmap.pdf')}}
51 |
--------------------------------------------------------------------------------
/msmbuilder/project_templates/tica/tica-sample-coordinate.py:
--------------------------------------------------------------------------------
1 | """Sample tICA coordinates
2 |
3 | {{header}}
4 |
5 | Meta
6 | ----
7 | depends:
8 | - ../top.pdb
9 | - ../trajs
10 | """
11 |
12 | import mdtraj as md
13 |
14 | from msmbuilder.io.sampling import sample_dimension
15 | from msmbuilder.io import load_trajs, save_generic, preload_top, backup
16 |
17 | ## Load
18 | meta, ttrajs = load_trajs('ttrajs')
19 |
20 | ## Sample
21 | inds = sample_dimension(ttrajs,
22 | dimension=0,
23 | n_frames=200, scheme='random')
24 |
25 | save_generic(inds, "tica-dimension-0-inds.pickl")
26 |
27 | ## Make trajectory
28 | top = preload_top(meta)
29 |
30 | # Use .loc because sample_dimension returns the trajectories' original meta index keys
31 | traj = md.join(
32 | md.load_frame(meta.loc[traj_i]['traj_fn'], index=frame_i, top=top)
33 | for traj_i, frame_i in inds
34 | )
35 |
36 | ## Save
37 | traj_fn = "tica-dimension-0.xtc"
38 | backup(traj_fn)
39 | traj.save(traj_fn)
40 |
--------------------------------------------------------------------------------
/msmbuilder/project_templates/tica/tica.py:
--------------------------------------------------------------------------------
1 | """Reduce dimensionality with tICA
2 |
3 | {{header}}
4 | Meta
5 | ----
6 | depends:
7 | - ftrajs
8 | - meta.pandas.pickl
9 | """
10 |
11 | from msmbuilder.io import load_trajs, save_trajs, save_generic
12 | from msmbuilder.decomposition import tICA
13 |
14 | ## Load
15 | tica = tICA(n_components=5, lag_time=10, kinetic_mapping=True)
16 | meta, ftrajs = load_trajs("ftrajs")
17 |
18 | ## Fit
19 | tica.fit(ftrajs.values())
20 |
21 | ## Transform
22 | ttrajs = {}
23 | for k, v in ftrajs.items():
24 | ttrajs[k] = tica.partial_transform(v)
25 |
26 | ## Save
27 | save_trajs(ttrajs, 'ttrajs', meta)
28 | save_generic(tica, 'tica.pickl')
29 |
--------------------------------------------------------------------------------
/msmbuilder/scripts/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/msmbuilder/msmbuilder/515fd5c27836c797692d600216b5eb224dfc1c5d/msmbuilder/scripts/__init__.py
--------------------------------------------------------------------------------
/msmbuilder/scripts/msmb.py:
--------------------------------------------------------------------------------
1 | """Statistical models for biomolecular dynamics"""
2 | from __future__ import print_function, absolute_import, division
3 | import sys
4 | from ..cmdline import App
5 | from ..commands import *
6 | from ..version import version
7 | # the commands register themselves when they're imported
8 |
9 | # Load external commands which register themselves
10 | # with entry point msmbuilder.commands
11 | from pkg_resources import iter_entry_points
12 |
13 | for ep in iter_entry_points("msmbuilder.commands"):
14 | external_command = ep.load()
15 | # Some groups start with numbers for ordering
16 | # Some start with descriptions e.g. "MSM"
17 | # Let's set the group to start with ZZZ to put plugins last.
18 | external_command._group = "ZZZ-External_" + external_command._group
19 |
20 |
21 | class MSMBuilderApp(App):
22 | pass
23 |
24 |
25 | def main():
26 | try:
27 | app = MSMBuilderApp(name='MSMBuilder', description=__doc__)
28 | app.start()
29 | except RuntimeError as e:
30 | sys.exit("Error: %s" % e)
31 | except Exception as e:
32 | message = """\
33 | An unexpected error has occurred with MSMBuilder (version %s), please
34 | consider sending the following traceback to MSMBuilder GitHub issue tracker at:
35 | https://github.com/msmbuilder/msmbuilder/issues
36 | """
37 | print(message % version, file=sys.stderr)
38 | raise # as if we did not catch it
39 |
40 |
41 | if __name__ == '__main__':
42 | main()
43 |
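
The loop above discovers external commands through setuptools entry points in the "msmbuilder.commands" group. A hedged sketch of how a third-party package might register one (the package, module, and class names here are made up, and the command class itself would still need to follow msmbuilder's cmdline conventions, which are not shown in this file):

# setup.py of a hypothetical plugin package
from setuptools import setup

setup(
    name='msmbuilder-myplugin',
    version='0.1',
    py_modules=['myplugin'],
    entry_points={
        'msmbuilder.commands': [
            # "msmb MyCommand" becomes available once this package is installed
            'MyCommand = myplugin:MyCommand',
        ],
    },
)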
--------------------------------------------------------------------------------
/msmbuilder/src/f2py/f2pyptr.h:
--------------------------------------------------------------------------------
1 | #ifndef F2PYPTR_H_
2 | #define F2PYPTR_H_
3 |
4 | #include <Python.h>
5 |
6 | void *f2py_pointer(PyObject *obj)
7 | {
8 | #if PY_VERSION_HEX < 0x03000000
9 | if (PyCObject_Check(obj)) {
10 | return PyCObject_AsVoidPtr(obj);
11 | }
12 | #endif
13 | #if PY_VERSION_HEX >= 0x02070000
14 | if (PyCapsule_CheckExact(obj)) {
15 | return PyCapsule_GetPointer(obj, NULL);
16 | }
17 | #endif
18 | PyErr_SetString(PyExc_ValueError, "Not an object containing a void ptr");
19 | return NULL;
20 | }
21 |
22 | #endif
23 |
--------------------------------------------------------------------------------
/msmbuilder/src/scipy_lapack.h:
--------------------------------------------------------------------------------
1 | #ifndef MIXTAPE_SCIPY_LAPACK
2 | #define MIXTAPE_SCIPY_LAPACK
3 |
4 | #include <Python.h>
5 | #include "f2py/f2pyptr.h"
6 |
7 | typedef int sgemm_t(const char *transa, const char *transb, const int *m, const int *n, const int *k, const float *alpha, const float *a, const int *lda, float *b, const int *ldb, const float *beta, float *c, const int *ldc);
8 | typedef int spotrf_t(const char *uplo, const int *n, float *a, const int *lda, int *info);
9 | typedef int strtrs_t(const char *uplo, const char *trans, const char *diag, const int *n, const int *nrhs, const float *a, const int *lda, float *b, const int *ldb, int * info);
10 |
11 | typedef struct {
12 | sgemm_t *sgemm;
13 | spotrf_t *spotrf;
14 | strtrs_t *strtrs;
15 | } lapack_t;
16 | static lapack_t __lapack;
17 |
18 |
19 | static lapack_t* get_lapack(void) {
20 | PyObject *mod_lapack, *mod_blas, *func, *cpointer;
21 | if (__lapack.sgemm == NULL) {
22 | mod_blas = PyImport_ImportModule("scipy.linalg.blas");
23 | mod_lapack = PyImport_ImportModule("scipy.linalg.lapack");
24 |
25 | func = PyObject_GetAttrString(mod_blas, "sgemm");
26 | cpointer = PyObject_GetAttrString(func, "_cpointer");
27 | __lapack.sgemm = (sgemm_t*) f2py_pointer(cpointer);
28 |
29 | func = PyObject_GetAttrString(mod_lapack, "spotrf");
30 | cpointer = PyObject_GetAttrString(func, "_cpointer");
31 | __lapack.spotrf = (spotrf_t*) f2py_pointer(cpointer);
32 |
33 | func = PyObject_GetAttrString(mod_lapack, "strtrs");
34 | cpointer = PyObject_GetAttrString(func, "_cpointer");
35 | __lapack.strtrs = (strtrs_t*) f2py_pointer(cpointer);
36 | }
37 |
38 | return &__lapack;
39 | }
40 |
41 | #endif
42 |
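
The pointer-grabbing trick in get_lapack() works because scipy's f2py-wrapped BLAS/LAPACK routines expose a `_cpointer` capsule holding the raw function pointer, which f2py_pointer() then unwraps. A quick interactive check from Python (illustrative only):

from scipy.linalg import blas, lapack

# Each f2py-wrapped routine carries a capsule with its C function pointer;
# these are the same objects the header above fetches via PyObject_GetAttrString.
print(blas.sgemm._cpointer)
print(lapack.spotrf._cpointer)
print(lapack.strtrs._cpointer)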
--------------------------------------------------------------------------------
/msmbuilder/src/triu_utils.pyx:
--------------------------------------------------------------------------------
1 | """
2 | Utilities related to indexing upper triangular matrices with a diagonal
3 | offset of 1. The semantics match ``numpy.triu_indices(n, k=1)``
4 | """
5 | from numpy cimport npy_intp
6 | from libc.math cimport sqrt
7 |
8 | cdef inline npy_intp ij_to_k(npy_intp i, npy_intp j, npy_intp n) nogil:
9 | """2D (i, j) square matrix index to linearized upper triangular index
10 |
11 | [ 0 a0 a1 a2 a3 ] (i=0,j=1) -> 0
12 | [ 0 0 a4 a5 a6 ] (i=0,j=2) -> 1
13 | [ 0 0 0 a7 a8 ] (i=1,j=3) -> 5
14 | [ 0 0 0 0 a9 ] etc
15 | [ 0 0 0 0 0 ] (i=3,j=4) -> 9
16 |
17 | For further explanation, see http://stackoverflow.com/a/27088560/1079728
18 |
19 | Parameters
20 | ----------
21 | i : int
22 | Row index
23 | j : int
24 | Column index
25 | n : int
26 | Matrix size. The matrix is assumed to be square
27 |
28 | Returns
29 | -------
30 | k : int
31 | Linearized upper triangular index
32 |
33 | See Also
34 | --------
35 | k_to_ij : the inverse operation
36 | """
37 | if j > i:
38 | return (n*(n-1)/2) - (n-i)*((n-i)-1)/2 + j - i - 1
39 | return (n*(n-1)/2) - (n-j)*((n-j)-1)/2 + i - j - 1
40 |
41 |
42 | cdef inline void k_to_ij(npy_intp k, npy_intp n, npy_intp *i, npy_intp *j) nogil:
43 | """Linearized upper triangular index to 2D (i, j) index
44 |
45 | [ 0 a0 a1 a2 a3 ] 0 -> (i=0,j=1)
46 | [ 0 0 a4 a5 a6 ] 1 -> (i=0,j=2)
47 | [ 0 0 0 a7 a8 ] 5 -> (i=1,j=3)
48 | [ 0 0 0 0 a9 ] etc
49 | [ 0 0 0 0 0 ]
50 |
51 | http://stackoverflow.com/a/27088560/1079728
52 |
53 | Parameters
54 | ----------
55 | k : int
56 | Linearized upper triangular index
57 |
58 | Returns
59 | -------
60 | i : int
61 | Row index, written into *i on exit
62 | j : int
63 | Column index, written into *j on exit
64 | """
65 |
66 | i[0] = n - 2 - (sqrt(-8.0*k + 4.0*n*(n-1)-7.0)/2.0 - 0.5)
67 | j[0] = k + i[0] + 1 - n*(n-1)/2 + (n-i[0])*(n-i[0]-1)/2
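
Since the two helpers above are `cdef inline` and not importable from Python, here is a pure-Python transcription of `ij_to_k` checked against `numpy.triu_indices(n, k=1)`; this is a verification sketch, not part of the module:

import numpy as np

def ij_to_k(i, j, n):
    # Same formula as the Cython helper above (// stands in for C integer division)
    if j > i:
        return n*(n-1)//2 - (n-i)*((n-i)-1)//2 + j - i - 1
    return n*(n-1)//2 - (n-j)*((n-j)-1)//2 + i - j - 1

n = 6
rows, cols = np.triu_indices(n, k=1)
assert all(ij_to_k(i, j, n) == k for k, (i, j) in enumerate(zip(rows, cols)))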
--------------------------------------------------------------------------------
/msmbuilder/tests/.gitignore:
--------------------------------------------------------------------------------
1 | test_cyblas.c
--------------------------------------------------------------------------------
/msmbuilder/tests/__init__.py:
--------------------------------------------------------------------------------
1 | import warnings
2 |
3 | # Show warnings for our package
4 | warnings.filterwarnings('always', module='msmbuilder.*')
5 |
6 | # Show warnings for packages where we want to be conscious of warnings
7 | warnings.filterwarnings('always', module='mdtraj.*')
8 | warnings.filterwarnings('default', module='scipy.*')
9 | warnings.filterwarnings('default', module='sklearn.*')
10 |
--------------------------------------------------------------------------------
/msmbuilder/tests/native.pdb:
--------------------------------------------------------------------------------
1 | ATOM 1 1HH3 ACE 1 4.300 13.100 8.600 1.00 0.00
2 | ATOM 2 CH3 ACE 1 5.200 13.600 8.800 1.00 0.00
3 | ATOM 3 2HH3 ACE 1 4.900 14.300 9.600 1.00 0.00
4 | ATOM 4 3HH3 ACE 1 5.600 14.200 7.900 1.00 0.00
5 | ATOM 5 C ACE 1 6.100 12.500 9.400 1.00 0.00
6 | ATOM 6 O ACE 1 6.400 12.500 10.600 1.00 0.00
7 | ATOM 7 N ALA 2 6.600 11.600 8.500 1.00 0.00
8 | ATOM 8 H ALA 2 6.500 11.600 7.500 1.00 0.00
9 | ATOM 9 CA ALA 2 7.300 10.400 9.100 1.00 0.00
10 | ATOM 10 HA ALA 2 7.900 10.700 10.000 1.00 0.00
11 | ATOM 11 CB ALA 2 6.200 9.500 9.600 1.00 0.00
12 | ATOM 12 HB1 ALA 2 5.700 9.100 8.800 1.00 0.00
13 | ATOM 13 HB2 ALA 2 6.600 8.700 10.200 1.00 0.00
14 | ATOM 14 HB3 ALA 2 5.400 10.000 10.200 1.00 0.00
15 | ATOM 15 C ALA 2 8.400 9.800 8.200 1.00 0.00
16 | ATOM 16 O ALA 2 8.400 9.900 7.000 1.00 0.00
17 | ATOM 17 N NME 3 9.300 9.000 8.800 1.00 0.00
18 | ATOM 18 H NME 3 9.100 9.000 9.800 1.00 0.00
19 | ATOM 19 CH3 NME 3 10.500 8.400 8.300 1.00 0.00
20 | ATOM 20 1HH3 NME 3 10.700 7.700 9.100 1.00 0.00
21 | ATOM 21 2HH3 NME 3 10.400 8.000 7.300 1.00 0.00
22 | ATOM 22 3HH3 NME 3 11.300 9.100 8.300 1.00 0.00
23 | TER
24 | ENDMDL
25 |
--------------------------------------------------------------------------------
/msmbuilder/tests/test_alphaanglefeaturizer.py:
--------------------------------------------------------------------------------
1 | import warnings
2 |
3 | import msmbuilder.featurizer
4 | from msmbuilder.example_datasets import MinimalFsPeptide, AlanineDipeptide
5 |
6 | warnings.filterwarnings('ignore', message='.*Unlikely unit cell vectors.*')
7 |
8 |
9 | def test_alanine_dipeptide():
10 | # will produce 0 features because not enough peptides
11 |
12 | trajectories = AlanineDipeptide().get_cached().trajectories
13 | featurizer = msmbuilder.featurizer.AlphaAngleFeaturizer()
14 | nothing = featurizer.transform(trajectories)
15 |
16 | assert (nothing[0].shape[1] == 0)
17 |
18 |
19 | def test_fs_peptide():
20 | # will produce 36 features
21 |
22 | trajectories = MinimalFsPeptide().get_cached().trajectories
23 | featurizer = msmbuilder.featurizer.AlphaAngleFeaturizer()
24 | alphas = featurizer.transform(trajectories)
25 |
26 | assert (alphas[0].shape[1] == 36)
27 |
28 |
29 | def test_fs_peptide_nosincos():
30 | # will produce 18 features
31 |
32 | trajectories = MinimalFsPeptide().get_cached().trajectories
33 | featurizer = msmbuilder.featurizer.AlphaAngleFeaturizer(sincos=False)
34 | alphas = featurizer.transform(trajectories)
35 |
36 | assert (alphas[0].shape[1] == 18)
37 |
--------------------------------------------------------------------------------
/msmbuilder/tests/test_apm.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import mdtraj as md
4 |
5 | from numpy.testing.decorators import skipif
6 | import numpy as np
7 | from mdtraj.testing import eq
8 |
9 | from msmbuilder.cluster import APM
10 | from msmbuilder.example_datasets import FsPeptide
11 |
12 | rs = np.random.RandomState(42)
13 |
14 | X1 = 0.3 * rs.randn(1000, 10).astype(np.double)
15 | X2 = 0.3 * rs.randn(1000, 10).astype(np.float32)
16 | # trj = md.load(md.testing.get_fn("frame0.pdb"))
17 | trj = FsPeptide().get().trajectories[0]
18 |
19 | @skipif(True)
20 | def test_shapes():
21 | # make sure all the shapes are correct of the fit parameters
22 | m = APM(n_macrostates=3, metric='euclidean', lag_time=1, random_state=rs)
23 | m.fit([rs.randn(100, 2)])
24 | assert isinstance(m.labels_, list)
25 | eq(m.labels_[0].shape, (100,))
26 |
27 |
28 | @skipif(True)
29 | def test_euclidean():
30 | # test for predict using euclidean distance
31 | data = rs.randn(100, 2)
32 | m1 = APM(n_macrostates=2, metric='euclidean', lag_time=1, random_state=rs)
33 | m2 = APM(n_macrostates=2, metric='euclidean', lag_time=1, random_state=rs)
34 |
35 | labels1 = m1.fit_predict([data])
36 | labels2 = m2.fit([data]).MacroAssignments_
37 | eq(labels1[0], labels2[0])
38 |
39 |
40 | @skipif(True)
41 | def test_euclidean_10000():
42 | # test for predict using euclidean distance
43 | m1 = APM(n_macrostates=2, metric='euclidean', lag_time=10, random_state=rs)
44 | m2 = APM(n_macrostates=2, metric='euclidean', lag_time=10, random_state=rs)
45 | data = rs.randn(10000, 2)
46 | labels1 = m1.fit_predict([data])
47 | labels2 = m2.fit([data]).MacroAssignments_
48 | eq(labels1[0], labels2[0])
49 |
50 |
51 | @skipif(True)
52 | def test_rmsd():
53 | # test for predict using rmsd
54 | m1 = APM(n_macrostates=4, metric='rmsd', lag_time=1, random_state=rs)
55 | m2 = APM(n_macrostates=4, metric='rmsd', lag_time=1, random_state=rs)
56 | labels1 = m1.fit_predict([trj])
57 | labels2 = m2.fit([trj]).MacroAssignments_
58 |
59 | eq(labels1[0], labels2[0])
60 |
61 |
62 | @skipif(True)
63 | def test_dtype():
64 | X = rs.randn(100, 2)
65 | X32 = X.astype(np.float32)
66 | X64 = X.astype(np.float64)
67 | m1 = APM(n_macrostates=3, metric='euclidean', lag_time=1, random_state=rs).fit([X32])
68 | m2 = APM(n_macrostates=3, metric='euclidean', lag_time=1, random_state=rs).fit([X64])
69 |
70 | eq(m1.labels_[0], m2.labels_[0])
71 | eq(m1.MacroAssignments_[0], m2.MacroAssignments_[0])
72 | eq(m1.fit_predict([X32])[0], m2.fit_predict([X64])[0])
73 | eq(m1.fit_predict([X32])[0], m1.MacroAssignments_[0])
74 |
75 |
--------------------------------------------------------------------------------
/msmbuilder/tests/test_build_counts.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function, division
2 |
3 | from msmbuilder.msm import MarkovStateModel, BayesianMarkovStateModel, \
4 | ContinuousTimeMSM
5 |
6 | from mdtraj.testing import eq
7 | import numpy as np
8 |
9 | def test_build_counts():
10 |     seq = [[0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0]]
11 | counts = np.array([[8, 1, 1], [1, 3, 0], [1, 0, 3]])
12 | for mdl_type in [MarkovStateModel, BayesianMarkovStateModel,
13 | ContinuousTimeMSM]:
14 | mdl_instance = mdl_type()
15 | mdl_instance.fit(seq)
16 | eq(mdl_instance.countsmat_, counts)
17 |
--------------------------------------------------------------------------------
/msmbuilder/tests/test_clustering.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import mdtraj as md
4 | import mdtraj.testing
5 | import numpy as np
6 | import scipy.spatial.distance
7 |
8 | import msmbuilder.cluster
9 | from msmbuilder.example_datasets import FsPeptide
10 |
11 | X1 = 0.3 * np.random.RandomState(0).randn(1000, 10).astype(np.double)
12 | X2 = 0.3 * np.random.RandomState(1).randn(1000, 10).astype(np.float32)
13 | # trj = md.load(md.testing.get_fn("traj.h5"))
14 | trj = FsPeptide().get().trajectories[0][:100]
15 |
16 |
17 | def test_regular_spatial_rmsd():
18 | model = msmbuilder.cluster.RegularSpatial(d_min=0.01, metric='rmsd')
19 | model.fit([trj])
20 |
21 | assert isinstance(model.cluster_centers_, md.Trajectory)
22 | assert len(model.cluster_centers_) == model.n_clusters_
23 | predict = model.predict([trj])
24 | assert isinstance(predict, list) and len(predict) == 1
25 | assert len(predict[0]) == len(trj)
26 | assert isinstance(predict[0], np.ndarray) and predict[0].dtype == np.intp
27 |
28 |
29 | def test_regular_spatial():
30 | model = msmbuilder.cluster.RegularSpatial(d_min=0.8)
31 |
32 | for X in [X1, X2]:
33 | model.fit([X])
34 |
35 | assert model.cluster_centers_.shape[1] == 10
36 | assert isinstance(model.cluster_centers_, np.ndarray)
37 | assert len(model.cluster_centers_) == model.n_clusters_
38 | predict = model.predict([X])
39 | assert isinstance(predict, list) and len(predict) == 1
40 | assert len(predict[0]) == len(X)
41 | assert (isinstance(predict[0], np.ndarray)
42 | and predict[0].dtype == np.intp)
43 |
44 | assert model.cluster_centers_.shape[0] > 200
45 | assert not np.all(scipy.spatial.distance.pdist(X) > model.d_min)
46 | assert np.all(scipy.spatial.distance.pdist(model.cluster_centers_)
47 | > model.d_min)
48 |
49 | assert np.all(np.shape(model.cluster_center_indices_)
50 | == (len(model.cluster_center_indices_), 2))
51 |
52 |
53 | def test_kcenters_rmsd():
54 | model = msmbuilder.cluster.KCenters(3, metric='rmsd')
55 | model.fit([trj])
56 |
57 | assert len(model.cluster_centers_) == 3
58 | assert isinstance(model.cluster_centers_, md.Trajectory)
59 | predict = model.predict([trj])
60 | assert isinstance(predict, list) and len(predict) == 1
61 | assert len(predict[0]) == len(trj)
62 | assert isinstance(predict[0], np.ndarray) and predict[0].dtype == np.intp
63 |
64 |
65 | def test_kcenters_spatial():
66 | model = msmbuilder.cluster.KCenters(5)
67 |
68 | for X in [X1, X2]:
69 | model.fit([X])
70 |
71 | assert model.cluster_centers_.shape[1] == 10
72 | assert isinstance(model.cluster_centers_, np.ndarray)
73 | assert len(model.cluster_centers_) == 5
74 | predict = model.predict([X])
75 | assert isinstance(predict, list) and len(predict) == 1
76 | assert len(predict[0]) == len(X)
77 | assert (isinstance(predict[0], np.ndarray)
78 | and predict[0].dtype == np.intp)
79 |
--------------------------------------------------------------------------------
/msmbuilder/tests/test_convenience.py:
--------------------------------------------------------------------------------
1 |
2 | from msmbuilder.utils import unique
3 |
4 | def test_unique():
5 | assert unique([1,2,3,3,2,1]) == [1,2,3]
6 | assert unique([3,3,2,2,1,1]) == [3,2,1]
7 | assert unique([3,1,2,1,2,3]) == [3,1,2]
8 |
--------------------------------------------------------------------------------
/msmbuilder/tests/test_cyblas_wrapper.py:
--------------------------------------------------------------------------------
1 | # this file gets nose to find the tests that appear in the cython module
2 | from msmbuilder.tests import test_cyblas
3 |
4 |
5 | def test():
6 | count = 0
7 | for name in dir(test_cyblas):
8 | if name.startswith('test'):
9 | count += 1
10 | yield getattr(test_cyblas, name)
11 | if count == 0:
12 | assert False
13 |
--------------------------------------------------------------------------------
/msmbuilder/tests/test_dependencies.py:
--------------------------------------------------------------------------------
1 | import os, pip, sys, warnings
2 | from msmbuilder.example_datasets import has_msmb_data
3 |
4 | def test_installed_packages():
5 | try:
6 | installed_packages = pip.get_installed_distributions
7 |     except AttributeError:
8 | from pip._internal.utils.misc import get_installed_distributions as installed_packages
9 |
10 | package_names = [package.project_name for package in installed_packages()]
11 |
12 | test_dependencies = ['munkres', 'numdifftools', 'statsmodels', 'hmmlearn']
13 |
14 | if not hasattr(sys, 'getwindowsversion'):
15 | test_dependencies += ['cvxpy']
16 |
17 | for td in test_dependencies:
18 | if td not in package_names:
19 | raise RuntimeError('Please install {} to continue'.format(td))
20 |
21 | def test_msmb_data():
22 | if has_msmb_data() is None:
23 | raise RuntimeError('Please install {} to continue'.format('msmb_data'))
24 |
--------------------------------------------------------------------------------
/msmbuilder/tests/test_estimator_subclassing.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function, absolute_import, division
2 |
3 | import importlib
4 | import inspect
5 | import pkgutil
6 | import warnings
7 | from contextlib import contextmanager
8 |
9 | from sklearn.base import BaseEstimator
10 |
11 | import msmbuilder
12 | import msmbuilder.base
13 |
14 |
15 | def silent_warnings(*args, **kwargs):
16 | print(args, kwargs)
17 |
18 |
19 | @contextmanager
20 | def suppress_warnings():
21 | original_warn = warnings.warn
22 | warnings.warn = silent_warnings
23 | yield
24 | warnings.warn = original_warn
25 |
26 |
27 | def import_all_estimators(pkg):
28 | def estimator_in_module(mod):
29 | for name, obj in inspect.getmembers(mod):
30 | if name.startswith('_'):
31 | continue
32 | if inspect.isclass(obj) and issubclass(obj, BaseEstimator):
33 | yield obj
34 |
35 |     with suppress_warnings():
36 | result = {}
37 | for _, modname, ispkg in pkgutil.iter_modules(pkg.__path__):
38 | c = '%s.%s' % (pkg.__name__, modname)
39 | try:
40 | mod = importlib.import_module(c)
41 | if ispkg:
42 | result.update(import_all_estimators(mod))
43 | for kls in estimator_in_module(mod):
44 | result[kls.__name__] = kls
45 | except ImportError as e:
46 | print('e', e)
47 | continue
48 |
49 | return result
50 |
51 |
52 | def test_all_estimators():
53 | for key, value in import_all_estimators(msmbuilder).items():
54 | if 'msmbuilder' in value.__module__:
55 | assert issubclass(value, msmbuilder.base.BaseEstimator), value
56 |
--------------------------------------------------------------------------------
/msmbuilder/tests/test_featurizer_subset.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from mdtraj.testing import eq
3 |
4 | from msmbuilder.example_datasets import AlanineDipeptide
5 | from msmbuilder.featurizer import AtomPairsFeaturizer, get_atompair_indices
6 | from msmbuilder.featurizer.subset import SubsetAtomPairs, \
7 | SubsetCosPhiFeaturizer, SubsetCosPsiFeaturizer, \
8 | SubsetSinPhiFeaturizer, SubsetSinPsiFeaturizer
9 |
10 |
11 | def test_SubsetAtomPairs_1():
12 | trajectories = AlanineDipeptide().get_cached().trajectories
13 | trj0 = trajectories[0][0]
14 | atom_indices, pair_indices = get_atompair_indices(trj0)
15 | featurizer = AtomPairsFeaturizer(pair_indices)
16 | X_all0 = featurizer.transform(trajectories)
17 |
18 | featurizer = SubsetAtomPairs(pair_indices, trj0)
19 | featurizer.subset = np.arange(len(pair_indices))
20 | X_all = featurizer.transform(trajectories)
21 |
22 | any([eq(x, x0) for (x, x0) in zip(X_all, X_all0)])
23 |
24 |
25 | def test_SubsetAtomPairs_2():
26 | trajectories = AlanineDipeptide().get_cached().trajectories
27 | trj0 = trajectories[0][0]
28 | atom_indices, pair_indices = get_atompair_indices(trj0)
29 | featurizer = AtomPairsFeaturizer(pair_indices)
30 | X_all0 = featurizer.transform(trajectories)
31 |
32 | featurizer = SubsetAtomPairs(pair_indices, trj0,
33 | subset=np.arange(len(pair_indices)))
34 | X_all = featurizer.transform(trajectories)
35 |
36 | any([eq(x, x0) for (x, x0) in zip(X_all, X_all0)])
37 |
38 |
39 | def test_SubsetAtomPairs_3():
40 | trajectories = AlanineDipeptide().get_cached().trajectories
41 | trj0 = trajectories[0][0]
42 | atom_indices, pair_indices = get_atompair_indices(trj0)
43 | featurizer = AtomPairsFeaturizer(pair_indices)
44 | X_all0 = featurizer.transform(trajectories)
45 |
46 | featurizer = SubsetAtomPairs(pair_indices, trj0, subset=np.array([0, 1]))
47 | X_all = featurizer.transform(trajectories)
48 |
49 | try:
50 | any([eq(x, x0) for (x, x0) in zip(X_all, X_all0)])
51 | except AssertionError:
52 | pass
53 | else:
54 |         raise AssertionError("Expected eq() to raise an AssertionError!")
55 |
--------------------------------------------------------------------------------
/msmbuilder/tests/test_kernel_approximation.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 |
3 | import numpy as np
4 | from numpy.testing import assert_array_almost_equal
5 | from sklearn.kernel_approximation import Nystroem as NystroemR
6 |
7 | from msmbuilder.decomposition.kernel_approximation import Nystroem, LandmarkNystroem
8 |
9 |
10 | def test_nystroem_vs_sklearn():
11 | np.random.seed(42)
12 | X = np.random.randn(100, 5)
13 |
14 | kernel = Nystroem(kernel='linear', random_state=42)
15 | kernelR = NystroemR(kernel='linear', random_state=42)
16 |
17 | y1 = kernel.fit_transform([X])[0]
18 | y2 = kernelR.fit_transform(X)
19 |
20 | assert_array_almost_equal(y1, y2)
21 |
22 |
23 | def test_lndmrk_nystroem_approximation():
24 | np.random.seed(42)
25 | X = np.random.randn(100, 5)
26 |
27 | u = np.arange(X.shape[0])[5::1]
28 | v = np.arange(X.shape[0])[::1][:u.shape[0]]
29 | lndmrks = X[np.unique((u, v))]
30 |
31 | kernel = LandmarkNystroem(kernel='rbf', random_state=42)
32 | kernelR = NystroemR(kernel='rbf', random_state=42)
33 |
34 | y1_1 = kernel.fit_transform([X])[0]
35 | kernel.landmarks = lndmrks
36 | y1_2 = kernel.fit_transform([X])[0]
37 |
38 | y2 = kernelR.fit_transform(X)
39 |
40 | assert_array_almost_equal(y2, y1_1)
41 |
42 | assert not all((np.abs(y2 - y1_2) > 1E-6).flatten())
43 |
--------------------------------------------------------------------------------
/msmbuilder/tests/test_ksparsetica.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from msmbuilder.decomposition import tICA, KSparseTICA
4 | from msmbuilder.example_datasets import MetEnkephalin
5 | from msmbuilder.featurizer import AtomPairsFeaturizer
6 |
7 | def build_dataset():
8 | trajs = MetEnkephalin().get().trajectories
9 |
10 | pairs = []
11 | for i in range(trajs[0].n_atoms):
12 | for j in range(i):
13 | pairs.append((i, j))
14 | np.random.seed(0)
15 | np.random.shuffle(pairs)
16 | n_pairs = 200
17 |
18 | return AtomPairsFeaturizer(pairs[:n_pairs]).transform([traj[::10] for traj in trajs])
19 |
20 | def test_MetEnkephalin():
21 | np.random.seed(0)
22 | data = build_dataset()
23 | n_features = data[0].shape[1]
24 |
25 | # check whether this recovers a single 1-sparse eigenpair without error
26 | kstica = KSparseTICA(n_components=1, k = 1)
27 | _ = kstica.fit_transform(data)
28 | assert (np.sum(kstica.components_ != 0) == 1)
29 |
30 | ## check whether this recovers >1 eigenpair without error
31 | #kstica = KSparseTICA(n_components=2)
32 | #_ = kstica.fit_transform(data)
33 |
34 | ## check whether this recovers all eigenpairs without error
35 | #kstica = KSparseTICA()
36 | #_ = kstica.fit_transform(data)
37 |
38 | # check whether we recover the same solution as standard tICA when k = n_features
39 | n_components = 10
40 | kstica = KSparseTICA(n_components=n_components, k=n_features)
41 | tica = tICA(n_components=n_components)
42 | _ = kstica.fit_transform(data)
43 | _ = tica.fit_transform(data)
44 | np.testing.assert_array_almost_equal(kstica.eigenvalues_, tica.eigenvalues_)
--------------------------------------------------------------------------------
/msmbuilder/tests/test_muller.py:
--------------------------------------------------------------------------------
1 | from msmbuilder.example_datasets import MullerPotential, load_muller
2 | from msmbuilder.utils import array2d
3 |
4 |
5 | def test_func():
6 | xx = load_muller(random_state=1110102)['trajectories']
7 | assert len(xx) == 10
8 | assert xx[0].ndim == 2
9 | assert xx[0].shape[1] == 2
10 | array2d(xx)
11 |
12 |
13 | def test_class():
14 | xx = MullerPotential(random_state=123122).get()['trajectories']
15 | assert len(xx) == 10
16 | assert xx[0].ndim == 2
17 | assert xx[0].shape[1] == 2
18 | array2d(xx)
19 |
--------------------------------------------------------------------------------
/msmbuilder/tests/test_ndgrid.py:
--------------------------------------------------------------------------------
1 | import itertools
2 |
3 | import numpy as np
4 |
5 | from msmbuilder.cluster import NDGrid
6 |
7 |
8 | def test_ndgrid_1():
9 | X = np.array([-3, -2, -1, 1, 2, 3]).reshape(-1, 1)
10 | labels = NDGrid(n_bins_per_feature=2).fit([X]).predict([X])[0]
11 | np.testing.assert_array_equal(labels, np.array([0, 0, 0, 1, 1, 1]))
12 |
13 |
14 | def test_ndgrid_2():
15 | X = np.random.RandomState(0).randn(100, 2)
16 | ndgrid = NDGrid(n_bins_per_feature=2, min=-5, max=5)
17 | labels = ndgrid.fit([X]).predict([X])[0]
18 |
19 | mask0 = np.logical_and(X[:, 0] < 0, X[:, 1] < 0)
20 | assert np.all(labels[mask0] == 0)
21 | mask1 = np.logical_and(X[:, 0] > 0, X[:, 1] < 0)
22 | assert np.all(labels[mask1] == 1)
23 | mask2 = np.logical_and(X[:, 0] < 0, X[:, 1] > 0)
24 | assert np.all(labels[mask2] == 2)
25 | mask3 = np.logical_and(X[:, 0] > 0, X[:, 1] > 0)
26 | assert np.all(labels[mask3] == 3)
27 |
28 |
29 | def test_ndgrid_3():
30 | X = np.random.RandomState(0).randn(100, 3)
31 | ndgrid = NDGrid(n_bins_per_feature=2, min=-5, max=5)
32 | labels = ndgrid.fit([X]).predict([X])[0]
33 |
34 | operators = [np.less, np.greater]
35 | x = X[:, 0]
36 | y = X[:, 1]
37 | z = X[:, 2]
38 |
39 | it = itertools.product(operators, repeat=3)
40 |
41 | for indx, (op_z, op_y, op_x) in enumerate(it):
42 | mask = np.logical_and.reduce((op_x(x, 0), op_y(y, 0), op_z(z, 0)))
43 | assert np.all(labels[mask] == indx)
44 |
--------------------------------------------------------------------------------
/msmbuilder/tests/test_nearest.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function, division, absolute_import
2 |
3 | import numpy as np
4 |
5 | from msmbuilder.utils import KDTree
6 |
7 | X1 = 0.3 * np.random.RandomState(0).randn(500, 10)
8 | X2 = 0.3 * np.random.RandomState(1).randn(1000, 10) + 10
9 |
10 |
11 | def test_kdtree_k1():
12 | kdtree = KDTree([X1, X2])
13 | dists, inds = kdtree.query([
14 | [0] * 10,
15 | [10] * 10,
16 | [0] * 10
17 | ])
18 |
19 | assert len(inds) == 3
20 | for subind in inds:
21 | assert len(subind) == 2
22 |
23 | # traj i
24 | assert inds[0][0] == 0
25 | assert inds[1][0] == 1
26 | assert inds[2][0] == 0
27 |
28 |     # frame i
29 | assert 0 <= inds[0][1] < 500
30 | assert 0 <= inds[1][1] < 1000
31 | assert 0 <= inds[2][1] < 500
32 |
33 | # distances
34 | assert len(dists) == 3
35 | for d in dists:
36 | assert 0 <= d < 0.5
37 |
38 |
39 | def test_kdtree_k2():
40 | kdtree = KDTree([X1, X2])
41 | dists, inds = kdtree.query([
42 | [0] * 10,
43 | [10] * 10,
44 | [0] * 10
45 | ], k=2)
46 |
47 | assert len(inds) == 3
48 |
49 | # traj i
50 | for qp in inds[0]: assert qp[0] == 0
51 | for qp in inds[1]: assert qp[0] == 1
52 | for qp in inds[2]: assert qp[0] == 0
53 |
54 | # frame i
55 | for qp in inds[0]: assert 0 <= qp[1] < 500
56 | for qp in inds[1]: assert 0 <= qp[1] < 1000
57 | for qp in inds[2]: assert 0 <= qp[1] < 500
58 |
59 | # distances
60 | assert len(dists) == 3
61 | for d in dists:
62 | assert 0 <= d[0] < 0.5
63 | assert 0 <= d[1] < 0.5
64 |
--------------------------------------------------------------------------------
/msmbuilder/tests/test_param_sweep.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function, division
2 |
3 | import numpy as np
4 | import numpy.testing as npt
5 |
6 | from msmbuilder.msm import MarkovStateModel
7 | from msmbuilder.msm import implied_timescales
8 | from msmbuilder.utils import param_sweep
9 |
10 |
11 | def test_both():
12 | sequences = [np.random.randint(20, size=1000) for _ in range(10)]
13 | lag_times = [1, 5, 10]
14 |
15 | models_ref = []
16 | for tau in lag_times:
17 | msm = MarkovStateModel(reversible_type='mle', lag_time=tau,
18 | n_timescales=10)
19 | msm.fit(sequences)
20 | models_ref.append(msm)
21 |
22 | timescales_ref = [m.timescales_ for m in models_ref]
23 |
24 | model = MarkovStateModel(reversible_type='mle', lag_time=1, n_timescales=10)
25 | models = param_sweep(model, sequences, {'lag_time': lag_times}, n_jobs=2)
26 | timescales = implied_timescales(sequences, lag_times, msm=model,
27 | n_timescales=10, n_jobs=2)
28 |
29 | print(timescales)
30 | print(timescales_ref)
31 |
32 | if np.abs(models[0].transmat_ - models[1].transmat_).sum() < 1E-6:
33 | raise Exception("you wrote a bad test.")
34 |
35 | for i in range(len(lag_times)):
36 | npt.assert_array_almost_equal(models[i].transmat_,
37 | models_ref[i].transmat_)
38 | npt.assert_array_almost_equal(timescales_ref[i], timescales[i])
39 |
40 |
41 | def test_multi_params():
42 | msm = MarkovStateModel()
43 | param_grid = {
44 | 'lag_time': [1, 2, 3],
45 | 'reversible_type': ['mle', 'transpose']
46 | }
47 |
48 | sequences = np.random.randint(20, size=(10, 1000))
49 | models = param_sweep(msm, sequences, param_grid, n_jobs=2)
50 | assert len(models) == 6
51 |
52 | # I don't know what the order should be, so I'm just going
53 | # to check that there are no duplicates
54 | params = []
55 | for m in models:
56 | params.append('%s%d' % (m.reversible_type, m.lag_time))
57 |
58 | for l in param_grid['lag_time']:
59 | for s in param_grid['reversible_type']:
60 | assert ('%s%d' % (s, l)) in params
61 |
62 | # this is redundant, but w/e
63 | assert len(set(params)) == 6
64 |
65 |
66 | def test_ntimescales():
67 | # see issue #603
68 | trajs = [np.random.randint(0, 30, 500) for _ in range(5)]
69 | its = implied_timescales(trajs, [1, 2, 3], n_timescales=11)
70 | assert its.shape[1] == 11
71 |
--------------------------------------------------------------------------------
/msmbuilder/tests/test_sampling.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from msmbuilder.decomposition import tICA
4 | from msmbuilder.io.sampling import sample_dimension
5 |
6 |
7 | def test_sample_dimension():
8 | np.random.seed(42)
9 | X = np.random.randn(500, 5)
10 | data = [X, X, X]
11 |
12 | tica = tICA(n_components=2, lag_time=1).fit(data)
13 | tica_trajs = {k: tica.partial_transform(v) for k, v in enumerate(data)}
14 | res = sample_dimension(tica_trajs, 0, 10, scheme="linear")
15 | res2 = sample_dimension(tica_trajs, 1, 10, scheme="linear")
16 |
17 | assert len(res) == len(res2) == 10
18 |
19 | def test_sample_dimension_2():
20 | np.random.seed(42)
21 | X = np.random.randn(500, 5)
22 | data = [X, X, X]
23 |
24 | tica = tICA(n_components=2, lag_time=1).fit(data)
25 | tica_trajs = {k: tica.partial_transform(v) for k, v in enumerate(data)}
26 | res = sample_dimension(tica_trajs, 0, 10, scheme="random")
27 | res2 = sample_dimension(tica_trajs, 1, 10, scheme="edge")
28 |
29 | assert len(res) == len(res2) == 10
30 |
--------------------------------------------------------------------------------
/msmbuilder/tests/test_sasa_featurizer.py:
--------------------------------------------------------------------------------
1 | import mdtraj as md
2 | import numpy as np
3 | from mdtraj.testing import eq
4 |
5 | from msmbuilder.featurizer import SASAFeaturizer
6 | from msmbuilder.example_datasets import FsPeptide
7 |
8 | t = FsPeptide().get().trajectories[0][:10]
9 |
10 | def _test_sasa_featurizer(t, value):
11 | sasa = md.shrake_rupley(t)
12 | rids = np.array([a.residue.index for a in t.top.atoms])
13 |
14 | for i, rid in enumerate(np.unique(rids)):
15 | mask = (rids == rid)
16 | eq(value[:, i], np.sum(sasa[:, mask], axis=1))
17 |
18 |
19 | def test_sasa_featurizer_1():
20 | # t = md.load(get_fn('frame0.h5'))
21 |
22 | value = SASAFeaturizer(mode='residue').partial_transform(t)
23 | assert value.shape == (t.n_frames, t.n_residues)
24 | _test_sasa_featurizer(t, value)
25 |
26 |
27 | def test_sasa_featurizer_2():
28 | # t = md.load(get_fn('frame0.h5'))
29 |
30 |     # scramble the order of the atoms, and which residue each is a
31 | # member of
32 | df, bonds = t.top.to_dataframe()
33 | df['resSeq'] = np.random.randint(5, size=(t.n_atoms))
34 | df['resName'] = df['resSeq']
35 | t.top = md.Topology.from_dataframe(df, bonds)
36 |
37 | value = SASAFeaturizer(mode='residue').partial_transform(t)
38 | _test_sasa_featurizer(t, value)
39 |
--------------------------------------------------------------------------------
/msmbuilder/tests/test_sparsetica.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from msmbuilder.decomposition import tICA, SparseTICA
4 | from msmbuilder.example_datasets import DoubleWell
5 |
6 |
7 | def build_dataset():
8 | slow = DoubleWell(random_state=0).get_cached().trajectories
9 | data = []
10 |
11 | # each trajectory is a double-well along the first dof,
12 | # and then 9 degrees of freedom of gaussian white noise.
13 | for s in slow:
14 | t = np.hstack((s, np.random.randn(len(s), 9)))
15 | data.append(t)
16 | return data
17 |
18 |
19 | def test_doublewell():
20 | data = build_dataset()
21 | tica = tICA(n_components=1).fit(data)
22 | tic0 = tica.components_[0]
23 |
24 | stica = SparseTICA(n_components=1, verbose=False).fit(data)
25 | stic0 = stica.components_[0]
26 |
27 | np.testing.assert_array_almost_equal(stic0[1:], np.zeros(9))
28 | np.testing.assert_almost_equal(stic0[0], 0.58, decimal=1)
29 |
--------------------------------------------------------------------------------
/msmbuilder/tests/test_strongly_connected_subgraph.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from msmbuilder.msm import _strongly_connected_subgraph
4 |
5 |
6 | def test_completely_disconnected_1():
7 | # what do you do with 1 state that is not even connected to itself?
8 | tC, m, p_r = _strongly_connected_subgraph(np.zeros((1, 1)))
9 | assert tC.shape == (0, 0)
10 | assert m == {}
11 | assert np.isnan(p_r)
12 |
13 |
14 | def test_completely_disconnected_2():
15 | tC, m, p_r = _strongly_connected_subgraph(np.zeros((3, 3)))
16 | assert tC.shape == (0, 0)
17 | assert m == {}
18 | assert np.isnan(p_r)
19 |
20 |
21 | def test_one_state():
22 | # but if that state does have a self-connection, it should be retained
23 | tC, m, p_r = _strongly_connected_subgraph(np.ones((1, 1)))
24 | assert tC.shape == (1, 1)
25 | assert m == {0: 0}
26 | np.testing.assert_almost_equal(p_r, 100)
27 |
28 |
29 | def test_counts_1():
30 | C = np.array([[1, 0, 0],
31 | [0, 1, 1],
32 | [0, 1, 1]])
33 |
34 | tC, m, p_r = _strongly_connected_subgraph(np.array(C))
35 | np.testing.assert_array_equal(tC, np.array([[1, 1], [1, 1]]))
36 | assert m == {1: 0, 2: 1}
37 | np.testing.assert_almost_equal(p_r, 80.0)
38 |
39 |
40 | def test_counts_2():
41 | C = np.array([[1, 1, 0],
42 | [0, 1, 1],
43 | [0, 1, 1]])
44 | tC, m, p_r = _strongly_connected_subgraph(np.array(C))
45 | np.testing.assert_array_equal(tC, np.array([[1, 1], [1, 1]]))
46 | assert m == {1: 0, 2: 1}
47 | np.testing.assert_almost_equal(p_r, 83.333333333333)
48 |
49 |
50 | def test_fully_connected():
51 | tC, m, p_r = _strongly_connected_subgraph(np.ones((3, 3)))
52 | np.testing.assert_array_almost_equal(tC, np.ones((3, 3)))
53 | assert m == {0: 0, 1: 1, 2: 2}
54 | np.testing.assert_almost_equal(p_r, 100.0)
55 |
56 |
57 | def test_disconnected():
58 | tC, m, p_r = _strongly_connected_subgraph(np.eye(3))
59 | assert tC.shape == (1, 1)
60 | assert type(p_r) == np.float64
61 |
62 |
63 | def test_upper_triangular():
64 | tC, m, p_r = _strongly_connected_subgraph(np.eye(3, k=1))
65 | assert tC.shape == (0, 0)
66 | assert m == {}
67 | np.testing.assert_almost_equal(p_r, 50.0)
68 |
--------------------------------------------------------------------------------
/msmbuilder/tests/test_template_project.py:
--------------------------------------------------------------------------------
1 | from msmbuilder.io import TemplateProject
2 | import tempfile
3 | import shutil
4 | import os
5 |
6 |
7 | def setup_module():
8 | global WD, PWD
9 | PWD = os.path.abspath(".")
10 | WD = tempfile.mkdtemp()
11 | os.chdir(WD)
12 |
13 |
14 | def teardown_module():
15 | os.chdir(PWD)
16 | shutil.rmtree(WD)
17 |
18 |
19 | def test_template_project():
20 | tp = TemplateProject()
21 | tp.do()
22 |
--------------------------------------------------------------------------------
/msmbuilder/tests/test_transition_counts.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from six import PY3
3 |
4 | from msmbuilder.msm import _transition_counts
5 |
6 |
7 | def test_argument():
8 | # test that first argument must be a list of sequences
9 | with np.testing.assert_raises(ValueError):
10 | _transition_counts([1, 2, 3])
11 |
12 |
13 | def test_upper_triangular():
14 | # test a simple example
15 | c, m = _transition_counts([np.arange(10)])
16 | np.testing.assert_array_equal(c, np.eye(10, k=1))
17 | assert list(m.keys()) == list(range(10))
18 | assert list(m.values()) == list(range(10))
19 |
20 |
21 | def test_lag_time():
22 | # test the simple example with lag_time > 1
23 | c, m = _transition_counts([range(10)], lag_time=2)
24 | np.testing.assert_array_equal(c, 0.5 * np.eye(10, k=2))
25 |
26 |
27 | def test_string_labels():
28 | # try using strings as labels
29 | c, m = _transition_counts([['alpha', 'b', 'b', 'b', 'c']])
30 | np.testing.assert_array_equal(c, 1.0 * np.array([
31 | [0, 1, 0],
32 | [0, 2, 1],
33 | [0, 0, 0]
34 | ]))
35 | assert m == {'alpha': 0, 'b': 1, 'c': 2}
36 |
37 |
38 | def test_big_counts():
39 | # try using really big numbers, and we still want a small transition matrix
40 | c, m = _transition_counts([[100000000, 100000000, 100000001, 100000001]])
41 | np.testing.assert_array_equal(c, 1.0 * np.array([
42 | [1, 1],
43 | [0, 1],
44 | ]))
45 | assert m == {100000000: 0, 100000001: 1}
46 |
47 |
48 | def test_no_counts():
49 | c, m = _transition_counts([[0]])
50 |
51 |
52 | def test_nan_and_none():
53 | # deal with NaN, None?
54 | c, m = _transition_counts([[0, np.nan]])
55 | assert m == {0: 0}
56 | np.testing.assert_array_equal(c, np.zeros((1, 1)))
57 |
58 | c, m = _transition_counts([[np.nan]])
59 | assert m == {}
60 | np.testing.assert_array_equal(c, np.zeros((0, 0)))
61 |
62 | if not PY3:
63 | c, m = _transition_counts([[None, None]])
64 | assert m == {}
65 | np.testing.assert_array_equal(c, np.zeros((0, 0)))
66 |
67 |
68 | def test_lag_time_norm():
69 | X = np.arange(6)
70 | C, _ = _transition_counts([X], lag_time=3)
71 | np.testing.assert_array_almost_equal(C, np.eye(6, k=3) / 3)
72 |
73 |
74 | def test_sliding_window():
75 | X = np.arange(10)
76 | C1, m1 = _transition_counts([X], lag_time=3, sliding_window=False)
77 | C2, m2 = _transition_counts([X[::3]], sliding_window=True)
78 | np.testing.assert_array_almost_equal(C1, C2)
79 | assert m1 == m2
80 |
--------------------------------------------------------------------------------
/msmbuilder/tests/test_transmat_errorbar.py:
--------------------------------------------------------------------------------
1 | from msmbuilder.msm.validation.transmat_errorbar import *
2 | import numpy as np
3 |
4 |
5 | def test_create_perturb_params():
6 | # Test with a 10x10 counts matrix, with all entries in the counts set to 100
7 | countsmat = 100 * np.ones((10,10))
8 | params = create_perturb_params(countsmat)
9 | # Check dimensions of outputs are equal to those of inputs
10 | for param in params:
11 | assert np.shape(param) == np.shape(countsmat)
12 |
13 |
14 | def test_perturb_tmat():
15 | # The transition matrix is perturbed under the CLT approximation, which is only valid for well-sampled data w.r.t. transition probability (tprob >> 1 / row-summed counts)
16 | countsmat = 100 * np.ones((10,10)) # 10-state MSM, 1000 counts per state, 100 transition events between states, no zero entries
17 | params = create_perturb_params(countsmat)
18 | new_transmat = (perturb_tmat(params[0], params[1]))
19 |     # All transition probabilities are by design nonzero, so there should be no zero entries after the perturbation
20 | assert len(np.where(new_transmat == 0)[0]) == 0
21 |     # Now let's assume you have a poorly sampled dataset where all elements in the counts matrix are 1
22 | countsmat = np.ones((10,10))
23 | params = create_perturb_params(countsmat)
24 | new_transmat = (perturb_tmat(params[0], params[1]))
25 | # Your perturbed transition matrix will have several negative values (set automatically to 0), indicating this method probably isn't appropriate for your dataset
26 | # (This will also cause your distribution of MFPTs to have very obvious outliers to an otherwise approximately Gaussian distribution due to the artificial zeros in the transition matrix)
27 |     assert len(np.where(new_transmat == 0)[0]) > 0
28 |
29 |
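The comments above describe a CLT-style perturbation of the transition matrix. A rough NumPy sketch of that idea (my own illustration of the approximation, not the library's implementation): each row-normalized probability p_ij estimated from N_i row counts gets Gaussian noise with standard error sqrt(p_ij * (1 - p_ij) / N_i), and negative entries are clipped to zero.

    import numpy as np

    countsmat = 100 * np.ones((10, 10))           # well-sampled toy counts
    row_counts = countsmat.sum(axis=1)
    p = countsmat / row_counts[:, None]           # MLE transition probabilities
    sigma = np.sqrt(p * (1 - p) / row_counts[:, None])
    perturbed = p + np.random.randn(*p.shape) * sigma
    perturbed = np.clip(perturbed, 0, None)       # negative entries clipped to 0
    perturbed /= perturbed.sum(axis=1)[:, None]   # re-normalize each row
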
--------------------------------------------------------------------------------
/msmbuilder/tests/test_workflows.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function, division, absolute_import
2 |
3 | import os
4 | import shlex
5 | import shutil
6 | import subprocess
7 | import tempfile
8 |
9 | from pkg_resources import resource_filename
10 |
11 |
12 | class tempdir(object):
13 | def __enter__(self):
14 | self._curdir = os.path.abspath(os.curdir)
15 | self._tempdir = tempfile.mkdtemp()
16 | os.chdir(self._tempdir)
17 |
18 | def __exit__(self, *exc_info):
19 | os.chdir(self._curdir)
20 | shutil.rmtree(self._tempdir)
21 |
22 |
23 | def shell_lines(resource):
24 | fn = resource_filename('msmbuilder', resource)
25 | buf = ''
26 | with open(fn) as f:
27 | for line in f:
28 | line = line.strip()
29 | if not line or line.startswith('#'):
30 | continue
31 | if line.endswith('\\'):
32 | buf += line.rstrip('\\')
33 | else:
34 | yield buf + ' ' + line
35 | buf = ''
36 |
37 |
38 | def check_call(tokens):
39 | try:
40 | subprocess.check_output(tokens, stderr=subprocess.STDOUT,
41 | universal_newlines=True)
42 | except subprocess.CalledProcessError as e:
43 | print(e.cmd)
44 | print(e.output)
45 | raise
46 |
47 |
48 | class workflow_tester(object):
49 | def __init__(self, fn):
50 | self.fn = fn
51 | self.path = "tests/workflows/{}".format(fn)
52 | self.description = "{}.test_{}".format(__name__, fn)
53 |
54 | def __call__(self, *args, **kwargs):
55 | with tempdir():
56 | for line in shell_lines(self.path):
57 | check_call(shlex.split(line, posix=False))
58 |
59 |
60 | def test_workflows():
61 | for fn in [
62 | 'basic.sh',
63 | 'rmsd.sh',
64 | 'ghmm.sh',
65 | ]:
66 | yield workflow_tester(fn)
67 |
--------------------------------------------------------------------------------
/msmbuilder/tests/workflows/basic.sh:
--------------------------------------------------------------------------------
1 | msmb AlanineDipeptide --data_home ./
2 | msmb AtomIndices --out atom_indices.txt -p ./alanine_dipeptide/ala2.pdb -d --heavy
3 | msmb AtomPairsFeaturizer --transformed atom_pairs --trjs './alanine_dipeptide/*.dcd' \
4 | --pair_indices atom_indices.txt --top ./alanine_dipeptide/ala2.pdb --out atom_pairs.pkl
5 | msmb RobustScaler -i atom_pairs/ -t scaled_atom_pairs.h5
6 | msmb tICA -i scaled_atom_pairs.h5 -t atom_pairs_tica.h5 --n_components 4 \
7 | --shrinkage 0 \
8 | --kinetic_mapping \
9 | --lag_time 2
10 | msmb KCenters -i atom_pairs_tica.h5 -t kcenters_clusters.h5 --metric cityblock
11 | msmb MarkovStateModel --inp kcenters_clusters.h5 --out mymsm.pkl
12 |
--------------------------------------------------------------------------------
/msmbuilder/tests/workflows/ghmm.sh:
--------------------------------------------------------------------------------
1 | msmb AlanineDipeptide --data_home ./
2 |
3 | msmb DihedralFeaturizer --transformed feats/ \
4 | --trjs './alanine_dipeptide/*.dcd' \
5 | --top ./alanine_dipeptide/ala2.pdb \
6 | --out featy.pkl
7 |
8 | msmb tICA --inp feats/ --transformed tica_trajs.h5 \
9 | --n_components 4 \
10 | --kinetic_mapping \
11 | --lag_time 2
12 |
13 | msmb GaussianHMM --inp tica_trajs.h5 \
14 | --out hmm.pkl \
15 | --n_states 2
16 |
--------------------------------------------------------------------------------
/msmbuilder/tests/workflows/rmsd.sh:
--------------------------------------------------------------------------------
1 | msmb AlanineDipeptide --data_home ./
2 | msmb AtomIndices --out atom_indices.txt \
3 | -p ./alanine_dipeptide/ala2.pdb \
4 | -d --heavy
5 |
6 | msmb MiniBatchKMedoids --n_clusters 10 \
7 | --metric rmsd \
8 | --inp './alanine_dipeptide/*.dcd' \
9 | --top ./alanine_dipeptide/ala2.pdb \
10 | --atom_indices atom_indices.txt \
11 | --transformed kmedoids_centers.h5
12 |
13 | msmb RegularSpatial --inp './alanine_dipeptide/*.dcd' \
14 | --transformed rs_rmsd.h5 \
15 | --metric rmsd \
16 | --top ./alanine_dipeptide/ala2.pdb \
17 | --d_min 0.5
18 |
19 |
--------------------------------------------------------------------------------
/msmbuilder/tpt/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Module for analyzing Markov State Models, with an emphasis
3 | on Transition Path Theory.
4 |
5 | These are the canonical references for TPT. Note that TPT
6 | is really a specialization of ideas very familiar to the
7 | mathematical study of Markov chains, and many books and
8 | manuscripts in the mathematical literature cover the
9 | same concepts.
10 |
11 | References
12 | ----------
13 | .. [1] Weinan, E. and Vanden-Eijnden, E. Towards a theory of
14 | transition paths. J. Stat. Phys. 123, 503-523 (2006).
15 | .. [2] Metzner, P., Schutte, C. & Vanden-Eijnden, E.
16 | Transition path theory for Markov jump processes.
17 | Multiscale Model. Simul. 7, 1192-1219 (2009).
18 | .. [3] Berezhkovskii, A., Hummer, G. & Szabo, A. Reactive
19 | flux and folding pathways in network models of
20 | coarse-grained protein dynamics. J. Chem. Phys.
21 | 130, 205102 (2009).
22 | .. [4] Noe, Frank, et al. "Constructing the equilibrium ensemble of folding
23 | pathways from short off-equilibrium simulations." PNAS 106.45 (2009):
24 | 19011-19016.
25 | """
26 |
27 | from __future__ import absolute_import
28 |
29 | from .committor import committors, conditional_committors
30 | from .flux import fluxes, net_fluxes
31 | from .hub import fraction_visited, hub_scores
32 | from .path import paths, top_path
33 | from .mfpt import mfpts
34 |
35 | __all__ = ['fluxes', 'net_fluxes', 'fraction_visited',
36 | 'hub_scores', 'paths', 'top_path', 'committors',
37 | 'conditional_committors', 'mfpts']
38 |
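As a rough orientation to this API, a minimal sketch of computing committors and net fluxes from a fitted MSM. It assumes `committors(sources, sinks, msm)` and `net_fluxes(sources, sinks, msm)` take lists of state indices plus a fitted `MarkovStateModel`; treat the exact signatures as assumptions and check the function docstrings.

    # Hedged sketch: assumes committors/net_fluxes accept (sources, sinks, msm);
    # verify against the docstrings in msmbuilder.tpt before relying on this.
    import numpy as np
    from msmbuilder.msm import MarkovStateModel
    from msmbuilder.tpt import committors, net_fluxes

    msm = MarkovStateModel(lag_time=1).fit([np.random.randint(3, size=1000)])
    q_plus = committors([0], [2], msm)   # forward committor for each state
    flux = net_fluxes([0], [2], msm)     # net reactive flux between states
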
--------------------------------------------------------------------------------
/msmbuilder/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function, division, absolute_import
2 | from .draw_samples import *
3 | from .io import *
4 | from .param_sweep import *
5 | from .probability import *
6 | from .subsampler import *
7 | from .validation import *
8 | from .compat import *
9 | from .nearest import KDTree
10 | from .divergence import *
11 | from .convenience import *
12 |
--------------------------------------------------------------------------------
/msmbuilder/utils/compat.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function, absolute_import, division
2 | import os
3 | import functools
4 | import warnings
5 |
6 | # Copyright (C) 2012-2013 Marcus von Appen
7 | #
8 | # This software is provided 'as-is', without any express or implied
9 | # warranty. In no event will the authors be held liable for any damages
10 | # arising from the use of this software.
11 | #
12 | # Permission is granted to anyone to use this software for any purpose,
13 | # including commercial applications, and to alter it and redistribute it
14 | # freely, subject to the following restrictions:
15 | #
16 | # 1. The origin of this software must not be misrepresented; you must not
17 | # claim that you wrote the original software. If you use this software
18 | # in a product, an acknowledgment in the product documentation would be
19 | # appreciated but is not required.
20 | # 2. Altered source versions must be plainly marked as such, and must not be
21 | # misrepresented as being the original software.
22 | # 3. This notice may not be removed or altered from any source distribution.
23 |
24 |
25 | class ExperimentalWarning(Warning):
26 | """Indicates that a certain class, function or behavior is in an
27 | experimental state.
28 | """
29 | def __init__(self, obj, msg=None):
30 |         """Creates an ExperimentalWarning for the specified obj.
31 |
32 | If a message is passed in msg, it will be printed instead of the
33 | default message.
34 | """
35 | super(ExperimentalWarning, self).__init__()
36 | self.obj = obj
37 | self.msg = msg
38 |
39 | def __str__(self):
40 | if self.msg is None:
41 | line = "Warning: %s is in an experimental state." % repr(self.obj)
42 | return os.linesep.join(('', '"' * len(line), line, '"' * len(line)))
43 | return repr(self.msg)
44 |
45 |
46 | def experimental(name=None):
47 | """A simple decorator to mark functions and methods as experimental."""
48 | def inner(func):
49 | @functools.wraps(func)
50 | def wrapper(*fargs, **kw):
51 | fname = name
52 | if name is None:
53 | fname = func.__name__
54 | warnings.warn("%s" % fname, category=ExperimentalWarning,
55 | stacklevel=2)
56 | return func(*fargs, **kw)
57 | return wrapper
58 | return inner
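
A short usage sketch of the decorator above (illustrative): decorating a function makes every call emit an ExperimentalWarning carrying the function's name.

    import warnings
    from msmbuilder.utils.compat import ExperimentalWarning, experimental

    @experimental()
    def shiny_new_feature():
        return 42

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        shiny_new_feature()
    assert any(issubclass(w.category, ExperimentalWarning) for w in caught)
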
--------------------------------------------------------------------------------
/msmbuilder/utils/convenience.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function, division, absolute_import
2 |
3 | def unique(seq):
4 | '''Returns a list of unique items maintaining the order of the original.
5 | '''
6 | seen = set()
7 | seen_add = seen.add
8 | return [x for x in seq if not (x in seen or seen_add(x))]
9 |
--------------------------------------------------------------------------------
/msmbuilder/utils/draw_samples.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function, division, absolute_import
2 | import numpy as np
3 | import mdtraj as md
4 |
5 | __all__ = ['map_drawn_samples']
6 |
7 |
8 | def map_drawn_samples(selected_pairs_by_state, trajectories, top=None):
9 | """Lookup trajectory frames using pairs of (trajectory, frame) indices.
10 |
11 | Parameters
12 | ----------
13 | selected_pairs_by_state : array, dtype=int, shape=(n_states, n_samples, 2)
14 | selected_pairs_by_state[state, sample] gives the (trajectory, frame)
15 | index associated with a particular sample from that state.
16 | trajectories : list(md.Trajectory) or list(np.ndarray) or list(filenames)
17 |         The trajectories associated with sequences,
18 | which will be used to extract coordinates of the state centers
19 | from the raw trajectory data. This can also be a list of np.ndarray
20 | objects or filenames. If they are filenames, mdtraj will be used to
21 | load them
22 | top : md.Topology, optional, default=None
23 | Use this topology object to help mdtraj load filenames
24 |
25 | Returns
26 | -------
27 |     frames_by_state : list of mdtraj.Trajectory
28 | Output will be a list of trajectories such that frames_by_state[state]
29 | is a trajectory drawn from `state` of length `n_samples`. If
30 | trajectories are numpy arrays, the output will be numpy arrays instead
31 | of md.Trajectories
32 |
33 | Examples
34 | --------
35 | >>> selected_pairs_by_state = hmm.draw_samples(sequences, 3)
36 | >>> samples = map_drawn_samples(selected_pairs_by_state, trajectories)
37 |
38 | Notes
39 | -----
40 | YOU are responsible for ensuring that selected_pairs_by_state and
41 | trajectories correspond to the same dataset!
42 |
43 | See Also
44 | --------
45 | ghmm.GaussianHMM.draw_samples : Draw samples from GHMM
46 | ghmm.GaussianHMM.draw_centroids : Draw centroids from GHMM
47 | """
48 |
49 | frames_by_state = []
50 |
51 | for state, pairs in enumerate(selected_pairs_by_state):
52 | if isinstance(trajectories[0], str):
53 | if top:
54 | process = lambda x, frame: md.load_frame(x, frame, top=top)
55 | else:
56 | process = lambda x, frame: md.load_frame(x, frame)
57 | else:
58 | process = lambda x, frame: x[frame]
59 |
60 | frames = [process(trajectories[trj], frame) for trj, frame in pairs]
61 | try: # If frames are mdtraj Trajectories
62 | # Get an empty trajectory with correct shape and call the join
63 | # method on it to merge trajectories
64 | state_trj = frames[0][0:0].join(frames)
65 | except AttributeError:
66 | state_trj = np.array(frames) # Just a bunch of np arrays
67 | frames_by_state.append(state_trj)
68 |
69 | return frames_by_state
70 |
--------------------------------------------------------------------------------
/msmbuilder/utils/io.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function, division, absolute_import
2 |
3 | import contextlib
4 | import pickle
5 | import warnings
6 |
7 | import numpy as np
8 | from sklearn.externals.joblib import load as jl_load
9 |
10 | __all__ = ['printoptions', 'verbosedump', 'verboseload', 'dump', 'load']
11 |
12 | warnings.warn("This module might be deprecated in favor of msmbuilder.io",
13 | PendingDeprecationWarning)
14 |
15 |
16 | @contextlib.contextmanager
17 | def printoptions(*args, **kwargs):
18 | original = np.get_printoptions()
19 | np.set_printoptions(*args, **kwargs)
20 | yield
21 | np.set_printoptions(**original)
22 |
23 |
24 | def dump(value, filename, compress=None, cache_size=None):
25 | """Save an arbitrary python object using pickle.
26 |
27 | Parameters
28 | -----------
29 | value : any Python object
30 | The object to store to disk using pickle.
31 | filename : string
32 | The name of the file in which it is to be stored
33 | compress : None
34 | No longer used
35 | cache_size : positive number, optional
36 | No longer used
37 |
38 | See Also
39 | --------
40 | load : corresponding loader
41 | """
42 | if compress is not None or cache_size is not None:
43 | warnings.warn("compress and cache_size are no longer valid options")
44 |
45 | with open(filename, 'wb') as f:
46 | pickle.dump(value, f)
47 |
48 |
49 | def load(filename):
50 | """Load an object that has been saved with dump.
51 |
52 | We try to open it using the pickle protocol. As a fallback, we
53 | use joblib.load. Joblib was the default prior to msmbuilder v3.2
54 |
55 | Parameters
56 | ----------
57 | filename : string
58 | The name of the file to load.
59 | """
60 | try:
61 | with open(filename, 'rb') as f:
62 | return pickle.load(f)
63 | except Exception as e1:
64 | try:
65 | return jl_load(filename)
66 | except Exception as e2:
67 | raise IOError(
68 | "Unable to load {} using the pickle or joblib protocol.\n"
69 | "Pickle: {}\n"
70 | "Joblib: {}".format(filename, e1, e2)
71 | )
72 |
73 |
74 | def verbosedump(value, fn, compress=None):
75 | """Verbose wrapper around dump"""
76 | print('Saving "%s"... (%s)' % (fn, type(value)))
77 | dump(value, fn, compress=compress)
78 |
79 |
80 | def verboseload(fn):
81 | """Verbose wrapper around load.
82 |
83 | Try to use pickle. If that fails, try to use joblib.
84 | """
85 | print('loading "%s"...' % fn)
86 | return load(fn)
87 |
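The dump/load pair above is a thin wrapper around pickle with a joblib fallback; a minimal usage sketch:

    from msmbuilder.utils import dump, load

    model = {'n_states': 4, 'lag_time': 10}   # any picklable object works
    dump(model, 'model.pkl')
    assert load('model.pkl') == model
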
--------------------------------------------------------------------------------
/msmbuilder/utils/param_sweep.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function, division, absolute_import
2 | from sklearn import clone
3 | try:
4 | from sklearn.model_selection import ParameterGrid
5 | except ImportError:
6 | from sklearn.grid_search import ParameterGrid
7 |
8 | from sklearn.externals.joblib import Parallel, delayed
9 |
10 | __all__ = ['param_sweep']
11 |
12 |
13 | def param_sweep(model, sequences, param_grid, n_jobs=1, verbose=0):
14 | """Fit a series of models over a range of parameters.
15 |
16 | Parameters
17 | ----------
18 | model : msmbuilder.BaseEstimator
19 | An *instance* of an estimator to be used
20 | to fit data.
21 | sequences : list of array-like
22 | List of sequences, or a single sequence. Each
23 | sequence should be a 1D iterable of state
24 | labels. Labels can be integers, strings, or
25 | other orderable objects.
26 | param_grid : dict or sklearn.grid_search.ParameterGrid
27 | Parameter grid to specify models to fit. See
28 | sklearn.grid_search.ParameterGrid for an explanation
29 | n_jobs : int, optional
30 | Number of jobs to run in parallel using joblib.Parallel
31 |
32 | Returns
33 | -------
34 | models : list
35 | List of models fit to the data according to
36 | param_grid
37 | """
38 |
39 | if isinstance(param_grid, dict):
40 | param_grid = ParameterGrid(param_grid)
41 | elif not isinstance(param_grid, ParameterGrid):
42 |         raise ValueError("param_grid must be a dict or ParameterGrid instance")
43 |
44 | # iterable with (model, sequence) as items
45 | iter_args = ((clone(model).set_params(**params), sequences)
46 | for params in param_grid)
47 |
48 | models = Parallel(n_jobs=n_jobs, verbose=verbose)(
49 | delayed(_param_sweep_helper)(args) for args in iter_args)
50 |
51 | return models
52 |
53 |
54 | def _param_sweep_helper(args):
55 | """
56 | helper for fitting many models on some data
57 | """
58 | model, sequences = args
59 | model.fit(sequences)
60 |
61 | return model
62 |
--------------------------------------------------------------------------------
/msmbuilder/utils/probability.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function, division, absolute_import
2 | import numpy as np
3 | from sklearn.utils import check_random_state
4 |
5 | __all__ = ['categorical']
6 |
7 |
8 | def categorical(pvals, size=None, random_state=None):
9 | """Return random integer from a categorical distribution
10 |
11 | Parameters
12 | ----------
13 | pvals : sequence of floats, length p
14 | Probabilities of each of the ``p`` different outcomes. These
15 | should sum to 1.
16 | size : int or tuple of ints, optional
17 | Defines the shape of the returned array of random integers. If None
18 |         (the default), returns a single integer.
19 |     random_state : RandomState or an int seed, optional
20 | A random number generator instance.
21 | """
22 | cumsum = np.cumsum(pvals)
23 | if size is None:
24 | size = (1,)
25 | axis = 0
26 |     elif isinstance(size, int):
27 |         size = (size, 1)
28 |         axis = 1
29 |     elif isinstance(size, tuple):
30 |         size = size + (1,)
31 |         axis = len(size) - 1
32 |     else:
33 |         raise TypeError('size must be an int or tuple of ints')
34 |
35 |     random_state = check_random_state(random_state)
36 |     return np.sum(cumsum < random_state.random_sample(size), axis=axis)
37 |
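A quick usage sketch of `categorical` (illustrative): the counting trick above (how many cumulative probabilities fall below a uniform draw) yields integers whose frequencies approximate `pvals`.

    import numpy as np
    from msmbuilder.utils import categorical

    draws = categorical([0.2, 0.5, 0.3], size=(1000,), random_state=0)
    # draws are integers in {0, 1, 2}; their frequencies approximate pvals
    print(np.bincount(draws, minlength=3) / 1000.0)
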
--------------------------------------------------------------------------------
/msmbuilder/utils/progressbar/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | #
4 | # progressbar - Text progress bar library for Python.
5 | # Copyright (c) 2005 Nilton Volpato
6 | #
7 | # This library is free software; you can redistribute it and/or
8 | # modify it under the terms of the GNU Lesser General Public
9 | # License as published by the Free Software Foundation; either
10 | # version 2.1 of the License, or (at your option) any later version.
11 | #
12 | # This library is distributed in the hope that it will be useful,
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 | # Lesser General Public License for more details.
16 | #
17 | # You should have received a copy of the GNU Lesser General Public
18 | # License along with this library; if not, write to the Free Software
19 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 |
21 | """Text progress bar library for Python.
22 |
23 | A text progress bar is typically used to display the progress of a long
24 | running operation, providing a visual cue that processing is underway.
25 |
26 | The ProgressBar class manages the current progress, and the format of the line
27 | is given by a number of widgets. A widget is an object that may display
28 | differently depending on the state of the progress bar. There are three types
29 | of widgets:
30 | - a string, which always shows itself
31 |
32 | - a ProgressBarWidget, which may return a different value every time its
33 | update method is called
34 |
35 | - a ProgressBarWidgetHFill, which is like ProgressBarWidget, except it
36 | expands to fill the remaining width of the line.
37 |
38 | The progressbar module is very easy to use, yet very powerful. It will also
39 | automatically enable features like auto-resizing when the system supports it.
40 | """
41 |
42 | from __future__ import absolute_import
43 | __author__ = 'Nilton Volpato'
44 | __author_email__ = 'first-name dot last-name @ gmail.com'
45 | __date__ = '2011-05-14'
46 | __version__ = '2.3'
47 |
48 | from .compat import *
49 | from .widgets import *
50 | from .progressbar import *
51 |
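For orientation, the typical widget-based usage described in the docstring looks roughly like the sketch below. The widget names (`Percentage`, `Bar`, `ETA`) and the `maxval`/`start`/`update`/`finish` methods reflect the upstream progressbar 2.3 API, so treat them as assumptions and check the vendored `widgets.py`/`progressbar.py` for the exact names.

    # Hedged sketch of the widget-based API described in the docstring above;
    # verify the exact widget and method names in the vendored modules.
    from msmbuilder.utils.progressbar import ProgressBar, Percentage, Bar, ETA

    pbar = ProgressBar(widgets=[Percentage(), ' ', Bar(), ' ', ETA()], maxval=100)
    pbar.start()
    for i in range(100):
        pbar.update(i + 1)   # each widget re-renders on every update
    pbar.finish()
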
--------------------------------------------------------------------------------
/msmbuilder/utils/progressbar/compat.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | #
4 | # progressbar - Text progress bar library for Python.
5 | # Copyright (c) 2005 Nilton Volpato
6 | #
7 | # This library is free software; you can redistribute it and/or
8 | # modify it under the terms of the GNU Lesser General Public
9 | # License as published by the Free Software Foundation; either
10 | # version 2.1 of the License, or (at your option) any later version.
11 | #
12 | # This library is distributed in the hope that it will be useful,
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 | # Lesser General Public License for more details.
16 | #
17 | # You should have received a copy of the GNU Lesser General Public
18 | # License along with this library; if not, write to the Free Software
19 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 |
21 | """Compatibility methods and classes for the progressbar module."""
22 |
23 |
24 | # Python 3 renamed the iterator method to ``__next__`` and added a ``next()``
25 | # builtin (backported to 2.6); this shim gives older 2.x the same call syntax.
26 | try:
27 | next
28 | except NameError:
29 | def next(iterator):
30 | try:
31 | # Try the Python 3 iterator protocol first
32 | return iterator.__next__()
33 | except AttributeError:
34 | # Fall back to the classic Python 2 ``.next()`` method
35 | return iterator.next()
36 |
37 |
38 | # Python < 2.5 does not have "any"
39 | try:
40 | any
41 | except NameError:
42 | def any(iterator):
43 | for item in iterator:
44 | if item: return True
45 | return False
46 |
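The guarded-definition idiom used in this module binds a fallback only when the name is genuinely missing, so newer interpreters keep their native builtins. The same pattern, sketched for another builtin with a checkered history (`callable` was removed in Python 3.0 and restored in 3.2):

try:
    callable
except NameError:
    # Only reached on interpreters that lack the builtin (Python 3.0/3.1)
    def callable(obj):
        return hasattr(obj, '__call__')

assert callable(len)
assert not callable(42)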
--------------------------------------------------------------------------------
/msmbuilder/utils/subsampler.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function, division, absolute_import
2 | from sklearn.base import TransformerMixin
3 | from ..base import BaseEstimator
4 |
5 | __all__ = ['Subsampler']
6 |
7 |
8 | class Subsampler(BaseEstimator, TransformerMixin):
9 | """Convert a list of feature time series (`X_all`) into a `lag_time`
10 | subsampled time series.
11 |
12 | Parameters
13 | ----------
14 | lag_time : int
15 | The lag time to subsample by
16 | sliding_window : bool, default=True
17 | If True, each time series is transformed into `lag_time` interlaced
18 | sliding-window (not statistically independent) sequences. If
19 | False, each time series is transformed into a single subsampled
20 | time series.
21 | """
22 | def __init__(self, lag_time, sliding_window=True):
23 | self._lag_time = lag_time
24 | self._sliding_window = sliding_window
25 |
26 | def fit(self, X_all, y=None):
27 | return self
28 |
29 | def transform(self, X_all, y=None):
30 | """Subsample several time series.
31 |
32 | Parameters
33 | ----------
34 | X_all : list(np.ndarray)
35 | List of feature time series
36 |
37 | Returns
38 | -------
39 | features : list(np.ndarray), length = len(X_all)
40 | The subsampled trajectories.
41 | """
42 | if self._sliding_window:
43 | return [X[k::self._lag_time] for k in range(self._lag_time) for X in X_all]
44 | else:
45 | return [X[::self._lag_time] for X in X_all]
46 |
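A small sketch of the two modes on a toy trajectory (assuming `Subsampler` is re-exported from `msmbuilder.utils`; otherwise import it from `msmbuilder.utils.subsampler`):

import numpy as np
from msmbuilder.utils import Subsampler

# One toy "trajectory": 10 frames, 1 feature
X_all = [np.arange(10).reshape(10, 1)]

# sliding_window=True: lag_time interlaced sequences per trajectory, so no
# frames are discarded (but the sequences are not statistically independent)
strided = Subsampler(lag_time=3, sliding_window=True).transform(X_all)
# -> frames [0, 3, 6, 9], [1, 4, 7], [2, 5, 8]

# sliding_window=False: one decimated sequence per trajectory
thinned = Subsampler(lag_time=3, sliding_window=False).transform(X_all)
# -> frames [0, 3, 6, 9]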
--------------------------------------------------------------------------------
/msmbuilder/utils/validation.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function, division, absolute_import
2 | import numpy as np
3 | import mdtraj as md
4 |
5 | __all__ = ['list_of_1d', 'check_iter_of_sequences', 'array2d']
6 |
7 |
8 | def list_of_1d(y):
9 | if not hasattr(y, '__iter__') or len(y) == 0:
10 | raise ValueError('Bad input shape')
11 | if not hasattr(y[0], '__iter__'):
12 | return [np.array(y)]
13 |
14 | result = []
15 | for i, x in enumerate(y):
16 | value = np.array(x)
17 | if value.ndim != 1:
18 | raise ValueError(
19 | "Bad input shape. Element %d has shape %s, but "
20 | "should be 1D" % (i, str(value.shape)))
21 | result.append(value)
22 | return result
23 |
24 |
25 | def check_iter_of_sequences(sequences, allow_trajectory=False, ndim=2,
26 | max_iter=None):
27 | """Check that ``sequences`` is a iterable of trajectory-like sequences,
28 | suitable as input to ``fit()`` for estimators following the MSMBuilder
29 | API.
30 |
31 | Parameters
32 | ----------
33 | sequences : object
34 | The object to check
35 | allow_trajectory : bool
36 | Whether ``md.Trajectory`` objects are allowed as elements.
37 | ndim : int
38 | The expected dimensionality of the sequences
39 | max_iter : int, optional
40 | Only check at maximum the first ``max_iter`` entries in ``sequences``.
41 | """
42 | value = True
43 | for i, X in enumerate(sequences):
44 | if not isinstance(X, np.ndarray):
45 | if (not allow_trajectory) and isinstance(X, md.Trajectory):
46 | value = False
47 | break
48 | if not isinstance(X, md.Trajectory) and getattr(X, 'ndim', None) != ndim:
49 | value = False
50 | break
51 | if max_iter is not None and i >= max_iter:
52 | break
53 |
54 | if not value:
55 | raise ValueError('sequences must be a list of sequences')
56 |
57 |
58 | def array2d(X, dtype=None, order=None, copy=False, force_all_finite=True):
59 | """Returns at least 2-d array with data from X"""
60 | X_2d = np.asarray(np.atleast_2d(X), dtype=dtype, order=order)
61 | if force_all_finite:
62 | _assert_all_finite(X_2d)
63 | if X is X_2d and copy:
64 | X_2d = _safe_copy(X_2d)
65 | return X_2d
66 |
67 |
68 | def _assert_all_finite(X):
69 | """Like assert_all_finite, but only for ndarray."""
70 | X = np.asanyarray(X)
71 | if (X.dtype.char in np.typecodes['AllFloat'] and not np.isfinite(X.sum())
72 | and not np.isfinite(X).all()):
73 | raise ValueError("Input contains NaN, infinity"
74 | " or a value too large for %r." % X.dtype)
75 |
76 | def _safe_copy(X):
77 | # Copy, but keep the order
78 | return np.copy(X, order='K')
79 |
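A brief usage sketch of the three public helpers above (imported straight from the module to avoid assuming what `msmbuilder.utils` re-exports):

import numpy as np
from msmbuilder.utils.validation import (list_of_1d, check_iter_of_sequences,
                                         array2d)

# list_of_1d wraps a flat label sequence in a one-element list of 1-D arrays,
# and validates a list of per-trajectory label arrays.
assert len(list_of_1d([0, 1, 1, 2])) == 1
assert len(list_of_1d([[0, 1], [1, 2, 2]])) == 2

# check_iter_of_sequences accepts a list of 2-D feature arrays ...
check_iter_of_sequences([np.random.rand(100, 3), np.random.rand(50, 3)])

# ... but rejects a bare 2-D array, whose iteration yields 1-D rows.
try:
    check_iter_of_sequences(np.random.rand(100, 3))
except ValueError as err:
    print(err)   # sequences must be a list of sequences

# array2d promotes 1-D input to a 2-D array and checks for NaN/inf.
assert array2d([1.0, 2.0, 3.0]).shape == (1, 3)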
--------------------------------------------------------------------------------