├── .github └── PULL_REQUEST_TEMPLATE.md ├── .gitignore ├── .travis.yml ├── LICENSE ├── MANIFEST.in ├── README.md ├── appveyor.yml ├── basesetup.py ├── devtools ├── README.md ├── conda-recipe │ ├── bld.bat │ ├── build.sh │ └── meta.yaml └── travis-ci │ ├── build_docs.sh │ ├── install_miniconda.sh │ ├── set_doc_version.py │ └── update_versions_json.py ├── docs ├── .gitignore ├── LICENSE ├── Makefile ├── _static │ ├── flow-chart.png │ ├── fspeptide.png │ ├── kde-vs-histogram.png │ ├── lengths-hist.png │ ├── logo-200px.png │ ├── logo.png │ ├── msm-microstates.png │ ├── tica-heatmap.png │ ├── tica-movie.gif │ └── tica_vs_pca.png ├── _templates │ └── class.rst ├── advanced_examples │ ├── bayesian-msm.rst │ ├── gmrq-model-selection.rst │ ├── hmm-and-msm.rst │ ├── implied-timescales.rst │ ├── index.rst │ ├── plot-tica-heatmap.rst │ ├── quadwell-n-states.rst │ ├── quadwell.rst │ ├── tica-1.rst │ └── uncertainty.rst ├── apipatterns.rst ├── background.rst ├── bibparse.py ├── changelog.rst ├── cluster.rst ├── conf.py ├── contributing.rst ├── datasets.rst ├── decomposition.rst ├── examples │ ├── Clustering-Comparison.rst │ ├── Fs-Peptide-command-line.rst │ ├── Fs-Peptide-in-RAM.rst │ ├── Fs-Peptide-with-dataset.rst │ ├── GMRQ-Model-Selection.rst │ ├── Ligand-Featurization.rst │ ├── Ward-Clustering.rst │ ├── index.rst │ └── tICA-vs-PCA.rst ├── faq.rst ├── feature_selection.rst ├── featurization.rst ├── figures │ └── kde-vs-histogram.py ├── gmrq.rst ├── hmm.rst ├── index.rst ├── installation.rst ├── io.rst ├── make.bat ├── msm.rst ├── plugins.rst ├── preprocessing.rst ├── publications.bib ├── publications_templ.rst ├── ratematrix.rst ├── requirements.txt ├── sphinxext │ ├── embed.tpl │ └── notebook_sphinxext.py ├── tpt.rst └── tutorial.rst ├── examples ├── .gitignore ├── Clustering-Comparison.ipynb ├── Coarse-graining-with-MVCA.ipynb ├── Fs-Peptide-command-line.ipynb ├── Fs-Peptide-in-RAM.ipynb ├── Fs-Peptide-with-Pipeline.ipynb ├── Fs-Peptide-with-dataset.ipynb ├── GMRQ-Model-Selection.ipynb ├── LICENSE.md ├── Ligand-Featurization.ipynb ├── Ward-Clustering.ipynb ├── advanced │ ├── bayesian-msm.ipynb │ ├── hmm-and-msm.ipynb │ ├── implied-timescales.ipynb │ ├── plot-tica-heatmap.ipynb │ ├── quadwell-n-states.ipynb │ ├── quadwell.ipynb │ └── uncertainty.ipynb └── tICA-vs-PCA.ipynb ├── msmbuilder ├── __init__.py ├── base.py ├── cluster │ ├── .gitignore │ ├── __init__.py │ ├── _kmedoids.pyx │ ├── agglomerative.py │ ├── apm.py │ ├── base.py │ ├── kcenters.py │ ├── kmedoids.py │ ├── minibatchkmedoids.py │ ├── ndgrid.py │ ├── regularspatial.py │ └── src │ │ ├── kmedoids.cc │ │ └── kmedoids.h ├── cmdline.py ├── commands │ ├── __init__.py │ ├── atom_indices.py │ ├── convert_chunked_project.py │ ├── example_datasets.py │ ├── featurizer.py │ ├── fit.py │ ├── fit_transform.py │ ├── implied_timescales.py │ ├── template_project.py │ └── transform.py ├── dataset.py ├── decomposition │ ├── .gitignore │ ├── __init__.py │ ├── _speigh.pyx │ ├── base.py │ ├── kernel_approximation.py │ ├── ksparsetica.py │ ├── ktica.py │ ├── pca.py │ ├── sparsetica.py │ ├── tica.py │ └── utils.py ├── example_datasets │ ├── .gitignore │ ├── __init__.py │ ├── _muller.pyx │ ├── alanine_dipeptide.py │ ├── base.py │ ├── brownian1d.py │ ├── fs_peptide.py │ ├── met_enkephalin.py │ └── muller.py ├── feature_extraction │ └── __init__.py ├── feature_selection │ ├── __init__.py │ ├── base.py │ └── featureselector.py ├── featurizer │ ├── __init__.py │ ├── feature_union.py │ ├── featurizer.py │ ├── indices.py │ ├── multichain.py │ ├── 
multiseq_featuizer.py │ └── subset.py ├── hmm │ ├── .gitignore │ ├── __init__.py │ ├── cephes │ │ ├── README.md │ │ ├── cephes.h │ │ ├── cephes_names.h │ │ ├── chbevl.c │ │ ├── gamma.c │ │ ├── i0.c │ │ ├── i1.c │ │ ├── mconf.h │ │ ├── mtherr.c │ │ ├── polevl.c │ │ ├── psi.c │ │ └── zeta.c │ ├── discrete_approx.py │ ├── gaussian.pyx │ ├── src │ │ ├── GaussianHMMFitter.cpp │ │ ├── VonMisesHMMFitter.cpp │ │ ├── include │ │ │ ├── GaussianHMMFitter.h │ │ │ ├── HMMFitter.h │ │ │ ├── Trajectory.h │ │ │ ├── VonMisesHMMFitter.h │ │ │ └── sse_mathfun.h │ │ └── logsumexp.hpp │ └── vonmises.pyx ├── io │ ├── __init__.py │ ├── gather_metadata.py │ ├── io.py │ ├── project_template.py │ └── sampling │ │ ├── __init__.py │ │ └── sampling.py ├── io_templates │ └── twitter-bootstrap.html ├── libdistance │ ├── .gitignore │ ├── libdistance.pyx │ └── src │ │ ├── assign.hpp │ │ ├── cdist.hpp │ │ ├── dist.hpp │ │ ├── distance_kernels.h │ │ ├── pdist.hpp │ │ └── sumdist.hpp ├── lumping │ ├── __init__.py │ ├── bace.py │ ├── mvca.py │ ├── pcca.py │ └── pcca_plus.py ├── msm │ ├── .gitignore │ ├── __init__.py │ ├── _markovstatemodel.pyx │ ├── _metzner_mcmc_fast.pyx │ ├── _metzner_mcmc_slow.py │ ├── _ratematrix.pyx │ ├── _ratematrix_priors.pyx │ ├── _ratematrix_support.pyx │ ├── bayes_ratematrix.py │ ├── bayesmsm.py │ ├── core.py │ ├── implied_timescales.py │ ├── markov_appreciation.py │ ├── msm.py │ ├── ratematrix.py │ ├── src │ │ ├── metzner_mcmc.c │ │ ├── metzner_mcmc.h │ │ ├── transmat_mle_prinz.c │ │ └── transmat_mle_prinz.h │ └── validation │ │ ├── __init__.py │ │ ├── bootstrapmsm.py │ │ └── transmat_errorbar.py ├── preprocessing │ ├── __init__.py │ ├── base.py │ └── timeseries.py ├── project_templates │ ├── 0-test-install.py │ ├── 1-get-example-data.py │ ├── LICENSE.md │ ├── README.md │ ├── analysis │ │ ├── gather-metadata-plot.py │ │ └── gather-metadata.py │ ├── cluster │ │ ├── cluster-plot.py │ │ ├── cluster.py │ │ ├── sample-clusters-plot.py │ │ └── sample-clusters.py │ ├── dihedrals │ │ ├── featurize-plot.py │ │ └── featurize.py │ ├── landmarks │ │ ├── featurize-plot.py │ │ ├── featurize.py │ │ └── find-landmarks.py │ ├── msm │ │ ├── microstate-plot.py │ │ ├── microstate-traj.py │ │ ├── microstate.py │ │ ├── timescales-plot.py │ │ └── timescales.py │ ├── plot_header.template │ ├── plot_macros.template │ ├── rmsd │ │ ├── rmsd-plot.py │ │ └── rmsd.py │ └── tica │ │ ├── tica-plot.py │ │ ├── tica-sample-coordinate-plot.py │ │ ├── tica-sample-coordinate.py │ │ └── tica.py ├── scripts │ ├── __init__.py │ └── msmb.py ├── src │ ├── cy_blas.pyx │ ├── f2py │ │ └── f2pyptr.h │ ├── scipy_lapack.h │ └── triu_utils.pyx ├── tests │ ├── .gitignore │ ├── __init__.py │ ├── native.pdb │ ├── test_agglomerative.py │ ├── test_alphaanglefeaturizer.py │ ├── test_apm.py │ ├── test_bayes_ratematrix.py │ ├── test_bootstrap_msm.py │ ├── test_build_counts.py │ ├── test_clustering.py │ ├── test_commands.py │ ├── test_commands_exist.py │ ├── test_contactfeaturizers.py │ ├── test_convenience.py │ ├── test_cyblas.pyx │ ├── test_cyblas_wrapper.py │ ├── test_dataset.py │ ├── test_decomposition.py │ ├── test_dependencies.py │ ├── test_divergence.py │ ├── test_estimator_subclassing.py │ ├── test_feature_descriptor.py │ ├── test_feature_selection.py │ ├── test_featureunion.py │ ├── test_featurizer.py │ ├── test_featurizer_subset.py │ ├── test_gather_metadata.py │ ├── test_ghmm.py │ ├── test_kcenters.py │ ├── test_kernel_approximation.py │ ├── test_kmedoids.py │ ├── test_ksparsetica.py │ ├── test_libdistance.py │ ├── test_ligandfeaturizers.py │ 
├── test_lumping.py │ ├── test_metzner_mcmc.py │ ├── test_msm.py │ ├── test_msm_uncertainty.py │ ├── test_muller.py │ ├── test_ndgrid.py │ ├── test_nearest.py │ ├── test_param_sweep.py │ ├── test_preprocessing.py │ ├── test_ratematrix.py │ ├── test_rmsdfeaturizer.py │ ├── test_sampling.py │ ├── test_sasa_featurizer.py │ ├── test_sparsetica.py │ ├── test_speigh.py │ ├── test_strongly_connected_subgraph.py │ ├── test_template_project.py │ ├── test_tpt.py │ ├── test_transition_counts.py │ ├── test_transmat_errorbar.py │ ├── test_transmat_mle_prinz.py │ ├── test_utils.py │ ├── test_vmhmm.py │ ├── test_workflows.py │ └── workflows │ │ ├── basic.sh │ │ ├── ghmm.sh │ │ └── rmsd.sh ├── tpt │ ├── __init__.py │ ├── committor.py │ ├── flux.py │ ├── hub.py │ ├── mfpt.py │ └── path.py └── utils │ ├── __init__.py │ ├── compat.py │ ├── convenience.py │ ├── divergence.py │ ├── draw_samples.py │ ├── io.py │ ├── nearest.py │ ├── param_sweep.py │ ├── probability.py │ ├── progressbar │ ├── __init__.py │ ├── compat.py │ ├── progressbar.py │ └── widgets.py │ ├── subsampler.py │ └── validation.py ├── runtests.py └── setup.py /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | - [x] Implement feature / fix bug 2 | - [ ] Add tests 3 | - [ ] Update changelog 4 | 5 | [Describe changes here] 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | dist 10 | build 11 | eggs 12 | parts 13 | bin 14 | var 15 | sdist 16 | develop-eggs 17 | .installed.cfg 18 | lib 19 | lib64 20 | __pycache__ 21 | 22 | # Installer logs 23 | pip-log.txt 24 | 25 | # Unit test / coverage reports 26 | .coverage 27 | .tox 28 | nosetests.xml 29 | 30 | # Translations 31 | *.mo 32 | 33 | # IDEs 34 | .mr.developer.cfg 35 | .project 36 | .pydevproject 37 | .idea/ 38 | 39 | # Autogenerated during setup.py 40 | msmbuilder/src/config.pxi 41 | msmbuilder/version.py 42 | 43 | # Vim temp files 44 | *.swp 45 | *.swo 46 | 47 | build.log 48 | 49 | # Other files 50 | .DS_Store 51 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: c 2 | sudo: false 3 | 4 | addons: 5 | apt: 6 | packages: 7 | - pandoc 8 | 9 | branches: 10 | only: 11 | - master 12 | 13 | install: 14 | - source devtools/travis-ci/install_miniconda.sh 15 | - conda config --add channels omnia 16 | - conda config --add channels conda-forge 17 | 18 | script: 19 | # Run tests. 
If they succeed, build docs only on 3.6 20 | conda build --quiet devtools/conda-recipe 21 | && if [[ $CONDA_PY = 3.6 ]]; then devtools/travis-ci/build_docs.sh; fi 22 | 23 | env: 24 | matrix: 25 | - CONDA_PY=2.7 CONDA_NPY=1.12 26 | - CONDA_PY=3.6 CONDA_NPY=1.12 27 | - CONDA_PY=3.7 CONDA_NPY=1.14 28 | 29 | deploy: 30 | - provider: s3 31 | access_key_id: 32 | secure: "av04wLV7wRmFjPRkDPE0FXNtvL51F597+DzUmrycLnI+Ltg5rxrxEUv2JMr7K1WrTTR1STFNhJBp6aQUwD3zzaA7N/1c0zY9ri35ML75LC/10IDb6UNbY6uPNqbP1co451OSz7tpGbu3JBL/TRL7MkReFbZxPLHPPP1ad/4O6nA=" 33 | secret_access_key: 34 | secure: "c4b2fliqot9ZnI5cyTqEXSHQnCao+GoxmR+SJAcSURv381O/z3frlJX7pKf0qai2OrZSSdqX/wa2KdcWNeoDTKrTiCeKgFikc6x839tmjeQYVV0Y3hmSvZCzCFOAXyMf9GfJJ7gLBOBHSzTTJWeZDLZB6nuoi4Xw9Blgid6QxIs=" 35 | bucket: "msmbuilder.org" 36 | skip_cleanup: true 37 | local_dir: docs/_deploy/ 38 | on: 39 | branch: master 40 | condition: "$CONDA_PY = 3.6" 41 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | graft examples 2 | graft msmbuilder 3 | include basesetup.py 4 | include LICENSE 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | MSMBuilder 2 | ========== 3 | 4 | [![Build Status](https://travis-ci.org/msmbuilder/msmbuilder.svg?branch=master)](https://travis-ci.org/msmbuilder/msmbuilder) 5 | [![PyPi version](https://badge.fury.io/py/msmbuilder.svg)](https://pypi.python.org/pypi/msmbuilder/) 6 | [![License](https://img.shields.io/badge/license-LGPLv2.1+-red.svg?style=flat)](https://pypi.python.org/pypi/msmbuilder/) 7 | [![Documentation](https://img.shields.io/badge/docs-latest-blue.svg?style=flat)](http://msmbuilder.org) 8 | 9 | MSMBuilder is a python package which implements a series of statistical 10 | models for high-dimensional time-series. It is particularly focused on the 11 | analysis of atomistic simulations of biomolecular dynamics. For example, 12 | MSMBuilder has been used to model protein folding and conformational change 13 | from molecular dynamics (MD) simulations. MSMBuilder is available under the 14 | LGPL (v2.1 or later). 15 | 16 | Capabilities include: 17 | 18 | - Feature extraction into dihedrals, contact maps, and more 19 | - Geometric clustering with a variety of algorithms. 20 | - Dimensionality reduction using time-structure independent component 21 | analysis (tICA) and principal component analysis (PCA). 22 | - Markov state model (MSM) construction 23 | - Rate-matrix MSM construction 24 | - Hidden markov model (HMM) construction 25 | - Timescale and transition path analysis. 26 | 27 | Check out the documentation at [msmbuilder.org](http://msmbuilder.org) and 28 | join the [mailing list](https://mailman.stanford.edu/mailman/listinfo/msmbuilder-user). 29 | For a broader overview of MSMBuilder, take a look at our [slide deck](http://rawgit.com/msmbuilder/talk/master/index.html). 30 | 31 | Installation 32 | ------------ 33 | 34 | The preferred installation mechanism for `msmbuilder` is with `conda`: 35 | 36 | ```bash 37 | $ conda install -c omnia msmbuilder 38 | ``` 39 | 40 | If you don't have conda, or are new to scientific python, we recommend that 41 | you download the [Anaconda scientific python distribution](https://store.continuum.io/cshop/anaconda/). 42 | 43 | 44 | Workflow 45 | -------- 46 | 47 | An example workflow might be as follows: 48 | 49 | 1. 
Set up a system for molecular dynamics, and run one or more simulations 50 | for as long as you can on as many CPUs or GPUs as you have access to. 51 | There are a lot of great software packages for running MD, e.g 52 | [OpenMM](https://simtk.org/home/openmm), [Gromacs](http://www.gromacs.org/), 53 | [Amber](http://ambermd.org/), [CHARMM](http://www.charmm.org/), and 54 | many others. MSMBuilder is not one of them. 55 | 56 | 2. Transform your MD coordinates into an appropriate set of features. 57 | 58 | 3. Perform some sort of dimensionality reduction with tICA or PCA. 59 | Reduce your data into discrete states by using clustering. 60 | 61 | 4. Fit an MSM, rate matrix MSM, or HMM. Perform model selection using 62 | cross-validation with the [generalized matrix Rayleigh quotient](http://arxiv.org/abs/1407.8083) 63 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | environment: 2 | 3 | global: 4 | PYTHONUNBUFFERED: on 5 | 6 | matrix: 7 | - PYDIR: "C:\\Miniconda35" 8 | CONDA_PY: "35" 9 | CONDA_NPY: "1.10" 10 | 11 | - PYDIR: "C:\\Miniconda35-x64" 12 | CONDA_PY: "35" 13 | CONDA_NPY: "1.10" 14 | 15 | install: 16 | - set PATH=%PYDIR%;%PYDIR%\\Scripts;%PATH% 17 | - conda config --add channels omnia 18 | - conda config --add channels conda-forge 19 | - conda update -yq --all 20 | - conda install -yq conda-build jinja2 21 | 22 | build: false 23 | 24 | test_script: 25 | - conda build -q devtools\conda-recipe 26 | -------------------------------------------------------------------------------- /devtools/conda-recipe/bld.bat: -------------------------------------------------------------------------------- 1 | python setup.py install 2 | if errorlevel 1 exit 1 3 | -------------------------------------------------------------------------------- /devtools/conda-recipe/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python setup.py install 3 | -------------------------------------------------------------------------------- /devtools/conda-recipe/meta.yaml: -------------------------------------------------------------------------------- 1 | package: 2 | name: msmbuilder 3 | version: {{ GIT_DESCRIBE_TAG }} 4 | 5 | source: 6 | path: ../../ 7 | 8 | build: 9 | preserve_egg_dir: True 10 | number: {{ GIT_DESCRIBE_NUMBER }} 11 | entry_points: 12 | - msmb = msmbuilder.scripts.msmb:main 13 | 14 | 15 | requirements: 16 | build: 17 | - python 18 | - setuptools 19 | - cython <=0.28 20 | - numpy x.x 21 | - mdtraj <=1.8 22 | run: 23 | - python 24 | - setuptools 25 | - numpy x.x 26 | - scipy 27 | - pandas <0.20 28 | - six 29 | - mdtraj <=1.8 30 | - scikit-learn 31 | - numpydoc 32 | - pytables 33 | - pyhmc 34 | - pyyaml 35 | - jinja2 36 | - fastcluster 37 | 38 | 39 | test: 40 | requires: 41 | - nose 42 | - nose-timer 43 | - munkres 44 | - numdifftools 45 | - statsmodels 46 | - hmmlearn=0.2.1 47 | - cvxpy # [not win] 48 | - msmb_data 49 | imports: 50 | - msmbuilder 51 | commands: 52 | - msmb -h 53 | - nosetests msmbuilder -v --with-timer --timer-ok 2 --timer-warning 10 --timer-filter error 54 | 55 | 56 | about: 57 | home: https://github.com/msmbuilder/msmbuilder 58 | license: LGPLv2.1+ 59 | summary: 'MSMBuilder: Statistical models for biomolecular dynamics' 60 | -------------------------------------------------------------------------------- /devtools/travis-ci/build_docs.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Print each line, exit on error 4 | set -ev 5 | 6 | # Install the built package 7 | conda create --yes -n docenv python=$CONDA_PY 8 | source activate docenv 9 | conda install -yq --use-local msmbuilder 10 | 11 | # Install doc requirements 12 | conda install -yq --file docs/requirements.txt 13 | 14 | # We don't use conda for these: 15 | # sphinx_rtd_theme's latest releases are not available 16 | # neither is msmb_theme 17 | # neither is sphinx > 1.3.1 (fix #1892 autodoc problem) 18 | pip install -I sphinx 19 | pip install -I sphinx_rtd_theme==0.1.9 msmb_theme==1.2.0 20 | 21 | # Make docs 22 | cd docs && make html && cd - 23 | 24 | # Move the docs into a versioned subdirectory 25 | python devtools/travis-ci/set_doc_version.py 26 | 27 | # Prepare versions.json 28 | python devtools/travis-ci/update_versions_json.py 29 | -------------------------------------------------------------------------------- /devtools/travis-ci/install_miniconda.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | MINICONDA=Miniconda3-latest-Linux-x86_64.sh 3 | MINICONDA_MD5=$(curl -s https://repo.continuum.io/miniconda/ | grep -A3 $MINICONDA | sed -n '4p' | sed -n 's/ *\(.*\)<\/td> */\1/p') 4 | wget https://repo.continuum.io/miniconda/$MINICONDA 5 | if [[ $MINICONDA_MD5 != $(md5sum $MINICONDA | cut -d ' ' -f 1) ]]; then 6 | echo "Miniconda MD5 mismatch" 7 | exit 1 8 | fi 9 | bash $MINICONDA -b 10 | rm -f $MINICONDA 11 | 12 | export PATH=$HOME/miniconda3/bin:$PATH 13 | 14 | conda update -yq conda 15 | conda install -yq conda-build jinja2 conda-verify 16 | -------------------------------------------------------------------------------- /devtools/travis-ci/set_doc_version.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | from msmbuilder import version 4 | 5 | if version.release: 6 | docversion = version.version 7 | else: 8 | docversion = 'development' 9 | 10 | os.mkdir("docs/_deploy") 11 | shutil.copytree("docs/_build/html", "docs/_deploy/{docversion}" 12 | .format(docversion=docversion)) 13 | -------------------------------------------------------------------------------- /devtools/travis-ci/update_versions_json.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | try: 4 | from urllib.request import urlopen 5 | except ImportError: 6 | from urllib2 import urlopen 7 | from msmbuilder import version 8 | 9 | if not version.release: 10 | print("This is not a release.") 11 | exit(0) 12 | 13 | URL = 'http://www.msmbuilder.org' 14 | data = urlopen(URL + '/versions.json').read().decode() 15 | versions = json.loads(data) 16 | 17 | # new release so all the others are now old 18 | for i in range(len(versions)): 19 | versions[i]['latest'] = False 20 | 21 | versions.append({ 22 | 'version': version.version, 23 | 'display': version.short_version, 24 | 'url': "{base}/{version}".format(base=URL, version=version.version), 25 | 'latest': True, 26 | }) 27 | 28 | with open("docs/_deploy/versions.json", 'w') as versionf: 29 | json.dump(versions, versionf, indent=2) 30 | 31 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | _build/ 2 | 3 | # autosummary generated files 4 | _cluster/ 5 | _msm/ 6 | _hmm/ 7 | _ratematrix/ 8 | 
_decomposition/ 9 | _preprocessing/ 10 | _feature_selection/ 11 | _featurization/ 12 | _tpt/ 13 | _io/ 14 | _gmrq/ 15 | 16 | # autogenerated (see conf.py) 17 | publications.rst 18 | -------------------------------------------------------------------------------- /docs/LICENSE: -------------------------------------------------------------------------------- 1 | The MSMBuilder documentation is licensed under a Creative Commons 2 | Attribution 4.0 International License. 3 | 4 | https://creativecommons.org/licenses/by/4.0/ 5 | -------------------------------------------------------------------------------- /docs/_static/flow-chart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msmbuilder/msmbuilder/515fd5c27836c797692d600216b5eb224dfc1c5d/docs/_static/flow-chart.png -------------------------------------------------------------------------------- /docs/_static/fspeptide.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msmbuilder/msmbuilder/515fd5c27836c797692d600216b5eb224dfc1c5d/docs/_static/fspeptide.png -------------------------------------------------------------------------------- /docs/_static/kde-vs-histogram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msmbuilder/msmbuilder/515fd5c27836c797692d600216b5eb224dfc1c5d/docs/_static/kde-vs-histogram.png -------------------------------------------------------------------------------- /docs/_static/lengths-hist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msmbuilder/msmbuilder/515fd5c27836c797692d600216b5eb224dfc1c5d/docs/_static/lengths-hist.png -------------------------------------------------------------------------------- /docs/_static/logo-200px.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msmbuilder/msmbuilder/515fd5c27836c797692d600216b5eb224dfc1c5d/docs/_static/logo-200px.png -------------------------------------------------------------------------------- /docs/_static/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msmbuilder/msmbuilder/515fd5c27836c797692d600216b5eb224dfc1c5d/docs/_static/logo.png -------------------------------------------------------------------------------- /docs/_static/msm-microstates.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msmbuilder/msmbuilder/515fd5c27836c797692d600216b5eb224dfc1c5d/docs/_static/msm-microstates.png -------------------------------------------------------------------------------- /docs/_static/tica-heatmap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msmbuilder/msmbuilder/515fd5c27836c797692d600216b5eb224dfc1c5d/docs/_static/tica-heatmap.png -------------------------------------------------------------------------------- /docs/_static/tica-movie.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msmbuilder/msmbuilder/515fd5c27836c797692d600216b5eb224dfc1c5d/docs/_static/tica-movie.gif -------------------------------------------------------------------------------- /docs/_static/tica_vs_pca.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/msmbuilder/msmbuilder/515fd5c27836c797692d600216b5eb224dfc1c5d/docs/_static/tica_vs_pca.png -------------------------------------------------------------------------------- /docs/_templates/class.rst: -------------------------------------------------------------------------------- 1 | {{ fullname }} 2 | {{ underline }} 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | 8 | -------------------------------------------------------------------------------- /docs/advanced_examples/bayesian-msm.rst: -------------------------------------------------------------------------------- 1 | Bayesian Estimation of MSMs 2 | =========================== 3 | 4 | .. notebook:: examples/bayesian-msm.ipynb 5 | 6 | -------------------------------------------------------------------------------- /docs/advanced_examples/gmrq-model-selection.rst: -------------------------------------------------------------------------------- 1 | GMRQ hyperparameter selection 2 | ============================= 3 | 4 | .. notebook:: examples/gmrq-model-selection.ipynb -------------------------------------------------------------------------------- /docs/advanced_examples/hmm-and-msm.rst: -------------------------------------------------------------------------------- 1 | HMM and MSM Timescales for Ala2 2 | =============================== 3 | 4 | .. notebook:: examples/hmm-and-msm.ipynb 5 | 6 | -------------------------------------------------------------------------------- /docs/advanced_examples/implied-timescales.rst: -------------------------------------------------------------------------------- 1 | Implied Timescales 2 | ================== 3 | 4 | .. notebook:: examples/implied-timescales.ipynb 5 | 6 | -------------------------------------------------------------------------------- /docs/advanced_examples/index.rst: -------------------------------------------------------------------------------- 1 | Examples 2 | ======== 3 | 4 | This page provides a series of examples, tutorials and recipes for using 5 | MSMBuilder. 6 | 7 | Each subsection is a notebook. To open these notebooks in a "live" IPython 8 | session and execute the documentation interactively, you need to download 9 | the repository and start IPython notebook. 10 | 11 | If you installed `MSMBuilder` from source, you will need to navigate to 12 | :code:`./examples`. The notebook files for these examples and the notebooks 13 | are available in the top level `examples folder 14 | `_ on GitHub 15 | as well. In the directory with the notebook files, start an IPython 16 | notebook server: 17 | 18 | .. code-block:: python 19 | 20 | $ ipython notebook 21 | 22 | 23 | .. toctree:: 24 | :maxdepth: 2 25 | :titlesonly: 26 | :glob: 27 | 28 | * 29 | 30 | .. vim: tw=75 31 | -------------------------------------------------------------------------------- /docs/advanced_examples/plot-tica-heatmap.rst: -------------------------------------------------------------------------------- 1 | Visualization with tICA 2 | ======================= 3 | 4 | .. notebook:: examples/plot-tica-heatmap.ipynb 5 | 6 | -------------------------------------------------------------------------------- /docs/advanced_examples/quadwell-n-states.rst: -------------------------------------------------------------------------------- 1 | Model selection with Randomized CV 2 | ================================== 3 | 4 | .. 
notebook:: examples/quadwell-n-states.ipynb 5 | -------------------------------------------------------------------------------- /docs/advanced_examples/quadwell.rst: -------------------------------------------------------------------------------- 1 | Bootstraped MSM CIs 2 | =================== 3 | 4 | .. notebook:: examples/quadwell.ipynb 5 | 6 | -------------------------------------------------------------------------------- /docs/advanced_examples/tica-1.rst: -------------------------------------------------------------------------------- 1 | tICA and PCA 2 | ============ 3 | 4 | .. notebook:: examples/tica-example.ipynb 5 | 6 | -------------------------------------------------------------------------------- /docs/advanced_examples/uncertainty.rst: -------------------------------------------------------------------------------- 1 | Estimating uncertainty in Markov state models 2 | ============================================= 3 | 4 | .. notebook:: examples/uncertainty.ipynb 5 | -------------------------------------------------------------------------------- /docs/background.rst: -------------------------------------------------------------------------------- 1 | .. _background: 2 | 3 | Motivation 4 | ========== 5 | 6 | The aim of this package is to provide software tools for predictive 7 | modeling of the long timescale dynamics of biomolecular systems using 8 | statistical modeling to analyze physical simulations. 9 | 10 | Given a dataset of one or more stochastic trajectories tracking the 11 | coordinates of every (10,000+) atom in a molecular system at a discrete 12 | time interval, how do we understand the slow dynamical processes and make 13 | quantitative predictions about the system? 14 | 15 | 16 | Workflow 17 | -------- 18 | 19 | To build a dynamical model, we apply (stepwise) a series of dimensionality 20 | reductions. The basic set of steps is outlined below. Note that most steps 21 | are optional under certain circumstances. The particulars should become 22 | clear as you continue reading the documentation. 23 | 24 | 1. Set up a system for molecular dynamics, and run one or more simulations 25 | for as long as you can on as many CPUs or GPUs as you have access. 26 | There are a lot of great software packages for running MD, e.g `OpenMM 27 | `_, `Gromacs `_, 28 | `Amber `_, `CHARMM `_, and 29 | many others. MSMBuilder is not one of them. 30 | 31 | 2. :ref:`Featurize` trajectories into an appropriate vector 32 | of features. The full :math:`3N` set of atomic coordinates is 33 | potentially unwieldy and redundant. It likely does not respect the 34 | rotational or translational symmetry of your system either. We commonly 35 | use backbone dihedral angles as our features, although this depends 36 | highly on the system being modeled. 37 | 38 | 3. :ref:`Decompose` your features into a new basis that 39 | preserves the relevant information in your data with fewer dimensions. 40 | We typically use tICA, which finds linear combinations of input degrees 41 | of freedom that maximize autocorrelation or "slowness". 42 | 43 | 4. :ref:`Cluster` your data to define (micro-)states by grouping 44 | similar input data points. At this stage, we've reduced the 45 | dimensionality of the problem from potentially thousands of :math:`xyz` 46 | coordinates to a single cluster (state) index. 47 | 48 | 5. :ref:`Estimate a model` from the clustered data. We typically build 49 | an MSM, which models the important dynamics of the system. 50 | 51 | 6. 
Use :ref:`GMRQ cross-validation` to select the best model. There 52 | are many hyperparameters (knobs to tweak) in the workflow. This scoring 53 | function can help us pick the best values. 54 | 55 | 56 | .. figure:: _static/flow-chart.png 57 | :align: center 58 | :width: 80% 59 | 60 | A diagram of potential workflows. 61 | 62 | .. vim: tw=75 63 | -------------------------------------------------------------------------------- /docs/bibparse.py: -------------------------------------------------------------------------------- 1 | """Very simple bibtex parser for use in MSMBuilder doc generation 2 | 3 | Matthew Harrigan 4 | (c)2016, MIT License 5 | """ 6 | 7 | from pyparsing import CaselessKeyword as kwd 8 | from pyparsing import QuotedString, Word, alphanums, Suppress, OneOrMore, nums, \ 9 | Group, Optional, ZeroOrMore, alphas, alphas8bit, delimitedList 10 | 11 | # Change these if you need more flexibility: 12 | entry_type = kwd("article") | kwd("unpublished") 13 | cite_key = Word(alphanums + ":/._") 14 | 15 | LCURLY = Suppress('{') 16 | RCURLY = Suppress('}') 17 | COMMA = Suppress(',') 18 | AT = Suppress('@') 19 | EQUALS = Suppress('=') 20 | 21 | field_val = Word(nums) | QuotedString('{', endQuoteChar='}', multiline=True, 22 | convertWhitespaceEscapes=False) 23 | title_field = Group(kwd('title') + EQUALS + field_val) 24 | journal_field = Group(kwd('journal') + EQUALS + field_val) 25 | year_field = Group(kwd('year') + EQUALS + field_val) 26 | volume_field = Group(kwd('volume') + EQUALS + field_val) 27 | pages_field = Group(kwd('pages') + EQUALS + field_val) 28 | abstract_field = Group(kwd('abstract') + EQUALS + field_val) 29 | doi_field = Group(kwd('doi') + EQUALS + field_val) 30 | other_field = Group(Word(alphanums) + EQUALS + field_val) 31 | 32 | author = OneOrMore(~kwd('and') + Word(alphas + alphas8bit + '.,-')) 33 | author.setParseAction(lambda xx: ' '.join(str(x) for x in xx)) 34 | author_list = LCURLY + delimitedList(author, 'and') + RCURLY 35 | author_field = Group(kwd('author') + EQUALS + Group(author_list)) 36 | 37 | entry_item = (title_field | author_field | journal_field | year_field 38 | | volume_field | pages_field | abstract_field | doi_field 39 | | Suppress(other_field)) 40 | 41 | 42 | class BibEntry(object): 43 | def __init__(self, type, cite_key, fields): 44 | self.type = type 45 | self.cite_key = cite_key 46 | self.fields = fields 47 | self.__dict__.update(**fields) 48 | 49 | 50 | def to_BibEntry(toks): 51 | return BibEntry(toks[0], toks[1], dict(toks[2:])) 52 | 53 | 54 | entry = (AT + entry_type + LCURLY + cite_key + COMMA 55 | + ZeroOrMore(entry_item + COMMA) + Optional(entry_item) + RCURLY) 56 | entry.setParseAction(to_BibEntry) 57 | entries = OneOrMore(entry) 58 | -------------------------------------------------------------------------------- /docs/cluster.rst: -------------------------------------------------------------------------------- 1 | .. _cluster: 2 | .. currentmodule:: msmbuilder.cluster 3 | 4 | Clustering 5 | ========== 6 | 7 | Clustering MD trajectories groups the data [#f1]_ into a set of 8 | clusters such that conformations in the same cluster are structurally 9 | similar to one another, and conformations in different clusters are 10 | structurally distinct. The questions that arise are 11 | 12 | #. How should "structurally similar" be defined? What distance metric 13 | should be used? 14 | 15 | #. Given the distance metric, what algorithm should be used to actually 16 | cluster the data? 
17 | 18 | On point 1, there is no consensus in the protein MD literature. Popular 19 | distance metrics include cartesian root-mean-squared deviation of atomic 20 | positions (RMSD) [#f3]_, distances based on the number of native contacts 21 | formed, distances based on the difference in backbone dihedral angles, and 22 | probably others. 23 | 24 | On point 2, "Optimal" clustering is NP-hard [#f2]_, so there's usually a 25 | tradeoff between clustering quality and computational cost. For that reason, 26 | MSMBuilder has a variety of different clustering algorithms implemented. 27 | 28 | Algorithms 29 | ---------- 30 | 31 | All clustering algorithms in MSMBuilder follow the same basic API. 32 | Hyperparameters, including the number of clusters, random seeds, and the 33 | distance metric (if applicable), are passed to the class constructor. 34 | Then, the computation is done by calling ``fit(sequences)``. The argument 35 | to ``fit`` should be a *list* of molecular dynamics trajectories or a list 36 | of 2D numpy arrays, each of shape ``(length_of_trajectory, n_features)``. 37 | 38 | 39 | .. autosummary:: 40 | :toctree: _cluster/ 41 | :nosignatures: 42 | 43 | KCenters 44 | KMeans 45 | KMedoids 46 | MiniBatchKMedoids 47 | RegularSpatial 48 | LandmarkAgglomerative 49 | AffinityPropagation 50 | GMM 51 | MeanShift 52 | MiniBatchKMeans 53 | SpectralClustering 54 | Ward 55 | 56 | 57 | .. todo: Example of clustering 58 | 59 | References 60 | ---------- 61 | 62 | .. [#f1] The "data", for MD, refers to snapshots of the structure of a molecular system at a given time point -- i.e. the set of cartesian coordinates for all the atoms, or some mathematical transformation thereof. 63 | .. [#f2] Aloise, Daniel, et al. `NP-hardness of Euclidean sum-of-squares clustering. `_ Machine Learning 75.2 (2009): 245-248. 64 | .. [#f3] http://en.wikipedia.org/wiki/Root-mean-square_deviation_of_atomic_positions 65 | 66 | .. vim: tw=75 67 | -------------------------------------------------------------------------------- /docs/examples/Clustering-Comparison.rst: -------------------------------------------------------------------------------- 1 | Clustering Comparison 2 | ===================== 3 | 4 | .. notebook:: Clustering-Comparison 5 | -------------------------------------------------------------------------------- /docs/examples/Fs-Peptide-command-line.rst: -------------------------------------------------------------------------------- 1 | Fs Peptide (command line) 2 | ========================= 3 | 4 | .. notebook:: Fs-Peptide-command-line 5 | -------------------------------------------------------------------------------- /docs/examples/Fs-Peptide-in-RAM.rst: -------------------------------------------------------------------------------- 1 | Fs Peptide (in RAM) 2 | =================== 3 | 4 | .. notebook:: Fs-Peptide-in-RAM 5 | -------------------------------------------------------------------------------- /docs/examples/Fs-Peptide-with-dataset.rst: -------------------------------------------------------------------------------- 1 | Fs Peptide (using ``dataset``) 2 | ============================== 3 | 4 | .. notebook:: Fs-Peptide-with-dataset 5 | -------------------------------------------------------------------------------- /docs/examples/GMRQ-Model-Selection.rst: -------------------------------------------------------------------------------- 1 | GMRQ Model Selection 2 | ==================== 3 | 4 | ..
notebook:: GMRQ-Model-Selection 5 | -------------------------------------------------------------------------------- /docs/examples/Ligand-Featurization.rst: -------------------------------------------------------------------------------- 1 | Ligand Featurization 2 | ==================== 3 | 4 | .. notebook:: Ligand-Featurization 5 | -------------------------------------------------------------------------------- /docs/examples/Ward-Clustering.rst: -------------------------------------------------------------------------------- 1 | Ward Clustering 2 | =============== 3 | 4 | .. notebook:: Ward-Clustering 5 | -------------------------------------------------------------------------------- /docs/examples/index.rst: -------------------------------------------------------------------------------- 1 | .. _examples: 2 | 3 | Examples 4 | ======== 5 | 6 | The following examples show off various aspects or capabilities of 7 | MSMBuilder. They can be run interactively in Jupyter (IPython) notebook. 8 | Download the `notebook files 9 | `_ and open 10 | them in Jupyter:: 11 | 12 | $ jupyter notebook 13 | 14 | .. To make the ipython rendered images show up, each rst file must be 15 | in its own directory. 16 | 17 | .. toctree:: 18 | :maxdepth: 2 19 | :titlesonly: 20 | 21 | Fs-Peptide-in-RAM 22 | Fs-Peptide-with-dataset 23 | Fs-Peptide-command-line 24 | tICA-vs-PCA 25 | Clustering-Comparison 26 | GMRQ-Model-Selection 27 | Ward-Clustering 28 | Ligand-Featurization 29 | 30 | 31 | Contributing examples 32 | --------------------- 33 | 34 | Do you have a neat example of using MSMBuilder? Format your code 35 | into an IPython notebook and submit a pull request! 36 | 37 | .. vim: tw=75 38 | -------------------------------------------------------------------------------- /docs/examples/tICA-vs-PCA.rst: -------------------------------------------------------------------------------- 1 | tICA vs. PCA 2 | ============ 3 | 4 | .. notebook:: tICA-vs-PCA 5 | -------------------------------------------------------------------------------- /docs/feature_selection.rst: -------------------------------------------------------------------------------- 1 | .. _feature_selection: 2 | .. currentmodule:: msmbuilder.feature_selection 3 | 4 | 5 | Feature Selection 6 | ================= 7 | 8 | Feature selection can be used to reduce the dimensionality of data sets, 9 | either to improve estimators’ accuracy or to boost their performance on very 10 | high-dimensional datasets. 11 | 12 | Feature Selectors 13 | ----------------- 14 | 15 | .. autosummary:: 16 | :toctree: _feature_selection/ 17 | 18 | FeatureSelector 19 | VarianceThreshold 20 | 21 | 22 | .. vim: tw=75 23 | -------------------------------------------------------------------------------- /docs/featurization.rst: -------------------------------------------------------------------------------- 1 | .. _featurization: 2 | .. currentmodule:: msmbuilder.featurizer 3 | 4 | 5 | Featurization 6 | ============= 7 | 8 | Many algorithms require that the input data be vectors in a (euclidean) 9 | vector space. This includes :class:`~msmbuilder.cluster.KMeans` clustering, 10 | :class:`~msmbuilder.decomposition.tICA`, and others. 11 | 12 | Since there's usually no special rotational or translational reference 13 | frame in an MD simulation, it's often desirable to remove rotational and 14 | translational motion via featurization that is insensitive to rotations and 15 | translations. 16 | 17 | Featurizations 18 | -------------- 19 | 20 | .. 
autosummary:: 21 | :toctree: _featurization/ 22 | 23 | AtomPairsFeaturizer 24 | ContactFeaturizer 25 | DRIDFeaturizer 26 | DihedralFeaturizer 27 | GaussianSolventFeaturizer 28 | RMSDFeaturizer 29 | RawPositionsFeaturizer 30 | SuperposeFeaturizer 31 | 32 | 33 | Alternative to Featurization 34 | ---------------------------- 35 | 36 | Many algorithms require vectorizable data. Other algorithms only require a 37 | pairwise distance metric, e.g. RMSD between two protein conformations. In 38 | general, you can define a pairwise distance among vectorized data, but you 39 | cannot embed data into a vector space only from pairwise distance. 40 | 41 | Some :ref:`clustering ` methods let you use an arbitrary distance 42 | metric, including RMSD. In this case, the input to ``fit()`` may be a list 43 | of MD trajectories instead of a list of numpy arrays. Clustering methods 44 | that allow this currently include :class:`~msmbuilder.cluster.KCenters` and 45 | :class:`~msmbuilder.cluster.KMedoids`. 46 | 47 | .. vim: tw=75 48 | -------------------------------------------------------------------------------- /docs/figures/kde-vs-histogram.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as pp 3 | from scipy.stats import norm 4 | from sklearn.neighbors import KernelDensity 5 | 6 | 7 | #---------------------------------------------------------------------- 8 | # Plot the progression of histograms to kernels 9 | N = 100 10 | np.random.seed(1) 11 | X = np.concatenate((np.random.normal(0, 1, 0.3 * N), 12 | np.random.normal(5, 1, 0.7 * N)))[:, np.newaxis] 13 | 14 | X_plot = np.linspace(-5, 10, 1000)[:, np.newaxis] 15 | 16 | true_dens = (0.3 * norm(0, 1).pdf(X_plot[:, 0]) 17 | + 0.7 * norm(5, 1).pdf(X_plot[:, 0])) 18 | 19 | 20 | ax = pp.subplot(axisbg='w') 21 | ax.fill(X_plot[:, 0], true_dens, fc='black', alpha=0.2) 22 | pp.plot(X_plot[:, 0], true_dens, 'k-', lw=2, label='input distribution') 23 | 24 | kde = KernelDensity(kernel='gaussian', bandwidth=0.5).fit(X) 25 | log_dens = kde.score_samples(X_plot) 26 | ax.plot(X_plot[:, 0], np.exp(log_dens), '-', lw=2, c='r', label='Gaussian KDE') 27 | pp.twinx().hist(X, bins=20, alpha=0.5, label='Histogram') 28 | 29 | 30 | ax.text(6, 0.38, "N={0} points".format(N)) 31 | 32 | ax.legend(loc='upper left') 33 | 34 | ax.set_xlim(-4, 9) 35 | ax.set_ylim(0, 0.4) 36 | pp.savefig('_static/kde-vs-histogram.png') 37 | 38 | -------------------------------------------------------------------------------- /docs/gmrq.rst: -------------------------------------------------------------------------------- 1 | .. _gmrq: 2 | .. currentmodule:: msmbuilder 3 | 4 | Model Selection using GMRQ 5 | ========================== 6 | 7 | The generalized matrix Rayleigh quotient (GMRQ) is a specific application of 8 | the variational principle (adapted from `quantum mechanics 9 | `_) 10 | for Markov state models and a useful tool for model parameter selection. 11 | 12 | The variational principle yields a rigorous way of comparing two different 13 | Markov models for the same underlying stochastic process when using different 14 | state decompositions. Even under the assumption that you have access to 15 | infinite sampling, there is still some error associated with approximating the 16 | true continuous eigenfunctions of your modeled process with the indicator 17 | functions, as is the case with Markov state models. 
If we interpret the 18 | variational theorem as the measure of the quality of this approximation, the 19 | state decomposition that leads to a Markov model with larger leading dynamical 20 | eigenvalues is consequently the better state decomposition. If you wish to see 21 | the full derivation of this quantity, please refer to [#f1]_. 22 | 23 | Using this method, we can generate single scalar-valued scores for a proposed 24 | model given a supplied data set. This allows for the use of separate testing 25 | and training data sets to quantify and avoid statistical overfitting. 26 | This method extends these tools, making it possible to score trained models on 27 | new datasets and to perform hyperparameter selection. **PLEASE NOTE**: You cannot 28 | use GMRQ to optimize the MSM lag time. Changing the lag time fundamentally 29 | alters the model's eigenfunctions, which no longer makes it a useful scoring function. 30 | The number of timescales used to score the model must also be constant and user- 31 | selected. 32 | 33 | Algorithms 34 | ---------- 35 | 36 | .. autosummary:: 37 | :toctree: _gmrq/ 38 | 39 | decomposition.tICA.score 40 | msm.MarkovStateModel.score 41 | msm.ContinuousTimeMSM.score 42 | 43 | 44 | 45 | 46 | References 47 | ---------- 48 | 49 | .. [#f1] McGibbon, Robert T., and Vijay S. Pande. `Variational cross-validation of slow dynamical modes in molecular kinetics `_ J. Chem. Phys. 142, 124105 (2015). 50 | 51 | .. vim: tw=75 52 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. _msmbuilder: 2 | 3 | MSMBuilder 4 | ========== 5 | 6 | 7 | .. raw:: html 8 | 9 |

10 | Statistical models for Biomolecular Dynamics

11 | 12 | MSMBuilder is an application and python library. It builds 13 | statistical models for high-dimensional time-series. The particular focus 14 | of the package is on the analysis of atomistic simulations of biomolecular 15 | dynamics such as protein folding and conformational change. 16 | 17 | To get started via `Anaconda Python `_, 18 | use:: 19 | 20 | conda install -c omnia msmbuilder 21 | 22 | MSMBuilder includes algorithms for constructing dynamical models: 23 | 24 | - :ref:`featurization` 25 | - :ref:`feature_selection` 26 | - :ref:`preprocessing` 27 | - :ref:`decomposition` 28 | - :ref:`cluster` 29 | - :ref:`msm` 30 | - :ref:`hmm` 31 | - :ref:`ratematrix` 32 | 33 | As well as methods for analysis and validation of the models: 34 | 35 | - :ref:`gmrq` 36 | - :ref:`tpt` 37 | 38 | New users should check out: 39 | 40 | - :ref:`background` 41 | - :ref:`installation` 42 | - :ref:`tutorial` 43 | - :ref:`examples` 44 | - :ref:`faq` 45 | 46 | MSMBuilder is most effective as a library. Intermediate users should 47 | familiarize themselves with: 48 | 49 | - :ref:`apipatterns` 50 | - :ref:`datasets` 51 | - :ref:`changelog` 52 | 53 | 54 | MSMBuilder is developed primarily by researchers at Stanford University, 55 | and we welcome contributions. The development all takes place on `Github 56 | `_. MSMBuilder is licensed under 57 | the GNU LGPL (v2.1 or later). 58 | 59 | 60 | 61 | .. toctree:: 62 | :maxdepth: 2 63 | :hidden: 64 | 65 | background 66 | installation 67 | tutorial 68 | examples/index 69 | featurization 70 | feature_selection 71 | preprocessing 72 | decomposition 73 | cluster 74 | msm 75 | gmrq 76 | tpt 77 | ratematrix 78 | hmm 79 | datasets 80 | io 81 | apipatterns 82 | plugins 83 | faq 84 | changelog 85 | publications 86 | contributing 87 | 88 | .. vim: tw=75 89 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | .. _installation: 2 | 3 | Installation 4 | ============ 5 | 6 | The preferred installation mechanism for ``msmbuilder`` is with ``conda``. 7 | 8 | .. code-block:: bash 9 | 10 | $ conda install -c omnia msmbuilder 11 | 12 | 13 | If you don't have conda, or are new to scientific python, we recommend that 14 | you download the `Anaconda scientific python distribution 15 | `_. 16 | 17 | 18 | From Source 19 | ----------- 20 | 21 | MSMBuilder is a python package that heavily leans on other components of the 22 | scientific python ecosystem. See ``devtools/conda-recipe/meta.yaml`` for a 23 | complete and up-to-date list of build, run, and test dependencies. When you 24 | are sure the dependencies are satisfied you can install from PyPI 25 | 26 | .. code-block:: bash 27 | 28 | $ pip install msmbuilder 29 | 30 | or from source 31 | 32 | .. code-block:: bash 33 | 34 | $ git clone git@github.com:msmbuilder/msmbuilder 35 | $ cd msmbuilder/ 36 | $ pip install . 37 | $ # (or: python setup.py install) 38 | 39 | Frequently Asked Questions 40 | -------------------------- 41 | 42 | **Do I need Anaconda python? Can't I use the python that comes with my 43 | operating system, like /usr/bin/python?** 44 | 45 | You can have multiple ``python`` installations on your computer which do 46 | not interact with one another at all. The system python interpreter is used 47 | by your operating system for some of its own programs but is not the best 48 | choice for data analysis or science.
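A quick way to see which interpreter a given shell will actually use is shown below. This is a minimal sketch; the exact paths, environment names, and output will differ on your machine.

.. code-block:: bash

    $ which python                                # path of the interpreter on your PATH
    $ python -c "import sys; print(sys.prefix)"   # install prefix of that interpreter
    $ conda info --envs                           # lists conda environments, if conda is installed

A conda-based install reports a prefix inside your Anaconda or Miniconda directory rather than ``/usr``.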
49 | 50 | We strongly recommend that you install the Anaconda or Miniconda python 51 | distribution and that you have the ``conda`` package manager available. 52 | 53 | If you're interested in some of the details about packaging and scientific 54 | python, see `this blog post by Travis Oliphant 55 | `_. 56 | 57 | .. vim: tw=75 58 | -------------------------------------------------------------------------------- /docs/plugins.rst: -------------------------------------------------------------------------------- 1 | .. _plugins: 2 | 3 | Writing Plugins 4 | =============== 5 | 6 | You can easily extend MSMBuilder by subclassing ``BaseEstimator`` or any of 7 | its children. You can even build your plugin to work with the ``msmb`` 8 | command-line interface. 9 | 10 | 1. Subclass ``cmdline.Command`` or any of its children. For example, 11 | if you want to expose a new Featurizer from the command line. 12 | 13 | .. code-block:: python 14 | 15 | from msmbuilder.commands.featurizer import FeaturizerCommand 16 | class MyNiftyFeaturizerCommand(FeaturizerCommand): 17 | klass = MyNiftyFeaturizer 18 | _concrete = True 19 | 20 | 2. Provide your command as an "entry point" with ``setuptools``. 21 | Use ``"msmbuilder.commands"`` as the entry point. 22 | For example, in your ``setup.py``. 23 | 24 | .. code-block:: python 25 | 26 | setup( 27 | ... 28 | entry_points={'msmbuilder.commands': 29 | 'niftyfeat = niftyfeat:MyNiftyFeaturizerCommand'}, 30 | ) 31 | 32 | See the 33 | `setuptools documentation `_ 34 | for more information. 35 | 36 | .. vim: tw=75 37 | -------------------------------------------------------------------------------- /docs/preprocessing.rst: -------------------------------------------------------------------------------- 1 | .. _preprocessing: 2 | .. currentmodule:: msmbuilder.preprocessing 3 | 4 | 5 | Preprocessing 6 | ============= 7 | 8 | Preprocessing of a dataset is a common requirement for many machine learning 9 | estimators and may involve scaling, centering, normalization, smoothing, 10 | binarization, and imputation methods. 11 | 12 | Preprocessors 13 | ------------- 14 | 15 | .. autosummary:: 16 | :toctree: _preprocessing/ 17 | 18 | Binarizer 19 | Butterworth 20 | EWMA 21 | DoubleEWMA 22 | Imputer 23 | KernelCenterer 24 | LabelBinarizer 25 | MultiLabelBinarizer 26 | MinMaxScaler 27 | MaxAbsScaler 28 | Normalizer 29 | RobustScaler 30 | StandardScaler 31 | PolynomialFeatures 32 | 33 | .. vim: tw=75 34 | -------------------------------------------------------------------------------- /docs/publications_templ.rst: -------------------------------------------------------------------------------- 1 | .. _publications: 2 | 3 | Publications 4 | ============ 5 | 6 | The following published works use MSMBuilder. To add your publication 7 | to the list, open an issue on GitHub with the relevant information or 8 | edit ``docs/publications.bib`` and submit a pull request. 9 | 10 | .. publications.bib lists the relevant publications 11 | .. publications_templ.rst defines how the publications will be displayed 12 | .. publications.rst is generated during sphinx build (see conf.py) 13 | and should not be edited directly!
14 | 15 | {% for pub in publications %} 16 | {{pub.title}} 17 | -------------------------------------------------------------------------------- 18 | 19 | * {{pub.author | join('; ')}} 20 | * *{{pub.journal}}* **{{pub.year}}**, {{pub.volume}} {{pub.pages}} 21 | * `doi: {{pub.doi}} `_ 22 | 23 | {{pub.abstract | wordwrap }} 24 | 25 | {% endfor %} 26 | 27 | -------------------------------------------------------------------------------- /docs/ratematrix.rst: -------------------------------------------------------------------------------- 1 | .. _ratematrix: 2 | .. currentmodule:: msmbuilder.msm 3 | 4 | Continuous-time MSMs 5 | ==================== 6 | 7 | :class:`MarkovStateModel` estimates a series of 8 | transition *probabilities* among states that depend on the discrete 9 | lag-time. Physically, we are probably more interested in a sparse set of 10 | transition *rates* in and out of states, estimated by 11 | :class:`ContinuousTimeMSM`. 12 | 13 | 14 | Theory 15 | ------ 16 | 17 | Consider an `n`-state time-homogeneous Markov process, :math:`X(t)`. At 18 | time :math:`t`, the :math:`n`-vector :math:`P(t) = Pr[ X(t) = i ]` is the 19 | probability that the system is in each of the :math:`n` states. These 20 | probabilities evolve forward in time, governed by an :math:`n \times n` 21 | transition rate matrix :math:`K` 22 | 23 | .. math :: 24 | dP(t)/dt = P(t) \cdot K 25 | 26 | The solution is 27 | 28 | .. math :: 29 | P(t) = \exp(tK) \cdot P(0) 30 | 31 | Where :math:`\exp(tK)` is the matrix exponential. Written differently, the 32 | state-to-state lag-:math:`\tau` transition probabilities are 33 | 34 | .. math :: 35 | Pr[ X(t+\tau) = j \;|\; X(t) = i ] = \exp(\tau K)_{ij} 36 | 37 | For this model, we observe the evolution of one or more chains, 38 | :math:`X(t)` at a regular interval, :math:`\tau`. Let :math:`C_{ij}` be the 39 | number of times the chain was observed at state :math:`i` at time :math:`t` 40 | and at state :math:`j` at time :math:`t+\tau` (the number of observed 41 | transition counts). Suppose that :math:`K` depends on a parameter vector, 42 | :math:`\theta`. The log-likelihood is 43 | 44 | .. math :: 45 | \mathcal{L}(\theta) = \sum_{ij} \left[ 46 | C_{ij} \log\left(\left[\exp(\tau K(\theta))\right]_{ij}\right)\right] 47 | 48 | The :class:`ContinuousTimeMSM` model finds a rate matrix that fits the data 49 | by maximizing this likelihood expression. Specifically, it uses L-BFGS-B 50 | to find a maximum likelihood estimate (MLE) rate matrix, 51 | :math:`\hat{\theta}` and :math:`K(\hat{\theta})`. 52 | 53 | Uncertainties 54 | ~~~~~~~~~~~~~ 55 | 56 | Analytical estimates of the asymptotic standard deviation in estimated 57 | parameters like the stationary distribution, rate matrix, eigenvalues, and 58 | relaxation timescales can be computed by calling methods on the 59 | :class:`ContinuousTimeMSM` object. See [1] for more detail. 60 | 61 | 62 | Algorithms 63 | ---------- 64 | 65 | .. autosummary:: 66 | :toctree: _ratematrix/ 67 | 68 | ContinuousTimeMSM 69 | 70 | 71 | References 72 | ---------- 73 | .. [1] McGibbon, R. T. and V. S. Pande, "Efficient maximum likelihood parameterization 74 | of continuous-time Markov processes." J. Chem. Phys. 143 034109 (2015) http://dx.doi.org/10.1063/1.4926516 75 | .. [2] Kalbfleisch, J. D., and Jerald F. Lawless. "The analysis of panel data 76 | under a Markov assumption." J. Am. Stat. Assoc. 80.392 (1985): 863-871. 77 | 78 | .. 
vim: tw=75 79 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | numpydoc 2 | matplotlib 3 | jupyter 4 | notebook 5 | jinja2 6 | openmm 7 | nbconvert 8 | msmb_data 9 | pyparsing 10 | msmexplorer 11 | -------------------------------------------------------------------------------- /docs/tpt.rst: -------------------------------------------------------------------------------- 1 | .. _tpt: 2 | .. currentmodule:: msmbuilder.tpt 3 | 4 | Transition Path Theory 5 | ====================== 6 | 7 | 8 | Transition path theory (TPT) is a way to extract the highest-flux pathways 9 | of your system from an estimated MSM. 10 | 11 | .. todo: more 12 | 13 | .. todo: example 14 | 15 | 16 | References 17 | ---------- 18 | 19 | These are some canonical references for TPT. Note that TPT is really a 20 | specialization of ideas very familiar to the mathematical study of Markov 21 | chains, and there are many books, manuscripts in the mathematical 22 | literature that cover the same concepts. 23 | 24 | .. [1] E, Weinan and Vanden-Eijnden, Eric Towards a Theory of Transition Paths 25 | J. Stat. Phys. 123 503-523 (2006) 26 | .. [2] Metzner, P., Schutte, C. & Vanden-Eijnden, E. Transition path theory 27 | for Markov jump processes. Multiscale Model. Simul. 7, 1192-1219 28 | (2009). 29 | .. [3] Berezhkovskii, A., Hummer, G. & Szabo, A. Reactive flux and folding 30 | pathways in network models of coarse-grained protein dynamics. J. 31 | Chem. Phys. 130, 205102 (2009). 32 | .. [4] Noé, Frank, et al. "Constructing the equilibrium ensemble of folding 33 | pathways from short off-equilibrium simulations." PNAS 106.45 (2009): 34 | 19011-19016. 35 | 36 | Functions 37 | --------- 38 | 39 | .. autosummary:: 40 | :toctree: _tpt/ 41 | 42 | fluxes 43 | net_fluxes 44 | fraction_visited 45 | hub_scores 46 | paths 47 | top_path 48 | committors 49 | conditional_committors 50 | mfpts 51 | 52 | .. vim: tw=75 53 | -------------------------------------------------------------------------------- /examples/.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints/ 2 | -------------------------------------------------------------------------------- /examples/LICENSE.md: -------------------------------------------------------------------------------- 1 | These example scripts are released under the MIT license. MSMBuilder 2 | is LGPL. Please consider citing MSMBuilder if you use it in your work. 3 | 4 | The MIT License (MIT) 5 | 6 | Copyright (c) 2016 Stanford University and the Authors 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a 9 | copy of this software and associated documentation files (the "Software"), 10 | to deal in the Software without restriction, including without limitation 11 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 12 | and/or sell copies of the Software, and to permit persons to whom the 13 | Software is furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 24 | DEALINGS IN THE SOFTWARE. 25 | -------------------------------------------------------------------------------- /examples/advanced/quadwell.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "%matplotlib inline\n", 12 | "import numpy as np\n", 13 | "from matplotlib import pyplot as plt\n", 14 | "from msmbuilder.example_datasets import QuadWell, quadwell_eigs\n", 15 | "from msmbuilder.cluster import NDGrid\n", 16 | "from msmbuilder.msm import MarkovStateModel\n", 17 | "from sklearn.pipeline import Pipeline" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": null, 23 | "metadata": { 24 | "collapsed": false 25 | }, 26 | "outputs": [], 27 | "source": [ 28 | "dataset = QuadWell(random_state=0).get()\n", 29 | "true_eigenvalues = quadwell_eigs(200)[0]\n", 30 | "true_timescales = -1 / np.log(true_eigenvalues[1:])\n", 31 | "print(QuadWell.description())" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": { 38 | "collapsed": false 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "def msm_timescales(trajectories, n_states):\n", 43 | " pipeline = Pipeline([\n", 44 | " ('grid', NDGrid(min=-1.2, max=1.2)),\n", 45 | " ('msm', MarkovStateModel(n_timescales=4, reversible_type='transpose', verbose=False))\n", 46 | " ])\n", 47 | " pipeline.set_params(grid__n_bins_per_feature=n_states)\n", 48 | " pipeline.fit(trajectories)\n", 49 | " return pipeline.named_steps['msm'].timescales_\n", 50 | "\n", 51 | "n_states = [5, 10, 50, 100]\n", 52 | "ts = np.array([msm_timescales(dataset.trajectories, n) for n in n_states])" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": { 59 | "collapsed": false 60 | }, 61 | "outputs": [], 62 | "source": [ 63 | "for i, c in enumerate(['b', 'r', 'm']):\n", 64 | " plt.plot(n_states, ts[:, i], c=c, marker='x')\n", 65 | " plt.axhline(true_timescales[i], ls='--', c=c, lw=2)\n", 66 | "\n", 67 | "plt.xlabel('Number of states')\n", 68 | "plt.ylabel('Timescale (steps)')\n", 69 | "plt.show()" 70 | ] 71 | } 72 | ], 73 | "metadata": { 74 | "kernelspec": { 75 | "display_name": "Python 3", 76 | "language": "python", 77 | "name": "python3" 78 | }, 79 | "language_info": { 80 | "codemirror_mode": { 81 | "name": "ipython", 82 | "version": 3 83 | }, 84 | "file_extension": ".py", 85 | "mimetype": "text/x-python", 86 | "name": "python", 87 | "nbconvert_exporter": "python", 88 | "pygments_lexer": "ipython3", 89 | "version": "3.4.3" 90 | } 91 | }, 92 | "nbformat": 4, 93 | "nbformat_minor": 0 94 | } 95 | -------------------------------------------------------------------------------- /msmbuilder/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msmbuilder/msmbuilder/515fd5c27836c797692d600216b5eb224dfc1c5d/msmbuilder/__init__.py -------------------------------------------------------------------------------- /msmbuilder/base.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, absolute_import, 
division 2 | 3 | from sklearn.base import BaseEstimator as SklearnBaseEstimator 4 | 5 | 6 | class BaseEstimator(SklearnBaseEstimator): 7 | # http://msmbuilder.org/development/apipatterns.html 8 | 9 | def summarize(self): 10 | """Return some diagnostic summary statistics about this Markov model""" 11 | return 'NotImplemented' 12 | -------------------------------------------------------------------------------- /msmbuilder/cluster/.gitignore: -------------------------------------------------------------------------------- 1 | _kmedoids.cpp -------------------------------------------------------------------------------- /msmbuilder/cluster/src/kmedoids.h: -------------------------------------------------------------------------------- 1 | /******************************************************************************/ 2 | /* The C Clustering Library. 3 | * Copyright (C) 2002 Michiel Jan Laurens de Hoon. 4 | * 5 | * This library was written at the Laboratory of DNA Information Analysis, 6 | * Human Genome Center, Institute of Medical Science, University of Tokyo, 7 | * 4-6-1 Shirokanedai, Minato-ku, Tokyo 108-8639, Japan. 8 | * Contact: mdehoon 'AT' gsc.riken.jp 9 | * 10 | * Permission to use, copy, modify, and distribute this software and its 11 | * documentation with or without modifications and for any purpose and 12 | * without fee is hereby granted, provided that any copyright notices 13 | * appear in all copies and that both those copyright notices and this 14 | * permission notice appear in supporting documentation, and that the 15 | * names of the contributors or copyright holders not be used in 16 | * advertising or publicity pertaining to distribution of the software 17 | * without specific prior permission. 18 | * 19 | * THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL 20 | * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED 21 | * WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE 22 | * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT 23 | * OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS 24 | * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE 25 | * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE 26 | * OR PERFORMANCE OF THIS SOFTWARE. 27 | * 28 | */ 29 | 30 | #ifndef MIXTAPE_CLUSTER_KMEDOIDS_H 31 | #define MIXTAPE_CLUSTER_KMEDOIDS_H 32 | #include 33 | #include 34 | #include 35 | 36 | void kmedoids(npy_intp nclusters, npy_intp nelements, double* distmatrix, 37 | npy_intp npass, npy_intp clusterid[], PyObject* random, 38 | double* error, npy_intp* ifound); 39 | 40 | 41 | /* 42 | Renumber cluster ids to go from 0 to n_clusters - 1. 43 | This function modifies the array inplace, and returns 44 | the mapping from the old values to new values. 
45 | */ 46 | std::map contigify_ids(npy_intp* ids, npy_intp length); 47 | 48 | 49 | #endif 50 | -------------------------------------------------------------------------------- /msmbuilder/commands/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from .atom_indices import AtomIndices 4 | from .convert_chunked_project import ConvertChunkedProject 5 | from .example_datasets import AlanineDipeptideDatasetCommand 6 | from .featurizer import (AtomPairsFeaturizerCommand, ContactFeaturizerCommand, 7 | DihedralFeaturizerCommand, DRIDFeaturizerCommand, 8 | SuperposeFeaturizerCommand, 9 | KappaAngleFeaturizerCommand, 10 | AlphaAngleFeaturizerCommand, RMSDFeaturizerCommand, 11 | LandMarkRMSDFeaturizerCommand, 12 | BinaryContactFeaturizerCommand, 13 | LogisticContactFeaturizerCommand, 14 | VonMisesFeaturizerCommand, 15 | RawPositionsFeaturizerCommand, SASAFeaturizerCommand, 16 | LigandContactFeaturizerCommand, 17 | BinaryLigandContactFeaturizerCommand, 18 | LigandRMSDFeaturizerCommand) 19 | from .fit import (GaussianHMMCommand, MarkovStateModelCommand, 20 | BayesianMarkovStateModelCommand, ContinuousTimeMSMCommand, 21 | BayesianContinuousTimeMSMCommand) 22 | 23 | try: 24 | from .fit_transform import RobustScalerCommand, StandardScalerCommand 25 | except: 26 | pass 27 | 28 | from .fit_transform import (tICACommand, ButterworthCommand, DoubleEWMACommand, 29 | SparseTICACommand, FastICACommand, 30 | FactorAnalysisCommand, KernelTICACommand, 31 | PCACommand, SparsePCACommand, 32 | MiniBatchSparsePCACommand, 33 | KMeansCommand, MiniBatchKMeansCommand, 34 | KCentersCommand, KMedoidsCommand, 35 | MiniBatchKMedoidsCommand, RegularSpatialCommand, 36 | LandmarkAgglomerativeCommand, GMMCommand, 37 | MeanShiftCommand, NDGridCommand, 38 | SpectralClusteringCommand, 39 | AffinityPropagationCommand, APMCommand, 40 | AgglomerativeClusteringCommand, KSparseTICACommand) 41 | from .transform import TransformCommand 42 | from .example_datasets import (AlanineDipeptideDatasetCommand, 43 | FsPeptideDatasetCommand) 44 | from .atom_indices import AtomIndices 45 | from .implied_timescales import ImpliedTimescales 46 | from .template_project import TemplateProjectCommand 47 | from .transform import TransformCommand 48 | -------------------------------------------------------------------------------- /msmbuilder/commands/example_datasets.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, absolute_import 2 | 3 | from ..cmdline import NumpydocClassCommand 4 | from ..example_datasets import (AlanineDipeptide, DoubleWell, QuadWell, FsPeptide, 5 | MetEnkephalin, MullerPotential) 6 | 7 | 8 | class DatasetCommand(NumpydocClassCommand): 9 | _group = 'Dataset' 10 | def start(self): 11 | self.instance.cache() 12 | print('Example dataset saved: %s' % self.instance.data_dir) 13 | 14 | 15 | class AlanineDipeptideDatasetCommand(DatasetCommand): 16 | _concrete = True 17 | klass = AlanineDipeptide 18 | description = 'Download example alanine dipeptide dataset.' 
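# Illustrative sketch (not part of the original module): the dataset commands
# above simply call ``cache()`` on a dataset instance and report where the
# files were saved. The same thing can be done directly from Python;
# ``cache``, ``get`` and ``data_dir`` are taken from the dataset classes shown
# later in this repository, so treat the exact names as assumptions.
if __name__ == '__main__':
    from msmbuilder.example_datasets import AlanineDipeptide

    ds = AlanineDipeptide()   # optionally AlanineDipeptide(data_home='...')
    ds.cache()                # download the files if they are not cached yet
    bunch = ds.get()          # Bunch with ``trajectories`` and ``DESCR``
    print(ds.data_dir, len(bunch.trajectories))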
19 | 20 | 21 | class _NWellDatasetCommand(DatasetCommand): 22 | def _random_state_type(self, s): 23 | if s is not None: 24 | return int(s) 25 | else: 26 | return s 27 | 28 | 29 | class DoubleWellDatasetCommand(_NWellDatasetCommand): 30 | _concrete = True 31 | klass = DoubleWell 32 | description = ('Generate example double well potential dataset.\n\n' + 33 | DoubleWell.description()) 34 | 35 | 36 | class QuadWellDatasetCommand(_NWellDatasetCommand): 37 | _concrete = True 38 | klass = QuadWell 39 | description = ('Generate example quad-well potential dataset.\n\n' + 40 | QuadWell.description()) 41 | 42 | 43 | class MullerPotentialDatasetCommand(_NWellDatasetCommand): 44 | _concrete = True 45 | klass = MullerPotential 46 | description = ('Generate example Muller potential dataset.\n\n' 47 | + MullerPotential.description()) 48 | 49 | 50 | class FsPeptideDatasetCommand(DatasetCommand): 51 | _concrete = True 52 | klass = FsPeptide 53 | description = 'Download example Fs-peptide dataset.' 54 | 55 | 56 | class MetEnkephalinDatasetCommand(DatasetCommand): 57 | _concrete = True 58 | klass = MetEnkephalin 59 | description = 'Download example Met-Enkephalin dataset.' 60 | -------------------------------------------------------------------------------- /msmbuilder/commands/fit.py: -------------------------------------------------------------------------------- 1 | # Author: Robert McGibbon 2 | # Contributors: Brooke Husic 3 | # Copyright (c) 2014, Stanford University 4 | # All rights reserved. 5 | 6 | # ----------------------------------------------------------------------------- 7 | # Imports 8 | # ----------------------------------------------------------------------------- 9 | 10 | from __future__ import print_function, absolute_import 11 | 12 | import os 13 | 14 | from ..dataset import dataset 15 | from ..utils import verbosedump 16 | from ..hmm import GaussianHMM 17 | from ..msm import (MarkovStateModel, BayesianMarkovStateModel, ContinuousTimeMSM, 18 | BayesianContinuousTimeMSM) 19 | from ..cmdline import NumpydocClassCommand, argument, exttype 20 | 21 | 22 | class FitCommand(NumpydocClassCommand): 23 | inp = argument( 24 | '-i', '--inp', help='''Input dataset. This should be serialized 25 | list of numpy arrays.''', required=True, type=os.path.expanduser) 26 | model = argument( 27 | '-o', '--out', help='''Output (fit) model. 
This will be a 28 | serialized instance of the fit model object.''', required=True, 29 | type=exttype('.pkl')) 30 | 31 | def start(self): 32 | if not os.path.exists(self.inp): 33 | self.error('File does not exist: %s' % self.inp) 34 | 35 | print(self.instance) 36 | inp_ds = dataset(self.inp, mode='r') 37 | self.instance.fit(inp_ds) 38 | 39 | print("*********\n*RESULTS*\n*********") 40 | print(self.instance.summarize()) 41 | print('-' * 80) 42 | 43 | verbosedump(self.instance, self.out) 44 | print("To load this %s object interactively inside an IPython\n" 45 | "shell or notebook, run: \n" % self.klass.__name__) 46 | print(" $ ipython") 47 | print(" >>> from msmbuilder.utils import load") 48 | print(" >>> model = load('%s')\n" % self.out) 49 | 50 | inp_ds.close() 51 | 52 | class GaussianHMMCommand(FitCommand): 53 | klass = GaussianHMM 54 | _concrete = True 55 | _group = 'MSM' 56 | 57 | 58 | class MarkovStateModelCommand(FitCommand): 59 | klass = MarkovStateModel 60 | _concrete = True 61 | _group = 'MSM' 62 | 63 | def _ergodic_cutoff_type(self, erg): 64 | if erg.lower() in ['on', 'off']: 65 | return erg 66 | else: 67 | return float(erg) 68 | 69 | 70 | class BayesianMarkovStateModelCommand(FitCommand): 71 | klass = BayesianMarkovStateModel 72 | _concrete = True 73 | _group = 'MSM' 74 | 75 | 76 | class ContinuousTimeMSMCommand(FitCommand): 77 | klass = ContinuousTimeMSM 78 | _concrete = True 79 | _group = 'MSM' 80 | 81 | 82 | class BayesianContinuousTimeMSMCommand(FitCommand): 83 | klass = BayesianContinuousTimeMSM 84 | _concrete = True 85 | _group = 'MSM' 86 | -------------------------------------------------------------------------------- /msmbuilder/commands/template_project.py: -------------------------------------------------------------------------------- 1 | """Set up a new MSMBuilder project 2 | 3 | """ 4 | # Author: Matthew Harrigan 5 | # Contributors: 6 | # Copyright (c) 2016, Stanford University 7 | # All rights reserved. 8 | 9 | from __future__ import print_function, division, absolute_import 10 | 11 | import os 12 | import stat 13 | import textwrap 14 | 15 | from ..cmdline import NumpydocClassCommand, argument 16 | from ..io import TemplateProject 17 | 18 | 19 | def chmod_plus_x(fn): 20 | st = os.stat(fn) 21 | os.chmod(fn, st.st_mode | stat.S_IEXEC) 22 | 23 | 24 | class TemplateProjectCommand(NumpydocClassCommand): 25 | _group = '0-Support' 26 | _concrete = True 27 | description = __doc__ 28 | klass = TemplateProject 29 | 30 | disclaimer = argument('--disclaimer', default=False, action='store_true', 31 | help="Print a disclaimer about using these templates.") 32 | 33 | def print_disclaimer(self): 34 | print('\n'.join(textwrap.wrap( 35 | "This writes a bunch of Python files that can guide you " 36 | "through analyzing a system with MSMBuilder. I implore you to " 37 | "look at the scripts before you start blindly running them. " 38 | "You will likely have to change some (hyper-)parameters or " 39 | "filenames to match your particular project." 40 | ))) 41 | print() 42 | print('\n'.join(textwrap.wrap( 43 | "More than that, however, it is important that you understand " 44 | "exactly what the scripts are doing. Each protein system is " 45 | "different, and it is up to you (the researcher) to hone in on " 46 | "interesting aspects. This very generic pipeline may not give " 47 | "you any new insight for anything but the simplest systems." 
48 | ))) 49 | 50 | def start(self): 51 | if self.disclaimer: 52 | self.print_disclaimer() 53 | print() 54 | print("Run again without --disclaimer to actually write templates.") 55 | return 56 | 57 | self.instance.do() 58 | -------------------------------------------------------------------------------- /msmbuilder/decomposition/.gitignore: -------------------------------------------------------------------------------- 1 | _speigh.cpp -------------------------------------------------------------------------------- /msmbuilder/decomposition/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from sklearn import decomposition as _decomposition 4 | 5 | from .base import MultiSequenceDecompositionMixin 6 | from .ktica import KernelTICA 7 | from .pca import PCA, SparsePCA, MiniBatchSparsePCA 8 | from .sparsetica import SparseTICA 9 | from .ksparsetica import KSparseTICA 10 | from .tica import tICA 11 | 12 | 13 | class FastICA(MultiSequenceDecompositionMixin, _decomposition.FastICA): 14 | __doc__ = _decomposition.FastICA.__doc__ 15 | 16 | def summarize(self): 17 | return '\n'.join([ 18 | "Independent Component Analysis (ICA)", 19 | "----------", 20 | "Number of components: {n_components}", 21 | "Number of iterations: {n_iter_}", 22 | ]).format(**self.__dict__) 23 | 24 | 25 | class FactorAnalysis(MultiSequenceDecompositionMixin, 26 | _decomposition.FactorAnalysis): 27 | __doc__ = _decomposition.FactorAnalysis.__doc__ 28 | 29 | def summarize(self): 30 | return '\n'.join([ 31 | "FactorAnalysis (FA)", 32 | "----------", 33 | "Number of components: {n_components}", 34 | "Log likelihood: {loglike_}", 35 | "Noise variance: {noise_variance_}", 36 | "Number of iterations: {n_iter_}", 37 | ]).format(**self.__dict__) 38 | -------------------------------------------------------------------------------- /msmbuilder/decomposition/pca.py: -------------------------------------------------------------------------------- 1 | # Author: Matthew Harrigan 2 | # Contributors: 3 | # Copyright (c) 2016, Stanford University and the Authors 4 | # All rights reserved. 
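# Illustrative usage sketch (commented out; not part of this module): the
# classes in this package wrap their scikit-learn counterparts with
# MultiSequenceDecompositionMixin, so they are fit on a *list* of 2-D arrays
# (one per trajectory) rather than on a single array. The random data below
# is made up purely for illustration.
#
#     import numpy as np
#     from msmbuilder.decomposition import PCA
#
#     trajs = [np.random.randn(500, 10), np.random.randn(400, 10)]
#     pca = PCA(n_components=2)
#     reduced = pca.fit_transform(trajs)   # list of (n_frames_i, 2) arrays
#     print(pca.summarize())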
5 | 6 | from __future__ import print_function, division, absolute_import 7 | 8 | from sklearn import decomposition 9 | 10 | from .base import MultiSequenceDecompositionMixin 11 | 12 | __all__ = ['PCA', 'SparsePCA'] 13 | 14 | 15 | class PCA(MultiSequenceDecompositionMixin, decomposition.PCA): 16 | __doc__ = decomposition.PCA.__doc__ 17 | 18 | def summarize(self): 19 | return '\n'.join([ 20 | "Principal Component Analysis (PCA)", 21 | "----------", 22 | "Number of components: {n_components}", 23 | "Explained variance ratio: {explained_variance_ratio_}", 24 | "Noise variance: {noise_variance_}", 25 | ]).format(**self.__dict__) 26 | 27 | 28 | class SparsePCA(MultiSequenceDecompositionMixin, decomposition.SparsePCA): 29 | __doc__ = decomposition.SparsePCA.__doc__ 30 | 31 | def summarize(self): 32 | return '\n'.join([ 33 | "Sparse PCA", 34 | "----------", 35 | "Number of components: {n_components}", 36 | ]).format(**self.__dict__) 37 | 38 | 39 | class MiniBatchSparsePCA(MultiSequenceDecompositionMixin, 40 | decomposition.MiniBatchSparsePCA): 41 | __doc__ = decomposition.MiniBatchSparsePCA.__doc__ 42 | 43 | def summarize(self): 44 | return '\n'.join([ 45 | "MiniBatch Sparse PCA", 46 | "--------------------", 47 | "Number of components: {n_components}", 48 | "Batch size: {batch_size}" 49 | ]).format(**self.__dict__) 50 | 51 | 52 | class KernelPCA(MultiSequenceDecompositionMixin, decomposition.KernelPCA): 53 | __doc__ = decomposition.KernelPCA.__doc__ 54 | 55 | def summarize(self): 56 | return '\n'.join([ 57 | "Kernel PCA", 58 | "--------------------", 59 | "Number of components: {n_components}", 60 | "Kernel: {kernel}", 61 | ]).format(**self.__dict__) 62 | -------------------------------------------------------------------------------- /msmbuilder/decomposition/utils.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import itertools 3 | import numpy as np 4 | from six.moves import xrange 5 | 6 | 7 | def iterate_tracker(maxiter, max_nc, verbose=False): 8 | """Generator that breaks after maxiter, or after the same 9 | array has been sent in more than max_nc times in a row. 10 | """ 11 | last_hash = None 12 | last_hash_count = 0 13 | arr = yield 14 | 15 | for i in xrange(maxiter): 16 | arr = yield i 17 | if arr is not None: 18 | hsh = hashlib.sha1(arr.view(np.uint8)).hexdigest() 19 | if last_hash == hsh: 20 | last_hash_count += 1 21 | else: 22 | last_hash = hsh 23 | last_hash_count = 1 24 | 25 | if last_hash_count >= max_nc: 26 | if verbose: 27 | print('Termination. Over %d iterations without 28 | change.' 
% max_nc) 29 | break -------------------------------------------------------------------------------- /msmbuilder/example_datasets/.gitignore: -------------------------------------------------------------------------------- 1 | _muller.c 2 | -------------------------------------------------------------------------------- /msmbuilder/example_datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from .base import get_data_home, clear_data_home, has_msmb_data 3 | from .brownian1d import DoubleWell, QuadWell 4 | from .brownian1d import load_doublewell, load_quadwell 5 | from .brownian1d import doublewell_eigs, quadwell_eigs 6 | from .alanine_dipeptide import fetch_alanine_dipeptide, AlanineDipeptide 7 | from .met_enkephalin import fetch_met_enkephalin, MetEnkephalin 8 | from .fs_peptide import fetch_fs_peptide, FsPeptide, MinimalFsPeptide 9 | from .muller import MullerPotential, load_muller 10 | 11 | __all__ = [ 12 | 'get_data_home', 13 | 'clear_data_home', 14 | 'has_msmb_data', 15 | 'load_doublewell', 16 | 'load_quadwell', 17 | 'doublewell_eigs', 18 | 'quadwell_eigs', 19 | 'fetch_alanine_dipeptide', 20 | 'fetch_met_enkephalin', 21 | 'fetch_fs_peptide', 22 | 'AlanineDipeptide', 23 | 'MetEnkephalin', 24 | 'FsPeptide', 25 | 'DoubleWell', 26 | 'QuadWell', 27 | 'MullerPotential', 28 | 'load_muller', 29 | ] 30 | -------------------------------------------------------------------------------- /msmbuilder/example_datasets/alanine_dipeptide.py: -------------------------------------------------------------------------------- 1 | # Author: Robert McGibbon 2 | # Contributors: Matthew Harrigan 3 | # Copyright (c) 2016, Stanford University and the Authors 4 | # All rights reserved. 5 | 6 | # ----------------------------------------------------------------------------- 7 | # Imports 8 | # ----------------------------------------------------------------------------- 9 | from __future__ import print_function, absolute_import, division 10 | 11 | from glob import glob 12 | from os.path import join 13 | 14 | import mdtraj as md 15 | 16 | from .base import Bunch, _MDDataset 17 | 18 | DATA_URL = "https://ndownloader.figshare.com/articles/1026131/versions/8" 19 | TARGET_DIRECTORY = "alanine_dipeptide" 20 | 21 | 22 | class AlanineDipeptide(_MDDataset): 23 | """Alanine dipeptide dataset 24 | 25 | Parameters 26 | ---------- 27 | data_home : optional, default: None 28 | Specify another download and cache folder for the datasets. By default 29 | all MSMBuilder data is stored in '~/msmbuilder_data' subfolders. 30 | 31 | 32 | Notes 33 | ----- 34 | The dataset consists of ten 10ns trajectories of of alanine dipeptide, 35 | simulated using OpenMM 6.0.1 (CUDA platform, NVIDIA GTX660) with the 36 | AMBER99SB-ILDN force field at 300K (langevin dynamics, friction coefficient 37 | of 91/ps, timestep of 2fs) with GBSA implicit solvent. The coordinates are 38 | saved every 1ps. Each trajectory contains 9,999 snapshots. 
39 | 40 | The dataset, including the script used to generate the dataset 41 | is available on figshare at 42 | 43 | http://dx.doi.org/10.6084/m9.figshare.1026131 44 | """ 45 | target_directory = TARGET_DIRECTORY 46 | data_url = DATA_URL 47 | 48 | def get_cached(self): 49 | top = md.load(join(self.data_dir, 'ala2.pdb')) 50 | trajectories = [] 51 | for fn in glob(join(self.data_dir, 'trajectory*.dcd')): 52 | trajectories.append(md.load(fn, top=top)) 53 | 54 | return Bunch(trajectories=trajectories, DESCR=self.description()) 55 | 56 | 57 | def fetch_alanine_dipeptide(data_home=None): 58 | return AlanineDipeptide(data_home).get() 59 | 60 | 61 | fetch_alanine_dipeptide.__doc__ = AlanineDipeptide.__doc__ 62 | -------------------------------------------------------------------------------- /msmbuilder/example_datasets/met_enkephalin.py: -------------------------------------------------------------------------------- 1 | # Author: Robert McGibbon 2 | # Contributors: 3 | # Copyright (c) 2014, Stanford University and the Authors 4 | # All rights reserved. 5 | 6 | # ----------------------------------------------------------------------------- 7 | # Imports 8 | # ----------------------------------------------------------------------------- 9 | from __future__ import print_function, absolute_import, division 10 | 11 | from glob import glob 12 | from os.path import join 13 | 14 | import mdtraj as md 15 | 16 | from .base import Bunch, _MDDataset 17 | 18 | DATA_URL = "https://ndownloader.figshare.com/articles/1026324/versions/1" 19 | TARGET_DIRECTORY = "met_enkephalin" 20 | 21 | 22 | class MetEnkephalin(_MDDataset): 23 | """Loader for the met-enkephalin dataset 24 | 25 | Parameters 26 | ---------- 27 | data_home : optional, default: None 28 | Specify another download and cache folder for the datasets. By default 29 | all MSMBuilder data is stored in '~/msmbuilder_data' subfolders. 30 | 31 | download_if_missing: optional, True by default 32 | If False, raise a IOError if the data is not locally available 33 | instead of trying to download the data from the source site. 34 | 35 | Notes 36 | ----- 37 | The dataset consists of ten ~50 ns molecular dynamics (MD) simulation 38 | trajectories of the 5 residue Met-enkaphalin peptide. The aggregate 39 | sampling is 499.58 ns. Simulations were performed starting from the 1st 40 | model in the 1PLX PDB file, solvated with 832 TIP3P water molecules using 41 | OpenMM 6.0. The coordinates (protein only -- the water was stripped) 42 | are saved every 5 picoseconds. Each of the ten trajectories is roughly 43 | 50 ns long and contains about 10,000 snapshots. 44 | 45 | Forcefield: amber99sb-ildn; water: tip3p; nonbonded method: PME; cutoffs: 46 | 1nm; bonds to hydrogen were constrained; integrator: langevin dynamics; 47 | temperature: 300K; friction coefficient: 1.0/ps; pressure control: Monte 48 | Carlo barostat (interval of 25 steps); timestep 2 fs. 
49 | 50 | The dataset is available on figshare at 51 | 52 | http://dx.doi.org/10.6084/m9.figshare.1026324 53 | """ 54 | 55 | data_url = DATA_URL 56 | target_directory = TARGET_DIRECTORY 57 | 58 | def get_cached(self): 59 | top = md.load(join(self.data_dir, '1plx.pdb')) 60 | trajectories = [] 61 | for fn in glob(join(self.data_dir, 'trajectory*.dcd')): 62 | trajectories.append(md.load(fn, top=top)) 63 | 64 | return Bunch(trajectories=trajectories, DESCR=self.description()) 65 | 66 | 67 | def fetch_met_enkephalin(data_home=None): 68 | return MetEnkephalin(data_home).get() 69 | 70 | 71 | fetch_met_enkephalin.__doc__ = MetEnkephalin.__doc__ 72 | -------------------------------------------------------------------------------- /msmbuilder/feature_extraction/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from ..featurizer import * 4 | from ..featurizer import subset 5 | -------------------------------------------------------------------------------- /msmbuilder/feature_selection/__init__.py: -------------------------------------------------------------------------------- 1 | # Author: Carlos Xavier Hernandez 2 | # Contributors: 3 | # Copyright (c) 2016, Stanford University and the Authors 4 | # All rights reserved. 5 | 6 | from __future__ import absolute_import 7 | 8 | from .base import MultiSequenceFeatureSelectionMixin 9 | from .featureselector import FeatureSlicer, FeatureSelector 10 | 11 | from sklearn import feature_selection 12 | 13 | class VarianceThreshold(MultiSequenceFeatureSelectionMixin, 14 | feature_selection.VarianceThreshold): 15 | __doc__ = feature_selection.VarianceThreshold.__doc__ 16 | -------------------------------------------------------------------------------- /msmbuilder/feature_selection/base.py: -------------------------------------------------------------------------------- 1 | # Author: Carlos Xavier Hernandez 2 | # Contributors: 3 | # Copyright (c) 2016, Stanford University and the Authors 4 | # All rights reserved. 5 | 6 | from __future__ import absolute_import 7 | 8 | from ..decomposition.base import MultiSequenceDecompositionMixin 9 | 10 | 11 | class MultiSequenceFeatureSelectionMixin(MultiSequenceDecompositionMixin): 12 | __doc__ = MultiSequenceDecompositionMixin.__doc__ 13 | -------------------------------------------------------------------------------- /msmbuilder/featurizer/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from .feature_union import FeatureUnion 4 | from .featurizer import * 5 | from .indices import get_atompair_indices 6 | from .multiseq_featuizer import * 7 | from .multichain import * 8 | -------------------------------------------------------------------------------- /msmbuilder/featurizer/indices.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | 3 | import numpy as np 4 | 5 | ATOM_NAMES = ["N", "CA", "CB", "C", "O", "H"] 6 | 7 | 8 | def get_atompair_indices(reference_traj, keep_atoms=None, 9 | exclude_atoms=None, reject_bonded=True): 10 | """Get a list of acceptable atom pairs. 11 | 12 | Parameters 13 | ---------- 14 | reference_traj : mdtraj.Trajectory 15 | Trajectory to grab atom pairs from 16 | keep_atoms : np.ndarray, dtype=string, optional 17 | Select only these atom names. 
Defaults to N, CA, CB, C, O, H 18 | exclude_atoms : np.ndarray, dtype=string, optional 19 | Exclude these atom names 20 | reject_bonded : bool, default=True 21 | If True, exclude bonded atompairs. 22 | 23 | Returns 24 | ------- 25 | atom_indices : np.ndarray, dtype=int 26 | The atom indices that pass your criteria 27 | pair_indices : np.ndarray, dtype=int, shape=(N, 2) 28 | Pairs of atom indices that pass your criteria. 29 | 30 | Notes 31 | ----- 32 | This function has been optimized for speed. A naive implementation 33 | can be slow (~minutes) for large proteins. 34 | """ 35 | if keep_atoms is None: 36 | keep_atoms = ATOM_NAMES 37 | 38 | top, bonds = reference_traj.top.to_dataframe() 39 | 40 | if keep_atoms is not None: 41 | atom_indices = top[top.name.isin(keep_atoms) == True].index.values 42 | 43 | if exclude_atoms is not None: 44 | atom_indices = top[top.name.isin(exclude_atoms) == False].index.values 45 | 46 | pair_indices = np.array(list(itertools.combinations(atom_indices, 2))) 47 | 48 | if reject_bonded: 49 | a_list = bonds.min(1) 50 | b_list = bonds.max(1) 51 | 52 | n = atom_indices.max() + 1 53 | 54 | bond_hashes = a_list + b_list * n 55 | pair_hashes = pair_indices[:, 0] + pair_indices[:, 1] * n 56 | 57 | not_bonds = ~np.in1d(pair_hashes, bond_hashes) 58 | 59 | pair_indices = np.array([(a, b) for k, (a, b) 60 | in enumerate(pair_indices) 61 | if not_bonds[k]]) 62 | 63 | return atom_indices, pair_indices 64 | -------------------------------------------------------------------------------- /msmbuilder/hmm/.gitignore: -------------------------------------------------------------------------------- 1 | gaussian.cpp 2 | gaussian.h 3 | vonmises.cpp 4 | vonmises.h 5 | -------------------------------------------------------------------------------- /msmbuilder/hmm/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from .gaussian import GaussianHMM 3 | from .vonmises import VonMisesHMM 4 | -------------------------------------------------------------------------------- /msmbuilder/hmm/cephes/README.md: -------------------------------------------------------------------------------- 1 | This code is from Cephes, download directly from netlib: 2 | 3 | http://www.netlib.no/netlib/cephes/ 4 | 5 | 6 | The original copyright, from the readme file of that distribution: 7 | 8 | Some software in this archive may be from the book _Methods and 9 | Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster 10 | International, 1989) or from the Cephes Mathematical Library, a 11 | commercial product. In either event, it is copyrighted by the author. 12 | What you see here may be used freely but it comes with no support or 13 | guarantee. 14 | 15 | The two known misprints in the book are repaired here in the 16 | source listings for the gamma function and the incomplete beta 17 | integral. 18 | 19 | 20 | Stephen L. 
Moshier 21 | moshier@na-net.ornl.gov 22 | -------------------------------------------------------------------------------- /msmbuilder/hmm/cephes/cephes.h: -------------------------------------------------------------------------------- 1 | #ifndef _CEPHES_H_ 2 | #define _CEPHES_H_ 3 | 4 | #include "cephes_names.h" 5 | int mtherr(char *name, int code); 6 | double i0(double x); 7 | double i1(double x); 8 | double zeta(double x, double q); 9 | double psi(double x); 10 | double lgam(double x); 11 | double p1evl(double x, double coef[], int N); 12 | double polevl(double x, double coef[], int N); 13 | double chbevl(double x, double array[], int n); 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /msmbuilder/hmm/cephes/cephes_names.h: -------------------------------------------------------------------------------- 1 | #ifndef CEPHES_NAMES_H 2 | #define CEPHES_NAMES_H 3 | 4 | #define airy cephes_airy 5 | #define bdtrc cephes_bdtrc 6 | #define bdtr cephes_bdtr 7 | #define bdtri cephes_bdtri 8 | #define beta cephes_beta 9 | #define lbeta cephes_lbeta 10 | #define btdtr cephes_btdtr 11 | #define cbrt cephes_cbrt 12 | #define chdtrc cephes_chdtrc 13 | #define chdtr cephes_chdtr 14 | #define chdtri cephes_chdtri 15 | #define dawsn cephes_dawsn 16 | #define ellie cephes_ellie 17 | #define ellik cephes_ellik 18 | #define ellpe cephes_ellpe 19 | #define ellpj cephes_ellpj 20 | #define ellpk cephes_ellpk 21 | #define exp10 cephes_exp10 22 | #define exp1m cephes_exp1m 23 | #define exp2 cephes_exp2 24 | #define expn cephes_expn 25 | // #define fabs cephes_fabs 26 | #define fdtrc cephes_fdtrc 27 | #define fdtr cephes_fdtr 28 | #define fdtri cephes_fdtri 29 | #define fresnl cephes_fresnl 30 | #define Gamma cephes_Gamma 31 | #define lgam cephes_lgam 32 | #define gdtr cephes_gdtr 33 | #define gdtrc cephes_gdtrc 34 | #define gdtri cephes_gdtri 35 | #define hyp2f1 cephes_hyp2f1 36 | #define hyperg cephes_hyperg 37 | #define hyp2f0 cephes_hyp2f0 38 | #define onef2 cephes_onef2 39 | #define threef0 cephes_threef0 40 | #define i0 cephes_i0 41 | #define i0e cephes_i0e 42 | #define i1 cephes_i1 43 | #define i1e cephes_i1e 44 | #define igamc cephes_igamc 45 | #define igam cephes_igam 46 | #define igami cephes_igami 47 | #define incbet cephes_incbet 48 | #define incbi cephes_incbi 49 | #define iv cephes_iv 50 | #define j0 cephes_j0 51 | #define y0 cephes_y0 52 | #define j1 cephes_j1 53 | #define y1 cephes_y1 54 | #define jn cephes_jn 55 | #define jv cephes_jv 56 | #define k0 cephes_k0 57 | #define k0e cephes_k0e 58 | #define k1 cephes_k1 59 | #define k1e cephes_k1e 60 | #define kn cephes_kn 61 | #define nbdtrc cephes_nbdtrc 62 | #define nbdtr cephes_nbdtr 63 | #define nbdtri cephes_nbdtri 64 | #define ndtr cephes_ndtr 65 | #define erfc cephes_erfc 66 | #define erf cephes_erf 67 | #define ndtri cephes_ndtri 68 | #define pdtrc cephes_pdtrc 69 | #define pdtr cephes_pdtr 70 | #define pdtri cephes_pdtri 71 | #define psi cephes_psi 72 | #define rgamma cephes_rgamma 73 | #define round cephes_round 74 | #define shichi cephes_shichi 75 | #define sici cephes_sici 76 | #define radian cephes_radian 77 | #define sindg cephes_sindg 78 | #define cosdg cephes_cosdg 79 | #define sincos cephes_sincos 80 | #define spence cephes_spence 81 | #define stdtr cephes_stdtr 82 | #define stdtri cephes_stdtri 83 | #define struve cephes_struve 84 | #define yv cephes_yv 85 | #define tandg cephes_tandg 86 | #define cotdg cephes_cotdg 87 | #define log1p cephes_log1p 88 | #define expm1 cephes_expm1 
89 | #define cosm1 cephes_cosm1 90 | #define yn cephes_yn 91 | #define zeta cephes_zeta 92 | #define zetac cephes_zetac 93 | #define smirnov cephes_smirnov 94 | #define smirnovi cephes_smirnovi 95 | #define kolmogorov cephes_kolmogorov 96 | #define kolmogi cephes_kolmogi 97 | 98 | #endif 99 | -------------------------------------------------------------------------------- /msmbuilder/hmm/cephes/chbevl.c: -------------------------------------------------------------------------------- 1 | /* chbevl.c 2 | * 3 | * Evaluate Chebyshev series 4 | * 5 | * 6 | * 7 | * SYNOPSIS: 8 | * 9 | * int N; 10 | * double x, y, coef[N], chebevl(); 11 | * 12 | * y = chbevl( x, coef, N ); 13 | * 14 | * 15 | * 16 | * DESCRIPTION: 17 | * 18 | * Evaluates the series 19 | * 20 | * N-1 21 | * - ' 22 | * y = > coef[i] T (x/2) 23 | * - i 24 | * i=0 25 | * 26 | * of Chebyshev polynomials Ti at argument x/2. 27 | * 28 | * Coefficients are stored in reverse order, i.e. the zero 29 | * order term is last in the array. Note N is the number of 30 | * coefficients, not the order. 31 | * 32 | * If coefficients are for the interval a to b, x must 33 | * have been transformed to x -> 2(2x - b - a)/(b-a) before 34 | * entering the routine. This maps x from (a, b) to (-1, 1), 35 | * over which the Chebyshev polynomials are defined. 36 | * 37 | * If the coefficients are for the inverted interval, in 38 | * which (a, b) is mapped to (1/b, 1/a), the transformation 39 | * required is x -> 2(2ab/x - b - a)/(b-a). If b is infinity, 40 | * this becomes x -> 4a/x - 1. 41 | * 42 | * 43 | * 44 | * SPEED: 45 | * 46 | * Taking advantage of the recurrence properties of the 47 | * Chebyshev polynomials, the routine requires one more 48 | * addition per loop than evaluating a nested polynomial of 49 | * the same degree. 50 | * 51 | */ 52 | /* chbevl.c */ 53 | 54 | /* 55 | Cephes Math Library Release 2.0: April, 1987 56 | Copyright 1985, 1987 by Stephen L. Moshier 57 | Direct inquiries to 30 Frost Street, Cambridge, MA 02140 58 | */ 59 | 60 | double chbevl(double x, double array[] , int n ) { 61 | double b0, b1, b2, *p; 62 | int i; 63 | 64 | p = array; 65 | b0 = *p++; 66 | b1 = 0.0; 67 | i = n - 1; 68 | 69 | do { 70 | b2 = b1; 71 | b1 = b0; 72 | b0 = x * b1 - b2 + *p++; 73 | } while( --i ); 74 | 75 | return( 0.5*(b0-b2) ); 76 | } 77 | -------------------------------------------------------------------------------- /msmbuilder/hmm/cephes/mtherr.c: -------------------------------------------------------------------------------- 1 | /* mtherr.c 2 | * 3 | * Library common error handling routine 4 | * 5 | * 6 | * 7 | * SYNOPSIS: 8 | * 9 | * char *fctnam; 10 | * int code; 11 | * int mtherr(); 12 | * 13 | * mtherr( fctnam, code ); 14 | * 15 | * 16 | * 17 | * DESCRIPTION: 18 | * 19 | * This routine may be called to report one of the following 20 | * error conditions (in the include file mconf.h). 21 | * 22 | * Mnemonic Value Significance 23 | * 24 | * DOMAIN 1 argument domain error 25 | * SING 2 function singularity 26 | * OVERFLOW 3 overflow range error 27 | * UNDERFLOW 4 underflow range error 28 | * TLOSS 5 total loss of precision 29 | * PLOSS 6 partial loss of precision 30 | * EDOM 33 Unix domain error code 31 | * ERANGE 34 Unix range error code 32 | * 33 | * The default version of the file prints the function name, 34 | * passed to it by the pointer fctnam, followed by the 35 | * error condition. The display is directed to the standard 36 | * output device. The routine then returns to the calling 37 | * program. 
Users may wish to modify the program to abort by 38 | * calling exit() under severe error conditions such as domain 39 | * errors. 40 | * 41 | * Since all error conditions pass control to this function, 42 | * the display may be easily changed, eliminated, or directed 43 | * to an error logging device. 44 | * 45 | * SEE ALSO: 46 | * 47 | * mconf.h 48 | * 49 | */ 50 | 51 | /* 52 | Cephes Math Library Release 2.0: April, 1987 53 | Copyright 1984, 1987 by Stephen L. Moshier 54 | Direct inquiries to 30 Frost Street, Cambridge, MA 02140 55 | */ 56 | 57 | #include 58 | #include "mconf.h" 59 | 60 | int merror = 0; 61 | 62 | /* Notice: the order of appearance of the following 63 | * messages is bound to the error codes defined 64 | * in mconf.h. 65 | */ 66 | static char *ermsg[7] = { 67 | "unknown", /* error code 0 */ 68 | "domain", /* error code 1 */ 69 | "singularity", /* et seq. */ 70 | "overflow", 71 | "underflow", 72 | "total loss of precision", 73 | "partial loss of precision" 74 | }; 75 | 76 | 77 | int mtherr(char* name, int code) 78 | { 79 | 80 | /* Display string passed by calling program, 81 | * which is supposed to be the name of the 82 | * function in which the error occurred: 83 | */ 84 | printf( "\n%s ", name ); 85 | 86 | /* Set global error message word */ 87 | merror = code; 88 | 89 | /* Display error message defined 90 | * by the code argument. 91 | */ 92 | if( (code <= 0) || (code >= 7) ) 93 | code = 0; 94 | printf( "%s error\n", ermsg[code] ); 95 | 96 | /* Return to calling 97 | * program 98 | */ 99 | return( 0 ); 100 | } 101 | -------------------------------------------------------------------------------- /msmbuilder/hmm/cephes/polevl.c: -------------------------------------------------------------------------------- 1 | /* polevl.c 2 | * p1evl.c 3 | * 4 | * Evaluate polynomial 5 | * 6 | * 7 | * 8 | * SYNOPSIS: 9 | * 10 | * int N; 11 | * double x, y, coef[N+1], polevl[]; 12 | * 13 | * y = polevl( x, coef, N ); 14 | * 15 | * 16 | * 17 | * DESCRIPTION: 18 | * 19 | * Evaluates polynomial of degree N: 20 | * 21 | * 2 N 22 | * y = C + C x + C x +...+ C x 23 | * 0 1 2 N 24 | * 25 | * Coefficients are stored in reverse order: 26 | * 27 | * coef[0] = C , ..., coef[N] = C . 28 | * N 0 29 | * 30 | * The function p1evl() assumes that coef[N] = 1.0 and is 31 | * omitted from the array. Its calling arguments are 32 | * otherwise the same as polevl(). 33 | * 34 | * 35 | * SPEED: 36 | * 37 | * In the interest of speed, there are no checks for out 38 | * of bounds arithmetic. This routine is used by most of 39 | * the functions in the library. Depending on available 40 | * equipment features, the user may wish to rewrite the 41 | * program in microcode or assembly language. 42 | * 43 | */ 44 | 45 | 46 | /* 47 | Cephes Math Library Release 2.1: December, 1988 48 | Copyright 1984, 1987, 1988 by Stephen L. Moshier 49 | Direct inquiries to 30 Frost Street, Cambridge, MA 02140 50 | */ 51 | 52 | 53 | double polevl(double x, double coef[], int N) 54 | { 55 | double ans; 56 | int i; 57 | double *p; 58 | 59 | p = coef; 60 | ans = *p++; 61 | i = N; 62 | 63 | do 64 | ans = ans * x + *p++; 65 | while( --i ); 66 | 67 | return( ans ); 68 | } 69 | 70 | /* p1evl() */ 71 | /* N 72 | * Evaluate polynomial when coefficient of x is 1.0. 73 | * Otherwise same as polevl. 
74 | */ 75 | 76 | double p1evl(double x, double coef[], int N) 77 | { 78 | double ans; 79 | double *p; 80 | int i; 81 | 82 | p = coef; 83 | ans = x + *p++; 84 | i = N-1; 85 | 86 | do 87 | ans = ans * x + *p++; 88 | while( --i ); 89 | 90 | return( ans ); 91 | } 92 | -------------------------------------------------------------------------------- /msmbuilder/hmm/src/include/GaussianHMMFitter.h: -------------------------------------------------------------------------------- 1 | #ifndef MIXTAPE_GAUSSIAN_HMM_FITTER_H 2 | #define MIXTAPE_GAUSSIAN_HMM_FITTER_H 3 | 4 | #include "HMMFitter.h" 5 | 6 | namespace msmbuilder { 7 | 8 | /** 9 | * This subclass of HMMFitter computes Gaussian HMMs. 10 | */ 11 | template 12 | class GaussianHMMFitter : public HMMFitter { 13 | public: 14 | GaussianHMMFitter(void* owner, int n_states, int n_features, int n_iter, const double* log_startprob); 15 | 16 | ~GaussianHMMFitter(); 17 | 18 | void set_means_and_variances(const double* means, const double* variances); 19 | 20 | void initialize_sufficient_statistics(); 21 | 22 | void compute_log_likelihood(const Trajectory& trajectory, 23 | std::vector >& frame_log_probability) const; 24 | 25 | void accumulate_sufficient_statistics(const Trajectory& trajectory, 26 | const std::vector >& frame_log_probability, 27 | const std::vector >& posteriors, 28 | const std::vector >& fwdlattice, 29 | const std::vector >& bwdlattice); 30 | 31 | void get_obs(double* output); 32 | 33 | void get_obs2(double* output); 34 | 35 | void do_mstep(); 36 | private: 37 | void* owner; 38 | std::vector obs, obs2, a0, a1, a2; 39 | }; 40 | 41 | } // namespace msmbuilder 42 | 43 | #endif -------------------------------------------------------------------------------- /msmbuilder/hmm/src/include/VonMisesHMMFitter.h: -------------------------------------------------------------------------------- 1 | #ifndef MIXTAPE_GAUSSIAN_HMM_FITTER_H 2 | #define MIXTAPE_GAUSSIAN_HMM_FITTER_H 3 | 4 | #include "HMMFitter.h" 5 | 6 | namespace msmbuilder { 7 | 8 | /** 9 | * This subclass of HMMFitter computes von Mises HMMs. 
10 | */ 11 | template 12 | class VonMisesHMMFitter : public HMMFitter { 13 | public: 14 | VonMisesHMMFitter(void* owner, int n_states, int n_features, int n_iter, const double* log_startprob); 15 | 16 | ~VonMisesHMMFitter(); 17 | 18 | void set_means_and_kappas(const double* means, const double* kappas); 19 | 20 | void initialize_sufficient_statistics(); 21 | 22 | void compute_log_likelihood(const Trajectory& trajectory, 23 | std::vector >& frame_log_probability) const; 24 | 25 | void accumulate_sufficient_statistics(const Trajectory& trajectory, 26 | const std::vector >& frame_log_probability, 27 | const std::vector >& posteriors, 28 | const std::vector >& fwdlattice, 29 | const std::vector >& bwdlattice); 30 | 31 | void get_cosobs(double* output); 32 | 33 | void get_sinobs(double* output); 34 | 35 | void do_mstep(); 36 | private: 37 | void* owner; 38 | std::vector cosobs, sinobs, means, kappas; 39 | }; 40 | 41 | } // namespace msmbuilder 42 | 43 | #endif -------------------------------------------------------------------------------- /msmbuilder/io/__init__.py: -------------------------------------------------------------------------------- 1 | from .gather_metadata import (gather_metadata, GenericParser, 2 | NumberedRunsParser, HierarchyParser, ParseWarning) 3 | from .io import (backup, preload_top, preload_tops, load_meta, load_generic, 4 | load_trajs, save_meta, render_meta, save_generic, save_trajs, 5 | itertrajs) 6 | from .project_template import TemplateProject -------------------------------------------------------------------------------- /msmbuilder/io/sampling/__init__.py: -------------------------------------------------------------------------------- 1 | from .sampling import sample_dimension, sample_states, sample_msm -------------------------------------------------------------------------------- /msmbuilder/io_templates/twitter-bootstrap.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | {{title}} 8 | 9 | 10 | 14 | 18 | 19 | 20 | 21 | 25 | 26 | 27 |
28 | {{content}} 29 |
30 | 31 | -------------------------------------------------------------------------------- /msmbuilder/libdistance/.gitignore: -------------------------------------------------------------------------------- 1 | libdistance.cpp -------------------------------------------------------------------------------- /msmbuilder/libdistance/src/cdist.hpp: -------------------------------------------------------------------------------- 1 | #include "distance_kernels.h" 2 | 3 | 4 | void cdist_double(const double* XA, const double* XB, const char* metric, 5 | npy_intp na, npy_intp nb, npy_intp m, double* out) 6 | 7 | { 8 | npy_intp i, j, k; 9 | const double *u, *v; 10 | double (*metricfunc) (const double *u, const double *v, npy_intp n) = \ 11 | metric_double(metric); 12 | if (metricfunc == NULL) { 13 | fprintf(stderr, "Error"); 14 | return; 15 | } 16 | 17 | k = 0; 18 | for (i = 0; i < na; i++) { 19 | for (j = 0; j < nb; j++) { 20 | u = XA + m * i; 21 | v = XB + m * j; 22 | out[k++] = metricfunc(u, v, m); 23 | } 24 | } 25 | } 26 | 27 | 28 | void cdist_float(const float* XA, const float* XB, const char* metric, 29 | npy_intp na, npy_intp nb, npy_intp m, double* out) 30 | 31 | { 32 | npy_intp i, j, k; 33 | const float *u, *v; 34 | double (*metricfunc) (const float *u, const float *v, npy_intp n) = \ 35 | metric_float(metric); 36 | if (metricfunc == NULL) { 37 | fprintf(stderr, "Error"); 38 | return; 39 | } 40 | 41 | k = 0; 42 | for (i = 0; i < na; i++) { 43 | for (j = 0; j < nb; j++) { 44 | u = XA + m * i; 45 | v = XB + m * j; 46 | out[k++] = metricfunc(u, v, m); 47 | } 48 | } 49 | } -------------------------------------------------------------------------------- /msmbuilder/libdistance/src/dist.hpp: -------------------------------------------------------------------------------- 1 | #include "distance_kernels.h" 2 | 3 | 4 | void dist_double(const double* X, const double* y, const char* metric, npy_intp n, 5 | npy_intp m, double* out) 6 | { 7 | npy_intp i; 8 | const double *u; 9 | double (*metricfunc) (const double *u, const double *v, npy_intp n) = \ 10 | metric_double(metric); 11 | if (metricfunc == NULL) { 12 | fprintf(stderr, "Error"); 13 | return; 14 | } 15 | 16 | for (i = 0; i < n; i++) { 17 | u = X + m * i; 18 | out[i] = metricfunc(u, y, m); 19 | } 20 | } 21 | 22 | 23 | void dist_double_X_indices(const double* X, const double* y, const char* metric, 24 | npy_intp n, npy_intp m, const npy_intp* X_indices, 25 | npy_intp n_X_indices, double* out) 26 | { 27 | npy_intp i, ii; 28 | const double *u; 29 | double (*metricfunc) (const double *u, const double *v, npy_intp n) = \ 30 | metric_double(metric); 31 | if (metricfunc == NULL) { 32 | fprintf(stderr, "Error"); 33 | return; 34 | } 35 | 36 | for (ii = 0; ii < n_X_indices; ii++) { 37 | i = X_indices[ii]; 38 | u = X + m * i; 39 | out[ii] = metricfunc(u, y, m); 40 | } 41 | } 42 | 43 | 44 | void dist_float(const float* X, const float* y, const char* metric, npy_intp n, 45 | npy_intp m, double* out) 46 | { 47 | npy_intp i; 48 | const float *u; 49 | double (*metricfunc) (const float *u, const float *v, npy_intp n) = \ 50 | metric_float(metric); 51 | if (metricfunc == NULL) { 52 | fprintf(stderr, "Error"); 53 | return; 54 | } 55 | 56 | for (i = 0; i < n; i++) { 57 | u = X + m * i; 58 | out[i] = metricfunc(u, y, m); 59 | } 60 | } 61 | 62 | void dist_float_X_indices(const float* X, const float* y, const char* metric, 63 | npy_intp n, npy_intp m, const npy_intp* X_indices, 64 | npy_intp n_X_indices, double* out) 65 | { 66 | npy_intp i, ii; 67 | const float *u; 68 
| double (*metricfunc) (const float *u, const float *v, npy_intp n) = \ 69 | metric_float(metric); 70 | if (metricfunc == NULL) { 71 | fprintf(stderr, "Error"); 72 | return; 73 | } 74 | 75 | for (ii = 0; ii < n_X_indices; ii++) { 76 | i = X_indices[ii]; 77 | u = X + m * i; 78 | out[ii] = metricfunc(u, y, m); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /msmbuilder/libdistance/src/pdist.hpp: -------------------------------------------------------------------------------- 1 | #include "distance_kernels.h" 2 | 3 | 4 | void pdist_double(const double* X, const char* metric, npy_intp n, npy_intp m, 5 | double* out) 6 | { 7 | npy_intp i, j, k; 8 | const double *u, *v; 9 | double (*metricfunc) (const double *u, const double *v, npy_intp n) = \ 10 | metric_double(metric); 11 | if (metricfunc == NULL) { 12 | fprintf(stderr, "Error"); 13 | return; 14 | } 15 | 16 | k = 0; 17 | for (i = 0; i < n; i++) { 18 | for (j = i+1; j < n; j++) { 19 | u = X + m * i; 20 | v = X + m * j; 21 | out[k++] = metricfunc(u, v, m); 22 | } 23 | } 24 | } 25 | 26 | void pdist_double_X_indices(const double* X, const char* metric, npy_intp n, 27 | npy_intp m, const npy_intp* X_indices, 28 | npy_intp n_X_indices, double* out) 29 | { 30 | npy_intp i, ii, j, jj, k; 31 | const double *u, *v; 32 | double (*metricfunc) (const double *u, const double *v, npy_intp n) = \ 33 | metric_double(metric); 34 | if (metricfunc == NULL) { 35 | fprintf(stderr, "Error"); 36 | return; 37 | } 38 | 39 | k = 0; 40 | for (ii = 0; ii < n_X_indices; ii++) { 41 | i = X_indices[ii]; 42 | for (jj = ii+1; jj < n_X_indices; jj++) { 43 | j = X_indices[jj]; 44 | u = X + m * i; 45 | v = X + m * j; 46 | out[k++] = metricfunc(u, v, m); 47 | } 48 | } 49 | } 50 | 51 | 52 | void pdist_float(const float* X, const char* metric, npy_intp n, npy_intp m, 53 | double* out) 54 | { 55 | npy_intp i, j, k; 56 | const float *u, *v; 57 | double (*metricfunc) (const float *u, const float *v, npy_intp n) = \ 58 | metric_float(metric); 59 | if (metricfunc == NULL) { 60 | fprintf(stderr, "Error"); 61 | return; 62 | } 63 | 64 | k = 0; 65 | for (i = 0; i < n; i++) { 66 | for (j = i+1; j < n; j++) { 67 | u = X + m * i; 68 | v = X + m * j; 69 | out[k++] = metricfunc(u, v, m); 70 | } 71 | } 72 | } 73 | void pdist_float_X_indices(const float* X, const char* metric, npy_intp n, 74 | npy_intp m, const npy_intp* X_indices, 75 | npy_intp n_X_indices, double* out) 76 | { 77 | npy_intp i, ii, j, jj, k; 78 | const float *u, *v; 79 | double (*metricfunc) (const float *u, const float *v, npy_intp n) = \ 80 | metric_float(metric); 81 | if (metricfunc == NULL) { 82 | fprintf(stderr, "Error"); 83 | return; 84 | } 85 | 86 | k = 0; 87 | for (ii = 0; ii < n_X_indices; ii++) { 88 | i = X_indices[ii]; 89 | for (jj = ii+1; jj < n_X_indices; jj++) { 90 | j = X_indices[jj]; 91 | u = X + m * i; 92 | v = X + m * j; 93 | out[k++] = metricfunc(u, v, m); 94 | } 95 | } 96 | } -------------------------------------------------------------------------------- /msmbuilder/libdistance/src/sumdist.hpp: -------------------------------------------------------------------------------- 1 | #include "distance_kernels.h" 2 | 3 | double sumdist_double(const double* X, const char* metric, npy_intp n, npy_intp m, 4 | const npy_intp* pairs, npy_intp p) 5 | { 6 | npy_intp i; 7 | double s = 0; 8 | const double *u, *v; 9 | double (*metricfunc) (const double *u, const double *v, npy_intp n) = \ 10 | metric_double(metric); 11 | if (metricfunc == NULL) { 12 | fprintf(stderr, 
"Error"); 13 | return -1; 14 | } 15 | 16 | for (i = 0; i < p; i++) { 17 | u = X + m * pairs[2*i]; 18 | v = X + m * pairs[2*i+1]; 19 | s += metricfunc(u, v, m); 20 | } 21 | 22 | return s; 23 | } 24 | 25 | 26 | double sumdist_float(const float* X, const char* metric, npy_intp n, npy_intp m, 27 | const npy_intp* pairs, npy_intp p) 28 | { 29 | npy_intp i; 30 | double s = 0; 31 | const float *u, *v; 32 | double (*metricfunc) (const float *u, const float *v, npy_intp n) = \ 33 | metric_float(metric); 34 | if (metricfunc == NULL) { 35 | fprintf(stderr, "Error"); 36 | return -1; 37 | } 38 | for (i = 0; i < p; i++) { 39 | u = X + m * pairs[2*i]; 40 | v = X + m * pairs[2*i+1]; 41 | s += metricfunc(u, v, m); 42 | } 43 | 44 | return s; 45 | } 46 | -------------------------------------------------------------------------------- /msmbuilder/lumping/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | 3 | from .pcca import PCCA 4 | from .pcca_plus import PCCAPlus 5 | from .mvca import MVCA 6 | from .bace import BACE 7 | 8 | __all__ = ["PCCA", "PCCAPlus", "MVCA", "BACE"] 9 | -------------------------------------------------------------------------------- /msmbuilder/msm/.gitignore: -------------------------------------------------------------------------------- 1 | _markovstatemodel.c 2 | _metzner_mcmc_fast.c 3 | _ratematrix.c* 4 | -------------------------------------------------------------------------------- /msmbuilder/msm/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from .core import * 3 | from .msm import MarkovStateModel 4 | from .ratematrix import ContinuousTimeMSM 5 | from .bayesmsm import BayesianMarkovStateModel 6 | from .implied_timescales import implied_timescales 7 | from .bayes_ratematrix import BayesianContinuousTimeMSM 8 | -------------------------------------------------------------------------------- /msmbuilder/msm/_markovstatemodel.pyx: -------------------------------------------------------------------------------- 1 | # Author: Robert McGibbon 2 | # Contributors: 3 | # Copyright (c) 2014, Stanford University 4 | # All rights reserved. 5 | 6 | import numpy as np 7 | 8 | cdef extern from "transmat_mle_prinz.h": 9 | int transmat_mle_prinz(const double* C, int n_states, 10 | double tol, double* T, double* pi) 11 | 12 | def _transmat_mle_prinz(double[:, ::1] C, double tol=1e-10): 13 | """Compute a maximum likelihood reversible transition matrix, given 14 | a set of directed transition counts. 15 | 16 | Algorithim 1 of Prinz et al.[1] 17 | 18 | Parameters 19 | ---------- 20 | C : (input) 2d array of shape=(n_states, n_states) 21 | The directed transition counts, in C (row-major) order. 22 | tol : (input) float 23 | Convergence tolerance. The algorithm will iterate until the 24 | change in the log-likelihood is less than `tol`. 25 | 26 | Returns 27 | ------- 28 | T : (output) pointer to output 2d array of shape=(n_states, n_states) 29 | Once the algorithim is completed, the resulting transition 30 | matrix will be written to `T`. 31 | populations : array, shape = (n_states_,) 32 | The equilibrium population (stationary left eigenvector) of T 33 | 34 | References 35 | ---------- 36 | .. [1] Prinz, Jan-Hendrik, et al. "Markov models of molecular kinetics: 37 | Generation and validation." J Chem. Phys. 134.17 (2011): 174105. 
38 | """ 39 | 40 | cdef int n_states = len(C) 41 | if n_states == 0: 42 | return np.zeros((0, 0)), np.zeros(0) 43 | 44 | if len(C[0]) != n_states: 45 | raise ValueError('C must be square') 46 | cdef double[:, ::1] T = np.zeros((n_states, n_states)) 47 | cdef double[::1] pi = np.zeros(n_states) 48 | cdef int n_iter 49 | 50 | n_iter = transmat_mle_prinz(&C[0,0], n_states, tol, &T[0,0], &pi[0]) 51 | if n_iter < 0: 52 | # diagnose the error 53 | msg = ' Error code=%d' % n_iter 54 | if np.any(np.less(C, 0)): 55 | msg = 'Domain error. C must be positive.' + msg 56 | if np.any(np.sum(C, axis=1) == 0): 57 | msg = 'Row-sums of C must be positive.' + msg 58 | if n_iter == -3: 59 | msg = 'Likelihood not converged.' + msg 60 | raise ValueError(msg) 61 | 62 | return np.array(T), np.array(pi) 63 | -------------------------------------------------------------------------------- /msmbuilder/msm/implied_timescales.py: -------------------------------------------------------------------------------- 1 | # Author: Christian Schwantes 2 | # Contributors: 3 | # Copyright (c) 2014, Stanford University 4 | # All rights reserved. 5 | 6 | 7 | import numpy as np 8 | from ..utils import param_sweep 9 | from . import MarkovStateModel 10 | 11 | 12 | def implied_timescales(sequences, lag_times, n_timescales=10, 13 | msm=None, n_jobs=1, verbose=0): 14 | """ 15 | Calculate the implied timescales for a given MSM. 16 | 17 | Parameters 18 | ---------- 19 | sequences : list of array-like 20 | List of sequences, or a single sequence. Each 21 | sequence should be a 1D iterable of state 22 | labels. Labels can be integers, strings, or 23 | other orderable objects. 24 | lag_times : array-like 25 | Lag times to calculate implied timescales at. 26 | n_timescales : int, optional 27 | Number of timescales to calculate. 28 | msm : msmbuilder.msm.MarkovStateModel, optional 29 | Instance of an MSM to specify parameters other 30 | than the lag time. If None, then the default 31 | parameters (as implemented by msmbuilder.msm.MarkovStateModel) 32 | will be used. 33 | n_jobs : int, optional 34 | Number of jobs to run in parallel 35 | 36 | Returns 37 | ------- 38 | timescales : np.ndarray, shape = [n_models, n_timescales] 39 | The slowest timescales (in units of lag times) for each 40 | model. 41 | """ 42 | 43 | if msm is None: 44 | msm = MarkovStateModel() 45 | 46 | param_grid = {'lag_time' : lag_times} 47 | models = param_sweep(msm, sequences, param_grid, n_jobs=n_jobs, 48 | verbose=verbose) 49 | timescales = [m.timescales_ for m in models] 50 | n_timescales = min(n_timescales, min(len(ts) for ts in timescales)) 51 | timescales = np.array([ts[:n_timescales] for ts in timescales]) 52 | return timescales 53 | -------------------------------------------------------------------------------- /msmbuilder/msm/markov_appreciation.py: -------------------------------------------------------------------------------- 1 | # Author: Muneeb Sultan 2 | # Contributors: Matthew Harrigan 3 | # Copyright (c) 2016, Stanford University 4 | # All rights reserved. 
5 | 6 | 7 | def show_markov_appreciation(): 8 | from PIL import Image 9 | import requests 10 | from io import BytesIO 11 | response = requests.get("https://upload.wikimedia.org/wikipedia/commons/" 12 | "thumb/7/70/AAMarkov.jpg/330px-AAMarkov.jpg") 13 | img = Image.open(BytesIO(response.content)) 14 | img.show() 15 | -------------------------------------------------------------------------------- /msmbuilder/msm/src/metzner_mcmc.h: -------------------------------------------------------------------------------- 1 | #ifndef METZNER_MCMC_STEP_H 2 | #define METZNER_MCMC_STEP_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | void 9 | metzner_mcmc_step(const double* Z, const double* N, double* K, 10 | double* Q, const double* random, double* sc, int n_states, 11 | int n_steps); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /msmbuilder/msm/src/transmat_mle_prinz.h: -------------------------------------------------------------------------------- 1 | #ifndef TRANSMAT_MLE_PRINZ_H 2 | #define TRANSMAT_MLE_PRINZ_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | int transmat_mle_prinz(const double* C, int n_states, double tol, 9 | double* T, double* pi); 10 | #ifdef __cplusplus 11 | } 12 | #endif 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /msmbuilder/msm/validation/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from .bootstrapmsm import BootStrapMarkovStateModel -------------------------------------------------------------------------------- /msmbuilder/msm/validation/transmat_errorbar.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def create_perturb_params(countsmat, transmat=None): 5 | ''' 6 | Computes transition probabilities and standard errors of the transition probabilities due to 7 | finite sampling using the MSM counts matrix. First, the transition probabilities are computed 8 | by dividing the each element c_ij by the row-sumemd counts of row i. THe standard errors are then 9 | computed by first computing the standard deviation of the transition probability, treating each count 10 | as a Bernoulli process with p = t_ij (std = (t_ij - t_ij ^2)^0.5). This is then divided by the 11 | square root of the row-summed counts of row i to obtain the standard error. 12 | 13 | Parameters: 14 | ---------- 15 | countsmat: np.ndarray 16 | The msm counts matrix 17 | transmat: np.ndarray 18 | If you have a transition matrix you want to use (e.g. MLE symmetrized), you can supply that here. This 19 | function will use the transition probabilities from this matrix to calculate the Bernoulli standard deviations, 20 | which will be divided by the row-summed counts in the original supplied counts matrix. 
21 | 22 | Returns: 23 | ----------- 24 | transmat, np.ndarray: 25 | The MSM transition matrix 26 | scale, np.ndarray: 27 | The matrix of standard errors for each transition probability 28 | ''' 29 | norm = np.sum(countsmat, axis=1) 30 | if not transmat: 31 | transmat = (countsmat.transpose() / norm).transpose() 32 | counts = (np.ones((len(transmat), len(transmat))) * norm).transpose() 33 | scale = ((transmat - transmat ** 2) ** 0.5 / counts ** 0.5) + 10 ** -15 34 | return transmat, scale 35 | 36 | 37 | def perturb_tmat(transmat, scale): 38 | ''' 39 | Perturbs each nonzero entry in the MSM transition matrix by treating it as a Gaussian random variable 40 | with mean t_ij and standard deviation equal to the standard error computed using "create_perturb_params". 41 | Returns a sampled transition matrix that takes into consideration errors due to finite sampling 42 | (useful for boostrapping, etc.) 43 | 44 | Parameters: 45 | ---------- 46 | transmat: np.ndarray: 47 | The transition matrix, whose elements serve as the means of the Gaussian random variables 48 | scale: np.ndarray: 49 | The matrix of standard errors. For transition probability t_ij, this is assumed to be the standard 50 | error of the mean of a binomial distribution with p = transition probability and number of observations 51 | equal to the summed counts in row i. 52 | 53 | ''' 54 | output = np.vectorize(np.random.normal)(transmat, scale) 55 | output[np.where(output < 0)] = 0 56 | return (output.transpose() / np.sum(output, axis=1)).transpose() 57 | 58 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/0-test-install.py: -------------------------------------------------------------------------------- 1 | """This script tests your python installation as it pertains to running project templates. 2 | 3 | MSMBuilder supports Python 2.7 and 3.3+ and has some necessary dependencies 4 | like numpy, scipy, and scikit-learn. This templated project enforces 5 | some more stringent requirements to make sure all the users are more-or-less 6 | on the same page and to allow developers to exploit more helper libraries. 7 | 8 | You can modify the template scripts to work for your particular set-up, 9 | but it's probably easier to install `conda` and get the packages we 10 | recommend. 11 | 12 | {{header}} 13 | """ 14 | 15 | import textwrap 16 | 17 | # Show intro text 18 | paragraphs = __doc__.split('\n\n') 19 | for p in paragraphs: 20 | print(textwrap.fill(p)) 21 | print() 22 | 23 | warnings = 0 24 | 25 | ## Test for python 3.5 26 | import sys 27 | 28 | if sys.version_info < (3, 5): 29 | print(textwrap.fill( 30 | "These scripts were all developed on Python 3.5, " 31 | "which is the current, stable release of Python. " 32 | "In particular, we use subprocess.run " 33 | "(and probably some other new features). " 34 | "You can easily modify the scripts to work on older versions " 35 | "of Python, but why not just upgrade? We like Continuum's " 36 | "Anaconda Python distribution for a simple install (without root)." 37 | )) 38 | print() 39 | warnings += 1 40 | 41 | ## Test for matplotlib 42 | try: 43 | import matplotlib as plt 44 | except ImportError: 45 | print(textwrap.fill( 46 | "These scripts try to make some mildly intesting plots. " 47 | "That requires `matplotlib`." 48 | )) 49 | print() 50 | warnings += 1 51 | 52 | ## Test for seaborn 53 | try: 54 | import seaborn as sns 55 | except ImportError: 56 | print(textwrap.fill( 57 | "The default matplotlib styling is a little ugly. 
" 58 | "By default, these scripts try to use `seaborn` to make prettier " 59 | "plots. You can remove all the seaborn imports if you don't want " 60 | "to install this library, but why not just install it? Try " 61 | "`conda install seaborn`" 62 | )) 63 | print() 64 | warnings += 1 65 | 66 | ## Test for xdg-open 67 | try: 68 | import subprocess 69 | 70 | subprocess.check_call(['xdg-open', '--version']) 71 | except: 72 | print(textwrap.fill( 73 | "For convenience, the plotting scripts can try to use `xdg-open` " 74 | "to pop up the result of the plot. Use the --display flag on " 75 | "msmb TemplateProject to enable this behavior." 76 | )) 77 | warnings += 1 78 | 79 | ## Report results 80 | if warnings == 0: 81 | print("I didn't find any problems with your installation! Good job.") 82 | print() 83 | else: 84 | print("I found {} warnings, see above. Good luck!".format(warnings)) 85 | print() 86 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/1-get-example-data.py: -------------------------------------------------------------------------------- 1 | """Get sample data for testing and experimenting 2 | 3 | {{header}} 4 | """ 5 | import os 6 | 7 | from msmbuilder.example_datasets import FsPeptide 8 | 9 | FsPeptide("./").cache() 10 | if not os.path.exists("trajs"): 11 | os.symlink("fs_peptide", "trajs") 12 | if not os.path.exists("top.pdb"): 13 | os.symlink("fs_peptide/fs-peptide.pdb", "top.pdb") 14 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Stanford University and the Authors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a 6 | copy of this software and associated documentation files (the "Software"), 7 | to deal in the Software without restriction, including without limitation 8 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 | and/or sell copies of the Software, and to permit persons to whom the 10 | Software is furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 | DEALINGS IN THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/README.md: -------------------------------------------------------------------------------- 1 | My msmb Project 2 | =============== 3 | 4 | Initialized with `msmb TemplateProject` on {{date}} 5 | 6 | Keep notes about your project here. 7 | 8 | ## Folder layout 9 | 10 | Each new step in MSM construction is in a new folder with symlinks 11 | to the files on which it depends from previous steps. 
12 | 13 | ## Variable names convention 14 | 15 | variable | filename | description 16 | ------------|-------------------|----------------------------------------------- 17 | meta | meta.pandas.pickl | pandas dataframe of trajectory metadata 18 | ftrajs | ftrajs/ | trajectories of feature vectors (dihedrals, ...) 19 | dihed_feat | featurizer.pickl | featurizer object 20 | ttrajs | ttrajs/ | dimensionality-reduced, tica trajectories 21 | tica | tica.pickl | tica object 22 | ktrajs | ktrajs/ | trajecories of cluster indices 23 | kmeans | clusterer.pickl | cluserer object 24 | microktrajs | microktrajs/ | trimmed cluster indices 25 | macroktrajs | macroktrajs/ | macrostate indices 26 | 27 | ## License 28 | 29 | These templates are licensed under the MIT license. Do whatever 30 | you want with them. 31 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/analysis/gather-metadata-plot.py: -------------------------------------------------------------------------------- 1 | """Plot metadata info 2 | 3 | {{header}} 4 | """ 5 | 6 | # ? include "plot_header.template" 7 | # ? from "plot_macros.template" import xdg_open with context 8 | 9 | import numpy as np 10 | import seaborn as sns 11 | from matplotlib import pyplot as plt 12 | 13 | from msmbuilder.io import load_meta, render_meta 14 | 15 | sns.set_style('ticks') 16 | colors = sns.color_palette() 17 | 18 | ## Load 19 | meta = load_meta() 20 | 21 | 22 | ## Histogram of trajectory lengths 23 | def plot_lengths(ax): 24 | lengths_ns = meta['nframes'] * (meta['step_ps'] / 1000) 25 | ax.hist(lengths_ns) 26 | ax.set_xlabel("Lenths / ns", fontsize=16) 27 | ax.set_ylabel("Count", fontsize=16) 28 | 29 | total_label = ("Total length: {us:.2f}" 30 | .format(us=np.sum(lengths_ns) / 1000)) 31 | total_label += r" / $\mathrm{\mu s}$" 32 | ax.annotate(total_label, 33 | xy=(0.05, 0.95), 34 | xycoords='axes fraction', 35 | fontsize=18, 36 | va='top', 37 | ) 38 | 39 | 40 | ## Pie graph 41 | def plot_pie(ax): 42 | lengths_ns = meta['nframes'] * (meta['step_ps'] / 1000) 43 | sampling = lengths_ns.groupby(level=0).sum() 44 | 45 | ax.pie(sampling, 46 | shadow=True, 47 | labels=sampling.index, 48 | colors=sns.color_palette(), 49 | ) 50 | ax.axis('equal') 51 | 52 | 53 | ## Box plot 54 | def plot_boxplot(ax): 55 | meta2 = meta.copy() 56 | meta2['ns'] = meta['nframes'] * (meta['step_ps'] / 1000) 57 | sns.boxplot( 58 | x=meta2.index.names[0], 59 | y='ns', 60 | data=meta2.reset_index(), 61 | ax=ax, 62 | ) 63 | 64 | 65 | ## Plot hist 66 | fig, ax = plt.subplots(figsize=(7, 5)) 67 | plot_lengths(ax) 68 | fig.tight_layout() 69 | fig.savefig("lengths-hist.pdf") 70 | # {{xdg_open('lengths-hist.pdf')}} 71 | 72 | ## Plot pie 73 | fig, ax = plt.subplots(figsize=(7, 5)) 74 | plot_pie(ax) 75 | fig.tight_layout() 76 | fig.savefig("lengths-pie.pdf") 77 | # {{xdg_open('lengths-pie.pdf')}} 78 | 79 | ## Plot box 80 | fig, ax = plt.subplots(figsize=(7, 5)) 81 | plot_boxplot(ax) 82 | fig.tight_layout() 83 | fig.savefig("lengths-boxplot.pdf") 84 | # {{xdg_open('lengths-boxplot.pdf')}} 85 | 86 | ## Save metadata as html table 87 | render_meta(meta, 'meta.pandas.html') 88 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/analysis/gather-metadata.py: -------------------------------------------------------------------------------- 1 | """Find trajectories and associated metadata 2 | 3 | {{header}} 4 | 5 | Meta 6 | ---- 7 | depends: 8 | - trajs 9 | - top.pdb 10 | """ 11 | 12 | 
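# NumberedRunsParser (constructed below) assumes trajectory files named like
# trajs/trajectory-0.xtc, trajs/trajectory-1.xtc, ..., all sharing top.pdb as the
# topology and saved with one frame every 50 ps (step_ps=50).
# gather_metadata("trajs/*.xtc", parser) then collects one metadata row per matched
# file into a pandas DataFrame, and save_meta() writes it to meta.pandas.pickl,
# which the later featurize / tica / cluster / msm template steps all load.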
from msmbuilder.io import gather_metadata, save_meta, NumberedRunsParser 13 | 14 | ## Construct and save the dataframe 15 | parser = NumberedRunsParser( 16 | traj_fmt="trajectory-{run}.xtc", 17 | top_fn="top.pdb", 18 | step_ps=50, 19 | ) 20 | meta = gather_metadata("trajs/*.xtc", parser) 21 | save_meta(meta) 22 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/cluster/cluster-plot.py: -------------------------------------------------------------------------------- 1 | """Plot cluster centers on tICA coordinates 2 | 3 | {{header}} 4 | """ 5 | 6 | # ? include "plot_header.template" 7 | # ? from "plot_macros.template" import xdg_open with context 8 | 9 | import numpy as np 10 | import seaborn as sns 11 | from matplotlib import pyplot as plt 12 | 13 | from msmbuilder.io import load_trajs, load_generic 14 | 15 | sns.set_style('ticks') 16 | colors = sns.color_palette() 17 | 18 | ## Load 19 | kmeans = load_generic('kmeans.pickl') 20 | meta, ktrajs = load_trajs('ktrajs') 21 | meta, ttrajs = load_trajs('ttrajs', meta) 22 | txx = np.concatenate(list(ttrajs.values())) 23 | 24 | 25 | def plot_cluster_centers(ax): 26 | ax.hexbin(txx[:, 0], txx[:, 1], 27 | cmap=sns.cubehelix_palette(as_cmap=True), 28 | mincnt=1, 29 | bins='log', 30 | ) 31 | ax.scatter(kmeans.cluster_centers_[:, 0], 32 | kmeans.cluster_centers_[:, 1], 33 | s=40, c=colors[0], 34 | ) 35 | ax.set_xlabel("tIC 1", fontsize=16) 36 | ax.set_ylabel("tIC 2", fontsize=16) 37 | 38 | 39 | ## Plot 1 40 | fig, ax = plt.subplots(figsize=(7, 5)) 41 | plot_cluster_centers(ax) 42 | fig.tight_layout() 43 | fig.savefig('kmeans-centers.pdf') 44 | # {{xdg_open('kmeans-centers.pdf')}} 45 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/cluster/cluster.py: -------------------------------------------------------------------------------- 1 | """Cluster tICA results 2 | 3 | {{header}} 4 | 5 | Meta 6 | ---- 7 | depends: 8 | - ttrajs 9 | - meta.pandas.pickl 10 | """ 11 | from msmbuilder.io import load_trajs, save_trajs, save_generic 12 | from msmbuilder.cluster import MiniBatchKMeans 13 | 14 | ## Load 15 | meta, ttrajs = load_trajs('ttrajs') 16 | 17 | ## Fit 18 | dim = 5 19 | kmeans = MiniBatchKMeans(n_clusters=500) 20 | kmeans.fit([traj[:, :dim] for traj in ttrajs.values()]) 21 | 22 | ## Transform 23 | ktrajs = {} 24 | for k, v in ttrajs.items(): 25 | ktrajs[k] = kmeans.partial_transform(v[:, :dim]) 26 | 27 | ## Save 28 | print(kmeans.summarize()) 29 | save_trajs(ktrajs, 'ktrajs', meta) 30 | save_generic(kmeans, 'kmeans.pickl') 31 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/cluster/sample-clusters-plot.py: -------------------------------------------------------------------------------- 1 | """Plot the result of sampling clusters 2 | 3 | {{header}} 4 | """ 5 | 6 | # ? include "plot_header.template" 7 | # ? 
from "plot_macros.template" import xdg_open with context 8 | 9 | import numpy as np 10 | import seaborn as sns 11 | from matplotlib import pyplot as plt 12 | 13 | from msmbuilder.io import load_trajs, load_generic 14 | 15 | sns.set_style('ticks') 16 | colors = sns.color_palette() 17 | 18 | ## Load 19 | meta, ttrajs = load_trajs('ttrajs') 20 | txx = np.concatenate(list(ttrajs.values())) 21 | kmeans = load_generic('kmeans.pickl') 22 | 23 | inds = load_generic("cluster-sample-inds.pickl") 24 | coordinates = [ 25 | np.asarray([ttrajs[traj_i][frame_i, :] for traj_i, frame_i in state_inds]) 26 | for state_inds in inds 27 | ] 28 | 29 | 30 | ## Overlay sampled states on histogram 31 | def plot_sampled_states(ax): 32 | ax.hexbin(txx[:, 0], txx[:, 1], 33 | cmap='magma_r', 34 | mincnt=1, 35 | bins='log', 36 | alpha=0.8, 37 | ) 38 | 39 | # Show sampled points as scatter 40 | # Annotate cluster index 41 | for i, coo in enumerate(coordinates): 42 | plt.scatter(coo[:, 0], coo[:, 1], c=colors[i % 6], s=40) 43 | ax.text(kmeans.cluster_centers_[i, 0], 44 | kmeans.cluster_centers_[i, 1], 45 | "{}".format(i), 46 | ha='center', 47 | va='center', 48 | size=16, 49 | bbox=dict( 50 | boxstyle='round', 51 | fc='w', 52 | ec="0.5", 53 | alpha=0.9, 54 | ), 55 | zorder=10, 56 | ) 57 | 58 | ax.set_xlabel("tIC 1", fontsize=16) 59 | ax.set_ylabel("tIC 2", fontsize=16) 60 | 61 | 62 | ## Render a script for loading in vmd 63 | def load_in_vmd(dirname='cluster_samples'): 64 | k = len(inds[0]) 65 | templ = [ 66 | '# autogenerated by msmbuilder', 67 | '# open with `vmd -e load-cluster-samples.tcl`', 68 | '', 69 | '# Defaults', 70 | 'mol default material Transparent', 71 | 'mol default representation NewCartoon', 72 | '', 73 | ] 74 | for i in range(len(inds)): 75 | templ += [ 76 | '# State {}'.format(i), 77 | 'mol new top.pdb', 78 | 'mol addfile {}/{}.xtc waitfor all'.format(dirname, i), 79 | 'animate delete beg 0 end 0 top', 80 | 'mol rename top State-{}'.format(i), 81 | 'mol modcolor 0 top ColorID {}'.format(i), 82 | 'mol drawframes top 0 0:{k}'.format(k=k), 83 | '', 84 | ] 85 | return '\n'.join(templ) 86 | 87 | 88 | ## Plot 89 | fig, ax = plt.subplots(figsize=(7, 5)) 90 | plot_sampled_states(ax) 91 | fig.tight_layout() 92 | fig.savefig('cluster-samples.pdf') 93 | # {{xdg_open('cluster-samples.pdf')}} 94 | 95 | ## Render vmd 96 | with open('load-cluster-samples.tcl', 'w') as f: 97 | f.write(load_in_vmd()) 98 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/cluster/sample-clusters.py: -------------------------------------------------------------------------------- 1 | """Sample conformations from clusters 2 | 3 | {{header}} 4 | 5 | Meta 6 | ---- 7 | depends: 8 | - ../../top.pdb 9 | - ../../trajs 10 | """ 11 | 12 | import mdtraj as md 13 | import os 14 | 15 | from msmbuilder.io.sampling import sample_states 16 | from msmbuilder.io import load_trajs, save_generic, preload_top, backup, load_generic 17 | 18 | ## Load 19 | meta, ttrajs = load_trajs('ttrajs') 20 | kmeans = load_generic("kmeans.pickl") 21 | 22 | ## Sample 23 | inds = sample_states(ttrajs, 24 | kmeans.cluster_centers_, 25 | k=10) 26 | 27 | save_generic(inds, "cluster-sample-inds.pickl") 28 | 29 | ## Make trajectories 30 | top = preload_top(meta) 31 | out_folder = "cluster_samples" 32 | backup(out_folder) 33 | os.mkdir(out_folder) 34 | 35 | for state_i, state_inds in enumerate(inds): 36 | traj = md.join( 37 | md.load_frame(meta.loc[traj_i]['traj_fn'], index=frame_i, top=top) 38 | for traj_i, frame_i in 
state_inds 39 | ) 40 | traj.save("{}/{}.xtc".format(out_folder, state_i)) 41 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/dihedrals/featurize-plot.py: -------------------------------------------------------------------------------- 1 | """Plot diagnostic feature info 2 | 3 | {{header}} 4 | """ 5 | 6 | # ? include "plot_header.template" 7 | # ? from "plot_macros.template" import xdg_open with context 8 | 9 | import numpy as np 10 | from matplotlib import pyplot as plt 11 | import seaborn as sns 12 | from msmbuilder.io import load_trajs 13 | 14 | sns.set_style('ticks') 15 | colors = sns.color_palette() 16 | 17 | ## Load 18 | meta, ftrajs = load_trajs('ftrajs') 19 | # (stride by 100 for memory concerns) 20 | fxx = np.concatenate([fx[::100] for fx in ftrajs.values()]) 21 | 22 | 23 | ## Box and whisker plot 24 | def plot_box(ax): 25 | n_feats_plot = min(fxx.shape[1], 100) 26 | ax.boxplot(fxx[:, :100], 27 | boxprops={'color': colors[0]}, 28 | whiskerprops={'color': colors[0]}, 29 | capprops={'color': colors[0]}, 30 | medianprops={'color': colors[2]}, 31 | ) 32 | 33 | if fxx.shape[1] > 100: 34 | ax.annotate("(Only showing the first 100 features)", 35 | xy=(0.05, 0.95), 36 | xycoords='axes fraction', 37 | fontsize=14, 38 | va='top', 39 | ) 40 | 41 | ax.set_xlabel("Feature Index", fontsize=16) 42 | xx = np.arange(0, n_feats_plot, 10) 43 | ax.set_xticks(xx) 44 | ax.set_xticklabels([str(x) for x in xx]) 45 | ax.set_xlim((0, n_feats_plot + 1)) 46 | ax.set_ylabel("Feature Value", fontsize=16) 47 | 48 | 49 | ## Plot 50 | fig, ax = plt.subplots(figsize=(15, 5)) 51 | plot_box(ax) 52 | fig.tight_layout() 53 | fig.savefig("ftrajs-box.pdf") 54 | # {{ xdg_open('ftrajs-box.pdf') }} 55 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/dihedrals/featurize.py: -------------------------------------------------------------------------------- 1 | """Turn trajectories into dihedral features 2 | 3 | {{header}} 4 | 5 | Meta 6 | ---- 7 | depends: 8 | - meta.pandas.pickl 9 | - trajs 10 | - top.pdb 11 | """ 12 | import mdtraj as md 13 | 14 | from msmbuilder.featurizer import DihedralFeaturizer 15 | from msmbuilder.io import load_meta, preload_tops, save_trajs, save_generic 16 | from multiprocessing import Pool 17 | 18 | ## Load 19 | meta = load_meta() 20 | tops = preload_tops(meta) 21 | dihed_feat = DihedralFeaturizer() 22 | 23 | 24 | ## Featurize logic 25 | def feat(irow): 26 | i, row = irow 27 | traj = md.load(row['traj_fn'], top=tops[row['top_fn']]) 28 | feat_traj = dihed_feat.partial_transform(traj) 29 | return i, feat_traj 30 | 31 | 32 | ## Do it in parallel 33 | with Pool() as pool: 34 | dihed_trajs = dict(pool.imap_unordered(feat, meta.iterrows())) 35 | 36 | ## Save 37 | save_trajs(dihed_trajs, 'ftrajs', meta) 38 | save_generic(dihed_feat, 'featurizer.pickl') 39 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/landmarks/featurize-plot.py: -------------------------------------------------------------------------------- 1 | """Plot statistics from RMSD clustering 2 | 3 | {{header}} 4 | """ 5 | 6 | # ? include "plot_header.template" 7 | # ? 
from "plot_macros.template" import xdg_open with context 8 | 9 | import numpy as np 10 | import seaborn as sns 11 | from matplotlib import pyplot as plt 12 | 13 | from msmbuilder.io import load_trajs 14 | 15 | sns.set_style('ticks') 16 | colors = sns.color_palette() 17 | 18 | ## Load 19 | meta, ftrajs = load_trajs('ftrajs') 20 | # (stride by 100 for memory concerns) 21 | fxx = np.concatenate([fx[::100] for fx in ftrajs.values()]) 22 | 23 | 24 | ## Box and whisker plot 25 | def plot_box(ax): 26 | n_feats_plot = min(fxx.shape[1], 100) 27 | ax.boxplot(fxx[:, :100], 28 | boxprops={'color': colors[0]}, 29 | whiskerprops={'color': colors[0]}, 30 | capprops={'color': colors[0]}, 31 | medianprops={'color': colors[2]}, 32 | ) 33 | 34 | if fxx.shape[1] > 100: 35 | ax.annotate("(Only showing the first 100 features)", 36 | xy=(0.05, 0.95), 37 | xycoords='axes fraction', 38 | fontsize=14, 39 | va='top', 40 | ) 41 | 42 | ax.set_xlabel("Feature Index", fontsize=16) 43 | xx = np.arange(0, n_feats_plot, 10) 44 | ax.set_xticks(xx) 45 | ax.set_xticklabels([str(x) for x in xx]) 46 | ax.set_xlim((0, n_feats_plot + 1)) 47 | ax.set_ylabel("Feature Value", fontsize=16) 48 | 49 | 50 | ## Plot 51 | fig, ax = plt.subplots(figsize=(15, 5)) 52 | plot_box(ax) 53 | fig.tight_layout() 54 | fig.savefig("ftrajs-box.pdf") 55 | # {{ xdg_open('ftrajs-box.pdf') }} 56 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/landmarks/featurize.py: -------------------------------------------------------------------------------- 1 | """Cluster based on RMSD between conformations 2 | 3 | {{header}} 4 | 5 | Meta 6 | ---- 7 | depends: 8 | - meta.pandas.pickl 9 | - trajs 10 | - top.pdb 11 | """ 12 | import mdtraj as md 13 | 14 | from msmbuilder.io import load_meta, itertrajs, save_trajs, preload_top 15 | 16 | ## Load 17 | meta = load_meta() 18 | centroids = md.load("centroids.xtc", top=preload_top(meta)) 19 | 20 | ## Kernel 21 | SIGMA = 0.3 # nm 22 | from msmbuilder.featurizer import RMSDFeaturizer 23 | import numpy as np 24 | 25 | featurizer = RMSDFeaturizer(centroids) 26 | lfeats = {} 27 | for i, traj in itertrajs(meta): 28 | lfeat = featurizer.partial_transform(traj) 29 | lfeat = np.exp(-lfeat ** 2 / (2 * (SIGMA ** 2))) 30 | lfeats[i] = lfeat 31 | save_trajs(lfeats, 'ftrajs', meta) 32 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/landmarks/find-landmarks.py: -------------------------------------------------------------------------------- 1 | """Cluster based on RMSD between conformations 2 | 3 | {{header}} 4 | 5 | Meta 6 | ---- 7 | depends: 8 | - meta.pandas.pickl 9 | - trajs 10 | - top.pdb 11 | """ 12 | import mdtraj as md 13 | 14 | from msmbuilder.cluster import MiniBatchKMedoids 15 | from msmbuilder.io import load_meta, itertrajs, save_generic, backup 16 | 17 | ## Set up parameters 18 | kmed = MiniBatchKMedoids( 19 | n_clusters=500, 20 | metric='rmsd', 21 | ) 22 | 23 | ## Load 24 | meta = load_meta() 25 | 26 | 27 | ## Try to limit RAM usage 28 | def guestimate_stride(): 29 | total_data = meta['nframes'].sum() 30 | want = kmed.n_clusters * 10 31 | stride = max(1, total_data // want) 32 | print("Since we have", total_data, "frames, we're going to stride by", 33 | stride, "during fitting, because this is probably adequate for", 34 | kmed.n_clusters, "clusters") 35 | return stride 36 | 37 | 38 | ## Fit 39 | kmed.fit([traj for _, traj in itertrajs(meta, stride=guestimate_stride())]) 40 | print(kmed.summarize()) 
41 | 42 | ## Save 43 | save_generic(kmed, 'clusterer.pickl') 44 | 45 | 46 | ## Save centroids 47 | def frame(traj_i, frame_i): 48 | # Note: kmedoids does 0-based, contiguous integers so we use .iloc 49 | row = meta.iloc[traj_i] 50 | return md.load_frame(row['traj_fn'], frame_i, top=row['top_fn']) 51 | 52 | 53 | centroids = md.join((frame(ti, fi) for ti, fi in kmed.cluster_ids_), 54 | check_topology=False) 55 | centroids_fn = 'centroids.xtc' 56 | backup(centroids_fn) 57 | centroids.save("centroids.xtc") 58 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/msm/microstate-plot.py: -------------------------------------------------------------------------------- 1 | """Plot populations and eigvectors from microstate MSM 2 | 3 | {{header}} 4 | Meta 5 | ---- 6 | depends: 7 | - kmeans.pickl 8 | - ../ttrajs 9 | """ 10 | 11 | # ? include "plot_header.template" 12 | # ? from "plot_macros.template" import xdg_open with context 13 | 14 | import numpy as np 15 | import seaborn as sns 16 | from matplotlib import pyplot as plt 17 | 18 | from msmbuilder.io import load_trajs, load_generic 19 | 20 | sns.set_style('ticks') 21 | colors = sns.color_palette() 22 | 23 | ## Load 24 | kmeans = load_generic('kmeans.pickl') 25 | msm = load_generic('msm.pickl') 26 | meta, ttrajs = load_trajs('ttrajs') 27 | txx = np.concatenate(list(ttrajs.values())) 28 | 29 | 30 | ## Plot microstates 31 | def plot_microstates(ax): 32 | ax.hexbin(txx[:, 0], txx[:, 1], 33 | cmap='Greys', 34 | mincnt=1, 35 | bins='log', 36 | ) 37 | 38 | scale = 100 / np.max(msm.populations_) 39 | add_a_bit = 5 40 | ax.scatter(kmeans.cluster_centers_[msm.state_labels_, 0], 41 | kmeans.cluster_centers_[msm.state_labels_, 1], 42 | s=scale * msm.populations_ + add_a_bit, 43 | c=msm.left_eigenvectors_[:, 1], 44 | cmap='RdBu' 45 | ) 46 | ax.set_xlabel("tIC 1", fontsize=16) 47 | ax.set_ylabel("tIC 2", fontsize=16) 48 | # ax.colorbar(label='First Dynamical Eigenvector', fontsize=16) 49 | 50 | 51 | ## Plot 52 | fig, ax = plt.subplots(figsize=(7, 5)) 53 | plot_microstates(ax) 54 | fig.tight_layout() 55 | fig.savefig('msm-microstates.pdf') 56 | # {{xdg_open('msm-microstates.pdf')}} 57 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/msm/microstate-traj.py: -------------------------------------------------------------------------------- 1 | """Sample a trajectory from microstate MSM 2 | 3 | {{header}} 4 | 5 | Meta 6 | ---- 7 | depends: 8 | - top.pdb 9 | - trajs 10 | """ 11 | 12 | import mdtraj as md 13 | 14 | from msmbuilder.io import load_trajs, save_generic, preload_top, backup, load_generic 15 | from msmbuilder.io.sampling import sample_msm 16 | 17 | ## Load 18 | meta, ttrajs = load_trajs('ttrajs') 19 | msm = load_generic('msm.pickl') 20 | kmeans = load_generic('kmeans.pickl') 21 | 22 | ## Sample 23 | # Warning: make sure ttrajs and kmeans centers have 24 | # the same number of dimensions 25 | inds = sample_msm(ttrajs, kmeans.cluster_centers_, msm, n_steps=200, stride=1) 26 | save_generic(inds, "msm-traj-inds.pickl") 27 | 28 | ## Make trajectory 29 | top = preload_top(meta) 30 | traj = md.join( 31 | md.load_frame(meta.loc[traj_i]['traj_fn'], index=frame_i, top=top) 32 | for traj_i, frame_i in inds 33 | ) 34 | 35 | ## Save 36 | traj_fn = "msm-traj.xtc" 37 | backup(traj_fn) 38 | traj.save(traj_fn) 39 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/msm/microstate.py: 
-------------------------------------------------------------------------------- 1 | """Make a microstate MSM 2 | 3 | {{header}} 4 | """ 5 | 6 | from msmbuilder.io import load_trajs, save_trajs, save_generic 7 | from msmbuilder.msm import MarkovStateModel 8 | 9 | ## Load 10 | meta, ktrajs = load_trajs('ktrajs') 11 | 12 | ## Fit 13 | msm = MarkovStateModel(lag_time=2, n_timescales=10, verbose=False) 14 | msm.fit(list(ktrajs.values())) 15 | 16 | ## Transform 17 | microktrajs = {} 18 | for k, v in ktrajs.items(): 19 | microktrajs[k] = msm.partial_transform(v) 20 | 21 | ## Save 22 | print(msm.summarize()) 23 | save_generic(msm, 'msm.pickl') 24 | save_trajs(microktrajs, 'microktrajs', meta) 25 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/msm/timescales-plot.py: -------------------------------------------------------------------------------- 1 | """Plot implied timescales vs lagtime 2 | 3 | {{header}} 4 | """ 5 | 6 | # ? include "plot_header.template" 7 | # ? from "plot_macros.template" import xdg_open with context 8 | 9 | import numpy as np 10 | import pandas as pd 11 | import seaborn as sns 12 | from matplotlib import pyplot as plt 13 | 14 | sns.set_style('ticks') 15 | colors = sns.color_palette() 16 | 17 | ## Load 18 | timescales = pd.read_pickle('timescales.pandas.pickl') 19 | n_timescales = len([x for x in timescales.columns 20 | if x.startswith('timescale_')]) 21 | 22 | 23 | ## Implied timescales vs lagtime 24 | def plot_timescales(ax): 25 | for i in range(n_timescales): 26 | ax.scatter(timescales['lag_time'], 27 | timescales['timescale_{}'.format(i)], 28 | s=50, c=colors[0], 29 | label=None, # pandas be interfering 30 | ) 31 | 32 | xmin, xmax = ax.get_xlim() 33 | xx = np.linspace(xmin, xmax) 34 | ax.plot(xx, xx, color=colors[2], label='$y=x$') 35 | ax.legend(loc='best', fontsize=14) 36 | ax.set_xlabel('Lag Time / todo:units', fontsize=18) 37 | ax.set_ylabel('Implied Timescales / todo:units', fontsize=18) 38 | ax.set_xscale('log') 39 | ax.set_yscale('log') 40 | 41 | ## Percent trimmed vs lagtime 42 | def plot_trimmed(ax): 43 | ax.plot(timescales['lag_time'], 44 | timescales['percent_retained'], 45 | 'o-', 46 | label=None, # pandas be interfering 47 | ) 48 | ax.axhline(100, color='k', ls='--', label='100%') 49 | ax.legend(loc='best', fontsize=14) 50 | ax.set_xlabel('Lag Time / todo:units', fontsize=18) 51 | ax.set_ylabel('Retained / %', fontsize=18) 52 | ax.set_xscale('log') 53 | ax.set_ylim((0, 110)) 54 | 55 | ## Plot timescales 56 | fig, ax = plt.subplots(figsize=(7, 5)) 57 | plot_timescales(ax) 58 | fig.tight_layout() 59 | fig.savefig('implied-timescales.pdf') 60 | # {{xdg_open('implied-timescales.pdf')}} 61 | 62 | ## Plot trimmed 63 | fig, ax = plt.subplots(figsize=(7,5)) 64 | plot_trimmed(ax) 65 | fig.tight_layout() 66 | fig.savefig('percent-trimmed.pdf') 67 | # {{xdg_open('percent-trimmed.pdf')}} 68 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/msm/timescales.py: -------------------------------------------------------------------------------- 1 | """Calculate implied timescales vs. 
lagtime 2 | 3 | {{header}} 4 | 5 | Meta 6 | ---- 7 | depends: 8 | - meta.pandas.pickl 9 | - ktrajs 10 | """ 11 | from multiprocessing import Pool 12 | 13 | import pandas as pd 14 | 15 | from msmbuilder.io import load_trajs 16 | from msmbuilder.msm import MarkovStateModel 17 | 18 | ## Load 19 | meta, ktrajs = load_trajs('ktrajs') 20 | 21 | ## Parameters 22 | lagtimes = [2 ** i for i in range(8)] 23 | 24 | 25 | ## Define what to do for parallel execution 26 | def at_lagtime(lt): 27 | msm = MarkovStateModel(lag_time=lt, n_timescales=10, verbose=False) 28 | msm.fit(list(ktrajs.values())) 29 | ret = { 30 | 'lag_time': lt, 31 | 'percent_retained': msm.percent_retained_, 32 | } 33 | for i in range(msm.n_timescales): 34 | ret['timescale_{}'.format(i)] = msm.timescales_[i] 35 | return ret 36 | 37 | 38 | ## Do the calculation 39 | with Pool() as p: 40 | results = p.map(at_lagtime, lagtimes) 41 | 42 | lt_df = pd.DataFrame(results) 43 | 44 | ## Save 45 | print(lt_df.head()) 46 | lt_df.to_pickle('timescales.pandas.pickl') 47 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/plot_header.template: -------------------------------------------------------------------------------- 1 | # ? if use_xdgopen 2 | from subprocess import run 3 | # ? endif 4 | # ? if use_agg 5 | import matplotlib 6 | matplotlib.use('Agg') 7 | # ? endif 8 | # ? if ipynb 9 | %matplotlib inline 10 | # ? endif 11 | 12 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/plot_macros.template: -------------------------------------------------------------------------------- 1 | {% macro xdg_open(fn) -%} 2 | {% if use_xdgopen -%} 3 | Launch with default pdf viewer: 4 | run(['xdg-open', '{{fn}}']) 5 | {%- endif %} 6 | {%- endmacro %} 7 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/rmsd/rmsd-plot.py: -------------------------------------------------------------------------------- 1 | """Plot RMSD results 2 | 3 | {{header}} 4 | """ 5 | 6 | # ? include "plot_header.template" 7 | # ? 
from "plot_macros.template" import xdg_open with context 8 | 9 | import numpy as np 10 | import seaborn as sns 11 | from matplotlib import pyplot as plt 12 | 13 | from msmbuilder.io import load_trajs 14 | 15 | sns.set_style('ticks') 16 | colors = sns.color_palette() 17 | 18 | ## Load 19 | meta, rmsds = load_trajs('rmsds') 20 | 21 | 22 | ## Plot box plot 23 | def plot_boxplot(ax): 24 | catted = np.concatenate([rmsds[k] for k in meta.index]) 25 | sns.boxplot(catted * 10, ax=ax) 26 | ax.set_xlabel(r'RMSD / $\mathrm{\AA}$', fontsize=18) 27 | ax.set_yticks([]) 28 | # ax.set_xticks(fontsize=16) #TODO: fontsize 29 | 30 | 31 | ## Report bad trajectories 32 | def bad_trajs(cutoff=0.7): 33 | bad = {} 34 | for k in meta.index: 35 | arr = rmsds[k] 36 | wh = np.where(np.asarray(arr) > cutoff)[0] 37 | if len(wh) > 0: 38 | bad[k] = wh 39 | return bad 40 | 41 | 42 | ## Plot 43 | fig, ax = plt.subplots(figsize=(6, 3)) 44 | plot_boxplot(ax) 45 | fig.tight_layout() 46 | fig.savefig("rmsd-boxplot.pdf") 47 | # {{xdg_open('rmsd-boxplot.pdf')}} 48 | 49 | ## Bad trajectories 50 | for k, frame_is in bad_trajs().items(): 51 | print("Trajectory", k) 52 | print("Frames:", frame_is) 53 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/rmsd/rmsd.py: -------------------------------------------------------------------------------- 1 | """Check for abnormally high rmsd values to a reference structure 2 | 3 | {{header}} 4 | 5 | Meta 6 | ---- 7 | depends: 8 | - meta.pandas.pickl 9 | - trajs 10 | - top.pdb 11 | 12 | """ 13 | 14 | import mdtraj as md 15 | 16 | from msmbuilder.io import load_meta, itertrajs, save_trajs 17 | 18 | ## Load reference structure 19 | ref = md.load("top.pdb") 20 | meta = load_meta() 21 | 22 | ## Do calculation and save 23 | rmsds = {k: md.rmsd(traj, ref) for k, traj in itertrajs(meta)} 24 | save_trajs(rmsds, 'rmsds', meta) 25 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/tica/tica-plot.py: -------------------------------------------------------------------------------- 1 | """Plot tICA-transformed coordinates 2 | 3 | {{header}} 4 | """ 5 | 6 | # ? include "plot_header.template" 7 | # ? 
from "plot_macros.template" import xdg_open with context 8 | 9 | import numpy as np 10 | import seaborn as sns 11 | from matplotlib import pyplot as plt 12 | 13 | from msmbuilder.io import load_trajs, load_generic 14 | 15 | sns.set_style('ticks') 16 | colors = sns.color_palette() 17 | 18 | ## Load 19 | tica = load_generic('tica.pickl') 20 | meta, ttrajs = load_trajs('ttrajs') 21 | txx = np.concatenate(list(ttrajs.values())) 22 | 23 | 24 | ## Heatmap 25 | def plot_heatmap(ax): 26 | ax.hexbin(txx[:, 0], txx[:, 1], 27 | cmap=sns.cubehelix_palette(as_cmap=True), 28 | mincnt=1, 29 | bins='log' 30 | ) 31 | ax.set_xlabel("tIC 1", fontsize=16) 32 | ax.set_ylabel("tIC 2", fontsize=16) 33 | 34 | 35 | ## Timescales 36 | def plot_timescales(ax): 37 | timestep = meta['step_ps'].unique() 38 | assert len(timestep) == 1, timestep 39 | timestep = float(timestep[0]) # ps 40 | to_us = ( 41 | (1.0 / 1000) # ps -> ns 42 | * (1.0 / 1000) # ns -> us 43 | * (timestep / 1) # steps -> ps 44 | ) 45 | ax.hlines(tica.timescales_ * to_us, 46 | 0, 1, 47 | color=colors[0]) 48 | ax.set_ylabel(r'Timescales / $\mathrm{\mu s}$', fontsize=18) 49 | ax.set_xticks([]) 50 | ax.set_xlim((0, 1)) 51 | 52 | 53 | ## Plot 1 54 | fig, ax = plt.subplots(figsize=(7, 5)) 55 | plot_heatmap(ax) 56 | fig.tight_layout() 57 | fig.savefig('tica-heatmap.pdf') 58 | # {{xdg_open('tica-heatmap.pdf')}} 59 | 60 | ## Plot 2 61 | fig, ax = plt.subplots(figsize=(3, 5)) 62 | plot_timescales(ax) 63 | fig.tight_layout() 64 | fig.savefig('tica-timescales.pdf') 65 | # {{xdg_open('tica-heatmap.pdf')}} 66 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/tica/tica-sample-coordinate-plot.py: -------------------------------------------------------------------------------- 1 | """Plot the result of sampling a tICA coordinate 2 | 3 | {{header}} 4 | """ 5 | 6 | # ? include "plot_header.template" 7 | # ? 
from "plot_macros.template" import xdg_open with context 8 | 9 | import numpy as np 10 | import seaborn as sns 11 | from matplotlib import pyplot as plt 12 | 13 | from msmbuilder.io import load_trajs, load_generic 14 | 15 | sns.set_style('ticks') 16 | colors = sns.color_palette() 17 | 18 | ## Load 19 | meta, ttrajs = load_trajs('ttrajs') 20 | txx = np.concatenate(list(ttrajs.values())) 21 | 22 | inds = load_generic("tica-dimension-0-inds.pickl") 23 | straj = [] 24 | for traj_i, frame_i in inds: 25 | straj += [ttrajs[traj_i][frame_i, :]] 26 | straj = np.asarray(straj) 27 | 28 | 29 | ## Overlay sampled trajectory on histogram 30 | def plot_sampled_traj(ax): 31 | ax.hexbin(txx[:, 0], txx[:, 1], 32 | cmap='magma_r', 33 | mincnt=1, 34 | bins='log', 35 | alpha=0.8, 36 | ) 37 | 38 | ax.plot(straj[:, 0], straj[:, 1], 'o-', label='Sampled') 39 | 40 | ax.set_xlabel("tIC 1", fontsize=16) 41 | ax.set_ylabel("tIC 2", fontsize=16) 42 | ax.legend(loc='best') 43 | 44 | 45 | ## Plot 46 | fig, ax = plt.subplots(figsize=(7, 5)) 47 | plot_sampled_traj(ax) 48 | fig.tight_layout() 49 | fig.savefig('tica-dimension-0-heatmap.pdf') 50 | # {{xdg_open('tica-dimension-0-heatmap.pdf')}} 51 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/tica/tica-sample-coordinate.py: -------------------------------------------------------------------------------- 1 | """Sample tICA coordinates 2 | 3 | {{header}} 4 | 5 | Meta 6 | ---- 7 | depends: 8 | - ../top.pdb 9 | - ../trajs 10 | """ 11 | 12 | import mdtraj as md 13 | 14 | from msmbuilder.io.sampling import sample_dimension 15 | from msmbuilder.io import load_trajs, save_generic, preload_top, backup 16 | 17 | ## Load 18 | meta, ttrajs = load_trajs('ttrajs') 19 | 20 | ## Sample 21 | inds = sample_dimension(ttrajs, 22 | dimension=0, 23 | n_frames=200, scheme='random') 24 | 25 | save_generic(inds, "tica-dimension-0-inds.pickl") 26 | 27 | ## Make trajectory 28 | top = preload_top(meta) 29 | 30 | # Use loc because sample_dimension is nice 31 | traj = md.join( 32 | md.load_frame(meta.loc[traj_i]['traj_fn'], index=frame_i, top=top) 33 | for traj_i, frame_i in inds 34 | ) 35 | 36 | ## Save 37 | traj_fn = "tica-dimension-0.xtc" 38 | backup(traj_fn) 39 | traj.save(traj_fn) 40 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/tica/tica.py: -------------------------------------------------------------------------------- 1 | """Reduce dimensionality with tICA 2 | 3 | {{header}} 4 | Meta 5 | ---- 6 | depends: 7 | - ftrajs 8 | - meta.pandas.pickl 9 | """ 10 | 11 | from msmbuilder.io import load_trajs, save_trajs, save_generic 12 | from msmbuilder.decomposition import tICA 13 | 14 | ## Load 15 | tica = tICA(n_components=5, lag_time=10, kinetic_mapping=True) 16 | meta, ftrajs = load_trajs("ftrajs") 17 | 18 | ## Fit 19 | tica.fit(ftrajs.values()) 20 | 21 | ## Transform 22 | ttrajs = {} 23 | for k, v in ftrajs.items(): 24 | ttrajs[k] = tica.partial_transform(v) 25 | 26 | ## Save 27 | save_trajs(ttrajs, 'ttrajs', meta) 28 | save_generic(tica, 'tica.pickl') 29 | -------------------------------------------------------------------------------- /msmbuilder/scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msmbuilder/msmbuilder/515fd5c27836c797692d600216b5eb224dfc1c5d/msmbuilder/scripts/__init__.py -------------------------------------------------------------------------------- 
/msmbuilder/scripts/msmb.py: -------------------------------------------------------------------------------- 1 | """Statistical models for biomolecular dynamics""" 2 | from __future__ import print_function, absolute_import, division 3 | import sys 4 | from ..cmdline import App 5 | from ..commands import * 6 | from ..version import version 7 | # the commands register themselves when they're imported 8 | 9 | # Load external commands which register themselves 10 | # with entry point msmbuilder.commands 11 | from pkg_resources import iter_entry_points 12 | 13 | for ep in iter_entry_points("msmbuilder.commands"): 14 | external_command = ep.load() 15 | # Some groups start with numbers for ordering 16 | # Some start with descriptions e.g. "MSM" 17 | # Let's set the group to start with ZZZ to put plugins last. 18 | external_command._group = "ZZZ-External_" + external_command._group 19 | 20 | 21 | class MSMBuilderApp(App): 22 | pass 23 | 24 | 25 | def main(): 26 | try: 27 | app = MSMBuilderApp(name='MSMBuilder', description=__doc__) 28 | app.start() 29 | except RuntimeError as e: 30 | sys.exit("Error: %s" % e) 31 | except Exception as e: 32 | message = """\ 33 | An unexpected error has occurred with MSMBuilder (version %s), please 34 | consider sending the following traceback to MSMBuilder GitHub issue tracker at: 35 | https://github.com/msmbuilder/msmbuilder/issues 36 | """ 37 | print(message % version, file=sys.stderr) 38 | raise # as if we did not catch it 39 | 40 | 41 | if __name__ == '__main__': 42 | main() 43 | -------------------------------------------------------------------------------- /msmbuilder/src/f2py/f2pyptr.h: -------------------------------------------------------------------------------- 1 | #ifndef F2PYPTR_H_ 2 | #define F2PYPTR_H_ 3 | 4 | #include 5 | 6 | void *f2py_pointer(PyObject *obj) 7 | { 8 | #if PY_VERSION_HEX < 0x03000000 9 | if (PyCObject_Check(obj)) { 10 | return PyCObject_AsVoidPtr(obj); 11 | } 12 | #endif 13 | #if PY_VERSION_HEX >= 0x02070000 14 | if (PyCapsule_CheckExact(obj)) { 15 | return PyCapsule_GetPointer(obj, NULL); 16 | } 17 | #endif 18 | PyErr_SetString(PyExc_ValueError, "Not an object containing a void ptr"); 19 | return NULL; 20 | } 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /msmbuilder/src/scipy_lapack.h: -------------------------------------------------------------------------------- 1 | #ifndef MIXTAPE_SCIPY_LAPACK 2 | #define MIXTAPE_SCIPY_LAPACK 3 | 4 | #include 5 | #include "f2py/f2pyptr.h" 6 | 7 | typedef int sgemm_t(const char *transa, const char *transb, const int *m, const int *n, const int *k, const float *alpha, const float *a, const int *lda, float *b, const int *ldb, const float *beta, float *c, const int *ldc); 8 | typedef int spotrf_t(const char *uplo, const int *n, float *a, const int *lda, int *info); 9 | typedef int strtrs_t(const char *uplo, const char *trans, const char *diag, const int *n, const int *nrhs, const float *a, const int *lda, float *b, const int *ldb, int * info); 10 | 11 | typedef struct { 12 | sgemm_t *sgemm; 13 | spotrf_t *spotrf; 14 | strtrs_t *strtrs; 15 | } lapack_t; 16 | static lapack_t __lapack; 17 | 18 | 19 | static lapack_t* get_lapack(void) { 20 | PyObject *mod_lapack, *mod_blas, *func, *cpointer; 21 | if (__lapack.sgemm == NULL) { 22 | mod_blas = PyImport_ImportModule("scipy.linalg.blas"); 23 | mod_lapack = PyImport_ImportModule("scipy.linalg.lapack"); 24 | 25 | func = PyObject_GetAttrString(mod_blas, "sgemm"); 26 | cpointer = 
PyObject_GetAttrString(func, "_cpointer"); 27 | __lapack.sgemm = (sgemm_t*) f2py_pointer(cpointer); 28 | 29 | func = PyObject_GetAttrString(mod_lapack, "spotrf"); 30 | cpointer = PyObject_GetAttrString(func, "_cpointer"); 31 | __lapack.spotrf = (spotrf_t*) f2py_pointer(cpointer); 32 | 33 | func = PyObject_GetAttrString(mod_lapack, "strtrs"); 34 | cpointer = PyObject_GetAttrString(func, "_cpointer"); 35 | __lapack.strtrs = (strtrs_t*) f2py_pointer(cpointer); 36 | } 37 | 38 | return &__lapack; 39 | } 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /msmbuilder/src/triu_utils.pyx: -------------------------------------------------------------------------------- 1 | """ 2 | Utilities related to indexing upper triangular matrices with a diagonal 3 | offset of 1. The semantics match ``numpy.triu_indices(n, k=1)`` 4 | """ 5 | from numpy cimport npy_intp 6 | 7 | 8 | cdef inline npy_intp ij_to_k(npy_intp i, npy_intp j, npy_intp n) nogil: 9 | """2D (i, j) square matrix index to linearized upper triangular index 10 | 11 | [ 0 a0 a1 a2 a3 ] (i=0,j=1) -> 0 12 | [ 0 0 a4 a5 a6 ] (i=0,j=2) -> 1 13 | [ 0 0 0 a7 a8 ] (i=1,j=3) -> 5 14 | [ 0 0 0 0 a9 ] etc 15 | [ 0 0 0 0 0 ] (i=4,j=5) -> 9 16 | 17 | For further explanation, see http://stackoverflow.com/a/27088560/1079728 18 | 19 | Parameters 20 | ---------- 21 | i : int 22 | Row index 23 | j : int 24 | Column index 25 | n : int 26 | Matrix size. The matrix is assumed to be square 27 | 28 | Returns 29 | ------- 30 | k : int 31 | Linearized upper triangular index 32 | 33 | See Also 34 | -------- 35 | k_to_ij : the inverse operation 36 | """ 37 | if j > i: 38 | return (n*(n-1)/2) - (n-i)*((n-i)-1)/2 + j - i - 1 39 | return (n*(n-1)/2) - (n-j)*((n-j)-1)/2 + i - j - 1 40 | 41 | 42 | cdef inline void k_to_ij(npy_intp k, npy_intp n, npy_intp *i, npy_intp *j) nogil: 43 | """Linearized upper triangular index to 2D (i, j) index 44 | 45 | [ 0 a0 a1 a2 a3 ] 0 -> (i=0,j=1) 46 | [ 0 0 a4 a5 a6 ] 1 -> (i=0,j=2) 47 | [ 0 0 0 a7 a8 ] 5 -> (i=1,j=3) 48 | [ 0 0 0 0 a9 ] etc 49 | [ 0 0 0 0 0 ] 50 | 51 | http://stackoverflow.com/a/27088560/1079728 52 | 53 | Parameters 54 | ---------- 55 | k : int 56 | Linearized upper triangular index 57 | 58 | Returns 59 | ------- 60 | i : int 61 | Row index, written into *i on exit 62 | j : int 63 | Column index, written into *j on exit 64 | """ 65 | 66 | i[0] = n - 2 - (sqrt(-8.0*k + 4.0*n*(n-1)-7.0)/2.0 - 0.5) 67 | j[0] = k + i[0] + 1 - n*(n-1)/2 + (n-i[0])*(n-i[0]-1)/2 -------------------------------------------------------------------------------- /msmbuilder/tests/.gitignore: -------------------------------------------------------------------------------- 1 | test_cyblas.c -------------------------------------------------------------------------------- /msmbuilder/tests/__init__.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | # Show warnings for our package 4 | warnings.filterwarnings('always', module='msmbuilder.*') 5 | 6 | # Show warnings for packages where we want to be conscious of warnings 7 | warnings.filterwarnings('always', module='mdtraj.*') 8 | warnings.filterwarnings('default', module='scipy.*') 9 | warnings.filterwarnings('default', module='sklearn.*') 10 | -------------------------------------------------------------------------------- /msmbuilder/tests/native.pdb: -------------------------------------------------------------------------------- 1 | ATOM 1 1HH3 ACE 1 4.300 13.100 8.600 1.00 0.00 2 | ATOM 2 CH3 
ACE 1 5.200 13.600 8.800 1.00 0.00 3 | ATOM 3 2HH3 ACE 1 4.900 14.300 9.600 1.00 0.00 4 | ATOM 4 3HH3 ACE 1 5.600 14.200 7.900 1.00 0.00 5 | ATOM 5 C ACE 1 6.100 12.500 9.400 1.00 0.00 6 | ATOM 6 O ACE 1 6.400 12.500 10.600 1.00 0.00 7 | ATOM 7 N ALA 2 6.600 11.600 8.500 1.00 0.00 8 | ATOM 8 H ALA 2 6.500 11.600 7.500 1.00 0.00 9 | ATOM 9 CA ALA 2 7.300 10.400 9.100 1.00 0.00 10 | ATOM 10 HA ALA 2 7.900 10.700 10.000 1.00 0.00 11 | ATOM 11 CB ALA 2 6.200 9.500 9.600 1.00 0.00 12 | ATOM 12 HB1 ALA 2 5.700 9.100 8.800 1.00 0.00 13 | ATOM 13 HB2 ALA 2 6.600 8.700 10.200 1.00 0.00 14 | ATOM 14 HB3 ALA 2 5.400 10.000 10.200 1.00 0.00 15 | ATOM 15 C ALA 2 8.400 9.800 8.200 1.00 0.00 16 | ATOM 16 O ALA 2 8.400 9.900 7.000 1.00 0.00 17 | ATOM 17 N NME 3 9.300 9.000 8.800 1.00 0.00 18 | ATOM 18 H NME 3 9.100 9.000 9.800 1.00 0.00 19 | ATOM 19 CH3 NME 3 10.500 8.400 8.300 1.00 0.00 20 | ATOM 20 1HH3 NME 3 10.700 7.700 9.100 1.00 0.00 21 | ATOM 21 2HH3 NME 3 10.400 8.000 7.300 1.00 0.00 22 | ATOM 22 3HH3 NME 3 11.300 9.100 8.300 1.00 0.00 23 | TER 24 | ENDMDL 25 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_alphaanglefeaturizer.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import msmbuilder.featurizer 4 | from msmbuilder.example_datasets import MinimalFsPeptide, AlanineDipeptide 5 | 6 | warnings.filterwarnings('ignore', message='.*Unlikely unit cell vectors.*') 7 | 8 | 9 | def test_alanine_dipeptide(): 10 | # will produce 0 features because not enough peptides 11 | 12 | trajectories = AlanineDipeptide().get_cached().trajectories 13 | featurizer = msmbuilder.featurizer.AlphaAngleFeaturizer() 14 | nothing = featurizer.transform(trajectories) 15 | 16 | assert (nothing[0].shape[1] == 0) 17 | 18 | 19 | def test_fs_peptide(): 20 | # will produce 36 features 21 | 22 | trajectories = MinimalFsPeptide().get_cached().trajectories 23 | featurizer = msmbuilder.featurizer.AlphaAngleFeaturizer() 24 | alphas = featurizer.transform(trajectories) 25 | 26 | assert (alphas[0].shape[1] == 36) 27 | 28 | 29 | def test_fs_peptide_nosincos(): 30 | # will produce 18 features 31 | 32 | trajectories = MinimalFsPeptide().get_cached().trajectories 33 | featurizer = msmbuilder.featurizer.AlphaAngleFeaturizer(sincos=False) 34 | alphas = featurizer.transform(trajectories) 35 | 36 | assert (alphas[0].shape[1] == 18) 37 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_apm.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import mdtraj as md 4 | 5 | from numpy.testing.decorators import skipif 6 | import numpy as np 7 | from mdtraj.testing import eq 8 | 9 | from msmbuilder.cluster import APM 10 | from msmbuilder.example_datasets import FsPeptide 11 | 12 | rs = np.random.RandomState(42) 13 | 14 | X1 = 0.3 * rs.randn(1000, 10).astype(np.double) 15 | X2 = 0.3 * rs.randn(1000, 10).astype(np.float32) 16 | # trj = md.load(md.testing.get_fn("frame0.pdb")) 17 | trj = FsPeptide().get().trajectories[0] 18 | 19 | @skipif(True) 20 | def test_shapes(): 21 | # make sure all the shapes are correct of the fit parameters 22 | m = APM(n_macrostates=3, metric='euclidean', lag_time=1, random_state=rs) 23 | m.fit([rs.randn(100, 2)]) 24 | assert isinstance(m.labels_, list) 25 | eq(m.labels_[0].shape, (100,)) 26 | 27 | 28 | @skipif(True) 29 | def test_euclidean(): 30 | # test for predict using 
euclidean distance 31 | data = rs.randn(100, 2) 32 | m1 = APM(n_macrostates=2, metric='euclidean', lag_time=1, random_state=rs) 33 | m2 = APM(n_macrostates=2, metric='euclidean', lag_time=1, random_state=rs) 34 | 35 | labels1 = m1.fit_predict([data]) 36 | labels2 = m2.fit([data]).MacroAssignments_ 37 | eq(labels1[0], labels2[0]) 38 | 39 | 40 | @skipif(True) 41 | def test_euclidean_10000(): 42 | # test for predict using euclidean distance 43 | m1 = APM(n_macrostates=2, metric='euclidean', lag_time=10, random_state=rs) 44 | m2 = APM(n_macrostates=2, metric='euclidean', lag_time=10, random_state=rs) 45 | data = rs.randn(10000, 2) 46 | labels1 = m1.fit_predict([data]) 47 | labels2 = m2.fit([data]).MacroAssignments_ 48 | eq(labels1[0], labels2[0]) 49 | 50 | 51 | @skipif(True) 52 | def test_rmsd(): 53 | # test for predict using rmsd 54 | m1 = APM(n_macrostates=4, metric='rmsd', lag_time=1, random_state=rs) 55 | m2 = APM(n_macrostates=4, metric='rmsd', lag_time=1, random_state=rs) 56 | labels1 = m1.fit_predict([trj]) 57 | labels2 = m2.fit([trj]).MacroAssignments_ 58 | 59 | eq(labels1[0], labels2[0]) 60 | 61 | 62 | @skipif(True) 63 | def test_dtype(): 64 | X = rs.randn(100, 2) 65 | X32 = X.astype(np.float32) 66 | X64 = X.astype(np.float64) 67 | m1 = APM(n_macrostates=3, metric='euclidean', lag_time=1, random_state=rs).fit([X32]) 68 | m2 = APM(n_macrostates=3, metric='euclidean', lag_time=1, random_state=rs).fit([X64]) 69 | 70 | eq(m1.labels_[0], m2.labels_[0]) 71 | eq(m1.MacroAssignments_[0], m2.MacroAssignments_[0]) 72 | eq(m1.fit_predict([X32])[0], m2.fit_predict([X64])[0]) 73 | eq(m1.fit_predict([X32])[0], m1.MacroAssignments_[0]) 74 | 75 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_build_counts.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | 3 | from msmbuilder.msm import MarkovStateModel,BayesianMarkovStateModel,\ 4 | ContinuousTimeMSM 5 | 6 | from mdtraj.testing import eq 7 | import numpy as np 8 | 9 | def test_build_counts(): 10 | seq=[[0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0]] 11 | counts = np.array([[8, 1, 1], [1, 3, 0], [1, 0, 3]]) 12 | for mdl_type in [MarkovStateModel, BayesianMarkovStateModel, 13 | ContinuousTimeMSM]: 14 | mdl_instance = mdl_type() 15 | mdl_instance.fit(seq) 16 | eq(mdl_instance.countsmat_, counts) 17 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_clustering.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import mdtraj as md 4 | import mdtraj.testing 5 | import numpy as np 6 | import scipy.spatial.distance 7 | 8 | import msmbuilder.cluster 9 | from msmbuilder.example_datasets import FsPeptide 10 | 11 | X1 = 0.3 * np.random.RandomState(0).randn(1000, 10).astype(np.double) 12 | X2 = 0.3 * np.random.RandomState(1).randn(1000, 10).astype(np.float32) 13 | # trj = md.load(md.testing.get_fn("traj.h5")) 14 | trj = FsPeptide().get().trajectories[0][:100] 15 | 16 | 17 | def test_regular_spatial_rmsd(): 18 | model = msmbuilder.cluster.RegularSpatial(d_min=0.01, metric='rmsd') 19 | model.fit([trj]) 20 | 21 | assert isinstance(model.cluster_centers_, md.Trajectory) 22 | assert len(model.cluster_centers_) == model.n_clusters_ 23 | predict = model.predict([trj]) 24 | assert isinstance(predict, list) and len(predict) == 1 25 | assert len(predict[0]) == len(trj) 26 | assert 
isinstance(predict[0], np.ndarray) and predict[0].dtype == np.intp 27 | 28 | 29 | def test_regular_spatial(): 30 | model = msmbuilder.cluster.RegularSpatial(d_min=0.8) 31 | 32 | for X in [X1, X2]: 33 | model.fit([X]) 34 | 35 | assert model.cluster_centers_.shape[1] == 10 36 | assert isinstance(model.cluster_centers_, np.ndarray) 37 | assert len(model.cluster_centers_) == model.n_clusters_ 38 | predict = model.predict([X]) 39 | assert isinstance(predict, list) and len(predict) == 1 40 | assert len(predict[0]) == len(X) 41 | assert (isinstance(predict[0], np.ndarray) 42 | and predict[0].dtype == np.intp) 43 | 44 | assert model.cluster_centers_.shape[0] > 200 45 | assert not np.all(scipy.spatial.distance.pdist(X) > model.d_min) 46 | assert np.all(scipy.spatial.distance.pdist(model.cluster_centers_) 47 | > model.d_min) 48 | 49 | assert np.all(np.shape(model.cluster_center_indices_) 50 | == (len(model.cluster_center_indices_), 2)) 51 | 52 | 53 | def test_kcenters_rmsd(): 54 | model = msmbuilder.cluster.KCenters(3, metric='rmsd') 55 | model.fit([trj]) 56 | 57 | assert len(model.cluster_centers_) == 3 58 | assert isinstance(model.cluster_centers_, md.Trajectory) 59 | predict = model.predict([trj]) 60 | assert isinstance(predict, list) and len(predict) == 1 61 | assert len(predict[0]) == len(trj) 62 | assert isinstance(predict[0], np.ndarray) and predict[0].dtype == np.intp 63 | 64 | 65 | def test_kcenters_spatial(): 66 | model = msmbuilder.cluster.KCenters(5) 67 | 68 | for X in [X1, X2]: 69 | model.fit([X]) 70 | 71 | assert model.cluster_centers_.shape[1] == 10 72 | assert isinstance(model.cluster_centers_, np.ndarray) 73 | assert len(model.cluster_centers_) == 5 74 | predict = model.predict([X]) 75 | assert isinstance(predict, list) and len(predict) == 1 76 | assert len(predict[0]) == len(X) 77 | assert (isinstance(predict[0], np.ndarray) 78 | and predict[0].dtype == np.intp) 79 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_convenience.py: -------------------------------------------------------------------------------- 1 | 2 | from msmbuilder.utils import unique 3 | 4 | def test_unique(): 5 | assert unique([1,2,3,3,2,1]) == [1,2,3] 6 | assert unique([3,3,2,2,1,1]) == [3,2,1] 7 | assert unique([3,1,2,1,2,3]) == [3,1,2] 8 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_cyblas_wrapper.py: -------------------------------------------------------------------------------- 1 | # this file gets nose to find the tests that appear in the cython module 2 | from msmbuilder.tests import test_cyblas 3 | 4 | 5 | def test(): 6 | count = 0 7 | for name in dir(test_cyblas): 8 | if name.startswith('test'): 9 | count += 1 10 | yield getattr(test_cyblas, name) 11 | if count == 0: 12 | assert False 13 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_dependencies.py: -------------------------------------------------------------------------------- 1 | import os, pip, sys, warnings 2 | from msmbuilder.example_datasets import has_msmb_data 3 | 4 | def test_installed_packages(): 5 | try: 6 | installed_packages = pip.get_installed_distributions 7 | except: 8 | from pip._internal.utils.misc import get_installed_distributions as installed_packages 9 | 10 | package_names = [package.project_name for package in installed_packages()] 11 | 12 | test_dependencies = ['munkres', 'numdifftools', 'statsmodels', 'hmmlearn'] 13 | 14 | if not hasattr(sys, 
'getwindowsversion'): 15 | test_dependencies += ['cvxpy'] 16 | 17 | for td in test_dependencies: 18 | if td not in package_names: 19 | raise RuntimeError('Please install {} to continue'.format(td)) 20 | 21 | def test_msmb_data(): 22 | if has_msmb_data() is None: 23 | raise RuntimeError('Please install {} to continue'.format('msmb_data')) 24 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_estimator_subclassing.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, absolute_import, division 2 | 3 | import importlib 4 | import inspect 5 | import pkgutil 6 | import warnings 7 | from contextlib import contextmanager 8 | 9 | from sklearn.base import BaseEstimator 10 | 11 | import msmbuilder 12 | import msmbuilder.base 13 | 14 | 15 | def silent_warnings(*args, **kwargs): 16 | print(args, kwargs) 17 | 18 | 19 | @contextmanager 20 | def supress_warnings(): 21 | original_warn = warnings.warn 22 | warnings.warn = silent_warnings 23 | yield 24 | warnings.warn = original_warn 25 | 26 | 27 | def import_all_estimators(pkg): 28 | def estimator_in_module(mod): 29 | for name, obj in inspect.getmembers(mod): 30 | if name.startswith('_'): 31 | continue 32 | if inspect.isclass(obj) and issubclass(obj, BaseEstimator): 33 | yield obj 34 | 35 | with supress_warnings(): 36 | result = {} 37 | for _, modname, ispkg in pkgutil.iter_modules(pkg.__path__): 38 | c = '%s.%s' % (pkg.__name__, modname) 39 | try: 40 | mod = importlib.import_module(c) 41 | if ispkg: 42 | result.update(import_all_estimators(mod)) 43 | for kls in estimator_in_module(mod): 44 | result[kls.__name__] = kls 45 | except ImportError as e: 46 | print('e', e) 47 | continue 48 | 49 | return result 50 | 51 | 52 | def test_all_estimators(): 53 | for key, value in import_all_estimators(msmbuilder).items(): 54 | if 'msmbuilder' in value.__module__: 55 | assert issubclass(value, msmbuilder.base.BaseEstimator), value 56 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_featurizer_subset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from mdtraj.testing import eq 3 | 4 | from msmbuilder.example_datasets import AlanineDipeptide 5 | from msmbuilder.featurizer import AtomPairsFeaturizer, get_atompair_indices 6 | from msmbuilder.featurizer.subset import SubsetAtomPairs, \ 7 | SubsetCosPhiFeaturizer, SubsetCosPsiFeaturizer, \ 8 | SubsetSinPhiFeaturizer, SubsetSinPsiFeaturizer 9 | 10 | 11 | def test_SubsetAtomPairs_1(): 12 | trajectories = AlanineDipeptide().get_cached().trajectories 13 | trj0 = trajectories[0][0] 14 | atom_indices, pair_indices = get_atompair_indices(trj0) 15 | featurizer = AtomPairsFeaturizer(pair_indices) 16 | X_all0 = featurizer.transform(trajectories) 17 | 18 | featurizer = SubsetAtomPairs(pair_indices, trj0) 19 | featurizer.subset = np.arange(len(pair_indices)) 20 | X_all = featurizer.transform(trajectories) 21 | 22 | any([eq(x, x0) for (x, x0) in zip(X_all, X_all0)]) 23 | 24 | 25 | def test_SubsetAtomPairs_2(): 26 | trajectories = AlanineDipeptide().get_cached().trajectories 27 | trj0 = trajectories[0][0] 28 | atom_indices, pair_indices = get_atompair_indices(trj0) 29 | featurizer = AtomPairsFeaturizer(pair_indices) 30 | X_all0 = featurizer.transform(trajectories) 31 | 32 | featurizer = SubsetAtomPairs(pair_indices, trj0, 33 | subset=np.arange(len(pair_indices))) 34 | X_all = 
featurizer.transform(trajectories) 35 | 36 | any([eq(x, x0) for (x, x0) in zip(X_all, X_all0)]) 37 | 38 | 39 | def test_SubsetAtomPairs_3(): 40 | trajectories = AlanineDipeptide().get_cached().trajectories 41 | trj0 = trajectories[0][0] 42 | atom_indices, pair_indices = get_atompair_indices(trj0) 43 | featurizer = AtomPairsFeaturizer(pair_indices) 44 | X_all0 = featurizer.transform(trajectories) 45 | 46 | featurizer = SubsetAtomPairs(pair_indices, trj0, subset=np.array([0, 1])) 47 | X_all = featurizer.transform(trajectories) 48 | 49 | try: 50 | any([eq(x, x0) for (x, x0) in zip(X_all, X_all0)]) 51 | except AssertionError: 52 | pass 53 | else: 54 | raise AssertionError("Did not raise an assertion!") 55 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_kernel_approximation.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import numpy as np 4 | from numpy.testing import assert_array_almost_equal 5 | from sklearn.kernel_approximation import Nystroem as NystroemR 6 | 7 | from msmbuilder.decomposition.kernel_approximation import Nystroem, LandmarkNystroem 8 | 9 | 10 | def test_nystroem_vs_sklearn(): 11 | np.random.seed(42) 12 | X = np.random.randn(100, 5) 13 | 14 | kernel = Nystroem(kernel='linear', random_state=42) 15 | kernelR = NystroemR(kernel='linear', random_state=42) 16 | 17 | y1 = kernel.fit_transform([X])[0] 18 | y2 = kernelR.fit_transform(X) 19 | 20 | assert_array_almost_equal(y1, y2) 21 | 22 | 23 | def test_lndmrk_nystroem_approximation(): 24 | np.random.seed(42) 25 | X = np.random.randn(100, 5) 26 | 27 | u = np.arange(X.shape[0])[5::1] 28 | v = np.arange(X.shape[0])[::1][:u.shape[0]] 29 | lndmrks = X[np.unique((u, v))] 30 | 31 | kernel = LandmarkNystroem(kernel='rbf', random_state=42) 32 | kernelR = NystroemR(kernel='rbf', random_state=42) 33 | 34 | y1_1 = kernel.fit_transform([X])[0] 35 | kernel.landmarks = lndmrks 36 | y1_2 = kernel.fit_transform([X])[0] 37 | 38 | y2 = kernelR.fit_transform(X) 39 | 40 | assert_array_almost_equal(y2, y1_1) 41 | 42 | assert not all((np.abs(y2 - y1_2) > 1E-6).flatten()) 43 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_ksparsetica.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from msmbuilder.decomposition import tICA, KSparseTICA 4 | from msmbuilder.example_datasets import MetEnkephalin 5 | from msmbuilder.featurizer import AtomPairsFeaturizer 6 | 7 | def build_dataset(): 8 | trajs = MetEnkephalin().get().trajectories 9 | 10 | pairs = [] 11 | for i in range(trajs[0].n_atoms): 12 | for j in range(i): 13 | pairs.append((i, j)) 14 | np.random.seed(0) 15 | np.random.shuffle(pairs) 16 | n_pairs = 200 17 | 18 | return AtomPairsFeaturizer(pairs[:n_pairs]).transform([traj[::10] for traj in trajs]) 19 | 20 | def test_MetEnkephalin(): 21 | np.random.seed(0) 22 | data = build_dataset() 23 | n_features = data[0].shape[1] 24 | 25 | # check whether this recovers a single 1-sparse eigenpair without error 26 | kstica = KSparseTICA(n_components=1, k = 1) 27 | _ = kstica.fit_transform(data) 28 | assert (np.sum(kstica.components_ != 0) == 1) 29 | 30 | ## check whether this recovers >1 eigenpair without error 31 | #kstica = KSparseTICA(n_components=2) 32 | #_ = kstica.fit_transform(data) 33 | 34 | ## check whether this recovers all eigenpairs without error 35 | #kstica = KSparseTICA() 36 | #_ = 
kstica.fit_transform(data) 37 | 38 | # check whether we recover the same solution as standard tICA when k = n_features 39 | n_components = 10 40 | kstica = KSparseTICA(n_components=n_components, k=n_features) 41 | tica = tICA(n_components=n_components) 42 | _ = kstica.fit_transform(data) 43 | _ = tica.fit_transform(data) 44 | np.testing.assert_array_almost_equal(kstica.eigenvalues_, tica.eigenvalues_) -------------------------------------------------------------------------------- /msmbuilder/tests/test_muller.py: -------------------------------------------------------------------------------- 1 | from msmbuilder.example_datasets import MullerPotential, load_muller 2 | from msmbuilder.utils import array2d 3 | 4 | 5 | def test_func(): 6 | xx = load_muller(random_state=1110102)['trajectories'] 7 | assert len(xx) == 10 8 | assert xx[0].ndim == 2 9 | assert xx[0].shape[1] == 2 10 | array2d(xx) 11 | 12 | 13 | def test_class(): 14 | xx = MullerPotential(random_state=123122).get()['trajectories'] 15 | assert len(xx) == 10 16 | assert xx[0].ndim == 2 17 | assert xx[0].shape[1] == 2 18 | array2d(xx) 19 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_ndgrid.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | 3 | import numpy as np 4 | 5 | from msmbuilder.cluster import NDGrid 6 | 7 | 8 | def test_ndgrid_1(): 9 | X = np.array([-3, -2, -1, 1, 2, 3]).reshape(-1, 1) 10 | labels = NDGrid(n_bins_per_feature=2).fit([X]).predict([X])[0] 11 | np.testing.assert_array_equal(labels, np.array([0, 0, 0, 1, 1, 1])) 12 | 13 | 14 | def test_ndgrid_2(): 15 | X = np.random.RandomState(0).randn(100, 2) 16 | ndgrid = NDGrid(n_bins_per_feature=2, min=-5, max=5) 17 | labels = ndgrid.fit([X]).predict([X])[0] 18 | 19 | mask0 = np.logical_and(X[:, 0] < 0, X[:, 1] < 0) 20 | assert np.all(labels[mask0] == 0) 21 | mask1 = np.logical_and(X[:, 0] > 0, X[:, 1] < 0) 22 | assert np.all(labels[mask1] == 1) 23 | mask2 = np.logical_and(X[:, 0] < 0, X[:, 1] > 0) 24 | assert np.all(labels[mask2] == 2) 25 | mask3 = np.logical_and(X[:, 0] > 0, X[:, 1] > 0) 26 | assert np.all(labels[mask3] == 3) 27 | 28 | 29 | def test_ndgrid_3(): 30 | X = np.random.RandomState(0).randn(100, 3) 31 | ndgrid = NDGrid(n_bins_per_feature=2, min=-5, max=5) 32 | labels = ndgrid.fit([X]).predict([X])[0] 33 | 34 | operators = [np.less, np.greater] 35 | x = X[:, 0] 36 | y = X[:, 1] 37 | z = X[:, 2] 38 | 39 | it = itertools.product(operators, repeat=3) 40 | 41 | for indx, (op_z, op_y, op_x) in enumerate(it): 42 | mask = np.logical_and.reduce((op_x(x, 0), op_y(y, 0), op_z(z, 0))) 43 | assert np.all(labels[mask] == indx) 44 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_nearest.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division, absolute_import 2 | 3 | import numpy as np 4 | 5 | from msmbuilder.utils import KDTree 6 | 7 | X1 = 0.3 * np.random.RandomState(0).randn(500, 10) 8 | X2 = 0.3 * np.random.RandomState(1).randn(1000, 10) + 10 9 | 10 | 11 | def test_kdtree_k1(): 12 | kdtree = KDTree([X1, X2]) 13 | dists, inds = kdtree.query([ 14 | [0] * 10, 15 | [10] * 10, 16 | [0] * 10 17 | ]) 18 | 19 | assert len(inds) == 3 20 | for subind in inds: 21 | assert len(subind) == 2 22 | 23 | # traj i 24 | assert inds[0][0] == 0 25 | assert inds[1][0] == 1 26 | assert inds[2][0] == 0 27 | 28 | # framei 29 | assert 0 <= 
inds[0][1] < 500 30 | assert 0 <= inds[1][1] < 1000 31 | assert 0 <= inds[2][1] < 500 32 | 33 | # distances 34 | assert len(dists) == 3 35 | for d in dists: 36 | assert 0 <= d < 0.5 37 | 38 | 39 | def test_kdtree_k2(): 40 | kdtree = KDTree([X1, X2]) 41 | dists, inds = kdtree.query([ 42 | [0] * 10, 43 | [10] * 10, 44 | [0] * 10 45 | ], k=2) 46 | 47 | assert len(inds) == 3 48 | 49 | # traj i 50 | for qp in inds[0]: assert qp[0] == 0 51 | for qp in inds[1]: assert qp[0] == 1 52 | for qp in inds[2]: assert qp[0] == 0 53 | 54 | # frame i 55 | for qp in inds[0]: assert 0 <= qp[1] < 500 56 | for qp in inds[1]: assert 0 <= qp[1] < 1000 57 | for qp in inds[2]: assert 0 <= qp[1] < 500 58 | 59 | # distances 60 | assert len(dists) == 3 61 | for d in dists: 62 | assert 0 <= d[0] < 0.5 63 | assert 0 <= d[1] < 0.5 64 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_param_sweep.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | 3 | import numpy as np 4 | import numpy.testing as npt 5 | 6 | from msmbuilder.msm import MarkovStateModel 7 | from msmbuilder.msm import implied_timescales 8 | from msmbuilder.utils import param_sweep 9 | 10 | 11 | def test_both(): 12 | sequences = [np.random.randint(20, size=1000) for _ in range(10)] 13 | lag_times = [1, 5, 10] 14 | 15 | models_ref = [] 16 | for tau in lag_times: 17 | msm = MarkovStateModel(reversible_type='mle', lag_time=tau, 18 | n_timescales=10) 19 | msm.fit(sequences) 20 | models_ref.append(msm) 21 | 22 | timescales_ref = [m.timescales_ for m in models_ref] 23 | 24 | model = MarkovStateModel(reversible_type='mle', lag_time=1, n_timescales=10) 25 | models = param_sweep(model, sequences, {'lag_time': lag_times}, n_jobs=2) 26 | timescales = implied_timescales(sequences, lag_times, msm=model, 27 | n_timescales=10, n_jobs=2) 28 | 29 | print(timescales) 30 | print(timescales_ref) 31 | 32 | if np.abs(models[0].transmat_ - models[1].transmat_).sum() < 1E-6: 33 | raise Exception("you wrote a bad test.") 34 | 35 | for i in range(len(lag_times)): 36 | npt.assert_array_almost_equal(models[i].transmat_, 37 | models_ref[i].transmat_) 38 | npt.assert_array_almost_equal(timescales_ref[i], timescales[i]) 39 | 40 | 41 | def test_multi_params(): 42 | msm = MarkovStateModel() 43 | param_grid = { 44 | 'lag_time': [1, 2, 3], 45 | 'reversible_type': ['mle', 'transpose'] 46 | } 47 | 48 | sequences = np.random.randint(20, size=(10, 1000)) 49 | models = param_sweep(msm, sequences, param_grid, n_jobs=2) 50 | assert len(models) == 6 51 | 52 | # I don't know what the order should be, so I'm just going 53 | # to check that there are no duplicates 54 | params = [] 55 | for m in models: 56 | params.append('%s%d' % (m.reversible_type, m.lag_time)) 57 | 58 | for l in param_grid['lag_time']: 59 | for s in param_grid['reversible_type']: 60 | assert ('%s%d' % (s, l)) in params 61 | 62 | # this is redundant, but w/e 63 | assert len(set(params)) == 6 64 | 65 | 66 | def test_ntimescales(): 67 | # see issue #603 68 | trajs = [np.random.randint(0, 30, 500) for _ in range(5)] 69 | its = implied_timescales(trajs, [1, 2, 3], n_timescales=11) 70 | assert its.shape[1] == 11 71 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_sampling.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from msmbuilder.decomposition import tICA 4 | from 
msmbuilder.io.sampling import sample_dimension 5 | 6 | 7 | def test_sample_dimension(): 8 | np.random.seed(42) 9 | X = np.random.randn(500, 5) 10 | data = [X, X, X] 11 | 12 | tica = tICA(n_components=2, lag_time=1).fit(data) 13 | tica_trajs = {k: tica.partial_transform(v) for k, v in enumerate(data)} 14 | res = sample_dimension(tica_trajs, 0, 10, scheme="linear") 15 | res2 = sample_dimension(tica_trajs, 1, 10, scheme="linear") 16 | 17 | assert len(res) == len(res2) == 10 18 | 19 | def test_sample_dimension_2(): 20 | np.random.seed(42) 21 | X = np.random.randn(500, 5) 22 | data = [X, X, X] 23 | 24 | tica = tICA(n_components=2, lag_time=1).fit(data) 25 | tica_trajs = {k: tica.partial_transform(v) for k, v in enumerate(data)} 26 | res = sample_dimension(tica_trajs, 0, 10, scheme="random") 27 | res2 = sample_dimension(tica_trajs, 1, 10, scheme="edge") 28 | 29 | assert len(res) == len(res2) == 10 30 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_sasa_featurizer.py: -------------------------------------------------------------------------------- 1 | import mdtraj as md 2 | import numpy as np 3 | from mdtraj.testing import eq 4 | 5 | from msmbuilder.featurizer import SASAFeaturizer 6 | from msmbuilder.example_datasets import FsPeptide 7 | 8 | t = FsPeptide().get().trajectories[0][:10] 9 | 10 | def _test_sasa_featurizer(t, value): 11 | sasa = md.shrake_rupley(t) 12 | rids = np.array([a.residue.index for a in t.top.atoms]) 13 | 14 | for i, rid in enumerate(np.unique(rids)): 15 | mask = (rids == rid) 16 | eq(value[:, i], np.sum(sasa[:, mask], axis=1)) 17 | 18 | 19 | def test_sasa_featurizer_1(): 20 | # t = md.load(get_fn('frame0.h5')) 21 | 22 | value = SASAFeaturizer(mode='residue').partial_transform(t) 23 | assert value.shape == (t.n_frames, t.n_residues) 24 | _test_sasa_featurizer(t, value) 25 | 26 | 27 | def test_sasa_featurizer_2(): 28 | # t = md.load(get_fn('frame0.h5')) 29 | 30 | # scramle the order of the atoms, and which residue each is a 31 | # member of 32 | df, bonds = t.top.to_dataframe() 33 | df['resSeq'] = np.random.randint(5, size=(t.n_atoms)) 34 | df['resName'] = df['resSeq'] 35 | t.top = md.Topology.from_dataframe(df, bonds) 36 | 37 | value = SASAFeaturizer(mode='residue').partial_transform(t) 38 | _test_sasa_featurizer(t, value) 39 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_sparsetica.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from msmbuilder.decomposition import tICA, SparseTICA 4 | from msmbuilder.example_datasets import DoubleWell 5 | 6 | 7 | def build_dataset(): 8 | slow = DoubleWell(random_state=0).get_cached().trajectories 9 | data = [] 10 | 11 | # each trajectory is a double-well along the first dof, 12 | # and then 9 degrees of freedom of gaussian white noise. 
13 | for s in slow: 14 | t = np.hstack((s, np.random.randn(len(s), 9))) 15 | data.append(t) 16 | return data 17 | 18 | 19 | def test_doublewell(): 20 | data = build_dataset() 21 | tica = tICA(n_components=1).fit(data) 22 | tic0 = tica.components_[0] 23 | 24 | stica = SparseTICA(n_components=1, verbose=False).fit(data) 25 | stic0 = stica.components_[0] 26 | 27 | np.testing.assert_array_almost_equal(stic0[1:], np.zeros(9)) 28 | np.testing.assert_almost_equal(stic0[0], 0.58, decimal=1) 29 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_strongly_connected_subgraph.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from msmbuilder.msm import _strongly_connected_subgraph 4 | 5 | 6 | def test_completely_disconnected_1(): 7 | # what do you do with 1 state that is not even connected to itself? 8 | tC, m, p_r = _strongly_connected_subgraph(np.zeros((1, 1))) 9 | assert tC.shape == (0, 0) 10 | assert m == {} 11 | assert np.isnan(p_r) 12 | 13 | 14 | def test_completely_disconnected_2(): 15 | tC, m, p_r = _strongly_connected_subgraph(np.zeros((3, 3))) 16 | assert tC.shape == (0, 0) 17 | assert m == {} 18 | assert np.isnan(p_r) 19 | 20 | 21 | def test_one_state(): 22 | # but if that state does have a self-connection, it should be retained 23 | tC, m, p_r = _strongly_connected_subgraph(np.ones((1, 1))) 24 | assert tC.shape == (1, 1) 25 | assert m == {0: 0} 26 | np.testing.assert_almost_equal(p_r, 100) 27 | 28 | 29 | def test_counts_1(): 30 | C = np.array([[1, 0, 0], 31 | [0, 1, 1], 32 | [0, 1, 1]]) 33 | 34 | tC, m, p_r = _strongly_connected_subgraph(np.array(C)) 35 | np.testing.assert_array_equal(tC, np.array([[1, 1], [1, 1]])) 36 | assert m == {1: 0, 2: 1} 37 | np.testing.assert_almost_equal(p_r, 80.0) 38 | 39 | 40 | def test_counts_2(): 41 | C = np.array([[1, 1, 0], 42 | [0, 1, 1], 43 | [0, 1, 1]]) 44 | tC, m, p_r = _strongly_connected_subgraph(np.array(C)) 45 | np.testing.assert_array_equal(tC, np.array([[1, 1], [1, 1]])) 46 | assert m == {1: 0, 2: 1} 47 | np.testing.assert_almost_equal(p_r, 83.333333333333) 48 | 49 | 50 | def test_fully_connected(): 51 | tC, m, p_r = _strongly_connected_subgraph(np.ones((3, 3))) 52 | np.testing.assert_array_almost_equal(tC, np.ones((3, 3))) 53 | assert m == {0: 0, 1: 1, 2: 2} 54 | np.testing.assert_almost_equal(p_r, 100.0) 55 | 56 | 57 | def test_disconnected(): 58 | tC, m, p_r = _strongly_connected_subgraph(np.eye(3)) 59 | assert tC.shape == (1, 1) 60 | assert type(p_r) == np.float64 61 | 62 | 63 | def test_upper_triangular(): 64 | tC, m, p_r = _strongly_connected_subgraph(np.eye(3, k=1)) 65 | assert tC.shape == (0, 0) 66 | assert m == {} 67 | np.testing.assert_almost_equal(p_r, 50.0) 68 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_template_project.py: -------------------------------------------------------------------------------- 1 | from msmbuilder.io import TemplateProject 2 | import tempfile 3 | import shutil 4 | import os 5 | 6 | 7 | def setup_module(): 8 | global WD, PWD 9 | PWD = os.path.abspath(".") 10 | WD = tempfile.mkdtemp() 11 | os.chdir(WD) 12 | 13 | 14 | def teardown_module(): 15 | os.chdir(PWD) 16 | shutil.rmtree(WD) 17 | 18 | 19 | def test_template_project(): 20 | tp = TemplateProject() 21 | tp.do() 22 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_transition_counts.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | from six import PY3 3 | 4 | from msmbuilder.msm import _transition_counts 5 | 6 | 7 | def test_argument(): 8 | # test that first argument must be a list of sequences 9 | with np.testing.assert_raises(ValueError): 10 | _transition_counts([1, 2, 3]) 11 | 12 | 13 | def test_upper_triangular(): 14 | # test a simple example 15 | c, m = _transition_counts([np.arange(10)]) 16 | np.testing.assert_array_equal(c, np.eye(10, k=1)) 17 | assert list(m.keys()) == list(range(10)) 18 | assert list(m.values()) == list(range(10)) 19 | 20 | 21 | def test_lag_time(): 22 | # test the simple example with lag_time > 1 23 | c, m = _transition_counts([range(10)], lag_time=2) 24 | np.testing.assert_array_equal(c, 0.5 * np.eye(10, k=2)) 25 | 26 | 27 | def test_string_labels(): 28 | # try using strings as labels 29 | c, m = _transition_counts([['alpha', 'b', 'b', 'b', 'c']]) 30 | np.testing.assert_array_equal(c, 1.0 * np.array([ 31 | [0, 1, 0], 32 | [0, 2, 1], 33 | [0, 0, 0] 34 | ])) 35 | assert m == {'alpha': 0, 'b': 1, 'c': 2} 36 | 37 | 38 | def test_big_counts(): 39 | # try using really big numbers, and we still want a small transition matrix 40 | c, m = _transition_counts([[100000000, 100000000, 100000001, 100000001]]) 41 | np.testing.assert_array_equal(c, 1.0 * np.array([ 42 | [1, 1], 43 | [0, 1], 44 | ])) 45 | assert m == {100000000: 0, 100000001: 1} 46 | 47 | 48 | def test_no_counts(): 49 | c, m = _transition_counts([[0]]) 50 | 51 | 52 | def test_nan_and_none(): 53 | # deal with NaN, None? 54 | c, m = _transition_counts([[0, np.nan]]) 55 | assert m == {0: 0} 56 | np.testing.assert_array_equal(c, np.zeros((1, 1))) 57 | 58 | c, m = _transition_counts([[np.nan]]) 59 | assert m == {} 60 | np.testing.assert_array_equal(c, np.zeros((0, 0))) 61 | 62 | if not PY3: 63 | c, m = _transition_counts([[None, None]]) 64 | assert m == {} 65 | np.testing.assert_array_equal(c, np.zeros((0, 0))) 66 | 67 | 68 | def test_lag_time_norm(): 69 | X = np.arange(6) 70 | C, _ = _transition_counts([X], lag_time=3) 71 | np.testing.assert_array_almost_equal(C, np.eye(6, k=3) / 3) 72 | 73 | 74 | def test_sliding_window(): 75 | X = np.arange(10) 76 | C1, m1 = _transition_counts([X], lag_time=3, sliding_window=False) 77 | C2, m2 = _transition_counts([X[::3]], sliding_window=True) 78 | np.testing.assert_array_almost_equal(C1, C2) 79 | assert m1 == m2 80 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_transmat_errorbar.py: -------------------------------------------------------------------------------- 1 | from msmbuilder.msm.validation.transmat_errorbar import * 2 | import numpy as np 3 | 4 | 5 | def test_create_perturb_params(): 6 | # Test with a 10x10 counts matrix, with all entries in the counts set to 100 7 | countsmat = 100 * np.ones((10,10)) 8 | params = create_perturb_params(countsmat) 9 | # Check dimensions of outputs are equal to those of inputs 10 | for param in params: 11 | assert np.shape(param) == np.shape(countsmat) 12 | 13 | 14 | def test_perturb_tmat(): 15 | # The transition matrix is perturbed under the CLT approximation, which is only valid for well-sampled data w.r.t. 
transition probability (tprob >> 1 / row-summed counts) 16 | countsmat = 100 * np.ones((10,10)) # 10-state MSM, 1000 counts per state, 100 transition events between states, no zero entries 17 | params = create_perturb_params(countsmat) 18 | new_transmat = (perturb_tmat(params[0], params[1])) 19 | # All transition probabilities are by design nonzero, so there should be no zero entries after the perturbation 20 | assert len(np.where(new_transmat == 0)[0]) == 0 21 | # Now let's assume you have a poorly sampled dataset where all elements in the counts matrix are 1 22 | countsmat = np.ones((10,10)) 23 | params = create_perturb_params(countsmat) 24 | new_transmat = (perturb_tmat(params[0], params[1])) 25 | # Your perturbed transition matrix will have several negative values (set automatically to 0), indicating this method probably isn't appropriate for your dataset 26 | # (This will also cause your distribution of MFPTs to have very obvious outliers to an otherwise approximately Gaussian distribution due to the artificial zeros in the transition matrix) 27 | assert len(np.where(new_transmat == 0)[0]) > 0 28 | 29 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_workflows.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division, absolute_import 2 | 3 | import os 4 | import shlex 5 | import shutil 6 | import subprocess 7 | import tempfile 8 | 9 | from pkg_resources import resource_filename 10 | 11 | 12 | class tempdir(object): 13 | def __enter__(self): 14 | self._curdir = os.path.abspath(os.curdir) 15 | self._tempdir = tempfile.mkdtemp() 16 | os.chdir(self._tempdir) 17 | 18 | def __exit__(self, *exc_info): 19 | os.chdir(self._curdir) 20 | shutil.rmtree(self._tempdir) 21 | 22 | 23 | def shell_lines(resource): 24 | fn = resource_filename('msmbuilder', resource) 25 | buf = '' 26 | with open(fn) as f: 27 | for line in f: 28 | line = line.strip() 29 | if not line or line.startswith('#'): 30 | continue 31 | if line.endswith('\\'): 32 | buf += line.rstrip('\\') 33 | else: 34 | yield buf + ' ' + line 35 | buf = '' 36 | 37 | 38 | def check_call(tokens): 39 | try: 40 | subprocess.check_output(tokens, stderr=subprocess.STDOUT, 41 | universal_newlines=True) 42 | except subprocess.CalledProcessError as e: 43 | print(e.cmd) 44 | print(e.output) 45 | raise 46 | 47 | 48 | class workflow_tester(object): 49 | def __init__(self, fn): 50 | self.fn = fn 51 | self.path = "tests/workflows/{}".format(fn) 52 | self.description = "{}.test_{}".format(__name__, fn) 53 | 54 | def __call__(self, *args, **kwargs): 55 | with tempdir(): 56 | for line in shell_lines(self.path): 57 | check_call(shlex.split(line, posix=False)) 58 | 59 | 60 | def test_workflows(): 61 | for fn in [ 62 | 'basic.sh', 63 | 'rmsd.sh', 64 | 'ghmm.sh', 65 | ]: 66 | yield workflow_tester(fn) 67 | -------------------------------------------------------------------------------- /msmbuilder/tests/workflows/basic.sh: -------------------------------------------------------------------------------- 1 | msmb AlanineDipeptide --data_home ./ 2 | msmb AtomIndices --out atom_indices.txt -p ./alanine_dipeptide/ala2.pdb -d --heavy 3 | msmb AtomPairsFeaturizer --transformed atom_pairs --trjs './alanine_dipeptide/*.dcd' \ 4 | --pair_indices atom_indices.txt --top ./alanine_dipeptide/ala2.pdb --out atom_pairs.pkl 5 | msmb RobustScaler -i atom_pairs/ -t scaled_atom_pairs.h5 6 | msmb tICA -i scaled_atom_pairs.h5 -t atom_pairs_tica.h5
--n_components 4 \ 7 | --shrinkage 0 \ 8 | --kinetic_mapping \ 9 | --lag_time 2 10 | msmb KCenters -i atom_pairs_tica.h5 -t kcenters_clusters.h5 --metric cityblock 11 | msmb MarkovStateModel --inp kcenters_clusters.h5 --out mymsm.pkl 12 | -------------------------------------------------------------------------------- /msmbuilder/tests/workflows/ghmm.sh: -------------------------------------------------------------------------------- 1 | msmb AlanineDipeptide --data_home ./ 2 | 3 | msmb DihedralFeaturizer --transformed feats/ \ 4 | --trjs './alanine_dipeptide/*.dcd' \ 5 | --top ./alanine_dipeptide/ala2.pdb \ 6 | --out featy.pkl 7 | 8 | msmb tICA --inp feats/ --transformed tica_trajs.h5 \ 9 | --n_components 4 \ 10 | --kinetic_mapping \ 11 | --lag_time 2 12 | 13 | msmb GaussianHMM --inp tica_trajs.h5 \ 14 | --out hmm.pkl \ 15 | --n_states 2 16 | -------------------------------------------------------------------------------- /msmbuilder/tests/workflows/rmsd.sh: -------------------------------------------------------------------------------- 1 | msmb AlanineDipeptide --data_home ./ 2 | msmb AtomIndices --out atom_indices.txt \ 3 | -p ./alanine_dipeptide/ala2.pdb \ 4 | -d --heavy 5 | 6 | msmb MiniBatchKMedoids --n_clusters 10 \ 7 | --metric rmsd \ 8 | --inp './alanine_dipeptide/*.dcd' \ 9 | --top ./alanine_dipeptide/ala2.pdb \ 10 | --atom_indices atom_indices.txt \ 11 | --transformed kmedoids_centers.h5 12 | 13 | msmb RegularSpatial --inp './alanine_dipeptide/*.dcd' \ 14 | --transformed rs_rmsd.h5 \ 15 | --metric rmsd \ 16 | --top ./alanine_dipeptide/ala2.pdb \ 17 | --d_min 0.5 18 | 19 | -------------------------------------------------------------------------------- /msmbuilder/tpt/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for analyzing Markov State Models, with an emphasis 3 | on Transition Path Theory. 4 | 5 | These are the canonical references for TPT. Note that TPT 6 | is really a specialization of ideas very familiar to the 7 | mathematical study of Markov chains, and there are many 8 | books, manuscripts in the mathematical literature that 9 | cover the same concepts. 10 | 11 | References 12 | ---------- 13 | .. [1] Weinan, E. and Vanden-Eijnden, E. Towards a theory of 14 | transition paths. J. Stat. Phys. 123, 503-523 (2006). 15 | .. [2] Metzner, P., Schutte, C. & Vanden-Eijnden, E. 16 | Transition path theory for Markov jump processes. 17 | Multiscale Model. Simul. 7, 1192-1219 (2009). 18 | .. [3] Berezhkovskii, A., Hummer, G. & Szabo, A. Reactive 19 | flux and folding pathways in network models of 20 | coarse-grained protein dynamics. J. Chem. Phys. 21 | 130, 205102 (2009). 22 | .. [4] Noe, Frank, et al. "Constructing the equilibrium ensemble of folding 23 | pathways from short off-equilibrium simulations." PNAS 106.45 (2009): 24 | 19011-19016. 
25 | """ 26 | 27 | from __future__ import absolute_import 28 | 29 | from .committor import committors, conditional_committors 30 | from .flux import fluxes, net_fluxes 31 | from .hub import fraction_visited, hub_scores 32 | from .path import paths, top_path 33 | from .mfpt import mfpts 34 | 35 | __all__ = ['fluxes', 'net_fluxes', 'fraction_visited', 36 | 'hub_scores', 'paths', 'top_path', 'committors', 37 | 'conditional_committors', 'mfpts'] 38 | -------------------------------------------------------------------------------- /msmbuilder/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division, absolute_import 2 | from .draw_samples import * 3 | from .io import * 4 | from .param_sweep import * 5 | from .probability import * 6 | from .subsampler import * 7 | from .validation import * 8 | from .compat import * 9 | from .nearest import KDTree 10 | from .divergence import * 11 | from .convenience import * 12 | -------------------------------------------------------------------------------- /msmbuilder/utils/compat.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, absolute_import, division 2 | import os 3 | import functools 4 | import warnings 5 | 6 | # Copyright (C) 2012-2013 Marcus von Appen 7 | # 8 | # This software is provided 'as-is', without any express or implied 9 | # warranty. In no event will the authors be held liable for any damages 10 | # arising from the use of this software. 11 | # 12 | # Permission is granted to anyone to use this software for any purpose, 13 | # including commercial applications, and to alter it and redistribute it 14 | # freely, subject to the following restrictions: 15 | # 16 | # 1. The origin of this software must not be misrepresented; you must not 17 | # claim that you wrote the original software. If you use this software 18 | # in a product, an acknowledgment in the product documentation would be 19 | # appreciated but is not required. 20 | # 2. Altered source versions must be plainly marked as such, and must not be 21 | # misrepresented as being the original software. 22 | # 3. This notice may not be removed or altered from any source distribution. 23 | 24 | 25 | class ExperimentalWarning(Warning): 26 | """Indicates that a certain class, function or behavior is in an 27 | experimental state. 28 | """ 29 | def __init__(self, obj, msg=None): 30 | """Creates a ExperimentalWarning for the specified obj. 31 | 32 | If a message is passed in msg, it will be printed instead of the 33 | default message. 34 | """ 35 | super(ExperimentalWarning, self).__init__() 36 | self.obj = obj 37 | self.msg = msg 38 | 39 | def __str__(self): 40 | if self.msg is None: 41 | line = "Warning: %s is in an experimental state." 
% repr(self.obj) 42 | return os.linesep.join(('', '"' * len(line), line, '"' * len(line))) 43 | return repr(self.msg) 44 | 45 | 46 | def experimental(name=None): 47 | """A simple decorator to mark functions and methods as experimental.""" 48 | def inner(func): 49 | @functools.wraps(func) 50 | def wrapper(*fargs, **kw): 51 | fname = name 52 | if name is None: 53 | fname = func.__name__ 54 | warnings.warn("%s" % fname, category=ExperimentalWarning, 55 | stacklevel=2) 56 | return func(*fargs, **kw) 57 | return wrapper 58 | return inner -------------------------------------------------------------------------------- /msmbuilder/utils/convenience.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division, absolute_import 2 | 3 | def unique(seq): 4 | '''Returns a list of unique items maintaining the order of the original. 5 | ''' 6 | seen = set() 7 | seen_add = seen.add 8 | return [x for x in seq if not (x in seen or seen_add(x))] 9 | -------------------------------------------------------------------------------- /msmbuilder/utils/draw_samples.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division, absolute_import 2 | import numpy as np 3 | import mdtraj as md 4 | 5 | __all__ = ['map_drawn_samples'] 6 | 7 | 8 | def map_drawn_samples(selected_pairs_by_state, trajectories, top=None): 9 | """Lookup trajectory frames using pairs of (trajectory, frame) indices. 10 | 11 | Parameters 12 | ---------- 13 | selected_pairs_by_state : array, dtype=int, shape=(n_states, n_samples, 2) 14 | selected_pairs_by_state[state, sample] gives the (trajectory, frame) 15 | index associated with a particular sample from that state. 16 | trajectories : list(md.Trajectory) or list(np.ndarray) or list(filenames) 17 | The trajectories assocated with sequences, 18 | which will be used to extract coordinates of the state centers 19 | from the raw trajectory data. This can also be a list of np.ndarray 20 | objects or filenames. If they are filenames, mdtraj will be used to 21 | load them 22 | top : md.Topology, optional, default=None 23 | Use this topology object to help mdtraj load filenames 24 | 25 | Returns 26 | ------- 27 | frames_by_state : mdtraj.Trajectory 28 | Output will be a list of trajectories such that frames_by_state[state] 29 | is a trajectory drawn from `state` of length `n_samples`. If 30 | trajectories are numpy arrays, the output will be numpy arrays instead 31 | of md.Trajectories 32 | 33 | Examples 34 | -------- 35 | >>> selected_pairs_by_state = hmm.draw_samples(sequences, 3) 36 | >>> samples = map_drawn_samples(selected_pairs_by_state, trajectories) 37 | 38 | Notes 39 | ----- 40 | YOU are responsible for ensuring that selected_pairs_by_state and 41 | trajectories correspond to the same dataset! 
42 | 43 | See Also 44 | -------- 45 | ghmm.GaussianHMM.draw_samples : Draw samples from GHMM 46 | ghmm.GaussianHMM.draw_centroids : Draw centroids from GHMM 47 | """ 48 | 49 | frames_by_state = [] 50 | 51 | for state, pairs in enumerate(selected_pairs_by_state): 52 | if isinstance(trajectories[0], str): 53 | if top: 54 | process = lambda x, frame: md.load_frame(x, frame, top=top) 55 | else: 56 | process = lambda x, frame: md.load_frame(x, frame) 57 | else: 58 | process = lambda x, frame: x[frame] 59 | 60 | frames = [process(trajectories[trj], frame) for trj, frame in pairs] 61 | try: # If frames are mdtraj Trajectories 62 | # Get an empty trajectory with correct shape and call the join 63 | # method on it to merge trajectories 64 | state_trj = frames[0][0:0].join(frames) 65 | except AttributeError: 66 | state_trj = np.array(frames) # Just a bunch of np arrays 67 | frames_by_state.append(state_trj) 68 | 69 | return frames_by_state 70 | -------------------------------------------------------------------------------- /msmbuilder/utils/io.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division, absolute_import 2 | 3 | import contextlib 4 | import pickle 5 | import warnings 6 | 7 | import numpy as np 8 | from sklearn.externals.joblib import load as jl_load 9 | 10 | __all__ = ['printoptions', 'verbosedump', 'verboseload', 'dump', 'load'] 11 | 12 | warnings.warn("This module might be deprecated in favor of msmbuilder.io", 13 | PendingDeprecationWarning) 14 | 15 | 16 | @contextlib.contextmanager 17 | def printoptions(*args, **kwargs): 18 | original = np.get_printoptions() 19 | np.set_printoptions(*args, **kwargs) 20 | yield 21 | np.set_printoptions(**original) 22 | 23 | 24 | def dump(value, filename, compress=None, cache_size=None): 25 | """Save an arbitrary python object using pickle. 26 | 27 | Parameters 28 | ----------- 29 | value : any Python object 30 | The object to store to disk using pickle. 31 | filename : string 32 | The name of the file in which it is to be stored 33 | compress : None 34 | No longer used 35 | cache_size : positive number, optional 36 | No longer used 37 | 38 | See Also 39 | -------- 40 | load : corresponding loader 41 | """ 42 | if compress is not None or cache_size is not None: 43 | warnings.warn("compress and cache_size are no longer valid options") 44 | 45 | with open(filename, 'wb') as f: 46 | pickle.dump(value, f) 47 | 48 | 49 | def load(filename): 50 | """Load an object that has been saved with dump. 51 | 52 | We try to open it using the pickle protocol. As a fallback, we 53 | use joblib.load. Joblib was the default prior to msmbuilder v3.2 54 | 55 | Parameters 56 | ---------- 57 | filename : string 58 | The name of the file to load. 59 | """ 60 | try: 61 | with open(filename, 'rb') as f: 62 | return pickle.load(f) 63 | except Exception as e1: 64 | try: 65 | return jl_load(filename) 66 | except Exception as e2: 67 | raise IOError( 68 | "Unable to load {} using the pickle or joblib protocol.\n" 69 | "Pickle: {}\n" 70 | "Joblib: {}".format(filename, e1, e2) 71 | ) 72 | 73 | 74 | def verbosedump(value, fn, compress=None): 75 | """Verbose wrapper around dump""" 76 | print('Saving "%s"... (%s)' % (fn, type(value))) 77 | dump(value, fn, compress=compress) 78 | 79 | 80 | def verboseload(fn): 81 | """Verbose wrapper around load. 82 | 83 | Try to use pickle. If that fails, try to use joblib. 84 | """ 85 | print('loading "%s"...' 
% fn) 86 | return load(fn) 87 | -------------------------------------------------------------------------------- /msmbuilder/utils/param_sweep.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division, absolute_import 2 | from sklearn import clone 3 | try: 4 | from sklearn.model_selection import ParameterGrid 5 | except ImportError: 6 | from sklearn.grid_search import ParameterGrid 7 | 8 | from sklearn.externals.joblib import Parallel, delayed 9 | 10 | __all__ = ['param_sweep'] 11 | 12 | 13 | def param_sweep(model, sequences, param_grid, n_jobs=1, verbose=0): 14 | """Fit a series of models over a range of parameters. 15 | 16 | Parameters 17 | ---------- 18 | model : msmbuilder.BaseEstimator 19 | An *instance* of an estimator to be used 20 | to fit data. 21 | sequences : list of array-like 22 | List of sequences, or a single sequence. Each 23 | sequence should be a 1D iterable of state 24 | labels. Labels can be integers, strings, or 25 | other orderable objects. 26 | param_grid : dict or sklearn.grid_search.ParameterGrid 27 | Parameter grid to specify models to fit. See 28 | sklearn.grid_search.ParameterGrid for an explanation 29 | n_jobs : int, optional 30 | Number of jobs to run in parallel using joblib.Parallel 31 | 32 | Returns 33 | ------- 34 | models : list 35 | List of models fit to the data according to 36 | param_grid 37 | """ 38 | 39 | if isinstance(param_grid, dict): 40 | param_grid = ParameterGrid(param_grid) 41 | elif not isinstance(param_grid, ParameterGrid): 42 | raise ValueError("param_grid must be a dict or ParamaterGrid instance") 43 | 44 | # iterable with (model, sequence) as items 45 | iter_args = ((clone(model).set_params(**params), sequences) 46 | for params in param_grid) 47 | 48 | models = Parallel(n_jobs=n_jobs, verbose=verbose)( 49 | delayed(_param_sweep_helper)(args) for args in iter_args) 50 | 51 | return models 52 | 53 | 54 | def _param_sweep_helper(args): 55 | """ 56 | helper for fitting many models on some data 57 | """ 58 | model, sequences = args 59 | model.fit(sequences) 60 | 61 | return model 62 | -------------------------------------------------------------------------------- /msmbuilder/utils/probability.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division, absolute_import 2 | import numpy as np 3 | from sklearn.utils import check_random_state 4 | 5 | __all__ = ['categorical'] 6 | 7 | 8 | def categorical(pvals, size=None, random_state=None): 9 | """Return random integer from a categorical distribution 10 | 11 | Parameters 12 | ---------- 13 | pvals : sequence of floats, length p 14 | Probabilities of each of the ``p`` different outcomes. These 15 | should sum to 1. 16 | size : int or tuple of ints, optional 17 | Defines the shape of the returned array of random integers. If None 18 | (the default), returns a single float. 19 | random_state: RandomState or an int seed, optional 20 | A random number generator instance. 
21 | """ 22 | cumsum = np.cumsum(pvals) 23 | if size is None: 24 | size = (1,) 25 | axis = 0 26 | elif isinstance(size, tuple): 27 | size = size + (1,) 28 | axis = len(size) - 1 29 | else: 30 | raise TypeError('size must be an int or tuple of ints') 31 | 32 | random_state = check_random_state(random_state) 33 | return np.sum(cumsum < random_state.random_sample(size), axis=axis) 34 | -------------------------------------------------------------------------------- /msmbuilder/utils/progressbar/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # progressbar - Text progress bar library for Python. 5 | # Copyright (c) 2005 Nilton Volpato 6 | # 7 | # This library is free software; you can redistribute it and/or 8 | # modify it under the terms of the GNU Lesser General Public 9 | # License as published by the Free Software Foundation; either 10 | # version 2.1 of the License, or (at your option) any later version. 11 | # 12 | # This library is distributed in the hope that it will be useful, 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 | # Lesser General Public License for more details. 16 | # 17 | # You should have received a copy of the GNU Lesser General Public 18 | # License along with this library; if not, write to the Free Software 19 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 20 | 21 | """Text progress bar library for Python. 22 | 23 | A text progress bar is typically used to display the progress of a long 24 | running operation, providing a visual cue that processing is underway. 25 | 26 | The ProgressBar class manages the current progress, and the format of the line 27 | is given by a number of widgets. A widget is an object that may display 28 | differently depending on the state of the progress bar. There are three types 29 | of widgets: 30 | - a string, which always shows itself 31 | 32 | - a ProgressBarWidget, which may return a different value every time its 33 | update method is called 34 | 35 | - a ProgressBarWidgetHFill, which is like ProgressBarWidget, except it 36 | expands to fill the remaining width of the line. 37 | 38 | The progressbar module is very easy to use, yet very powerful. It will also 39 | automatically enable features like auto-resizing when the system supports it. 40 | """ 41 | 42 | from __future__ import absolute_import 43 | __author__ = 'Nilton Volpato' 44 | __author_email__ = 'first-name dot last-name @ gmail.com' 45 | __date__ = '2011-05-14' 46 | __version__ = '2.3' 47 | 48 | from .compat import * 49 | from .widgets import * 50 | from .progressbar import * 51 | -------------------------------------------------------------------------------- /msmbuilder/utils/progressbar/compat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # progressbar - Text progress bar library for Python. 5 | # Copyright (c) 2005 Nilton Volpato 6 | # 7 | # This library is free software; you can redistribute it and/or 8 | # modify it under the terms of the GNU Lesser General Public 9 | # License as published by the Free Software Foundation; either 10 | # version 2.1 of the License, or (at your option) any later version. 
11 | # 12 | # This library is distributed in the hope that it will be useful, 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 | # Lesser General Public License for more details. 16 | # 17 | # You should have received a copy of the GNU Lesser General Public 18 | # License along with this library; if not, write to the Free Software 19 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 20 | 21 | """Compatibility methods and classes for the progressbar module.""" 22 | 23 | 24 | # Python 3.x (and backports) use a modified iterator syntax 25 | # This will allow 2.x to behave with 3.x iterators 26 | try: 27 | next 28 | except NameError: 29 | def next(iter): 30 | try: 31 | # Try new style iterators 32 | return iter.__next__() 33 | except AttributeError: 34 | # Fallback in case of a "native" iterator 35 | return iter.next() 36 | 37 | 38 | # Python < 2.5 does not have "any" 39 | try: 40 | any 41 | except NameError: 42 | def any(iterator): 43 | for item in iterator: 44 | if item: return True 45 | return False 46 | -------------------------------------------------------------------------------- /msmbuilder/utils/subsampler.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division, absolute_import 2 | from sklearn.base import TransformerMixin 3 | from ..base import BaseEstimator 4 | 5 | __all__ = ['Subsampler'] 6 | 7 | 8 | class Subsampler(BaseEstimator, TransformerMixin): 9 | """Convert a list of feature time series (`X_all`) into a `lag_time` 10 | subsampled time series. 11 | 12 | Parameters 13 | ---------- 14 | lag_time : int 15 | The lag time to subsample by 16 | sliding_window : bool, default=True 17 | If True, each time series is transformed into `lag_time` interlaced 18 | sliding-window (not statistically independent) sequences. If 19 | False, each time series is transformed into a single subsampled 20 | time series. 21 | """ 22 | def __init__(self, lag_time, sliding_window=True): 23 | self._lag_time = lag_time 24 | self._sliding_window = sliding_window 25 | 26 | def fit(self, X_all, y=None): 27 | return self 28 | 29 | def transform(self, X_all, y=None): 30 | """Subsample several time series. 31 | 32 | Parameters 33 | ---------- 34 | X_all : list(np.ndarray) 35 | List of feature time series 36 | 37 | Returns 38 | ------- 39 | features : list(np.ndarray), length = len(X_all) 40 | The subsampled trajectories. 41 | """ 42 | if self._sliding_window: 43 | return [X[k::self._lag_time] for k in range(self._lag_time) for X in X_all] 44 | else: 45 | return [X[::self._lag_time] for X in X_all] 46 | -------------------------------------------------------------------------------- /msmbuilder/utils/validation.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division, absolute_import 2 | import numpy as np 3 | import mdtraj as md 4 | 5 | __all__ = ['list_of_1d', 'check_iter_of_sequences', 'array2d'] 6 | 7 | 8 | def list_of_1d(y): 9 | if not hasattr(y, '__iter__') or len(y) == 0: 10 | raise ValueError('Bad input shape') 11 | if not hasattr(y[0], '__iter__'): 12 | return [np.array(y)] 13 | 14 | result = [] 15 | for i, x in enumerate(y): 16 | value = np.array(x) 17 | if value.ndim != 1: 18 | raise ValueError( 19 | "Bad input shape. 
Element %d has shape %s, but " 20 | "should be 1D" % (i, str(value.shape))) 21 | result.append(value) 22 | return result 23 | 24 | 25 | def check_iter_of_sequences(sequences, allow_trajectory=False, ndim=2, 26 | max_iter=None): 27 | """Check that ``sequences`` is a iterable of trajectory-like sequences, 28 | suitable as input to ``fit()`` for estimators following the MSMBuilder 29 | API. 30 | 31 | Parameters 32 | ---------- 33 | sequences : object 34 | The object to check 35 | allow_trajectory : bool 36 | Are ``md.Trajectory``s allowed? 37 | ndim : int 38 | The expected dimensionality of the sequences 39 | max_iter : int, optional 40 | Only check at maximum the first ``max_iter`` entries in ``sequences``. 41 | """ 42 | value = True 43 | for i, X in enumerate(sequences): 44 | if not isinstance(X, np.ndarray): 45 | if (not allow_trajectory) and isinstance(X, md.Trajectory): 46 | value = False 47 | break 48 | if not isinstance(X, md.Trajectory) and X.ndim != ndim: 49 | value = False 50 | break 51 | if max_iter is not None and i >= max_iter: 52 | break 53 | 54 | if not value: 55 | raise ValueError('sequences must be a list of sequences') 56 | 57 | 58 | def array2d(X, dtype=None, order=None, copy=False, force_all_finite=True): 59 | """Returns at least 2-d array with data from X""" 60 | X_2d = np.asarray(np.atleast_2d(X), dtype=dtype, order=order) 61 | if force_all_finite: 62 | _assert_all_finite(X_2d) 63 | if X is X_2d and copy: 64 | X_2d = _safe_copy(X_2d) 65 | return X_2d 66 | 67 | 68 | def _assert_all_finite(X): 69 | """Like assert_all_finite, but only for ndarray.""" 70 | X = np.asanyarray(X) 71 | if (X.dtype.char in np.typecodes['AllFloat'] and not np.isfinite(X.sum()) 72 | and not np.isfinite(X).all()): 73 | raise ValueError("Input contains NaN, infinity" 74 | " or a value too large for %r." % X.dtype) 75 | 76 | def _safe_copy(X): 77 | # Copy, but keep the order 78 | return np.copy(X, order='K') 79 | --------------------------------------------------------------------------------
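A minimal usage sketch of the Subsampler transformer defined in msmbuilder/utils/subsampler.py above, illustrating the two subsampling modes its docstring describes. The input data (a 12-frame, single-feature series) is chosen purely for illustration; the expected outputs follow directly from the transform() implementation shown above.

import numpy as np
from msmbuilder.utils import Subsampler

# One feature time series: 12 frames, a single feature column.
X = np.arange(12).reshape(12, 1)

# sliding_window=False: each input series becomes a single strided series.
plain = Subsampler(lag_time=3, sliding_window=False).transform([X])
assert len(plain) == 1
np.testing.assert_array_equal(plain[0][:, 0], [0, 3, 6, 9])

# sliding_window=True (the default): lag_time interlaced, offset series per input.
interlaced = Subsampler(lag_time=3, sliding_window=True).transform([X])
assert len(interlaced) == 3
np.testing.assert_array_equal(interlaced[1][:, 0], [1, 4, 7, 10])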