├── .github └── PULL_REQUEST_TEMPLATE.md ├── .gitignore ├── .travis.yml ├── LICENSE ├── MANIFEST.in ├── README.md ├── appveyor.yml ├── basesetup.py ├── devtools ├── README.md ├── conda-recipe │ ├── bld.bat │ ├── build.sh │ └── meta.yaml └── travis-ci │ ├── build_docs.sh │ ├── install_miniconda.sh │ ├── set_doc_version.py │ └── update_versions_json.py ├── docs ├── .gitignore ├── LICENSE ├── Makefile ├── _static │ ├── flow-chart.png │ ├── fspeptide.png │ ├── kde-vs-histogram.png │ ├── lengths-hist.png │ ├── logo-200px.png │ ├── logo.png │ ├── msm-microstates.png │ ├── tica-heatmap.png │ ├── tica-movie.gif │ └── tica_vs_pca.png ├── _templates │ └── class.rst ├── advanced_examples │ ├── bayesian-msm.rst │ ├── gmrq-model-selection.rst │ ├── hmm-and-msm.rst │ ├── implied-timescales.rst │ ├── index.rst │ ├── plot-tica-heatmap.rst │ ├── quadwell-n-states.rst │ ├── quadwell.rst │ ├── tica-1.rst │ └── uncertainty.rst ├── apipatterns.rst ├── background.rst ├── bibparse.py ├── changelog.rst ├── cluster.rst ├── conf.py ├── contributing.rst ├── datasets.rst ├── decomposition.rst ├── examples │ ├── Clustering-Comparison.rst │ ├── Fs-Peptide-command-line.rst │ ├── Fs-Peptide-in-RAM.rst │ ├── Fs-Peptide-with-dataset.rst │ ├── GMRQ-Model-Selection.rst │ ├── Ligand-Featurization.rst │ ├── Ward-Clustering.rst │ ├── index.rst │ └── tICA-vs-PCA.rst ├── faq.rst ├── feature_selection.rst ├── featurization.rst ├── figures │ └── kde-vs-histogram.py ├── gmrq.rst ├── hmm.rst ├── index.rst ├── installation.rst ├── io.rst ├── make.bat ├── msm.rst ├── plugins.rst ├── preprocessing.rst ├── publications.bib ├── publications_templ.rst ├── ratematrix.rst ├── requirements.txt ├── sphinxext │ ├── embed.tpl │ └── notebook_sphinxext.py ├── tpt.rst └── tutorial.rst ├── examples ├── .gitignore ├── Clustering-Comparison.ipynb ├── Coarse-graining-with-MVCA.ipynb ├── Fs-Peptide-command-line.ipynb ├── Fs-Peptide-in-RAM.ipynb ├── Fs-Peptide-with-Pipeline.ipynb ├── Fs-Peptide-with-dataset.ipynb ├── GMRQ-Model-Selection.ipynb ├── LICENSE.md ├── Ligand-Featurization.ipynb ├── Ward-Clustering.ipynb ├── advanced │ ├── bayesian-msm.ipynb │ ├── hmm-and-msm.ipynb │ ├── implied-timescales.ipynb │ ├── plot-tica-heatmap.ipynb │ ├── quadwell-n-states.ipynb │ ├── quadwell.ipynb │ └── uncertainty.ipynb └── tICA-vs-PCA.ipynb ├── msmbuilder ├── __init__.py ├── base.py ├── cluster │ ├── .gitignore │ ├── __init__.py │ ├── _kmedoids.pyx │ ├── agglomerative.py │ ├── apm.py │ ├── base.py │ ├── kcenters.py │ ├── kmedoids.py │ ├── minibatchkmedoids.py │ ├── ndgrid.py │ ├── regularspatial.py │ └── src │ │ ├── kmedoids.cc │ │ └── kmedoids.h ├── cmdline.py ├── commands │ ├── __init__.py │ ├── atom_indices.py │ ├── convert_chunked_project.py │ ├── example_datasets.py │ ├── featurizer.py │ ├── fit.py │ ├── fit_transform.py │ ├── implied_timescales.py │ ├── template_project.py │ └── transform.py ├── dataset.py ├── decomposition │ ├── .gitignore │ ├── __init__.py │ ├── _speigh.pyx │ ├── base.py │ ├── kernel_approximation.py │ ├── ksparsetica.py │ ├── ktica.py │ ├── pca.py │ ├── sparsetica.py │ ├── tica.py │ └── utils.py ├── example_datasets │ ├── .gitignore │ ├── __init__.py │ ├── _muller.pyx │ ├── alanine_dipeptide.py │ ├── base.py │ ├── brownian1d.py │ ├── fs_peptide.py │ ├── met_enkephalin.py │ └── muller.py ├── feature_extraction │ └── __init__.py ├── feature_selection │ ├── __init__.py │ ├── base.py │ └── featureselector.py ├── featurizer │ ├── __init__.py │ ├── feature_union.py │ ├── featurizer.py │ ├── indices.py │ ├── multichain.py │ ├── 
multiseq_featuizer.py │ └── subset.py ├── hmm │ ├── .gitignore │ ├── __init__.py │ ├── cephes │ │ ├── README.md │ │ ├── cephes.h │ │ ├── cephes_names.h │ │ ├── chbevl.c │ │ ├── gamma.c │ │ ├── i0.c │ │ ├── i1.c │ │ ├── mconf.h │ │ ├── mtherr.c │ │ ├── polevl.c │ │ ├── psi.c │ │ └── zeta.c │ ├── discrete_approx.py │ ├── gaussian.pyx │ ├── src │ │ ├── GaussianHMMFitter.cpp │ │ ├── VonMisesHMMFitter.cpp │ │ ├── include │ │ │ ├── GaussianHMMFitter.h │ │ │ ├── HMMFitter.h │ │ │ ├── Trajectory.h │ │ │ ├── VonMisesHMMFitter.h │ │ │ └── sse_mathfun.h │ │ └── logsumexp.hpp │ └── vonmises.pyx ├── io │ ├── __init__.py │ ├── gather_metadata.py │ ├── io.py │ ├── project_template.py │ └── sampling │ │ ├── __init__.py │ │ └── sampling.py ├── io_templates │ └── twitter-bootstrap.html ├── libdistance │ ├── .gitignore │ ├── libdistance.pyx │ └── src │ │ ├── assign.hpp │ │ ├── cdist.hpp │ │ ├── dist.hpp │ │ ├── distance_kernels.h │ │ ├── pdist.hpp │ │ └── sumdist.hpp ├── lumping │ ├── __init__.py │ ├── bace.py │ ├── mvca.py │ ├── pcca.py │ └── pcca_plus.py ├── msm │ ├── .gitignore │ ├── __init__.py │ ├── _markovstatemodel.pyx │ ├── _metzner_mcmc_fast.pyx │ ├── _metzner_mcmc_slow.py │ ├── _ratematrix.pyx │ ├── _ratematrix_priors.pyx │ ├── _ratematrix_support.pyx │ ├── bayes_ratematrix.py │ ├── bayesmsm.py │ ├── core.py │ ├── implied_timescales.py │ ├── markov_appreciation.py │ ├── msm.py │ ├── ratematrix.py │ ├── src │ │ ├── metzner_mcmc.c │ │ ├── metzner_mcmc.h │ │ ├── transmat_mle_prinz.c │ │ └── transmat_mle_prinz.h │ └── validation │ │ ├── __init__.py │ │ ├── bootstrapmsm.py │ │ └── transmat_errorbar.py ├── preprocessing │ ├── __init__.py │ ├── base.py │ └── timeseries.py ├── project_templates │ ├── 0-test-install.py │ ├── 1-get-example-data.py │ ├── LICENSE.md │ ├── README.md │ ├── analysis │ │ ├── gather-metadata-plot.py │ │ └── gather-metadata.py │ ├── cluster │ │ ├── cluster-plot.py │ │ ├── cluster.py │ │ ├── sample-clusters-plot.py │ │ └── sample-clusters.py │ ├── dihedrals │ │ ├── featurize-plot.py │ │ └── featurize.py │ ├── landmarks │ │ ├── featurize-plot.py │ │ ├── featurize.py │ │ └── find-landmarks.py │ ├── msm │ │ ├── microstate-plot.py │ │ ├── microstate-traj.py │ │ ├── microstate.py │ │ ├── timescales-plot.py │ │ └── timescales.py │ ├── plot_header.template │ ├── plot_macros.template │ ├── rmsd │ │ ├── rmsd-plot.py │ │ └── rmsd.py │ └── tica │ │ ├── tica-plot.py │ │ ├── tica-sample-coordinate-plot.py │ │ ├── tica-sample-coordinate.py │ │ └── tica.py ├── scripts │ ├── __init__.py │ └── msmb.py ├── src │ ├── cy_blas.pyx │ ├── f2py │ │ └── f2pyptr.h │ ├── scipy_lapack.h │ └── triu_utils.pyx ├── tests │ ├── .gitignore │ ├── __init__.py │ ├── native.pdb │ ├── test_agglomerative.py │ ├── test_alphaanglefeaturizer.py │ ├── test_apm.py │ ├── test_bayes_ratematrix.py │ ├── test_bootstrap_msm.py │ ├── test_build_counts.py │ ├── test_clustering.py │ ├── test_commands.py │ ├── test_commands_exist.py │ ├── test_contactfeaturizers.py │ ├── test_convenience.py │ ├── test_cyblas.pyx │ ├── test_cyblas_wrapper.py │ ├── test_dataset.py │ ├── test_decomposition.py │ ├── test_dependencies.py │ ├── test_divergence.py │ ├── test_estimator_subclassing.py │ ├── test_feature_descriptor.py │ ├── test_feature_selection.py │ ├── test_featureunion.py │ ├── test_featurizer.py │ ├── test_featurizer_subset.py │ ├── test_gather_metadata.py │ ├── test_ghmm.py │ ├── test_kcenters.py │ ├── test_kernel_approximation.py │ ├── test_kmedoids.py │ ├── test_ksparsetica.py │ ├── test_libdistance.py │ ├── test_ligandfeaturizers.py │ 
├── test_lumping.py │ ├── test_metzner_mcmc.py │ ├── test_msm.py │ ├── test_msm_uncertainty.py │ ├── test_muller.py │ ├── test_ndgrid.py │ ├── test_nearest.py │ ├── test_param_sweep.py │ ├── test_preprocessing.py │ ├── test_ratematrix.py │ ├── test_rmsdfeaturizer.py │ ├── test_sampling.py │ ├── test_sasa_featurizer.py │ ├── test_sparsetica.py │ ├── test_speigh.py │ ├── test_strongly_connected_subgraph.py │ ├── test_template_project.py │ ├── test_tpt.py │ ├── test_transition_counts.py │ ├── test_transmat_errorbar.py │ ├── test_transmat_mle_prinz.py │ ├── test_utils.py │ ├── test_vmhmm.py │ ├── test_workflows.py │ └── workflows │ │ ├── basic.sh │ │ ├── ghmm.sh │ │ └── rmsd.sh ├── tpt │ ├── __init__.py │ ├── committor.py │ ├── flux.py │ ├── hub.py │ ├── mfpt.py │ └── path.py └── utils │ ├── __init__.py │ ├── compat.py │ ├── convenience.py │ ├── divergence.py │ ├── draw_samples.py │ ├── io.py │ ├── nearest.py │ ├── param_sweep.py │ ├── probability.py │ ├── progressbar │ ├── __init__.py │ ├── compat.py │ ├── progressbar.py │ └── widgets.py │ ├── subsampler.py │ └── validation.py ├── runtests.py └── setup.py /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | - [x] Implement feature / fix bug 2 | - [ ] Add tests 3 | - [ ] Update changelog 4 | 5 | [Describe changes here] 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | dist 10 | build 11 | eggs 12 | parts 13 | bin 14 | var 15 | sdist 16 | develop-eggs 17 | .installed.cfg 18 | lib 19 | lib64 20 | __pycache__ 21 | 22 | # Installer logs 23 | pip-log.txt 24 | 25 | # Unit test / coverage reports 26 | .coverage 27 | .tox 28 | nosetests.xml 29 | 30 | # Translations 31 | *.mo 32 | 33 | # IDEs 34 | .mr.developer.cfg 35 | .project 36 | .pydevproject 37 | .idea/ 38 | 39 | # Autogenerated during setup.py 40 | msmbuilder/src/config.pxi 41 | msmbuilder/version.py 42 | 43 | # Vim temp files 44 | *.swp 45 | *.swo 46 | 47 | build.log 48 | 49 | # Other files 50 | .DS_Store 51 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: c 2 | sudo: false 3 | 4 | addons: 5 | apt: 6 | packages: 7 | - pandoc 8 | 9 | branches: 10 | only: 11 | - master 12 | 13 | install: 14 | - source devtools/travis-ci/install_miniconda.sh 15 | - conda config --add channels omnia 16 | - conda config --add channels conda-forge 17 | 18 | script: 19 | # Run tests. 
If they succeed, build docs only on 3.6 20 | conda build --quiet devtools/conda-recipe 21 | && if [[ $CONDA_PY = 3.6 ]]; then devtools/travis-ci/build_docs.sh; fi 22 | 23 | env: 24 | matrix: 25 | - CONDA_PY=2.7 CONDA_NPY=1.12 26 | - CONDA_PY=3.6 CONDA_NPY=1.12 27 | - CONDA_PY=3.7 CONDA_NPY=1.14 28 | 29 | deploy: 30 | - provider: s3 31 | access_key_id: 32 | secure: "av04wLV7wRmFjPRkDPE0FXNtvL51F597+DzUmrycLnI+Ltg5rxrxEUv2JMr7K1WrTTR1STFNhJBp6aQUwD3zzaA7N/1c0zY9ri35ML75LC/10IDb6UNbY6uPNqbP1co451OSz7tpGbu3JBL/TRL7MkReFbZxPLHPPP1ad/4O6nA=" 33 | secret_access_key: 34 | secure: "c4b2fliqot9ZnI5cyTqEXSHQnCao+GoxmR+SJAcSURv381O/z3frlJX7pKf0qai2OrZSSdqX/wa2KdcWNeoDTKrTiCeKgFikc6x839tmjeQYVV0Y3hmSvZCzCFOAXyMf9GfJJ7gLBOBHSzTTJWeZDLZB6nuoi4Xw9Blgid6QxIs=" 35 | bucket: "msmbuilder.org" 36 | skip_cleanup: true 37 | local_dir: docs/_deploy/ 38 | on: 39 | branch: master 40 | condition: "$CONDA_PY = 3.6" 41 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | graft examples 2 | graft msmbuilder 3 | include basesetup.py 4 | include LICENSE 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | MSMBuilder 2 | ========== 3 | 4 | [![Build Status](https://travis-ci.org/msmbuilder/msmbuilder.svg?branch=master)](https://travis-ci.org/msmbuilder/msmbuilder) 5 | [![PyPi version](https://badge.fury.io/py/msmbuilder.svg)](https://pypi.python.org/pypi/msmbuilder/) 6 | [![License](https://img.shields.io/badge/license-LGPLv2.1+-red.svg?style=flat)](https://pypi.python.org/pypi/msmbuilder/) 7 | [![Documentation](https://img.shields.io/badge/docs-latest-blue.svg?style=flat)](http://msmbuilder.org) 8 | 9 | MSMBuilder is a python package which implements a series of statistical 10 | models for high-dimensional time-series. It is particularly focused on the 11 | analysis of atomistic simulations of biomolecular dynamics. For example, 12 | MSMBuilder has been used to model protein folding and conformational change 13 | from molecular dynamics (MD) simulations. MSMBuilder is available under the 14 | LGPL (v2.1 or later). 15 | 16 | Capabilities include: 17 | 18 | - Feature extraction into dihedrals, contact maps, and more 19 | - Geometric clustering with a variety of algorithms. 20 | - Dimensionality reduction using time-structure independent component 21 | analysis (tICA) and principal component analysis (PCA). 22 | - Markov state model (MSM) construction 23 | - Rate-matrix MSM construction 24 | - Hidden markov model (HMM) construction 25 | - Timescale and transition path analysis. 26 | 27 | Check out the documentation at [msmbuilder.org](http://msmbuilder.org) and 28 | join the [mailing list](https://mailman.stanford.edu/mailman/listinfo/msmbuilder-user). 29 | For a broader overview of MSMBuilder, take a look at our [slide deck](http://rawgit.com/msmbuilder/talk/master/index.html). 30 | 31 | Installation 32 | ------------ 33 | 34 | The preferred installation mechanism for `msmbuilder` is with `conda`: 35 | 36 | ```bash 37 | $ conda install -c omnia msmbuilder 38 | ``` 39 | 40 | If you don't have conda, or are new to scientific python, we recommend that 41 | you download the [Anaconda scientific python distribution](https://store.continuum.io/cshop/anaconda/). 42 | 43 | 44 | Workflow 45 | -------- 46 | 47 | An example workflow might be as follows: 48 | 49 | 1. 
Set up a system for molecular dynamics, and run one or more simulations 50 | for as long as you can on as many CPUs or GPUs as you have access to. 51 | There are a lot of great software packages for running MD, e.g 52 | [OpenMM](https://simtk.org/home/openmm), [Gromacs](http://www.gromacs.org/), 53 | [Amber](http://ambermd.org/), [CHARMM](http://www.charmm.org/), and 54 | many others. MSMBuilder is not one of them. 55 | 56 | 2. Transform your MD coordinates into an appropriate set of features. 57 | 58 | 3. Perform some sort of dimensionality reduction with tICA or PCA. 59 | Reduce your data into discrete states by using clustering. 60 | 61 | 4. Fit an MSM, rate matrix MSM, or HMM. Perform model selection using 62 | cross-validation with the [generalized matrix Rayleigh quotient](http://arxiv.org/abs/1407.8083) 63 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | environment: 2 | 3 | global: 4 | PYTHONUNBUFFERED: on 5 | 6 | matrix: 7 | - PYDIR: "C:\\Miniconda35" 8 | CONDA_PY: "35" 9 | CONDA_NPY: "1.10" 10 | 11 | - PYDIR: "C:\\Miniconda35-x64" 12 | CONDA_PY: "35" 13 | CONDA_NPY: "1.10" 14 | 15 | install: 16 | - set PATH=%PYDIR%;%PYDIR%\\Scripts;%PATH% 17 | - conda config --add channels omnia 18 | - conda config --add channels conda-forge 19 | - conda update -yq --all 20 | - conda install -yq conda-build jinja2 21 | 22 | build: false 23 | 24 | test_script: 25 | - conda build -q devtools\conda-recipe 26 | -------------------------------------------------------------------------------- /devtools/conda-recipe/bld.bat: -------------------------------------------------------------------------------- 1 | python setup.py install 2 | if errorlevel 1 exit 1 3 | -------------------------------------------------------------------------------- /devtools/conda-recipe/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python setup.py install 3 | -------------------------------------------------------------------------------- /devtools/conda-recipe/meta.yaml: -------------------------------------------------------------------------------- 1 | package: 2 | name: msmbuilder 3 | version: {{ GIT_DESCRIBE_TAG }} 4 | 5 | source: 6 | path: ../../ 7 | 8 | build: 9 | preserve_egg_dir: True 10 | number: {{ GIT_DESCRIBE_NUMBER }} 11 | entry_points: 12 | - msmb = msmbuilder.scripts.msmb:main 13 | 14 | 15 | requirements: 16 | build: 17 | - python 18 | - setuptools 19 | - cython <=0.28 20 | - numpy x.x 21 | - mdtraj <=1.8 22 | run: 23 | - python 24 | - setuptools 25 | - numpy x.x 26 | - scipy 27 | - pandas <0.20 28 | - six 29 | - mdtraj <=1.8 30 | - scikit-learn 31 | - numpydoc 32 | - pytables 33 | - pyhmc 34 | - pyyaml 35 | - jinja2 36 | - fastcluster 37 | 38 | 39 | test: 40 | requires: 41 | - nose 42 | - nose-timer 43 | - munkres 44 | - numdifftools 45 | - statsmodels 46 | - hmmlearn=0.2.1 47 | - cvxpy # [not win] 48 | - msmb_data 49 | imports: 50 | - msmbuilder 51 | commands: 52 | - msmb -h 53 | - nosetests msmbuilder -v --with-timer --timer-ok 2 --timer-warning 10 --timer-filter error 54 | 55 | 56 | about: 57 | home: https://github.com/msmbuilder/msmbuilder 58 | license: LGPLv2.1+ 59 | summary: 'MSMBuilder: Statistical models for biomolecular dynamics' 60 | -------------------------------------------------------------------------------- /devtools/travis-ci/build_docs.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Print each line, exit on error 4 | set -ev 5 | 6 | # Install the built package 7 | conda create --yes -n docenv python=$CONDA_PY 8 | source activate docenv 9 | conda install -yq --use-local msmbuilder 10 | 11 | # Install doc requirements 12 | conda install -yq --file docs/requirements.txt 13 | 14 | # We don't use conda for these: 15 | # sphinx_rtd_theme's latest releases are not available 16 | # neither is msmb_theme 17 | # neither is sphinx > 1.3.1 (fix #1892 autodoc problem) 18 | pip install -I sphinx 19 | pip install -I sphinx_rtd_theme==0.1.9 msmb_theme==1.2.0 20 | 21 | # Make docs 22 | cd docs && make html && cd - 23 | 24 | # Move the docs into a versioned subdirectory 25 | python devtools/travis-ci/set_doc_version.py 26 | 27 | # Prepare versions.json 28 | python devtools/travis-ci/update_versions_json.py 29 | -------------------------------------------------------------------------------- /devtools/travis-ci/install_miniconda.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | MINICONDA=Miniconda3-latest-Linux-x86_64.sh 3 | MINICONDA_MD5=$(curl -s https://repo.continuum.io/miniconda/ | grep -A3 $MINICONDA | sed -n '4p' | sed -n 's/ *\(.*\)<\/td> */\1/p') 4 | wget https://repo.continuum.io/miniconda/$MINICONDA 5 | if [[ $MINICONDA_MD5 != $(md5sum $MINICONDA | cut -d ' ' -f 1) ]]; then 6 | echo "Miniconda MD5 mismatch" 7 | exit 1 8 | fi 9 | bash $MINICONDA -b 10 | rm -f $MINICONDA 11 | 12 | export PATH=$HOME/miniconda3/bin:$PATH 13 | 14 | conda update -yq conda 15 | conda install -yq conda-build jinja2 conda-verify 16 | -------------------------------------------------------------------------------- /devtools/travis-ci/set_doc_version.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | from msmbuilder import version 4 | 5 | if version.release: 6 | docversion = version.version 7 | else: 8 | docversion = 'development' 9 | 10 | os.mkdir("docs/_deploy") 11 | shutil.copytree("docs/_build/html", "docs/_deploy/{docversion}" 12 | .format(docversion=docversion)) 13 | -------------------------------------------------------------------------------- /devtools/travis-ci/update_versions_json.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | try: 4 | from urllib.request import urlopen 5 | except ImportError: 6 | from urllib2 import urlopen 7 | from msmbuilder import version 8 | 9 | if not version.release: 10 | print("This is not a release.") 11 | exit(0) 12 | 13 | URL = 'http://www.msmbuilder.org' 14 | data = urlopen(URL + '/versions.json').read().decode() 15 | versions = json.loads(data) 16 | 17 | # new release so all the others are now old 18 | for i in range(len(versions)): 19 | versions[i]['latest'] = False 20 | 21 | versions.append({ 22 | 'version': version.version, 23 | 'display': version.short_version, 24 | 'url': "{base}/{version}".format(base=URL, version=version.version), 25 | 'latest': True, 26 | }) 27 | 28 | with open("docs/_deploy/versions.json", 'w') as versionf: 29 | json.dump(versions, versionf, indent=2) 30 | 31 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | _build/ 2 | 3 | # autosummary generated files 4 | _cluster/ 5 | _msm/ 6 | _hmm/ 7 | _ratematrix/ 8 | 
_decomposition/ 9 | _preprocessing/ 10 | _feature_selection/ 11 | _featurization/ 12 | _tpt/ 13 | _io/ 14 | _gmrq/ 15 | 16 | # autogenerated (see conf.py) 17 | publications.rst 18 | -------------------------------------------------------------------------------- /docs/LICENSE: -------------------------------------------------------------------------------- 1 | The MSMBuilder documentation is licensed under a Creative Commons 2 | Attribution 4.0 International License. 3 | 4 | https://creativecommons.org/licenses/by/4.0/ 5 | -------------------------------------------------------------------------------- /docs/_static/flow-chart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msmbuilder/msmbuilder/515fd5c27836c797692d600216b5eb224dfc1c5d/docs/_static/flow-chart.png -------------------------------------------------------------------------------- /docs/_static/fspeptide.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msmbuilder/msmbuilder/515fd5c27836c797692d600216b5eb224dfc1c5d/docs/_static/fspeptide.png -------------------------------------------------------------------------------- /docs/_static/kde-vs-histogram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msmbuilder/msmbuilder/515fd5c27836c797692d600216b5eb224dfc1c5d/docs/_static/kde-vs-histogram.png -------------------------------------------------------------------------------- /docs/_static/lengths-hist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msmbuilder/msmbuilder/515fd5c27836c797692d600216b5eb224dfc1c5d/docs/_static/lengths-hist.png -------------------------------------------------------------------------------- /docs/_static/logo-200px.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msmbuilder/msmbuilder/515fd5c27836c797692d600216b5eb224dfc1c5d/docs/_static/logo-200px.png -------------------------------------------------------------------------------- /docs/_static/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msmbuilder/msmbuilder/515fd5c27836c797692d600216b5eb224dfc1c5d/docs/_static/logo.png -------------------------------------------------------------------------------- /docs/_static/msm-microstates.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msmbuilder/msmbuilder/515fd5c27836c797692d600216b5eb224dfc1c5d/docs/_static/msm-microstates.png -------------------------------------------------------------------------------- /docs/_static/tica-heatmap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msmbuilder/msmbuilder/515fd5c27836c797692d600216b5eb224dfc1c5d/docs/_static/tica-heatmap.png -------------------------------------------------------------------------------- /docs/_static/tica-movie.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msmbuilder/msmbuilder/515fd5c27836c797692d600216b5eb224dfc1c5d/docs/_static/tica-movie.gif -------------------------------------------------------------------------------- /docs/_static/tica_vs_pca.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/msmbuilder/msmbuilder/515fd5c27836c797692d600216b5eb224dfc1c5d/docs/_static/tica_vs_pca.png -------------------------------------------------------------------------------- /docs/_templates/class.rst: -------------------------------------------------------------------------------- 1 | {{ fullname }} 2 | {{ underline }} 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | 8 | -------------------------------------------------------------------------------- /docs/advanced_examples/bayesian-msm.rst: -------------------------------------------------------------------------------- 1 | Bayesian Estimation of MSMs 2 | =========================== 3 | 4 | .. notebook:: examples/bayesian-msm.ipynb 5 | 6 | -------------------------------------------------------------------------------- /docs/advanced_examples/gmrq-model-selection.rst: -------------------------------------------------------------------------------- 1 | GMRQ hyperparameter selection 2 | ============================= 3 | 4 | .. notebook:: examples/gmrq-model-selection.ipynb -------------------------------------------------------------------------------- /docs/advanced_examples/hmm-and-msm.rst: -------------------------------------------------------------------------------- 1 | HMM and MSM Timescales for Ala2 2 | =============================== 3 | 4 | .. notebook:: examples/hmm-and-msm.ipynb 5 | 6 | -------------------------------------------------------------------------------- /docs/advanced_examples/implied-timescales.rst: -------------------------------------------------------------------------------- 1 | Implied Timescales 2 | ================== 3 | 4 | .. notebook:: examples/implied-timescales.ipynb 5 | 6 | -------------------------------------------------------------------------------- /docs/advanced_examples/index.rst: -------------------------------------------------------------------------------- 1 | Examples 2 | ======== 3 | 4 | This page provides a series of examples, tutorials and recipes for using 5 | MSMBuilder. 6 | 7 | Each subsection is a notebook. To open these notebooks in a "live" IPython 8 | session and execute the documentation interactively, you need to download 9 | the repository and start IPython notebook. 10 | 11 | If you installed `MSMBuilder` from source, you will need to navigate to 12 | :code:`./examples`. The notebook files for these examples and the notebooks 13 | are available in the top level `examples folder 14 | `_ on GitHub 15 | as well. In the directory with the notebook files, start an IPython 16 | notebook server: 17 | 18 | .. code-block:: python 19 | 20 | $ ipython notebook 21 | 22 | 23 | .. toctree:: 24 | :maxdepth: 2 25 | :titlesonly: 26 | :glob: 27 | 28 | * 29 | 30 | .. vim: tw=75 31 | -------------------------------------------------------------------------------- /docs/advanced_examples/plot-tica-heatmap.rst: -------------------------------------------------------------------------------- 1 | Visualization with tICA 2 | ======================= 3 | 4 | .. notebook:: examples/plot-tica-heatmap.ipynb 5 | 6 | -------------------------------------------------------------------------------- /docs/advanced_examples/quadwell-n-states.rst: -------------------------------------------------------------------------------- 1 | Model selection with Randomized CV 2 | ================================== 3 | 4 | .. 
notebook:: examples/quadwell-n-states.ipynb 5 | -------------------------------------------------------------------------------- /docs/advanced_examples/quadwell.rst: -------------------------------------------------------------------------------- 1 | Bootstraped MSM CIs 2 | =================== 3 | 4 | .. notebook:: examples/quadwell.ipynb 5 | 6 | -------------------------------------------------------------------------------- /docs/advanced_examples/tica-1.rst: -------------------------------------------------------------------------------- 1 | tICA and PCA 2 | ============ 3 | 4 | .. notebook:: examples/tica-example.ipynb 5 | 6 | -------------------------------------------------------------------------------- /docs/advanced_examples/uncertainty.rst: -------------------------------------------------------------------------------- 1 | Estimating uncertainty in Markov state models 2 | ============================================= 3 | 4 | .. notebook:: examples/uncertainty.ipynb 5 | -------------------------------------------------------------------------------- /docs/background.rst: -------------------------------------------------------------------------------- 1 | .. _background: 2 | 3 | Motivation 4 | ========== 5 | 6 | The aim of this package is to provide software tools for predictive 7 | modeling of the long timescale dynamics of biomolecular systems using 8 | statistical modeling to analyze physical simulations. 9 | 10 | Given a dataset of one or more stochastic trajectories tracking the 11 | coordinates of every (10,000+) atom in a molecular system at a discrete 12 | time interval, how do we understand the slow dynamical processes and make 13 | quantitative predictions about the system? 14 | 15 | 16 | Workflow 17 | -------- 18 | 19 | To build a dynamical model, we apply (stepwise) a series of dimensionality 20 | reductions. The basic set of steps is outlined below. Note that most steps 21 | are optional under certain circumstances. The particulars should become 22 | clear as you continue reading the documentation. 23 | 24 | 1. Set up a system for molecular dynamics, and run one or more simulations 25 | for as long as you can on as many CPUs or GPUs as you have access. 26 | There are a lot of great software packages for running MD, e.g `OpenMM 27 | `_, `Gromacs `_, 28 | `Amber `_, `CHARMM `_, and 29 | many others. MSMBuilder is not one of them. 30 | 31 | 2. :ref:`Featurize` trajectories into an appropriate vector 32 | of features. The full :math:`3N` set of atomic coordinates is 33 | potentially unwieldy and redundant. It likely does not respect the 34 | rotational or translational symmetry of your system either. We commonly 35 | use backbone dihedral angles as our features, although this depends 36 | highly on the system being modeled. 37 | 38 | 3. :ref:`Decompose` your features into a new basis that 39 | preserves the relevant information in your data with fewer dimensions. 40 | We typically use tICA, which finds linear combinations of input degrees 41 | of freedom that maximize autocorrelation or "slowness". 42 | 43 | 4. :ref:`Cluster` your data to define (micro-)states by grouping 44 | similar input data points. At this stage, we've reduced the 45 | dimensionality of the problem from potentially thousands of :math:`xyz` 46 | coordinates to a single cluster (state) index. 47 | 48 | 5. :ref:`Estimate a model` from the clustered data. We typically build 49 | an MSM, which models the important dynamics of the system. 50 | 51 | 6. 
Use :ref:`GMRQ cross-validation` to select the best model. There 52 | are many hyperparameters (knobs to tweak) in the workflow. This scoring 53 | function can help us pick the best values. 54 | 55 | 56 | .. figure:: _static/flow-chart.png 57 | :align: center 58 | :width: 80% 59 | 60 | A diagram of potential workflows. 61 | 62 | .. vim: tw=75 63 | -------------------------------------------------------------------------------- /docs/bibparse.py: -------------------------------------------------------------------------------- 1 | """Very simple bibtex parser for use in MSMBuilder doc generation 2 | 3 | Matthew Harrigan 4 | (c)2016, MIT License 5 | """ 6 | 7 | from pyparsing import CaselessKeyword as kwd 8 | from pyparsing import QuotedString, Word, alphanums, Suppress, OneOrMore, nums, \ 9 | Group, Optional, ZeroOrMore, alphas, alphas8bit, delimitedList 10 | 11 | # Change these if you need more flexibility: 12 | entry_type = kwd("article") | kwd("unpublished") 13 | cite_key = Word(alphanums + ":/._") 14 | 15 | LCURLY = Suppress('{') 16 | RCURLY = Suppress('}') 17 | COMMA = Suppress(',') 18 | AT = Suppress('@') 19 | EQUALS = Suppress('=') 20 | 21 | field_val = Word(nums) | QuotedString('{', endQuoteChar='}', multiline=True, 22 | convertWhitespaceEscapes=False) 23 | title_field = Group(kwd('title') + EQUALS + field_val) 24 | journal_field = Group(kwd('journal') + EQUALS + field_val) 25 | year_field = Group(kwd('year') + EQUALS + field_val) 26 | volume_field = Group(kwd('volume') + EQUALS + field_val) 27 | pages_field = Group(kwd('pages') + EQUALS + field_val) 28 | abstract_field = Group(kwd('abstract') + EQUALS + field_val) 29 | doi_field = Group(kwd('doi') + EQUALS + field_val) 30 | other_field = Group(Word(alphanums) + EQUALS + field_val) 31 | 32 | author = OneOrMore(~kwd('and') + Word(alphas + alphas8bit + '.,-')) 33 | author.setParseAction(lambda xx: ' '.join(str(x) for x in xx)) 34 | author_list = LCURLY + delimitedList(author, 'and') + RCURLY 35 | author_field = Group(kwd('author') + EQUALS + Group(author_list)) 36 | 37 | entry_item = (title_field | author_field | journal_field | year_field 38 | | volume_field | pages_field | abstract_field | doi_field 39 | | Suppress(other_field)) 40 | 41 | 42 | class BibEntry(object): 43 | def __init__(self, type, cite_key, fields): 44 | self.type = type 45 | self.cite_key = cite_key 46 | self.fields = fields 47 | self.__dict__.update(**fields) 48 | 49 | 50 | def to_BibEntry(toks): 51 | return BibEntry(toks[0], toks[1], dict(toks[2:])) 52 | 53 | 54 | entry = (AT + entry_type + LCURLY + cite_key + COMMA 55 | + ZeroOrMore(entry_item + COMMA) + Optional(entry_item) + RCURLY) 56 | entry.setParseAction(to_BibEntry) 57 | entries = OneOrMore(entry) 58 | -------------------------------------------------------------------------------- /docs/cluster.rst: -------------------------------------------------------------------------------- 1 | .. _cluster: 2 | .. currentmodule:: msmbuilder.cluster 3 | 4 | Clustering 5 | ========== 6 | 7 | Clustering MD trajectories groups the data [#f1]_ into a set of 8 | clusters such that conformations in the same cluster are structurally 9 | similar to one another, and conformations in different clusters are 10 | structurally distinct. The questions that arise are 11 | 12 | #. How should "structurally similar" be defined? What distance metric 13 | should be used? 14 | 15 | #. Given the distance metric, what algorithm should be used to actually 16 | cluster the data? 
17 | 18 | On point 1, there is no consensus in the protein MD literature. Popular 19 | distance metrics include cartesian root-mean-squared deviation of atomic 20 | positions (RMSD) [#f3]_, distances based on the number of native contacts 21 | formed, distances based on the difference in backbone dihedral angles, and 22 | probably others. 23 | 24 | On point 2, "Optimal" clustering is NP-hard [#f2]_, so there's usually a 25 | tradeoff between clustering quality and computational cost. For that reason, 26 | MSMBuilder has a variety of different clustering algorithms implemented. 27 | 28 | Algorithms 29 | ---------- 30 | 31 | All clustering algorithms in MSMBuilder follow the same basic API. 32 | Hyperparameters, including the number of clusters, random seeds, and the 33 | distance metric (if applicable), are passed to the class constructor. 34 | Then, the computation is done by calling ``fit(sequences)``. The argument 35 | to ``fit`` should be a *list* of molecular dynamics trajectories or a list 36 | of 2D numpy arrays, each of shape ``(length_of_trajectory, n_features)``. 37 | 38 | 39 | .. autosummary:: 40 | :toctree: _cluster/ 41 | :nosignatures: 42 | 43 | KCenters 44 | KMeans 45 | KMedoids 46 | MiniBatchKMedoids 47 | RegularSpatial 48 | LandmarkAgglomerative 49 | AffinityPropagation 50 | GMM 51 | MeanShift 52 | MiniBatchKMeans 53 | SpectralClustering 54 | Ward 55 | 56 | 57 | .. todo: Example of clustering 58 | 59 | References 60 | ---------- 61 | 62 | .. [#f1] The "data", for MD, refers to snapshots of the structure of a molecular system at a given time point -- i.e. the set of cartesian coordinates for all the atoms, or some mathematical transformation thereof. 63 | .. [#f2] Aloise, Daniel, et al. `NP-hardness of Euclidean sum-of-squares clustering. `_ Machine Learning 75.2 (2009): 245-248. 64 | .. [#f3] http://en.wikipedia.org/wiki/Root-mean-square_deviation_of_atomic_positions 65 | 66 | .. vim: tw=75 67 | -------------------------------------------------------------------------------- /docs/examples/Clustering-Comparison.rst: -------------------------------------------------------------------------------- 1 | Clustering Comparison 2 | ===================== 3 | 4 | .. notebook:: Clustering-Comparison 5 | -------------------------------------------------------------------------------- /docs/examples/Fs-Peptide-command-line.rst: -------------------------------------------------------------------------------- 1 | Fs Peptide (command line) 2 | ========================= 3 | 4 | .. notebook:: Fs-Peptide-command-line 5 | -------------------------------------------------------------------------------- /docs/examples/Fs-Peptide-in-RAM.rst: -------------------------------------------------------------------------------- 1 | Fs Peptide (in RAM) 2 | =================== 3 | 4 | .. notebook:: Fs-Peptide-in-RAM 5 | -------------------------------------------------------------------------------- /docs/examples/Fs-Peptide-with-dataset.rst: -------------------------------------------------------------------------------- 1 | Fs Peptide (using ``dataset``) 2 | ============================== 3 | 4 | .. notebook:: Fs-Peptide-with-dataset 5 | -------------------------------------------------------------------------------- /docs/examples/GMRQ-Model-Selection.rst: -------------------------------------------------------------------------------- 1 | GMRQ Model Selection 2 | ==================== 3 | 4 | ..
notebook:: GMRQ-Model-Selection 5 | -------------------------------------------------------------------------------- /docs/examples/Ligand-Featurization.rst: -------------------------------------------------------------------------------- 1 | Ligand Featurization 2 | ==================== 3 | 4 | .. notebook:: Ligand-Featurization 5 | -------------------------------------------------------------------------------- /docs/examples/Ward-Clustering.rst: -------------------------------------------------------------------------------- 1 | Ward Clustering 2 | =============== 3 | 4 | .. notebook:: Ward-Clustering 5 | -------------------------------------------------------------------------------- /docs/examples/index.rst: -------------------------------------------------------------------------------- 1 | .. _examples: 2 | 3 | Examples 4 | ======== 5 | 6 | The following examples show off various aspects or capabilities of 7 | MSMBuilder. They can be run interactively in Jupyter (IPython) notebook. 8 | Download the `notebook files 9 | `_ and open 10 | them in Jupyter:: 11 | 12 | $ jupyter notebook 13 | 14 | .. To make the ipython rendered images show up, each rst file must be 15 | in its own directory. 16 | 17 | .. toctree:: 18 | :maxdepth: 2 19 | :titlesonly: 20 | 21 | Fs-Peptide-in-RAM 22 | Fs-Peptide-with-dataset 23 | Fs-Peptide-command-line 24 | tICA-vs-PCA 25 | Clustering-Comparison 26 | GMRQ-Model-Selection 27 | Ward-Clustering 28 | Ligand-Featurization 29 | 30 | 31 | Contributing examples 32 | --------------------- 33 | 34 | Do you have a neat example of using MSMBuilder? Format your code 35 | into an IPython notebook and submit a pull request! 36 | 37 | .. vim: tw=75 38 | -------------------------------------------------------------------------------- /docs/examples/tICA-vs-PCA.rst: -------------------------------------------------------------------------------- 1 | tICA vs. PCA 2 | ============ 3 | 4 | .. notebook:: tICA-vs-PCA 5 | -------------------------------------------------------------------------------- /docs/feature_selection.rst: -------------------------------------------------------------------------------- 1 | .. _feature_selection: 2 | .. currentmodule:: msmbuilder.feature_selection 3 | 4 | 5 | Feature Selection 6 | ================= 7 | 8 | Feature selection can be used to reduce the dimensionality of data sets, 9 | either to improve estimators’ accuracy or to boost their performance on very 10 | high-dimensional datasets. 11 | 12 | Feature Selectors 13 | ----------------- 14 | 15 | .. autosummary:: 16 | :toctree: _feature_selection/ 17 | 18 | FeatureSelector 19 | VarianceThreshold 20 | 21 | 22 | .. vim: tw=75 23 | -------------------------------------------------------------------------------- /docs/featurization.rst: -------------------------------------------------------------------------------- 1 | .. _featurization: 2 | .. currentmodule:: msmbuilder.featurizer 3 | 4 | 5 | Featurization 6 | ============= 7 | 8 | Many algorithms require that the input data be vectors in a (euclidean) 9 | vector space. This includes :class:`~msmbuilder.cluster.KMeans` clustering, 10 | :class:`~msmbuilder.decomposition.tICA`, and others. 11 | 12 | Since there's usually no special rotational or translational reference 13 | frame in an MD simulation, it's often desirable to remove rotational and 14 | translational motion via featurization that is insensitive to rotations and 15 | translations. 16 | 17 | Featurizations 18 | -------------- 19 | 20 | .. 
autosummary:: 21 | :toctree: _featurization/ 22 | 23 | AtomPairsFeaturizer 24 | ContactFeaturizer 25 | DRIDFeaturizer 26 | DihedralFeaturizer 27 | GaussianSolventFeaturizer 28 | RMSDFeaturizer 29 | RawPositionsFeaturizer 30 | SuperposeFeaturizer 31 | 32 | 33 | Alternative to Featurization 34 | ---------------------------- 35 | 36 | Many algorithms require vectorizable data. Other algorithms only require a 37 | pairwise distance metric, e.g. RMSD between two protein conformations. In 38 | general, you can define a pairwise distance among vectorized data, but you 39 | cannot embed data into a vector space only from pairwise distance. 40 | 41 | Some :ref:`clustering ` methods let you use an arbitrary distance 42 | metric, including RMSD. In this case, the input to ``fit()`` may be a list 43 | of MD trajectories instead of a list of numpy arrays. Clustering methods 44 | that allow this currently include :class:`~msmbuilder.cluster.KCenters` and 45 | :class:`~msmbuilder.cluster.KMedoids`. 46 | 47 | .. vim: tw=75 48 | -------------------------------------------------------------------------------- /docs/figures/kde-vs-histogram.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as pp 3 | from scipy.stats import norm 4 | from sklearn.neighbors import KernelDensity 5 | 6 | 7 | #---------------------------------------------------------------------- 8 | # Plot the progression of histograms to kernels 9 | N = 100 10 | np.random.seed(1) 11 | X = np.concatenate((np.random.normal(0, 1, 0.3 * N), 12 | np.random.normal(5, 1, 0.7 * N)))[:, np.newaxis] 13 | 14 | X_plot = np.linspace(-5, 10, 1000)[:, np.newaxis] 15 | 16 | true_dens = (0.3 * norm(0, 1).pdf(X_plot[:, 0]) 17 | + 0.7 * norm(5, 1).pdf(X_plot[:, 0])) 18 | 19 | 20 | ax = pp.subplot(axisbg='w') 21 | ax.fill(X_plot[:, 0], true_dens, fc='black', alpha=0.2) 22 | pp.plot(X_plot[:, 0], true_dens, 'k-', lw=2, label='input distribution') 23 | 24 | kde = KernelDensity(kernel='gaussian', bandwidth=0.5).fit(X) 25 | log_dens = kde.score_samples(X_plot) 26 | ax.plot(X_plot[:, 0], np.exp(log_dens), '-', lw=2, c='r', label='Gaussian KDE') 27 | pp.twinx().hist(X, bins=20, alpha=0.5, label='Histogram') 28 | 29 | 30 | ax.text(6, 0.38, "N={0} points".format(N)) 31 | 32 | ax.legend(loc='upper left') 33 | 34 | ax.set_xlim(-4, 9) 35 | ax.set_ylim(0, 0.4) 36 | pp.savefig('_static/kde-vs-histogram.png') 37 | 38 | -------------------------------------------------------------------------------- /docs/gmrq.rst: -------------------------------------------------------------------------------- 1 | .. _gmrq: 2 | .. currentmodule:: msmbuilder 3 | 4 | Model Selection using GMRQ 5 | ========================== 6 | 7 | The generalized matrix Rayleigh quotient (GMRQ) is a specific application of 8 | the variational principle (adapted from `quantum mechanics 9 | `_) 10 | for Markov state models and a useful tool for model parameter selection. 11 | 12 | The variational principle yields a rigorous way of comparing two different 13 | Markov models for the same underlying stochastic process when using different 14 | state decompositions. Even under the assumption that you have access to 15 | infinite sampling, there is still some error associated with approximating the 16 | true continuous eigenfunctions of your modeled process with the indicator 17 | functions, as is the case with Markov state models. 
If we interpret the 18 | variational theorem as the measure of the quality of this approximation, the 19 | state decomposition that leads to a Markov model with larger leading dynamical 20 | eigenvalues is consequently the better state decomposition. If you wish to see 21 | the full derivation of this quantity, please refer to [#f1]_. 22 | 23 | Using this method, we can generate single scalar-valued scores for a proposed 24 | model given a supplied data set. This allows for the use of separate testing 25 | and training data sets to quantify and avoid statistical overfitting. 26 | This method extends these tools, making it possible to score trained models on 27 | new datasets and to perform hyperparameter selection. **PLEASE NOTE**: You cannot 28 | use GMRQ to optimize the MSM lag time. Changing the lag time fundamentally 29 | alters the model's eigenfunctions, which no longer makes it a useful scoring function. 30 | The number of timescales used to score the model must also be constant and user- 31 | selected. 32 | 33 | Algorithms 34 | ---------- 35 | 36 | .. autosummary:: 37 | :toctree: _gmrq/ 38 | 39 | decomposition.tICA.score 40 | msm.MarkovStateModel.score 41 | msm.ContinuousTimeMSM.score 42 | 43 | 44 | 45 | 46 | References 47 | ---------- 48 | 49 | .. [#f1] McGibbon, Robert T., and Vijay S. Pande. `Variational cross-validation of slow dynamical modes in molecular kinetics `_ J. Chem. Phys. 142, 124105 (2015). 50 | 51 | .. vim: tw=75 52 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. _msmbuilder: 2 | 3 | MSMBuilder 4 | ========== 5 | 6 | 7 | .. raw:: html 8 | 9 |

10 | Statistical models for Biomolecular Dynamics

11 | 12 | MSMBuilder is an application and python library. It builds 13 | statistical models for high-dimensional time-series. The particular focus 14 | of the package is on the analysis of atomistic simulations of biomolecular 15 | dynamics such as protein folding and conformational change. 16 | 17 | To get started via `Anaconda Python `_, 18 | use:: 19 | 20 | conda install -c omnia msmbuilder 21 | 22 | MSMBuilder includes algorithms for constructing dynamical models: 23 | 24 | - :ref:`featurization` 25 | - :ref:`feature_selection` 26 | - :ref:`preprocessing` 27 | - :ref:`decomposition` 28 | - :ref:`cluster` 29 | - :ref:`msm` 30 | - :ref:`hmm` 31 | - :ref:`ratematrix` 32 | 33 | As well as methods for analysis and validation of the models: 34 | 35 | - :ref:`gmrq` 36 | - :ref:`tpt` 37 | 38 | New users should check out: 39 | 40 | - :ref:`background` 41 | - :ref:`installation` 42 | - :ref:`tutorial` 43 | - :ref:`examples` 44 | - :ref:`faq` 45 | 46 | MSMBuilder is most effective as a library. Intermediate users should 47 | familiarize themselves with: 48 | 49 | - :ref:`apipatterns` 50 | - :ref:`datasets` 51 | - :ref:`changelog` 52 | 53 | 54 | MSMBuilder is developed primarily by researchers at Stanford University, 55 | and we welcome contributions. The development all takes place on `Github 56 | `_. MSMBuilder is licensed under 57 | the GNU LGPL (v2.1 or later). 58 | 59 | 60 | 61 | .. toctree:: 62 | :maxdepth: 2 63 | :hidden: 64 | 65 | background 66 | installation 67 | tutorial 68 | examples/index 69 | featurization 70 | feature_selection 71 | preprocessing 72 | decomposition 73 | cluster 74 | msm 75 | gmrq 76 | tpt 77 | ratematrix 78 | hmm 79 | datasets 80 | io 81 | apipatterns 82 | plugins 83 | faq 84 | changelog 85 | publications 86 | contributing 87 | 88 | .. vim: tw=75 89 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | .. _installation: 2 | 3 | Installation 4 | ============ 5 | 6 | The preferred installation mechanism for ``msmbuilder`` is with ``conda``. 7 | 8 | .. code-block:: bash 9 | 10 | $ conda install -c omnia msmbuilder 11 | 12 | 13 | If you don't have conda, or are new to scientific python, we recommend that 14 | you download the `Anaconda scientific python distribution 15 | `_. 16 | 17 | 18 | From Source 19 | ----------- 20 | 21 | MSMBuilder is a python package that heavily leans on other components of the 22 | scientific python ecosystem. See ``devtools/conda-recipe/meta.yaml`` for a 23 | complete and up-to-date list of build, run, and test dependencies. When you 24 | are sure the dependencies are satisfied you can install from PyPI 25 | 26 | .. code-block:: bash 27 | 28 | $ pip install msmbuilder 29 | 30 | or from source 31 | 32 | .. code-block:: bash 33 | 34 | $ git clone git@github.com:msmbuilder/msmbuilder 35 | $ cd msmbuilder/ 36 | $ pip install . 37 | $ # (or: python setup.py install) 38 | 39 | Frequently Asked Questions 40 | -------------------------- 41 | 42 | **Do I need Anaconda python? Can't I use the python that comes with my 43 | operating system, like /usr/bin/python?** 44 | 45 | You can have multiple ``python`` installations on your computer which do 46 | not interact with one another at all. The system python interpreter is used 47 | by your operating system for some of its own programs but is not the best 48 | choice for data analysis or science.
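A quick way to see which interpreter a given shell will actually use is shown below. This is a minimal sketch; the exact paths, environment names, and output will differ on your machine.

.. code-block:: bash

    $ which python                                # path of the interpreter on your PATH
    $ python -c "import sys; print(sys.prefix)"   # install prefix of that interpreter
    $ conda info --envs                           # lists conda environments, if conda is installed

A conda-based install reports a prefix inside your Anaconda or Miniconda directory rather than ``/usr``.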
49 | 50 | We strongly recommend that you install the Anaconda or Miniconda python 51 | distribution and that you have the ``conda`` package manager available. 52 | 53 | If you're interested in some of the details about packaging and scientific 54 | python, see `this blog post by Travis Oliphant 55 | `_. 56 | 57 | .. vim: tw=75 58 | -------------------------------------------------------------------------------- /docs/plugins.rst: -------------------------------------------------------------------------------- 1 | .. _plugins: 2 | 3 | Writing Plugins 4 | =============== 5 | 6 | You can easily extend MSMBuilder by subclassing ``BaseEstimator`` or any of 7 | its children. You can even build your plugin to work with the ``msmb`` 8 | command-line interface. 9 | 10 | 1. Subclass ``cmdline.Command`` or any of its children. For example, 11 | if you want to expose a new Featurizer from the command line. 12 | 13 | .. code-block:: python 14 | 15 | from msmbuilder.commands.featurizer import FeaturizerCommand 16 | class MyNiftyFeaturizerCommand(FeaturizerCommand): 17 | klass = MyNiftyFeaturizer 18 | _concrete = True 19 | 20 | 2. Provide your command as an "entry point" with ``setuptools``. 21 | Use ``"msmbuilder.commands"`` as the entry point. 22 | For example, in your ``setup.py``. 23 | 24 | .. code-block:: python 25 | 26 | setup( 27 | ... 28 | entry_points={'msmbuilder.commands': 29 | 'niftyfeat = niftyfeat:MyNiftyFeaturizerCommand'}, 30 | ) 31 | 32 | See the 33 | `setuptools documentation `_ 34 | for more information. 35 | 36 | .. vim: tw=75 37 | -------------------------------------------------------------------------------- /docs/preprocessing.rst: -------------------------------------------------------------------------------- 1 | .. _preprocessing: 2 | .. currentmodule:: msmbuilder.preprocessing 3 | 4 | 5 | Preprocessing 6 | ============= 7 | 8 | Preprocessing of a dataset is a common requirement for many machine learning 9 | estimators and may involve scaling, centering, normalization, smoothing, 10 | binarization, and imputation methods. 11 | 12 | Preprocessors 13 | ------------- 14 | 15 | .. autosummary:: 16 | :toctree: _preprocessing/ 17 | 18 | Binarizer 19 | Butterworth 20 | EWMA 21 | DoubleEWMA 22 | Imputer 23 | KernelCenterer 24 | LabelBinarizer 25 | MultiLabelBinarizer 26 | MinMaxScaler 27 | MaxAbsScaler 28 | Normalizer 29 | RobustScaler 30 | StandardScaler 31 | PolynomialFeatures 32 | 33 | .. vim: tw=75 34 | -------------------------------------------------------------------------------- /docs/publications_templ.rst: -------------------------------------------------------------------------------- 1 | .. _publications: 2 | 3 | Publications 4 | ============ 5 | 6 | The following published works use MSMBuilder. To add your publication 7 | to the list, open an issue on GitHub with the relevant information or 8 | edit ``docs/publications.bib`` and submit a pull request. 9 | 10 | .. publications.bib lists the relevant publications 11 | .. publications_templ.rst defines how the publications will be displayed 12 | .. publications.rst is generated during sphinx build (see conf.py) 13 | and should not be edited directly!
14 | 15 | {% for pub in publications %} 16 | {{pub.title}} 17 | -------------------------------------------------------------------------------- 18 | 19 | * {{pub.author | join('; ')}} 20 | * *{{pub.journal}}* **{{pub.year}}**, {{pub.volume}} {{pub.pages}} 21 | * `doi: {{pub.doi}} `_ 22 | 23 | {{pub.abstract | wordwrap }} 24 | 25 | {% endfor %} 26 | 27 | -------------------------------------------------------------------------------- /docs/ratematrix.rst: -------------------------------------------------------------------------------- 1 | .. _ratematrix: 2 | .. currentmodule:: msmbuilder.msm 3 | 4 | Continuous-time MSMs 5 | ==================== 6 | 7 | :class:`MarkovStateModel` estimates a series of 8 | transition *probabilities* among states that depend on the discrete 9 | lag-time. Physically, we are probably more interested in a sparse set of 10 | transition *rates* in and out of states, estimated by 11 | :class:`ContinuousTimeMSM`. 12 | 13 | 14 | Theory 15 | ------ 16 | 17 | Consider an `n`-state time-homogeneous Markov process, :math:`X(t)`. At 18 | time :math:`t`, the :math:`n`-vector :math:`P(t) = Pr[ X(t) = i ]` is the 19 | probability that the system is in each of the :math:`n` states. These 20 | probabilities evolve forward in time, governed by an :math:`n \times n` 21 | transition rate matrix :math:`K` 22 | 23 | .. math :: 24 | dP(t)/dt = P(t) \cdot K 25 | 26 | The solution is 27 | 28 | .. math :: 29 | P(t) = \exp(tK) \cdot P(0) 30 | 31 | Where :math:`\exp(tK)` is the matrix exponential. Written differently, the 32 | state-to-state lag-:math:`\tau` transition probabilities are 33 | 34 | .. math :: 35 | Pr[ X(t+\tau) = j \;|\; X(t) = i ] = \exp(\tau K)_{ij} 36 | 37 | For this model, we observe the evolution of one or more chains, 38 | :math:`X(t)` at a regular interval, :math:`\tau`. Let :math:`C_{ij}` be the 39 | number of times the chain was observed at state :math:`i` at time :math:`t` 40 | and at state :math:`j` at time :math:`t+\tau` (the number of observed 41 | transition counts). Suppose that :math:`K` depends on a parameter vector, 42 | :math:`\theta`. The log-likelihood is 43 | 44 | .. math :: 45 | \mathcal{L}(\theta) = \sum_{ij} \left[ 46 | C_{ij} \log\left(\left[\exp(\tau K(\theta))\right]_{ij}\right)\right] 47 | 48 | The :class:`ContinuousTimeMSM` model finds a rate matrix that fits the data 49 | by maximizing this likelihood expression. Specifically, it uses L-BFGS-B 50 | to find a maximum likelihood estimate (MLE) rate matrix, 51 | :math:`\hat{\theta}` and :math:`K(\hat{\theta})`. 52 | 53 | Uncertainties 54 | ~~~~~~~~~~~~~ 55 | 56 | Analytical estimates of the asymptotic standard deviation in estimated 57 | parameters like the stationary distribution, rate matrix, eigenvalues, and 58 | relaxation timescales can be computed by calling methods on the 59 | :class:`ContinuousTimeMSM` object. See [1] for more detail. 60 | 61 | 62 | Algorithms 63 | ---------- 64 | 65 | .. autosummary:: 66 | :toctree: _ratematrix/ 67 | 68 | ContinuousTimeMSM 69 | 70 | 71 | References 72 | ---------- 73 | .. [1] McGibbon, R. T. and V. S. Pande, "Efficient maximum likelihood parameterization 74 | of continuous-time Markov processes." J. Chem. Phys. 143 034109 (2015) http://dx.doi.org/10.1063/1.4926516 75 | .. [2] Kalbfleisch, J. D., and Jerald F. Lawless. "The analysis of panel data 76 | under a Markov assumption." J. Am. Stat. Assoc. 80.392 (1985): 863-871. 77 | 78 | .. 
vim: tw=75 79 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | numpydoc 2 | matplotlib 3 | jupyter 4 | notebook 5 | jinja2 6 | openmm 7 | nbconvert 8 | msmb_data 9 | pyparsing 10 | msmexplorer 11 | -------------------------------------------------------------------------------- /docs/tpt.rst: -------------------------------------------------------------------------------- 1 | .. _tpt: 2 | .. currentmodule:: msmbuilder.tpt 3 | 4 | Transition Path Theory 5 | ====================== 6 | 7 | 8 | Transition path theory (TPT) is a way to extract the highest-flux pathways 9 | of your system from an estimated MSM. 10 | 11 | .. todo: more 12 | 13 | .. todo: example 14 | 15 | 16 | References 17 | ---------- 18 | 19 | These are some canonical references for TPT. Note that TPT is really a 20 | specialization of ideas very familiar to the mathematical study of Markov 21 | chains, and there are many books, manuscripts in the mathematical 22 | literature that cover the same concepts. 23 | 24 | .. [1] E, Weinan and Vanden-Eijnden, Eric Towards a Theory of Transition Paths 25 | J. Stat. Phys. 123 503-523 (2006) 26 | .. [2] Metzner, P., Schutte, C. & Vanden-Eijnden, E. Transition path theory 27 | for Markov jump processes. Multiscale Model. Simul. 7, 1192-1219 28 | (2009). 29 | .. [3] Berezhkovskii, A., Hummer, G. & Szabo, A. Reactive flux and folding 30 | pathways in network models of coarse-grained protein dynamics. J. 31 | Chem. Phys. 130, 205102 (2009). 32 | .. [4] Noé, Frank, et al. "Constructing the equilibrium ensemble of folding 33 | pathways from short off-equilibrium simulations." PNAS 106.45 (2009): 34 | 19011-19016. 35 | 36 | Functions 37 | --------- 38 | 39 | .. autosummary:: 40 | :toctree: _tpt/ 41 | 42 | fluxes 43 | net_fluxes 44 | fraction_visited 45 | hub_scores 46 | paths 47 | top_path 48 | committors 49 | conditional_committors 50 | mfpts 51 | 52 | .. vim: tw=75 53 | -------------------------------------------------------------------------------- /examples/.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints/ 2 | -------------------------------------------------------------------------------- /examples/LICENSE.md: -------------------------------------------------------------------------------- 1 | These example scripts are released under the MIT license. MSMBuilder 2 | is LGPL. Please consider citing MSMBuilder if you use it in your work. 3 | 4 | The MIT License (MIT) 5 | 6 | Copyright (c) 2016 Stanford University and the Authors 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a 9 | copy of this software and associated documentation files (the "Software"), 10 | to deal in the Software without restriction, including without limitation 11 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 12 | and/or sell copies of the Software, and to permit persons to whom the 13 | Software is furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 24 | DEALINGS IN THE SOFTWARE. 25 | -------------------------------------------------------------------------------- /examples/advanced/quadwell.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "%matplotlib inline\n", 12 | "import numpy as np\n", 13 | "from matplotlib import pyplot as plt\n", 14 | "from msmbuilder.example_datasets import QuadWell, quadwell_eigs\n", 15 | "from msmbuilder.cluster import NDGrid\n", 16 | "from msmbuilder.msm import MarkovStateModel\n", 17 | "from sklearn.pipeline import Pipeline" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": null, 23 | "metadata": { 24 | "collapsed": false 25 | }, 26 | "outputs": [], 27 | "source": [ 28 | "dataset = QuadWell(random_state=0).get()\n", 29 | "true_eigenvalues = quadwell_eigs(200)[0]\n", 30 | "true_timescales = -1 / np.log(true_eigenvalues[1:])\n", 31 | "print(QuadWell.description())" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": { 38 | "collapsed": false 39 | }, 40 | "outputs": [], 41 | "source": [ 42 | "def msm_timescales(trajectories, n_states):\n", 43 | " pipeline = Pipeline([\n", 44 | " ('grid', NDGrid(min=-1.2, max=1.2)),\n", 45 | " ('msm', MarkovStateModel(n_timescales=4, reversible_type='transpose', verbose=False))\n", 46 | " ])\n", 47 | " pipeline.set_params(grid__n_bins_per_feature=n_states)\n", 48 | " pipeline.fit(trajectories)\n", 49 | " return pipeline.named_steps['msm'].timescales_\n", 50 | "\n", 51 | "n_states = [5, 10, 50, 100]\n", 52 | "ts = np.array([msm_timescales(dataset.trajectories, n) for n in n_states])" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": { 59 | "collapsed": false 60 | }, 61 | "outputs": [], 62 | "source": [ 63 | "for i, c in enumerate(['b', 'r', 'm']):\n", 64 | " plt.plot(n_states, ts[:, i], c=c, marker='x')\n", 65 | " plt.axhline(true_timescales[i], ls='--', c=c, lw=2)\n", 66 | "\n", 67 | "plt.xlabel('Number of states')\n", 68 | "plt.ylabel('Timescale (steps)')\n", 69 | "plt.show()" 70 | ] 71 | } 72 | ], 73 | "metadata": { 74 | "kernelspec": { 75 | "display_name": "Python 3", 76 | "language": "python", 77 | "name": "python3" 78 | }, 79 | "language_info": { 80 | "codemirror_mode": { 81 | "name": "ipython", 82 | "version": 3 83 | }, 84 | "file_extension": ".py", 85 | "mimetype": "text/x-python", 86 | "name": "python", 87 | "nbconvert_exporter": "python", 88 | "pygments_lexer": "ipython3", 89 | "version": "3.4.3" 90 | } 91 | }, 92 | "nbformat": 4, 93 | "nbformat_minor": 0 94 | } 95 | -------------------------------------------------------------------------------- /msmbuilder/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msmbuilder/msmbuilder/515fd5c27836c797692d600216b5eb224dfc1c5d/msmbuilder/__init__.py -------------------------------------------------------------------------------- /msmbuilder/base.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, absolute_import, 
division 2 | 3 | from sklearn.base import BaseEstimator as SklearnBaseEstimator 4 | 5 | 6 | class BaseEstimator(SklearnBaseEstimator): 7 | # http://msmbuilder.org/development/apipatterns.html 8 | 9 | def summarize(self): 10 | """Return some diagnostic summary statistics about this Markov model""" 11 | return 'NotImplemented' 12 | -------------------------------------------------------------------------------- /msmbuilder/cluster/.gitignore: -------------------------------------------------------------------------------- 1 | _kmedoids.cpp -------------------------------------------------------------------------------- /msmbuilder/cluster/src/kmedoids.h: -------------------------------------------------------------------------------- 1 | /******************************************************************************/ 2 | /* The C Clustering Library. 3 | * Copyright (C) 2002 Michiel Jan Laurens de Hoon. 4 | * 5 | * This library was written at the Laboratory of DNA Information Analysis, 6 | * Human Genome Center, Institute of Medical Science, University of Tokyo, 7 | * 4-6-1 Shirokanedai, Minato-ku, Tokyo 108-8639, Japan. 8 | * Contact: mdehoon 'AT' gsc.riken.jp 9 | * 10 | * Permission to use, copy, modify, and distribute this software and its 11 | * documentation with or without modifications and for any purpose and 12 | * without fee is hereby granted, provided that any copyright notices 13 | * appear in all copies and that both those copyright notices and this 14 | * permission notice appear in supporting documentation, and that the 15 | * names of the contributors or copyright holders not be used in 16 | * advertising or publicity pertaining to distribution of the software 17 | * without specific prior permission. 18 | * 19 | * THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL 20 | * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED 21 | * WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE 22 | * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT 23 | * OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS 24 | * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE 25 | * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE 26 | * OR PERFORMANCE OF THIS SOFTWARE. 27 | * 28 | */ 29 | 30 | #ifndef MIXTAPE_CLUSTER_KMEDOIDS_H 31 | #define MIXTAPE_CLUSTER_KMEDOIDS_H 32 | #include 33 | #include 34 | #include 35 | 36 | void kmedoids(npy_intp nclusters, npy_intp nelements, double* distmatrix, 37 | npy_intp npass, npy_intp clusterid[], PyObject* random, 38 | double* error, npy_intp* ifound); 39 | 40 | 41 | /* 42 | Renumber cluster ids to go from 0 to n_clusters - 1. 43 | This function modifies the array inplace, and returns 44 | the mapping from the old values to new values. 
45 | */ 46 | std::map contigify_ids(npy_intp* ids, npy_intp length); 47 | 48 | 49 | #endif 50 | -------------------------------------------------------------------------------- /msmbuilder/commands/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from .atom_indices import AtomIndices 4 | from .convert_chunked_project import ConvertChunkedProject 5 | from .example_datasets import AlanineDipeptideDatasetCommand 6 | from .featurizer import (AtomPairsFeaturizerCommand, ContactFeaturizerCommand, 7 | DihedralFeaturizerCommand, DRIDFeaturizerCommand, 8 | SuperposeFeaturizerCommand, 9 | KappaAngleFeaturizerCommand, 10 | AlphaAngleFeaturizerCommand, RMSDFeaturizerCommand, 11 | LandMarkRMSDFeaturizerCommand, 12 | BinaryContactFeaturizerCommand, 13 | LogisticContactFeaturizerCommand, 14 | VonMisesFeaturizerCommand, 15 | RawPositionsFeaturizerCommand, SASAFeaturizerCommand, 16 | LigandContactFeaturizerCommand, 17 | BinaryLigandContactFeaturizerCommand, 18 | LigandRMSDFeaturizerCommand) 19 | from .fit import (GaussianHMMCommand, MarkovStateModelCommand, 20 | BayesianMarkovStateModelCommand, ContinuousTimeMSMCommand, 21 | BayesianContinuousTimeMSMCommand) 22 | 23 | try: 24 | from .fit_transform import RobustScalerCommand, StandardScalerCommand 25 | except: 26 | pass 27 | 28 | from .fit_transform import (tICACommand, ButterworthCommand, DoubleEWMACommand, 29 | SparseTICACommand, FastICACommand, 30 | FactorAnalysisCommand, KernelTICACommand, 31 | PCACommand, SparsePCACommand, 32 | MiniBatchSparsePCACommand, 33 | KMeansCommand, MiniBatchKMeansCommand, 34 | KCentersCommand, KMedoidsCommand, 35 | MiniBatchKMedoidsCommand, RegularSpatialCommand, 36 | LandmarkAgglomerativeCommand, GMMCommand, 37 | MeanShiftCommand, NDGridCommand, 38 | SpectralClusteringCommand, 39 | AffinityPropagationCommand, APMCommand, 40 | AgglomerativeClusteringCommand, KSparseTICACommand) 41 | from .transform import TransformCommand 42 | from .example_datasets import (AlanineDipeptideDatasetCommand, 43 | FsPeptideDatasetCommand) 44 | from .atom_indices import AtomIndices 45 | from .implied_timescales import ImpliedTimescales 46 | from .template_project import TemplateProjectCommand 47 | from .transform import TransformCommand 48 | -------------------------------------------------------------------------------- /msmbuilder/commands/example_datasets.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, absolute_import 2 | 3 | from ..cmdline import NumpydocClassCommand 4 | from ..example_datasets import (AlanineDipeptide, DoubleWell, QuadWell, FsPeptide, 5 | MetEnkephalin, MullerPotential) 6 | 7 | 8 | class DatasetCommand(NumpydocClassCommand): 9 | _group = 'Dataset' 10 | def start(self): 11 | self.instance.cache() 12 | print('Example dataset saved: %s' % self.instance.data_dir) 13 | 14 | 15 | class AlanineDipeptideDatasetCommand(DatasetCommand): 16 | _concrete = True 17 | klass = AlanineDipeptide 18 | description = 'Download example alanine dipeptide dataset.' 
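# Illustrative sketch (not part of the original module): the dataset commands
# above simply call ``cache()`` on a dataset instance and report where the
# files were saved. The same thing can be done directly from Python;
# ``cache``, ``get`` and ``data_dir`` are taken from the dataset classes shown
# later in this repository, so treat the exact names as assumptions.
if __name__ == '__main__':
    from msmbuilder.example_datasets import AlanineDipeptide

    ds = AlanineDipeptide()   # optionally AlanineDipeptide(data_home='...')
    ds.cache()                # download the files if they are not cached yet
    bunch = ds.get()          # Bunch with ``trajectories`` and ``DESCR``
    print(ds.data_dir, len(bunch.trajectories))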
19 | 20 | 21 | class _NWellDatasetCommand(DatasetCommand): 22 | def _random_state_type(self, s): 23 | if s is not None: 24 | return int(s) 25 | else: 26 | return s 27 | 28 | 29 | class DoubleWellDatasetCommand(_NWellDatasetCommand): 30 | _concrete = True 31 | klass = DoubleWell 32 | description = ('Generate example double well potential dataset.\n\n' + 33 | DoubleWell.description()) 34 | 35 | 36 | class QuadWellDatasetCommand(_NWellDatasetCommand): 37 | _concrete = True 38 | klass = QuadWell 39 | description = ('Generate example quad-well potential dataset.\n\n' + 40 | QuadWell.description()) 41 | 42 | 43 | class MullerPotentialDatasetCommand(_NWellDatasetCommand): 44 | _concrete = True 45 | klass = MullerPotential 46 | description = ('Generate example Muller potential dataset.\n\n' 47 | + MullerPotential.description()) 48 | 49 | 50 | class FsPeptideDatasetCommand(DatasetCommand): 51 | _concrete = True 52 | klass = FsPeptide 53 | description = 'Download example Fs-peptide dataset.' 54 | 55 | 56 | class MetEnkephalinDatasetCommand(DatasetCommand): 57 | _concrete = True 58 | klass = MetEnkephalin 59 | description = 'Download example Met-Enkephalin dataset.' 60 | -------------------------------------------------------------------------------- /msmbuilder/commands/fit.py: -------------------------------------------------------------------------------- 1 | # Author: Robert McGibbon 2 | # Contributors: Brooke Husic 3 | # Copyright (c) 2014, Stanford University 4 | # All rights reserved. 5 | 6 | # ----------------------------------------------------------------------------- 7 | # Imports 8 | # ----------------------------------------------------------------------------- 9 | 10 | from __future__ import print_function, absolute_import 11 | 12 | import os 13 | 14 | from ..dataset import dataset 15 | from ..utils import verbosedump 16 | from ..hmm import GaussianHMM 17 | from ..msm import (MarkovStateModel, BayesianMarkovStateModel, ContinuousTimeMSM, 18 | BayesianContinuousTimeMSM) 19 | from ..cmdline import NumpydocClassCommand, argument, exttype 20 | 21 | 22 | class FitCommand(NumpydocClassCommand): 23 | inp = argument( 24 | '-i', '--inp', help='''Input dataset. This should be serialized 25 | list of numpy arrays.''', required=True, type=os.path.expanduser) 26 | model = argument( 27 | '-o', '--out', help='''Output (fit) model. 
This will be a 28 | serialized instance of the fit model object.''', required=True, 29 | type=exttype('.pkl')) 30 | 31 | def start(self): 32 | if not os.path.exists(self.inp): 33 | self.error('File does not exist: %s' % self.inp) 34 | 35 | print(self.instance) 36 | inp_ds = dataset(self.inp, mode='r') 37 | self.instance.fit(inp_ds) 38 | 39 | print("*********\n*RESULTS*\n*********") 40 | print(self.instance.summarize()) 41 | print('-' * 80) 42 | 43 | verbosedump(self.instance, self.out) 44 | print("To load this %s object interactively inside an IPython\n" 45 | "shell or notebook, run: \n" % self.klass.__name__) 46 | print(" $ ipython") 47 | print(" >>> from msmbuilder.utils import load") 48 | print(" >>> model = load('%s')\n" % self.out) 49 | 50 | inp_ds.close() 51 | 52 | class GaussianHMMCommand(FitCommand): 53 | klass = GaussianHMM 54 | _concrete = True 55 | _group = 'MSM' 56 | 57 | 58 | class MarkovStateModelCommand(FitCommand): 59 | klass = MarkovStateModel 60 | _concrete = True 61 | _group = 'MSM' 62 | 63 | def _ergodic_cutoff_type(self, erg): 64 | if erg.lower() in ['on', 'off']: 65 | return erg 66 | else: 67 | return float(erg) 68 | 69 | 70 | class BayesianMarkovStateModelCommand(FitCommand): 71 | klass = BayesianMarkovStateModel 72 | _concrete = True 73 | _group = 'MSM' 74 | 75 | 76 | class ContinuousTimeMSMCommand(FitCommand): 77 | klass = ContinuousTimeMSM 78 | _concrete = True 79 | _group = 'MSM' 80 | 81 | 82 | class BayesianContinuousTimeMSMCommand(FitCommand): 83 | klass = BayesianContinuousTimeMSM 84 | _concrete = True 85 | _group = 'MSM' 86 | -------------------------------------------------------------------------------- /msmbuilder/commands/template_project.py: -------------------------------------------------------------------------------- 1 | """Set up a new MSMBuilder project 2 | 3 | """ 4 | # Author: Matthew Harrigan 5 | # Contributors: 6 | # Copyright (c) 2016, Stanford University 7 | # All rights reserved. 8 | 9 | from __future__ import print_function, division, absolute_import 10 | 11 | import os 12 | import stat 13 | import textwrap 14 | 15 | from ..cmdline import NumpydocClassCommand, argument 16 | from ..io import TemplateProject 17 | 18 | 19 | def chmod_plus_x(fn): 20 | st = os.stat(fn) 21 | os.chmod(fn, st.st_mode | stat.S_IEXEC) 22 | 23 | 24 | class TemplateProjectCommand(NumpydocClassCommand): 25 | _group = '0-Support' 26 | _concrete = True 27 | description = __doc__ 28 | klass = TemplateProject 29 | 30 | disclaimer = argument('--disclaimer', default=False, action='store_true', 31 | help="Print a disclaimer about using these templates.") 32 | 33 | def print_disclaimer(self): 34 | print('\n'.join(textwrap.wrap( 35 | "This writes a bunch of Python files that can guide you " 36 | "through analyzing a system with MSMBuilder. I implore you to " 37 | "look at the scripts before you start blindly running them. " 38 | "You will likely have to change some (hyper-)parameters or " 39 | "filenames to match your particular project." 40 | ))) 41 | print() 42 | print('\n'.join(textwrap.wrap( 43 | "More than that, however, it is important that you understand " 44 | "exactly what the scripts are doing. Each protein system is " 45 | "different, and it is up to you (the researcher) to hone in on " 46 | "interesting aspects. This very generic pipeline may not give " 47 | "you any new insight for anything but the simplest systems." 
48 | ))) 49 | 50 | def start(self): 51 | if self.disclaimer: 52 | self.print_disclaimer() 53 | print() 54 | print("Run again without --disclaimer to actually write templates.") 55 | return 56 | 57 | self.instance.do() 58 | -------------------------------------------------------------------------------- /msmbuilder/decomposition/.gitignore: -------------------------------------------------------------------------------- 1 | _speigh.cpp -------------------------------------------------------------------------------- /msmbuilder/decomposition/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from sklearn import decomposition as _decomposition 4 | 5 | from .base import MultiSequenceDecompositionMixin 6 | from .ktica import KernelTICA 7 | from .pca import PCA, SparsePCA, MiniBatchSparsePCA 8 | from .sparsetica import SparseTICA 9 | from .ksparsetica import KSparseTICA 10 | from .tica import tICA 11 | 12 | 13 | class FastICA(MultiSequenceDecompositionMixin, _decomposition.FastICA): 14 | __doc__ = _decomposition.FastICA.__doc__ 15 | 16 | def summarize(self): 17 | return '\n'.join([ 18 | "Independent Component Analysis (ICA)", 19 | "----------", 20 | "Number of components: {n_components}", 21 | "Number of iterations: {n_iter_}", 22 | ]).format(**self.__dict__) 23 | 24 | 25 | class FactorAnalysis(MultiSequenceDecompositionMixin, 26 | _decomposition.FactorAnalysis): 27 | __doc__ = _decomposition.FactorAnalysis.__doc__ 28 | 29 | def summarize(self): 30 | return '\n'.join([ 31 | "FactorAnalysis (FA)", 32 | "----------", 33 | "Number of components: {n_components}", 34 | "Log likelihood: {loglike_}", 35 | "Noise variance: {noise_variance_}", 36 | "Number of iterations: {n_iter_}", 37 | ]).format(**self.__dict__) 38 | -------------------------------------------------------------------------------- /msmbuilder/decomposition/pca.py: -------------------------------------------------------------------------------- 1 | # Author: Matthew Harrigan 2 | # Contributors: 3 | # Copyright (c) 2016, Stanford University and the Authors 4 | # All rights reserved. 
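# Illustrative usage sketch (commented out; not part of this module): the
# classes in this package wrap their scikit-learn counterparts with
# MultiSequenceDecompositionMixin, so they are fit on a *list* of 2-D arrays
# (one per trajectory) rather than on a single array. The random data below
# is made up purely for illustration.
#
#     import numpy as np
#     from msmbuilder.decomposition import PCA
#
#     trajs = [np.random.randn(500, 10), np.random.randn(400, 10)]
#     pca = PCA(n_components=2)
#     reduced = pca.fit_transform(trajs)   # list of (n_frames_i, 2) arrays
#     print(pca.summarize())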
5 | 6 | from __future__ import print_function, division, absolute_import 7 | 8 | from sklearn import decomposition 9 | 10 | from .base import MultiSequenceDecompositionMixin 11 | 12 | __all__ = ['PCA', 'SparsePCA'] 13 | 14 | 15 | class PCA(MultiSequenceDecompositionMixin, decomposition.PCA): 16 | __doc__ = decomposition.PCA.__doc__ 17 | 18 | def summarize(self): 19 | return '\n'.join([ 20 | "Principal Component Analysis (PCA)", 21 | "----------", 22 | "Number of components: {n_components}", 23 | "Explained variance ratio: {explained_variance_ratio_}", 24 | "Noise variance: {noise_variance_}", 25 | ]).format(**self.__dict__) 26 | 27 | 28 | class SparsePCA(MultiSequenceDecompositionMixin, decomposition.SparsePCA): 29 | __doc__ = decomposition.SparsePCA.__doc__ 30 | 31 | def summarize(self): 32 | return '\n'.join([ 33 | "Sparse PCA", 34 | "----------", 35 | "Number of components: {n_components}", 36 | ]).format(**self.__dict__) 37 | 38 | 39 | class MiniBatchSparsePCA(MultiSequenceDecompositionMixin, 40 | decomposition.MiniBatchSparsePCA): 41 | __doc__ = decomposition.MiniBatchSparsePCA.__doc__ 42 | 43 | def summarize(self): 44 | return '\n'.join([ 45 | "MiniBatch Sparse PCA", 46 | "--------------------", 47 | "Number of components: {n_components}", 48 | "Batch size: {batch_size}" 49 | ]).format(**self.__dict__) 50 | 51 | 52 | class KernelPCA(MultiSequenceDecompositionMixin, decomposition.KernelPCA): 53 | __doc__ = decomposition.KernelPCA.__doc__ 54 | 55 | def summarize(self): 56 | return '\n'.join([ 57 | "Kernel PCA", 58 | "--------------------", 59 | "Number of components: {n_components}", 60 | "Kernel: {kernel}", 61 | ]).format(**self.__dict__) 62 | -------------------------------------------------------------------------------- /msmbuilder/decomposition/utils.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import itertools 3 | import numpy as np 4 | from six.moves import xrange 5 | 6 | 7 | def iterate_tracker(maxiter, max_nc, verbose=False): 8 | """Generator that breaks after maxiter, or after the same 9 | array has been sent in more than max_nc times in a row. 10 | """ 11 | last_hash = None 12 | last_hash_count = 0 13 | arr = yield 14 | 15 | for i in xrange(maxiter): 16 | arr = yield i 17 | if arr is not None: 18 | hsh = hashlib.sha1(arr.view(np.uint8)).hexdigest() 19 | if last_hash == hsh: 20 | last_hash_count += 1 21 | else: 22 | last_hash = hsh 23 | last_hash_count = 1 24 | 25 | if last_hash_count >= max_nc: 26 | if verbose: 27 | print('Termination. Over %d iterations without 28 | change.' 
% max_nc) 29 | break -------------------------------------------------------------------------------- /msmbuilder/example_datasets/.gitignore: -------------------------------------------------------------------------------- 1 | _muller.c 2 | -------------------------------------------------------------------------------- /msmbuilder/example_datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from .base import get_data_home, clear_data_home, has_msmb_data 3 | from .brownian1d import DoubleWell, QuadWell 4 | from .brownian1d import load_doublewell, load_quadwell 5 | from .brownian1d import doublewell_eigs, quadwell_eigs 6 | from .alanine_dipeptide import fetch_alanine_dipeptide, AlanineDipeptide 7 | from .met_enkephalin import fetch_met_enkephalin, MetEnkephalin 8 | from .fs_peptide import fetch_fs_peptide, FsPeptide, MinimalFsPeptide 9 | from .muller import MullerPotential, load_muller 10 | 11 | __all__ = [ 12 | 'get_data_home', 13 | 'clear_data_home', 14 | 'has_msmb_data', 15 | 'load_doublewell', 16 | 'load_quadwell', 17 | 'doublewell_eigs', 18 | 'quadwell_eigs', 19 | 'fetch_alanine_dipeptide', 20 | 'fetch_met_enkephalin', 21 | 'fetch_fs_peptide', 22 | 'AlanineDipeptide', 23 | 'MetEnkephalin', 24 | 'FsPeptide', 25 | 'DoubleWell', 26 | 'QuadWell', 27 | 'MullerPotential', 28 | 'load_muller', 29 | ] 30 | -------------------------------------------------------------------------------- /msmbuilder/example_datasets/alanine_dipeptide.py: -------------------------------------------------------------------------------- 1 | # Author: Robert McGibbon 2 | # Contributors: Matthew Harrigan 3 | # Copyright (c) 2016, Stanford University and the Authors 4 | # All rights reserved. 5 | 6 | # ----------------------------------------------------------------------------- 7 | # Imports 8 | # ----------------------------------------------------------------------------- 9 | from __future__ import print_function, absolute_import, division 10 | 11 | from glob import glob 12 | from os.path import join 13 | 14 | import mdtraj as md 15 | 16 | from .base import Bunch, _MDDataset 17 | 18 | DATA_URL = "https://ndownloader.figshare.com/articles/1026131/versions/8" 19 | TARGET_DIRECTORY = "alanine_dipeptide" 20 | 21 | 22 | class AlanineDipeptide(_MDDataset): 23 | """Alanine dipeptide dataset 24 | 25 | Parameters 26 | ---------- 27 | data_home : optional, default: None 28 | Specify another download and cache folder for the datasets. By default 29 | all MSMBuilder data is stored in '~/msmbuilder_data' subfolders. 30 | 31 | 32 | Notes 33 | ----- 34 | The dataset consists of ten 10ns trajectories of of alanine dipeptide, 35 | simulated using OpenMM 6.0.1 (CUDA platform, NVIDIA GTX660) with the 36 | AMBER99SB-ILDN force field at 300K (langevin dynamics, friction coefficient 37 | of 91/ps, timestep of 2fs) with GBSA implicit solvent. The coordinates are 38 | saved every 1ps. Each trajectory contains 9,999 snapshots. 
39 | 40 | The dataset, including the script used to generate the dataset 41 | is available on figshare at 42 | 43 | http://dx.doi.org/10.6084/m9.figshare.1026131 44 | """ 45 | target_directory = TARGET_DIRECTORY 46 | data_url = DATA_URL 47 | 48 | def get_cached(self): 49 | top = md.load(join(self.data_dir, 'ala2.pdb')) 50 | trajectories = [] 51 | for fn in glob(join(self.data_dir, 'trajectory*.dcd')): 52 | trajectories.append(md.load(fn, top=top)) 53 | 54 | return Bunch(trajectories=trajectories, DESCR=self.description()) 55 | 56 | 57 | def fetch_alanine_dipeptide(data_home=None): 58 | return AlanineDipeptide(data_home).get() 59 | 60 | 61 | fetch_alanine_dipeptide.__doc__ = AlanineDipeptide.__doc__ 62 | -------------------------------------------------------------------------------- /msmbuilder/example_datasets/met_enkephalin.py: -------------------------------------------------------------------------------- 1 | # Author: Robert McGibbon 2 | # Contributors: 3 | # Copyright (c) 2014, Stanford University and the Authors 4 | # All rights reserved. 5 | 6 | # ----------------------------------------------------------------------------- 7 | # Imports 8 | # ----------------------------------------------------------------------------- 9 | from __future__ import print_function, absolute_import, division 10 | 11 | from glob import glob 12 | from os.path import join 13 | 14 | import mdtraj as md 15 | 16 | from .base import Bunch, _MDDataset 17 | 18 | DATA_URL = "https://ndownloader.figshare.com/articles/1026324/versions/1" 19 | TARGET_DIRECTORY = "met_enkephalin" 20 | 21 | 22 | class MetEnkephalin(_MDDataset): 23 | """Loader for the met-enkephalin dataset 24 | 25 | Parameters 26 | ---------- 27 | data_home : optional, default: None 28 | Specify another download and cache folder for the datasets. By default 29 | all MSMBuilder data is stored in '~/msmbuilder_data' subfolders. 30 | 31 | download_if_missing: optional, True by default 32 | If False, raise a IOError if the data is not locally available 33 | instead of trying to download the data from the source site. 34 | 35 | Notes 36 | ----- 37 | The dataset consists of ten ~50 ns molecular dynamics (MD) simulation 38 | trajectories of the 5 residue Met-enkaphalin peptide. The aggregate 39 | sampling is 499.58 ns. Simulations were performed starting from the 1st 40 | model in the 1PLX PDB file, solvated with 832 TIP3P water molecules using 41 | OpenMM 6.0. The coordinates (protein only -- the water was stripped) 42 | are saved every 5 picoseconds. Each of the ten trajectories is roughly 43 | 50 ns long and contains about 10,000 snapshots. 44 | 45 | Forcefield: amber99sb-ildn; water: tip3p; nonbonded method: PME; cutoffs: 46 | 1nm; bonds to hydrogen were constrained; integrator: langevin dynamics; 47 | temperature: 300K; friction coefficient: 1.0/ps; pressure control: Monte 48 | Carlo barostat (interval of 25 steps); timestep 2 fs. 
49 | 50 | The dataset is available on figshare at 51 | 52 | http://dx.doi.org/10.6084/m9.figshare.1026324 53 | """ 54 | 55 | data_url = DATA_URL 56 | target_directory = TARGET_DIRECTORY 57 | 58 | def get_cached(self): 59 | top = md.load(join(self.data_dir, '1plx.pdb')) 60 | trajectories = [] 61 | for fn in glob(join(self.data_dir, 'trajectory*.dcd')): 62 | trajectories.append(md.load(fn, top=top)) 63 | 64 | return Bunch(trajectories=trajectories, DESCR=self.description()) 65 | 66 | 67 | def fetch_met_enkephalin(data_home=None): 68 | return MetEnkephalin(data_home).get() 69 | 70 | 71 | fetch_met_enkephalin.__doc__ = MetEnkephalin.__doc__ 72 | -------------------------------------------------------------------------------- /msmbuilder/feature_extraction/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from ..featurizer import * 4 | from ..featurizer import subset 5 | -------------------------------------------------------------------------------- /msmbuilder/feature_selection/__init__.py: -------------------------------------------------------------------------------- 1 | # Author: Carlos Xavier Hernandez 2 | # Contributors: 3 | # Copyright (c) 2016, Stanford University and the Authors 4 | # All rights reserved. 5 | 6 | from __future__ import absolute_import 7 | 8 | from .base import MultiSequenceFeatureSelectionMixin 9 | from .featureselector import FeatureSlicer, FeatureSelector 10 | 11 | from sklearn import feature_selection 12 | 13 | class VarianceThreshold(MultiSequenceFeatureSelectionMixin, 14 | feature_selection.VarianceThreshold): 15 | __doc__ = feature_selection.VarianceThreshold.__doc__ 16 | -------------------------------------------------------------------------------- /msmbuilder/feature_selection/base.py: -------------------------------------------------------------------------------- 1 | # Author: Carlos Xavier Hernandez 2 | # Contributors: 3 | # Copyright (c) 2016, Stanford University and the Authors 4 | # All rights reserved. 5 | 6 | from __future__ import absolute_import 7 | 8 | from ..decomposition.base import MultiSequenceDecompositionMixin 9 | 10 | 11 | class MultiSequenceFeatureSelectionMixin(MultiSequenceDecompositionMixin): 12 | __doc__ = MultiSequenceDecompositionMixin.__doc__ 13 | -------------------------------------------------------------------------------- /msmbuilder/featurizer/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from .feature_union import FeatureUnion 4 | from .featurizer import * 5 | from .indices import get_atompair_indices 6 | from .multiseq_featuizer import * 7 | from .multichain import * 8 | -------------------------------------------------------------------------------- /msmbuilder/featurizer/indices.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | 3 | import numpy as np 4 | 5 | ATOM_NAMES = ["N", "CA", "CB", "C", "O", "H"] 6 | 7 | 8 | def get_atompair_indices(reference_traj, keep_atoms=None, 9 | exclude_atoms=None, reject_bonded=True): 10 | """Get a list of acceptable atom pairs. 11 | 12 | Parameters 13 | ---------- 14 | reference_traj : mdtraj.Trajectory 15 | Trajectory to grab atom pairs from 16 | keep_atoms : np.ndarray, dtype=string, optional 17 | Select only these atom names. 
Defaults to N, CA, CB, C, O, H 18 | exclude_atoms : np.ndarray, dtype=string, optional 19 | Exclude these atom names 20 | reject_bonded : bool, default=True 21 | If True, exclude bonded atompairs. 22 | 23 | Returns 24 | ------- 25 | atom_indices : np.ndarray, dtype=int 26 | The atom indices that pass your criteria 27 | pair_indices : np.ndarray, dtype=int, shape=(N, 2) 28 | Pairs of atom indices that pass your criteria. 29 | 30 | Notes 31 | ----- 32 | This function has been optimized for speed. A naive implementation 33 | can be slow (~minutes) for large proteins. 34 | """ 35 | if keep_atoms is None: 36 | keep_atoms = ATOM_NAMES 37 | 38 | top, bonds = reference_traj.top.to_dataframe() 39 | 40 | if keep_atoms is not None: 41 | atom_indices = top[top.name.isin(keep_atoms) == True].index.values 42 | 43 | if exclude_atoms is not None: 44 | atom_indices = top[top.name.isin(exclude_atoms) == False].index.values 45 | 46 | pair_indices = np.array(list(itertools.combinations(atom_indices, 2))) 47 | 48 | if reject_bonded: 49 | a_list = bonds.min(1) 50 | b_list = bonds.max(1) 51 | 52 | n = atom_indices.max() + 1 53 | 54 | bond_hashes = a_list + b_list * n 55 | pair_hashes = pair_indices[:, 0] + pair_indices[:, 1] * n 56 | 57 | not_bonds = ~np.in1d(pair_hashes, bond_hashes) 58 | 59 | pair_indices = np.array([(a, b) for k, (a, b) 60 | in enumerate(pair_indices) 61 | if not_bonds[k]]) 62 | 63 | return atom_indices, pair_indices 64 | -------------------------------------------------------------------------------- /msmbuilder/hmm/.gitignore: -------------------------------------------------------------------------------- 1 | gaussian.cpp 2 | gaussian.h 3 | vonmises.cpp 4 | vonmises.h 5 | -------------------------------------------------------------------------------- /msmbuilder/hmm/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from .gaussian import GaussianHMM 3 | from .vonmises import VonMisesHMM 4 | -------------------------------------------------------------------------------- /msmbuilder/hmm/cephes/README.md: -------------------------------------------------------------------------------- 1 | This code is from Cephes, download directly from netlib: 2 | 3 | http://www.netlib.no/netlib/cephes/ 4 | 5 | 6 | The original copyright, from the readme file of that distribution: 7 | 8 | Some software in this archive may be from the book _Methods and 9 | Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster 10 | International, 1989) or from the Cephes Mathematical Library, a 11 | commercial product. In either event, it is copyrighted by the author. 12 | What you see here may be used freely but it comes with no support or 13 | guarantee. 14 | 15 | The two known misprints in the book are repaired here in the 16 | source listings for the gamma function and the incomplete beta 17 | integral. 18 | 19 | 20 | Stephen L. 
Moshier 21 | moshier@na-net.ornl.gov 22 | -------------------------------------------------------------------------------- /msmbuilder/hmm/cephes/cephes.h: -------------------------------------------------------------------------------- 1 | #ifndef _CEPHES_H_ 2 | #define _CEPHES_H_ 3 | 4 | #include "cephes_names.h" 5 | int mtherr(char *name, int code); 6 | double i0(double x); 7 | double i1(double x); 8 | double zeta(double x, double q); 9 | double psi(double x); 10 | double lgam(double x); 11 | double p1evl(double x, double coef[], int N); 12 | double polevl(double x, double coef[], int N); 13 | double chbevl(double x, double array[], int n); 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /msmbuilder/hmm/cephes/cephes_names.h: -------------------------------------------------------------------------------- 1 | #ifndef CEPHES_NAMES_H 2 | #define CEPHES_NAMES_H 3 | 4 | #define airy cephes_airy 5 | #define bdtrc cephes_bdtrc 6 | #define bdtr cephes_bdtr 7 | #define bdtri cephes_bdtri 8 | #define beta cephes_beta 9 | #define lbeta cephes_lbeta 10 | #define btdtr cephes_btdtr 11 | #define cbrt cephes_cbrt 12 | #define chdtrc cephes_chdtrc 13 | #define chdtr cephes_chdtr 14 | #define chdtri cephes_chdtri 15 | #define dawsn cephes_dawsn 16 | #define ellie cephes_ellie 17 | #define ellik cephes_ellik 18 | #define ellpe cephes_ellpe 19 | #define ellpj cephes_ellpj 20 | #define ellpk cephes_ellpk 21 | #define exp10 cephes_exp10 22 | #define exp1m cephes_exp1m 23 | #define exp2 cephes_exp2 24 | #define expn cephes_expn 25 | // #define fabs cephes_fabs 26 | #define fdtrc cephes_fdtrc 27 | #define fdtr cephes_fdtr 28 | #define fdtri cephes_fdtri 29 | #define fresnl cephes_fresnl 30 | #define Gamma cephes_Gamma 31 | #define lgam cephes_lgam 32 | #define gdtr cephes_gdtr 33 | #define gdtrc cephes_gdtrc 34 | #define gdtri cephes_gdtri 35 | #define hyp2f1 cephes_hyp2f1 36 | #define hyperg cephes_hyperg 37 | #define hyp2f0 cephes_hyp2f0 38 | #define onef2 cephes_onef2 39 | #define threef0 cephes_threef0 40 | #define i0 cephes_i0 41 | #define i0e cephes_i0e 42 | #define i1 cephes_i1 43 | #define i1e cephes_i1e 44 | #define igamc cephes_igamc 45 | #define igam cephes_igam 46 | #define igami cephes_igami 47 | #define incbet cephes_incbet 48 | #define incbi cephes_incbi 49 | #define iv cephes_iv 50 | #define j0 cephes_j0 51 | #define y0 cephes_y0 52 | #define j1 cephes_j1 53 | #define y1 cephes_y1 54 | #define jn cephes_jn 55 | #define jv cephes_jv 56 | #define k0 cephes_k0 57 | #define k0e cephes_k0e 58 | #define k1 cephes_k1 59 | #define k1e cephes_k1e 60 | #define kn cephes_kn 61 | #define nbdtrc cephes_nbdtrc 62 | #define nbdtr cephes_nbdtr 63 | #define nbdtri cephes_nbdtri 64 | #define ndtr cephes_ndtr 65 | #define erfc cephes_erfc 66 | #define erf cephes_erf 67 | #define ndtri cephes_ndtri 68 | #define pdtrc cephes_pdtrc 69 | #define pdtr cephes_pdtr 70 | #define pdtri cephes_pdtri 71 | #define psi cephes_psi 72 | #define rgamma cephes_rgamma 73 | #define round cephes_round 74 | #define shichi cephes_shichi 75 | #define sici cephes_sici 76 | #define radian cephes_radian 77 | #define sindg cephes_sindg 78 | #define cosdg cephes_cosdg 79 | #define sincos cephes_sincos 80 | #define spence cephes_spence 81 | #define stdtr cephes_stdtr 82 | #define stdtri cephes_stdtri 83 | #define struve cephes_struve 84 | #define yv cephes_yv 85 | #define tandg cephes_tandg 86 | #define cotdg cephes_cotdg 87 | #define log1p cephes_log1p 88 | #define expm1 cephes_expm1 
89 | #define cosm1 cephes_cosm1 90 | #define yn cephes_yn 91 | #define zeta cephes_zeta 92 | #define zetac cephes_zetac 93 | #define smirnov cephes_smirnov 94 | #define smirnovi cephes_smirnovi 95 | #define kolmogorov cephes_kolmogorov 96 | #define kolmogi cephes_kolmogi 97 | 98 | #endif 99 | -------------------------------------------------------------------------------- /msmbuilder/hmm/cephes/chbevl.c: -------------------------------------------------------------------------------- 1 | /* chbevl.c 2 | * 3 | * Evaluate Chebyshev series 4 | * 5 | * 6 | * 7 | * SYNOPSIS: 8 | * 9 | * int N; 10 | * double x, y, coef[N], chebevl(); 11 | * 12 | * y = chbevl( x, coef, N ); 13 | * 14 | * 15 | * 16 | * DESCRIPTION: 17 | * 18 | * Evaluates the series 19 | * 20 | * N-1 21 | * - ' 22 | * y = > coef[i] T (x/2) 23 | * - i 24 | * i=0 25 | * 26 | * of Chebyshev polynomials Ti at argument x/2. 27 | * 28 | * Coefficients are stored in reverse order, i.e. the zero 29 | * order term is last in the array. Note N is the number of 30 | * coefficients, not the order. 31 | * 32 | * If coefficients are for the interval a to b, x must 33 | * have been transformed to x -> 2(2x - b - a)/(b-a) before 34 | * entering the routine. This maps x from (a, b) to (-1, 1), 35 | * over which the Chebyshev polynomials are defined. 36 | * 37 | * If the coefficients are for the inverted interval, in 38 | * which (a, b) is mapped to (1/b, 1/a), the transformation 39 | * required is x -> 2(2ab/x - b - a)/(b-a). If b is infinity, 40 | * this becomes x -> 4a/x - 1. 41 | * 42 | * 43 | * 44 | * SPEED: 45 | * 46 | * Taking advantage of the recurrence properties of the 47 | * Chebyshev polynomials, the routine requires one more 48 | * addition per loop than evaluating a nested polynomial of 49 | * the same degree. 50 | * 51 | */ 52 | /* chbevl.c */ 53 | 54 | /* 55 | Cephes Math Library Release 2.0: April, 1987 56 | Copyright 1985, 1987 by Stephen L. Moshier 57 | Direct inquiries to 30 Frost Street, Cambridge, MA 02140 58 | */ 59 | 60 | double chbevl(double x, double array[] , int n ) { 61 | double b0, b1, b2, *p; 62 | int i; 63 | 64 | p = array; 65 | b0 = *p++; 66 | b1 = 0.0; 67 | i = n - 1; 68 | 69 | do { 70 | b2 = b1; 71 | b1 = b0; 72 | b0 = x * b1 - b2 + *p++; 73 | } while( --i ); 74 | 75 | return( 0.5*(b0-b2) ); 76 | } 77 | -------------------------------------------------------------------------------- /msmbuilder/hmm/cephes/mtherr.c: -------------------------------------------------------------------------------- 1 | /* mtherr.c 2 | * 3 | * Library common error handling routine 4 | * 5 | * 6 | * 7 | * SYNOPSIS: 8 | * 9 | * char *fctnam; 10 | * int code; 11 | * int mtherr(); 12 | * 13 | * mtherr( fctnam, code ); 14 | * 15 | * 16 | * 17 | * DESCRIPTION: 18 | * 19 | * This routine may be called to report one of the following 20 | * error conditions (in the include file mconf.h). 21 | * 22 | * Mnemonic Value Significance 23 | * 24 | * DOMAIN 1 argument domain error 25 | * SING 2 function singularity 26 | * OVERFLOW 3 overflow range error 27 | * UNDERFLOW 4 underflow range error 28 | * TLOSS 5 total loss of precision 29 | * PLOSS 6 partial loss of precision 30 | * EDOM 33 Unix domain error code 31 | * ERANGE 34 Unix range error code 32 | * 33 | * The default version of the file prints the function name, 34 | * passed to it by the pointer fctnam, followed by the 35 | * error condition. The display is directed to the standard 36 | * output device. The routine then returns to the calling 37 | * program. 
Users may wish to modify the program to abort by 38 | * calling exit() under severe error conditions such as domain 39 | * errors. 40 | * 41 | * Since all error conditions pass control to this function, 42 | * the display may be easily changed, eliminated, or directed 43 | * to an error logging device. 44 | * 45 | * SEE ALSO: 46 | * 47 | * mconf.h 48 | * 49 | */ 50 | 51 | /* 52 | Cephes Math Library Release 2.0: April, 1987 53 | Copyright 1984, 1987 by Stephen L. Moshier 54 | Direct inquiries to 30 Frost Street, Cambridge, MA 02140 55 | */ 56 | 57 | #include 58 | #include "mconf.h" 59 | 60 | int merror = 0; 61 | 62 | /* Notice: the order of appearance of the following 63 | * messages is bound to the error codes defined 64 | * in mconf.h. 65 | */ 66 | static char *ermsg[7] = { 67 | "unknown", /* error code 0 */ 68 | "domain", /* error code 1 */ 69 | "singularity", /* et seq. */ 70 | "overflow", 71 | "underflow", 72 | "total loss of precision", 73 | "partial loss of precision" 74 | }; 75 | 76 | 77 | int mtherr(char* name, int code) 78 | { 79 | 80 | /* Display string passed by calling program, 81 | * which is supposed to be the name of the 82 | * function in which the error occurred: 83 | */ 84 | printf( "\n%s ", name ); 85 | 86 | /* Set global error message word */ 87 | merror = code; 88 | 89 | /* Display error message defined 90 | * by the code argument. 91 | */ 92 | if( (code <= 0) || (code >= 7) ) 93 | code = 0; 94 | printf( "%s error\n", ermsg[code] ); 95 | 96 | /* Return to calling 97 | * program 98 | */ 99 | return( 0 ); 100 | } 101 | -------------------------------------------------------------------------------- /msmbuilder/hmm/cephes/polevl.c: -------------------------------------------------------------------------------- 1 | /* polevl.c 2 | * p1evl.c 3 | * 4 | * Evaluate polynomial 5 | * 6 | * 7 | * 8 | * SYNOPSIS: 9 | * 10 | * int N; 11 | * double x, y, coef[N+1], polevl[]; 12 | * 13 | * y = polevl( x, coef, N ); 14 | * 15 | * 16 | * 17 | * DESCRIPTION: 18 | * 19 | * Evaluates polynomial of degree N: 20 | * 21 | * 2 N 22 | * y = C + C x + C x +...+ C x 23 | * 0 1 2 N 24 | * 25 | * Coefficients are stored in reverse order: 26 | * 27 | * coef[0] = C , ..., coef[N] = C . 28 | * N 0 29 | * 30 | * The function p1evl() assumes that coef[N] = 1.0 and is 31 | * omitted from the array. Its calling arguments are 32 | * otherwise the same as polevl(). 33 | * 34 | * 35 | * SPEED: 36 | * 37 | * In the interest of speed, there are no checks for out 38 | * of bounds arithmetic. This routine is used by most of 39 | * the functions in the library. Depending on available 40 | * equipment features, the user may wish to rewrite the 41 | * program in microcode or assembly language. 42 | * 43 | */ 44 | 45 | 46 | /* 47 | Cephes Math Library Release 2.1: December, 1988 48 | Copyright 1984, 1987, 1988 by Stephen L. Moshier 49 | Direct inquiries to 30 Frost Street, Cambridge, MA 02140 50 | */ 51 | 52 | 53 | double polevl(double x, double coef[], int N) 54 | { 55 | double ans; 56 | int i; 57 | double *p; 58 | 59 | p = coef; 60 | ans = *p++; 61 | i = N; 62 | 63 | do 64 | ans = ans * x + *p++; 65 | while( --i ); 66 | 67 | return( ans ); 68 | } 69 | 70 | /* p1evl() */ 71 | /* N 72 | * Evaluate polynomial when coefficient of x is 1.0. 73 | * Otherwise same as polevl. 
74 | */ 75 | 76 | double p1evl(double x, double coef[], int N) 77 | { 78 | double ans; 79 | double *p; 80 | int i; 81 | 82 | p = coef; 83 | ans = x + *p++; 84 | i = N-1; 85 | 86 | do 87 | ans = ans * x + *p++; 88 | while( --i ); 89 | 90 | return( ans ); 91 | } 92 | -------------------------------------------------------------------------------- /msmbuilder/hmm/src/include/GaussianHMMFitter.h: -------------------------------------------------------------------------------- 1 | #ifndef MIXTAPE_GAUSSIAN_HMM_FITTER_H 2 | #define MIXTAPE_GAUSSIAN_HMM_FITTER_H 3 | 4 | #include "HMMFitter.h" 5 | 6 | namespace msmbuilder { 7 | 8 | /** 9 | * This subclass of HMMFitter computes Gaussian HMMs. 10 | */ 11 | template 12 | class GaussianHMMFitter : public HMMFitter { 13 | public: 14 | GaussianHMMFitter(void* owner, int n_states, int n_features, int n_iter, const double* log_startprob); 15 | 16 | ~GaussianHMMFitter(); 17 | 18 | void set_means_and_variances(const double* means, const double* variances); 19 | 20 | void initialize_sufficient_statistics(); 21 | 22 | void compute_log_likelihood(const Trajectory& trajectory, 23 | std::vector >& frame_log_probability) const; 24 | 25 | void accumulate_sufficient_statistics(const Trajectory& trajectory, 26 | const std::vector >& frame_log_probability, 27 | const std::vector >& posteriors, 28 | const std::vector >& fwdlattice, 29 | const std::vector >& bwdlattice); 30 | 31 | void get_obs(double* output); 32 | 33 | void get_obs2(double* output); 34 | 35 | void do_mstep(); 36 | private: 37 | void* owner; 38 | std::vector obs, obs2, a0, a1, a2; 39 | }; 40 | 41 | } // namespace msmbuilder 42 | 43 | #endif -------------------------------------------------------------------------------- /msmbuilder/hmm/src/include/VonMisesHMMFitter.h: -------------------------------------------------------------------------------- 1 | #ifndef MIXTAPE_GAUSSIAN_HMM_FITTER_H 2 | #define MIXTAPE_GAUSSIAN_HMM_FITTER_H 3 | 4 | #include "HMMFitter.h" 5 | 6 | namespace msmbuilder { 7 | 8 | /** 9 | * This subclass of HMMFitter computes von Mises HMMs. 
10 | */ 11 | template 12 | class VonMisesHMMFitter : public HMMFitter { 13 | public: 14 | VonMisesHMMFitter(void* owner, int n_states, int n_features, int n_iter, const double* log_startprob); 15 | 16 | ~VonMisesHMMFitter(); 17 | 18 | void set_means_and_kappas(const double* means, const double* kappas); 19 | 20 | void initialize_sufficient_statistics(); 21 | 22 | void compute_log_likelihood(const Trajectory& trajectory, 23 | std::vector >& frame_log_probability) const; 24 | 25 | void accumulate_sufficient_statistics(const Trajectory& trajectory, 26 | const std::vector >& frame_log_probability, 27 | const std::vector >& posteriors, 28 | const std::vector >& fwdlattice, 29 | const std::vector >& bwdlattice); 30 | 31 | void get_cosobs(double* output); 32 | 33 | void get_sinobs(double* output); 34 | 35 | void do_mstep(); 36 | private: 37 | void* owner; 38 | std::vector cosobs, sinobs, means, kappas; 39 | }; 40 | 41 | } // namespace msmbuilder 42 | 43 | #endif -------------------------------------------------------------------------------- /msmbuilder/io/__init__.py: -------------------------------------------------------------------------------- 1 | from .gather_metadata import (gather_metadata, GenericParser, 2 | NumberedRunsParser, HierarchyParser, ParseWarning) 3 | from .io import (backup, preload_top, preload_tops, load_meta, load_generic, 4 | load_trajs, save_meta, render_meta, save_generic, save_trajs, 5 | itertrajs) 6 | from .project_template import TemplateProject -------------------------------------------------------------------------------- /msmbuilder/io/sampling/__init__.py: -------------------------------------------------------------------------------- 1 | from .sampling import sample_dimension, sample_states, sample_msm -------------------------------------------------------------------------------- /msmbuilder/io_templates/twitter-bootstrap.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | {{title}} 8 | 9 | 10 | 14 | 18 | 19 | 20 | 21 | 25 | 26 | 27 |
28 | {{content}} 29 |
30 | 31 | -------------------------------------------------------------------------------- /msmbuilder/libdistance/.gitignore: -------------------------------------------------------------------------------- 1 | libdistance.cpp -------------------------------------------------------------------------------- /msmbuilder/libdistance/src/cdist.hpp: -------------------------------------------------------------------------------- 1 | #include "distance_kernels.h" 2 | 3 | 4 | void cdist_double(const double* XA, const double* XB, const char* metric, 5 | npy_intp na, npy_intp nb, npy_intp m, double* out) 6 | 7 | { 8 | npy_intp i, j, k; 9 | const double *u, *v; 10 | double (*metricfunc) (const double *u, const double *v, npy_intp n) = \ 11 | metric_double(metric); 12 | if (metricfunc == NULL) { 13 | fprintf(stderr, "Error"); 14 | return; 15 | } 16 | 17 | k = 0; 18 | for (i = 0; i < na; i++) { 19 | for (j = 0; j < nb; j++) { 20 | u = XA + m * i; 21 | v = XB + m * j; 22 | out[k++] = metricfunc(u, v, m); 23 | } 24 | } 25 | } 26 | 27 | 28 | void cdist_float(const float* XA, const float* XB, const char* metric, 29 | npy_intp na, npy_intp nb, npy_intp m, double* out) 30 | 31 | { 32 | npy_intp i, j, k; 33 | const float *u, *v; 34 | double (*metricfunc) (const float *u, const float *v, npy_intp n) = \ 35 | metric_float(metric); 36 | if (metricfunc == NULL) { 37 | fprintf(stderr, "Error"); 38 | return; 39 | } 40 | 41 | k = 0; 42 | for (i = 0; i < na; i++) { 43 | for (j = 0; j < nb; j++) { 44 | u = XA + m * i; 45 | v = XB + m * j; 46 | out[k++] = metricfunc(u, v, m); 47 | } 48 | } 49 | } -------------------------------------------------------------------------------- /msmbuilder/libdistance/src/dist.hpp: -------------------------------------------------------------------------------- 1 | #include "distance_kernels.h" 2 | 3 | 4 | void dist_double(const double* X, const double* y, const char* metric, npy_intp n, 5 | npy_intp m, double* out) 6 | { 7 | npy_intp i; 8 | const double *u; 9 | double (*metricfunc) (const double *u, const double *v, npy_intp n) = \ 10 | metric_double(metric); 11 | if (metricfunc == NULL) { 12 | fprintf(stderr, "Error"); 13 | return; 14 | } 15 | 16 | for (i = 0; i < n; i++) { 17 | u = X + m * i; 18 | out[i] = metricfunc(u, y, m); 19 | } 20 | } 21 | 22 | 23 | void dist_double_X_indices(const double* X, const double* y, const char* metric, 24 | npy_intp n, npy_intp m, const npy_intp* X_indices, 25 | npy_intp n_X_indices, double* out) 26 | { 27 | npy_intp i, ii; 28 | const double *u; 29 | double (*metricfunc) (const double *u, const double *v, npy_intp n) = \ 30 | metric_double(metric); 31 | if (metricfunc == NULL) { 32 | fprintf(stderr, "Error"); 33 | return; 34 | } 35 | 36 | for (ii = 0; ii < n_X_indices; ii++) { 37 | i = X_indices[ii]; 38 | u = X + m * i; 39 | out[ii] = metricfunc(u, y, m); 40 | } 41 | } 42 | 43 | 44 | void dist_float(const float* X, const float* y, const char* metric, npy_intp n, 45 | npy_intp m, double* out) 46 | { 47 | npy_intp i; 48 | const float *u; 49 | double (*metricfunc) (const float *u, const float *v, npy_intp n) = \ 50 | metric_float(metric); 51 | if (metricfunc == NULL) { 52 | fprintf(stderr, "Error"); 53 | return; 54 | } 55 | 56 | for (i = 0; i < n; i++) { 57 | u = X + m * i; 58 | out[i] = metricfunc(u, y, m); 59 | } 60 | } 61 | 62 | void dist_float_X_indices(const float* X, const float* y, const char* metric, 63 | npy_intp n, npy_intp m, const npy_intp* X_indices, 64 | npy_intp n_X_indices, double* out) 65 | { 66 | npy_intp i, ii; 67 | const float *u; 68 
| double (*metricfunc) (const float *u, const float *v, npy_intp n) = \ 69 | metric_float(metric); 70 | if (metricfunc == NULL) { 71 | fprintf(stderr, "Error"); 72 | return; 73 | } 74 | 75 | for (ii = 0; ii < n_X_indices; ii++) { 76 | i = X_indices[ii]; 77 | u = X + m * i; 78 | out[ii] = metricfunc(u, y, m); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /msmbuilder/libdistance/src/pdist.hpp: -------------------------------------------------------------------------------- 1 | #include "distance_kernels.h" 2 | 3 | 4 | void pdist_double(const double* X, const char* metric, npy_intp n, npy_intp m, 5 | double* out) 6 | { 7 | npy_intp i, j, k; 8 | const double *u, *v; 9 | double (*metricfunc) (const double *u, const double *v, npy_intp n) = \ 10 | metric_double(metric); 11 | if (metricfunc == NULL) { 12 | fprintf(stderr, "Error"); 13 | return; 14 | } 15 | 16 | k = 0; 17 | for (i = 0; i < n; i++) { 18 | for (j = i+1; j < n; j++) { 19 | u = X + m * i; 20 | v = X + m * j; 21 | out[k++] = metricfunc(u, v, m); 22 | } 23 | } 24 | } 25 | 26 | void pdist_double_X_indices(const double* X, const char* metric, npy_intp n, 27 | npy_intp m, const npy_intp* X_indices, 28 | npy_intp n_X_indices, double* out) 29 | { 30 | npy_intp i, ii, j, jj, k; 31 | const double *u, *v; 32 | double (*metricfunc) (const double *u, const double *v, npy_intp n) = \ 33 | metric_double(metric); 34 | if (metricfunc == NULL) { 35 | fprintf(stderr, "Error"); 36 | return; 37 | } 38 | 39 | k = 0; 40 | for (ii = 0; ii < n_X_indices; ii++) { 41 | i = X_indices[ii]; 42 | for (jj = ii+1; jj < n_X_indices; jj++) { 43 | j = X_indices[jj]; 44 | u = X + m * i; 45 | v = X + m * j; 46 | out[k++] = metricfunc(u, v, m); 47 | } 48 | } 49 | } 50 | 51 | 52 | void pdist_float(const float* X, const char* metric, npy_intp n, npy_intp m, 53 | double* out) 54 | { 55 | npy_intp i, j, k; 56 | const float *u, *v; 57 | double (*metricfunc) (const float *u, const float *v, npy_intp n) = \ 58 | metric_float(metric); 59 | if (metricfunc == NULL) { 60 | fprintf(stderr, "Error"); 61 | return; 62 | } 63 | 64 | k = 0; 65 | for (i = 0; i < n; i++) { 66 | for (j = i+1; j < n; j++) { 67 | u = X + m * i; 68 | v = X + m * j; 69 | out[k++] = metricfunc(u, v, m); 70 | } 71 | } 72 | } 73 | void pdist_float_X_indices(const float* X, const char* metric, npy_intp n, 74 | npy_intp m, const npy_intp* X_indices, 75 | npy_intp n_X_indices, double* out) 76 | { 77 | npy_intp i, ii, j, jj, k; 78 | const float *u, *v; 79 | double (*metricfunc) (const float *u, const float *v, npy_intp n) = \ 80 | metric_float(metric); 81 | if (metricfunc == NULL) { 82 | fprintf(stderr, "Error"); 83 | return; 84 | } 85 | 86 | k = 0; 87 | for (ii = 0; ii < n_X_indices; ii++) { 88 | i = X_indices[ii]; 89 | for (jj = ii+1; jj < n_X_indices; jj++) { 90 | j = X_indices[jj]; 91 | u = X + m * i; 92 | v = X + m * j; 93 | out[k++] = metricfunc(u, v, m); 94 | } 95 | } 96 | } -------------------------------------------------------------------------------- /msmbuilder/libdistance/src/sumdist.hpp: -------------------------------------------------------------------------------- 1 | #include "distance_kernels.h" 2 | 3 | double sumdist_double(const double* X, const char* metric, npy_intp n, npy_intp m, 4 | const npy_intp* pairs, npy_intp p) 5 | { 6 | npy_intp i; 7 | double s = 0; 8 | const double *u, *v; 9 | double (*metricfunc) (const double *u, const double *v, npy_intp n) = \ 10 | metric_double(metric); 11 | if (metricfunc == NULL) { 12 | fprintf(stderr, 
"Error"); 13 | return -1; 14 | } 15 | 16 | for (i = 0; i < p; i++) { 17 | u = X + m * pairs[2*i]; 18 | v = X + m * pairs[2*i+1]; 19 | s += metricfunc(u, v, m); 20 | } 21 | 22 | return s; 23 | } 24 | 25 | 26 | double sumdist_float(const float* X, const char* metric, npy_intp n, npy_intp m, 27 | const npy_intp* pairs, npy_intp p) 28 | { 29 | npy_intp i; 30 | double s = 0; 31 | const float *u, *v; 32 | double (*metricfunc) (const float *u, const float *v, npy_intp n) = \ 33 | metric_float(metric); 34 | if (metricfunc == NULL) { 35 | fprintf(stderr, "Error"); 36 | return -1; 37 | } 38 | for (i = 0; i < p; i++) { 39 | u = X + m * pairs[2*i]; 40 | v = X + m * pairs[2*i+1]; 41 | s += metricfunc(u, v, m); 42 | } 43 | 44 | return s; 45 | } 46 | -------------------------------------------------------------------------------- /msmbuilder/lumping/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function, division 2 | 3 | from .pcca import PCCA 4 | from .pcca_plus import PCCAPlus 5 | from .mvca import MVCA 6 | from .bace import BACE 7 | 8 | __all__ = ["PCCA", "PCCAPlus", "MVCA", "BACE"] 9 | -------------------------------------------------------------------------------- /msmbuilder/msm/.gitignore: -------------------------------------------------------------------------------- 1 | _markovstatemodel.c 2 | _metzner_mcmc_fast.c 3 | _ratematrix.c* 4 | -------------------------------------------------------------------------------- /msmbuilder/msm/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from .core import * 3 | from .msm import MarkovStateModel 4 | from .ratematrix import ContinuousTimeMSM 5 | from .bayesmsm import BayesianMarkovStateModel 6 | from .implied_timescales import implied_timescales 7 | from .bayes_ratematrix import BayesianContinuousTimeMSM 8 | -------------------------------------------------------------------------------- /msmbuilder/msm/_markovstatemodel.pyx: -------------------------------------------------------------------------------- 1 | # Author: Robert McGibbon 2 | # Contributors: 3 | # Copyright (c) 2014, Stanford University 4 | # All rights reserved. 5 | 6 | import numpy as np 7 | 8 | cdef extern from "transmat_mle_prinz.h": 9 | int transmat_mle_prinz(const double* C, int n_states, 10 | double tol, double* T, double* pi) 11 | 12 | def _transmat_mle_prinz(double[:, ::1] C, double tol=1e-10): 13 | """Compute a maximum likelihood reversible transition matrix, given 14 | a set of directed transition counts. 15 | 16 | Algorithim 1 of Prinz et al.[1] 17 | 18 | Parameters 19 | ---------- 20 | C : (input) 2d array of shape=(n_states, n_states) 21 | The directed transition counts, in C (row-major) order. 22 | tol : (input) float 23 | Convergence tolerance. The algorithm will iterate until the 24 | change in the log-likelihood is less than `tol`. 25 | 26 | Returns 27 | ------- 28 | T : (output) pointer to output 2d array of shape=(n_states, n_states) 29 | Once the algorithim is completed, the resulting transition 30 | matrix will be written to `T`. 31 | populations : array, shape = (n_states_,) 32 | The equilibrium population (stationary left eigenvector) of T 33 | 34 | References 35 | ---------- 36 | .. [1] Prinz, Jan-Hendrik, et al. "Markov models of molecular kinetics: 37 | Generation and validation." J Chem. Phys. 134.17 (2011): 174105. 
38 | """ 39 | 40 | cdef int n_states = len(C) 41 | if n_states == 0: 42 | return np.zeros((0, 0)), np.zeros(0) 43 | 44 | if len(C[0]) != n_states: 45 | raise ValueError('C must be square') 46 | cdef double[:, ::1] T = np.zeros((n_states, n_states)) 47 | cdef double[::1] pi = np.zeros(n_states) 48 | cdef int n_iter 49 | 50 | n_iter = transmat_mle_prinz(&C[0,0], n_states, tol, &T[0,0], &pi[0]) 51 | if n_iter < 0: 52 | # diagnose the error 53 | msg = ' Error code=%d' % n_iter 54 | if np.any(np.less(C, 0)): 55 | msg = 'Domain error. C must be positive.' + msg 56 | if np.any(np.sum(C, axis=1) == 0): 57 | msg = 'Row-sums of C must be positive.' + msg 58 | if n_iter == -3: 59 | msg = 'Likelihood not converged.' + msg 60 | raise ValueError(msg) 61 | 62 | return np.array(T), np.array(pi) 63 | -------------------------------------------------------------------------------- /msmbuilder/msm/implied_timescales.py: -------------------------------------------------------------------------------- 1 | # Author: Christian Schwantes 2 | # Contributors: 3 | # Copyright (c) 2014, Stanford University 4 | # All rights reserved. 5 | 6 | 7 | import numpy as np 8 | from ..utils import param_sweep 9 | from . import MarkovStateModel 10 | 11 | 12 | def implied_timescales(sequences, lag_times, n_timescales=10, 13 | msm=None, n_jobs=1, verbose=0): 14 | """ 15 | Calculate the implied timescales for a given MSM. 16 | 17 | Parameters 18 | ---------- 19 | sequences : list of array-like 20 | List of sequences, or a single sequence. Each 21 | sequence should be a 1D iterable of state 22 | labels. Labels can be integers, strings, or 23 | other orderable objects. 24 | lag_times : array-like 25 | Lag times to calculate implied timescales at. 26 | n_timescales : int, optional 27 | Number of timescales to calculate. 28 | msm : msmbuilder.msm.MarkovStateModel, optional 29 | Instance of an MSM to specify parameters other 30 | than the lag time. If None, then the default 31 | parameters (as implemented by msmbuilder.msm.MarkovStateModel) 32 | will be used. 33 | n_jobs : int, optional 34 | Number of jobs to run in parallel 35 | 36 | Returns 37 | ------- 38 | timescales : np.ndarray, shape = [n_models, n_timescales] 39 | The slowest timescales (in units of lag times) for each 40 | model. 41 | """ 42 | 43 | if msm is None: 44 | msm = MarkovStateModel() 45 | 46 | param_grid = {'lag_time' : lag_times} 47 | models = param_sweep(msm, sequences, param_grid, n_jobs=n_jobs, 48 | verbose=verbose) 49 | timescales = [m.timescales_ for m in models] 50 | n_timescales = min(n_timescales, min(len(ts) for ts in timescales)) 51 | timescales = np.array([ts[:n_timescales] for ts in timescales]) 52 | return timescales 53 | -------------------------------------------------------------------------------- /msmbuilder/msm/markov_appreciation.py: -------------------------------------------------------------------------------- 1 | # Author: Muneeb Sultan 2 | # Contributors: Matthew Harrigan 3 | # Copyright (c) 2016, Stanford University 4 | # All rights reserved. 
5 | 6 | 7 | def show_markov_appreciation(): 8 | from PIL import Image 9 | import requests 10 | from io import BytesIO 11 | response = requests.get("https://upload.wikimedia.org/wikipedia/commons/" 12 | "thumb/7/70/AAMarkov.jpg/330px-AAMarkov.jpg") 13 | img = Image.open(BytesIO(response.content)) 14 | img.show() 15 | -------------------------------------------------------------------------------- /msmbuilder/msm/src/metzner_mcmc.h: -------------------------------------------------------------------------------- 1 | #ifndef METZNER_MCMC_STEP_H 2 | #define METZNER_MCMC_STEP_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | void 9 | metzner_mcmc_step(const double* Z, const double* N, double* K, 10 | double* Q, const double* random, double* sc, int n_states, 11 | int n_steps); 12 | 13 | #ifdef __cplusplus 14 | } 15 | #endif 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /msmbuilder/msm/src/transmat_mle_prinz.h: -------------------------------------------------------------------------------- 1 | #ifndef TRANSMAT_MLE_PRINZ_H 2 | #define TRANSMAT_MLE_PRINZ_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | int transmat_mle_prinz(const double* C, int n_states, double tol, 9 | double* T, double* pi); 10 | #ifdef __cplusplus 11 | } 12 | #endif 13 | 14 | #endif 15 | -------------------------------------------------------------------------------- /msmbuilder/msm/validation/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from .bootstrapmsm import BootStrapMarkovStateModel -------------------------------------------------------------------------------- /msmbuilder/msm/validation/transmat_errorbar.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def create_perturb_params(countsmat, transmat=None): 5 | ''' 6 | Computes transition probabilities and standard errors of the transition probabilities due to 7 | finite sampling using the MSM counts matrix. First, the transition probabilities are computed 8 | by dividing the each element c_ij by the row-sumemd counts of row i. THe standard errors are then 9 | computed by first computing the standard deviation of the transition probability, treating each count 10 | as a Bernoulli process with p = t_ij (std = (t_ij - t_ij ^2)^0.5). This is then divided by the 11 | square root of the row-summed counts of row i to obtain the standard error. 12 | 13 | Parameters: 14 | ---------- 15 | countsmat: np.ndarray 16 | The msm counts matrix 17 | transmat: np.ndarray 18 | If you have a transition matrix you want to use (e.g. MLE symmetrized), you can supply that here. This 19 | function will use the transition probabilities from this matrix to calculate the Bernoulli standard deviations, 20 | which will be divided by the row-summed counts in the original supplied counts matrix. 
21 | 22 | Returns: 23 | ----------- 24 | transmat, np.ndarray: 25 | The MSM transition matrix 26 | scale, np.ndarray: 27 | The matrix of standard errors for each transition probability 28 | ''' 29 | norm = np.sum(countsmat, axis=1) 30 | if not transmat: 31 | transmat = (countsmat.transpose() / norm).transpose() 32 | counts = (np.ones((len(transmat), len(transmat))) * norm).transpose() 33 | scale = ((transmat - transmat ** 2) ** 0.5 / counts ** 0.5) + 10 ** -15 34 | return transmat, scale 35 | 36 | 37 | def perturb_tmat(transmat, scale): 38 | ''' 39 | Perturbs each nonzero entry in the MSM transition matrix by treating it as a Gaussian random variable 40 | with mean t_ij and standard deviation equal to the standard error computed using "create_perturb_params". 41 | Returns a sampled transition matrix that takes into consideration errors due to finite sampling 42 | (useful for boostrapping, etc.) 43 | 44 | Parameters: 45 | ---------- 46 | transmat: np.ndarray: 47 | The transition matrix, whose elements serve as the means of the Gaussian random variables 48 | scale: np.ndarray: 49 | The matrix of standard errors. For transition probability t_ij, this is assumed to be the standard 50 | error of the mean of a binomial distribution with p = transition probability and number of observations 51 | equal to the summed counts in row i. 52 | 53 | ''' 54 | output = np.vectorize(np.random.normal)(transmat, scale) 55 | output[np.where(output < 0)] = 0 56 | return (output.transpose() / np.sum(output, axis=1)).transpose() 57 | 58 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/0-test-install.py: -------------------------------------------------------------------------------- 1 | """This script tests your python installation as it pertains to running project templates. 2 | 3 | MSMBuilder supports Python 2.7 and 3.3+ and has some necessary dependencies 4 | like numpy, scipy, and scikit-learn. This templated project enforces 5 | some more stringent requirements to make sure all the users are more-or-less 6 | on the same page and to allow developers to exploit more helper libraries. 7 | 8 | You can modify the template scripts to work for your particular set-up, 9 | but it's probably easier to install `conda` and get the packages we 10 | recommend. 11 | 12 | {{header}} 13 | """ 14 | 15 | import textwrap 16 | 17 | # Show intro text 18 | paragraphs = __doc__.split('\n\n') 19 | for p in paragraphs: 20 | print(textwrap.fill(p)) 21 | print() 22 | 23 | warnings = 0 24 | 25 | ## Test for python 3.5 26 | import sys 27 | 28 | if sys.version_info < (3, 5): 29 | print(textwrap.fill( 30 | "These scripts were all developed on Python 3.5, " 31 | "which is the current, stable release of Python. " 32 | "In particular, we use subprocess.run " 33 | "(and probably some other new features). " 34 | "You can easily modify the scripts to work on older versions " 35 | "of Python, but why not just upgrade? We like Continuum's " 36 | "Anaconda Python distribution for a simple install (without root)." 37 | )) 38 | print() 39 | warnings += 1 40 | 41 | ## Test for matplotlib 42 | try: 43 | import matplotlib as plt 44 | except ImportError: 45 | print(textwrap.fill( 46 | "These scripts try to make some mildly intesting plots. " 47 | "That requires `matplotlib`." 48 | )) 49 | print() 50 | warnings += 1 51 | 52 | ## Test for seaborn 53 | try: 54 | import seaborn as sns 55 | except ImportError: 56 | print(textwrap.fill( 57 | "The default matplotlib styling is a little ugly. 
" 58 | "By default, these scripts try to use `seaborn` to make prettier " 59 | "plots. You can remove all the seaborn imports if you don't want " 60 | "to install this library, but why not just install it? Try " 61 | "`conda install seaborn`" 62 | )) 63 | print() 64 | warnings += 1 65 | 66 | ## Test for xdg-open 67 | try: 68 | import subprocess 69 | 70 | subprocess.check_call(['xdg-open', '--version']) 71 | except: 72 | print(textwrap.fill( 73 | "For convenience, the plotting scripts can try to use `xdg-open` " 74 | "to pop up the result of the plot. Use the --display flag on " 75 | "msmb TemplateProject to enable this behavior." 76 | )) 77 | warnings += 1 78 | 79 | ## Report results 80 | if warnings == 0: 81 | print("I didn't find any problems with your installation! Good job.") 82 | print() 83 | else: 84 | print("I found {} warnings, see above. Good luck!".format(warnings)) 85 | print() 86 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/1-get-example-data.py: -------------------------------------------------------------------------------- 1 | """Get sample data for testing and experimenting 2 | 3 | {{header}} 4 | """ 5 | import os 6 | 7 | from msmbuilder.example_datasets import FsPeptide 8 | 9 | FsPeptide("./").cache() 10 | if not os.path.exists("trajs"): 11 | os.symlink("fs_peptide", "trajs") 12 | if not os.path.exists("top.pdb"): 13 | os.symlink("fs_peptide/fs-peptide.pdb", "top.pdb") 14 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Stanford University and the Authors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a 6 | copy of this software and associated documentation files (the "Software"), 7 | to deal in the Software without restriction, including without limitation 8 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 | and/or sell copies of the Software, and to permit persons to whom the 10 | Software is furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 | DEALINGS IN THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/README.md: -------------------------------------------------------------------------------- 1 | My msmb Project 2 | =============== 3 | 4 | Initialized with `msmb TemplateProject` on {{date}} 5 | 6 | Keep notes about your project here. 7 | 8 | ## Folder layout 9 | 10 | Each new step in MSM construction is in a new folder with symlinks 11 | to the files on which it depends from previous steps. 
12 | 13 | ## Variable names convention 14 | 15 | variable | filename | description 16 | ------------|-------------------|----------------------------------------------- 17 | meta | meta.pandas.pickl | pandas dataframe of trajectory metadata 18 | ftrajs | ftrajs/ | trajectories of feature vectors (dihedrals, ...) 19 | dihed_feat | featurizer.pickl | featurizer object 20 | ttrajs | ttrajs/ | dimensionality-reduced, tica trajectories 21 | tica | tica.pickl | tica object 22 | ktrajs | ktrajs/ | trajecories of cluster indices 23 | kmeans | clusterer.pickl | cluserer object 24 | microktrajs | microktrajs/ | trimmed cluster indices 25 | macroktrajs | macroktrajs/ | macrostate indices 26 | 27 | ## License 28 | 29 | These templates are licensed under the MIT license. Do whatever 30 | you want with them. 31 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/analysis/gather-metadata-plot.py: -------------------------------------------------------------------------------- 1 | """Plot metadata info 2 | 3 | {{header}} 4 | """ 5 | 6 | # ? include "plot_header.template" 7 | # ? from "plot_macros.template" import xdg_open with context 8 | 9 | import numpy as np 10 | import seaborn as sns 11 | from matplotlib import pyplot as plt 12 | 13 | from msmbuilder.io import load_meta, render_meta 14 | 15 | sns.set_style('ticks') 16 | colors = sns.color_palette() 17 | 18 | ## Load 19 | meta = load_meta() 20 | 21 | 22 | ## Histogram of trajectory lengths 23 | def plot_lengths(ax): 24 | lengths_ns = meta['nframes'] * (meta['step_ps'] / 1000) 25 | ax.hist(lengths_ns) 26 | ax.set_xlabel("Lenths / ns", fontsize=16) 27 | ax.set_ylabel("Count", fontsize=16) 28 | 29 | total_label = ("Total length: {us:.2f}" 30 | .format(us=np.sum(lengths_ns) / 1000)) 31 | total_label += r" / $\mathrm{\mu s}$" 32 | ax.annotate(total_label, 33 | xy=(0.05, 0.95), 34 | xycoords='axes fraction', 35 | fontsize=18, 36 | va='top', 37 | ) 38 | 39 | 40 | ## Pie graph 41 | def plot_pie(ax): 42 | lengths_ns = meta['nframes'] * (meta['step_ps'] / 1000) 43 | sampling = lengths_ns.groupby(level=0).sum() 44 | 45 | ax.pie(sampling, 46 | shadow=True, 47 | labels=sampling.index, 48 | colors=sns.color_palette(), 49 | ) 50 | ax.axis('equal') 51 | 52 | 53 | ## Box plot 54 | def plot_boxplot(ax): 55 | meta2 = meta.copy() 56 | meta2['ns'] = meta['nframes'] * (meta['step_ps'] / 1000) 57 | sns.boxplot( 58 | x=meta2.index.names[0], 59 | y='ns', 60 | data=meta2.reset_index(), 61 | ax=ax, 62 | ) 63 | 64 | 65 | ## Plot hist 66 | fig, ax = plt.subplots(figsize=(7, 5)) 67 | plot_lengths(ax) 68 | fig.tight_layout() 69 | fig.savefig("lengths-hist.pdf") 70 | # {{xdg_open('lengths-hist.pdf')}} 71 | 72 | ## Plot pie 73 | fig, ax = plt.subplots(figsize=(7, 5)) 74 | plot_pie(ax) 75 | fig.tight_layout() 76 | fig.savefig("lengths-pie.pdf") 77 | # {{xdg_open('lengths-pie.pdf')}} 78 | 79 | ## Plot box 80 | fig, ax = plt.subplots(figsize=(7, 5)) 81 | plot_boxplot(ax) 82 | fig.tight_layout() 83 | fig.savefig("lengths-boxplot.pdf") 84 | # {{xdg_open('lengths-boxplot.pdf')}} 85 | 86 | ## Save metadata as html table 87 | render_meta(meta, 'meta.pandas.html') 88 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/analysis/gather-metadata.py: -------------------------------------------------------------------------------- 1 | """Find trajectories and associated metadata 2 | 3 | {{header}} 4 | 5 | Meta 6 | ---- 7 | depends: 8 | - trajs 9 | - top.pdb 10 | """ 11 | 12 | 
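# NumberedRunsParser (constructed below) assumes trajectory files named like
# trajs/trajectory-0.xtc, trajs/trajectory-1.xtc, ..., all sharing top.pdb as the
# topology and saved with one frame every 50 ps (step_ps=50).
# gather_metadata("trajs/*.xtc", parser) then collects one metadata row per matched
# file into a pandas DataFrame, and save_meta() writes it to meta.pandas.pickl,
# which the later featurize / tica / cluster / msm template steps all load.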
from msmbuilder.io import gather_metadata, save_meta, NumberedRunsParser 13 | 14 | ## Construct and save the dataframe 15 | parser = NumberedRunsParser( 16 | traj_fmt="trajectory-{run}.xtc", 17 | top_fn="top.pdb", 18 | step_ps=50, 19 | ) 20 | meta = gather_metadata("trajs/*.xtc", parser) 21 | save_meta(meta) 22 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/cluster/cluster-plot.py: -------------------------------------------------------------------------------- 1 | """Plot cluster centers on tICA coordinates 2 | 3 | {{header}} 4 | """ 5 | 6 | # ? include "plot_header.template" 7 | # ? from "plot_macros.template" import xdg_open with context 8 | 9 | import numpy as np 10 | import seaborn as sns 11 | from matplotlib import pyplot as plt 12 | 13 | from msmbuilder.io import load_trajs, load_generic 14 | 15 | sns.set_style('ticks') 16 | colors = sns.color_palette() 17 | 18 | ## Load 19 | kmeans = load_generic('kmeans.pickl') 20 | meta, ktrajs = load_trajs('ktrajs') 21 | meta, ttrajs = load_trajs('ttrajs', meta) 22 | txx = np.concatenate(list(ttrajs.values())) 23 | 24 | 25 | def plot_cluster_centers(ax): 26 | ax.hexbin(txx[:, 0], txx[:, 1], 27 | cmap=sns.cubehelix_palette(as_cmap=True), 28 | mincnt=1, 29 | bins='log', 30 | ) 31 | ax.scatter(kmeans.cluster_centers_[:, 0], 32 | kmeans.cluster_centers_[:, 1], 33 | s=40, c=colors[0], 34 | ) 35 | ax.set_xlabel("tIC 1", fontsize=16) 36 | ax.set_ylabel("tIC 2", fontsize=16) 37 | 38 | 39 | ## Plot 1 40 | fig, ax = plt.subplots(figsize=(7, 5)) 41 | plot_cluster_centers(ax) 42 | fig.tight_layout() 43 | fig.savefig('kmeans-centers.pdf') 44 | # {{xdg_open('kmeans-centers.pdf')}} 45 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/cluster/cluster.py: -------------------------------------------------------------------------------- 1 | """Cluster tICA results 2 | 3 | {{header}} 4 | 5 | Meta 6 | ---- 7 | depends: 8 | - ttrajs 9 | - meta.pandas.pickl 10 | """ 11 | from msmbuilder.io import load_trajs, save_trajs, save_generic 12 | from msmbuilder.cluster import MiniBatchKMeans 13 | 14 | ## Load 15 | meta, ttrajs = load_trajs('ttrajs') 16 | 17 | ## Fit 18 | dim = 5 19 | kmeans = MiniBatchKMeans(n_clusters=500) 20 | kmeans.fit([traj[:, :dim] for traj in ttrajs.values()]) 21 | 22 | ## Transform 23 | ktrajs = {} 24 | for k, v in ttrajs.items(): 25 | ktrajs[k] = kmeans.partial_transform(v[:, :dim]) 26 | 27 | ## Save 28 | print(kmeans.summarize()) 29 | save_trajs(ktrajs, 'ktrajs', meta) 30 | save_generic(kmeans, 'kmeans.pickl') 31 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/cluster/sample-clusters-plot.py: -------------------------------------------------------------------------------- 1 | """Plot the result of sampling clusters 2 | 3 | {{header}} 4 | """ 5 | 6 | # ? include "plot_header.template" 7 | # ? 
from "plot_macros.template" import xdg_open with context 8 | 9 | import numpy as np 10 | import seaborn as sns 11 | from matplotlib import pyplot as plt 12 | 13 | from msmbuilder.io import load_trajs, load_generic 14 | 15 | sns.set_style('ticks') 16 | colors = sns.color_palette() 17 | 18 | ## Load 19 | meta, ttrajs = load_trajs('ttrajs') 20 | txx = np.concatenate(list(ttrajs.values())) 21 | kmeans = load_generic('kmeans.pickl') 22 | 23 | inds = load_generic("cluster-sample-inds.pickl") 24 | coordinates = [ 25 | np.asarray([ttrajs[traj_i][frame_i, :] for traj_i, frame_i in state_inds]) 26 | for state_inds in inds 27 | ] 28 | 29 | 30 | ## Overlay sampled states on histogram 31 | def plot_sampled_states(ax): 32 | ax.hexbin(txx[:, 0], txx[:, 1], 33 | cmap='magma_r', 34 | mincnt=1, 35 | bins='log', 36 | alpha=0.8, 37 | ) 38 | 39 | # Show sampled points as scatter 40 | # Annotate cluster index 41 | for i, coo in enumerate(coordinates): 42 | plt.scatter(coo[:, 0], coo[:, 1], c=colors[i % 6], s=40) 43 | ax.text(kmeans.cluster_centers_[i, 0], 44 | kmeans.cluster_centers_[i, 1], 45 | "{}".format(i), 46 | ha='center', 47 | va='center', 48 | size=16, 49 | bbox=dict( 50 | boxstyle='round', 51 | fc='w', 52 | ec="0.5", 53 | alpha=0.9, 54 | ), 55 | zorder=10, 56 | ) 57 | 58 | ax.set_xlabel("tIC 1", fontsize=16) 59 | ax.set_ylabel("tIC 2", fontsize=16) 60 | 61 | 62 | ## Render a script for loading in vmd 63 | def load_in_vmd(dirname='cluster_samples'): 64 | k = len(inds[0]) 65 | templ = [ 66 | '# autogenerated by msmbuilder', 67 | '# open with `vmd -e load-cluster-samples.tcl`', 68 | '', 69 | '# Defaults', 70 | 'mol default material Transparent', 71 | 'mol default representation NewCartoon', 72 | '', 73 | ] 74 | for i in range(len(inds)): 75 | templ += [ 76 | '# State {}'.format(i), 77 | 'mol new top.pdb', 78 | 'mol addfile {}/{}.xtc waitfor all'.format(dirname, i), 79 | 'animate delete beg 0 end 0 top', 80 | 'mol rename top State-{}'.format(i), 81 | 'mol modcolor 0 top ColorID {}'.format(i), 82 | 'mol drawframes top 0 0:{k}'.format(k=k), 83 | '', 84 | ] 85 | return '\n'.join(templ) 86 | 87 | 88 | ## Plot 89 | fig, ax = plt.subplots(figsize=(7, 5)) 90 | plot_sampled_states(ax) 91 | fig.tight_layout() 92 | fig.savefig('cluster-samples.pdf') 93 | # {{xdg_open('cluster-samples.pdf')}} 94 | 95 | ## Render vmd 96 | with open('load-cluster-samples.tcl', 'w') as f: 97 | f.write(load_in_vmd()) 98 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/cluster/sample-clusters.py: -------------------------------------------------------------------------------- 1 | """Sample conformations from clusters 2 | 3 | {{header}} 4 | 5 | Meta 6 | ---- 7 | depends: 8 | - ../../top.pdb 9 | - ../../trajs 10 | """ 11 | 12 | import mdtraj as md 13 | import os 14 | 15 | from msmbuilder.io.sampling import sample_states 16 | from msmbuilder.io import load_trajs, save_generic, preload_top, backup, load_generic 17 | 18 | ## Load 19 | meta, ttrajs = load_trajs('ttrajs') 20 | kmeans = load_generic("kmeans.pickl") 21 | 22 | ## Sample 23 | inds = sample_states(ttrajs, 24 | kmeans.cluster_centers_, 25 | k=10) 26 | 27 | save_generic(inds, "cluster-sample-inds.pickl") 28 | 29 | ## Make trajectories 30 | top = preload_top(meta) 31 | out_folder = "cluster_samples" 32 | backup(out_folder) 33 | os.mkdir(out_folder) 34 | 35 | for state_i, state_inds in enumerate(inds): 36 | traj = md.join( 37 | md.load_frame(meta.loc[traj_i]['traj_fn'], index=frame_i, top=top) 38 | for traj_i, frame_i in 
state_inds 39 | ) 40 | traj.save("{}/{}.xtc".format(out_folder, state_i)) 41 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/dihedrals/featurize-plot.py: -------------------------------------------------------------------------------- 1 | """Plot diagnostic feature info 2 | 3 | {{header}} 4 | """ 5 | 6 | # ? include "plot_header.template" 7 | # ? from "plot_macros.template" import xdg_open with context 8 | 9 | import numpy as np 10 | from matplotlib import pyplot as plt 11 | import seaborn as sns 12 | from msmbuilder.io import load_trajs 13 | 14 | sns.set_style('ticks') 15 | colors = sns.color_palette() 16 | 17 | ## Load 18 | meta, ftrajs = load_trajs('ftrajs') 19 | # (stride by 100 for memory concerns) 20 | fxx = np.concatenate([fx[::100] for fx in ftrajs.values()]) 21 | 22 | 23 | ## Box and whisker plot 24 | def plot_box(ax): 25 | n_feats_plot = min(fxx.shape[1], 100) 26 | ax.boxplot(fxx[:, :100], 27 | boxprops={'color': colors[0]}, 28 | whiskerprops={'color': colors[0]}, 29 | capprops={'color': colors[0]}, 30 | medianprops={'color': colors[2]}, 31 | ) 32 | 33 | if fxx.shape[1] > 100: 34 | ax.annotate("(Only showing the first 100 features)", 35 | xy=(0.05, 0.95), 36 | xycoords='axes fraction', 37 | fontsize=14, 38 | va='top', 39 | ) 40 | 41 | ax.set_xlabel("Feature Index", fontsize=16) 42 | xx = np.arange(0, n_feats_plot, 10) 43 | ax.set_xticks(xx) 44 | ax.set_xticklabels([str(x) for x in xx]) 45 | ax.set_xlim((0, n_feats_plot + 1)) 46 | ax.set_ylabel("Feature Value", fontsize=16) 47 | 48 | 49 | ## Plot 50 | fig, ax = plt.subplots(figsize=(15, 5)) 51 | plot_box(ax) 52 | fig.tight_layout() 53 | fig.savefig("ftrajs-box.pdf") 54 | # {{ xdg_open('ftrajs-box.pdf') }} 55 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/dihedrals/featurize.py: -------------------------------------------------------------------------------- 1 | """Turn trajectories into dihedral features 2 | 3 | {{header}} 4 | 5 | Meta 6 | ---- 7 | depends: 8 | - meta.pandas.pickl 9 | - trajs 10 | - top.pdb 11 | """ 12 | import mdtraj as md 13 | 14 | from msmbuilder.featurizer import DihedralFeaturizer 15 | from msmbuilder.io import load_meta, preload_tops, save_trajs, save_generic 16 | from multiprocessing import Pool 17 | 18 | ## Load 19 | meta = load_meta() 20 | tops = preload_tops(meta) 21 | dihed_feat = DihedralFeaturizer() 22 | 23 | 24 | ## Featurize logic 25 | def feat(irow): 26 | i, row = irow 27 | traj = md.load(row['traj_fn'], top=tops[row['top_fn']]) 28 | feat_traj = dihed_feat.partial_transform(traj) 29 | return i, feat_traj 30 | 31 | 32 | ## Do it in parallel 33 | with Pool() as pool: 34 | dihed_trajs = dict(pool.imap_unordered(feat, meta.iterrows())) 35 | 36 | ## Save 37 | save_trajs(dihed_trajs, 'ftrajs', meta) 38 | save_generic(dihed_feat, 'featurizer.pickl') 39 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/landmarks/featurize-plot.py: -------------------------------------------------------------------------------- 1 | """Plot statistics from RMSD clustering 2 | 3 | {{header}} 4 | """ 5 | 6 | # ? include "plot_header.template" 7 | # ? 
from "plot_macros.template" import xdg_open with context 8 | 9 | import numpy as np 10 | import seaborn as sns 11 | from matplotlib import pyplot as plt 12 | 13 | from msmbuilder.io import load_trajs 14 | 15 | sns.set_style('ticks') 16 | colors = sns.color_palette() 17 | 18 | ## Load 19 | meta, ftrajs = load_trajs('ftrajs') 20 | # (stride by 100 for memory concerns) 21 | fxx = np.concatenate([fx[::100] for fx in ftrajs.values()]) 22 | 23 | 24 | ## Box and whisker plot 25 | def plot_box(ax): 26 | n_feats_plot = min(fxx.shape[1], 100) 27 | ax.boxplot(fxx[:, :100], 28 | boxprops={'color': colors[0]}, 29 | whiskerprops={'color': colors[0]}, 30 | capprops={'color': colors[0]}, 31 | medianprops={'color': colors[2]}, 32 | ) 33 | 34 | if fxx.shape[1] > 100: 35 | ax.annotate("(Only showing the first 100 features)", 36 | xy=(0.05, 0.95), 37 | xycoords='axes fraction', 38 | fontsize=14, 39 | va='top', 40 | ) 41 | 42 | ax.set_xlabel("Feature Index", fontsize=16) 43 | xx = np.arange(0, n_feats_plot, 10) 44 | ax.set_xticks(xx) 45 | ax.set_xticklabels([str(x) for x in xx]) 46 | ax.set_xlim((0, n_feats_plot + 1)) 47 | ax.set_ylabel("Feature Value", fontsize=16) 48 | 49 | 50 | ## Plot 51 | fig, ax = plt.subplots(figsize=(15, 5)) 52 | plot_box(ax) 53 | fig.tight_layout() 54 | fig.savefig("ftrajs-box.pdf") 55 | # {{ xdg_open('ftrajs-box.pdf') }} 56 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/landmarks/featurize.py: -------------------------------------------------------------------------------- 1 | """Cluster based on RMSD between conformations 2 | 3 | {{header}} 4 | 5 | Meta 6 | ---- 7 | depends: 8 | - meta.pandas.pickl 9 | - trajs 10 | - top.pdb 11 | """ 12 | import mdtraj as md 13 | 14 | from msmbuilder.io import load_meta, itertrajs, save_trajs, preload_top 15 | 16 | ## Load 17 | meta = load_meta() 18 | centroids = md.load("centroids.xtc", top=preload_top(meta)) 19 | 20 | ## Kernel 21 | SIGMA = 0.3 # nm 22 | from msmbuilder.featurizer import RMSDFeaturizer 23 | import numpy as np 24 | 25 | featurizer = RMSDFeaturizer(centroids) 26 | lfeats = {} 27 | for i, traj in itertrajs(meta): 28 | lfeat = featurizer.partial_transform(traj) 29 | lfeat = np.exp(-lfeat ** 2 / (2 * (SIGMA ** 2))) 30 | lfeats[i] = lfeat 31 | save_trajs(lfeats, 'ftrajs', meta) 32 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/landmarks/find-landmarks.py: -------------------------------------------------------------------------------- 1 | """Cluster based on RMSD between conformations 2 | 3 | {{header}} 4 | 5 | Meta 6 | ---- 7 | depends: 8 | - meta.pandas.pickl 9 | - trajs 10 | - top.pdb 11 | """ 12 | import mdtraj as md 13 | 14 | from msmbuilder.cluster import MiniBatchKMedoids 15 | from msmbuilder.io import load_meta, itertrajs, save_generic, backup 16 | 17 | ## Set up parameters 18 | kmed = MiniBatchKMedoids( 19 | n_clusters=500, 20 | metric='rmsd', 21 | ) 22 | 23 | ## Load 24 | meta = load_meta() 25 | 26 | 27 | ## Try to limit RAM usage 28 | def guestimate_stride(): 29 | total_data = meta['nframes'].sum() 30 | want = kmed.n_clusters * 10 31 | stride = max(1, total_data // want) 32 | print("Since we have", total_data, "frames, we're going to stride by", 33 | stride, "during fitting, because this is probably adequate for", 34 | kmed.n_clusters, "clusters") 35 | return stride 36 | 37 | 38 | ## Fit 39 | kmed.fit([traj for _, traj in itertrajs(meta, stride=guestimate_stride())]) 40 | print(kmed.summarize()) 
41 | 42 | ## Save 43 | save_generic(kmed, 'clusterer.pickl') 44 | 45 | 46 | ## Save centroids 47 | def frame(traj_i, frame_i): 48 | # Note: kmedoids does 0-based, contiguous integers so we use .iloc 49 | row = meta.iloc[traj_i] 50 | return md.load_frame(row['traj_fn'], frame_i, top=row['top_fn']) 51 | 52 | 53 | centroids = md.join((frame(ti, fi) for ti, fi in kmed.cluster_ids_), 54 | check_topology=False) 55 | centroids_fn = 'centroids.xtc' 56 | backup(centroids_fn) 57 | centroids.save("centroids.xtc") 58 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/msm/microstate-plot.py: -------------------------------------------------------------------------------- 1 | """Plot populations and eigvectors from microstate MSM 2 | 3 | {{header}} 4 | Meta 5 | ---- 6 | depends: 7 | - kmeans.pickl 8 | - ../ttrajs 9 | """ 10 | 11 | # ? include "plot_header.template" 12 | # ? from "plot_macros.template" import xdg_open with context 13 | 14 | import numpy as np 15 | import seaborn as sns 16 | from matplotlib import pyplot as plt 17 | 18 | from msmbuilder.io import load_trajs, load_generic 19 | 20 | sns.set_style('ticks') 21 | colors = sns.color_palette() 22 | 23 | ## Load 24 | kmeans = load_generic('kmeans.pickl') 25 | msm = load_generic('msm.pickl') 26 | meta, ttrajs = load_trajs('ttrajs') 27 | txx = np.concatenate(list(ttrajs.values())) 28 | 29 | 30 | ## Plot microstates 31 | def plot_microstates(ax): 32 | ax.hexbin(txx[:, 0], txx[:, 1], 33 | cmap='Greys', 34 | mincnt=1, 35 | bins='log', 36 | ) 37 | 38 | scale = 100 / np.max(msm.populations_) 39 | add_a_bit = 5 40 | ax.scatter(kmeans.cluster_centers_[msm.state_labels_, 0], 41 | kmeans.cluster_centers_[msm.state_labels_, 1], 42 | s=scale * msm.populations_ + add_a_bit, 43 | c=msm.left_eigenvectors_[:, 1], 44 | cmap='RdBu' 45 | ) 46 | ax.set_xlabel("tIC 1", fontsize=16) 47 | ax.set_ylabel("tIC 2", fontsize=16) 48 | # ax.colorbar(label='First Dynamical Eigenvector', fontsize=16) 49 | 50 | 51 | ## Plot 52 | fig, ax = plt.subplots(figsize=(7, 5)) 53 | plot_microstates(ax) 54 | fig.tight_layout() 55 | fig.savefig('msm-microstates.pdf') 56 | # {{xdg_open('msm-microstates.pdf')}} 57 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/msm/microstate-traj.py: -------------------------------------------------------------------------------- 1 | """Sample a trajectory from microstate MSM 2 | 3 | {{header}} 4 | 5 | Meta 6 | ---- 7 | depends: 8 | - top.pdb 9 | - trajs 10 | """ 11 | 12 | import mdtraj as md 13 | 14 | from msmbuilder.io import load_trajs, save_generic, preload_top, backup, load_generic 15 | from msmbuilder.io.sampling import sample_msm 16 | 17 | ## Load 18 | meta, ttrajs = load_trajs('ttrajs') 19 | msm = load_generic('msm.pickl') 20 | kmeans = load_generic('kmeans.pickl') 21 | 22 | ## Sample 23 | # Warning: make sure ttrajs and kmeans centers have 24 | # the same number of dimensions 25 | inds = sample_msm(ttrajs, kmeans.cluster_centers_, msm, n_steps=200, stride=1) 26 | save_generic(inds, "msm-traj-inds.pickl") 27 | 28 | ## Make trajectory 29 | top = preload_top(meta) 30 | traj = md.join( 31 | md.load_frame(meta.loc[traj_i]['traj_fn'], index=frame_i, top=top) 32 | for traj_i, frame_i in inds 33 | ) 34 | 35 | ## Save 36 | traj_fn = "msm-traj.xtc" 37 | backup(traj_fn) 38 | traj.save(traj_fn) 39 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/msm/microstate.py: 
-------------------------------------------------------------------------------- 1 | """Make a microstate MSM 2 | 3 | {{header}} 4 | """ 5 | 6 | from msmbuilder.io import load_trajs, save_trajs, save_generic 7 | from msmbuilder.msm import MarkovStateModel 8 | 9 | ## Load 10 | meta, ktrajs = load_trajs('ktrajs') 11 | 12 | ## Fit 13 | msm = MarkovStateModel(lag_time=2, n_timescales=10, verbose=False) 14 | msm.fit(list(ktrajs.values())) 15 | 16 | ## Transform 17 | microktrajs = {} 18 | for k, v in ktrajs.items(): 19 | microktrajs[k] = msm.partial_transform(v) 20 | 21 | ## Save 22 | print(msm.summarize()) 23 | save_generic(msm, 'msm.pickl') 24 | save_trajs(microktrajs, 'microktrajs', meta) 25 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/msm/timescales-plot.py: -------------------------------------------------------------------------------- 1 | """Plot implied timescales vs lagtime 2 | 3 | {{header}} 4 | """ 5 | 6 | # ? include "plot_header.template" 7 | # ? from "plot_macros.template" import xdg_open with context 8 | 9 | import numpy as np 10 | import pandas as pd 11 | import seaborn as sns 12 | from matplotlib import pyplot as plt 13 | 14 | sns.set_style('ticks') 15 | colors = sns.color_palette() 16 | 17 | ## Load 18 | timescales = pd.read_pickle('timescales.pandas.pickl') 19 | n_timescales = len([x for x in timescales.columns 20 | if x.startswith('timescale_')]) 21 | 22 | 23 | ## Implied timescales vs lagtime 24 | def plot_timescales(ax): 25 | for i in range(n_timescales): 26 | ax.scatter(timescales['lag_time'], 27 | timescales['timescale_{}'.format(i)], 28 | s=50, c=colors[0], 29 | label=None, # pandas be interfering 30 | ) 31 | 32 | xmin, xmax = ax.get_xlim() 33 | xx = np.linspace(xmin, xmax) 34 | ax.plot(xx, xx, color=colors[2], label='$y=x$') 35 | ax.legend(loc='best', fontsize=14) 36 | ax.set_xlabel('Lag Time / todo:units', fontsize=18) 37 | ax.set_ylabel('Implied Timescales / todo:units', fontsize=18) 38 | ax.set_xscale('log') 39 | ax.set_yscale('log') 40 | 41 | ## Percent trimmed vs lagtime 42 | def plot_trimmed(ax): 43 | ax.plot(timescales['lag_time'], 44 | timescales['percent_retained'], 45 | 'o-', 46 | label=None, # pandas be interfering 47 | ) 48 | ax.axhline(100, color='k', ls='--', label='100%') 49 | ax.legend(loc='best', fontsize=14) 50 | ax.set_xlabel('Lag Time / todo:units', fontsize=18) 51 | ax.set_ylabel('Retained / %', fontsize=18) 52 | ax.set_xscale('log') 53 | ax.set_ylim((0, 110)) 54 | 55 | ## Plot timescales 56 | fig, ax = plt.subplots(figsize=(7, 5)) 57 | plot_timescales(ax) 58 | fig.tight_layout() 59 | fig.savefig('implied-timescales.pdf') 60 | # {{xdg_open('implied-timescales.pdf')}} 61 | 62 | ## Plot trimmed 63 | fig, ax = plt.subplots(figsize=(7,5)) 64 | plot_trimmed(ax) 65 | fig.tight_layout() 66 | fig.savefig('percent-trimmed.pdf') 67 | # {{xdg_open('percent-trimmed.pdf')}} 68 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/msm/timescales.py: -------------------------------------------------------------------------------- 1 | """Calculate implied timescales vs. 
lagtime 2 | 3 | {{header}} 4 | 5 | Meta 6 | ---- 7 | depends: 8 | - meta.pandas.pickl 9 | - ktrajs 10 | """ 11 | from multiprocessing import Pool 12 | 13 | import pandas as pd 14 | 15 | from msmbuilder.io import load_trajs 16 | from msmbuilder.msm import MarkovStateModel 17 | 18 | ## Load 19 | meta, ktrajs = load_trajs('ktrajs') 20 | 21 | ## Parameters 22 | lagtimes = [2 ** i for i in range(8)] 23 | 24 | 25 | ## Define what to do for parallel execution 26 | def at_lagtime(lt): 27 | msm = MarkovStateModel(lag_time=lt, n_timescales=10, verbose=False) 28 | msm.fit(list(ktrajs.values())) 29 | ret = { 30 | 'lag_time': lt, 31 | 'percent_retained': msm.percent_retained_, 32 | } 33 | for i in range(msm.n_timescales): 34 | ret['timescale_{}'.format(i)] = msm.timescales_[i] 35 | return ret 36 | 37 | 38 | ## Do the calculation 39 | with Pool() as p: 40 | results = p.map(at_lagtime, lagtimes) 41 | 42 | lt_df = pd.DataFrame(results) 43 | 44 | ## Save 45 | print(lt_df.head()) 46 | lt_df.to_pickle('timescales.pandas.pickl') 47 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/plot_header.template: -------------------------------------------------------------------------------- 1 | # ? if use_xdgopen 2 | from subprocess import run 3 | # ? endif 4 | # ? if use_agg 5 | import matplotlib 6 | matplotlib.use('Agg') 7 | # ? endif 8 | # ? if ipynb 9 | %matplotlib inline 10 | # ? endif 11 | 12 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/plot_macros.template: -------------------------------------------------------------------------------- 1 | {% macro xdg_open(fn) -%} 2 | {% if use_xdgopen -%} 3 | Launch with default pdf viewer: 4 | run(['xdg-open', '{{fn}}']) 5 | {%- endif %} 6 | {%- endmacro %} 7 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/rmsd/rmsd-plot.py: -------------------------------------------------------------------------------- 1 | """Plot RMSD results 2 | 3 | {{header}} 4 | """ 5 | 6 | # ? include "plot_header.template" 7 | # ? 
from "plot_macros.template" import xdg_open with context 8 | 9 | import numpy as np 10 | import seaborn as sns 11 | from matplotlib import pyplot as plt 12 | 13 | from msmbuilder.io import load_trajs 14 | 15 | sns.set_style('ticks') 16 | colors = sns.color_palette() 17 | 18 | ## Load 19 | meta, rmsds = load_trajs('rmsds') 20 | 21 | 22 | ## Plot box plot 23 | def plot_boxplot(ax): 24 | catted = np.concatenate([rmsds[k] for k in meta.index]) 25 | sns.boxplot(catted * 10, ax=ax) 26 | ax.set_xlabel(r'RMSD / $\mathrm{\AA}$', fontsize=18) 27 | ax.set_yticks([]) 28 | # ax.set_xticks(fontsize=16) #TODO: fontsize 29 | 30 | 31 | ## Report bad trajectories 32 | def bad_trajs(cutoff=0.7): 33 | bad = {} 34 | for k in meta.index: 35 | arr = rmsds[k] 36 | wh = np.where(np.asarray(arr) > cutoff)[0] 37 | if len(wh) > 0: 38 | bad[k] = wh 39 | return bad 40 | 41 | 42 | ## Plot 43 | fig, ax = plt.subplots(figsize=(6, 3)) 44 | plot_boxplot(ax) 45 | fig.tight_layout() 46 | fig.savefig("rmsd-boxplot.pdf") 47 | # {{xdg_open('rmsd-boxplot.pdf')}} 48 | 49 | ## Bad trajectories 50 | for k, frame_is in bad_trajs().items(): 51 | print("Trajectory", k) 52 | print("Frames:", frame_is) 53 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/rmsd/rmsd.py: -------------------------------------------------------------------------------- 1 | """Check for abnormally high rmsd values to a reference structure 2 | 3 | {{header}} 4 | 5 | Meta 6 | ---- 7 | depends: 8 | - meta.pandas.pickl 9 | - trajs 10 | - top.pdb 11 | 12 | """ 13 | 14 | import mdtraj as md 15 | 16 | from msmbuilder.io import load_meta, itertrajs, save_trajs 17 | 18 | ## Load reference structure 19 | ref = md.load("top.pdb") 20 | meta = load_meta() 21 | 22 | ## Do calculation and save 23 | rmsds = {k: md.rmsd(traj, ref) for k, traj in itertrajs(meta)} 24 | save_trajs(rmsds, 'rmsds', meta) 25 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/tica/tica-plot.py: -------------------------------------------------------------------------------- 1 | """Plot tICA-transformed coordinates 2 | 3 | {{header}} 4 | """ 5 | 6 | # ? include "plot_header.template" 7 | # ? 
from "plot_macros.template" import xdg_open with context 8 | 9 | import numpy as np 10 | import seaborn as sns 11 | from matplotlib import pyplot as plt 12 | 13 | from msmbuilder.io import load_trajs, load_generic 14 | 15 | sns.set_style('ticks') 16 | colors = sns.color_palette() 17 | 18 | ## Load 19 | tica = load_generic('tica.pickl') 20 | meta, ttrajs = load_trajs('ttrajs') 21 | txx = np.concatenate(list(ttrajs.values())) 22 | 23 | 24 | ## Heatmap 25 | def plot_heatmap(ax): 26 | ax.hexbin(txx[:, 0], txx[:, 1], 27 | cmap=sns.cubehelix_palette(as_cmap=True), 28 | mincnt=1, 29 | bins='log' 30 | ) 31 | ax.set_xlabel("tIC 1", fontsize=16) 32 | ax.set_ylabel("tIC 2", fontsize=16) 33 | 34 | 35 | ## Timescales 36 | def plot_timescales(ax): 37 | timestep = meta['step_ps'].unique() 38 | assert len(timestep) == 1, timestep 39 | timestep = float(timestep[0]) # ps 40 | to_us = ( 41 | (1.0 / 1000) # ps -> ns 42 | * (1.0 / 1000) # ns -> us 43 | * (timestep / 1) # steps -> ps 44 | ) 45 | ax.hlines(tica.timescales_ * to_us, 46 | 0, 1, 47 | color=colors[0]) 48 | ax.set_ylabel(r'Timescales / $\mathrm{\mu s}$', fontsize=18) 49 | ax.set_xticks([]) 50 | ax.set_xlim((0, 1)) 51 | 52 | 53 | ## Plot 1 54 | fig, ax = plt.subplots(figsize=(7, 5)) 55 | plot_heatmap(ax) 56 | fig.tight_layout() 57 | fig.savefig('tica-heatmap.pdf') 58 | # {{xdg_open('tica-heatmap.pdf')}} 59 | 60 | ## Plot 2 61 | fig, ax = plt.subplots(figsize=(3, 5)) 62 | plot_timescales(ax) 63 | fig.tight_layout() 64 | fig.savefig('tica-timescales.pdf') 65 | # {{xdg_open('tica-heatmap.pdf')}} 66 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/tica/tica-sample-coordinate-plot.py: -------------------------------------------------------------------------------- 1 | """Plot the result of sampling a tICA coordinate 2 | 3 | {{header}} 4 | """ 5 | 6 | # ? include "plot_header.template" 7 | # ? 
from "plot_macros.template" import xdg_open with context 8 | 9 | import numpy as np 10 | import seaborn as sns 11 | from matplotlib import pyplot as plt 12 | 13 | from msmbuilder.io import load_trajs, load_generic 14 | 15 | sns.set_style('ticks') 16 | colors = sns.color_palette() 17 | 18 | ## Load 19 | meta, ttrajs = load_trajs('ttrajs') 20 | txx = np.concatenate(list(ttrajs.values())) 21 | 22 | inds = load_generic("tica-dimension-0-inds.pickl") 23 | straj = [] 24 | for traj_i, frame_i in inds: 25 | straj += [ttrajs[traj_i][frame_i, :]] 26 | straj = np.asarray(straj) 27 | 28 | 29 | ## Overlay sampled trajectory on histogram 30 | def plot_sampled_traj(ax): 31 | ax.hexbin(txx[:, 0], txx[:, 1], 32 | cmap='magma_r', 33 | mincnt=1, 34 | bins='log', 35 | alpha=0.8, 36 | ) 37 | 38 | ax.plot(straj[:, 0], straj[:, 1], 'o-', label='Sampled') 39 | 40 | ax.set_xlabel("tIC 1", fontsize=16) 41 | ax.set_ylabel("tIC 2", fontsize=16) 42 | ax.legend(loc='best') 43 | 44 | 45 | ## Plot 46 | fig, ax = plt.subplots(figsize=(7, 5)) 47 | plot_sampled_traj(ax) 48 | fig.tight_layout() 49 | fig.savefig('tica-dimension-0-heatmap.pdf') 50 | # {{xdg_open('tica-dimension-0-heatmap.pdf')}} 51 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/tica/tica-sample-coordinate.py: -------------------------------------------------------------------------------- 1 | """Sample tICA coordinates 2 | 3 | {{header}} 4 | 5 | Meta 6 | ---- 7 | depends: 8 | - ../top.pdb 9 | - ../trajs 10 | """ 11 | 12 | import mdtraj as md 13 | 14 | from msmbuilder.io.sampling import sample_dimension 15 | from msmbuilder.io import load_trajs, save_generic, preload_top, backup 16 | 17 | ## Load 18 | meta, ttrajs = load_trajs('ttrajs') 19 | 20 | ## Sample 21 | inds = sample_dimension(ttrajs, 22 | dimension=0, 23 | n_frames=200, scheme='random') 24 | 25 | save_generic(inds, "tica-dimension-0-inds.pickl") 26 | 27 | ## Make trajectory 28 | top = preload_top(meta) 29 | 30 | # Use loc because sample_dimension is nice 31 | traj = md.join( 32 | md.load_frame(meta.loc[traj_i]['traj_fn'], index=frame_i, top=top) 33 | for traj_i, frame_i in inds 34 | ) 35 | 36 | ## Save 37 | traj_fn = "tica-dimension-0.xtc" 38 | backup(traj_fn) 39 | traj.save(traj_fn) 40 | -------------------------------------------------------------------------------- /msmbuilder/project_templates/tica/tica.py: -------------------------------------------------------------------------------- 1 | """Reduce dimensionality with tICA 2 | 3 | {{header}} 4 | Meta 5 | ---- 6 | depends: 7 | - ftrajs 8 | - meta.pandas.pickl 9 | """ 10 | 11 | from msmbuilder.io import load_trajs, save_trajs, save_generic 12 | from msmbuilder.decomposition import tICA 13 | 14 | ## Load 15 | tica = tICA(n_components=5, lag_time=10, kinetic_mapping=True) 16 | meta, ftrajs = load_trajs("ftrajs") 17 | 18 | ## Fit 19 | tica.fit(ftrajs.values()) 20 | 21 | ## Transform 22 | ttrajs = {} 23 | for k, v in ftrajs.items(): 24 | ttrajs[k] = tica.partial_transform(v) 25 | 26 | ## Save 27 | save_trajs(ttrajs, 'ttrajs', meta) 28 | save_generic(tica, 'tica.pickl') 29 | -------------------------------------------------------------------------------- /msmbuilder/scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msmbuilder/msmbuilder/515fd5c27836c797692d600216b5eb224dfc1c5d/msmbuilder/scripts/__init__.py -------------------------------------------------------------------------------- 
/msmbuilder/scripts/msmb.py: -------------------------------------------------------------------------------- 1 | """Statistical models for biomolecular dynamics""" 2 | from __future__ import print_function, absolute_import, division 3 | import sys 4 | from ..cmdline import App 5 | from ..commands import * 6 | from ..version import version 7 | # the commands register themselves when they're imported 8 | 9 | # Load external commands which register themselves 10 | # with entry point msmbuilder.commands 11 | from pkg_resources import iter_entry_points 12 | 13 | for ep in iter_entry_points("msmbuilder.commands"): 14 | external_command = ep.load() 15 | # Some groups start with numbers for ordering 16 | # Some start with descriptions e.g. "MSM" 17 | # Let's set the group to start with ZZZ to put plugins last. 18 | external_command._group = "ZZZ-External_" + external_command._group 19 | 20 | 21 | class MSMBuilderApp(App): 22 | pass 23 | 24 | 25 | def main(): 26 | try: 27 | app = MSMBuilderApp(name='MSMBuilder', description=__doc__) 28 | app.start() 29 | except RuntimeError as e: 30 | sys.exit("Error: %s" % e) 31 | except Exception as e: 32 | message = """\ 33 | An unexpected error has occurred with MSMBuilder (version %s), please 34 | consider sending the following traceback to MSMBuilder GitHub issue tracker at: 35 | https://github.com/msmbuilder/msmbuilder/issues 36 | """ 37 | print(message % version, file=sys.stderr) 38 | raise # as if we did not catch it 39 | 40 | 41 | if __name__ == '__main__': 42 | main() 43 | -------------------------------------------------------------------------------- /msmbuilder/src/f2py/f2pyptr.h: -------------------------------------------------------------------------------- 1 | #ifndef F2PYPTR_H_ 2 | #define F2PYPTR_H_ 3 | 4 | #include 5 | 6 | void *f2py_pointer(PyObject *obj) 7 | { 8 | #if PY_VERSION_HEX < 0x03000000 9 | if (PyCObject_Check(obj)) { 10 | return PyCObject_AsVoidPtr(obj); 11 | } 12 | #endif 13 | #if PY_VERSION_HEX >= 0x02070000 14 | if (PyCapsule_CheckExact(obj)) { 15 | return PyCapsule_GetPointer(obj, NULL); 16 | } 17 | #endif 18 | PyErr_SetString(PyExc_ValueError, "Not an object containing a void ptr"); 19 | return NULL; 20 | } 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /msmbuilder/src/scipy_lapack.h: -------------------------------------------------------------------------------- 1 | #ifndef MIXTAPE_SCIPY_LAPACK 2 | #define MIXTAPE_SCIPY_LAPACK 3 | 4 | #include 5 | #include "f2py/f2pyptr.h" 6 | 7 | typedef int sgemm_t(const char *transa, const char *transb, const int *m, const int *n, const int *k, const float *alpha, const float *a, const int *lda, float *b, const int *ldb, const float *beta, float *c, const int *ldc); 8 | typedef int spotrf_t(const char *uplo, const int *n, float *a, const int *lda, int *info); 9 | typedef int strtrs_t(const char *uplo, const char *trans, const char *diag, const int *n, const int *nrhs, const float *a, const int *lda, float *b, const int *ldb, int * info); 10 | 11 | typedef struct { 12 | sgemm_t *sgemm; 13 | spotrf_t *spotrf; 14 | strtrs_t *strtrs; 15 | } lapack_t; 16 | static lapack_t __lapack; 17 | 18 | 19 | static lapack_t* get_lapack(void) { 20 | PyObject *mod_lapack, *mod_blas, *func, *cpointer; 21 | if (__lapack.sgemm == NULL) { 22 | mod_blas = PyImport_ImportModule("scipy.linalg.blas"); 23 | mod_lapack = PyImport_ImportModule("scipy.linalg.lapack"); 24 | 25 | func = PyObject_GetAttrString(mod_blas, "sgemm"); 26 | cpointer = 
PyObject_GetAttrString(func, "_cpointer"); 27 | __lapack.sgemm = (sgemm_t*) f2py_pointer(cpointer); 28 | 29 | func = PyObject_GetAttrString(mod_lapack, "spotrf"); 30 | cpointer = PyObject_GetAttrString(func, "_cpointer"); 31 | __lapack.spotrf = (spotrf_t*) f2py_pointer(cpointer); 32 | 33 | func = PyObject_GetAttrString(mod_lapack, "strtrs"); 34 | cpointer = PyObject_GetAttrString(func, "_cpointer"); 35 | __lapack.strtrs = (strtrs_t*) f2py_pointer(cpointer); 36 | } 37 | 38 | return &__lapack; 39 | } 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /msmbuilder/src/triu_utils.pyx: -------------------------------------------------------------------------------- 1 | """ 2 | Utilities related to indexing upper triangular matrices with a diagonal 3 | offset of 1. The semantics match ``numpy.triu_indices(n, k=1)`` 4 | """ 5 | from numpy cimport npy_intp 6 | 7 | 8 | cdef inline npy_intp ij_to_k(npy_intp i, npy_intp j, npy_intp n) nogil: 9 | """2D (i, j) square matrix index to linearized upper triangular index 10 | 11 | [ 0 a0 a1 a2 a3 ] (i=0,j=1) -> 0 12 | [ 0 0 a4 a5 a6 ] (i=0,j=2) -> 1 13 | [ 0 0 0 a7 a8 ] (i=1,j=3) -> 5 14 | [ 0 0 0 0 a9 ] etc 15 | [ 0 0 0 0 0 ] (i=4,j=5) -> 9 16 | 17 | For further explanation, see http://stackoverflow.com/a/27088560/1079728 18 | 19 | Parameters 20 | ---------- 21 | i : int 22 | Row index 23 | j : int 24 | Column index 25 | n : int 26 | Matrix size. The matrix is assumed to be square 27 | 28 | Returns 29 | ------- 30 | k : int 31 | Linearized upper triangular index 32 | 33 | See Also 34 | -------- 35 | k_to_ij : the inverse operation 36 | """ 37 | if j > i: 38 | return (n*(n-1)/2) - (n-i)*((n-i)-1)/2 + j - i - 1 39 | return (n*(n-1)/2) - (n-j)*((n-j)-1)/2 + i - j - 1 40 | 41 | 42 | cdef inline void k_to_ij(npy_intp k, npy_intp n, npy_intp *i, npy_intp *j) nogil: 43 | """Linearized upper triangular index to 2D (i, j) index 44 | 45 | [ 0 a0 a1 a2 a3 ] 0 -> (i=0,j=1) 46 | [ 0 0 a4 a5 a6 ] 1 -> (i=0,j=2) 47 | [ 0 0 0 a7 a8 ] 5 -> (i=1,j=3) 48 | [ 0 0 0 0 a9 ] etc 49 | [ 0 0 0 0 0 ] 50 | 51 | http://stackoverflow.com/a/27088560/1079728 52 | 53 | Parameters 54 | ---------- 55 | k : int 56 | Linearized upper triangular index 57 | 58 | Returns 59 | ------- 60 | i : int 61 | Row index, written into *i on exit 62 | j : int 63 | Column index, written into *j on exit 64 | """ 65 | 66 | i[0] = n - 2 - (sqrt(-8.0*k + 4.0*n*(n-1)-7.0)/2.0 - 0.5) 67 | j[0] = k + i[0] + 1 - n*(n-1)/2 + (n-i[0])*(n-i[0]-1)/2 -------------------------------------------------------------------------------- /msmbuilder/tests/.gitignore: -------------------------------------------------------------------------------- 1 | test_cyblas.c -------------------------------------------------------------------------------- /msmbuilder/tests/__init__.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | # Show warnings for our package 4 | warnings.filterwarnings('always', module='msmbuilder.*') 5 | 6 | # Show warnings for packages where we want to be conscious of warnings 7 | warnings.filterwarnings('always', module='mdtraj.*') 8 | warnings.filterwarnings('default', module='scipy.*') 9 | warnings.filterwarnings('default', module='sklearn.*') 10 | -------------------------------------------------------------------------------- /msmbuilder/tests/native.pdb: -------------------------------------------------------------------------------- 1 | ATOM 1 1HH3 ACE 1 4.300 13.100 8.600 1.00 0.00 2 | ATOM 2 CH3 
ACE 1 5.200 13.600 8.800 1.00 0.00 3 | ATOM 3 2HH3 ACE 1 4.900 14.300 9.600 1.00 0.00 4 | ATOM 4 3HH3 ACE 1 5.600 14.200 7.900 1.00 0.00 5 | ATOM 5 C ACE 1 6.100 12.500 9.400 1.00 0.00 6 | ATOM 6 O ACE 1 6.400 12.500 10.600 1.00 0.00 7 | ATOM 7 N ALA 2 6.600 11.600 8.500 1.00 0.00 8 | ATOM 8 H ALA 2 6.500 11.600 7.500 1.00 0.00 9 | ATOM 9 CA ALA 2 7.300 10.400 9.100 1.00 0.00 10 | ATOM 10 HA ALA 2 7.900 10.700 10.000 1.00 0.00 11 | ATOM 11 CB ALA 2 6.200 9.500 9.600 1.00 0.00 12 | ATOM 12 HB1 ALA 2 5.700 9.100 8.800 1.00 0.00 13 | ATOM 13 HB2 ALA 2 6.600 8.700 10.200 1.00 0.00 14 | ATOM 14 HB3 ALA 2 5.400 10.000 10.200 1.00 0.00 15 | ATOM 15 C ALA 2 8.400 9.800 8.200 1.00 0.00 16 | ATOM 16 O ALA 2 8.400 9.900 7.000 1.00 0.00 17 | ATOM 17 N NME 3 9.300 9.000 8.800 1.00 0.00 18 | ATOM 18 H NME 3 9.100 9.000 9.800 1.00 0.00 19 | ATOM 19 CH3 NME 3 10.500 8.400 8.300 1.00 0.00 20 | ATOM 20 1HH3 NME 3 10.700 7.700 9.100 1.00 0.00 21 | ATOM 21 2HH3 NME 3 10.400 8.000 7.300 1.00 0.00 22 | ATOM 22 3HH3 NME 3 11.300 9.100 8.300 1.00 0.00 23 | TER 24 | ENDMDL 25 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_alphaanglefeaturizer.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import msmbuilder.featurizer 4 | from msmbuilder.example_datasets import MinimalFsPeptide, AlanineDipeptide 5 | 6 | warnings.filterwarnings('ignore', message='.*Unlikely unit cell vectors.*') 7 | 8 | 9 | def test_alanine_dipeptide(): 10 | # will produce 0 features because not enough peptides 11 | 12 | trajectories = AlanineDipeptide().get_cached().trajectories 13 | featurizer = msmbuilder.featurizer.AlphaAngleFeaturizer() 14 | nothing = featurizer.transform(trajectories) 15 | 16 | assert (nothing[0].shape[1] == 0) 17 | 18 | 19 | def test_fs_peptide(): 20 | # will produce 36 features 21 | 22 | trajectories = MinimalFsPeptide().get_cached().trajectories 23 | featurizer = msmbuilder.featurizer.AlphaAngleFeaturizer() 24 | alphas = featurizer.transform(trajectories) 25 | 26 | assert (alphas[0].shape[1] == 36) 27 | 28 | 29 | def test_fs_peptide_nosincos(): 30 | # will produce 18 features 31 | 32 | trajectories = MinimalFsPeptide().get_cached().trajectories 33 | featurizer = msmbuilder.featurizer.AlphaAngleFeaturizer(sincos=False) 34 | alphas = featurizer.transform(trajectories) 35 | 36 | assert (alphas[0].shape[1] == 18) 37 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_apm.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import mdtraj as md 4 | 5 | from numpy.testing.decorators import skipif 6 | import numpy as np 7 | from mdtraj.testing import eq 8 | 9 | from msmbuilder.cluster import APM 10 | from msmbuilder.example_datasets import FsPeptide 11 | 12 | rs = np.random.RandomState(42) 13 | 14 | X1 = 0.3 * rs.randn(1000, 10).astype(np.double) 15 | X2 = 0.3 * rs.randn(1000, 10).astype(np.float32) 16 | # trj = md.load(md.testing.get_fn("frame0.pdb")) 17 | trj = FsPeptide().get().trajectories[0] 18 | 19 | @skipif(True) 20 | def test_shapes(): 21 | # make sure all the shapes are correct of the fit parameters 22 | m = APM(n_macrostates=3, metric='euclidean', lag_time=1, random_state=rs) 23 | m.fit([rs.randn(100, 2)]) 24 | assert isinstance(m.labels_, list) 25 | eq(m.labels_[0].shape, (100,)) 26 | 27 | 28 | @skipif(True) 29 | def test_euclidean(): 30 | # test for predict using 
euclidean distance 31 | data = rs.randn(100, 2) 32 | m1 = APM(n_macrostates=2, metric='euclidean', lag_time=1, random_state=rs) 33 | m2 = APM(n_macrostates=2, metric='euclidean', lag_time=1, random_state=rs) 34 | 35 | labels1 = m1.fit_predict([data]) 36 | labels2 = m2.fit([data]).MacroAssignments_ 37 | eq(labels1[0], labels2[0]) 38 | 39 | 40 | @skipif(True) 41 | def test_euclidean_10000(): 42 | # test for predict using euclidean distance 43 | m1 = APM(n_macrostates=2, metric='euclidean', lag_time=10, random_state=rs) 44 | m2 = APM(n_macrostates=2, metric='euclidean', lag_time=10, random_state=rs) 45 | data = rs.randn(10000, 2) 46 | labels1 = m1.fit_predict([data]) 47 | labels2 = m2.fit([data]).MacroAssignments_ 48 | eq(labels1[0], labels2[0]) 49 | 50 | 51 | @skipif(True) 52 | def test_rmsd(): 53 | # test for predict using rmsd 54 | m1 = APM(n_macrostates=4, metric='rmsd', lag_time=1, random_state=rs) 55 | m2 = APM(n_macrostates=4, metric='rmsd', lag_time=1, random_state=rs) 56 | labels1 = m1.fit_predict([trj]) 57 | labels2 = m2.fit([trj]).MacroAssignments_ 58 | 59 | eq(labels1[0], labels2[0]) 60 | 61 | 62 | @skipif(True) 63 | def test_dtype(): 64 | X = rs.randn(100, 2) 65 | X32 = X.astype(np.float32) 66 | X64 = X.astype(np.float64) 67 | m1 = APM(n_macrostates=3, metric='euclidean', lag_time=1, random_state=rs).fit([X32]) 68 | m2 = APM(n_macrostates=3, metric='euclidean', lag_time=1, random_state=rs).fit([X64]) 69 | 70 | eq(m1.labels_[0], m2.labels_[0]) 71 | eq(m1.MacroAssignments_[0], m2.MacroAssignments_[0]) 72 | eq(m1.fit_predict([X32])[0], m2.fit_predict([X64])[0]) 73 | eq(m1.fit_predict([X32])[0], m1.MacroAssignments_[0]) 74 | 75 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_build_counts.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | 3 | from msmbuilder.msm import MarkovStateModel,BayesianMarkovStateModel,\ 4 | ContinuousTimeMSM 5 | 6 | from mdtraj.testing import eq 7 | import numpy as np 8 | 9 | def test_build_counts(): 10 | seq=[[0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0]] 11 | counts = np.array([[8, 1, 1], [1, 3, 0], [1, 0, 3]]) 12 | for mdl_type in [MarkovStateModel, BayesianMarkovStateModel, 13 | ContinuousTimeMSM]: 14 | mdl_instance = mdl_type() 15 | mdl_instance.fit(seq) 16 | eq(mdl_instance.countsmat_, counts) 17 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_clustering.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import mdtraj as md 4 | import mdtraj.testing 5 | import numpy as np 6 | import scipy.spatial.distance 7 | 8 | import msmbuilder.cluster 9 | from msmbuilder.example_datasets import FsPeptide 10 | 11 | X1 = 0.3 * np.random.RandomState(0).randn(1000, 10).astype(np.double) 12 | X2 = 0.3 * np.random.RandomState(1).randn(1000, 10).astype(np.float32) 13 | # trj = md.load(md.testing.get_fn("traj.h5")) 14 | trj = FsPeptide().get().trajectories[0][:100] 15 | 16 | 17 | def test_regular_spatial_rmsd(): 18 | model = msmbuilder.cluster.RegularSpatial(d_min=0.01, metric='rmsd') 19 | model.fit([trj]) 20 | 21 | assert isinstance(model.cluster_centers_, md.Trajectory) 22 | assert len(model.cluster_centers_) == model.n_clusters_ 23 | predict = model.predict([trj]) 24 | assert isinstance(predict, list) and len(predict) == 1 25 | assert len(predict[0]) == len(trj) 26 | assert 
isinstance(predict[0], np.ndarray) and predict[0].dtype == np.intp 27 | 28 | 29 | def test_regular_spatial(): 30 | model = msmbuilder.cluster.RegularSpatial(d_min=0.8) 31 | 32 | for X in [X1, X2]: 33 | model.fit([X]) 34 | 35 | assert model.cluster_centers_.shape[1] == 10 36 | assert isinstance(model.cluster_centers_, np.ndarray) 37 | assert len(model.cluster_centers_) == model.n_clusters_ 38 | predict = model.predict([X]) 39 | assert isinstance(predict, list) and len(predict) == 1 40 | assert len(predict[0]) == len(X) 41 | assert (isinstance(predict[0], np.ndarray) 42 | and predict[0].dtype == np.intp) 43 | 44 | assert model.cluster_centers_.shape[0] > 200 45 | assert not np.all(scipy.spatial.distance.pdist(X) > model.d_min) 46 | assert np.all(scipy.spatial.distance.pdist(model.cluster_centers_) 47 | > model.d_min) 48 | 49 | assert np.all(np.shape(model.cluster_center_indices_) 50 | == (len(model.cluster_center_indices_), 2)) 51 | 52 | 53 | def test_kcenters_rmsd(): 54 | model = msmbuilder.cluster.KCenters(3, metric='rmsd') 55 | model.fit([trj]) 56 | 57 | assert len(model.cluster_centers_) == 3 58 | assert isinstance(model.cluster_centers_, md.Trajectory) 59 | predict = model.predict([trj]) 60 | assert isinstance(predict, list) and len(predict) == 1 61 | assert len(predict[0]) == len(trj) 62 | assert isinstance(predict[0], np.ndarray) and predict[0].dtype == np.intp 63 | 64 | 65 | def test_kcenters_spatial(): 66 | model = msmbuilder.cluster.KCenters(5) 67 | 68 | for X in [X1, X2]: 69 | model.fit([X]) 70 | 71 | assert model.cluster_centers_.shape[1] == 10 72 | assert isinstance(model.cluster_centers_, np.ndarray) 73 | assert len(model.cluster_centers_) == 5 74 | predict = model.predict([X]) 75 | assert isinstance(predict, list) and len(predict) == 1 76 | assert len(predict[0]) == len(X) 77 | assert (isinstance(predict[0], np.ndarray) 78 | and predict[0].dtype == np.intp) 79 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_convenience.py: -------------------------------------------------------------------------------- 1 | 2 | from msmbuilder.utils import unique 3 | 4 | def test_unique(): 5 | assert unique([1,2,3,3,2,1]) == [1,2,3] 6 | assert unique([3,3,2,2,1,1]) == [3,2,1] 7 | assert unique([3,1,2,1,2,3]) == [3,1,2] 8 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_cyblas_wrapper.py: -------------------------------------------------------------------------------- 1 | # this file gets nose to find the tests that appear in the cython module 2 | from msmbuilder.tests import test_cyblas 3 | 4 | 5 | def test(): 6 | count = 0 7 | for name in dir(test_cyblas): 8 | if name.startswith('test'): 9 | count += 1 10 | yield getattr(test_cyblas, name) 11 | if count == 0: 12 | assert False 13 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_dependencies.py: -------------------------------------------------------------------------------- 1 | import os, pip, sys, warnings 2 | from msmbuilder.example_datasets import has_msmb_data 3 | 4 | def test_installed_packages(): 5 | try: 6 | installed_packages = pip.get_installed_distributions 7 | except: 8 | from pip._internal.utils.misc import get_installed_distributions as installed_packages 9 | 10 | package_names = [package.project_name for package in installed_packages()] 11 | 12 | test_dependencies = ['munkres', 'numdifftools', 'statsmodels', 'hmmlearn'] 13 | 14 | if not hasattr(sys, 
'getwindowsversion'): 15 | test_dependencies += ['cvxpy'] 16 | 17 | for td in test_dependencies: 18 | if td not in package_names: 19 | raise RuntimeError('Please install {} to continue'.format(td)) 20 | 21 | def test_msmb_data(): 22 | if has_msmb_data() is None: 23 | raise RuntimeError('Please install {} to continue'.format('msmb_data')) 24 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_estimator_subclassing.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, absolute_import, division 2 | 3 | import importlib 4 | import inspect 5 | import pkgutil 6 | import warnings 7 | from contextlib import contextmanager 8 | 9 | from sklearn.base import BaseEstimator 10 | 11 | import msmbuilder 12 | import msmbuilder.base 13 | 14 | 15 | def silent_warnings(*args, **kwargs): 16 | print(args, kwargs) 17 | 18 | 19 | @contextmanager 20 | def supress_warnings(): 21 | original_warn = warnings.warn 22 | warnings.warn = silent_warnings 23 | yield 24 | warnings.warn = original_warn 25 | 26 | 27 | def import_all_estimators(pkg): 28 | def estimator_in_module(mod): 29 | for name, obj in inspect.getmembers(mod): 30 | if name.startswith('_'): 31 | continue 32 | if inspect.isclass(obj) and issubclass(obj, BaseEstimator): 33 | yield obj 34 | 35 | with supress_warnings(): 36 | result = {} 37 | for _, modname, ispkg in pkgutil.iter_modules(pkg.__path__): 38 | c = '%s.%s' % (pkg.__name__, modname) 39 | try: 40 | mod = importlib.import_module(c) 41 | if ispkg: 42 | result.update(import_all_estimators(mod)) 43 | for kls in estimator_in_module(mod): 44 | result[kls.__name__] = kls 45 | except ImportError as e: 46 | print('e', e) 47 | continue 48 | 49 | return result 50 | 51 | 52 | def test_all_estimators(): 53 | for key, value in import_all_estimators(msmbuilder).items(): 54 | if 'msmbuilder' in value.__module__: 55 | assert issubclass(value, msmbuilder.base.BaseEstimator), value 56 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_featurizer_subset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from mdtraj.testing import eq 3 | 4 | from msmbuilder.example_datasets import AlanineDipeptide 5 | from msmbuilder.featurizer import AtomPairsFeaturizer, get_atompair_indices 6 | from msmbuilder.featurizer.subset import SubsetAtomPairs, \ 7 | SubsetCosPhiFeaturizer, SubsetCosPsiFeaturizer, \ 8 | SubsetSinPhiFeaturizer, SubsetSinPsiFeaturizer 9 | 10 | 11 | def test_SubsetAtomPairs_1(): 12 | trajectories = AlanineDipeptide().get_cached().trajectories 13 | trj0 = trajectories[0][0] 14 | atom_indices, pair_indices = get_atompair_indices(trj0) 15 | featurizer = AtomPairsFeaturizer(pair_indices) 16 | X_all0 = featurizer.transform(trajectories) 17 | 18 | featurizer = SubsetAtomPairs(pair_indices, trj0) 19 | featurizer.subset = np.arange(len(pair_indices)) 20 | X_all = featurizer.transform(trajectories) 21 | 22 | any([eq(x, x0) for (x, x0) in zip(X_all, X_all0)]) 23 | 24 | 25 | def test_SubsetAtomPairs_2(): 26 | trajectories = AlanineDipeptide().get_cached().trajectories 27 | trj0 = trajectories[0][0] 28 | atom_indices, pair_indices = get_atompair_indices(trj0) 29 | featurizer = AtomPairsFeaturizer(pair_indices) 30 | X_all0 = featurizer.transform(trajectories) 31 | 32 | featurizer = SubsetAtomPairs(pair_indices, trj0, 33 | subset=np.arange(len(pair_indices))) 34 | X_all = 
featurizer.transform(trajectories) 35 | 36 | any([eq(x, x0) for (x, x0) in zip(X_all, X_all0)]) 37 | 38 | 39 | def test_SubsetAtomPairs_3(): 40 | trajectories = AlanineDipeptide().get_cached().trajectories 41 | trj0 = trajectories[0][0] 42 | atom_indices, pair_indices = get_atompair_indices(trj0) 43 | featurizer = AtomPairsFeaturizer(pair_indices) 44 | X_all0 = featurizer.transform(trajectories) 45 | 46 | featurizer = SubsetAtomPairs(pair_indices, trj0, subset=np.array([0, 1])) 47 | X_all = featurizer.transform(trajectories) 48 | 49 | try: 50 | any([eq(x, x0) for (x, x0) in zip(X_all, X_all0)]) 51 | except AssertionError: 52 | pass 53 | else: 54 | raise AssertionError("Did not raise an assertion!") 55 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_kernel_approximation.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import numpy as np 4 | from numpy.testing import assert_array_almost_equal 5 | from sklearn.kernel_approximation import Nystroem as NystroemR 6 | 7 | from msmbuilder.decomposition.kernel_approximation import Nystroem, LandmarkNystroem 8 | 9 | 10 | def test_nystroem_vs_sklearn(): 11 | np.random.seed(42) 12 | X = np.random.randn(100, 5) 13 | 14 | kernel = Nystroem(kernel='linear', random_state=42) 15 | kernelR = NystroemR(kernel='linear', random_state=42) 16 | 17 | y1 = kernel.fit_transform([X])[0] 18 | y2 = kernelR.fit_transform(X) 19 | 20 | assert_array_almost_equal(y1, y2) 21 | 22 | 23 | def test_lndmrk_nystroem_approximation(): 24 | np.random.seed(42) 25 | X = np.random.randn(100, 5) 26 | 27 | u = np.arange(X.shape[0])[5::1] 28 | v = np.arange(X.shape[0])[::1][:u.shape[0]] 29 | lndmrks = X[np.unique((u, v))] 30 | 31 | kernel = LandmarkNystroem(kernel='rbf', random_state=42) 32 | kernelR = NystroemR(kernel='rbf', random_state=42) 33 | 34 | y1_1 = kernel.fit_transform([X])[0] 35 | kernel.landmarks = lndmrks 36 | y1_2 = kernel.fit_transform([X])[0] 37 | 38 | y2 = kernelR.fit_transform(X) 39 | 40 | assert_array_almost_equal(y2, y1_1) 41 | 42 | assert not all((np.abs(y2 - y1_2) > 1E-6).flatten()) 43 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_ksparsetica.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from msmbuilder.decomposition import tICA, KSparseTICA 4 | from msmbuilder.example_datasets import MetEnkephalin 5 | from msmbuilder.featurizer import AtomPairsFeaturizer 6 | 7 | def build_dataset(): 8 | trajs = MetEnkephalin().get().trajectories 9 | 10 | pairs = [] 11 | for i in range(trajs[0].n_atoms): 12 | for j in range(i): 13 | pairs.append((i, j)) 14 | np.random.seed(0) 15 | np.random.shuffle(pairs) 16 | n_pairs = 200 17 | 18 | return AtomPairsFeaturizer(pairs[:n_pairs]).transform([traj[::10] for traj in trajs]) 19 | 20 | def test_MetEnkephalin(): 21 | np.random.seed(0) 22 | data = build_dataset() 23 | n_features = data[0].shape[1] 24 | 25 | # check whether this recovers a single 1-sparse eigenpair without error 26 | kstica = KSparseTICA(n_components=1, k = 1) 27 | _ = kstica.fit_transform(data) 28 | assert (np.sum(kstica.components_ != 0) == 1) 29 | 30 | ## check whether this recovers >1 eigenpair without error 31 | #kstica = KSparseTICA(n_components=2) 32 | #_ = kstica.fit_transform(data) 33 | 34 | ## check whether this recovers all eigenpairs without error 35 | #kstica = KSparseTICA() 36 | #_ = 
kstica.fit_transform(data) 37 | 38 | # check whether we recover the same solution as standard tICA when k = n_features 39 | n_components = 10 40 | kstica = KSparseTICA(n_components=n_components, k=n_features) 41 | tica = tICA(n_components=n_components) 42 | _ = kstica.fit_transform(data) 43 | _ = tica.fit_transform(data) 44 | np.testing.assert_array_almost_equal(kstica.eigenvalues_, tica.eigenvalues_) -------------------------------------------------------------------------------- /msmbuilder/tests/test_muller.py: -------------------------------------------------------------------------------- 1 | from msmbuilder.example_datasets import MullerPotential, load_muller 2 | from msmbuilder.utils import array2d 3 | 4 | 5 | def test_func(): 6 | xx = load_muller(random_state=1110102)['trajectories'] 7 | assert len(xx) == 10 8 | assert xx[0].ndim == 2 9 | assert xx[0].shape[1] == 2 10 | array2d(xx) 11 | 12 | 13 | def test_class(): 14 | xx = MullerPotential(random_state=123122).get()['trajectories'] 15 | assert len(xx) == 10 16 | assert xx[0].ndim == 2 17 | assert xx[0].shape[1] == 2 18 | array2d(xx) 19 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_ndgrid.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | 3 | import numpy as np 4 | 5 | from msmbuilder.cluster import NDGrid 6 | 7 | 8 | def test_ndgrid_1(): 9 | X = np.array([-3, -2, -1, 1, 2, 3]).reshape(-1, 1) 10 | labels = NDGrid(n_bins_per_feature=2).fit([X]).predict([X])[0] 11 | np.testing.assert_array_equal(labels, np.array([0, 0, 0, 1, 1, 1])) 12 | 13 | 14 | def test_ndgrid_2(): 15 | X = np.random.RandomState(0).randn(100, 2) 16 | ndgrid = NDGrid(n_bins_per_feature=2, min=-5, max=5) 17 | labels = ndgrid.fit([X]).predict([X])[0] 18 | 19 | mask0 = np.logical_and(X[:, 0] < 0, X[:, 1] < 0) 20 | assert np.all(labels[mask0] == 0) 21 | mask1 = np.logical_and(X[:, 0] > 0, X[:, 1] < 0) 22 | assert np.all(labels[mask1] == 1) 23 | mask2 = np.logical_and(X[:, 0] < 0, X[:, 1] > 0) 24 | assert np.all(labels[mask2] == 2) 25 | mask3 = np.logical_and(X[:, 0] > 0, X[:, 1] > 0) 26 | assert np.all(labels[mask3] == 3) 27 | 28 | 29 | def test_ndgrid_3(): 30 | X = np.random.RandomState(0).randn(100, 3) 31 | ndgrid = NDGrid(n_bins_per_feature=2, min=-5, max=5) 32 | labels = ndgrid.fit([X]).predict([X])[0] 33 | 34 | operators = [np.less, np.greater] 35 | x = X[:, 0] 36 | y = X[:, 1] 37 | z = X[:, 2] 38 | 39 | it = itertools.product(operators, repeat=3) 40 | 41 | for indx, (op_z, op_y, op_x) in enumerate(it): 42 | mask = np.logical_and.reduce((op_x(x, 0), op_y(y, 0), op_z(z, 0))) 43 | assert np.all(labels[mask] == indx) 44 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_nearest.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division, absolute_import 2 | 3 | import numpy as np 4 | 5 | from msmbuilder.utils import KDTree 6 | 7 | X1 = 0.3 * np.random.RandomState(0).randn(500, 10) 8 | X2 = 0.3 * np.random.RandomState(1).randn(1000, 10) + 10 9 | 10 | 11 | def test_kdtree_k1(): 12 | kdtree = KDTree([X1, X2]) 13 | dists, inds = kdtree.query([ 14 | [0] * 10, 15 | [10] * 10, 16 | [0] * 10 17 | ]) 18 | 19 | assert len(inds) == 3 20 | for subind in inds: 21 | assert len(subind) == 2 22 | 23 | # traj i 24 | assert inds[0][0] == 0 25 | assert inds[1][0] == 1 26 | assert inds[2][0] == 0 27 | 28 | # framei 29 | assert 0 <= 
inds[0][1] < 500 30 | assert 0 <= inds[1][1] < 1000 31 | assert 0 <= inds[2][1] < 500 32 | 33 | # distances 34 | assert len(dists) == 3 35 | for d in dists: 36 | assert 0 <= d < 0.5 37 | 38 | 39 | def test_kdtree_k2(): 40 | kdtree = KDTree([X1, X2]) 41 | dists, inds = kdtree.query([ 42 | [0] * 10, 43 | [10] * 10, 44 | [0] * 10 45 | ], k=2) 46 | 47 | assert len(inds) == 3 48 | 49 | # traj i 50 | for qp in inds[0]: assert qp[0] == 0 51 | for qp in inds[1]: assert qp[0] == 1 52 | for qp in inds[2]: assert qp[0] == 0 53 | 54 | # frame i 55 | for qp in inds[0]: assert 0 <= qp[1] < 500 56 | for qp in inds[1]: assert 0 <= qp[1] < 1000 57 | for qp in inds[2]: assert 0 <= qp[1] < 500 58 | 59 | # distances 60 | assert len(dists) == 3 61 | for d in dists: 62 | assert 0 <= d[0] < 0.5 63 | assert 0 <= d[1] < 0.5 64 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_param_sweep.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | 3 | import numpy as np 4 | import numpy.testing as npt 5 | 6 | from msmbuilder.msm import MarkovStateModel 7 | from msmbuilder.msm import implied_timescales 8 | from msmbuilder.utils import param_sweep 9 | 10 | 11 | def test_both(): 12 | sequences = [np.random.randint(20, size=1000) for _ in range(10)] 13 | lag_times = [1, 5, 10] 14 | 15 | models_ref = [] 16 | for tau in lag_times: 17 | msm = MarkovStateModel(reversible_type='mle', lag_time=tau, 18 | n_timescales=10) 19 | msm.fit(sequences) 20 | models_ref.append(msm) 21 | 22 | timescales_ref = [m.timescales_ for m in models_ref] 23 | 24 | model = MarkovStateModel(reversible_type='mle', lag_time=1, n_timescales=10) 25 | models = param_sweep(model, sequences, {'lag_time': lag_times}, n_jobs=2) 26 | timescales = implied_timescales(sequences, lag_times, msm=model, 27 | n_timescales=10, n_jobs=2) 28 | 29 | print(timescales) 30 | print(timescales_ref) 31 | 32 | if np.abs(models[0].transmat_ - models[1].transmat_).sum() < 1E-6: 33 | raise Exception("you wrote a bad test.") 34 | 35 | for i in range(len(lag_times)): 36 | npt.assert_array_almost_equal(models[i].transmat_, 37 | models_ref[i].transmat_) 38 | npt.assert_array_almost_equal(timescales_ref[i], timescales[i]) 39 | 40 | 41 | def test_multi_params(): 42 | msm = MarkovStateModel() 43 | param_grid = { 44 | 'lag_time': [1, 2, 3], 45 | 'reversible_type': ['mle', 'transpose'] 46 | } 47 | 48 | sequences = np.random.randint(20, size=(10, 1000)) 49 | models = param_sweep(msm, sequences, param_grid, n_jobs=2) 50 | assert len(models) == 6 51 | 52 | # I don't know what the order should be, so I'm just going 53 | # to check that there are no duplicates 54 | params = [] 55 | for m in models: 56 | params.append('%s%d' % (m.reversible_type, m.lag_time)) 57 | 58 | for l in param_grid['lag_time']: 59 | for s in param_grid['reversible_type']: 60 | assert ('%s%d' % (s, l)) in params 61 | 62 | # this is redundant, but w/e 63 | assert len(set(params)) == 6 64 | 65 | 66 | def test_ntimescales(): 67 | # see issue #603 68 | trajs = [np.random.randint(0, 30, 500) for _ in range(5)] 69 | its = implied_timescales(trajs, [1, 2, 3], n_timescales=11) 70 | assert its.shape[1] == 11 71 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_sampling.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from msmbuilder.decomposition import tICA 4 | from 
msmbuilder.io.sampling import sample_dimension 5 | 6 | 7 | def test_sample_dimension(): 8 | np.random.seed(42) 9 | X = np.random.randn(500, 5) 10 | data = [X, X, X] 11 | 12 | tica = tICA(n_components=2, lag_time=1).fit(data) 13 | tica_trajs = {k: tica.partial_transform(v) for k, v in enumerate(data)} 14 | res = sample_dimension(tica_trajs, 0, 10, scheme="linear") 15 | res2 = sample_dimension(tica_trajs, 1, 10, scheme="linear") 16 | 17 | assert len(res) == len(res2) == 10 18 | 19 | def test_sample_dimension_2(): 20 | np.random.seed(42) 21 | X = np.random.randn(500, 5) 22 | data = [X, X, X] 23 | 24 | tica = tICA(n_components=2, lag_time=1).fit(data) 25 | tica_trajs = {k: tica.partial_transform(v) for k, v in enumerate(data)} 26 | res = sample_dimension(tica_trajs, 0, 10, scheme="random") 27 | res2 = sample_dimension(tica_trajs, 1, 10, scheme="edge") 28 | 29 | assert len(res) == len(res2) == 10 30 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_sasa_featurizer.py: -------------------------------------------------------------------------------- 1 | import mdtraj as md 2 | import numpy as np 3 | from mdtraj.testing import eq 4 | 5 | from msmbuilder.featurizer import SASAFeaturizer 6 | from msmbuilder.example_datasets import FsPeptide 7 | 8 | t = FsPeptide().get().trajectories[0][:10] 9 | 10 | def _test_sasa_featurizer(t, value): 11 | sasa = md.shrake_rupley(t) 12 | rids = np.array([a.residue.index for a in t.top.atoms]) 13 | 14 | for i, rid in enumerate(np.unique(rids)): 15 | mask = (rids == rid) 16 | eq(value[:, i], np.sum(sasa[:, mask], axis=1)) 17 | 18 | 19 | def test_sasa_featurizer_1(): 20 | # t = md.load(get_fn('frame0.h5')) 21 | 22 | value = SASAFeaturizer(mode='residue').partial_transform(t) 23 | assert value.shape == (t.n_frames, t.n_residues) 24 | _test_sasa_featurizer(t, value) 25 | 26 | 27 | def test_sasa_featurizer_2(): 28 | # t = md.load(get_fn('frame0.h5')) 29 | 30 | # scramle the order of the atoms, and which residue each is a 31 | # member of 32 | df, bonds = t.top.to_dataframe() 33 | df['resSeq'] = np.random.randint(5, size=(t.n_atoms)) 34 | df['resName'] = df['resSeq'] 35 | t.top = md.Topology.from_dataframe(df, bonds) 36 | 37 | value = SASAFeaturizer(mode='residue').partial_transform(t) 38 | _test_sasa_featurizer(t, value) 39 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_sparsetica.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from msmbuilder.decomposition import tICA, SparseTICA 4 | from msmbuilder.example_datasets import DoubleWell 5 | 6 | 7 | def build_dataset(): 8 | slow = DoubleWell(random_state=0).get_cached().trajectories 9 | data = [] 10 | 11 | # each trajectory is a double-well along the first dof, 12 | # and then 9 degrees of freedom of gaussian white noise. 
13 | for s in slow: 14 | t = np.hstack((s, np.random.randn(len(s), 9))) 15 | data.append(t) 16 | return data 17 | 18 | 19 | def test_doublewell(): 20 | data = build_dataset() 21 | tica = tICA(n_components=1).fit(data) 22 | tic0 = tica.components_[0] 23 | 24 | stica = SparseTICA(n_components=1, verbose=False).fit(data) 25 | stic0 = stica.components_[0] 26 | 27 | np.testing.assert_array_almost_equal(stic0[1:], np.zeros(9)) 28 | np.testing.assert_almost_equal(stic0[0], 0.58, decimal=1) 29 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_strongly_connected_subgraph.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from msmbuilder.msm import _strongly_connected_subgraph 4 | 5 | 6 | def test_completely_disconnected_1(): 7 | # what do you do with 1 state that is not even connected to itself? 8 | tC, m, p_r = _strongly_connected_subgraph(np.zeros((1, 1))) 9 | assert tC.shape == (0, 0) 10 | assert m == {} 11 | assert np.isnan(p_r) 12 | 13 | 14 | def test_completely_disconnected_2(): 15 | tC, m, p_r = _strongly_connected_subgraph(np.zeros((3, 3))) 16 | assert tC.shape == (0, 0) 17 | assert m == {} 18 | assert np.isnan(p_r) 19 | 20 | 21 | def test_one_state(): 22 | # but if that state does have a self-connection, it should be retained 23 | tC, m, p_r = _strongly_connected_subgraph(np.ones((1, 1))) 24 | assert tC.shape == (1, 1) 25 | assert m == {0: 0} 26 | np.testing.assert_almost_equal(p_r, 100) 27 | 28 | 29 | def test_counts_1(): 30 | C = np.array([[1, 0, 0], 31 | [0, 1, 1], 32 | [0, 1, 1]]) 33 | 34 | tC, m, p_r = _strongly_connected_subgraph(np.array(C)) 35 | np.testing.assert_array_equal(tC, np.array([[1, 1], [1, 1]])) 36 | assert m == {1: 0, 2: 1} 37 | np.testing.assert_almost_equal(p_r, 80.0) 38 | 39 | 40 | def test_counts_2(): 41 | C = np.array([[1, 1, 0], 42 | [0, 1, 1], 43 | [0, 1, 1]]) 44 | tC, m, p_r = _strongly_connected_subgraph(np.array(C)) 45 | np.testing.assert_array_equal(tC, np.array([[1, 1], [1, 1]])) 46 | assert m == {1: 0, 2: 1} 47 | np.testing.assert_almost_equal(p_r, 83.333333333333) 48 | 49 | 50 | def test_fully_connected(): 51 | tC, m, p_r = _strongly_connected_subgraph(np.ones((3, 3))) 52 | np.testing.assert_array_almost_equal(tC, np.ones((3, 3))) 53 | assert m == {0: 0, 1: 1, 2: 2} 54 | np.testing.assert_almost_equal(p_r, 100.0) 55 | 56 | 57 | def test_disconnected(): 58 | tC, m, p_r = _strongly_connected_subgraph(np.eye(3)) 59 | assert tC.shape == (1, 1) 60 | assert type(p_r) == np.float64 61 | 62 | 63 | def test_upper_triangular(): 64 | tC, m, p_r = _strongly_connected_subgraph(np.eye(3, k=1)) 65 | assert tC.shape == (0, 0) 66 | assert m == {} 67 | np.testing.assert_almost_equal(p_r, 50.0) 68 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_template_project.py: -------------------------------------------------------------------------------- 1 | from msmbuilder.io import TemplateProject 2 | import tempfile 3 | import shutil 4 | import os 5 | 6 | 7 | def setup_module(): 8 | global WD, PWD 9 | PWD = os.path.abspath(".") 10 | WD = tempfile.mkdtemp() 11 | os.chdir(WD) 12 | 13 | 14 | def teardown_module(): 15 | os.chdir(PWD) 16 | shutil.rmtree(WD) 17 | 18 | 19 | def test_template_project(): 20 | tp = TemplateProject() 21 | tp.do() 22 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_transition_counts.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | from six import PY3 3 | 4 | from msmbuilder.msm import _transition_counts 5 | 6 | 7 | def test_argument(): 8 | # test that first argument must be a list of sequences 9 | with np.testing.assert_raises(ValueError): 10 | _transition_counts([1, 2, 3]) 11 | 12 | 13 | def test_upper_triangular(): 14 | # test a simple example 15 | c, m = _transition_counts([np.arange(10)]) 16 | np.testing.assert_array_equal(c, np.eye(10, k=1)) 17 | assert list(m.keys()) == list(range(10)) 18 | assert list(m.values()) == list(range(10)) 19 | 20 | 21 | def test_lag_time(): 22 | # test the simple example with lag_time > 1 23 | c, m = _transition_counts([range(10)], lag_time=2) 24 | np.testing.assert_array_equal(c, 0.5 * np.eye(10, k=2)) 25 | 26 | 27 | def test_string_labels(): 28 | # try using strings as labels 29 | c, m = _transition_counts([['alpha', 'b', 'b', 'b', 'c']]) 30 | np.testing.assert_array_equal(c, 1.0 * np.array([ 31 | [0, 1, 0], 32 | [0, 2, 1], 33 | [0, 0, 0] 34 | ])) 35 | assert m == {'alpha': 0, 'b': 1, 'c': 2} 36 | 37 | 38 | def test_big_counts(): 39 | # try using really big numbers, and we still want a small transition matrix 40 | c, m = _transition_counts([[100000000, 100000000, 100000001, 100000001]]) 41 | np.testing.assert_array_equal(c, 1.0 * np.array([ 42 | [1, 1], 43 | [0, 1], 44 | ])) 45 | assert m == {100000000: 0, 100000001: 1} 46 | 47 | 48 | def test_no_counts(): 49 | c, m = _transition_counts([[0]]) 50 | 51 | 52 | def test_nan_and_none(): 53 | # deal with NaN, None? 54 | c, m = _transition_counts([[0, np.nan]]) 55 | assert m == {0: 0} 56 | np.testing.assert_array_equal(c, np.zeros((1, 1))) 57 | 58 | c, m = _transition_counts([[np.nan]]) 59 | assert m == {} 60 | np.testing.assert_array_equal(c, np.zeros((0, 0))) 61 | 62 | if not PY3: 63 | c, m = _transition_counts([[None, None]]) 64 | assert m == {} 65 | np.testing.assert_array_equal(c, np.zeros((0, 0))) 66 | 67 | 68 | def test_lag_time_norm(): 69 | X = np.arange(6) 70 | C, _ = _transition_counts([X], lag_time=3) 71 | np.testing.assert_array_almost_equal(C, np.eye(6, k=3) / 3) 72 | 73 | 74 | def test_sliding_window(): 75 | X = np.arange(10) 76 | C1, m1 = _transition_counts([X], lag_time=3, sliding_window=False) 77 | C2, m2 = _transition_counts([X[::3]], sliding_window=True) 78 | np.testing.assert_array_almost_equal(C1, C2) 79 | assert m1 == m2 80 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_transmat_errorbar.py: -------------------------------------------------------------------------------- 1 | from msmbuilder.msm.validation.transmat_errorbar import * 2 | import numpy as np 3 | 4 | 5 | def test_create_perturb_params(): 6 | # Test with a 10x10 counts matrix, with all entries in the counts set to 100 7 | countsmat = 100 * np.ones((10,10)) 8 | params = create_perturb_params(countsmat) 9 | # Check dimensions of outputs are equal to those of inputs 10 | for param in params: 11 | assert np.shape(param) == np.shape(countsmat) 12 | 13 | 14 | def test_perturb_tmat(): 15 | # The transition matrix is perturbed under the CLT approximation, which is only valid for well-sampled data w.r.t. 
transition probability (tprob >> 1 / row-summed counts) 16 | countsmat = 100 * np.ones((10,10)) # 10-state MSM, 1000 counts per state, 100 transition events between states, no zero entries 17 | params = create_perturb_params(countsmat) 18 | new_transmat = (perturb_tmat(params[0], params[1])) 19 | # All transition probabilities are by design nonzero, so there should be no zero entries after the perturbation 20 | assert len(np.where(new_transmat == 0)[0]) == 0 21 | # Now let's assume you have a poorly sampled dataset where all elements in the counts matrix are 1 22 | countsmat = np.ones((10,10)) 23 | params = create_perturb_params(countsmat) 24 | new_transmat = (perturb_tmat(params[0], params[1])) 25 | # Your perturbed transition matrix will have several negative values (set automatically to 0), indicating this method probably isn't appropriate for your dataset 26 | # (This will also cause your distribution of MFPTs to have very obvious outliers to an otherwise approximately Gaussian distribution due to the artificial zeros in the transition matrix) 27 | assert len(np.where(new_transmat == 0)[0]) > 0 28 | 29 | -------------------------------------------------------------------------------- /msmbuilder/tests/test_workflows.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division, absolute_import 2 | 3 | import os 4 | import shlex 5 | import shutil 6 | import subprocess 7 | import tempfile 8 | 9 | from pkg_resources import resource_filename 10 | 11 | 12 | class tempdir(object): 13 | def __enter__(self): 14 | self._curdir = os.path.abspath(os.curdir) 15 | self._tempdir = tempfile.mkdtemp() 16 | os.chdir(self._tempdir) 17 | 18 | def __exit__(self, *exc_info): 19 | os.chdir(self._curdir) 20 | shutil.rmtree(self._tempdir) 21 | 22 | 23 | def shell_lines(resource): 24 | fn = resource_filename('msmbuilder', resource) 25 | buf = '' 26 | with open(fn) as f: 27 | for line in f: 28 | line = line.strip() 29 | if not line or line.startswith('#'): 30 | continue 31 | if line.endswith('\\'): 32 | buf += line.rstrip('\\') 33 | else: 34 | yield buf + ' ' + line 35 | buf = '' 36 | 37 | 38 | def check_call(tokens): 39 | try: 40 | subprocess.check_output(tokens, stderr=subprocess.STDOUT, 41 | universal_newlines=True) 42 | except subprocess.CalledProcessError as e: 43 | print(e.cmd) 44 | print(e.output) 45 | raise 46 | 47 | 48 | class workflow_tester(object): 49 | def __init__(self, fn): 50 | self.fn = fn 51 | self.path = "tests/workflows/{}".format(fn) 52 | self.description = "{}.test_{}".format(__name__, fn) 53 | 54 | def __call__(self, *args, **kwargs): 55 | with tempdir(): 56 | for line in shell_lines(self.path): 57 | check_call(shlex.split(line, posix=False)) 58 | 59 | 60 | def test_workflows(): 61 | for fn in [ 62 | 'basic.sh', 63 | 'rmsd.sh', 64 | 'ghmm.sh', 65 | ]: 66 | yield workflow_tester(fn) 67 | -------------------------------------------------------------------------------- /msmbuilder/tests/workflows/basic.sh: -------------------------------------------------------------------------------- 1 | msmb AlanineDipeptide --data_home ./ 2 | msmb AtomIndices --out atom_indices.txt -p ./alanine_dipeptide/ala2.pdb -d --heavy 3 | msmb AtomPairsFeaturizer --transformed atom_pairs --trjs './alanine_dipeptide/*.dcd' \ 4 | --pair_indices atom_indices.txt --top ./alanine_dipeptide/ala2.pdb --out atom_pairs.pkl 5 | msmb RobustScaler -i atom_pairs/ -t scaled_atom_pairs.h5 6 | msmb tICA -i scaled_atom_pairs.h5 -t atom_pairs_tica.h5
--n_components 4 \ 7 | --shrinkage 0 \ 8 | --kinetic_mapping \ 9 | --lag_time 2 10 | msmb KCenters -i atom_pairs_tica.h5 -t kcenters_clusters.h5 --metric cityblock 11 | msmb MarkovStateModel --inp kcenters_clusters.h5 --out mymsm.pkl 12 | -------------------------------------------------------------------------------- /msmbuilder/tests/workflows/ghmm.sh: -------------------------------------------------------------------------------- 1 | msmb AlanineDipeptide --data_home ./ 2 | 3 | msmb DihedralFeaturizer --transformed feats/ \ 4 | --trjs './alanine_dipeptide/*.dcd' \ 5 | --top ./alanine_dipeptide/ala2.pdb \ 6 | --out featy.pkl 7 | 8 | msmb tICA --inp feats/ --transformed tica_trajs.h5 \ 9 | --n_components 4 \ 10 | --kinetic_mapping \ 11 | --lag_time 2 12 | 13 | msmb GaussianHMM --inp tica_trajs.h5 \ 14 | --out hmm.pkl \ 15 | --n_states 2 16 | -------------------------------------------------------------------------------- /msmbuilder/tests/workflows/rmsd.sh: -------------------------------------------------------------------------------- 1 | msmb AlanineDipeptide --data_home ./ 2 | msmb AtomIndices --out atom_indices.txt \ 3 | -p ./alanine_dipeptide/ala2.pdb \ 4 | -d --heavy 5 | 6 | msmb MiniBatchKMedoids --n_clusters 10 \ 7 | --metric rmsd \ 8 | --inp './alanine_dipeptide/*.dcd' \ 9 | --top ./alanine_dipeptide/ala2.pdb \ 10 | --atom_indices atom_indices.txt \ 11 | --transformed kmedoids_centers.h5 12 | 13 | msmb RegularSpatial --inp './alanine_dipeptide/*.dcd' \ 14 | --transformed rs_rmsd.h5 \ 15 | --metric rmsd \ 16 | --top ./alanine_dipeptide/ala2.pdb \ 17 | --d_min 0.5 18 | 19 | -------------------------------------------------------------------------------- /msmbuilder/tpt/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for analyzing Markov State Models, with an emphasis 3 | on Transition Path Theory. 4 | 5 | These are the canonical references for TPT. Note that TPT 6 | is really a specialization of ideas very familiar to the 7 | mathematical study of Markov chains, and there are many 8 | books, manuscripts in the mathematical literature that 9 | cover the same concepts. 10 | 11 | References 12 | ---------- 13 | .. [1] Weinan, E. and Vanden-Eijnden, E. Towards a theory of 14 | transition paths. J. Stat. Phys. 123, 503-523 (2006). 15 | .. [2] Metzner, P., Schutte, C. & Vanden-Eijnden, E. 16 | Transition path theory for Markov jump processes. 17 | Multiscale Model. Simul. 7, 1192-1219 (2009). 18 | .. [3] Berezhkovskii, A., Hummer, G. & Szabo, A. Reactive 19 | flux and folding pathways in network models of 20 | coarse-grained protein dynamics. J. Chem. Phys. 21 | 130, 205102 (2009). 22 | .. [4] Noe, Frank, et al. "Constructing the equilibrium ensemble of folding 23 | pathways from short off-equilibrium simulations." PNAS 106.45 (2009): 24 | 19011-19016. 
25 | """ 26 | 27 | from __future__ import absolute_import 28 | 29 | from .committor import committors, conditional_committors 30 | from .flux import fluxes, net_fluxes 31 | from .hub import fraction_visited, hub_scores 32 | from .path import paths, top_path 33 | from .mfpt import mfpts 34 | 35 | __all__ = ['fluxes', 'net_fluxes', 'fraction_visited', 36 | 'hub_scores', 'paths', 'top_path', 'committors', 37 | 'conditional_committors', 'mfpts'] 38 | -------------------------------------------------------------------------------- /msmbuilder/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division, absolute_import 2 | from .draw_samples import * 3 | from .io import * 4 | from .param_sweep import * 5 | from .probability import * 6 | from .subsampler import * 7 | from .validation import * 8 | from .compat import * 9 | from .nearest import KDTree 10 | from .divergence import * 11 | from .convenience import * 12 | -------------------------------------------------------------------------------- /msmbuilder/utils/compat.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, absolute_import, division 2 | import os 3 | import functools 4 | import warnings 5 | 6 | # Copyright (C) 2012-2013 Marcus von Appen 7 | # 8 | # This software is provided 'as-is', without any express or implied 9 | # warranty. In no event will the authors be held liable for any damages 10 | # arising from the use of this software. 11 | # 12 | # Permission is granted to anyone to use this software for any purpose, 13 | # including commercial applications, and to alter it and redistribute it 14 | # freely, subject to the following restrictions: 15 | # 16 | # 1. The origin of this software must not be misrepresented; you must not 17 | # claim that you wrote the original software. If you use this software 18 | # in a product, an acknowledgment in the product documentation would be 19 | # appreciated but is not required. 20 | # 2. Altered source versions must be plainly marked as such, and must not be 21 | # misrepresented as being the original software. 22 | # 3. This notice may not be removed or altered from any source distribution. 23 | 24 | 25 | class ExperimentalWarning(Warning): 26 | """Indicates that a certain class, function or behavior is in an 27 | experimental state. 28 | """ 29 | def __init__(self, obj, msg=None): 30 | """Creates a ExperimentalWarning for the specified obj. 31 | 32 | If a message is passed in msg, it will be printed instead of the 33 | default message. 34 | """ 35 | super(ExperimentalWarning, self).__init__() 36 | self.obj = obj 37 | self.msg = msg 38 | 39 | def __str__(self): 40 | if self.msg is None: 41 | line = "Warning: %s is in an experimental state." 
% repr(self.obj) 42 | return os.linesep.join(('', '"' * len(line), line, '"' * len(line))) 43 | return repr(self.msg) 44 | 45 | 46 | def experimental(name=None): 47 | """A simple decorator to mark functions and methods as experimental.""" 48 | def inner(func): 49 | @functools.wraps(func) 50 | def wrapper(*fargs, **kw): 51 | fname = name 52 | if name is None: 53 | fname = func.__name__ 54 | warnings.warn("%s" % fname, category=ExperimentalWarning, 55 | stacklevel=2) 56 | return func(*fargs, **kw) 57 | return wrapper 58 | return inner -------------------------------------------------------------------------------- /msmbuilder/utils/convenience.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division, absolute_import 2 | 3 | def unique(seq): 4 | '''Returns a list of unique items maintaining the order of the original. 5 | ''' 6 | seen = set() 7 | seen_add = seen.add 8 | return [x for x in seq if not (x in seen or seen_add(x))] 9 | -------------------------------------------------------------------------------- /msmbuilder/utils/draw_samples.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division, absolute_import 2 | import numpy as np 3 | import mdtraj as md 4 | 5 | __all__ = ['map_drawn_samples'] 6 | 7 | 8 | def map_drawn_samples(selected_pairs_by_state, trajectories, top=None): 9 | """Lookup trajectory frames using pairs of (trajectory, frame) indices. 10 | 11 | Parameters 12 | ---------- 13 | selected_pairs_by_state : array, dtype=int, shape=(n_states, n_samples, 2) 14 | selected_pairs_by_state[state, sample] gives the (trajectory, frame) 15 | index associated with a particular sample from that state. 16 | trajectories : list(md.Trajectory) or list(np.ndarray) or list(filenames) 17 | The trajectories assocated with sequences, 18 | which will be used to extract coordinates of the state centers 19 | from the raw trajectory data. This can also be a list of np.ndarray 20 | objects or filenames. If they are filenames, mdtraj will be used to 21 | load them 22 | top : md.Topology, optional, default=None 23 | Use this topology object to help mdtraj load filenames 24 | 25 | Returns 26 | ------- 27 | frames_by_state : mdtraj.Trajectory 28 | Output will be a list of trajectories such that frames_by_state[state] 29 | is a trajectory drawn from `state` of length `n_samples`. If 30 | trajectories are numpy arrays, the output will be numpy arrays instead 31 | of md.Trajectories 32 | 33 | Examples 34 | -------- 35 | >>> selected_pairs_by_state = hmm.draw_samples(sequences, 3) 36 | >>> samples = map_drawn_samples(selected_pairs_by_state, trajectories) 37 | 38 | Notes 39 | ----- 40 | YOU are responsible for ensuring that selected_pairs_by_state and 41 | trajectories correspond to the same dataset! 
42 | 43 | See Also 44 | -------- 45 | ghmm.GaussianHMM.draw_samples : Draw samples from GHMM 46 | ghmm.GaussianHMM.draw_centroids : Draw centroids from GHMM 47 | """ 48 | 49 | frames_by_state = [] 50 | 51 | for state, pairs in enumerate(selected_pairs_by_state): 52 | if isinstance(trajectories[0], str): 53 | if top: 54 | process = lambda x, frame: md.load_frame(x, frame, top=top) 55 | else: 56 | process = lambda x, frame: md.load_frame(x, frame) 57 | else: 58 | process = lambda x, frame: x[frame] 59 | 60 | frames = [process(trajectories[trj], frame) for trj, frame in pairs] 61 | try: # If frames are mdtraj Trajectories 62 | # Get an empty trajectory with correct shape and call the join 63 | # method on it to merge trajectories 64 | state_trj = frames[0][0:0].join(frames) 65 | except AttributeError: 66 | state_trj = np.array(frames) # Just a bunch of np arrays 67 | frames_by_state.append(state_trj) 68 | 69 | return frames_by_state 70 | -------------------------------------------------------------------------------- /msmbuilder/utils/io.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division, absolute_import 2 | 3 | import contextlib 4 | import pickle 5 | import warnings 6 | 7 | import numpy as np 8 | from sklearn.externals.joblib import load as jl_load 9 | 10 | __all__ = ['printoptions', 'verbosedump', 'verboseload', 'dump', 'load'] 11 | 12 | warnings.warn("This module might be deprecated in favor of msmbuilder.io", 13 | PendingDeprecationWarning) 14 | 15 | 16 | @contextlib.contextmanager 17 | def printoptions(*args, **kwargs): 18 | original = np.get_printoptions() 19 | np.set_printoptions(*args, **kwargs) 20 | yield 21 | np.set_printoptions(**original) 22 | 23 | 24 | def dump(value, filename, compress=None, cache_size=None): 25 | """Save an arbitrary python object using pickle. 26 | 27 | Parameters 28 | ----------- 29 | value : any Python object 30 | The object to store to disk using pickle. 31 | filename : string 32 | The name of the file in which it is to be stored 33 | compress : None 34 | No longer used 35 | cache_size : positive number, optional 36 | No longer used 37 | 38 | See Also 39 | -------- 40 | load : corresponding loader 41 | """ 42 | if compress is not None or cache_size is not None: 43 | warnings.warn("compress and cache_size are no longer valid options") 44 | 45 | with open(filename, 'wb') as f: 46 | pickle.dump(value, f) 47 | 48 | 49 | def load(filename): 50 | """Load an object that has been saved with dump. 51 | 52 | We try to open it using the pickle protocol. As a fallback, we 53 | use joblib.load. Joblib was the default prior to msmbuilder v3.2 54 | 55 | Parameters 56 | ---------- 57 | filename : string 58 | The name of the file to load. 59 | """ 60 | try: 61 | with open(filename, 'rb') as f: 62 | return pickle.load(f) 63 | except Exception as e1: 64 | try: 65 | return jl_load(filename) 66 | except Exception as e2: 67 | raise IOError( 68 | "Unable to load {} using the pickle or joblib protocol.\n" 69 | "Pickle: {}\n" 70 | "Joblib: {}".format(filename, e1, e2) 71 | ) 72 | 73 | 74 | def verbosedump(value, fn, compress=None): 75 | """Verbose wrapper around dump""" 76 | print('Saving "%s"... (%s)' % (fn, type(value))) 77 | dump(value, fn, compress=compress) 78 | 79 | 80 | def verboseload(fn): 81 | """Verbose wrapper around load. 82 | 83 | Try to use pickle. If that fails, try to use joblib. 84 | """ 85 | print('loading "%s"...' 
% fn) 86 | return load(fn) 87 | -------------------------------------------------------------------------------- /msmbuilder/utils/param_sweep.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division, absolute_import 2 | from sklearn import clone 3 | try: 4 | from sklearn.model_selection import ParameterGrid 5 | except ImportError: 6 | from sklearn.grid_search import ParameterGrid 7 | 8 | from sklearn.externals.joblib import Parallel, delayed 9 | 10 | __all__ = ['param_sweep'] 11 | 12 | 13 | def param_sweep(model, sequences, param_grid, n_jobs=1, verbose=0): 14 | """Fit a series of models over a range of parameters. 15 | 16 | Parameters 17 | ---------- 18 | model : msmbuilder.BaseEstimator 19 | An *instance* of an estimator to be used 20 | to fit data. 21 | sequences : list of array-like 22 | List of sequences, or a single sequence. Each 23 | sequence should be a 1D iterable of state 24 | labels. Labels can be integers, strings, or 25 | other orderable objects. 26 | param_grid : dict or sklearn.grid_search.ParameterGrid 27 | Parameter grid to specify models to fit. See 28 | sklearn.grid_search.ParameterGrid for an explanation 29 | n_jobs : int, optional 30 | Number of jobs to run in parallel using joblib.Parallel 31 | 32 | Returns 33 | ------- 34 | models : list 35 | List of models fit to the data according to 36 | param_grid 37 | """ 38 | 39 | if isinstance(param_grid, dict): 40 | param_grid = ParameterGrid(param_grid) 41 | elif not isinstance(param_grid, ParameterGrid): 42 | raise ValueError("param_grid must be a dict or ParamaterGrid instance") 43 | 44 | # iterable with (model, sequence) as items 45 | iter_args = ((clone(model).set_params(**params), sequences) 46 | for params in param_grid) 47 | 48 | models = Parallel(n_jobs=n_jobs, verbose=verbose)( 49 | delayed(_param_sweep_helper)(args) for args in iter_args) 50 | 51 | return models 52 | 53 | 54 | def _param_sweep_helper(args): 55 | """ 56 | helper for fitting many models on some data 57 | """ 58 | model, sequences = args 59 | model.fit(sequences) 60 | 61 | return model 62 | -------------------------------------------------------------------------------- /msmbuilder/utils/probability.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division, absolute_import 2 | import numpy as np 3 | from sklearn.utils import check_random_state 4 | 5 | __all__ = ['categorical'] 6 | 7 | 8 | def categorical(pvals, size=None, random_state=None): 9 | """Return random integer from a categorical distribution 10 | 11 | Parameters 12 | ---------- 13 | pvals : sequence of floats, length p 14 | Probabilities of each of the ``p`` different outcomes. These 15 | should sum to 1. 16 | size : int or tuple of ints, optional 17 | Defines the shape of the returned array of random integers. If None 18 | (the default), returns a single float. 19 | random_state: RandomState or an int seed, optional 20 | A random number generator instance. 
21 | """ 22 | cumsum = np.cumsum(pvals) 23 | if size is None: 24 | size = (1,) 25 | axis = 0 26 | elif isinstance(size, tuple): 27 | size = size + (1,) 28 | axis = len(size) - 1 29 | else: 30 | raise TypeError('size must be an int or tuple of ints') 31 | 32 | random_state = check_random_state(random_state) 33 | return np.sum(cumsum < random_state.random_sample(size), axis=axis) 34 | -------------------------------------------------------------------------------- /msmbuilder/utils/progressbar/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # progressbar - Text progress bar library for Python. 5 | # Copyright (c) 2005 Nilton Volpato 6 | # 7 | # This library is free software; you can redistribute it and/or 8 | # modify it under the terms of the GNU Lesser General Public 9 | # License as published by the Free Software Foundation; either 10 | # version 2.1 of the License, or (at your option) any later version. 11 | # 12 | # This library is distributed in the hope that it will be useful, 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 | # Lesser General Public License for more details. 16 | # 17 | # You should have received a copy of the GNU Lesser General Public 18 | # License along with this library; if not, write to the Free Software 19 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 20 | 21 | """Text progress bar library for Python. 22 | 23 | A text progress bar is typically used to display the progress of a long 24 | running operation, providing a visual cue that processing is underway. 25 | 26 | The ProgressBar class manages the current progress, and the format of the line 27 | is given by a number of widgets. A widget is an object that may display 28 | differently depending on the state of the progress bar. There are three types 29 | of widgets: 30 | - a string, which always shows itself 31 | 32 | - a ProgressBarWidget, which may return a different value every time its 33 | update method is called 34 | 35 | - a ProgressBarWidgetHFill, which is like ProgressBarWidget, except it 36 | expands to fill the remaining width of the line. 37 | 38 | The progressbar module is very easy to use, yet very powerful. It will also 39 | automatically enable features like auto-resizing when the system supports it. 40 | """ 41 | 42 | from __future__ import absolute_import 43 | __author__ = 'Nilton Volpato' 44 | __author_email__ = 'first-name dot last-name @ gmail.com' 45 | __date__ = '2011-05-14' 46 | __version__ = '2.3' 47 | 48 | from .compat import * 49 | from .widgets import * 50 | from .progressbar import * 51 | -------------------------------------------------------------------------------- /msmbuilder/utils/progressbar/compat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # progressbar - Text progress bar library for Python. 5 | # Copyright (c) 2005 Nilton Volpato 6 | # 7 | # This library is free software; you can redistribute it and/or 8 | # modify it under the terms of the GNU Lesser General Public 9 | # License as published by the Free Software Foundation; either 10 | # version 2.1 of the License, or (at your option) any later version. 
11 | # 12 | # This library is distributed in the hope that it will be useful, 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 | # Lesser General Public License for more details. 16 | # 17 | # You should have received a copy of the GNU Lesser General Public 18 | # License along with this library; if not, write to the Free Software 19 | # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 20 | 21 | """Compatibility methods and classes for the progressbar module.""" 22 | 23 | 24 | # Python 3.x (and backports) use a modified iterator syntax 25 | # This will allow 2.x to behave with 3.x iterators 26 | try: 27 | next 28 | except NameError: 29 | def next(iter): 30 | try: 31 | # Try new style iterators 32 | return iter.__next__() 33 | except AttributeError: 34 | # Fallback in case of a "native" iterator 35 | return iter.next() 36 | 37 | 38 | # Python < 2.5 does not have "any" 39 | try: 40 | any 41 | except NameError: 42 | def any(iterator): 43 | for item in iterator: 44 | if item: return True 45 | return False 46 | -------------------------------------------------------------------------------- /msmbuilder/utils/subsampler.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division, absolute_import 2 | from sklearn.base import TransformerMixin 3 | from ..base import BaseEstimator 4 | 5 | __all__ = ['Subsampler'] 6 | 7 | 8 | class Subsampler(BaseEstimator, TransformerMixin): 9 | """Convert a list of feature time series (`X_all`) into a `lag_time` 10 | subsampled time series. 11 | 12 | Parameters 13 | ---------- 14 | lag_time : int 15 | The lag time to subsample by 16 | sliding_window : bool, default=True 17 | If True, each time series is transformed into `lag_time` interlaced 18 | sliding-window (not statistically independent) sequences. If 19 | False, each time series is transformed into a single subsampled 20 | time series. 21 | """ 22 | def __init__(self, lag_time, sliding_window=True): 23 | self._lag_time = lag_time 24 | self._sliding_window = sliding_window 25 | 26 | def fit(self, X_all, y=None): 27 | return self 28 | 29 | def transform(self, X_all, y=None): 30 | """Subsample several time series. 31 | 32 | Parameters 33 | ---------- 34 | X_all : list(np.ndarray) 35 | List of feature time series 36 | 37 | Returns 38 | ------- 39 | features : list(np.ndarray), length = len(X_all) 40 | The subsampled trajectories. 41 | """ 42 | if self._sliding_window: 43 | return [X[k::self._lag_time] for k in range(self._lag_time) for X in X_all] 44 | else: 45 | return [X[::self._lag_time] for X in X_all] 46 | -------------------------------------------------------------------------------- /msmbuilder/utils/validation.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division, absolute_import 2 | import numpy as np 3 | import mdtraj as md 4 | 5 | __all__ = ['list_of_1d', 'check_iter_of_sequences', 'array2d'] 6 | 7 | 8 | def list_of_1d(y): 9 | if not hasattr(y, '__iter__') or len(y) == 0: 10 | raise ValueError('Bad input shape') 11 | if not hasattr(y[0], '__iter__'): 12 | return [np.array(y)] 13 | 14 | result = [] 15 | for i, x in enumerate(y): 16 | value = np.array(x) 17 | if value.ndim != 1: 18 | raise ValueError( 19 | "Bad input shape. 
Element %d has shape %s, but " 20 | "should be 1D" % (i, str(value.shape))) 21 | result.append(value) 22 | return result 23 | 24 | 25 | def check_iter_of_sequences(sequences, allow_trajectory=False, ndim=2, 26 | max_iter=None): 27 | """Check that ``sequences`` is a iterable of trajectory-like sequences, 28 | suitable as input to ``fit()`` for estimators following the MSMBuilder 29 | API. 30 | 31 | Parameters 32 | ---------- 33 | sequences : object 34 | The object to check 35 | allow_trajectory : bool 36 | Are ``md.Trajectory``s allowed? 37 | ndim : int 38 | The expected dimensionality of the sequences 39 | max_iter : int, optional 40 | Only check at maximum the first ``max_iter`` entries in ``sequences``. 41 | """ 42 | value = True 43 | for i, X in enumerate(sequences): 44 | if not isinstance(X, np.ndarray): 45 | if (not allow_trajectory) and isinstance(X, md.Trajectory): 46 | value = False 47 | break 48 | if not isinstance(X, md.Trajectory) and X.ndim != ndim: 49 | value = False 50 | break 51 | if max_iter is not None and i >= max_iter: 52 | break 53 | 54 | if not value: 55 | raise ValueError('sequences must be a list of sequences') 56 | 57 | 58 | def array2d(X, dtype=None, order=None, copy=False, force_all_finite=True): 59 | """Returns at least 2-d array with data from X""" 60 | X_2d = np.asarray(np.atleast_2d(X), dtype=dtype, order=order) 61 | if force_all_finite: 62 | _assert_all_finite(X_2d) 63 | if X is X_2d and copy: 64 | X_2d = _safe_copy(X_2d) 65 | return X_2d 66 | 67 | 68 | def _assert_all_finite(X): 69 | """Like assert_all_finite, but only for ndarray.""" 70 | X = np.asanyarray(X) 71 | if (X.dtype.char in np.typecodes['AllFloat'] and not np.isfinite(X.sum()) 72 | and not np.isfinite(X).all()): 73 | raise ValueError("Input contains NaN, infinity" 74 | " or a value too large for %r." % X.dtype) 75 | 76 | def _safe_copy(X): 77 | # Copy, but keep the order 78 | return np.copy(X, order='K') 79 | --------------------------------------------------------------------------------
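A minimal usage sketch of the Subsampler transformer defined in msmbuilder/utils/subsampler.py above, illustrating the two subsampling modes its docstring describes. The input data (a 12-frame, single-feature series) is chosen purely for illustration; the expected outputs follow directly from the transform() implementation shown above.

import numpy as np
from msmbuilder.utils import Subsampler

# One feature time series: 12 frames, a single feature column.
X = np.arange(12).reshape(12, 1)

# sliding_window=False: each input series becomes a single strided series.
plain = Subsampler(lag_time=3, sliding_window=False).transform([X])
assert len(plain) == 1
np.testing.assert_array_equal(plain[0][:, 0], [0, 3, 6, 9])

# sliding_window=True (the default): lag_time interlaced, offset series per input.
interlaced = Subsampler(lag_time=3, sliding_window=True).transform([X])
assert len(interlaced) == 3
np.testing.assert_array_equal(interlaced[1][:, 0], [1, 4, 7, 10])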