├── .bumpversion.cfg ├── .coveragerc ├── .editorconfig ├── .gitignore ├── .travis.yml ├── ACKNOWLEDGEMENTS.rst ├── AUTHORS.rst ├── CHANGELOG.rst ├── CONTRIBUTING.rst ├── LICENSE ├── MANIFEST.in ├── README.rst ├── ci ├── bootstrap.py ├── requirements.txt └── templates │ └── .travis.yml ├── docs ├── Makefile ├── acknowledgements.rst ├── authors.rst ├── changelog.rst ├── conf.py ├── contributing.rst ├── index.rst ├── installation.rst ├── jupyter notebook tutorials │ ├── Four_Wells │ │ ├── Four_Wells.rst │ │ ├── output_11_0.png │ │ ├── output_15_0.png │ │ ├── output_18_0.png │ │ ├── output_21_0.png │ │ ├── output_26_0.png │ │ ├── output_4_0.png │ │ └── output_8_0.png │ ├── Metrics │ │ ├── Metrics.rst │ │ ├── output_10_0.png │ │ └── output_5_0.png │ ├── Spherical_Harmonics │ │ ├── Spherical_Harmonics.rst │ │ ├── output_10_0.png │ │ ├── output_15_1.png │ │ └── output_9_0.png │ ├── Swiss_Roll │ │ ├── Swiss_Roll.rst │ │ ├── output_10_0.png │ │ ├── output_10_1.png │ │ ├── output_12_1.png │ │ ├── output_8_0.png │ │ └── output_8_1.png │ └── index.rst ├── readme.rst ├── reference │ ├── diffusion_map.rst │ ├── index.rst │ ├── kernel.rst │ └── visualization.rst ├── requirements.txt ├── spelling_wordlist.txt ├── theory.rst └── usage.rst ├── examples ├── Data │ ├── 4wells_traj.npy │ ├── dimer_energy.npy │ └── dimer_trajectory.npy ├── Four_Wells.ipynb ├── Metrics.ipynb ├── Spherical_Harmonics.ipynb └── Swiss_Roll.ipynb ├── setup.cfg ├── setup.py ├── src └── pydiffmap │ ├── __init__.py │ ├── diffusion_map.py │ ├── kernel.py │ ├── utils.py │ └── visualization.py ├── tests ├── conftest.py ├── test_diffusionmap.py ├── test_kernel.py ├── test_utils.py └── test_visualization.py └── tox.ini /.bumpversion.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.2.0 3 | commit = True 4 | tag = True 5 | 6 | [bumpversion:file:setup.py] 7 | search = version='{current_version}' 8 | replace = version='{new_version}' 9 | 10 | 
[bumpversion:file:README.rst] 11 | search = v{current_version}. 12 | replace = v{new_version}. 13 | 14 | [bumpversion:file:docs/conf.py] 15 | search = version = release = '{current_version}' 16 | replace = version = release = '{new_version}' 17 | 18 | [bumpversion:file:src/pydiffmap/__init__.py] 19 | search = __version__ = '{current_version}' 20 | replace = __version__ = '{new_version}' 21 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [paths] 2 | source = 3 | src/ 4 | */site-packages/ 5 | 6 | [run] 7 | branch = true 8 | source = 9 | pydiffmap 10 | tests 11 | parallel = true 12 | 13 | [report] 14 | show_missing = true 15 | precision = 2 16 | omit = *migrations* 17 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # see http://editorconfig.org 2 | root = true 3 | 4 | [*] 5 | end_of_line = lf 6 | trim_trailing_whitespace = true 7 | insert_final_newline = true 8 | indent_style = space 9 | indent_size = 4 10 | charset = utf-8 11 | 12 | [*.{bat,cmd,ps1}] 13 | end_of_line = crlf 14 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | dist 10 | build 11 | eggs 12 | .eggs 13 | parts 14 | bin 15 | var 16 | sdist 17 | wheelhouse 18 | develop-eggs 19 | .installed.cfg 20 | lib 21 | lib64 22 | venv*/ 23 | pyvenv*/ 24 | 25 | # Installer logs 26 | pip-log.txt 27 | 28 | # Unit test / coverage reports 29 | .coverage 30 | .tox 31 | .coverage.* 32 | nosetests.xml 33 | coverage.xml 34 | htmlcov 35 | .pytest_cache 36 | 37 | # Translations 38 | *.mo 39 | 40 | # Mr Developer 41 | .mr.developer.cfg 42 | 
.project 43 | .pydevproject 44 | .idea 45 | *.iml 46 | *.komodoproject 47 | 48 | # Complexity 49 | output/*.html 50 | output/*/index.html 51 | 52 | # Sphinx 53 | docs/_build 54 | 55 | .DS_Store 56 | *~ 57 | .*.sw[po] 58 | .build 59 | .ve 60 | .env 61 | .cache 62 | .pytest 63 | .bootstrap 64 | .appveyor.token 65 | *.bak 66 | 67 | # Ipython Notebooks 68 | examples/*.ipynb_checkpoints 69 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | dist: xenial 3 | cache: false 4 | env: 5 | global: 6 | - LD_PRELOAD=/lib/x86_64-linux-gnu/libSegFault.so 7 | - SEGFAULT_SIGNALS=all 8 | matrix: 9 | include: 10 | - python: '3.6' 11 | env: 12 | - TOXENV=check 13 | - python: '3.6' 14 | env: 15 | - TOXENV=docs 16 | - env: 17 | - TOXENV=py27,codecov 18 | python: '2.7' 19 | - env: 20 | - TOXENV=py35,codecov 21 | python: '3.5' 22 | - env: 23 | - TOXENV=py36,codecov 24 | python: '3.6' 25 | - env: 26 | - TOXENV=py37,codecov 27 | python: '3.7' 28 | before_install: 29 | - python --version 30 | - uname -a 31 | - lsb_release -a || true 32 | install: 33 | - python -mpip install --progress-bar=off tox -rci/requirements.txt 34 | - virtualenv --version 35 | - easy_install --version 36 | - pip --version 37 | - tox --version 38 | script: 39 | - tox -v 40 | after_failure: 41 | - more .tox/log/* | cat 42 | - more .tox/*/log/* | cat 43 | notifications: 44 | email: 45 | on_success: never 46 | on_failure: always 47 | -------------------------------------------------------------------------------- /ACKNOWLEDGEMENTS.rst: -------------------------------------------------------------------------------- 1 | 2 | Acknowledgements 3 | ================ 4 | 5 | This work was partially funded by grant EPSR EP/P006175/1 as well as the Molecular Sciences Software Institute (MolSSI). 
Computing resources were provided in part by the University of Chicago Research Computing Center (RCC). 6 | We also want to thank the following scientists for their input and advice: 7 | 8 | - Prof. Dimitris Giannakis for help in implementing the automatic bandwidth selection algorithm. 9 | -------------------------------------------------------------------------------- /AUTHORS.rst: -------------------------------------------------------------------------------- 1 | 2 | Authors 3 | ======= 4 | 5 | * Ralf Banisch 6 | * Erik Henning Thiede 7 | * Zofia Trstanova 8 | 9 | -------------------------------------------------------------------------------- /CHANGELOG.rst: -------------------------------------------------------------------------------- 1 | 2 | Changelog 3 | ========= 4 | 5 | 0.2.0.1 (2019-02-04) 6 | -------------------- 7 | 8 | New Features 9 | ~~~~~~~~~~~~ 10 | * Added a more generous epsilon procedure for convenience. 11 | 12 | 0.2.0 (2019-02-01) 13 | ------------------ 14 | 15 | New Features 16 | ~~~~~~~~~~~~ 17 | * Added support for user-provided kernel functions. 18 | * Added a utility for building a sparse matrix from a function on the data. 19 | * (Re)added separate TMDmap class wrapping base diffusion map class to 20 | allow for easier construction of TMDmaps. 21 | * Added ability to explicitly provide the sampled density for q^alpha normalization. 22 | * Added Variable Bandwidth Diffusion Maps. 23 | * Added a new out-of-sample extension method that should work for variable bandwidth methods. 24 | 25 | Tweaks and Modifications 26 | ~~~~~~~~~~~~~~~~~~~~~~~~ 27 | * Changed from exp^(-d^2) to exp^(-d^2/4) convention. 28 | * Moved weight functionality into a function provided on initialization, 29 | rather than input values, and added a helper function that allows values to 30 | be read from a lookup table. 31 | * Improved the Diffusion Map test suite. 32 | * Moved out-of-sample routines into separate functions. 
33 | * Moved matrix symmetrization into newly made utility file. 34 | * Adjusted constructor for the diffusion map to take the kernel object directly. 35 | 36 | Bugfixes 37 | ~~~~~~~~ 38 | * Fixed bug where weight matrices were not included for out of sample extension. 39 | 40 | Other 41 | ~~~~~ 42 | * Moved to MIT license. 43 | 44 | 0.1.0 (2017-12-06) 45 | ------------------ 46 | 47 | * Fixed setup.py issues. 48 | 49 | 0.1.0 (2017-12-06) 50 | ------------------ 51 | 52 | * Added base functionality to the code. 53 | -------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Contributing 3 | ============ 4 | 5 | Contributions are welcome, and they are greatly appreciated! Every 6 | little bit helps, and credit will always be given. 7 | 8 | Bug reports 9 | =========== 10 | 11 | When `reporting a bug `_ please include: 12 | 13 | * Your operating system name and version. 14 | * Any details about your local setup that might be helpful in troubleshooting. 15 | * Detailed steps to reproduce the bug. 16 | 17 | Documentation improvements 18 | ========================== 19 | 20 | pyDiffMap could always use more documentation, whether as part of the 21 | official pyDiffMap docs, in docstrings, or even on the web in blog posts, 22 | articles, and such. 23 | 24 | Feature requests and feedback 25 | ============================= 26 | 27 | The best way to send feedback is to file an issue at https://github.com/DiffusionMapsAcademics/pyDiffMap/issues. 28 | 29 | If you are proposing a feature: 30 | 31 | * Explain in detail how it would work. 32 | * Keep the scope as narrow as possible, to make it easier to implement. 33 | * Remember that this is a volunteer-driven project, and that code contributions are welcome :) 34 | 35 | Development 36 | =========== 37 | 38 | To set up `python-pydiffmap` for local development: 39 | 40 | 1. 
Fork `python-pydiffmap `_ 41 | (look for the "Fork" button). 42 | 2. Clone your fork locally:: 43 | 44 | git clone git@github.com:your_name_here/python-pydiffmap.git 45 | 46 | 3. Create a branch for local development:: 47 | 48 | git checkout -b name-of-your-bugfix-or-feature 49 | 50 | Now you can make your changes locally. 51 | 52 | 4. When you're done making changes, run all the checks, doc builder and spell checker with `tox `_ one command:: 53 | 54 | tox 55 | 56 | 5. Commit your changes and push your branch to GitHub:: 57 | 58 | git add . 59 | git commit -m "Your detailed description of your changes." 60 | git push origin name-of-your-bugfix-or-feature 61 | 62 | 6. Submit a pull request through the GitHub website. 63 | 64 | Pull Request Guidelines 65 | ----------------------- 66 | 67 | If you need some code review or feedback while you're developing the code just make the pull request. 68 | 69 | For merging, you should: 70 | 71 | 1. Include passing tests (run ``tox``) [1]_. 72 | 2. Update documentation when there's new API, functionality etc. 73 | 3. Add a note to ``CHANGELOG.rst`` about the changes. 74 | 4. Add yourself to ``AUTHORS.rst``. 75 | 76 | .. [1] If you don't have all the necessary python versions available locally you can rely on Travis - it will 77 | `run the tests `_ for each change you add in the pull request. 78 | 79 | It will be slower though ... 
80 | 81 | Tips 82 | ---- 83 | 84 | To run a subset of tests:: 85 | 86 | tox -e envname -- py.test -k test_myfeature 87 | 88 | To run all the test environments in *parallel* (you need to ``pip install detox``):: 89 | 90 | detox 91 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Ralf Banisch, Erik Henning Thiede, Zofia Trstanova 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | graft docs 2 | graft src 3 | graft ci 4 | graft tests 5 | graft examples 6 | 7 | prune docs/_build 8 | prune examples/.ipynb_checkpoints 9 | 10 | include .bumpversion.cfg 11 | include .coveragerc 12 | include .cookiecutterrc 13 | include .editorconfig 14 | 15 | include *.rst 16 | include LICENSE 17 | 18 | include tox.ini .travis.yml appveyor.yml 19 | 20 | global-exclude *.py[cod] __pycache__ *.so *.dylib *.bak 21 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ======== 2 | Overview 3 | ======== 4 | 5 | .. start-badges 6 | 7 | .. list-table:: 8 | :stub-columns: 1 9 | 10 | * - docs 11 | - |docs| 12 | * - tests 13 | - | |travis| 14 | | |codecov| 15 | 16 | .. * - docs 17 | - |docs| 18 | * - tests 19 | - | |travis| 20 | | |codecov| 21 | * - package 22 | - | |version| |wheel| |supported-versions| |supported-implementations| 23 | | |commits-since| 24 | 25 | .. |docs| image:: https://readthedocs.org/projects/pydiffmap/badge/?version=master 26 | :target: http://pydiffmap.readthedocs.io/en/master/?badge=master 27 | :alt: Documentation Status 28 | 29 | .. |travis| image:: https://travis-ci.org/DiffusionMapsAcademics/pyDiffMap.svg?branch=master 30 | :alt: Travis-CI Build Status 31 | :target: https://travis-ci.org/DiffusionMapsAcademics/pyDiffMap 32 | 33 | .. |codecov| image:: https://codecov.io/gh/DiffusionMapsAcademics/pyDiffMap/branch/master/graph/badge.svg 34 | :alt: Coverage Status 35 | :target: https://codecov.io/gh/DiffusionMapsAcademics/pyDiffMap 36 | 37 | .. .. 
|commits-since| image:: https://img.shields.io/github/commits-since/DiffusionMapsAcademics/pyDiffMap/v0.1.0.svg 38 | :alt: Commits since latest release 39 | :target: https://github.com/DiffusionMapsAcademics/pyDiffMap/compare/v0.1.0...master 40 | 41 | .. .. |version| image:: https://img.shields.io/pypi/v/pyDiffMap.svg 42 | :alt: PyPI Package latest release 43 | :target: https://pypi.python.org/pypi/pyDiffMap 44 | 45 | .. .. |commits-since| image:: https://img.shields.io/github/commits-since/DiffusionMapsAcademics/pyDiffMap/v0.1.0.svg 46 | :alt: Commits since latest release 47 | :target: https://github.com/DiffusionMapsAcademics/pyDiffMap/compare/v0.1.0...master 48 | 49 | .. .. |wheel| image:: https://img.shields.io/pypi/wheel/pyDiffMap.svg 50 | :alt: PyPI Wheel 51 | :target: https://pypi.python.org/pypi/pyDiffMap 52 | 53 | .. .. |supported-versions| image:: https://img.shields.io/pypi/pyversions/pyDiffMap.svg 54 | :alt: Supported versions 55 | :target: https://pypi.python.org/pypi/pyDiffMap 56 | 57 | .. .. |supported-implementations| image:: https://img.shields.io/pypi/implementation/pyDiffMap.svg 58 | :alt: Supported implementations 59 | :target: https://pypi.python.org/pypi/pyDiffMap 60 | 61 | 62 | .. end-badges 63 | 64 | This is the home of the documentation for pyDiffMap, an open-source project to develop a robust and accessible diffusion map code for public use. Our documentation is currently under construction, please bear with us. 65 | 66 | * Free software: MIT License. 67 | 68 | Installation 69 | ============ 70 | 71 | Pydiffmap is installable using pip. You can install it using the command 72 | 73 | :: 74 | 75 | pip install pyDiffMap 76 | 77 | You can also install the package directly from the source directly by downloading the package from github and running the command below, optionally with the "-e" flag for an editable install. 
78 | 79 | :: 80 | 81 | pip install [source_directory] 82 | 83 | Documentation 84 | ============= 85 | 86 | https://pyDiffMap.readthedocs.io/ 87 | 88 | Development 89 | =========== 90 | 91 | To run the all tests run:: 92 | 93 | tox 94 | 95 | Note, to combine the coverage data from all the tox environments run: 96 | 97 | .. list-table:: 98 | :widths: 10 90 99 | :stub-columns: 1 100 | 101 | - - Windows 102 | - :: 103 | 104 | set PYTEST_ADDOPTS=--cov-append 105 | tox 106 | 107 | - - Other 108 | - :: 109 | 110 | PYTEST_ADDOPTS=--cov-append tox 111 | 112 | If you don't have tox installed, you can also run the python tests directly with 113 | 114 | :: 115 | 116 | pytest 117 | 118 | -------------------------------------------------------------------------------- /ci/bootstrap.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from __future__ import absolute_import 4 | from __future__ import print_function 5 | from __future__ import unicode_literals 6 | 7 | import os 8 | import subprocess 9 | import sys 10 | from os.path import abspath 11 | from os.path import dirname 12 | from os.path import exists 13 | from os.path import join 14 | 15 | base_path = dirname(dirname(abspath(__file__))) 16 | 17 | 18 | def check_call(args): 19 | print("+", *args) 20 | subprocess.check_call(args) 21 | 22 | 23 | def exec_in_env(): 24 | env_path = join(base_path, ".tox", "bootstrap") 25 | if sys.platform == "win32": 26 | bin_path = join(env_path, "Scripts") 27 | else: 28 | bin_path = join(env_path, "bin") 29 | if not exists(env_path): 30 | import subprocess 31 | 32 | print("Making bootstrap env in: {0} ...".format(env_path)) 33 | try: 34 | check_call([sys.executable, "-m", "venv", env_path]) 35 | except subprocess.CalledProcessError: 36 | try: 37 | check_call([sys.executable, "-m", "virtualenv", env_path]) 38 | except subprocess.CalledProcessError: 39 | check_call(["virtualenv", env_path]) 40 | 
print("Installing `jinja2` into bootstrap environment...") 41 | check_call([join(bin_path, "pip"), "install", "jinja2", "tox"]) 42 | python_executable = join(bin_path, "python") 43 | if not os.path.exists(python_executable): 44 | python_executable += '.exe' 45 | 46 | print("Re-executing with: {0}".format(python_executable)) 47 | print("+ exec", python_executable, __file__, "--no-env") 48 | os.execv(python_executable, [python_executable, __file__, "--no-env"]) 49 | 50 | def main(): 51 | import jinja2 52 | 53 | print("Project path: {0}".format(base_path)) 54 | 55 | jinja = jinja2.Environment( 56 | loader=jinja2.FileSystemLoader(join(base_path, "ci", "templates")), 57 | trim_blocks=True, 58 | lstrip_blocks=True, 59 | keep_trailing_newline=True 60 | ) 61 | 62 | tox_environments = [ 63 | line.strip() 64 | # 'tox' need not be installed globally, but must be importable 65 | # by the Python that is running this script. 66 | # This uses sys.executable the same way that the call in 67 | # cookiecutter-pylibrary/hooks/post_gen_project.py 68 | # invokes this bootstrap.py itself. 
69 | for line in subprocess.check_output([sys.executable, '-m', 'tox', '--listenvs'], universal_newlines=True).splitlines() 70 | ] 71 | tox_environments = [line for line in tox_environments if line.startswith('py')] 72 | 73 | for name in os.listdir(join("ci", "templates")): 74 | with open(join(base_path, name), "w") as fh: 75 | fh.write(jinja.get_template(name).render(tox_environments=tox_environments)) 76 | print("Wrote {}".format(name)) 77 | print("DONE.") 78 | 79 | 80 | if __name__ == "__main__": 81 | args = sys.argv[1:] 82 | if args == ["--no-env"]: 83 | main() 84 | elif not args: 85 | exec_in_env() 86 | else: 87 | print("Unexpected arguments {0}".format(args), file=sys.stderr) 88 | sys.exit(1) 89 | 90 | -------------------------------------------------------------------------------- /ci/requirements.txt: -------------------------------------------------------------------------------- 1 | virtualenv>=16.6.0 2 | pip>=19.1.1 3 | setuptools>=18.0.1 4 | -------------------------------------------------------------------------------- /ci/templates/.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | dist: xenial 3 | cache: false 4 | env: 5 | global: 6 | - LD_PRELOAD=/lib/x86_64-linux-gnu/libSegFault.so 7 | - SEGFAULT_SIGNALS=all 8 | matrix: 9 | include: 10 | - python: '3.6' 11 | env: 12 | - TOXENV=check 13 | - python: '3.6' 14 | env: 15 | - TOXENV=docs 16 | {%- for env in tox_environments %}{{ '' }} 17 | - env: 18 | - TOXENV={{ env }},codecov 19 | {%- if env.startswith('pypy3') %}{{ '' }} 20 | - TOXPYTHON=pypy3 21 | python: 'pypy3' 22 | {%- elif env.startswith('pypy') %}{{ '' }} 23 | python: 'pypy' 24 | {%- else %}{{ '' }} 25 | python: '{{ '{0[2]}.{0[3]}'.format(env) }}' 26 | {%- endif %} 27 | {%- endfor %}{{ '' }} 28 | before_install: 29 | - python --version 30 | - uname -a 31 | - lsb_release -a || true 32 | install: 33 | - python -mpip install --progress-bar=off tox -rci/requirements.txt 34 | - 
virtualenv --version 35 | - easy_install --version 36 | - pip --version 37 | - tox --version 38 | script: 39 | - tox -v 40 | after_failure: 41 | - more .tox/log/* | cat 42 | - more .tox/*/log/* | cat 43 | notifications: 44 | email: 45 | on_success: never 46 | on_failure: always 47 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = pydiffmap 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/acknowledgements.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../ACKNOWLEDGEMENTS.rst 2 | -------------------------------------------------------------------------------- /docs/authors.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../AUTHORS.rst 2 | -------------------------------------------------------------------------------- /docs/changelog.rst: -------------------------------------------------------------------------------- 1 | .. 
include:: ../CHANGELOG.rst 2 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import unicode_literals 3 | 4 | import os 5 | 6 | 7 | extensions = [ 8 | 'sphinx.ext.autodoc', 9 | 'sphinx.ext.coverage', 10 | 'sphinx.ext.extlinks', 11 | 'sphinx.ext.ifconfig', 12 | 'sphinx.ext.mathjax', 13 | 'sphinx.ext.napoleon', 14 | 'sphinx.ext.viewcode', 15 | ] 16 | # 'sphinx.ext.autosummary', 17 | # 'sphinx.ext.todo', 18 | # 'sphinx.ext.doctest', 19 | if os.getenv('SPELLCHECK'): 20 | extensions += 'sphinxcontrib.spelling', 21 | spelling_show_suggestions = True 22 | spelling_lang = 'en_US' 23 | 24 | show_authors = False 25 | 26 | source_suffix = '.rst' 27 | master_doc = 'index' 28 | project = u'pydiffmap' 29 | year = '2017' 30 | author = u'Ralf Banisch, Erik Henning Thiede, Zofia Trstanova' 31 | copyright = '{0}, {1}'.format(year, author) 32 | version = release = u'0.2.0.1' 33 | 34 | pygments_style = 'trac' 35 | templates_path = ['.'] 36 | extlinks = { 37 | 'issue': ('https://github.com/DiffusionMapsAcademics/pydiffmap/issues/%s', '#'), 38 | 'pr': ('https://github.com/DiffusionMapsAcademics/pydiffmap/pull/%s', 'PR #'), 39 | } 40 | # on_rtd is whether we are on readthedocs.org 41 | on_rtd = os.environ.get('READTHEDOCS', None) == 'True' 42 | 43 | if not on_rtd: # only set the theme if we're building docs locally 44 | html_theme = 'sphinx_rtd_theme' 45 | 46 | html_use_smartypants = True 47 | html_last_updated_fmt = '%b %d, %Y' 48 | html_split_index = False 49 | html_sidebars = { 50 | '**': ['searchbox.html', 'globaltoc.html', 'sourcelink.html'], 51 | } 52 | html_short_title = '%s-%s' % (project, version) 53 | 54 | napoleon_use_ivar = True 55 | napoleon_use_rtype = False 56 | napoleon_use_param = False 57 | -------------------------------------------------------------------------------- /docs/contributing.rst: 
-------------------------------------------------------------------------------- 1 | .. include:: ../CONTRIBUTING.rst 2 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to pydiffmap 2 | ==================== 3 | 4 | This is the home of the documentation for pyDiffMap, an open-source project to develop a robust and accessible diffusion map code for public use. Code can be found on our `github page`__. Our documentation is currently under construction, please bear with us. 5 | 6 | .. _github: https://github.com/DiffusionMapsAcademics/pyDiffMap/ 7 | 8 | __ github_ 9 | 10 | 11 | 12 | 13 | ======== 14 | Contents 15 | ======== 16 | 17 | .. toctree:: 18 | :maxdepth: 2 19 | 20 | readme 21 | installation 22 | theory 23 | usage 24 | jupyter notebook tutorials/index 25 | reference/index 26 | contributing 27 | authors 28 | acknowledgements 29 | changelog 30 | 31 | Indices and tables 32 | ================== 33 | 34 | * :ref:`genindex` 35 | * :ref:`modindex` 36 | * :ref:`search` 37 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Installation 3 | ============ 4 | 5 | At the command line:: 6 | 7 | pip install [source_dir] 8 | -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Four_Wells/Four_Wells.rst: -------------------------------------------------------------------------------- 1 | 2 | 2D Four-well potential 3 | ====================== 4 | 5 | .. 
code:: python 6 | 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | 10 | from mpl_toolkits.mplot3d import Axes3D 11 | from pydiffmap import diffusion_map as dm 12 | 13 | %matplotlib inline 14 | 15 | Load sampled data: discretized Langevin dynamics at temperature T=1, 16 | friction 1, and time step size dt=0.01, with double-well potentials in x 17 | and y, with higher barrier in y. 18 | 19 | .. code:: python 20 | 21 | X=np.load('Data/4wells_traj.npy') 22 | print(X.shape) 23 | 24 | 25 | .. parsed-literal:: 26 | 27 | (9900, 2) 28 | 29 | 30 | .. code:: python 31 | 32 | def DW1(x): 33 | return 2.0*(np.linalg.norm(x)**2-1.0)**2 34 | 35 | def DW2(x): 36 | return 4.0*(np.linalg.norm(x)**2-1.0)**2 37 | 38 | def DW(x): 39 | return DW1(x[0]) + DW1(x[1]) 40 | 41 | from matplotlib import cm 42 | 43 | mx=5 44 | 45 | xe=np.linspace(-mx, mx, 100) 46 | ye=np.linspace(-mx, mx, 100) 47 | energyContours=np.zeros((100, 100)) 48 | for i in range(0,len(xe)): 49 | for j in range(0,len(ye)): 50 | xtmp=np.array([xe[i], ye[j]] ) 51 | energyContours[j,i]=DW(xtmp) 52 | 53 | levels = np.arange(0, 10, 0.5) 54 | plt.contour(xe, ye, energyContours, levels, cmap=cm.coolwarm) 55 | plt.scatter(X[:,0], X[:,1], s=5, c='k') 56 | plt.xlabel('X') 57 | plt.ylabel('Y') 58 | plt.xlim([-2,2]) 59 | plt.ylim([-2,2]) 60 | plt.show() 61 | 62 | 63 | 64 | 65 | .. image:: output_4_0.png 66 | 67 | 68 | Compute diffusion map embedding 69 | ------------------------------- 70 | 71 | .. code:: python 72 | 73 | mydmap = dm.DiffusionMap.from_sklearn(n_evecs = 2, epsilon = .1, alpha = 0.5, k=400, metric='euclidean') 74 | dmap = mydmap.fit_transform(X) 75 | 76 | 77 | .. parsed-literal:: 78 | 79 | 0.1 eps fitted 80 | 81 | 82 | Visualization 83 | ------------- 84 | 85 | We plot the first two diffusion coordinates against each other, colored 86 | by the x coordinate 87 | 88 | .. 
code:: python 89 | 90 | from pydiffmap.visualization import embedding_plot 91 | 92 | embedding_plot(mydmap, scatter_kwargs = {'c': X[:,0], 's': 5, 'cmap': 'coolwarm'}) 93 | 94 | plt.show() 95 | 96 | 97 | 98 | .. image:: output_8_0.png 99 | 100 | 101 | .. code:: python 102 | 103 | #from matplotlib import cm 104 | #plt.scatter(dmap[:,0], dmap[:,1], c=X[:,0], s=5, cmap=cm.coolwarm) 105 | 106 | #clb=plt.colorbar() 107 | #clb.set_label('X coordinate') 108 | #plt.xlabel('First dominant eigenvector') 109 | #plt.ylabel('Second dominant eigenvector') 110 | #plt.title('Diffusion Map Embedding') 111 | 112 | #plt.show() 113 | 114 | We visualize the data again, colored by the first eigenvector this time. 115 | 116 | .. code:: python 117 | 118 | from pydiffmap.visualization import data_plot 119 | 120 | data_plot(mydmap, scatter_kwargs = {'s': 5, 'cmap': 'coolwarm'}) 121 | plt.show() 122 | 123 | 124 | 125 | .. image:: output_11_0.png 126 | 127 | 128 | Target measure diffusion map 129 | ---------------------------- 130 | 131 | Compute Target Measure Diffusion Map with target distribution pi(q) = 132 | exp(-beta V(q)) with inverse temperature beta = 1. TMDmap can be seen as 133 | a special case where the weights are the target distribution, and 134 | alpha=1. 135 | 136 | .. code:: python 137 | 138 | V=DW 139 | beta=1 140 | change_of_measure = lambda x: np.exp(-beta * V(x)) 141 | mytdmap = dm.TMDmap(alpha=1.0, n_evecs = 2, epsilon = .1, 142 | k=400, change_of_measure=change_of_measure) 143 | tmdmap = mytdmap.fit_transform(X) 144 | 145 | 146 | .. parsed-literal:: 147 | 148 | 0.1 eps fitted 149 | 150 | 151 | .. code:: python 152 | 153 | embedding_plot(mytdmap, scatter_kwargs = {'c': X[:,0], 's': 5, 'cmap': 'coolwarm'}) 154 | 155 | plt.show() 156 | 157 | 158 | 159 | .. 
image:: output_15_0.png 160 | 161 | 162 | From the sampling at temperature 1/beta =1, we can compute diffusion map 163 | embedding at lower temperature T\_low = 1/beta\_low using TMDmap with 164 | target measure pi(q) = exp(-beta\_low V(q)). Here we set beta\_low = 10, 165 | and use the data obtained from sampling at higher temperature, i.e. 166 | pi(q) = exp(-beta V(q)) with beta = 1. 167 | 168 | .. code:: python 169 | 170 | V=DW 171 | beta_2=10 172 | change_of_measure_2 = lambda x: np.exp(-beta_2 * V(x)) 173 | mytdmap2 = dm.TMDmap(alpha=1.0, n_evecs = 2, epsilon = .1, 174 | k=400, change_of_measure=change_of_measure_2) 175 | tmdmap2 = mytdmap2.fit_transform(X) 176 | 177 | 178 | .. parsed-literal:: 179 | 180 | 0.1 eps fitted 181 | 182 | 183 | .. code:: python 184 | 185 | embedding_plot(mytdmap2, scatter_kwargs = {'c': X[:,0], 's': 5, 'cmap': 'coolwarm'}) 186 | 187 | plt.show() 188 | 189 | 190 | 191 | .. image:: output_18_0.png 192 | 193 | 194 | Kernel density estimate 195 | ----------------------- 196 | 197 | We can compute kernel density estimate using kde used in the diffusion 198 | map computation. 199 | 200 | .. code:: python 201 | 202 | plt.scatter(X[:,0], X[:,1], c = mytdmap.q, s=5, cmap=cm.coolwarm) 203 | 204 | clb=plt.colorbar() 205 | clb.set_label('q') 206 | plt.xlabel('First dominant eigenvector') 207 | plt.ylabel('Second dominant eigenvector') 208 | plt.title('TMDmap Embedding, beta=1') 209 | 210 | plt.show() 211 | 212 | 213 | 214 | .. image:: output_21_0.png 215 | 216 | 217 | Now we check how well we can approximate the target distribution by the 218 | formula in the paper (left dominant eigenvector times KDE). 219 | 220 | .. code:: python 221 | 222 | import scipy.sparse.linalg as spsl 223 | L = mytdmap.L 224 | [evals, evecs] = spsl.eigs(L.transpose(),k=1, which='LR') 225 | 226 | phi = np.real(evecs.ravel()) 227 | 228 | .. 
code:: python 229 | 230 | q_est = phi*mytdmap.q 231 | q_est = q_est/sum(q_est) 232 | 233 | target_distribution = np.array([change_of_measure(Xi) for Xi in X]) 234 | q_exact = target_distribution/sum(target_distribution) 235 | print(np.linalg.norm(q_est - q_exact,1)) 236 | 237 | 238 | .. parsed-literal:: 239 | 240 | 0.040391461721631335 241 | 242 | 243 | visualize both. there is no visible difference. 244 | 245 | .. code:: python 246 | 247 | plt.figure(figsize=(16,6)) 248 | 249 | ax = plt.subplot(121) 250 | SC1 = ax.scatter(X[:,0], X[:,1], c = q_est, s=5, cmap=cm.coolwarm, vmin=0, vmax=2E-4) 251 | 252 | ax.set_xlabel('x') 253 | ax.set_ylabel('y') 254 | ax.set_title('estimate of pi') 255 | plt.colorbar(SC1, ax=ax) 256 | 257 | 258 | ax2 = plt.subplot(122) 259 | SC2 = ax2.scatter(X[:,0], X[:,1], c = q_exact, s=5, cmap=cm.coolwarm, vmin=0, vmax=2E-4) 260 | plt.colorbar(SC2, ax=ax2) 261 | 262 | 263 | ax2.set_xlabel('x') 264 | ax2.set_ylabel('y') 265 | ax2.set_title('exact pi') 266 | 267 | plt.show() 268 | 269 | 270 | 271 | .. 
image:: output_26_0.png 272 | 273 | 274 | -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Four_Wells/output_11_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DiffusionMapsAcademics/pyDiffMap/22adc99faa83708e9ac05224015fa02c3a7f3c91/docs/jupyter notebook tutorials/Four_Wells/output_11_0.png -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Four_Wells/output_15_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DiffusionMapsAcademics/pyDiffMap/22adc99faa83708e9ac05224015fa02c3a7f3c91/docs/jupyter notebook tutorials/Four_Wells/output_15_0.png -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Four_Wells/output_18_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DiffusionMapsAcademics/pyDiffMap/22adc99faa83708e9ac05224015fa02c3a7f3c91/docs/jupyter notebook tutorials/Four_Wells/output_18_0.png -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Four_Wells/output_21_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DiffusionMapsAcademics/pyDiffMap/22adc99faa83708e9ac05224015fa02c3a7f3c91/docs/jupyter notebook tutorials/Four_Wells/output_21_0.png -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Four_Wells/output_26_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DiffusionMapsAcademics/pyDiffMap/22adc99faa83708e9ac05224015fa02c3a7f3c91/docs/jupyter notebook tutorials/Four_Wells/output_26_0.png 
-------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Four_Wells/output_4_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DiffusionMapsAcademics/pyDiffMap/22adc99faa83708e9ac05224015fa02c3a7f3c91/docs/jupyter notebook tutorials/Four_Wells/output_4_0.png -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Four_Wells/output_8_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DiffusionMapsAcademics/pyDiffMap/22adc99faa83708e9ac05224015fa02c3a7f3c91/docs/jupyter notebook tutorials/Four_Wells/output_8_0.png -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Metrics/Metrics.rst: -------------------------------------------------------------------------------- 1 | 2 | Diffusion maps with general metric 3 | ================================== 4 | 5 | In this notebook, we illustrate how to use an optional metric in the 6 | diffusion maps embedding. 7 | 8 | .. code:: python 9 | 10 | import matplotlib.pyplot as plt 11 | import numpy as np 12 | 13 | from mpl_toolkits.mplot3d import Axes3D 14 | from pydiffmap import diffusion_map as dm 15 | 16 | %matplotlib inline 17 | 18 | We import trajectory of two particles connected by a double-well 19 | potential, which is a function of a radius: V(r) = V\_DW(r). The dimer 20 | was simulated at 300K with Langevin dynamics using OpenMM. The obvious 21 | collective variable is the radius case and we demonstrate how the first 22 | dominant eigenvector obtained from the diffusion map clearly correlates 23 | with this reaction coordinate. As a metric, we use the root mean square 24 | deviation (RMSD) from the package 25 | https://pypi.python.org/pypi/rmsd/1.2.5. 26 | 27 | .. 
code:: python 28 | 29 | traj=np.load('Data/dimer_trajectory.npy') 30 | energy=np.load('Data/dimer_energy.npy') 31 | print('Loaded trajectory of '+repr(len(traj))+' steps of dimer molecule: '+repr(traj.shape[1])+' particles in dimension '+repr(traj.shape[2])+'.') 32 | 33 | 34 | .. parsed-literal:: 35 | 36 | Loaded trajectory of 1000 steps of dimer molecule: 2 particles in dimension 3. 37 | 38 | 39 | .. code:: python 40 | 41 | def compute_radius(X): 42 | return np.linalg.norm(X[:,0,:]-X[:,1,:], 2, axis=1) 43 | 44 | fig = plt.figure(figsize=[16,6]) 45 | ax = fig.add_subplot(121) 46 | 47 | radius= compute_radius(traj) 48 | cax2 = ax.scatter(range(len(radius)), radius, c=radius, s=20,alpha=0.90,cmap=plt.cm.Spectral) 49 | cbar = fig.colorbar(cax2) 50 | cbar.set_label('Radius') 51 | ax.set_xlabel('Simulation steps') 52 | ax.set_ylabel('Radius') 53 | 54 | 55 | ax2 = fig.add_subplot(122, projection='3d') 56 | 57 | L=2 58 | 59 | i=0 60 | 61 | ax2.scatter(traj[i,0,0], traj[i,0,1], traj[i,0,2], c='b', s=100, alpha=0.90, edgecolors='none', depthshade=True,) 62 | ax2.scatter(traj[i,1,0], traj[i,1,1], traj[i,1,2], c='r', s=100, alpha=0.90, edgecolors='none', depthshade=True,) 63 | 64 | ax2.set_xlim([-L, L]) 65 | ax2.set_ylim([-L, L]) 66 | ax2.set_zlim([-L, L]) 67 | 68 | ax2.set_xlabel('X') 69 | ax2.set_ylabel('Y') 70 | ax2.set_zlabel('Z') 71 | 72 | plt.show() 73 | 74 | 75 | 76 | 77 | .. image:: output_5_0.png 78 | 79 | 80 | .. code:: python 81 | 82 | # download from https://pypi.python.org/pypi/rmsd/1.2.5 83 | import rmsd 84 | 85 | 86 | def myRMSDmetric(arr1, arr2): 87 | """ 88 | This function is built under the assumption that the space dimension is 3!!! 89 | Requirement from sklearn radius_neighbors_graph: The callable should take two arrays as input and return one value indicating the distance between them. 
90 | Input: One row from reshaped XYZ trajectory as number of steps times nDOF 91 | Inside: Reshape to XYZ format and apply rmsd as r=rmsd(X[i], X[j]) 92 | Output: rmsd distance 93 | """ 94 | 95 | nParticles = len(arr1) / 3; 96 | assert (nParticles == int(nParticles)) 97 | 98 | X1 = arr1.reshape(int(nParticles), 3 ) 99 | X2 = arr2.reshape(int(nParticles), 3 ) 100 | 101 | X1 = X1 - rmsd.centroid(X1) 102 | X2 = X2 - rmsd.centroid(X2) 103 | 104 | return rmsd.kabsch_rmsd(X1, X2) 105 | 106 | 107 | 108 | Compute diffusion map embedding using the rmsd metric from above. 109 | 110 | .. code:: python 111 | 112 | epsilon=0.05 113 | 114 | Xresh=traj.reshape(traj.shape[0], traj.shape[1]*traj.shape[2]) 115 | mydmap = dm.DiffusionMap.from_sklearn(n_evecs = 1, epsilon = epsilon, alpha = 0.5, k=1000, metric=myRMSDmetric) 116 | dmap = mydmap.fit_transform(Xresh) 117 | 118 | 119 | .. parsed-literal:: 120 | 121 | 0.05 eps fitted 122 | 123 | 124 | Plot the dominant eigenvector over radius, to show the correlation with 125 | this collective variable. 126 | 127 | .. code:: python 128 | 129 | evecs = mydmap.evecs 130 | 131 | fig = plt.figure(figsize=[16,6]) 132 | ax = fig.add_subplot(121) 133 | 134 | ax.scatter(compute_radius(traj), evecs[:,0], c=evecs[:,0], s=10, cmap=plt.cm.Spectral) 135 | ax.set_xlabel('Radius') 136 | ax.set_ylabel('Dominant eigenvector') 137 | 138 | ax2 = fig.add_subplot(122) 139 | # 140 | cax2 = ax2.scatter(compute_radius(traj), energy, c=evecs[:,0], s=10, cmap=plt.cm.Spectral) 141 | ax2.set_xlabel('Radius') 142 | ax2.set_ylabel('Potential Energy') 143 | cbar = fig.colorbar(cax2) 144 | cbar.set_label('Dominant eigenvector') 145 | plt.show() 146 | 147 | 148 | 149 | .. 
image:: output_10_0.png 150 | 151 | 152 | -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Metrics/output_10_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DiffusionMapsAcademics/pyDiffMap/22adc99faa83708e9ac05224015fa02c3a7f3c91/docs/jupyter notebook tutorials/Metrics/output_10_0.png -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Metrics/output_5_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DiffusionMapsAcademics/pyDiffMap/22adc99faa83708e9ac05224015fa02c3a7f3c91/docs/jupyter notebook tutorials/Metrics/output_5_0.png -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Spherical_Harmonics/Spherical_Harmonics.rst: -------------------------------------------------------------------------------- 1 | 2 | Spherical Harmonics 3 | =================== 4 | 5 | In this notebook we try to reproduce the eigenfunctions of the Laplacian 6 | on the 2D sphere embedded in :math:`\mathbb{R}^3`. The eigenfunctions 7 | are the spherical harmonics :math:`Y_l^m(\theta, \phi)`. 8 | 9 | .. code:: python 10 | 11 | import numpy as np 12 | 13 | from pydiffmap import diffusion_map as dm 14 | from scipy.sparse import csr_matrix 15 | 16 | np.random.seed(100) 17 | 18 | import matplotlib.pyplot as plt 19 | from mpl_toolkits.mplot3d import Axes3D 20 | %matplotlib inline 21 | 22 | generate data on a Sphere 23 | ------------------------- 24 | 25 | we sample longitude and latitude uniformly and then transform to 26 | :math:`\mathbb{R}^3` using geographical coordinates (latidude is 27 | measured from the equator). 28 | 29 | .. 
code:: python 30 | 31 | m = 10000 32 | Phi = 2*np.pi*np.random.rand(m) - np.pi 33 | Theta = np.pi*np.random.rand(m) - 0.5*np.pi 34 | X = np.cos(Theta)*np.cos(Phi) 35 | Y = np.cos(Theta)*np.sin(Phi) 36 | Z = np.sin(Theta) 37 | data = np.array([X, Y, Z]).transpose() 38 | 39 | 40 | 41 | run diffusion maps 42 | ------------------ 43 | 44 | Now we initialize the diffusion map object and fit it to the dataset. We 45 | set n\_evecs = 4, and since we want to unbias with respect to the 46 | non-uniform sampling density we set alpha = 1.0. The epsilon parameter 47 | controls the scale and is set here by hand. The k parameter controls the 48 | neighbour lists, a smaller k will increase performance but decrease 49 | accuracy. 50 | 51 | .. code:: python 52 | 53 | eps = 0.01 54 | mydmap = dm.DiffusionMap.from_sklearn(n_evecs=4, epsilon=eps, alpha=1.0, k=400) 55 | mydmap.fit_transform(data) 56 | test_evals = -4./eps*(mydmap.evals - 1) 57 | print(test_evals) 58 | 59 | 60 | .. parsed-literal:: 61 | 62 | 0.01 eps fitted 63 | [1116.4945497 1143.35090854 1147.22344311 2378.50043128] 64 | 65 | 66 | The true eigenfunctions here are spherical harmonics 67 | :math:`Y_l^m(\theta, \phi)` and the true eigenvalues are 68 | :math:`\lambda_l = l(l+1)`. The eigenfunction corresponding to 69 | :math:`l=0` is the constant function, which we omit. Since :math:`l=1` 70 | has multiplicity three, this gives the benchmark eigenvalues [2, 2, 2, 71 | 6]. 72 | 73 | .. code:: python 74 | 75 | real_evals = np.array([2, 2, 2, 6]) 76 | test_evals = -4./eps*(mydmap.evals - 1) 77 | eval_error = np.abs(test_evals-real_evals)/real_evals 78 | print(test_evals) 79 | print(eval_error) 80 | 81 | 82 | ..
parsed-literal:: 83 | 84 | [1116.4945497 1143.35090854 1147.22344311 2378.50043128] 85 | [557.24727485 570.67545427 572.61172156 395.41673855] 86 | 87 | 88 | visualisation 89 | ------------- 90 | 91 | With pydiffmap's visualization toolbox, we can get a quick look at the 92 | embedding produced by the first two diffusion coordinates and the data 93 | colored by the first eigenfunction. 94 | 95 | .. code:: python 96 | 97 | from pydiffmap.visualization import embedding_plot, data_plot 98 | 99 | embedding_plot(mydmap, dim=3, scatter_kwargs = {'c': mydmap.dmap[:,0], 'cmap': 'Spectral'}) 100 | 101 | plt.show() 102 | 103 | 104 | 105 | .. image:: output_9_0.png 106 | 107 | 108 | .. code:: python 109 | 110 | data_plot(mydmap, dim=3, scatter_kwargs = {'cmap': 'Spectral'}) 111 | plt.show() 112 | 113 | 114 | 115 | .. image:: output_10_0.png 116 | 117 | 118 | Rotating the dataset 119 | -------------------- 120 | 121 | There is rotational symmetry in this dataset. To remove it, we define 122 | the 'north pole' to be the point where the first diffusion coordinate 123 | attains its maximum value. 124 | 125 | .. code:: python 126 | 127 | northpole = np.argmax(mydmap.dmap[:,0]) 128 | north = data[northpole,:] 129 | phi_n = Phi[northpole] 130 | theta_n = Theta[northpole] 131 | R = np.array([[np.sin(theta_n)*np.cos(phi_n), np.sin(theta_n)*np.sin(phi_n), -np.cos(theta_n)], 132 | [-np.sin(phi_n), np.cos(phi_n), 0], 133 | [np.cos(theta_n)*np.cos(phi_n), np.cos(theta_n)*np.sin(phi_n), np.sin(theta_n)]]) 134 | 135 | .. code:: python 136 | 137 | data_rotated = np.dot(R,data.transpose()) 138 | data_rotated.shape 139 | 140 | 141 | 142 | 143 | .. parsed-literal:: 144 | 145 | (3, 10000) 146 | 147 | 148 | 149 | Now that the dataset is rotated, we can check how well the first 150 | diffusion coordinate approximates the first spherical harmonic 151 | :math:`Y_1^1(\theta, \phi) = \sin(\theta) = Z`. 152 | 153 | .. 
code:: python 154 | 155 | print('Correlation between \phi and \psi_1') 156 | print(np.corrcoef(mydmap.dmap[:,0], data_rotated[2,:])) 157 | 158 | plt.figure(figsize=(16,6)) 159 | ax = plt.subplot(121) 160 | ax.scatter(data_rotated[2,:], mydmap.dmap[:,0]) 161 | ax.set_title('First DC against $Z$') 162 | ax.set_xlabel(r'$Z$') 163 | ax.set_ylabel(r'$\psi_1$') 164 | ax.axis('tight') 165 | 166 | ax2 = plt.subplot(122,projection='3d') 167 | ax2.scatter(data_rotated[0,:],data_rotated[1,:],data_rotated[2,:], c=mydmap.dmap[:,0], cmap=plt.cm.Spectral) 168 | #ax2.view_init(75, 10) 169 | ax2.set_title('sphere dataset rotated, color according to $\psi_1$') 170 | ax2.set_xlabel('X') 171 | ax2.set_ylabel('Y') 172 | ax2.set_zlabel('Z') 173 | 174 | plt.show() 175 | 176 | 177 | .. parsed-literal:: 178 | 179 | Correlation between \phi and \psi_1 180 | [[1. 0.99915563] 181 | [0.99915563 1. ]] 182 | 183 | 184 | 185 | .. image:: output_15_1.png 186 | 187 | 188 | -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Spherical_Harmonics/output_10_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DiffusionMapsAcademics/pyDiffMap/22adc99faa83708e9ac05224015fa02c3a7f3c91/docs/jupyter notebook tutorials/Spherical_Harmonics/output_10_0.png -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Spherical_Harmonics/output_15_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DiffusionMapsAcademics/pyDiffMap/22adc99faa83708e9ac05224015fa02c3a7f3c91/docs/jupyter notebook tutorials/Spherical_Harmonics/output_15_1.png -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Spherical_Harmonics/output_9_0.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/DiffusionMapsAcademics/pyDiffMap/22adc99faa83708e9ac05224015fa02c3a7f3c91/docs/jupyter notebook tutorials/Spherical_Harmonics/output_9_0.png -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Swiss_Roll/Swiss_Roll.rst: -------------------------------------------------------------------------------- 1 | 2 | The classic swiss roll data set 3 | =============================== 4 | 5 | author: Ralf Banisch 6 | 7 | We demonstrate the usage of the diffusion\_map class on a 8 | two-dimensional manifold embedded in :math:`\mathbb{R}^3`. 9 | 10 | .. code:: python 11 | 12 | # import some necessary functions for plotting as well as the diffusion_map class from pydiffmap. 13 | import matplotlib.pyplot as plt 14 | import numpy as np 15 | 16 | from mpl_toolkits.mplot3d import Axes3D 17 | from pydiffmap import diffusion_map as dm 18 | 19 | %matplotlib inline 20 | 21 | Create Data 22 | ----------- 23 | 24 | We create the dataset: A noisy sampling of the twodimensional "swiss 25 | roll" embedded in :math:`\mathbb{R}^3`. The sampling is such that the 26 | density of samples decreases with the distance from the origin 27 | (non-uniform sampling). 28 | 29 | In order to be handled correctly by the diffusion\_map class, we must 30 | ensure the data is a numpy array of shape (n\_points, n\_features). 31 | 32 | .. 
code:: python 33 | 34 | # set parameters 35 | length_phi = 15 #length of swiss roll in angular direction 36 | length_Z = 15 #length of swiss roll in z direction 37 | sigma = 0.1 #noise strength 38 | m = 10000 #number of samples 39 | 40 | # create dataset 41 | phi = length_phi*np.random.rand(m) 42 | xi = np.random.rand(m) 43 | Z = length_Z*np.random.rand(m) 44 | X = 1./6*(phi + sigma*xi)*np.sin(phi) 45 | Y = 1./6*(phi + sigma*xi)*np.cos(phi) 46 | 47 | swiss_roll = np.array([X, Y, Z]).transpose() 48 | 49 | # check that we have the right shape 50 | print(swiss_roll.shape) 51 | 52 | 53 | .. parsed-literal:: 54 | 55 | (10000, 3) 56 | 57 | 58 | Run pydiffmap 59 | ------------- 60 | 61 | Now we initialize the diffusion map object and fit it to the dataset. 62 | Since we are interested in only the first two diffusion coordinates we 63 | set n\_evecs = 2, and since we want to unbias with respect to the 64 | non-uniform sampling density we set alpha = 1.0. The epsilon parameter 65 | controls the scale and needs to be adjusted to the data at hand. The k 66 | parameter controls the neighbour lists, a smaller k will increase 67 | performance but decrease accuracy. 68 | 69 | .. code:: python 70 | 71 | # initialize Diffusion map object. 72 | neighbor_params = {'n_jobs': -1, 'algorithm': 'ball_tree'} 73 | 74 | mydmap = dm.DiffusionMap.from_sklearn(n_evecs=2, k=200, epsilon='bgh', alpha=1.0, neighbor_params=neighbor_params) 75 | # fit to data and return the diffusion map. 76 | dmap = mydmap.fit_transform(swiss_roll) 77 | 78 | 79 | .. parsed-literal:: 80 | 81 | 0.015625000000000007 eps fitted 82 | 83 | 84 | .. code:: python 85 | 86 | mydmap.epsilon_fitted 87 | 88 | 89 | 90 | 91 | .. parsed-literal:: 92 | 93 | 0.015625000000000007 94 | 95 | 96 | 97 | Visualization 98 | ------------- 99 | 100 | We show the original data set on the right, with points colored 101 | according to the first diffusion coordinate. 
On the left, we show the 102 | diffusion map embedding given by the first two diffusion coordinates. 103 | Points are again colored according to the first diffusion coordinate, 104 | which seems to parameterize the :math:`\phi` direction. We can see that 105 | the diffusion map embedding 'unwinds' the swiss roll. 106 | 107 | .. code:: python 108 | 109 | from pydiffmap.visualization import embedding_plot, data_plot 110 | 111 | embedding_plot(mydmap, scatter_kwargs = {'c': dmap[:,0], 'cmap': 'Spectral'}) 112 | data_plot(mydmap, dim=3, scatter_kwargs = {'cmap': 'Spectral'}) 113 | 114 | plt.show() 115 | 116 | 117 | 118 | .. image:: output_8_0.png 119 | 120 | 121 | 122 | .. image:: output_8_1.png 123 | 124 | 125 | To get a bit more information out of the embedding, we can scale the 126 | points according to the numerical estimate of the sampling density 127 | (mydmap.q), and color them according to their location in the phi 128 | direction. For comparison, we color the original data set according to 129 | :math:`\phi` this time. 130 | 131 | .. code:: python 132 | 133 | from pydiffmap.visualization import embedding_plot, data_plot 134 | 135 | embedding_plot(mydmap, scatter_kwargs = {'c': phi, 's': mydmap.q, 'cmap': 'Spectral'}) 136 | data_plot(mydmap, dim=3, scatter_kwargs = {'cmap': 'Spectral'}) 137 | plt.show() 138 | 139 | 140 | 141 | .. image:: output_10_0.png 142 | 143 | 144 | 145 | .. image:: output_10_1.png 146 | 147 | 148 | We can see that points near the center of the swiss roll, where the 149 | winding is tight, are closer together in the embedding, while points 150 | further away from the center are more spaced out. Let's check how the 151 | first two diffusion coordinates correlate with :math:`\phi` and 152 | :math:`Z`. 153 | 154 | .. 
code:: python 155 | 156 | print('Correlation between \phi and \psi_1') 157 | print(np.corrcoef(dmap[:,0], phi)) 158 | 159 | plt.figure(figsize=(16,6)) 160 | ax = plt.subplot(121) 161 | ax.scatter(phi, dmap[:,0]) 162 | ax.set_title('First DC against $\phi$') 163 | ax.set_xlabel(r'$\phi$') 164 | ax.set_ylabel(r'$\psi_1$') 165 | ax.axis('tight') 166 | 167 | print('Correlation between Z and \psi_2') 168 | print(np.corrcoef(dmap[:,1], Z)) 169 | 170 | ax2 = plt.subplot(122) 171 | ax2.scatter(Z, dmap[:,1]) 172 | ax2.set_title('Second DC against Z') 173 | ax2.set_xlabel('Z') 174 | ax2.set_ylabel(r'$\psi_2$') 175 | 176 | plt.show() 177 | 178 | 179 | .. parsed-literal:: 180 | 181 | Correlation between \phi and \psi_1 182 | [[1. 0.92408413] 183 | [0.92408413 1. ]] 184 | Correlation between Z and \psi_2 185 | [[1. 0.97536036] 186 | [0.97536036 1. ]] 187 | 188 | 189 | 190 | .. image:: output_12_1.png 191 | 192 | 193 | -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Swiss_Roll/output_10_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DiffusionMapsAcademics/pyDiffMap/22adc99faa83708e9ac05224015fa02c3a7f3c91/docs/jupyter notebook tutorials/Swiss_Roll/output_10_0.png -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Swiss_Roll/output_10_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DiffusionMapsAcademics/pyDiffMap/22adc99faa83708e9ac05224015fa02c3a7f3c91/docs/jupyter notebook tutorials/Swiss_Roll/output_10_1.png -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Swiss_Roll/output_12_1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DiffusionMapsAcademics/pyDiffMap/22adc99faa83708e9ac05224015fa02c3a7f3c91/docs/jupyter notebook tutorials/Swiss_Roll/output_12_1.png -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Swiss_Roll/output_8_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DiffusionMapsAcademics/pyDiffMap/22adc99faa83708e9ac05224015fa02c3a7f3c91/docs/jupyter notebook tutorials/Swiss_Roll/output_8_0.png -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Swiss_Roll/output_8_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DiffusionMapsAcademics/pyDiffMap/22adc99faa83708e9ac05224015fa02c3a7f3c91/docs/jupyter notebook tutorials/Swiss_Roll/output_8_1.png -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/index.rst: -------------------------------------------------------------------------------- 1 | Jupyter notebook tutorials 2 | ========================== 3 | 4 | .. toctree:: 5 | :glob: 6 | 7 | Swiss_Roll/Swiss_Roll 8 | Spherical_Harmonics/Spherical_Harmonics 9 | Four_Wells/Four_Wells 10 | Metrics/Metrics 11 | -------------------------------------------------------------------------------- /docs/readme.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../README.rst 2 | -------------------------------------------------------------------------------- /docs/reference/diffusion_map.rst: -------------------------------------------------------------------------------- 1 | diffusion_map 2 | ============= 3 | 4 | .. 
automodule:: pydiffmap.diffusion_map 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/reference/index.rst: -------------------------------------------------------------------------------- 1 | Reference 2 | ========= 3 | 4 | .. toctree:: 5 | :glob: 6 | 7 | diffusion_map 8 | kernel 9 | visualization 10 | -------------------------------------------------------------------------------- /docs/reference/kernel.rst: -------------------------------------------------------------------------------- 1 | kernel 2 | ====== 3 | 4 | .. automodule:: pydiffmap.kernel 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/reference/visualization.rst: -------------------------------------------------------------------------------- 1 | visualization 2 | ============= 3 | 4 | .. automodule:: pydiffmap.visualization 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx>=1.3 2 | sphinx-rtd-theme 3 | -e . 4 | -------------------------------------------------------------------------------- /docs/spelling_wordlist.txt: -------------------------------------------------------------------------------- 1 | builtin 2 | builtins 3 | classmethod 4 | staticmethod 5 | classmethods 6 | staticmethods 7 | args 8 | kwargs 9 | callstack 10 | Changelog 11 | Indices 12 | -------------------------------------------------------------------------------- /docs/theory.rst: -------------------------------------------------------------------------------- 1 | ====== 2 | Theory 3 | ====== 4 | 5 | Diffusion maps is a dimension reduction technique that can be used to discover low dimensional structure in high 6 | dimensional data. 
It assumes that the data points, which are given as points in a high dimensional metric space, 7 | actually live on a lower dimensional structure. To uncover this structure, diffusion maps builds a neighborhood graph 8 | on the data based on the distances between nearby points. Then a graph Laplacian **L** is constructed on the neighborhood 9 | graph. Many variants exist that approximate different differential operators. For example, *standard* diffusion maps 10 | approximates the differential operator 11 | 12 | .. math:: 13 | 14 | \mathcal{L}f = \Delta f - 2(1-\alpha)\nabla f \cdot \frac{\nabla q}{q} 15 | 16 | 17 | where :math:`\Delta` is the Laplace Beltrami operator, :math:`\nabla` is the gradient operator and :math:`q` is the 18 | sampling density. The normalization parameter :math:`\alpha`, which is typically between 0.0 and 1.0, determines how 19 | much :math:`q` is allowed to bias the operator :math:`\mathcal{L}`. 20 | Standard diffusion maps on a dataset ``X``, which has to be given as a numpy array with different rows corresponding to 21 | different observations, is implemented in pydiffmap as:: 22 | 23 | mydmap = diffusion_map.DiffusionMap.from_sklearn(epsilon = my_epsilon, alpha = my_alpha) 24 | mydmap.fit(X) 25 | 26 | Here ``epsilon`` is a scale parameter used to rescale distances between data points. 27 | We can also choose ``epsilon`` automatically due to an algorithm by Berry, Harlim and Giannakis:: 28 | 29 | mydmap = dm.DiffusionMap.from_sklearn(alpha = my_alpha, epsilon = 'bgh') 30 | 31 | For additional optional arguments of the DiffusionMap class, see usage and documentation. 32 | 33 | A variant of diffusion maps, 'TMDmap', unbiases with respect to :math:`q` and approximates the differential operator 34 | 35 | .. math:: 36 | 37 | \mathcal{L}f = \Delta f + \nabla (\log\pi) \cdot \nabla f 38 | 39 | where :math:`\pi` is a 'target distribution' that defines the drift term and has to be known up to a normalization 40 | constant.
TMDmap is implemented in pydiffmap as:: 41 | 42 | mydmap = diffusion_map.TMDmap(epsilon = my_epsilon, alpha = 1.0, change_of_measure=com_fxn) 43 | mydmap.fit(X) 44 | 45 | where ``com_fxn`` is a function that takes in a coordinate and outputs the value of the target distribution :math:`\pi` . 46 | -------------------------------------------------------------------------------- /docs/usage.rst: -------------------------------------------------------------------------------- 1 | ===== 2 | Usage 3 | ===== 4 | 5 | To use pyDiffMap in a project:: 6 | 7 | import pydiffmap 8 | 9 | To initialize a diffusion map object:: 10 | 11 | mydmap = diffusion_map.DiffusionMap.from_sklearn(n_evecs = 1, epsilon = 1.0, alpha = 0.5, k=64) 12 | 13 | where ``n_evecs`` is the number of eigenvectors that are computed, ``epsilon`` is a scale parameter 14 | used to rescale distances between data points, ``alpha`` is a normalization parameter (typically between 0.0 and 1.0) 15 | that influences the effect of the sampling density, and ``k`` is the number of nearest neighbors considered when the kernel 16 | is computed. A larger ``k`` means increased accuracy but larger computation time. 17 | The ``from_sklearn`` command is used because we are constructing using the scikit-learn nearest neighbor framework. 18 | For additional optional arguments, see documentation. 19 | 20 | We can also employ automatic epsilon detection due to an algorithm by Berry, Harlim and Giannakis:: 21 | 22 | mydmap = dm.DiffusionMap.from_sklearn(n_evecs = 1, alpha = 0.5, epsilon = 'bgh', k=64) 23 | 24 | To fit to a dataset ``X`` (array-like, shape (n_query, n_features)):: 25 | 26 | mydmap.fit(X) 27 | 28 | The diffusion map coordinates can also be accessed directly via:: 29 | 30 | dmap = mydmap.fit_transform(X) 31 | 32 | This returns an array ``dmap`` with shape (n_query, n_evecs). E.g. ``dmap[:,0]`` is the first diffusion coordinate 33 | evaluated on the data ``X``.
34 | 35 | In order to compute diffusion coordinates at the out of sample location(s) ``Y``:: 36 | 37 | dmap_Y = mydmap.transform(Y) 38 | -------------------------------------------------------------------------------- /examples/Data/4wells_traj.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DiffusionMapsAcademics/pyDiffMap/22adc99faa83708e9ac05224015fa02c3a7f3c91/examples/Data/4wells_traj.npy -------------------------------------------------------------------------------- /examples/Data/dimer_energy.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DiffusionMapsAcademics/pyDiffMap/22adc99faa83708e9ac05224015fa02c3a7f3c91/examples/Data/dimer_energy.npy -------------------------------------------------------------------------------- /examples/Data/dimer_trajectory.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DiffusionMapsAcademics/pyDiffMap/22adc99faa83708e9ac05224015fa02c3a7f3c91/examples/Data/dimer_trajectory.npy -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal = 1 3 | 4 | 5 | [flake8] 6 | max-line-length = 140 7 | exclude = */migrations/* 8 | ignore = E501,E226,E731,W503 9 | 10 | [pep8] 11 | max-line-length = 140 12 | ignore = E501,E226,E731,W503 13 | 14 | [tool:pytest] 15 | testpaths = tests 16 | norecursedirs = 17 | migrations 18 | 19 | python_files = 20 | test_*.py 21 | *_test.py 22 | tests.py 23 | addopts = 24 | -ra 25 | --strict 26 | --doctest-modules 27 | --doctest-glob=\*.rst 28 | --tb=short 29 | 30 | [isort] 31 | force_single_line = True 32 | line_length = 120 33 | known_first_party = pydiffmap 34 | default_section = THIRDPARTY 35 | forced_separate = test_pydiffmap 36 | not_skip = 
__init__.py 37 | skip = migrations 38 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | from __future__ import absolute_import 4 | from __future__ import print_function 5 | 6 | import io 7 | import re 8 | from glob import glob 9 | from os.path import basename 10 | from os.path import dirname 11 | from os.path import join 12 | from os.path import splitext 13 | 14 | from setuptools import find_packages 15 | from setuptools import setup 16 | 17 | 18 | def read(*names, **kwargs): 19 | with io.open( 20 | join(dirname(__file__), *names), 21 | encoding=kwargs.get('encoding', 'utf8') 22 | ) as fh: 23 | return fh.read() 24 | 25 | 26 | setup( 27 | name='pydiffmap', 28 | version='0.2.0.1', 29 | license='MIT license', 30 | description='Library for constructing variable bandwidth diffusion maps', 31 | long_description='%s\n%s' % ( 32 | re.compile('^.. start-badges.*^.. 
end-badges', re.M | re.S).sub('', read('README.rst')), 33 | re.sub(':[a-z]+:`~?(.*?)`', r'``\1``', read('CHANGELOG.rst')) 34 | ), 35 | author='Ralf Banisch, Erik Henning Thiede, Zofia Trstanova', 36 | author_email='ralf.banisch@fu-berlin.de, ehthiede@gmail.com, zofia.trstanova@ed.ac.uk', 37 | url='https://github.com/DiffusionMapsAcademics/pyDiffMap', 38 | packages=find_packages('src'), 39 | package_dir={'': 'src'}, 40 | py_modules=[splitext(basename(path))[0] for path in glob('src/*.py')], 41 | include_package_data=True, 42 | zip_safe=False, 43 | classifiers=[ 44 | # complete classifier list: http://pypi.python.org/pypi?%3Aaction=list_classifiers 45 | 'Development Status :: 3 - Alpha', 46 | 'Topic :: Scientific/Engineering', 47 | 'Intended Audience :: Science/Research', 48 | 'License :: OSI Approved :: MIT License', 49 | 'Operating System :: Unix', 50 | 'Operating System :: POSIX', 51 | 'Operating System :: Microsoft :: Windows', 52 | 'Programming Language :: Python', 53 | 'Programming Language :: Python :: 2.7', 54 | 'Programming Language :: Python :: 3', 55 | 'Programming Language :: Python :: 3.4', 56 | 'Programming Language :: Python :: 3.5', 57 | 'Programming Language :: Python :: 3.6', 58 | 'Programming Language :: Python :: 3.7', 59 | 'Programming Language :: Python :: 3.8', 60 | 'Topic :: Scientific/Engineering', 61 | ], 62 | keywords=[ 63 | 'diffusion maps', 'manifold learning', 'molecular dynamics', 64 | 'dimensionality reduction' 65 | # eg: 'keyword1', 'keyword2', 'keyword3', 66 | ], 67 | install_requires=[ 68 | 'numpy', 'scipy', 'scikit-learn', 'matplotlib', 'six', 'numexpr' 69 | ], 70 | extras_require={ 71 | # eg: 72 | # 'rst': ['docutils>=0.11'], 73 | # ':python_version=="2.6"': ['argparse'], 74 | }, 75 | ) 76 | # entry_points={ 77 | # 'console_scripts': [ 78 | # 'pyDiffMap = pyDiffMap.cli:main', 79 | # ] 80 | # }, 81 | -------------------------------------------------------------------------------- /src/pydiffmap/__init__.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | A library for constructing diffusion maps. 3 | """ 4 | from __future__ import absolute_import 5 | 6 | from . import diffusion_map 7 | from . import kernel 8 | from . import visualization 9 | from . import utils 10 | 11 | __all__ = ['diffusion_map', 'kernel', 'visualization', 'utils'] 12 | -------------------------------------------------------------------------------- /src/pydiffmap/diffusion_map.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Routines and Class definitions for the diffusion maps algorithm. 4 | """ 5 | from __future__ import absolute_import 6 | 7 | import numpy as np 8 | import scipy.sparse as sps 9 | import scipy.sparse.linalg as spsl 10 | import warnings 11 | from . import kernel 12 | from . import utils 13 | 14 | 15 | class DiffusionMap(object): 16 | """ 17 | Diffusion Map object for data analysis 18 | 19 | Parameters 20 | ---------- 21 | kernel_object : Kernel object. 22 | Kernel object that outputs the values of the kernel. Must have the method .fit(X) and .compute() methods. 23 | Any epsilon desired for normalization should be stored at kernel_object.epsilon_fitted and any bandwidths 24 | should be located at kernel_object.bandwidths. 25 | alpha : scalar, optional 26 | Exponent to be used for the left normalization in constructing the diffusion map. 27 | n_evecs : int, optional 28 | Number of diffusion map eigenvectors to return 29 | weight_fxn : callable or None, optional 30 | Callable function that take in a point, and outputs the value of the weight matrix at those points. 31 | density_fxn : callable or None, optional 32 | Callable function that take in X, and outputs the value of the density of X. Used instead of kernel density estimation in the normalisation. 
33 | bandwidth_normalize: boolean, optional 34 | If true, normalize the final constructed transition matrix by the bandwidth as described in Berry and Harlim. [1]_ 35 | oos : 'nystroem' or 'power', optional 36 | Method to use for out-of-sample extension. 37 | 38 | References 39 | ---------- 40 | .. [1] T. Berry, and J. Harlim, Applied and Computational Harmonic Analysis 40, 68-96 41 | (2016). 42 | """ 43 | 44 | def __init__(self, kernel_object, alpha=0.5, n_evecs=1, 45 | weight_fxn=None, density_fxn=None, 46 | bandwidth_normalize=False, oos='nystroem'): 47 | """ 48 | Initializes Diffusion Map, sets parameters. 49 | """ 50 | self.alpha = alpha 51 | self.n_evecs = n_evecs 52 | self.epsilon_fitted = None 53 | self.weight_fxn = weight_fxn 54 | self.bandwidth_normalize = bandwidth_normalize 55 | self.oos = oos 56 | self.density_fxn = density_fxn 57 | self.local_kernel = kernel_object 58 | 59 | @classmethod 60 | def from_sklearn(cls, alpha=0.5, k=64, kernel_type='gaussian', epsilon='bgh', n_evecs=1, neighbor_params=None, 61 | metric='euclidean', metric_params=None, weight_fxn=None, density_fxn=None, bandwidth_type=None, 62 | bandwidth_normalize=False, oos='nystroem'): 63 | """ 64 | Builds the diffusion map using a kernel constructed using the Scikit-learn nearest neighbor object. 65 | Parameters are largely the same as the constructor, but in place of the kernel object it take 66 | the following parameters. 67 | 68 | Parameters 69 | ---------- 70 | k : int, optional 71 | Number of nearest neighbors over which to construct the kernel. 72 | kernel_type : string, optional 73 | Type of kernel to construct. Currently the only option is 'gaussian', but more will be implemented. 74 | epsilon: string or scalar, optional 75 | Method for choosing the epsilon. Currently, the only options are to provide a scalar (epsilon is set to the provided scalar) 'bgh' (Berry, Giannakis and Harlim), and 'bgh_generous' ('bgh' method, with answer multiplied by 2. 
76 | neighbor_params : dict or None, optional 77 | Optional parameters for the nearest Neighbor search. See scikit-learn NearestNeighbors class for details. 78 | metric : string, optional 79 | Metric for distances in the kernel. Default is 'euclidean'. The callable should take two arrays as input and return one value indicating the distance between them. 80 | metric_params : dict or None, optional 81 | Optional parameters required for the metric given. 82 | bandwidth_type: callable, number, string, or None, optional 83 | Type of bandwidth to use in the kernel. If None (default), a fixed bandwidth kernel is used. If a callable function, the data is passed to the function, and the bandwidth is output (note that the function must take in an entire dataset, not the points 1-by-1). If a number, e.g. -.25, a kernel density estimate is performed, and the bandwidth is taken to be q**(input_number). For a string input, the input is assumed to be an evaluatable expression in terms of the dimension d, e.g. "-1/(d+2)". The dimension is then estimated, and the bandwidth is set to q**(evaluated input string). 84 | 85 | Examples 86 | -------- 87 | # setup neighbor_params list with as many jobs as CPU cores and kd_tree neighbor search. 88 | >>> neighbor_params = {'n_jobs': -1, 'algorithm': 'kd_tree'} 89 | # initialize diffusion map object with the top two eigenvalues being computed, epsilon set to 0.1 90 | # and alpha set to 1.0. 91 | >>> mydmap = DiffusionMap.from_sklearn(n_evecs = 2, epsilon = .1, alpha = 1.0, neighbor_params = neighbor_params) 92 | 93 | References 94 | ---------- 95 | .. [1] T. Berry, and J. Harlim, Applied and Computational Harmonic Analysis 40, 68-96 96 | (2016). 
97 | """ 98 | 99 | buendia = kernel.Kernel(kernel_type=kernel_type, k=k, epsilon=epsilon, neighbor_params=neighbor_params, metric=metric, metric_params=metric_params, bandwidth_type=bandwidth_type) 100 | dmap = cls(buendia, alpha=alpha, n_evecs=n_evecs, weight_fxn=weight_fxn, density_fxn=density_fxn, bandwidth_normalize=bandwidth_normalize, oos=oos) 101 | # if ((bandwidth_type is None) and (bandwidth_normalize is True)): 102 | # warnings.warn('Bandwith normalization set to true, but no bandwidth function provided. Setting to False.') 103 | return dmap 104 | 105 | def _build_kernel(self, X, my_kernel): 106 | my_kernel.fit(X) 107 | kernel_matrix = utils._symmetrize_matrix(my_kernel.compute()) 108 | return kernel_matrix, my_kernel 109 | 110 | def _compute_weights(self, X): 111 | if self.weight_fxn is not None: 112 | N = np.shape(X)[0] 113 | return np.array([self.weight_fxn(Xi) for Xi in X]).reshape(N) 114 | else: 115 | return None 116 | 117 | def _make_right_norm_vec(self, kernel_matrix, q=None, bandwidths=None): 118 | if q is None: 119 | # perform kde 120 | q = np.array(kernel_matrix.sum(axis=1)).ravel() 121 | if bandwidths is not None: 122 | q /= bandwidths**2 123 | right_norm_vec = np.power(q, -self.alpha) 124 | return q, right_norm_vec 125 | 126 | def _right_normalize(self, kernel_matrix, right_norm_vec, weights): 127 | m = right_norm_vec.shape[0] 128 | Dalpha = sps.spdiags(right_norm_vec, 0, m, m) 129 | kernel_matrix = kernel_matrix * Dalpha 130 | if weights is not None: 131 | weight_mat = sps.spdiags(weights, 0, m, m) 132 | kernel_matrix = kernel_matrix * weight_mat 133 | return kernel_matrix 134 | 135 | def _left_normalize(self, kernel_matrix): 136 | row_sum = kernel_matrix.sum(axis=1).transpose() 137 | n = row_sum.shape[1] 138 | Dalpha = sps.spdiags(np.power(row_sum, -1), 0, n, n) 139 | P = Dalpha * kernel_matrix 140 | return P 141 | 142 | def _build_generator(self, P, epsilon_fitted, bandwidths=None, bandwidth_normalize=False): 143 | m, n = P.shape 144 | L = 
(P - sps.eye(m, n, k=(n - m))) / epsilon_fitted 145 | if bandwidth_normalize: 146 | if bandwidths is not None: 147 | bw_diag = sps.spdiags(np.power(bandwidths, -2), 0, m, m) 148 | L = bw_diag * L 149 | else: 150 | warnings.warn('Bandwith normalization set to true, but no bandwidth function was found in normalization. Not performing normalization') 151 | 152 | return L 153 | 154 | def _make_diffusion_coords(self, L): 155 | evals, evecs = spsl.eigs(L, k=(self.n_evecs+1), which='LR') 156 | ix = evals.argsort()[::-1][1:] 157 | evals = np.real(evals[ix]) 158 | evecs = np.real(evecs[:, ix]) 159 | dmap = np.dot(evecs, np.diag(np.sqrt(-1. / evals))) 160 | return dmap, evecs, evals 161 | 162 | def construct_Lmat(self, X): 163 | """ 164 | Builds the transition matrix, but does NOT compute the eigenvectors. This is useful for applications where the transition matrix itself is the object of interest. 165 | 166 | Parameters 167 | ---------- 168 | X : array-like, shape (n_query, n_features) 169 | Data upon which to construct the diffusion map. 
170 | 171 | Returns 172 | ------- 173 | self : the object itself 174 | """ 175 | kernel_matrix, my_kernel = self._build_kernel(X, self.local_kernel) 176 | weights = self._compute_weights(X) 177 | 178 | if self.density_fxn is not None: 179 | density = self.density_fxn(X) 180 | else: 181 | density = None 182 | try: 183 | bandwidths = my_kernel.bandwidths 184 | except AttributeError: 185 | bandwidths = None 186 | 187 | q, right_norm_vec = self._make_right_norm_vec(kernel_matrix, q=density, bandwidths=bandwidths) 188 | P = self._right_normalize(kernel_matrix, right_norm_vec, weights) 189 | P = self._left_normalize(P) 190 | L = self._build_generator(P, my_kernel.epsilon_fitted, bandwidths, bandwidth_normalize=self.bandwidth_normalize) 191 | 192 | # Save data 193 | self.local_kernel = my_kernel 194 | self.epsilon_fitted = my_kernel.epsilon_fitted 195 | self.data = X 196 | self.weights = weights 197 | self.kernel_matrix = kernel_matrix 198 | self.L = L 199 | self.q = q 200 | self.right_norm_vec = right_norm_vec 201 | return self 202 | 203 | def fit(self, X): 204 | """ 205 | Fits the data. 206 | 207 | Parameters 208 | ---------- 209 | X : array-like, shape (n_query, n_features) 210 | Data upon which to construct the diffusion map. 211 | 212 | Returns 213 | ------- 214 | self : the object itself 215 | """ 216 | self.construct_Lmat(X) 217 | dmap, evecs, evals = self._make_diffusion_coords(self.L) 218 | 219 | # Save constructed data. 220 | self.evals = evals 221 | self.evecs = evecs 222 | self.dmap = dmap 223 | return self 224 | 225 | def transform(self, Y): 226 | """ 227 | Performs Nystroem out-of-sample extension to calculate the values of the diffusion coordinates at each given point. 228 | 229 | Parameters 230 | ---------- 231 | Y : array-like, shape (n_query, n_features) 232 | Data for which to perform the out-of-sample extension. 233 | 234 | Returns 235 | ------- 236 | phi : numpy array, shape (n_query, n_eigenvectors) 237 | Transformed value of the given values. 
238 | """ 239 | if np.array_equal(self.data, Y): 240 | return self.dmap 241 | else: 242 | # turn Y into 2D array if needed 243 | if (Y.ndim == 1): 244 | Y = Y[np.newaxis, :] 245 | 246 | if self.oos == "nystroem": 247 | return nystroem_oos(self, Y) 248 | elif self.oos == "power": 249 | return power_oos(self, Y) 250 | else: 251 | raise ValueError('Did not understand the OOS algorithm specified') 252 | 253 | def fit_transform(self, X): 254 | """ 255 | Fits the data and returns diffusion coordinates. equivalent to calling dmap.fit(X).transform(x). 256 | 257 | Parameters 258 | ---------- 259 | X : array-like, shape (n_query, n_features) 260 | Data upon which to construct the diffusion map. 261 | 262 | Returns 263 | ------- 264 | phi : numpy array, shape (n_query, n_eigenvectors) 265 | Transformed value of the given values. 266 | """ 267 | self.fit(X) 268 | return self.dmap 269 | 270 | 271 | class TMDmap(DiffusionMap): 272 | """ 273 | Implementation of the TargetMeasure diffusion map. This provides a more convenient interface for some hyperparameter selection for the general diffusion object. It takes the same parameters as the base Diffusion Map object. However, rather than taking a weight function, it takes as input a change of measure function. 274 | 275 | Parameters 276 | ---------- 277 | change_of_measure : callable, optional 278 | Function that takes in a point and evaluates the change-of-measure between the density otherwise stationary to the diffusion map and the desired density. 
279 | """ 280 | 281 | def __init__(self, alpha=0.5, k=64, kernel_type='gaussian', epsilon='bgh', 282 | n_evecs=1, neighbor_params=None, metric='euclidean', 283 | metric_params=None, change_of_measure=None, density_fxn=None, 284 | bandwidth_type=None, bandwidth_normalize=False, oos='nystroem'): 285 | 286 | def weight_fxn(y_i): 287 | return np.sqrt(change_of_measure(y_i)) 288 | 289 | buendia = kernel.Kernel(kernel_type=kernel_type, k=k, epsilon=epsilon, neighbor_params=neighbor_params, metric=metric, metric_params=metric_params, bandwidth_type=bandwidth_type) 290 | 291 | super(TMDmap, self).__init__(buendia, alpha=alpha, n_evecs=n_evecs, weight_fxn=weight_fxn, density_fxn=density_fxn, bandwidth_normalize=bandwidth_normalize, oos=oos) 292 | 293 | 294 | def nystroem_oos(dmap_object, Y): 295 | """ 296 | Performs Nystroem out-of-sample extension to calculate the values of the diffusion coordinates at each given point. 297 | 298 | Parameters 299 | ---------- 300 | dmap_object : DiffusionMap object 301 | Diffusion map upon which to perform the out-of-sample extension. 302 | Y : array-like, shape (n_query, n_features) 303 | Data for which to perform the out-of-sample extension. 304 | 305 | Returns 306 | ------- 307 | phi : numpy array, shape (n_query, n_eigenvectors) 308 | Transformed value of the given values. 309 | """ 310 | # check if Y is equal to data. If yes, no computation needed. 311 | # compute the values of the kernel matrix 312 | kernel_extended = dmap_object.local_kernel.compute(Y) 313 | weights = dmap_object._compute_weights(dmap_object.local_kernel.data) 314 | P = dmap_object._left_normalize(dmap_object._right_normalize(kernel_extended, dmap_object.right_norm_vec, weights)) 315 | oos_evecs = P * dmap_object.dmap 316 | # evals_p = dmap_object.local_kernel.epsilon_fitted * dmap_object.evals + 1. 317 | # oos_dmap = np.dot(oos_evecs, np.diag(1. 
/ evals_p)) 318 | return oos_evecs 319 | 320 | 321 | def power_oos(dmap_object, Y): 322 | """ 323 | Performs out-of-sample extension to calculate the values of the diffusion coordinates at each given point using the power-like method. 324 | 325 | Parameters 326 | ---------- 327 | dmap_object : DiffusionMap object 328 | Diffusion map upon which to perform the out-of-sample extension. 329 | Y : array-like, shape (n_query, n_features) 330 | Data for which to perform the out-of-sample extension. 331 | 332 | Returns 333 | ------- 334 | phi : numpy array, shape (n_query, n_eigenvectors) 335 | Transformed value of the given values. 336 | """ 337 | m = int(Y.shape[0]) 338 | k_yx, y_bandwidths = dmap_object.local_kernel.compute(Y, return_bandwidths=True) # Evaluate on ref points 339 | yy_right_norm_vec = dmap_object._make_right_norm_vec(k_yx, y_bandwidths)[1] 340 | k_yy_diag = dmap_object.local_kernel.kernel_fxn(0, dmap_object.epsilon_fitted) 341 | data_full = np.vstack([dmap_object.local_kernel.data, Y]) 342 | k_full = sps.hstack([k_yx, sps.eye(m) * k_yy_diag]) 343 | right_norm_full = np.hstack([dmap_object.right_norm_vec, yy_right_norm_vec]) 344 | weights = dmap_object._compute_weights(data_full) 345 | 346 | P = dmap_object._left_normalize(dmap_object._right_normalize(k_full, right_norm_full, weights)) 347 | L = dmap_object._build_generator(P, dmap_object.epsilon_fitted, y_bandwidths) 348 | L_yx = L[:, :-m] 349 | L_yy = np.array(L[:, -m:].diagonal()) 350 | adj_evals = dmap_object.evals - L_yy.reshape(-1, 1) 351 | dot_part = np.array(L_yx.dot(dmap_object.dmap)) 352 | return (1. / adj_evals) * dot_part 353 | -------------------------------------------------------------------------------- /src/pydiffmap/kernel.py: -------------------------------------------------------------------------------- 1 | """ 2 | A class to implement diffusion kernels. 
3 | """ 4 | 5 | import numbers 6 | import numpy as np 7 | import numexpr as ne 8 | import scipy.sparse as sps 9 | import warnings 10 | from sklearn.neighbors import NearestNeighbors 11 | from six import string_types 12 | from . import utils 13 | try: 14 | from scipy.special import logsumexp 15 | except ModuleNotFoundError: 16 | from scipy.misc import logsumexp 17 | 18 | 19 | class Kernel(object): 20 | """ 21 | Class abstracting the evaluation of kernel functions on the dataset. 22 | 23 | Parameters 24 | ---------- 25 | kernel_type : string or callable, optional 26 | Type of kernel to construct. Currently the only option is 'gaussian' (the default), but more will be implemented. 27 | epsilon : string, optional 28 | Method for choosing the epsilon. Currently, the only options are to provide a scalar (epsilon is set to the provided scalar) 'bgh' (Berry, Giannakis and Harlim), and 'bgh_generous' ('bgh' method, with answer multiplied by 2. 29 | k : int, optional 30 | Number of nearest neighbors over which to construct the kernel. 31 | neighbor_params : dict or None, optional 32 | Optional parameters for the nearest Neighbor search. See scikit-learn NearestNeighbors class for details. 33 | metric : string, optional 34 | Distance metric to use in constructing the kernel. This can be selected from any of the scipy.spatial.distance metrics, or a callable function returning the distance. 35 | metric_params : dict or None, optional 36 | Optional parameters required for the metric given. 37 | bandwidth_type: callable, number, string, or None, optional 38 | Type of bandwidth to use in the kernel. If None (default), a fixed bandwidth kernel is used. If a callable function, the data is passed to the function, and the bandwidth is output (note that the function must take in an entire dataset, not the points 1-by-1). If a number, e.g. -.25, a kernel density estimate is performed, and the bandwidth is taken to be q**(input_number). 
For a string input, the input is assumed to be an evaluatable expression in terms of the dimension d, e.g. "-1/(d+2)". The dimension is then estimated, and the bandwidth is set to q**(evaluated input string). 39 | """ 40 | 41 | def __init__(self, kernel_type='gaussian', epsilon='bgh', k=64, neighbor_params=None, metric='euclidean', metric_params=None, bandwidth_type=None): 42 | self.kernel_fxn = _parse_kernel_type(kernel_type) 43 | self.epsilon = epsilon 44 | self.k = k 45 | self.metric = metric 46 | self.metric_params = metric_params 47 | if neighbor_params is None: 48 | neighbor_params = {} 49 | self.neighbor_params = neighbor_params 50 | self.bandwidth_type = bandwidth_type 51 | self.d = None 52 | self.epsilon_fitted = None 53 | 54 | def build_bandwidth_fxn(self, bandwidth_type): 55 | """ 56 | Parses an input string or function specifying the bandwidth. 57 | 58 | Parameters 59 | ---------- 60 | bandwidth_fxn : string or number or callable 61 | Bandwidth to use. If a number, taken to be the beta parameter in [1]_. 62 | If a string, taken to again be beta, but with an evaluatable 63 | expression as a function of the intrinsic dimension d, e.g. '1/(d+2)'. 64 | If a function, taken to be a function that outputs the bandwidth. 65 | 66 | References 67 | ---------- 68 | .. [1] T. Berry, and J. Harlim, Applied and Computational Harmonic Analysis 40, 68-96 69 | (2016). 
70 | """ 71 | if self.bandwidth_type is None: 72 | return None 73 | elif callable(self.bandwidth_type): 74 | return self.bandwidth_type 75 | else: 76 | is_string = isinstance(self.bandwidth_type, string_types) 77 | is_number = isinstance(self.bandwidth_type, numbers.Number) 78 | if (is_string or is_number): 79 | kde_function, d = self._build_nn_kde() 80 | if is_string: 81 | beta = ne.evaluate(self.bandwidth_type) 82 | elif is_number: 83 | beta = self.bandwidth_type 84 | else: 85 | raise Exception("Honestly, we shouldn't have gotten to this point in the code") 86 | bandwidth_fxn = lambda x: kde_function(x)**beta 87 | return bandwidth_fxn 88 | else: 89 | raise ValueError("Bandwidth Type was not a callable, string, or number. Don't know what to make of it.") 90 | 91 | def _build_nn_kde(self, num_nearest_neighbors=8): 92 | my_nnkde = NNKDE(self.neigh, k=num_nearest_neighbors) 93 | my_nnkde.fit() 94 | bandwidth_fxn = lambda x: my_nnkde.compute(x) 95 | self.kde = my_nnkde 96 | return bandwidth_fxn, my_nnkde.d 97 | 98 | def _compute_bandwidths(self, X): 99 | if self.bandwidth_fxn is not None: 100 | return self.bandwidth_fxn(X) 101 | else: 102 | return None 103 | 104 | def fit(self, X): 105 | """ 106 | Fits the kernel to the data X, constructing the nearest neighbor tree. 107 | 108 | Parameters 109 | ---------- 110 | X : array-like, shape (n_query, n_features) 111 | Data upon which to fit the nearest neighbor tree. 112 | 113 | Returns 114 | ------- 115 | self : the object itself 116 | """ 117 | k0 = min(self.k, np.shape(X)[0]) 118 | self.data = X 119 | # Construct Nearest Neighbor Tree 120 | with warnings.catch_warnings(): 121 | warnings.filterwarnings("ignore", message="Parameter p is found in metric_params. 
The corresponding parameter from __init__ is ignored.") 122 | self.neigh = NearestNeighbors(n_neighbors=k0, 123 | metric=self.metric, 124 | metric_params=self.metric_params, 125 | **self.neighbor_params) 126 | self.neigh.fit(X) 127 | self.bandwidth_fxn = self.build_bandwidth_fxn(self.bandwidth_type) 128 | self.bandwidths = self._compute_bandwidths(X) 129 | self.scaled_dists = self._get_scaled_distance_mat(self.data, self.bandwidths) 130 | self.choose_optimal_epsilon() 131 | return self 132 | 133 | def compute(self, Y=None, return_bandwidths=False): 134 | """ 135 | Computes the sparse kernel matrix. 136 | 137 | Parameters 138 | ---------- 139 | Y : array-like, shape (n_query, n_features), optional. 140 | Data against which to calculate the kernel values. If not provided, calculates against the data provided in the fit. 141 | return_bandwidths : boolean, optional 142 | If True, also returns the computed bandwidth for each y point. 143 | 144 | Returns 145 | ------- 146 | K : array-like, shape (n_query_X, n_query_Y) 147 | Values of the kernel matrix. 148 | y_bandwidths : array-like, shape (n_query_y) 149 | Bandwidth evaluated at each point Y. Only returned if return_bandwidths is True. 150 | 151 | """ 152 | if Y is None: 153 | Y = self.data 154 | # if np.array_equal(Y, self.data): # Avoid recomputing nearest neighbors unless needed. 
155 | if _check_equal(Y, self.data): 156 | y_bandwidths = self.bandwidths 157 | K = self.scaled_dists 158 | else: 159 | # perform k nearest neighbour search on X and Y and construct sparse matrix 160 | # retrieve all nonzero elements and apply kernel function to it 161 | y_bandwidths = self._compute_bandwidths(Y) 162 | K = self._get_scaled_distance_mat(Y, y_bandwidths=y_bandwidths) 163 | K.data = self.kernel_fxn(K.data, self.epsilon_fitted) 164 | if return_bandwidths: 165 | return K, y_bandwidths 166 | else: 167 | return K 168 | 169 | def _get_scaled_distance_mat(self, Y, y_bandwidths=None): 170 | # Scales distance matrix by (rho(x) rho(y))^1/2, where rho is the 171 | # bandwidth. 172 | dists = self.neigh.kneighbors_graph(Y, mode='distance') 173 | if y_bandwidths is not None: 174 | bw_x = np.power(self.bandwidths, 0.5) 175 | bw_y = np.power(y_bandwidths, 0.5) 176 | dists = _scale_by_bw(dists, bw_x, bw_y) 177 | return dists 178 | 179 | def choose_optimal_epsilon(self, epsilon=None): 180 | """ 181 | Chooses the optimal value of epsilon and automatically detects the 182 | dimensionality of the data. 183 | 184 | Parameters 185 | ---------- 186 | epsilon : string or scalar, optional 187 | Method for choosing the epsilon. Currently, the only options are to provide a scalar (epsilon is set to the provided scalar) or 'bgh' (Berry, Giannakis and Harlim). 188 | 189 | Returns 190 | ------- 191 | self : the object itself 192 | """ 193 | if epsilon is None: 194 | epsilon = self.epsilon 195 | 196 | # Choose Epsilon according to method provided. 197 | if isinstance(epsilon, numbers.Number): # if user provided. 198 | self.epsilon_fitted = epsilon 199 | return self 200 | elif ((epsilon == 'bgh') or (epsilon == 'bgh_generous')): # Berry, Giannakis Harlim method. 201 | if (self.metric != 'euclidean'): # TODO : replace with call to scipy metrics. 202 | warnings.warn('The BGH method for choosing epsilon assumes a euclidean metric. However, the metric being used is %s. 
Proceed at your own risk...' % self.metric) 203 | if self.scaled_dists is None: 204 | self.scaled_dists = self._get_scaled_distance_mat(self.data, self.bandwidths) 205 | self.epsilon_fitted, self.d = choose_optimal_epsilon_BGH(self.scaled_dists.data**2) 206 | if epsilon == 'bgh_generous': 207 | self.epsilon_fitted *= 2. 208 | else: 209 | raise ValueError("Method for automatically choosing epsilon was given as %s, but this was not recognized" % epsilon) 210 | return self 211 | 212 | 213 | class NNKDE(object): 214 | """ 215 | Class building a kernel density estimate with a variable bandwidth built from the k nearest neighbors. 216 | 217 | Parameters 218 | ---------- 219 | neighbors : scikit-learn NearestNeighbors object 220 | NearestNeighbors object to use in constructing the KDE. 221 | k : int, optional 222 | Number of nearest neighbors to use in the construction of the bandwidth. This must be less or equal to the number of nearest neighbors used by the nearest neighbor object. 223 | """ 224 | 225 | def __init__(self, neighbors, k=8): 226 | self.neigh = neighbors 227 | self.kernel_fxn = _parse_kernel_type('gaussian') 228 | self.k = k 229 | 230 | def _reduce_nn(self, nn_graph, k): 231 | # gets the k nearest neighbors of an m nearest nearest graph, 232 | # where m >n 233 | sub_neighbors = [] 234 | for row in nn_graph: 235 | dense_row = np.array(row[row.nonzero()]).ravel() 236 | sorted_ndxs = np.argpartition(dense_row, k-1) 237 | sorted_row = dense_row[sorted_ndxs[:k]] 238 | sub_neighbors.append(sorted_row) 239 | return np.array(sub_neighbors) 240 | 241 | def _build_bandwidth(self): 242 | dist_graph_vals = self._reduce_nn(self.dist_graph_sq, k=self.k-1) 243 | avg_sq_dist = np.array(dist_graph_vals.sum(axis=1)).ravel() 244 | self.bandwidths = np.sqrt(avg_sq_dist/(self.k-1)).ravel() 245 | 246 | def _choose_epsilon(self): 247 | # dist_graph_sq = self.neigh.kneighbors_graph(n_neighbors=self.neigh.n_neighbors-1, mode='distance') 248 | dist_graph_sq = 
self.dist_graph_sq.copy() 249 | n = dist_graph_sq.shape[0] 250 | dist_graph_sq = _scale_by_bw(dist_graph_sq, self.bandwidths, self.bandwidths) 251 | sq_dists = np.hstack([dist_graph_sq.data, np.zeros(n)]) 252 | self.epsilon_fitted, self.d = choose_optimal_epsilon_BGH(sq_dists) 253 | 254 | def fit(self): 255 | """ 256 | Fits the kde object to the data provided in the nearest neighbor object. 257 | """ 258 | self.dist_graph_sq = self.neigh.kneighbors_graph(n_neighbors=self.neigh.n_neighbors-1, 259 | mode='distance') 260 | self.dist_graph_sq.data = self.dist_graph_sq.data**2 261 | self._build_bandwidth() 262 | self._choose_epsilon() 263 | 264 | def compute(self, Y): 265 | """ 266 | Computes the density at each query point in Y. 267 | 268 | Parameters 269 | ---------- 270 | Y : array-like, shape (n_query, n_features) 271 | Data against which to calculate the kernel values. If not provided, calculates against the data provided in the fit. 272 | 273 | 274 | Returns 275 | ------- 276 | q : array-like, shape (n_query) 277 | Density evaluated at each point Y. 278 | """ 279 | dist_bw = self.neigh.kneighbors_graph(Y, mode='distance', n_neighbors=self.k) 280 | dist_bw.data = dist_bw.data**2 281 | avg_sq_dist = np.array(dist_bw.sum(axis=1)).ravel() 282 | y_bandwidths = np.sqrt(avg_sq_dist/(self.k-1)).ravel() 283 | K = self.neigh.kneighbors_graph(Y, mode='distance') 284 | K.data = K.data**2 285 | K = _scale_by_bw(K, self.bandwidths, y_bandwidths) 286 | K.data /= 4. * self.epsilon_fitted 287 | K.data = np.exp(-K.data) 288 | density = np.array(K.mean(axis=1)).ravel() 289 | density /= y_bandwidths**self.d 290 | density /= (4 * np.pi * self.epsilon_fitted)**(self.d / 2.) 291 | return density 292 | 293 | 294 | def choose_optimal_epsilon_BGH(scaled_distsq, epsilons=None): 295 | """ 296 | Calculates the optimal epsilon for kernel density estimation according to 297 | the criteria in Berry, Giannakis, and Harlim. 
298 | 299 | Parameters 300 | ---------- 301 | scaled_distsq : numpy array 302 | Values for scaled distance squared values, in no particular order or shape. (This is the exponent in the Gaussian Kernel, aka the thing that gets divided by epsilon). 303 | epsilons : array-like, optional 304 | Values of epsilon from which to choose the optimum. If not provided, uses all powers of 2. from 2^-40 to 2^40 305 | 306 | Returns 307 | ------- 308 | epsilon : float 309 | Estimated value of the optimal length-scale parameter. 310 | d : int 311 | Estimated dimensionality of the system. 312 | 313 | Notes 314 | ----- 315 | This code explicitly assumes the kernel is gaussian, for now. 316 | 317 | References 318 | ---------- 319 | The algorithm given is based on [1]_. If you use this code, please cite them. 320 | 321 | .. [1] T. Berry, D. Giannakis, and J. Harlim, Physical Review E 91, 032915 322 | (2015). 323 | """ 324 | if epsilons is None: 325 | epsilons = 2**np.arange(-40., 41., 1.) 326 | 327 | epsilons = np.sort(epsilons).astype('float') 328 | log_T = [logsumexp(-scaled_distsq/(4. * eps)) for eps in epsilons] 329 | log_eps = np.log(epsilons) 330 | log_deriv = np.diff(log_T)/np.diff(log_eps) 331 | max_loc = np.argmax(log_deriv) 332 | # epsilon = np.max([np.exp(log_eps[max_loc]), np.exp(log_eps[max_loc+1])]) 333 | epsilon = np.exp(log_eps[max_loc]) 334 | d = np.round(2.*log_deriv[max_loc]) 335 | return epsilon, d 336 | 337 | 338 | def _parse_kernel_type(kernel_type): 339 | """ 340 | Parses an input string or function specifying the kernel. 341 | 342 | Parameters 343 | ---------- 344 | kernel_type : string or callable 345 | Type of kernel to construct. Currently the only option is 'gaussian' or 346 | a user provided function. If set to a user defined function, it should 347 | take in two arguments: in order, a vector of distances between two 348 | samples, and a length-scale parameter epsilon. The units on epsilon 349 | should be distance squared. 
350 | 351 | Returns 352 | ------- 353 | kernel_fxn : callable 354 | Function that takes in the distance and length-scale parameter, and outputs the value of the kernel. 355 | """ 356 | if kernel_type.lower() == 'gaussian': 357 | def gaussian_kfxn(d, epsilon): 358 | return np.exp(-d**2 / (4. * epsilon)) 359 | return gaussian_kfxn 360 | elif callable(kernel_type): 361 | return kernel_type 362 | else: 363 | raise("Error: Kernel type not understood.") 364 | 365 | 366 | def _scale_by_bw(d_yx, bw_x, bw_y): 367 | """ 368 | Scale a distance matrix with the bandwidth functions while retaining explicit zeros. 369 | Note that this reorders the indices in d_yx. 370 | 371 | Parameters 372 | ---------- 373 | d_yx : scipy sparse matrix 374 | Sparse matrix whose i,j'th element corresponds to f(y_i, x_j) 375 | dw_x : numpy array 376 | Array of bandwidth values evaluated at each x_i 377 | dw_y : numpy array 378 | Array of bandwidth values evaluated at each y_i 379 | 380 | Returns 381 | ------ 382 | scaled_d_yx : scipy sparse matrix 383 | Sparse matrix whose i,j'th element corresponds to f(y_i, x_j)/ bw[y_i] bw[x_j] 384 | """ 385 | m, n = d_yx.shape 386 | x_bw_diag = sps.spdiags(np.power(bw_x, -1), 0, n, n) 387 | y_bw_diag = sps.spdiags(np.power(bw_y, -1), 0, m, m) 388 | row, col = utils._get_sparse_row_col(d_yx) 389 | inv_bw = sps.csr_matrix((np.ones(d_yx.data.shape), (row, col)), shape=d_yx.shape) 390 | inv_bw = y_bw_diag * inv_bw * x_bw_diag 391 | d_yx.sort_indices() 392 | inv_bw.sort_indices() 393 | d_yx.data = d_yx.data * inv_bw.data 394 | return d_yx 395 | 396 | 397 | def _check_equal(X, Y): 398 | """ 399 | Check if two datasets are equal. 400 | 401 | Parameters 402 | ---------- 403 | X : array-like, shape (n_query, n_features), optional. 404 | Data against which to calculate the kernel values. If not provided, calculates against the data provided in the fit. 405 | Y : array-like, shape (n_query, n_features), optional. 406 | Data against which to calculate the kernel values. 
If not provided, calculates against the data provided in the fit. 407 | 408 | Returns 409 | ------- 410 | is_equal : bool 411 | True if the datasets are equal, False if not. 412 | """ 413 | X_is_sparse = isinstance(X, sps.spmatrix) 414 | Y_is_sparse = isinstance(Y, sps.spmatrix) 415 | if (X_is_sparse and Y_is_sparse): 416 | if X.shape != Y.shape: 417 | return False 418 | else: 419 | nonzero_rows, nonzero_cols = (X - Y).nonzero() 420 | return (len(nonzero_rows) == 0) 421 | else: 422 | return np.array_equal(X, Y) 423 | -------------------------------------------------------------------------------- /src/pydiffmap/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Utilities for constructing diffusion maps. 4 | """ 5 | import numpy as np 6 | import scipy.sparse as sps 7 | 8 | 9 | def lookup_fxn(x, vals): 10 | """ 11 | Builds a simple function that acts as a lookup table. Useful for 12 | constructing bandwidth and weigth functions from existing values. 13 | 14 | Parameters 15 | ---------- 16 | x : iterable 17 | values to input for the function 18 | vals : iterable 19 | Output values for the function. Must be of the same length as x. 20 | 21 | Returns 22 | ------- 23 | lf : function 24 | A function that, when input a value in x, outputs the corresponding 25 | value in vals. 26 | """ 27 | # Build dictionary 28 | lookup = {} 29 | for i in range(len(x)): 30 | lookup[str(x[i])] = vals[i] 31 | 32 | # Define and return lookup function 33 | def lf(xi): 34 | return lookup[str(xi)] 35 | 36 | return lf 37 | 38 | 39 | def sparse_from_fxn(X, K, function, Y=None): 40 | """ 41 | For a function f, constructs a sparse matrix where each element is 42 | f(Y_i, X_j) with the same sparsity structure as the matrix K. 43 | 44 | Parameters 45 | ---------- 46 | neighbors : scikit-learn NearestNeighbors object 47 | Data structure containing the nearest neighbor information. 
48 | X values are drawn from the data in this object. 49 | function : function 50 | Function to apply to the pair Y_i, X_j. Must take only two arguments 51 | and return a number. 52 | Y : iterable or None 53 | Values corresponding to each column of the matrix. If None, defaults 54 | to the data in the neighbors object. 55 | 56 | Returns 57 | ------- 58 | M : scipy sparse csr matrix 59 | Matrix with elements f(Y_i, X_j) for nearest neighbors, and zero 60 | otherwise. Here Y_i is the i'th datapoint in Y, and X_j is the 61 | j'th datapoint in the NearestNeighbors object. 62 | """ 63 | if Y is None: 64 | Y = X 65 | row, col = _get_sparse_row_col(K) 66 | 67 | fxn_vals = [] 68 | for i, j in zip(row, col): 69 | fxn_vals.append(function(Y[i], X[j])) 70 | fxn_vals = np.array(fxn_vals) 71 | return sps.csr_matrix((fxn_vals, (row, col)), shape=K.shape) 72 | 73 | 74 | def _get_sparse_row_col(sparse_mat): 75 | sparse_mat = sparse_mat.tocoo() 76 | return sparse_mat.row, sparse_mat.col 77 | 78 | 79 | def _symmetrize_matrix(K, mode='or'): 80 | """ 81 | Symmetrizes a sparse kernel matrix. 82 | 83 | Parameters 84 | ---------- 85 | K : scipy sparse matrix 86 | The sparse matrix to be symmetrized, with positive elements on the nearest neighbors. 87 | mode : string 88 | The method of symmetrization to be implemented. Current options are 'average', 'and', and 'or'. 89 | 90 | Returns 91 | ------- 92 | K_sym : scipy sparse matrix 93 | Symmetrized kernel matrix. 
94 | """ 95 | 96 | if mode == 'average': 97 | return 0.5*(K + K.transpose()) 98 | elif mode == 'or': 99 | Ktrans = K.transpose() 100 | dK = abs(K - Ktrans) 101 | K = K + Ktrans 102 | K = K + dK 103 | return 0.5*K 104 | elif mode == 'and': 105 | Ktrans = K.transpose() 106 | dK = abs(K - Ktrans) 107 | K = K + Ktrans 108 | K = K - dK 109 | return 0.5*K 110 | else: 111 | raise ValueError('Did not understand symmetrization method') 112 | -------------------------------------------------------------------------------- /src/pydiffmap/visualization.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Some convenient visalisation routines. 4 | """ 5 | from __future__ import absolute_import 6 | 7 | import matplotlib.pyplot as plt 8 | from mpl_toolkits.mplot3d import Axes3D # noqa F401 9 | 10 | 11 | def embedding_plot(dmap_instance, dim=2, scatter_kwargs=None, show=True): 12 | """ 13 | Creates diffusion map embedding scatterplot. By default, the first two diffusion 14 | coordinates are plotted against each other. 15 | 16 | Parameters 17 | ---------- 18 | dmap_instance : DiffusionMap Instance 19 | An instance of the DiffusionMap class. 20 | dim: int, optional, 2 or 3. 21 | Optional argument that controls if a two- or three dimensional plot is produced. 22 | scatter_kwargs : dict, optional 23 | Optional arguments to be passed to the scatter plot, e.g. point color, 24 | point size, colormap, etc. 25 | show : boolean, optional 26 | If true, calls plt.show() 27 | 28 | Returns 29 | ------- 30 | fig : pyplot figure object 31 | Figure object where everything is plotted on. 32 | 33 | Examples 34 | -------- 35 | # Plots the top two diffusion coords, colored by the first coord. 
36 | >>> scatter_kwargs = {'s': 2, 'c': mydmap.dmap[:,0], 'cmap': 'viridis'} 37 | >>> embedding_plot(mydmap, scatter_kwargs) 38 | 39 | """ 40 | if scatter_kwargs is None: 41 | scatter_kwargs = {} 42 | fig = plt.figure(figsize=(6, 6)) 43 | if (dim == 2): 44 | plt.scatter(dmap_instance.dmap[:, 0], dmap_instance.dmap[:, 1], **scatter_kwargs) 45 | plt.title('Embedding given by first two DCs.') 46 | plt.xlabel(r'$\psi_1$') 47 | plt.ylabel(r'$\psi_2$') 48 | elif (dim == 3): 49 | ax = fig.add_subplot(111, projection='3d') 50 | ax.scatter(dmap_instance.dmap[:, 0], dmap_instance.dmap[:, 1], dmap_instance.dmap[:, 2], **scatter_kwargs) 51 | ax.set_title('Embedding given by first three DCs.') 52 | ax.set_xlabel(r'$\psi_1$') 53 | ax.set_ylabel(r'$\psi_2$') 54 | ax.set_zlabel(r'$\psi_3$') 55 | plt.axis('tight') 56 | if show: 57 | plt.show() 58 | return fig 59 | 60 | 61 | def data_plot(dmap_instance, n_evec=1, dim=2, scatter_kwargs=None, show=True): 62 | """ 63 | Creates diffusion map embedding scatterplot. By default, the first two diffusion 64 | coordinates are plotted against each other. This only plots against the first two or three 65 | (as controlled by 'dim' parameter) dimensions of the data, however: 66 | effectively this assumes the data is two resp. three dimensional. 67 | 68 | Parameters 69 | ---------- 70 | dmap_instance : DiffusionMap Instance 71 | An instance of the DiffusionMap class. 72 | n_evec: int, optional 73 | The eigenfunction that should be used to color the plot. 74 | dim: int, optional, 2 or 3. 75 | Optional argument that controls if a two- or three dimensional plot is produced. 76 | scatter_kwargs : dict, optional 77 | Optional arguments to be passed to the scatter plot, e.g. point color, 78 | point size, colormap, etc. 79 | show : boolean, optional 80 | If true, calls plt.show() 81 | 82 | Returns 83 | ------- 84 | fig : pyplot figure object 85 | Figure object where everything is plotted on. 
86 | """ 87 | if scatter_kwargs is None: 88 | scatter_kwargs = {} 89 | fig = plt.figure(figsize=(6, 6)) 90 | if (dim == 2): 91 | plt.scatter(dmap_instance.data[:, 0], dmap_instance.data[:, 1], c=dmap_instance.dmap[:, n_evec-1], **scatter_kwargs) 92 | plt.title('Data coloured with first DC.') 93 | plt.xlabel('x') 94 | plt.ylabel('y') 95 | elif (dim == 3): 96 | ax = fig.add_subplot(111, projection='3d') 97 | ax.scatter(dmap_instance.data[:, 0], dmap_instance.data[:, 1], dmap_instance.data[:, 2], c=dmap_instance.dmap[:, n_evec-1], **scatter_kwargs) 98 | ax.set_title('Data coloured with first DC.') 99 | ax.set_xlabel('x') 100 | ax.set_ylabel('y') 101 | ax.set_zlabel('z') 102 | plt.axis('tight') 103 | if show: 104 | plt.show() 105 | return fig 106 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from scipy.special import erfinv 4 | 5 | 6 | @pytest.fixture(scope='module') 7 | def spherical_data(): 8 | # Construct dataset 9 | phi = np.pi*np.linspace(-1, 1, 61)[1:] 10 | theta = np.pi*np.linspace(-1, 1, 33)[1:-1] 11 | Phi, Theta = np.meshgrid(phi, theta) 12 | Phi = Phi.ravel() 13 | Theta = Theta.ravel() 14 | 15 | X = np.cos(Theta)*np.cos(Phi) 16 | Y = np.cos(Theta)*np.sin(Phi) 17 | Z = np.sin(Theta) 18 | return np.array([X, Y, Z]).transpose(), Phi, Theta 19 | 20 | 21 | @pytest.fixture(scope='module') 22 | def uniform_2d_data(): 23 | x = np.linspace(0., 1., 61)*2.*np.pi 24 | y = np.linspace(0., 1., 31)*np.pi 25 | X, Y = np.meshgrid(x, y) 26 | X = X.ravel() 27 | Y = Y.ravel() 28 | data = np.array([X, Y]).transpose() 29 | return data, X, Y 30 | 31 | 32 | @pytest.fixture(scope='module') 33 | def harmonic_1d_data(): 34 | N = 201 35 | delta = 1. / (N+1) 36 | xgrid = 2 * np.arange(1, N+1) * delta - 1. 
37 | x = np.sqrt(2) * erfinv(xgrid) 38 | return x.reshape(-1, 1) 39 | -------------------------------------------------------------------------------- /tests/test_diffusionmap.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from pydiffmap import diffusion_map as dm 5 | 6 | 7 | class TestDiffusionMap(object): 8 | @pytest.mark.parametrize('epsilon', [0.002, 'bgh']) 9 | def test_1Dstrip_evals(self, epsilon): 10 | """ 11 | Test that we compute the correct eigenvalues on a 1d strip of length 2*pi. 12 | Diffusion map parameters in this test are hand-selected to give good results. 13 | Eigenvalue approximation will fail if k is set too small, or epsilon not optimal (sensitive). 14 | """ 15 | # Setup true values to test again. 16 | # real_evals = k^2 for k in 0.5*[1 2 3 4] 17 | real_evals = -0.25*np.array([1, 4, 9, 16]) 18 | X = np.linspace(0., 1., 81)*2.*np.pi 19 | data = np.array([X]).transpose() 20 | THRESH = 0.05 21 | # Setup diffusion map 22 | mydmap = dm.DiffusionMap.from_sklearn(n_evecs=4, epsilon=epsilon, alpha=1.0, k=20) 23 | mydmap.fit(data) 24 | 25 | # Check that relative error values are beneath tolerance. 26 | errors_eval = abs((mydmap.evals - real_evals)/real_evals) 27 | total_error = np.max(errors_eval) 28 | 29 | assert(total_error < THRESH) 30 | 31 | @pytest.mark.parametrize('epsilon', [0.002, 'bgh']) 32 | def test_1Dstrip_evecs(self, epsilon): 33 | """ 34 | Test that we compute the correct eigenvectors (cosines) on a 1d strip of length 2*pi. 35 | Diffusion map parameters in this test are hand-selected to give good results. 36 | Eigenvector approximation will fail if epsilon is set way too small or too large (robust). 37 | """ 38 | # Setup true values to test again. 
39 | # real_evecs = cos(k*x) for k in 0.5*[1 2 3 4] 40 | # Setup data and accuracy threshold 41 | X = np.linspace(0., 1., 81)*2.*np.pi 42 | data = np.array([X]).transpose() 43 | THRESH = 0.003 44 | # Setup diffusion map 45 | mydmap = dm.DiffusionMap.from_sklearn(n_evecs=4, epsilon=epsilon, alpha=1.0, k=40) 46 | mydmap.fit_transform(data) 47 | errors_evec = [] 48 | for k in np.arange(4): 49 | errors_evec.append(abs(np.corrcoef(np.cos(0.5*(k+1)*X), mydmap.evecs[:, k])[0, 1])) 50 | 51 | # Check that relative error values are beneath tolerance. 52 | total_error = 1 - np.min(errors_evec) 53 | assert(total_error < THRESH) 54 | 55 | @pytest.mark.parametrize('epsilon', [0.005, 'bgh']) 56 | def test_1Dstrip_nonunif_evals(self, epsilon): 57 | """ 58 | Test that we compute the correct eigenvalues on a 1d strip of length 2*pi with nonuniform sampling. 59 | Diffusion map parameters in this test are hand-selected to give good results. 60 | Eigenvalue approximation will fail if k is set too small, or epsilon not optimal (sensitive). 61 | """ 62 | # Setup true values to test again. 63 | # real_evals = k^2 for k in 0.5*[1 2 3 4] 64 | real_evals = -0.25*np.array([1, 4, 9, 16]) 65 | # Setup data and accuracy threshold 66 | X = (np.linspace(0., 1., 81)**2)*2.*np.pi 67 | data = np.array([X]).transpose() 68 | THRESH = 0.1 69 | # Setup diffusion map 70 | mydmap = dm.DiffusionMap.from_sklearn(n_evecs=4, epsilon=epsilon, alpha=1.0, k=40) 71 | mydmap.fit_transform(data) 72 | 73 | # Check that relative error values are beneath tolerance. 74 | errors_eval = abs((mydmap.evals- real_evals)/real_evals) 75 | total_error = np.max(errors_eval) 76 | assert(total_error < THRESH) 77 | 78 | @pytest.mark.parametrize('epsilon', [0.005, 'bgh']) 79 | def test_1Dstrip_nonunif_evecs(self, epsilon): 80 | """ 81 | Test that we compute the correct eigenvectors (cosines) on a 1d strip of length 2*pi with nonuniform sampling. 82 | Diffusion map parameters in this test are hand-selected to give good results. 
83 | Eigenvector approximation will fail if epsilon is set way too small or too large (robust). 84 | """ 85 | # Setup true values to test again. 86 | # real_evecs = cos(k*x) for k in 0.5*[1 2 3 4] 87 | # Setup data and accuracy threshold 88 | X = (np.linspace(0., 1., 81)**2)*2.*np.pi 89 | data = np.array([X]).transpose() 90 | THRESH = 0.01 91 | # Setup diffusion map 92 | mydmap = dm.DiffusionMap.from_sklearn(n_evecs=4, epsilon=epsilon, alpha=1.0, k=40) 93 | mydmap.fit_transform(data) 94 | errors_evec = [] 95 | for k in np.arange(4): 96 | errors_evec.append(abs(np.corrcoef(np.cos(0.5*(k+1)*X), mydmap.evecs[:, k])[0, 1])) 97 | 98 | # Check that relative error values are beneath tolerance. 99 | total_error = 1 - np.min(errors_evec) 100 | assert(total_error < THRESH) 101 | 102 | def test_2Dstrip_evals(self, uniform_2d_data): 103 | """ 104 | Test that we compute the correct eigenvalues on a 2d strip of length 2*pi. 105 | Diffusion map parameters in this test are hand-selected to give good results. 106 | Eigenvalue approximation will fail if k is set too small, or epsilon not optimal (sensitive). 107 | """ 108 | # Setup true values to test again. 109 | # real_evals = kx^2 + ky^2 for kx = 0.5*[1 0 2 1] and ky = [0 1 0 1]. 110 | real_evals = -0.25*np.array([1, 4, 4, 5]) 111 | # Setup data and accuracy threshold 112 | data, X, Y = uniform_2d_data 113 | THRESH = 0.2 114 | 115 | eps = 0.0025 116 | mydmap = dm.DiffusionMap.from_sklearn(n_evecs=4, alpha=1.0, k=100, epsilon=eps) 117 | mydmap.fit(data) 118 | 119 | # Check that relative error values are beneath tolerance. 120 | errors_eval = abs((mydmap.evals- real_evals)/real_evals) 121 | total_error = np.max(errors_eval) 122 | assert(total_error < THRESH) 123 | 124 | def test_2Dstrip_evecs(self, uniform_2d_data): 125 | """ 126 | Test that we compute the correct eigenvectors (cosines) on a 2d strip of length 2*pi. 127 | Diffusion map parameters in this test are hand-selected to give good results. 
128 | Eigenvector approximation will fail if epsilon is set way too small or too large (robust). 129 | """ 130 | # Setup true values to test again. 131 | # real_evecs = cos(kx*x)*cos(ky*y) for kx = 0.5*[1 0 2 1] and ky = [0 1 0 1]. 132 | # Setup data and accuracy threshold 133 | data, X, Y = uniform_2d_data 134 | THRESH = 0.01 135 | 136 | eps = 0.0025 137 | mydmap = dm.DiffusionMap.from_sklearn(n_evecs=4, alpha=1.0, k=100, epsilon=eps) 138 | mydmap.fit(data) 139 | errors_evec = [] 140 | errors_evec.append(abs(np.corrcoef(np.cos(0.5*1*X), mydmap.evecs[:, 0])[0, 1])) 141 | errors_evec.append(abs(np.corrcoef(np.cos(Y), mydmap.evecs[:, 1])[0, 1])) 142 | errors_evec.append(abs(np.corrcoef(np.cos(0.5*2*X), mydmap.evecs[:, 2])[0, 1])) 143 | errors_evec.append(abs(np.corrcoef(np.cos(0.5*1*X)*np.cos(Y), mydmap.evecs[:, 3])[0, 1])) 144 | 145 | # Check that relative error values are beneath tolerance. 146 | total_error = 1 - np.min(errors_evec) 147 | assert(total_error < THRESH) 148 | 149 | def test_sphere_evals(self, spherical_data): 150 | """ 151 | Test that we compute the correct eigenvalues on a 2d sphere embedded in 3d. 152 | Diffusion map parameters in this test are hand-selected to give good results. 153 | Eigenvalue approximation will fail if k is set too small, or epsilon not optimal (sensitive). 154 | """ 155 | data, Phi, Theta = spherical_data 156 | # Setup true values to test against. 157 | real_evals = -1 * np.array([2, 2, 2, 6]) # =l(l+1) 158 | THRESH = 0.1 159 | eps = 0.015 160 | mydmap = dm.DiffusionMap.from_sklearn(n_evecs=4, alpha=1.0, k=400, epsilon=eps) 161 | mydmap.fit(data) 162 | 163 | # Check eigenvalues pass below error tolerance. 164 | errors_eval = abs((mydmap.evals- real_evals)/real_evals) 165 | max_eval_error = np.max(errors_eval) 166 | assert(max_eval_error < THRESH) 167 | 168 | def test_sphere_evecs(self, spherical_data): 169 | """ 170 | Test that we compute the correct eigenvectors (spherical harmonics) on a 2d sphere embedded in R^3. 
171 | Diffusion map parameters in this test are hand-selected to give good results. 172 | Eigenvector approximation will fail if epsilon is set way too small or too large (robust). 173 | """ 174 | data, Phi, Theta = spherical_data 175 | THRESH = 0.001 176 | eps = 0.015 177 | mydmap = dm.DiffusionMap.from_sklearn(n_evecs=4, alpha=1.0, k=400, epsilon=eps) 178 | mydmap.fit(data) 179 | # rotate sphere so that maximum of first DC is at the north pole 180 | northpole = np.argmax(mydmap.dmap[:, 0]) 181 | phi_n = Phi[northpole] 182 | theta_n = Theta[northpole] 183 | R = np.array([[np.sin(theta_n)*np.cos(phi_n), np.sin(theta_n)*np.sin(phi_n), -np.cos(theta_n)], 184 | [-np.sin(phi_n), np.cos(phi_n), 0], 185 | [np.cos(theta_n)*np.cos(phi_n), np.cos(theta_n)*np.sin(phi_n), np.sin(theta_n)]]) 186 | data_rotated = np.dot(R, data.transpose()) 187 | # check that error is beneath tolerance. 188 | evec_error = 1 - np.corrcoef(mydmap.dmap[:, 0], data_rotated[2, :])[0, 1] 189 | assert(evec_error < THRESH) 190 | 191 | def test_explicit_density(self, harmonic_1d_data): 192 | """ 193 | Test explicit density function. 194 | This test tests the implementation and is independent on all the other parameters. 195 | """ 196 | 197 | data = harmonic_1d_data 198 | density_fxn = lambda x: (1.0/(np.sqrt(np.pi * 2))) * np.exp(-0.5 * x**2).squeeze() 199 | 200 | mydmap = dm.DiffusionMap.from_sklearn(n_evecs=2, epsilon=0.1, alpha=0.5, k=100, density_fxn=density_fxn) 201 | mydmap.fit(data) 202 | 203 | err = np.max((np.abs(mydmap.q / np.linalg.norm(mydmap.q) - density_fxn(data) / np.linalg.norm(density_fxn(data))))) 204 | 205 | assert(err == 0) 206 | 207 | @pytest.mark.parametrize('epsilon', [0.1, 'bgh']) 208 | def test_explicit_density_kde(self, harmonic_1d_data, epsilon): 209 | """ 210 | Test the implicit kernel density estimator. Results depend on knearest neighbors 211 | and epsilon. This test is not very stable, tolerancy threshold is therefore chosen high. 
212 | """ 213 | THRESH = 0.2 214 | data = harmonic_1d_data 215 | # reject_outliers to stabilise 216 | m = 2 217 | data = data[abs(data - np.mean(data)) < m * np.std(data), np.newaxis] 218 | 219 | density_fxn = lambda x: (1.0/(np.sqrt(np.pi * 2))) * np.exp(-0.5 * x**2).squeeze() 220 | 221 | mydmap = dm.DiffusionMap.from_sklearn(n_evecs=2, epsilon=epsilon, alpha=0.5, k=100) 222 | dmap = mydmap.fit(data) 223 | 224 | true = density_fxn(data) / np.linalg.norm(density_fxn(data)) 225 | kde = mydmap.q / np.linalg.norm(mydmap.q) 226 | 227 | err = np.linalg.norm(true - kde) / np.linalg.norm(kde) 228 | 229 | assert(err < THRESH) 230 | 231 | class TestNystroem(object): 232 | @pytest.mark.parametrize('method', ['nystroem', 'power']) 233 | def test_2Dstrip_nystroem(self, uniform_2d_data, method): 234 | """ 235 | Test the nystroem extension in the transform() function. 236 | """ 237 | # Setup data and accuracy threshold 238 | data, X, Y = uniform_2d_data 239 | THRESH = 0.01 240 | # Setup diffusion map 241 | eps = 0.01 242 | mydmap = dm.DiffusionMap.from_sklearn(n_evecs=1, alpha=1.0, k=100, epsilon=eps, oos=method) 243 | mydmap.fit(data) 244 | # Setup values to test against (regular grid) 245 | x_test, y_test = np.meshgrid(np.linspace(0, 2*np.pi, 80), np.linspace(0, np.pi, 40)) 246 | X_test = np.array([x_test.ravel(), y_test.ravel()]).transpose() 247 | # call nystroem extension 248 | dmap_ext = mydmap.transform(X_test) 249 | # extract first diffusion coordinate and normalize 250 | V_test = dmap_ext[:, 0] 251 | V_test = V_test/np.linalg.norm(V_test) 252 | # true dominant eigenfunction = cos(0.5*x), normalize 253 | V_true = np.cos(.5*x_test).ravel() 254 | V_true = V_true/np.linalg.norm(V_true) 255 | # compute L2 error, deal with remaining sign ambiguity 256 | error = min([np.linalg.norm(V_true+V_test), np.linalg.norm(V_true-V_test)]) 257 | assert(error < THRESH) 258 | 259 | 260 | class TestWeighting(object): 261 | @pytest.mark.parametrize('epsilon', [0.002, 'bgh']) 262 | 
@pytest.mark.parametrize('oos', ['power', 'nystroem', False])
    @pytest.mark.parametrize('dmap_method', ['base', 'TMDmap'])
    def test_1Dstrip_evecs(self, epsilon, oos, dmap_method):
        """
        Test measure reweighting. We reweight the uniform distribution to
        approximate a Gaussian distribution. For numerical reasons, we truncate
        the domain to the interval [-5, 5].

        Here, we test eigenvector accuracy. Eigenvectors should be the
        probabalists Hermite polynomials.
        """
        # Setup data and accuracy threshold
        # X = np.linspace(-5., 5., 201)
        X = np.linspace(0, 2.5, 101)**2
        X = np.hstack([-1 * np.copy(X[1:][::-1]), X])
        # oos=False means "evaluate in-sample": reuse the training points and
        # fall back to the nystroem extension.
        if not oos:
            Y = X
            oos = 'nystroem'
        else:
            Y = np.linspace(-5., 5., 101)
        data_x = np.array([X]).transpose()
        data_y = np.array([Y]).transpose()
        EVEC_THRESH = 0.005
        EVAL_THRESH = 0.003
        # Setup true values to test against.
        real_evecs = [Y, Y**2-1, Y**3-3*Y,
                      Y**4-6*Y**2+3]  # Hermite polynomials
        real_evals = -1 * np.arange(1, 5)
        # Setup diffusion map
        if dmap_method == 'TMDmap':
            com_fxn = lambda y_j: np.exp(-.5*np.dot(y_j, y_j))
            mydmap = dm.TMDmap(alpha=1., n_evecs=4, epsilon=epsilon, k=100, change_of_measure=com_fxn, oos=oos)
        else:
            weight_fxn = lambda y_j: np.exp(-.25*np.dot(y_j, y_j))
            mydmap = dm.DiffusionMap.from_sklearn(alpha=1., n_evecs=4, epsilon=epsilon, k=100, weight_fxn=weight_fxn, oos=oos)

        # Fit data and build dmap
        mydmap.fit(data_x)
        evecs = mydmap.transform(data_y)
        errors_evec = []
        for k in range(4):
            errors_evec.append(abs(np.corrcoef(real_evecs[k], evecs[:, k])[0, 1]))

        # Check that relative evec error values are beneath tolerance.
        total_evec_error = 1 - np.min(errors_evec)
        assert(total_evec_error < EVEC_THRESH)
        # Check that relative eval error values are beneath tolerance.
        errors_eval = abs((mydmap.evals- real_evals)/real_evals)
        # NOTE(review): np.min asserts only on the *most* accurate eigenvalue;
        # the sibling tests all take np.max of the relative errors. Likely
        # intended np.max -- confirm the 0.003 threshold still passes before
        # changing.
        total_eval_error = np.min(errors_eval)
        assert(total_eval_error < EVAL_THRESH)


class TestBandwidths(object):
    @pytest.mark.parametrize('alpha_beta', [(0., -1./3), (-1./4, -1./2)])
    @pytest.mark.parametrize('explicit_bandwidth', [False, True])
    def test_bandwidth_norm(self, harmonic_1d_data, alpha_beta, explicit_bandwidth):
        data = harmonic_1d_data
        alpha, beta = alpha_beta
        X = data[:, 0]
        THRESHS = np.array([0.01, 0.01, 0.1])
        # Reference eigenfunctions: (scaled) probabilists' Hermite polynomials.
        ref_evecs = [X, X**2, (X**3 - 3 * X)/np.sqrt(6)]

        if explicit_bandwidth:
            bandwidth_type = lambda x: np.exp(-1. * x[:, 0]**2 * (beta / 2.))  # bandwidth is density^beta
        else:
            bandwidth_type = beta

        mydmap = dm.DiffusionMap.from_sklearn(n_evecs=3, epsilon='bgh', alpha=alpha,
                                              k=50, bandwidth_type=bandwidth_type, bandwidth_normalize=True)
        mydmap.fit_transform(data)
        errors_evec = []
        for k in np.arange(3):
            errors_evec.append(abs(np.corrcoef(ref_evecs[k], mydmap.evecs[:, k])[0, 1]))
        # Check that relative error values are beneath tolerance.
        total_error = 1 - np.array(errors_evec)
        assert((total_error < THRESHS).all())

    @pytest.mark.parametrize('alpha_beta', [(0., -1./3), (-1./4, -1./2)])
    @pytest.mark.parametrize('explicit_bandwidth', [False, True])
    def test_bandwidth_norm_oos(self, harmonic_1d_data, alpha_beta, explicit_bandwidth):
        # Same as test_bandwidth_norm, but evaluated at out-of-sample points
        # via the 'power' extension.
        data = harmonic_1d_data
        alpha, beta = alpha_beta
        oos_data = np.linspace(-1.5, 1.5, 51).reshape(-1, 1)
        Y = oos_data.ravel()
        THRESHS = np.array([0.01, 0.01, 0.1])
        ref_evecs = [Y, Y**2, (Y**3 - 3 * Y)/np.sqrt(6)]

        if explicit_bandwidth:
            bandwidth_type = lambda x: np.exp(-1. * x[:, 0]**2 * (beta / 2.))  # bandwidth is density^beta
        else:
            bandwidth_type = beta
        mydmap = dm.DiffusionMap.from_sklearn(n_evecs=3, epsilon='bgh', alpha=alpha,
                                              k=50, bandwidth_type=bandwidth_type, bandwidth_normalize=True,
                                              oos='power')
        mydmap.fit(data)
        oos_evecs = mydmap.transform(oos_data)
        errors_evec = []
        for k in np.arange(3):
            errors_evec.append(abs(np.corrcoef(ref_evecs[k], oos_evecs[:, k])[0, 1]))
        # Check that relative error values are beneath tolerance.
        total_error = 1 - np.array(errors_evec)
        assert((total_error < THRESHS).all())
--------------------------------------------------------------------------------
/tests/test_kernel.py:
--------------------------------------------------------------------------------
import numpy as np
import pytest

from pydiffmap import kernel
from scipy.spatial.distance import cdist
from sklearn.neighbors import NearestNeighbors
import scipy.sparse as sps

# Module-level fixtures shared by the parametrized tests below.
x_values_set = [np.vstack((np.linspace(-1, 1, 11), np.arange(11))).T]  # set of X vals
y_values_set = [None, np.vstack((np.linspace(-1, 1, 11), np.arange(11))).T, np.arange(6).reshape(-1, 2), np.arange(22).reshape(-1, 2)]  # all sets of Y's
bandwidth_fxns = [None, lambda x: np.ones(x.shape[0]), lambda x: x[:, 1]/10. + 1]
epsilons = [10., 1.]  # Possible epsilons


class TestKernel(object):
    # These decorators run the test against all possible y, epsilon values.
17 | @pytest.mark.parametrize('x_values', x_values_set) 18 | @pytest.mark.parametrize('y_values', y_values_set) 19 | @pytest.mark.parametrize('epsilon', epsilons) 20 | @pytest.mark.parametrize('bandwidth_fxn', bandwidth_fxns) 21 | @pytest.mark.parametrize('metric, metric_params', [ 22 | ('euclidean', None), 23 | ('minkowski', {'p': 1}) 24 | ]) 25 | def test_matrix_output(self, x_values, y_values, epsilon, bandwidth_fxn, metric, metric_params): 26 | """ 27 | Test that we are returning the correct kernel values. 28 | """ 29 | # Setup true values to test again. 30 | if y_values is None: 31 | y_values_ref = x_values 32 | else: 33 | y_values_ref = y_values 34 | if metric == 'minkowski': 35 | pw_distance = cdist(y_values_ref, x_values, metric='minkowski', p=metric_params['p']) 36 | else: 37 | pw_distance = cdist(y_values_ref, x_values, metric=metric) 38 | if bandwidth_fxn is None: 39 | ref_bandwidth_fxn = lambda x: np.ones(x.shape[0]) 40 | else: 41 | ref_bandwidth_fxn = bandwidth_fxn 42 | x_bandwidth = ref_bandwidth_fxn(x_values) 43 | y_bandwidth = ref_bandwidth_fxn(y_values_ref).reshape(-1, 1) 44 | scaled_sq_dists = pw_distance**2 / (x_bandwidth * y_bandwidth) 45 | true_values = np.exp(-1.*scaled_sq_dists/(4. * epsilon)) 46 | 47 | # Construct the kernel and fit to data. 48 | mykernel = kernel.Kernel(kernel_type='gaussian', metric=metric, 49 | metric_params=metric_params, epsilon=epsilon, 50 | k=x_values.shape[0],bandwidth_type=bandwidth_fxn) 51 | mykernel.fit(x_values) 52 | K_matrix = mykernel.compute(y_values).toarray() 53 | 54 | # Check that error values are beneath tolerance. 
55 | error_values = (K_matrix-true_values).ravel() 56 | total_error = np.linalg.norm(error_values) 57 | assert(total_error < 1E-8) 58 | 59 | @pytest.mark.parametrize('x_values', x_values_set) 60 | @pytest.mark.parametrize('y_values', y_values_set) 61 | @pytest.mark.parametrize('use_sparse', [True, False]) 62 | @pytest.mark.parametrize('metric, metric_params', [ 63 | ('euclidean', None), 64 | ('minkowski', {'p': 1}) 65 | ]) 66 | def test_sparse_input(self, x_values, y_values, metric, metric_params, use_sparse): 67 | """ 68 | Test that we are returning the correct kernel values. 69 | """ 70 | # Setup true values to test again. 71 | epsilon = 10. 72 | bandwidth_fxn = None 73 | if y_values is None: 74 | y_values_ref = x_values 75 | else: 76 | y_values_ref = y_values 77 | if metric == 'minkowski': 78 | pw_distance = cdist(y_values_ref, x_values, metric='minkowski', p=metric_params['p']) 79 | else: 80 | pw_distance = cdist(y_values_ref, x_values, metric=metric) 81 | if bandwidth_fxn is None: 82 | ref_bandwidth_fxn = lambda x: np.ones(x.shape[0]) 83 | else: 84 | ref_bandwidth_fxn = bandwidth_fxn 85 | if use_sparse: 86 | x_values = sps.csr_matrix(x_values) 87 | y_values_ref = sps.csr_matrix(y_values_ref) 88 | x_bandwidth = ref_bandwidth_fxn(x_values) 89 | y_bandwidth = ref_bandwidth_fxn(y_values_ref).reshape(-1, 1) 90 | scaled_sq_dists = pw_distance**2 / (x_bandwidth * y_bandwidth) 91 | true_values = np.exp(-1.*scaled_sq_dists/(4. * epsilon)) 92 | 93 | # Construct the kernel and fit to data. 94 | mykernel = kernel.Kernel(kernel_type='gaussian', metric=metric, 95 | metric_params=metric_params, epsilon=epsilon, 96 | k=x_values.shape[0], bandwidth_type=bandwidth_fxn) 97 | mykernel.fit(x_values) 98 | K_matrix = mykernel.compute(y_values).toarray() 99 | 100 | # Check that error values are beneath tolerance. 
101 | error_values = (K_matrix-true_values).ravel() 102 | total_error = np.linalg.norm(error_values) 103 | assert(total_error < 1E-8) 104 | 105 | @pytest.mark.parametrize('k', np.arange(2, 14, 2)) 106 | @pytest.mark.parametrize('neighbor_params', [{'algorithm': 'auto'}, {'algorithm': 'ball_tree'}]) 107 | @pytest.mark.parametrize('x_values', x_values_set) 108 | def test_neighborlists(self, x_values, k, neighbor_params): 109 | """ 110 | Test that neighborlisting gives the right number of elements. 111 | """ 112 | # Correct number of nearest neighbors. 113 | k0 = min(k, x_values.shape[0]) 114 | 115 | # Construct kernel matrix. 116 | mykernel = kernel.Kernel(kernel_type='gaussian', metric='euclidean', 117 | epsilon=1., k=k0, neighbor_params=neighbor_params) 118 | mykernel.fit(x_values) 119 | K_matrix = mykernel.compute(x_values) 120 | 121 | # Check if each row has correct number of elements 122 | row_has_k_elements = (K_matrix.nnz == k0*x_values.shape[0]) 123 | assert(row_has_k_elements) 124 | 125 | @pytest.mark.parametrize('eps_method', ['bgh', 'bgh_generous']) 126 | def test_auto_epsilon_selection(self, eps_method): 127 | X = np.arange(100).reshape(-1, 1) 128 | mykernel = kernel.Kernel(kernel_type='gaussian', metric='euclidean', 129 | epsilon=eps_method, k=10) 130 | mykernel.fit(X) 131 | if eps_method == 'bgh': 132 | assert(mykernel.epsilon_fitted == 0.25) 133 | else: 134 | assert(mykernel.epsilon_fitted == 0.50) 135 | assert(mykernel.d == 1.0) 136 | 137 | 138 | class TestKNN(object): 139 | def test_harmonic_kde(self, harmonic_1d_data): 140 | # Setup Data 141 | data = harmonic_1d_data 142 | Y = np.linspace(-2.5, 2.5, 201) 143 | oos_data = Y.reshape(-1, 1) 144 | ref_density = np.exp(-Y**2 / 2.) 
/ np.sqrt(2 * np.pi) 145 | THRESH = 0.003 146 | # Build kde object 147 | nneighbs = NearestNeighbors(n_neighbors=120) 148 | nneighbs.fit(data) 149 | my_kde = kernel.NNKDE(nneighbs, k=16) 150 | my_kde.fit() 151 | density = my_kde.compute(oos_data) 152 | error = np.sqrt(np.mean((density - ref_density)**2)) 153 | assert(error < THRESH) 154 | 155 | 156 | class TestBGHEpsilonSelection(object): 157 | @pytest.mark.parametrize('k', [10, 30, 100]) 158 | def test_1D_uniform_data(self, k): 159 | X = np.arange(100).reshape(-1, 1) 160 | neigh = NearestNeighbors(n_neighbors=k) 161 | sq_dist = neigh.fit(X).kneighbors_graph(X, mode='distance').data**2. 162 | epsilons = 2**np.arange(-20., 20.) 163 | eps, d = kernel.choose_optimal_epsilon_BGH(sq_dist, epsilons) 164 | assert(eps == 0.25) 165 | assert(d == 1.0) 166 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from pydiffmap import utils 5 | from scipy.sparse import csr_matrix 6 | from sklearn.neighbors import NearestNeighbors 7 | 8 | x_1d = np.arange(10) 9 | x_2d = np.arange(20).reshape(10, 2) 10 | y_1d = np.arange(10) + 0.5 11 | y_2d = np.arange(20).reshape(10, 2) + 0.5 12 | 13 | 14 | class TestLookupFunction(object): 15 | @pytest.mark.parametrize('x', [x_1d, x_2d]) 16 | @pytest.mark.parametrize('vals', [y_1d, y_2d]) 17 | def test_lookup_fxn(self, x, vals): 18 | N = len(x) 19 | shuffle_indices = np.arange(N) 20 | np.random.shuffle(shuffle_indices) 21 | lf = utils.lookup_fxn(x, vals) 22 | shuffle_y = np.array([lf(xi) for xi in x[shuffle_indices]]) 23 | assert((shuffle_y == vals[shuffle_indices]).all()) 24 | 25 | 26 | class TestSparseFromFxn(object): 27 | @pytest.mark.parametrize('Y', [y_2d, None]) 28 | def test_sparse_from_fxn(self, Y): 29 | nneighbors = NearestNeighbors(10) 30 | nneighbors.fit(x_2d) 31 | Y2 = Y 32 | if Y2 is None: 33 | Y2 = x_2d 
34 | K = nneighbors.kneighbors_graph(Y2, mode='connectivity') 35 | ref_mat = nneighbors.kneighbors_graph(Y2, mode='distance') 36 | dist_fxn = lambda Y, X: np.linalg.norm(Y - X) 37 | dist_mat = utils.sparse_from_fxn(x_2d, K, dist_fxn, Y) 38 | assert(np.linalg.norm((dist_mat - ref_mat).data) < 1e-10) 39 | 40 | 41 | class TestSymmetrization(): 42 | test_mat = csr_matrix([[0, 2.], [0, 3.]]) 43 | 44 | def test_and_symmetrization(self): 45 | ref_mat = np.array([[0, 0], [0, 3.]]) 46 | symmetrized = utils._symmetrize_matrix(self.test_mat, mode='and') 47 | symmetrized = symmetrized.toarray() 48 | assert (np.linalg.norm(ref_mat - symmetrized) == 0.) 49 | 50 | def test_or_symmetrization(self): 51 | ref_mat = np.array([[0, 2.], [2., 3.]]) 52 | symmetrized = utils._symmetrize_matrix(self.test_mat, mode='or') 53 | symmetrized = symmetrized.toarray() 54 | assert (np.linalg.norm(ref_mat - symmetrized) == 0.) 55 | 56 | def test_avg_symmetrization(self): 57 | ref_mat = np.array([[0, 1.], [1., 3.]]) 58 | symmetrized = utils._symmetrize_matrix(self.test_mat, mode='average') 59 | symmetrized = symmetrized.toarray() 60 | assert (np.linalg.norm(ref_mat - symmetrized) == 0.) 
61 | -------------------------------------------------------------------------------- /tests/test_visualization.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | import pytest 4 | 5 | from pydiffmap import diffusion_map as dm 6 | 7 | 8 | @pytest.fixture(scope='module') 9 | def dummy_dmap(uniform_2d_data): 10 | data, X, Y = uniform_2d_data 11 | print(data) 12 | mydmap = dm.DiffusionMap.from_sklearn(n_evecs=3, k=5) 13 | mydmap.fit(data) 14 | return mydmap 15 | 16 | 17 | if sys.version_info[0] >= 3: 18 | from pydiffmap import visualization as viz 19 | import matplotlib.pyplot as plt 20 | 21 | class TestEmbeddingPlot(): 22 | @pytest.mark.parametrize('dim', [2, 3]) 23 | def test_no_kwargs(self, dummy_dmap, dim): 24 | mydmap = dummy_dmap 25 | fig = viz.embedding_plot(mydmap, dim=dim, scatter_kwargs=None, show=False) 26 | assert(fig) 27 | 28 | def test_fixed_coloring(self, dummy_dmap): 29 | mydmap = dummy_dmap 30 | scatter_kwargs = {'c': 'r'} 31 | true_coloring = (1.0, 0., 0., 1) 32 | fig = viz.embedding_plot(mydmap, scatter_kwargs=scatter_kwargs, show=False) 33 | SC = fig.axes[0].collections[0] 34 | assert(np.all(SC._facecolors[0] == true_coloring)) 35 | 36 | @pytest.mark.parametrize('size', [4., np.arange(1, 82)]) 37 | def test_size(self, dummy_dmap, size): 38 | mydmap = dummy_dmap 39 | scatter_kwargs = {'s': size} 40 | fig = viz.embedding_plot(mydmap, scatter_kwargs=scatter_kwargs, show=False) 41 | SC = fig.axes[0].collections[0] 42 | actual_sizes = SC.get_sizes() 43 | assert(np.all(actual_sizes == size)) 44 | 45 | @pytest.mark.parametrize('cmap', [None, 'Blues', plt.cm.Spectral]) 46 | def test_colormap(self, dummy_dmap, cmap): 47 | # This just tests if the code runs... 48 | # Replace with something more stringent? 
49 | mydmap = dummy_dmap 50 | scatter_kwargs = {'c': mydmap.dmap[:, 0], 'cmap': cmap} 51 | fig = viz.embedding_plot(mydmap, scatter_kwargs=scatter_kwargs, show=False) 52 | assert(fig) 53 | 54 | class TestDataPlot(): 55 | def test_no_kwargs(self, dummy_dmap): 56 | mydmap = dummy_dmap 57 | fig = viz.data_plot(mydmap, scatter_kwargs=None, show=False) 58 | assert(fig) 59 | 60 | @pytest.mark.parametrize('size', [4., np.arange(1, 82)]) 61 | def test_size(self, dummy_dmap, size): 62 | mydmap = dummy_dmap 63 | scatter_kwargs = {'s': size} 64 | fig = viz.data_plot(mydmap, 1, scatter_kwargs=scatter_kwargs, show=False) 65 | SC = fig.axes[0].collections[0] 66 | actual_sizes = SC.get_sizes() 67 | assert(np.all(actual_sizes == size)) 68 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | ; a generative tox configuration, see: https://tox.readthedocs.io/en/latest/config.html#generative-envlist 2 | [testenv:bootstrap] 3 | deps = 4 | jinja2 5 | matrix 6 | tox 7 | skip_install = true 8 | commands = 9 | python ci/bootstrap.py --no-env 10 | passenv = 11 | * 12 | 13 | [tox] 14 | envlist = 15 | clean, 16 | check, 17 | {py27,py34,py35,py36,py37}, 18 | report, 19 | docs 20 | 21 | [testenv] 22 | basepython = 23 | py27: {env:TOXPYTHON:python2.7} 24 | py35: {env:TOXPYTHON:python3.5} 25 | {py36,docs}: {env:TOXPYTHON:python3.6} 26 | py37: {env:TOXPYTHON:python3.7} 27 | {bootstrap,clean,check,report,codecov}: {env:TOXPYTHON:python3} 28 | setenv = 29 | PYTHONPATH={toxinidir}/tests 30 | PYTHONUNBUFFERED=yes 31 | passenv = 32 | * 33 | usedevelop = false 34 | deps = 35 | pytest 36 | pytest-travis-fold 37 | pytest-cov 38 | commands = 39 | {posargs:pytest --cov --cov-report=term-missing -vv tests} 40 | 41 | [testenv:check] 42 | deps = 43 | docutils 44 | check-manifest 45 | flake8 46 | readme-renderer 47 | pygments 48 | isort 49 | skip_install = true 50 | commands = 51 | python 
setup.py check --strict --metadata --restructuredtext 52 | check-manifest {toxinidir} 53 | # flake8 src tests setup.py 54 | # isort --verbose --check-only --diff --recursive src tests setup.py 55 | 56 | [testenv:docs] 57 | deps = 58 | -r{toxinidir}/docs/requirements.txt 59 | commands = 60 | sphinx-build {posargs:-E} -b html docs dist/docs 61 | # sphinx-build -b linkcheck docs dist/docs 62 | 63 | [testenv:codecov] 64 | deps = 65 | codecov 66 | skip_install = true 67 | commands = 68 | codecov [] 69 | 70 | 71 | [testenv:report] 72 | deps = coverage 73 | skip_install = true 74 | commands = 75 | coverage report 76 | coverage html 77 | 78 | [testenv:clean] 79 | commands = coverage erase 80 | skip_install = true 81 | deps = coverage 82 | 83 | --------------------------------------------------------------------------------