├── .bumpversion.cfg ├── .coveragerc ├── .editorconfig ├── .gitignore ├── .travis.yml ├── ACKNOWLEDGEMENTS.rst ├── AUTHORS.rst ├── CHANGELOG.rst ├── CONTRIBUTING.rst ├── LICENSE ├── MANIFEST.in ├── README.rst ├── ci ├── bootstrap.py ├── requirements.txt └── templates │ └── .travis.yml ├── docs ├── Makefile ├── acknowledgements.rst ├── authors.rst ├── changelog.rst ├── conf.py ├── contributing.rst ├── index.rst ├── installation.rst ├── jupyter notebook tutorials │ ├── Four_Wells │ │ ├── Four_Wells.rst │ │ ├── output_11_0.png │ │ ├── output_15_0.png │ │ ├── output_18_0.png │ │ ├── output_21_0.png │ │ ├── output_26_0.png │ │ ├── output_4_0.png │ │ └── output_8_0.png │ ├── Metrics │ │ ├── Metrics.rst │ │ ├── output_10_0.png │ │ └── output_5_0.png │ ├── Spherical_Harmonics │ │ ├── Spherical_Harmonics.rst │ │ ├── output_10_0.png │ │ ├── output_15_1.png │ │ └── output_9_0.png │ ├── Swiss_Roll │ │ ├── Swiss_Roll.rst │ │ ├── output_10_0.png │ │ ├── output_10_1.png │ │ ├── output_12_1.png │ │ ├── output_8_0.png │ │ └── output_8_1.png │ └── index.rst ├── readme.rst ├── reference │ ├── diffusion_map.rst │ ├── index.rst │ ├── kernel.rst │ └── visualization.rst ├── requirements.txt ├── spelling_wordlist.txt ├── theory.rst └── usage.rst ├── examples ├── Data │ ├── 4wells_traj.npy │ ├── dimer_energy.npy │ └── dimer_trajectory.npy ├── Four_Wells.ipynb ├── Metrics.ipynb ├── Spherical_Harmonics.ipynb └── Swiss_Roll.ipynb ├── setup.cfg ├── setup.py ├── src └── pydiffmap │ ├── __init__.py │ ├── diffusion_map.py │ ├── kernel.py │ ├── utils.py │ └── visualization.py ├── tests ├── conftest.py ├── test_diffusionmap.py ├── test_kernel.py ├── test_utils.py └── test_visualization.py └── tox.ini /.bumpversion.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.2.0 3 | commit = True 4 | tag = True 5 | 6 | [bumpversion:file:setup.py] 7 | search = version='{current_version}' 8 | replace = version='{new_version}' 9 | 10 | 
[bumpversion:file:README.rst] 11 | search = v{current_version}. 12 | replace = v{new_version}. 13 | 14 | [bumpversion:file:docs/conf.py] 15 | search = version = release = '{current_version}' 16 | replace = version = release = '{new_version}' 17 | 18 | [bumpversion:file:src/pydiffmap/__init__.py] 19 | search = __version__ = '{current_version}' 20 | replace = __version__ = '{new_version}' 21 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [paths] 2 | source = 3 | src/ 4 | */site-packages/ 5 | 6 | [run] 7 | branch = true 8 | source = 9 | pydiffmap 10 | tests 11 | parallel = true 12 | 13 | [report] 14 | show_missing = true 15 | precision = 2 16 | omit = *migrations* 17 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # see http://editorconfig.org 2 | root = true 3 | 4 | [*] 5 | end_of_line = lf 6 | trim_trailing_whitespace = true 7 | insert_final_newline = true 8 | indent_style = space 9 | indent_size = 4 10 | charset = utf-8 11 | 12 | [*.{bat,cmd,ps1}] 13 | end_of_line = crlf 14 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | dist 10 | build 11 | eggs 12 | .eggs 13 | parts 14 | bin 15 | var 16 | sdist 17 | wheelhouse 18 | develop-eggs 19 | .installed.cfg 20 | lib 21 | lib64 22 | venv*/ 23 | pyvenv*/ 24 | 25 | # Installer logs 26 | pip-log.txt 27 | 28 | # Unit test / coverage reports 29 | .coverage 30 | .tox 31 | .coverage.* 32 | nosetests.xml 33 | coverage.xml 34 | htmlcov 35 | .pytest_cache 36 | 37 | # Translations 38 | *.mo 39 | 40 | # Mr Developer 41 | .mr.developer.cfg 42 | 
.project 43 | .pydevproject 44 | .idea 45 | *.iml 46 | *.komodoproject 47 | 48 | # Complexity 49 | output/*.html 50 | output/*/index.html 51 | 52 | # Sphinx 53 | docs/_build 54 | 55 | .DS_Store 56 | *~ 57 | .*.sw[po] 58 | .build 59 | .ve 60 | .env 61 | .cache 62 | .pytest 63 | .bootstrap 64 | .appveyor.token 65 | *.bak 66 | 67 | # Ipython Notebooks 68 | examples/*.ipynb_checkpoints 69 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | dist: xenial 3 | cache: false 4 | env: 5 | global: 6 | - LD_PRELOAD=/lib/x86_64-linux-gnu/libSegFault.so 7 | - SEGFAULT_SIGNALS=all 8 | matrix: 9 | include: 10 | - python: '3.6' 11 | env: 12 | - TOXENV=check 13 | - python: '3.6' 14 | env: 15 | - TOXENV=docs 16 | - env: 17 | - TOXENV=py27,codecov 18 | python: '2.7' 19 | - env: 20 | - TOXENV=py35,codecov 21 | python: '3.5' 22 | - env: 23 | - TOXENV=py36,codecov 24 | python: '3.6' 25 | - env: 26 | - TOXENV=py37,codecov 27 | python: '3.7' 28 | before_install: 29 | - python --version 30 | - uname -a 31 | - lsb_release -a || true 32 | install: 33 | - python -mpip install --progress-bar=off tox -rci/requirements.txt 34 | - virtualenv --version 35 | - easy_install --version 36 | - pip --version 37 | - tox --version 38 | script: 39 | - tox -v 40 | after_failure: 41 | - more .tox/log/* | cat 42 | - more .tox/*/log/* | cat 43 | notifications: 44 | email: 45 | on_success: never 46 | on_failure: always 47 | -------------------------------------------------------------------------------- /ACKNOWLEDGEMENTS.rst: -------------------------------------------------------------------------------- 1 | 2 | Acknowledgements 3 | ================ 4 | 5 | This work was partially funded by grant EPSR EP/P006175/1 as well as the Molecular Sciences Software Institute (MolSSI). 
Computing resources were provided in part by the University of Chicago Research Computing Center (RCC). 6 | We also want to thank the following scientists for their input and advice: 7 | 8 | - Prof. Dimitris Giannakis for help in implementing the automatic bandwidth selection algorithm. 9 | -------------------------------------------------------------------------------- /AUTHORS.rst: -------------------------------------------------------------------------------- 1 | 2 | Authors 3 | ======= 4 | 5 | * Ralf Banisch 6 | * Erik Henning Thiede 7 | * Zofia Trstanova 8 | 9 | -------------------------------------------------------------------------------- /CHANGELOG.rst: -------------------------------------------------------------------------------- 1 | 2 | Changelog 3 | ========= 4 | 5 | 0.2.0.1 (2019-02-04) 6 | -------------------- 7 | 8 | New Features 9 | ~~~~~~~~~~~~ 10 | * Added a more generous epsilon procedure for convenience. 11 | 12 | 0.2.0 (2019-02-01) 13 | ------------------ 14 | 15 | New Features 16 | ~~~~~~~~~~~~ 17 | * Added support for user-provided kernel functions. 18 | * Added a utility for building a sparse matrix from a function on the data. 19 | * (Re)added separate TMDmap class wrapping base diffusion map class to 20 | allow for easier construction of TMDmaps. 21 | * Added ability to explicitly provide the sampled density for q^alpha normalization. 22 | * Added Variable Bandwidth Diffusion Maps. 23 | * Added a new out-of-sample extension method that should work for variable bandwidth methods. 24 | 25 | Tweaks and Modifications 26 | ~~~~~~~~~~~~~~~~~~~~~~~~ 27 | * Changed from exp^(-d^2) to exp^(-d^2/4) convention. 28 | * Moved weight functionality into a function provided on initialization, 29 | rather than input values, and added a helper function that allows values to 30 | be read from a lookup table. 31 | * Improved the Diffusion Map test suite. 32 | * Moved out-of-sample routines into separate functions. 
33 | * Moved matrix symmetrization into newly made utility file. 34 | * Adjusted constructor for the diffusion map to take the kernel object directly. 35 | 36 | Bugfixes 37 | ~~~~~~~~ 38 | * Fixed bug where weight matrices were not included for out of sample extension. 39 | 40 | Other 41 | ~~~~~ 42 | * Moved to MIT license. 43 | 44 | 0.1.0 (2017-12-06) 45 | ------------------ 46 | 47 | * Fixed setup.py issues. 48 | 49 | 0.1.0 (2017-12-06) 50 | ------------------ 51 | 52 | * Added base functionality to the code. 53 | -------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Contributing 3 | ============ 4 | 5 | Contributions are welcome, and they are greatly appreciated! Every 6 | little bit helps, and credit will always be given. 7 | 8 | Bug reports 9 | =========== 10 | 11 | When `reporting a bug `_ please include: 12 | 13 | * Your operating system name and version. 14 | * Any details about your local setup that might be helpful in troubleshooting. 15 | * Detailed steps to reproduce the bug. 16 | 17 | Documentation improvements 18 | ========================== 19 | 20 | pyDiffMap could always use more documentation, whether as part of the 21 | official pyDiffMap docs, in docstrings, or even on the web in blog posts, 22 | articles, and such. 23 | 24 | Feature requests and feedback 25 | ============================= 26 | 27 | The best way to send feedback is to file an issue at https://github.com/DiffusionMapsAcademics/pyDiffMap/issues. 28 | 29 | If you are proposing a feature: 30 | 31 | * Explain in detail how it would work. 32 | * Keep the scope as narrow as possible, to make it easier to implement. 33 | * Remember that this is a volunteer-driven project, and that code contributions are welcome :) 34 | 35 | Development 36 | =========== 37 | 38 | To set up `python-pydiffmap` for local development: 39 | 40 | 1. 
Fork `python-pydiffmap `_ 41 | (look for the "Fork" button). 42 | 2. Clone your fork locally:: 43 | 44 | git clone git@github.com:your_name_here/python-pydiffmap.git 45 | 46 | 3. Create a branch for local development:: 47 | 48 | git checkout -b name-of-your-bugfix-or-feature 49 | 50 | Now you can make your changes locally. 51 | 52 | 4. When you're done making changes, run all the checks, doc builder and spell checker with `tox `_ one command:: 53 | 54 | tox 55 | 56 | 5. Commit your changes and push your branch to GitHub:: 57 | 58 | git add . 59 | git commit -m "Your detailed description of your changes." 60 | git push origin name-of-your-bugfix-or-feature 61 | 62 | 6. Submit a pull request through the GitHub website. 63 | 64 | Pull Request Guidelines 65 | ----------------------- 66 | 67 | If you need some code review or feedback while you're developing the code just make the pull request. 68 | 69 | For merging, you should: 70 | 71 | 1. Include passing tests (run ``tox``) [1]_. 72 | 2. Update documentation when there's new API, functionality etc. 73 | 3. Add a note to ``CHANGELOG.rst`` about the changes. 74 | 4. Add yourself to ``AUTHORS.rst``. 75 | 76 | .. [1] If you don't have all the necessary python versions available locally you can rely on Travis - it will 77 | `run the tests `_ for each change you add in the pull request. 78 | 79 | It will be slower though ... 
80 | 81 | Tips 82 | ---- 83 | 84 | To run a subset of tests:: 85 | 86 | tox -e envname -- py.test -k test_myfeature 87 | 88 | To run all the test environments in *parallel* (you need to ``pip install detox``):: 89 | 90 | detox 91 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Ralf Banisch, Erik Henning Thiede, Zofia Trstanova 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | graft docs 2 | graft src 3 | graft ci 4 | graft tests 5 | graft examples 6 | 7 | prune docs/_build 8 | prune examples/.ipynb_checkpoints 9 | 10 | include .bumpversion.cfg 11 | include .coveragerc 12 | include .cookiecutterrc 13 | include .editorconfig 14 | 15 | include *.rst 16 | include LICENSE 17 | 18 | include tox.ini .travis.yml appveyor.yml 19 | 20 | global-exclude *.py[cod] __pycache__ *.so *.dylib *.bak 21 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ======== 2 | Overview 3 | ======== 4 | 5 | .. start-badges 6 | 7 | .. list-table:: 8 | :stub-columns: 1 9 | 10 | * - docs 11 | - |docs| 12 | * - tests 13 | - | |travis| 14 | | |codecov| 15 | 16 | .. * - docs 17 | - |docs| 18 | * - tests 19 | - | |travis| 20 | | |codecov| 21 | * - package 22 | - | |version| |wheel| |supported-versions| |supported-implementations| 23 | | |commits-since| 24 | 25 | .. |docs| image:: https://readthedocs.org/projects/pydiffmap/badge/?version=master 26 | :target: http://pydiffmap.readthedocs.io/en/master/?badge=master 27 | :alt: Documentation Status 28 | 29 | .. |travis| image:: https://travis-ci.org/DiffusionMapsAcademics/pyDiffMap.svg?branch=master 30 | :alt: Travis-CI Build Status 31 | :target: https://travis-ci.org/DiffusionMapsAcademics/pyDiffMap 32 | 33 | .. |codecov| image:: https://codecov.io/gh/DiffusionMapsAcademics/pyDiffMap/branch/master/graph/badge.svg 34 | :alt: Coverage Status 35 | :target: https://codecov.io/gh/DiffusionMapsAcademics/pyDiffMap 36 | 37 | .. .. 
|commits-since| image:: https://img.shields.io/github/commits-since/DiffusionMapsAcademics/pyDiffMap/v0.1.0.svg 38 | :alt: Commits since latest release 39 | :target: https://github.com/DiffusionMapsAcademics/pyDiffMap/compare/v0.1.0...master 40 | 41 | .. .. |version| image:: https://img.shields.io/pypi/v/pyDiffMap.svg 42 | :alt: PyPI Package latest release 43 | :target: https://pypi.python.org/pypi/pyDiffMap 44 | 45 | .. .. |commits-since| image:: https://img.shields.io/github/commits-since/DiffusionMapsAcademics/pyDiffMap/v0.1.0.svg 46 | :alt: Commits since latest release 47 | :target: https://github.com/DiffusionMapsAcademics/pyDiffMap/compare/v0.1.0...master 48 | 49 | .. .. |wheel| image:: https://img.shields.io/pypi/wheel/pyDiffMap.svg 50 | :alt: PyPI Wheel 51 | :target: https://pypi.python.org/pypi/pyDiffMap 52 | 53 | .. .. |supported-versions| image:: https://img.shields.io/pypi/pyversions/pyDiffMap.svg 54 | :alt: Supported versions 55 | :target: https://pypi.python.org/pypi/pyDiffMap 56 | 57 | .. .. |supported-implementations| image:: https://img.shields.io/pypi/implementation/pyDiffMap.svg 58 | :alt: Supported implementations 59 | :target: https://pypi.python.org/pypi/pyDiffMap 60 | 61 | 62 | .. end-badges 63 | 64 | This is the home of the documentation for pyDiffMap, an open-source project to develop a robust and accessible diffusion map code for public use. Our documentation is currently under construction, please bear with us. 65 | 66 | * Free software: MIT License. 67 | 68 | Installation 69 | ============ 70 | 71 | Pydiffmap is installable using pip. You can install it using the command 72 | 73 | :: 74 | 75 | pip install pyDiffMap 76 | 77 | You can also install the package directly from the source directly by downloading the package from github and running the command below, optionally with the "-e" flag for an editable install. 
78 | 79 | :: 80 | 81 | pip install [source_directory] 82 | 83 | Documentation 84 | ============= 85 | 86 | https://pyDiffMap.readthedocs.io/ 87 | 88 | Development 89 | =========== 90 | 91 | To run the all tests run:: 92 | 93 | tox 94 | 95 | Note, to combine the coverage data from all the tox environments run: 96 | 97 | .. list-table:: 98 | :widths: 10 90 99 | :stub-columns: 1 100 | 101 | - - Windows 102 | - :: 103 | 104 | set PYTEST_ADDOPTS=--cov-append 105 | tox 106 | 107 | - - Other 108 | - :: 109 | 110 | PYTEST_ADDOPTS=--cov-append tox 111 | 112 | If you don't have tox installed, you can also run the python tests directly with 113 | 114 | :: 115 | 116 | pytest 117 | 118 | -------------------------------------------------------------------------------- /ci/bootstrap.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from __future__ import absolute_import 4 | from __future__ import print_function 5 | from __future__ import unicode_literals 6 | 7 | import os 8 | import subprocess 9 | import sys 10 | from os.path import abspath 11 | from os.path import dirname 12 | from os.path import exists 13 | from os.path import join 14 | 15 | base_path = dirname(dirname(abspath(__file__))) 16 | 17 | 18 | def check_call(args): 19 | print("+", *args) 20 | subprocess.check_call(args) 21 | 22 | 23 | def exec_in_env(): 24 | env_path = join(base_path, ".tox", "bootstrap") 25 | if sys.platform == "win32": 26 | bin_path = join(env_path, "Scripts") 27 | else: 28 | bin_path = join(env_path, "bin") 29 | if not exists(env_path): 30 | import subprocess 31 | 32 | print("Making bootstrap env in: {0} ...".format(env_path)) 33 | try: 34 | check_call([sys.executable, "-m", "venv", env_path]) 35 | except subprocess.CalledProcessError: 36 | try: 37 | check_call([sys.executable, "-m", "virtualenv", env_path]) 38 | except subprocess.CalledProcessError: 39 | check_call(["virtualenv", env_path]) 40 | 
print("Installing `jinja2` into bootstrap environment...") 41 | check_call([join(bin_path, "pip"), "install", "jinja2", "tox"]) 42 | python_executable = join(bin_path, "python") 43 | if not os.path.exists(python_executable): 44 | python_executable += '.exe' 45 | 46 | print("Re-executing with: {0}".format(python_executable)) 47 | print("+ exec", python_executable, __file__, "--no-env") 48 | os.execv(python_executable, [python_executable, __file__, "--no-env"]) 49 | 50 | def main(): 51 | import jinja2 52 | 53 | print("Project path: {0}".format(base_path)) 54 | 55 | jinja = jinja2.Environment( 56 | loader=jinja2.FileSystemLoader(join(base_path, "ci", "templates")), 57 | trim_blocks=True, 58 | lstrip_blocks=True, 59 | keep_trailing_newline=True 60 | ) 61 | 62 | tox_environments = [ 63 | line.strip() 64 | # 'tox' need not be installed globally, but must be importable 65 | # by the Python that is running this script. 66 | # This uses sys.executable the same way that the call in 67 | # cookiecutter-pylibrary/hooks/post_gen_project.py 68 | # invokes this bootstrap.py itself. 
69 | for line in subprocess.check_output([sys.executable, '-m', 'tox', '--listenvs'], universal_newlines=True).splitlines() 70 | ] 71 | tox_environments = [line for line in tox_environments if line.startswith('py')] 72 | 73 | for name in os.listdir(join("ci", "templates")): 74 | with open(join(base_path, name), "w") as fh: 75 | fh.write(jinja.get_template(name).render(tox_environments=tox_environments)) 76 | print("Wrote {}".format(name)) 77 | print("DONE.") 78 | 79 | 80 | if __name__ == "__main__": 81 | args = sys.argv[1:] 82 | if args == ["--no-env"]: 83 | main() 84 | elif not args: 85 | exec_in_env() 86 | else: 87 | print("Unexpected arguments {0}".format(args), file=sys.stderr) 88 | sys.exit(1) 89 | 90 | -------------------------------------------------------------------------------- /ci/requirements.txt: -------------------------------------------------------------------------------- 1 | virtualenv>=16.6.0 2 | pip>=19.1.1 3 | setuptools>=18.0.1 4 | -------------------------------------------------------------------------------- /ci/templates/.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | dist: xenial 3 | cache: false 4 | env: 5 | global: 6 | - LD_PRELOAD=/lib/x86_64-linux-gnu/libSegFault.so 7 | - SEGFAULT_SIGNALS=all 8 | matrix: 9 | include: 10 | - python: '3.6' 11 | env: 12 | - TOXENV=check 13 | - python: '3.6' 14 | env: 15 | - TOXENV=docs 16 | {%- for env in tox_environments %}{{ '' }} 17 | - env: 18 | - TOXENV={{ env }},codecov 19 | {%- if env.startswith('pypy3') %}{{ '' }} 20 | - TOXPYTHON=pypy3 21 | python: 'pypy3' 22 | {%- elif env.startswith('pypy') %}{{ '' }} 23 | python: 'pypy' 24 | {%- else %}{{ '' }} 25 | python: '{{ '{0[2]}.{0[3]}'.format(env) }}' 26 | {%- endif %} 27 | {%- endfor %}{{ '' }} 28 | before_install: 29 | - python --version 30 | - uname -a 31 | - lsb_release -a || true 32 | install: 33 | - python -mpip install --progress-bar=off tox -rci/requirements.txt 34 | - 
virtualenv --version 35 | - easy_install --version 36 | - pip --version 37 | - tox --version 38 | script: 39 | - tox -v 40 | after_failure: 41 | - more .tox/log/* | cat 42 | - more .tox/*/log/* | cat 43 | notifications: 44 | email: 45 | on_success: never 46 | on_failure: always 47 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = pydiffmap 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/acknowledgements.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../ACKNOWLEDGEMENTS.rst 2 | -------------------------------------------------------------------------------- /docs/authors.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../AUTHORS.rst 2 | -------------------------------------------------------------------------------- /docs/changelog.rst: -------------------------------------------------------------------------------- 1 | .. 
include:: ../CHANGELOG.rst 2 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import unicode_literals 3 | 4 | import os 5 | 6 | 7 | extensions = [ 8 | 'sphinx.ext.autodoc', 9 | 'sphinx.ext.coverage', 10 | 'sphinx.ext.extlinks', 11 | 'sphinx.ext.ifconfig', 12 | 'sphinx.ext.mathjax', 13 | 'sphinx.ext.napoleon', 14 | 'sphinx.ext.viewcode', 15 | ] 16 | # 'sphinx.ext.autosummary', 17 | # 'sphinx.ext.todo', 18 | # 'sphinx.ext.doctest', 19 | if os.getenv('SPELLCHECK'): 20 | extensions += 'sphinxcontrib.spelling', 21 | spelling_show_suggestions = True 22 | spelling_lang = 'en_US' 23 | 24 | show_authors = False 25 | 26 | source_suffix = '.rst' 27 | master_doc = 'index' 28 | project = u'pydiffmap' 29 | year = '2017' 30 | author = u'Ralf Banisch, Erik Henning Thiede, Zofia Trstanova' 31 | copyright = '{0}, {1}'.format(year, author) 32 | version = release = u'0.2.0.1' 33 | 34 | pygments_style = 'trac' 35 | templates_path = ['.'] 36 | extlinks = { 37 | 'issue': ('https://github.com/DiffusionMapsAcademics/pydiffmap/issues/%s', '#'), 38 | 'pr': ('https://github.com/DiffusionMapsAcademics/pydiffmap/pull/%s', 'PR #'), 39 | } 40 | # on_rtd is whether we are on readthedocs.org 41 | on_rtd = os.environ.get('READTHEDOCS', None) == 'True' 42 | 43 | if not on_rtd: # only set the theme if we're building docs locally 44 | html_theme = 'sphinx_rtd_theme' 45 | 46 | html_use_smartypants = True 47 | html_last_updated_fmt = '%b %d, %Y' 48 | html_split_index = False 49 | html_sidebars = { 50 | '**': ['searchbox.html', 'globaltoc.html', 'sourcelink.html'], 51 | } 52 | html_short_title = '%s-%s' % (project, version) 53 | 54 | napoleon_use_ivar = True 55 | napoleon_use_rtype = False 56 | napoleon_use_param = False 57 | -------------------------------------------------------------------------------- /docs/contributing.rst: 
-------------------------------------------------------------------------------- 1 | .. include:: ../CONTRIBUTING.rst 2 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to pydiffmap 2 | ==================== 3 | 4 | This is the home of the documentation for pyDiffMap, an open-source project to develop a robust and accessible diffusion map code for public use. Code can be found on our `github page`__. Our documentation is currently under construction, please bear with us. 5 | 6 | .. _github: https://github.com/DiffusionMapsAcademics/pyDiffMap/ 7 | 8 | __ github_ 9 | 10 | 11 | 12 | 13 | ======== 14 | Contents 15 | ======== 16 | 17 | .. toctree:: 18 | :maxdepth: 2 19 | 20 | readme 21 | installation 22 | theory 23 | usage 24 | jupyter notebook tutorials/index 25 | reference/index 26 | contributing 27 | authors 28 | acknowledgements 29 | changelog 30 | 31 | Indices and tables 32 | ================== 33 | 34 | * :ref:`genindex` 35 | * :ref:`modindex` 36 | * :ref:`search` 37 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | ============ 2 | Installation 3 | ============ 4 | 5 | At the command line:: 6 | 7 | pip install [source_dir] 8 | -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Four_Wells/Four_Wells.rst: -------------------------------------------------------------------------------- 1 | 2 | 2D Four-well potential 3 | ====================== 4 | 5 | .. 
code:: python 6 | 7 | import matplotlib.pyplot as plt 8 | import numpy as np 9 | 10 | from mpl_toolkits.mplot3d import Axes3D 11 | from pydiffmap import diffusion_map as dm 12 | 13 | %matplotlib inline 14 | 15 | Load sampled data: discretized Langevin dynamics at temperature T=1, 16 | friction 1, and time step size dt=0.01, with double-well potentials in x 17 | and y, with higher barrier in y. 18 | 19 | .. code:: python 20 | 21 | X=np.load('Data/4wells_traj.npy') 22 | print(X.shape) 23 | 24 | 25 | .. parsed-literal:: 26 | 27 | (9900, 2) 28 | 29 | 30 | .. code:: python 31 | 32 | def DW1(x): 33 | return 2.0*(np.linalg.norm(x)**2-1.0)**2 34 | 35 | def DW2(x): 36 | return 4.0*(np.linalg.norm(x)**2-1.0)**2 37 | 38 | def DW(x): 39 | return DW1(x[0]) + DW1(x[1]) 40 | 41 | from matplotlib import cm 42 | 43 | mx=5 44 | 45 | xe=np.linspace(-mx, mx, 100) 46 | ye=np.linspace(-mx, mx, 100) 47 | energyContours=np.zeros((100, 100)) 48 | for i in range(0,len(xe)): 49 | for j in range(0,len(ye)): 50 | xtmp=np.array([xe[i], ye[j]] ) 51 | energyContours[j,i]=DW(xtmp) 52 | 53 | levels = np.arange(0, 10, 0.5) 54 | plt.contour(xe, ye, energyContours, levels, cmap=cm.coolwarm) 55 | plt.scatter(X[:,0], X[:,1], s=5, c='k') 56 | plt.xlabel('X') 57 | plt.ylabel('Y') 58 | plt.xlim([-2,2]) 59 | plt.ylim([-2,2]) 60 | plt.show() 61 | 62 | 63 | 64 | 65 | .. image:: output_4_0.png 66 | 67 | 68 | Compute diffusion map embedding 69 | ------------------------------- 70 | 71 | .. code:: python 72 | 73 | mydmap = dm.DiffusionMap.from_sklearn(n_evecs = 2, epsilon = .1, alpha = 0.5, k=400, metric='euclidean') 74 | dmap = mydmap.fit_transform(X) 75 | 76 | 77 | .. parsed-literal:: 78 | 79 | 0.1 eps fitted 80 | 81 | 82 | Visualization 83 | ------------- 84 | 85 | We plot the first two diffusion coordinates against each other, colored 86 | by the x coordinate 87 | 88 | .. 
code:: python 89 | 90 | from pydiffmap.visualization import embedding_plot 91 | 92 | embedding_plot(mydmap, scatter_kwargs = {'c': X[:,0], 's': 5, 'cmap': 'coolwarm'}) 93 | 94 | plt.show() 95 | 96 | 97 | 98 | .. image:: output_8_0.png 99 | 100 | 101 | .. code:: python 102 | 103 | #from matplotlib import cm 104 | #plt.scatter(dmap[:,0], dmap[:,1], c=X[:,0], s=5, cmap=cm.coolwarm) 105 | 106 | #clb=plt.colorbar() 107 | #clb.set_label('X coordinate') 108 | #plt.xlabel('First dominant eigenvector') 109 | #plt.ylabel('Second dominant eigenvector') 110 | #plt.title('Diffusion Map Embedding') 111 | 112 | #plt.show() 113 | 114 | We visualize the data again, colored by the first eigenvector this time. 115 | 116 | .. code:: python 117 | 118 | from pydiffmap.visualization import data_plot 119 | 120 | data_plot(mydmap, scatter_kwargs = {'s': 5, 'cmap': 'coolwarm'}) 121 | plt.show() 122 | 123 | 124 | 125 | .. image:: output_11_0.png 126 | 127 | 128 | Target measure diffusion map 129 | ---------------------------- 130 | 131 | Compute Target Measure Diffusion Map with target distribution pi(q) = 132 | exp(-beta V(q)) with inverse temperature beta = 1. TMDmap can be seen as 133 | a special case where the weights are the target distribution, and 134 | alpha=1. 135 | 136 | .. code:: python 137 | 138 | V=DW 139 | beta=1 140 | change_of_measure = lambda x: np.exp(-beta * V(x)) 141 | mytdmap = dm.TMDmap(alpha=1.0, n_evecs = 2, epsilon = .1, 142 | k=400, change_of_measure=change_of_measure) 143 | tmdmap = mytdmap.fit_transform(X) 144 | 145 | 146 | .. parsed-literal:: 147 | 148 | 0.1 eps fitted 149 | 150 | 151 | .. code:: python 152 | 153 | embedding_plot(mytdmap, scatter_kwargs = {'c': X[:,0], 's': 5, 'cmap': 'coolwarm'}) 154 | 155 | plt.show() 156 | 157 | 158 | 159 | .. 
image:: output_15_0.png 160 | 161 | 162 | From the sampling at temperature 1/beta =1, we can compute diffusion map 163 | embedding at lower temperature T\_low = 1/beta\_low using TMDmap with 164 | target measure pi(q) = exp(-beta\_low V(q)). Here we set beta\_low = 10, 165 | and use the data obtained from sampling at higher temperature, i.e. 166 | pi(q) = exp(-beta V(q)) with beta = 1. 167 | 168 | .. code:: python 169 | 170 | V=DW 171 | beta_2=10 172 | change_of_measure_2 = lambda x: np.exp(-beta_2 * V(x)) 173 | mytdmap2 = dm.TMDmap(alpha=1.0, n_evecs = 2, epsilon = .1, 174 | k=400, change_of_measure=change_of_measure_2) 175 | tmdmap2 = mytdmap2.fit_transform(X) 176 | 177 | 178 | .. parsed-literal:: 179 | 180 | 0.1 eps fitted 181 | 182 | 183 | .. code:: python 184 | 185 | embedding_plot(mytdmap2, scatter_kwargs = {'c': X[:,0], 's': 5, 'cmap': 'coolwarm'}) 186 | 187 | plt.show() 188 | 189 | 190 | 191 | .. image:: output_18_0.png 192 | 193 | 194 | Kernel density estimate 195 | ----------------------- 196 | 197 | We can compute kernel density estimate using kde used in the diffusion 198 | map computation. 199 | 200 | .. code:: python 201 | 202 | plt.scatter(X[:,0], X[:,1], c = mytdmap.q, s=5, cmap=cm.coolwarm) 203 | 204 | clb=plt.colorbar() 205 | clb.set_label('q') 206 | plt.xlabel('First dominant eigenvector') 207 | plt.ylabel('Second dominant eigenvector') 208 | plt.title('TMDmap Embedding, beta=1') 209 | 210 | plt.show() 211 | 212 | 213 | 214 | .. image:: output_21_0.png 215 | 216 | 217 | Now we check how well we can approximate the target distribution by the 218 | formula in the paper (left dominant eigenvector times KDE). 219 | 220 | .. code:: python 221 | 222 | import scipy.sparse.linalg as spsl 223 | L = mytdmap.L 224 | [evals, evecs] = spsl.eigs(L.transpose(),k=1, which='LR') 225 | 226 | phi = np.real(evecs.ravel()) 227 | 228 | .. 
code:: python 229 | 230 | q_est = phi*mytdmap.q 231 | q_est = q_est/sum(q_est) 232 | 233 | target_distribution = np.array([change_of_measure(Xi) for Xi in X]) 234 | q_exact = target_distribution/sum(target_distribution) 235 | print(np.linalg.norm(q_est - q_exact,1)) 236 | 237 | 238 | .. parsed-literal:: 239 | 240 | 0.040391461721631335 241 | 242 | 243 | visualize both. there is no visible difference. 244 | 245 | .. code:: python 246 | 247 | plt.figure(figsize=(16,6)) 248 | 249 | ax = plt.subplot(121) 250 | SC1 = ax.scatter(X[:,0], X[:,1], c = q_est, s=5, cmap=cm.coolwarm, vmin=0, vmax=2E-4) 251 | 252 | ax.set_xlabel('x') 253 | ax.set_ylabel('y') 254 | ax.set_title('estimate of pi') 255 | plt.colorbar(SC1, ax=ax) 256 | 257 | 258 | ax2 = plt.subplot(122) 259 | SC2 = ax2.scatter(X[:,0], X[:,1], c = q_exact, s=5, cmap=cm.coolwarm, vmin=0, vmax=2E-4) 260 | plt.colorbar(SC2, ax=ax2) 261 | 262 | 263 | ax2.set_xlabel('x') 264 | ax2.set_ylabel('y') 265 | ax2.set_title('exact pi') 266 | 267 | plt.show() 268 | 269 | 270 | 271 | .. 
image:: output_26_0.png 272 | 273 | 274 | -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Four_Wells/output_11_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DiffusionMapsAcademics/pyDiffMap/22adc99faa83708e9ac05224015fa02c3a7f3c91/docs/jupyter notebook tutorials/Four_Wells/output_11_0.png -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Four_Wells/output_15_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DiffusionMapsAcademics/pyDiffMap/22adc99faa83708e9ac05224015fa02c3a7f3c91/docs/jupyter notebook tutorials/Four_Wells/output_15_0.png -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Four_Wells/output_18_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DiffusionMapsAcademics/pyDiffMap/22adc99faa83708e9ac05224015fa02c3a7f3c91/docs/jupyter notebook tutorials/Four_Wells/output_18_0.png -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Four_Wells/output_21_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DiffusionMapsAcademics/pyDiffMap/22adc99faa83708e9ac05224015fa02c3a7f3c91/docs/jupyter notebook tutorials/Four_Wells/output_21_0.png -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Four_Wells/output_26_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DiffusionMapsAcademics/pyDiffMap/22adc99faa83708e9ac05224015fa02c3a7f3c91/docs/jupyter notebook tutorials/Four_Wells/output_26_0.png 
-------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Four_Wells/output_4_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DiffusionMapsAcademics/pyDiffMap/22adc99faa83708e9ac05224015fa02c3a7f3c91/docs/jupyter notebook tutorials/Four_Wells/output_4_0.png -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Four_Wells/output_8_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DiffusionMapsAcademics/pyDiffMap/22adc99faa83708e9ac05224015fa02c3a7f3c91/docs/jupyter notebook tutorials/Four_Wells/output_8_0.png -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Metrics/Metrics.rst: -------------------------------------------------------------------------------- 1 | 2 | Diffusion maps with general metric 3 | ================================== 4 | 5 | In this notebook, we illustrate how to use an optional metric in the 6 | diffusion maps embedding. 7 | 8 | .. code:: python 9 | 10 | import matplotlib.pyplot as plt 11 | import numpy as np 12 | 13 | from mpl_toolkits.mplot3d import Axes3D 14 | from pydiffmap import diffusion_map as dm 15 | 16 | %matplotlib inline 17 | 18 | We import trajectory of two particles connected by a double-well 19 | potential, which is a function of a radius: V(r) = V\_DW(r). The dimer 20 | was simulated at 300K with Langevin dynamics using OpenMM. The obvious 21 | collective variable is the radius case and we demonstrate how the first 22 | dominant eigenvector obtained from the diffusion map clearly correlates 23 | with this reaction coordinate. As a metric, we use the root mean square 24 | deviation (RMSD) from the package 25 | https://pypi.python.org/pypi/rmsd/1.2.5. 26 | 27 | .. 
code:: python 28 | 29 | traj=np.load('Data/dimer_trajectory.npy') 30 | energy=np.load('Data/dimer_energy.npy') 31 | print('Loaded trajectory of '+repr(len(traj))+' steps of dimer molecule: '+repr(traj.shape[1])+' particles in dimension '+repr(traj.shape[2])+'.') 32 | 33 | 34 | .. parsed-literal:: 35 | 36 | Loaded trajectory of 1000 steps of dimer molecule: 2 particles in dimension 3. 37 | 38 | 39 | .. code:: python 40 | 41 | def compute_radius(X): 42 | return np.linalg.norm(X[:,0,:]-X[:,1,:], 2, axis=1) 43 | 44 | fig = plt.figure(figsize=[16,6]) 45 | ax = fig.add_subplot(121) 46 | 47 | radius= compute_radius(traj) 48 | cax2 = ax.scatter(range(len(radius)), radius, c=radius, s=20,alpha=0.90,cmap=plt.cm.Spectral) 49 | cbar = fig.colorbar(cax2) 50 | cbar.set_label('Radius') 51 | ax.set_xlabel('Simulation steps') 52 | ax.set_ylabel('Radius') 53 | 54 | 55 | ax2 = fig.add_subplot(122, projection='3d') 56 | 57 | L=2 58 | 59 | i=0 60 | 61 | ax2.scatter(traj[i,0,0], traj[i,0,1], traj[i,0,2], c='b', s=100, alpha=0.90, edgecolors='none', depthshade=True,) 62 | ax2.scatter(traj[i,1,0], traj[i,1,1], traj[i,1,2], c='r', s=100, alpha=0.90, edgecolors='none', depthshade=True,) 63 | 64 | ax2.set_xlim([-L, L]) 65 | ax2.set_ylim([-L, L]) 66 | ax2.set_zlim([-L, L]) 67 | 68 | ax2.set_xlabel('X') 69 | ax2.set_ylabel('Y') 70 | ax2.set_zlabel('Z') 71 | 72 | plt.show() 73 | 74 | 75 | 76 | 77 | .. image:: output_5_0.png 78 | 79 | 80 | .. code:: python 81 | 82 | # download from https://pypi.python.org/pypi/rmsd/1.2.5 83 | import rmsd 84 | 85 | 86 | def myRMSDmetric(arr1, arr2): 87 | """ 88 | This function is built under the assumption that the space dimension is 3!!! 89 | Requirement from sklearn radius_neighbors_graph: The callable should take two arrays as input and return one value indicating the distance between them. 
90 | Input: One row from reshaped XYZ trajectory as number of steps times nDOF 91 | Inside: Reshape to XYZ format and apply rmsd as r=rmsd(X[i], X[j]) 92 | Output: rmsd distance 93 | """ 94 | 95 | nParticles = len(arr1) / 3; 96 | assert (nParticles == int(nParticles)) 97 | 98 | X1 = arr1.reshape(int(nParticles), 3 ) 99 | X2 = arr2.reshape(int(nParticles), 3 ) 100 | 101 | X1 = X1 - rmsd.centroid(X1) 102 | X2 = X2 - rmsd.centroid(X2) 103 | 104 | return rmsd.kabsch_rmsd(X1, X2) 105 | 106 | 107 | 108 | Compute diffusion map embedding using the rmsd metric from above. 109 | 110 | .. code:: python 111 | 112 | epsilon=0.05 113 | 114 | Xresh=traj.reshape(traj.shape[0], traj.shape[1]*traj.shape[2]) 115 | mydmap = dm.DiffusionMap.from_sklearn(n_evecs = 1, epsilon = epsilon, alpha = 0.5, k=1000, metric=myRMSDmetric) 116 | dmap = mydmap.fit_transform(Xresh) 117 | 118 | 119 | .. parsed-literal:: 120 | 121 | 0.05 eps fitted 122 | 123 | 124 | Plot the dominant eigenvector over radius, to show the correlation with 125 | this collective variable. 126 | 127 | .. code:: python 128 | 129 | evecs = mydmap.evecs 130 | 131 | fig = plt.figure(figsize=[16,6]) 132 | ax = fig.add_subplot(121) 133 | 134 | ax.scatter(compute_radius(traj), evecs[:,0], c=evecs[:,0], s=10, cmap=plt.cm.Spectral) 135 | ax.set_xlabel('Radius') 136 | ax.set_ylabel('Dominant eigenvector') 137 | 138 | ax2 = fig.add_subplot(122) 139 | # 140 | cax2 = ax2.scatter(compute_radius(traj), energy, c=evecs[:,0], s=10, cmap=plt.cm.Spectral) 141 | ax2.set_xlabel('Radius') 142 | ax2.set_ylabel('Potential Energy') 143 | cbar = fig.colorbar(cax2) 144 | cbar.set_label('Dominant eigenvector') 145 | plt.show() 146 | 147 | 148 | 149 | .. 
image:: output_10_0.png 150 | 151 | 152 | -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Metrics/output_10_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DiffusionMapsAcademics/pyDiffMap/22adc99faa83708e9ac05224015fa02c3a7f3c91/docs/jupyter notebook tutorials/Metrics/output_10_0.png -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Metrics/output_5_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DiffusionMapsAcademics/pyDiffMap/22adc99faa83708e9ac05224015fa02c3a7f3c91/docs/jupyter notebook tutorials/Metrics/output_5_0.png -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Spherical_Harmonics/Spherical_Harmonics.rst: -------------------------------------------------------------------------------- 1 | 2 | Spherical Harmonics 3 | =================== 4 | 5 | In this notebook we try to reproduce the eigenfunctions of the Laplacian 6 | on the 2D sphere embedded in :math:`\mathbb{R}^3`. The eigenfunctions 7 | are the spherical harmonics :math:`Y_l^m(\theta, \phi)`. 8 | 9 | .. code:: python 10 | 11 | import numpy as np 12 | 13 | from pydiffmap import diffusion_map as dm 14 | from scipy.sparse import csr_matrix 15 | 16 | np.random.seed(100) 17 | 18 | import matplotlib.pyplot as plt 19 | from mpl_toolkits.mplot3d import Axes3D 20 | %matplotlib inline 21 | 22 | generate data on a Sphere 23 | ------------------------- 24 | 25 | we sample longitude and latitude uniformly and then transform to 26 | :math:`\mathbb{R}^3` using geographical coordinates (latidude is 27 | measured from the equator). 28 | 29 | .. 
code:: python 30 | 31 | m = 10000 32 | Phi = 2*np.pi*np.random.rand(m) - np.pi 33 | Theta = np.pi*np.random.rand(m) - 0.5*np.pi 34 | X = np.cos(Theta)*np.cos(Phi) 35 | Y = np.cos(Theta)*np.sin(Phi) 36 | Z = np.sin(Theta) 37 | data = np.array([X, Y, Z]).transpose() 38 | 39 | 40 | 41 | run diffusion maps 42 | ------------------ 43 | 44 | Now we initialize the diffusion map object and fit it to the dataset. We 45 | set n\_evecs = 4, and since we want to unbias with respect to the 46 | non-uniform sampling density we set alpha = 1.0. The epsilon parameter 47 | controls the scale and is set here by hand. The k parameter controls the 48 | neighbour lists, a smaller k will increase performance but decrease 49 | accuracy. 50 | 51 | .. code:: python 52 | 53 | eps = 0.01 54 | mydmap = dm.DiffusionMap.from_sklearn(n_evecs=4, epsilon=eps, alpha=1.0, k=400) 55 | mydmap.fit_transform(data) 56 | test_evals = -4./eps*(mydmap.evals - 1) 57 | print(test_evals) 58 | 59 | 60 | .. parsed-literal:: 61 | 62 | 0.01 eps fitted 63 | [1116.4945497 1143.35090854 1147.22344311 2378.50043128] 64 | 65 | 66 | The true eigenfunctions here are spherical harmonics 67 | :math:`Y_l^m(\theta, \phi)` and the true eigenvalues are 68 | :math:`\lambda_l = l(l+1)`. The eigenfunction corresponding to 69 | :math:`l=0` is the constant function, which we omit. Since :math:`l=1` 70 | has multiplicity three, this gives the benchmark eigenvalues [2, 2, 2, 71 | 6]. 72 | 73 | .. code:: python 74 | 75 | real_evals = np.array([2, 2, 2, 6]) 76 | test_evals = -4./eps*(mydmap.evals - 1) 77 | eval_error = np.abs(test_evals-real_evals)/real_evals 78 | print(test_evals) 79 | print(eval_error) 80 | 81 | 82 | ..
parsed-literal:: 83 | 84 | [1116.4945497 1143.35090854 1147.22344311 2378.50043128] 85 | [557.24727485 570.67545427 572.61172156 395.41673855] 86 | 87 | 88 | visualisation 89 | ------------- 90 | 91 | With pydiffmap's visualization toolbox, we can get a quick look at the 92 | embedding produced by the first two diffusion coordinates and the data 93 | colored by the first eigenfunction. 94 | 95 | .. code:: python 96 | 97 | from pydiffmap.visualization import embedding_plot, data_plot 98 | 99 | embedding_plot(mydmap, dim=3, scatter_kwargs = {'c': mydmap.dmap[:,0], 'cmap': 'Spectral'}) 100 | 101 | plt.show() 102 | 103 | 104 | 105 | .. image:: output_9_0.png 106 | 107 | 108 | .. code:: python 109 | 110 | data_plot(mydmap, dim=3, scatter_kwargs = {'cmap': 'Spectral'}) 111 | plt.show() 112 | 113 | 114 | 115 | .. image:: output_10_0.png 116 | 117 | 118 | Rotating the dataset 119 | -------------------- 120 | 121 | There is rotational symmetry in this dataset. To remove it, we define 122 | the 'north pole' to be the point where the first diffusion coordinate 123 | attains its maximum value. 124 | 125 | .. code:: python 126 | 127 | northpole = np.argmax(mydmap.dmap[:,0]) 128 | north = data[northpole,:] 129 | phi_n = Phi[northpole] 130 | theta_n = Theta[northpole] 131 | R = np.array([[np.sin(theta_n)*np.cos(phi_n), np.sin(theta_n)*np.sin(phi_n), -np.cos(theta_n)], 132 | [-np.sin(phi_n), np.cos(phi_n), 0], 133 | [np.cos(theta_n)*np.cos(phi_n), np.cos(theta_n)*np.sin(phi_n), np.sin(theta_n)]]) 134 | 135 | .. code:: python 136 | 137 | data_rotated = np.dot(R,data.transpose()) 138 | data_rotated.shape 139 | 140 | 141 | 142 | 143 | .. parsed-literal:: 144 | 145 | (3, 10000) 146 | 147 | 148 | 149 | Now that the dataset is rotated, we can check how well the first 150 | diffusion coordinate approximates the first spherical harmonic 151 | :math:`Y_1^1(\theta, \phi) = \sin(\theta) = Z`. 152 | 153 | .. 
code:: python 154 | 155 | print('Correlation between \phi and \psi_1') 156 | print(np.corrcoef(mydmap.dmap[:,0], data_rotated[2,:])) 157 | 158 | plt.figure(figsize=(16,6)) 159 | ax = plt.subplot(121) 160 | ax.scatter(data_rotated[2,:], mydmap.dmap[:,0]) 161 | ax.set_title('First DC against $Z$') 162 | ax.set_xlabel(r'$Z$') 163 | ax.set_ylabel(r'$\psi_1$') 164 | ax.axis('tight') 165 | 166 | ax2 = plt.subplot(122,projection='3d') 167 | ax2.scatter(data_rotated[0,:],data_rotated[1,:],data_rotated[2,:], c=mydmap.dmap[:,0], cmap=plt.cm.Spectral) 168 | #ax2.view_init(75, 10) 169 | ax2.set_title('sphere dataset rotated, color according to $\psi_1$') 170 | ax2.set_xlabel('X') 171 | ax2.set_ylabel('Y') 172 | ax2.set_zlabel('Z') 173 | 174 | plt.show() 175 | 176 | 177 | .. parsed-literal:: 178 | 179 | Correlation between \phi and \psi_1 180 | [[1. 0.99915563] 181 | [0.99915563 1. ]] 182 | 183 | 184 | 185 | .. image:: output_15_1.png 186 | 187 | 188 | -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Spherical_Harmonics/output_10_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DiffusionMapsAcademics/pyDiffMap/22adc99faa83708e9ac05224015fa02c3a7f3c91/docs/jupyter notebook tutorials/Spherical_Harmonics/output_10_0.png -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Spherical_Harmonics/output_15_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DiffusionMapsAcademics/pyDiffMap/22adc99faa83708e9ac05224015fa02c3a7f3c91/docs/jupyter notebook tutorials/Spherical_Harmonics/output_15_1.png -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Spherical_Harmonics/output_9_0.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/DiffusionMapsAcademics/pyDiffMap/22adc99faa83708e9ac05224015fa02c3a7f3c91/docs/jupyter notebook tutorials/Spherical_Harmonics/output_9_0.png -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Swiss_Roll/Swiss_Roll.rst: -------------------------------------------------------------------------------- 1 | 2 | The classic swiss roll data set 3 | =============================== 4 | 5 | author: Ralf Banisch 6 | 7 | We demonstrate the usage of the diffusion\_map class on a 8 | two-dimensional manifold embedded in :math:`\mathbb{R}^3`. 9 | 10 | .. code:: python 11 | 12 | # import some necessary functions for plotting as well as the diffusion_map class from pydiffmap. 13 | import matplotlib.pyplot as plt 14 | import numpy as np 15 | 16 | from mpl_toolkits.mplot3d import Axes3D 17 | from pydiffmap import diffusion_map as dm 18 | 19 | %matplotlib inline 20 | 21 | Create Data 22 | ----------- 23 | 24 | We create the dataset: A noisy sampling of the twodimensional "swiss 25 | roll" embedded in :math:`\mathbb{R}^3`. The sampling is such that the 26 | density of samples decreases with the distance from the origin 27 | (non-uniform sampling). 28 | 29 | In order to be handled correctly by the diffusion\_map class, we must 30 | ensure the data is a numpy array of shape (n\_points, n\_features). 31 | 32 | .. 
code:: python 33 | 34 | # set parameters 35 | length_phi = 15 #length of swiss roll in angular direction 36 | length_Z = 15 #length of swiss roll in z direction 37 | sigma = 0.1 #noise strength 38 | m = 10000 #number of samples 39 | 40 | # create dataset 41 | phi = length_phi*np.random.rand(m) 42 | xi = np.random.rand(m) 43 | Z = length_Z*np.random.rand(m) 44 | X = 1./6*(phi + sigma*xi)*np.sin(phi) 45 | Y = 1./6*(phi + sigma*xi)*np.cos(phi) 46 | 47 | swiss_roll = np.array([X, Y, Z]).transpose() 48 | 49 | # check that we have the right shape 50 | print(swiss_roll.shape) 51 | 52 | 53 | .. parsed-literal:: 54 | 55 | (10000, 3) 56 | 57 | 58 | Run pydiffmap 59 | ------------- 60 | 61 | Now we initialize the diffusion map object and fit it to the dataset. 62 | Since we are interested in only the first two diffusion coordinates we 63 | set n\_evecs = 2, and since we want to unbias with respect to the 64 | non-uniform sampling density we set alpha = 1.0. The epsilon parameter 65 | controls the scale and needs to be adjusted to the data at hand. The k 66 | parameter controls the neighbour lists, a smaller k will increase 67 | performance but decrease accuracy. 68 | 69 | .. code:: python 70 | 71 | # initialize Diffusion map object. 72 | neighbor_params = {'n_jobs': -1, 'algorithm': 'ball_tree'} 73 | 74 | mydmap = dm.DiffusionMap.from_sklearn(n_evecs=2, k=200, epsilon='bgh', alpha=1.0, neighbor_params=neighbor_params) 75 | # fit to data and return the diffusion map. 76 | dmap = mydmap.fit_transform(swiss_roll) 77 | 78 | 79 | .. parsed-literal:: 80 | 81 | 0.015625000000000007 eps fitted 82 | 83 | 84 | .. code:: python 85 | 86 | mydmap.epsilon_fitted 87 | 88 | 89 | 90 | 91 | .. parsed-literal:: 92 | 93 | 0.015625000000000007 94 | 95 | 96 | 97 | Visualization 98 | ------------- 99 | 100 | We show the original data set on the right, with points colored 101 | according to the first diffusion coordinate. 
On the left, we show the 102 | diffusion map embedding given by the first two diffusion coordinates. 103 | Points are again colored according to the first diffusion coordinate, 104 | which seems to parameterize the :math:`\phi` direction. We can see that 105 | the diffusion map embedding 'unwinds' the swiss roll. 106 | 107 | .. code:: python 108 | 109 | from pydiffmap.visualization import embedding_plot, data_plot 110 | 111 | embedding_plot(mydmap, scatter_kwargs = {'c': dmap[:,0], 'cmap': 'Spectral'}) 112 | data_plot(mydmap, dim=3, scatter_kwargs = {'cmap': 'Spectral'}) 113 | 114 | plt.show() 115 | 116 | 117 | 118 | .. image:: output_8_0.png 119 | 120 | 121 | 122 | .. image:: output_8_1.png 123 | 124 | 125 | To get a bit more information out of the embedding, we can scale the 126 | points according to the numerical estimate of the sampling density 127 | (mydmap.q), and color them according to their location in the phi 128 | direction. For comparison, we color the original data set according to 129 | :math:`\phi` this time. 130 | 131 | .. code:: python 132 | 133 | from pydiffmap.visualization import embedding_plot, data_plot 134 | 135 | embedding_plot(mydmap, scatter_kwargs = {'c': phi, 's': mydmap.q, 'cmap': 'Spectral'}) 136 | data_plot(mydmap, dim=3, scatter_kwargs = {'cmap': 'Spectral'}) 137 | plt.show() 138 | 139 | 140 | 141 | .. image:: output_10_0.png 142 | 143 | 144 | 145 | .. image:: output_10_1.png 146 | 147 | 148 | We can see that points near the center of the swiss roll, where the 149 | winding is tight, are closer together in the embedding, while points 150 | further away from the center are more spaced out. Let's check how the 151 | first two diffusion coordinates correlate with :math:`\phi` and 152 | :math:`Z`. 153 | 154 | .. 
code:: python 155 | 156 | print('Correlation between \phi and \psi_1') 157 | print(np.corrcoef(dmap[:,0], phi)) 158 | 159 | plt.figure(figsize=(16,6)) 160 | ax = plt.subplot(121) 161 | ax.scatter(phi, dmap[:,0]) 162 | ax.set_title('First DC against $\phi$') 163 | ax.set_xlabel(r'$\phi$') 164 | ax.set_ylabel(r'$\psi_1$') 165 | ax.axis('tight') 166 | 167 | print('Correlation between Z and \psi_2') 168 | print(np.corrcoef(dmap[:,1], Z)) 169 | 170 | ax2 = plt.subplot(122) 171 | ax2.scatter(Z, dmap[:,1]) 172 | ax2.set_title('Second DC against Z') 173 | ax2.set_xlabel('Z') 174 | ax2.set_ylabel(r'$\psi_2$') 175 | 176 | plt.show() 177 | 178 | 179 | .. parsed-literal:: 180 | 181 | Correlation between \phi and \psi_1 182 | [[1. 0.92408413] 183 | [0.92408413 1. ]] 184 | Correlation between Z and \psi_2 185 | [[1. 0.97536036] 186 | [0.97536036 1. ]] 187 | 188 | 189 | 190 | .. image:: output_12_1.png 191 | 192 | 193 | -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Swiss_Roll/output_10_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DiffusionMapsAcademics/pyDiffMap/22adc99faa83708e9ac05224015fa02c3a7f3c91/docs/jupyter notebook tutorials/Swiss_Roll/output_10_0.png -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Swiss_Roll/output_10_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DiffusionMapsAcademics/pyDiffMap/22adc99faa83708e9ac05224015fa02c3a7f3c91/docs/jupyter notebook tutorials/Swiss_Roll/output_10_1.png -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Swiss_Roll/output_12_1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DiffusionMapsAcademics/pyDiffMap/22adc99faa83708e9ac05224015fa02c3a7f3c91/docs/jupyter notebook tutorials/Swiss_Roll/output_12_1.png -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Swiss_Roll/output_8_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DiffusionMapsAcademics/pyDiffMap/22adc99faa83708e9ac05224015fa02c3a7f3c91/docs/jupyter notebook tutorials/Swiss_Roll/output_8_0.png -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/Swiss_Roll/output_8_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DiffusionMapsAcademics/pyDiffMap/22adc99faa83708e9ac05224015fa02c3a7f3c91/docs/jupyter notebook tutorials/Swiss_Roll/output_8_1.png -------------------------------------------------------------------------------- /docs/jupyter notebook tutorials/index.rst: -------------------------------------------------------------------------------- 1 | Jupyter notebook tutorials 2 | ========================== 3 | 4 | .. toctree:: 5 | :glob: 6 | 7 | Swiss_Roll/Swiss_Roll 8 | Spherical_Harmonics/Spherical_Harmonics 9 | Four_Wells/Four_Wells 10 | Metrics/Metrics 11 | -------------------------------------------------------------------------------- /docs/readme.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../README.rst 2 | -------------------------------------------------------------------------------- /docs/reference/diffusion_map.rst: -------------------------------------------------------------------------------- 1 | diffusion_map 2 | ============= 3 | 4 | .. 
automodule:: pydiffmap.diffusion_map 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/reference/index.rst: -------------------------------------------------------------------------------- 1 | Reference 2 | ========= 3 | 4 | .. toctree:: 5 | :glob: 6 | 7 | diffusion_map 8 | kernel 9 | visualization 10 | -------------------------------------------------------------------------------- /docs/reference/kernel.rst: -------------------------------------------------------------------------------- 1 | kernel 2 | ====== 3 | 4 | .. automodule:: pydiffmap.kernel 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/reference/visualization.rst: -------------------------------------------------------------------------------- 1 | visualization 2 | ============= 3 | 4 | .. automodule:: pydiffmap.visualization 5 | :members: 6 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx>=1.3 2 | sphinx-rtd-theme 3 | -e . 4 | -------------------------------------------------------------------------------- /docs/spelling_wordlist.txt: -------------------------------------------------------------------------------- 1 | builtin 2 | builtins 3 | classmethod 4 | staticmethod 5 | classmethods 6 | staticmethods 7 | args 8 | kwargs 9 | callstack 10 | Changelog 11 | Indices 12 | -------------------------------------------------------------------------------- /docs/theory.rst: -------------------------------------------------------------------------------- 1 | ====== 2 | Theory 3 | ====== 4 | 5 | Diffusion maps is a dimension reduction technique that can be used to discover low dimensional structure in high 6 | dimensional data. 
It assumes that the data points, which are given as points in a high dimensional metric space, 7 | actually live on a lower dimensional structure. To uncover this structure, diffusion maps builds a neighborhood graph 8 | on the data based on the distances between nearby points. Then a graph Laplacian **L** is constructed on the neighborhood 9 | graph. Many variants exist that approximate different differential operators. For example, *standard* diffusion maps 10 | approximates the differential operator 11 | 12 | .. math:: 13 | 14 | \mathcal{L}f = \Delta f - 2(1-\alpha)\nabla f \cdot \frac{\nabla q}{q} 15 | 16 | 17 | where :math:`\Delta` is the Laplace Beltrami operator, :math:`\nabla` is the gradient operator and :math:`q` is the 18 | sampling density. The normalization parameter :math:`\alpha`, which is typically between 0.0 and 1.0, determines how 19 | much :math:`q` is allowed to bias the operator :math:`\mathcal{L}`. 20 | Standard diffusion maps on a dataset ``X``, which has to be given as a numpy array with different rows corresponding to 21 | different observations, is implemented in pydiffmap as:: 22 | 23 | mydmap = diffusion_map.DiffusionMap.from_sklearn(epsilon = my_epsilon, alpha = my_alpha) 24 | mydmap.fit(X) 25 | 26 | Here ``epsilon`` is a scale parameter used to rescale distances between data points. 27 | We can also choose ``epsilon`` automatically due to an algorithm by Berry, Harlim and Giannakis:: 28 | 29 | mydmap = dm.DiffusionMap.from_sklearn(alpha = my_alpha, epsilon = 'bgh') 30 | 31 | For additional optional arguments of the DiffusionMap class, see usage and documentation. 32 | 33 | A variant of diffusion maps, 'TMDmap', unbiases with respect to :math:`q` and approximates the differential operator 34 | 35 | .. math:: 36 | 37 | \mathcal{L}f = \Delta f + \nabla (\log\pi) \cdot \nabla f 38 | 39 | where :math:`\pi` is a 'target distribution' that defines the drift term and has to be known up to a normalization 40 | constant.
TMDmap is implemented in pydiffmap as:: 41 | 42 | mydmap = diffusion_map.TMDmap(epsilon = my_epsilon, alpha = 1.0, change_of_measure=com_fxn) 43 | mydmap.fit(X) 44 | 45 | where ``com_fxn`` is a function that takes in a coordinate and outputs the value of the target distribution :math:`\pi` . 46 | -------------------------------------------------------------------------------- /docs/usage.rst: -------------------------------------------------------------------------------- 1 | ===== 2 | Usage 3 | ===== 4 | 5 | To use pyDiffMap in a project:: 6 | 7 | import pydiffmap 8 | 9 | To initialize a diffusion map object:: 10 | 11 | mydmap = diffusion_map.DiffusionMap.from_sklearn(n_evecs = 1, epsilon = 1.0, alpha = 0.5, k=64) 12 | 13 | where ``n_evecs`` is the number of eigenvectors that are computed, ``epsilon`` is a scale parameter 14 | used to rescale distances between data points, ``alpha`` is a normalization parameter (typically between 0.0 and 1.0) 15 | that influences the effect of the sampling density, and ``k`` is the number of nearest neighbors considered when the kernel 16 | is computed. A larger ``k`` means increased accuracy but larger computation time. 17 | The ``from_sklearn`` command is used because we are constructing using the scikit-learn nearest neighbor framework. 18 | For additional optional arguments, see documentation. 19 | 20 | We can also employ automatic epsilon detection due to an algorithm by Berry, Harlim and Giannakis:: 21 | 22 | mydmap = dm.DiffusionMap.from_sklearn(n_evecs = 1, alpha = 0.5, epsilon = 'bgh', k=64) 23 | 24 | To fit to a dataset ``X`` (array-like, shape (n_query, n_features)):: 25 | 26 | mydmap.fit(X) 27 | 28 | The diffusion map coordinates can also be accessed directly via:: 29 | 30 | dmap = mydmap.fit_transform(X) 31 | 32 | This returns an array ``dmap`` with shape (n_query, n_evecs). E.g. ``dmap[:,0]`` is the first diffusion coordinate 33 | evaluated on the data ``X``.
34 | 35 | In order to compute diffusion coordinates at the out of sample location(s) ``Y``:: 36 | 37 | dmap_Y = mydmap.transform(Y) 38 | -------------------------------------------------------------------------------- /examples/Data/4wells_traj.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DiffusionMapsAcademics/pyDiffMap/22adc99faa83708e9ac05224015fa02c3a7f3c91/examples/Data/4wells_traj.npy -------------------------------------------------------------------------------- /examples/Data/dimer_energy.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DiffusionMapsAcademics/pyDiffMap/22adc99faa83708e9ac05224015fa02c3a7f3c91/examples/Data/dimer_energy.npy -------------------------------------------------------------------------------- /examples/Data/dimer_trajectory.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DiffusionMapsAcademics/pyDiffMap/22adc99faa83708e9ac05224015fa02c3a7f3c91/examples/Data/dimer_trajectory.npy -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal = 1 3 | 4 | 5 | [flake8] 6 | max-line-length = 140 7 | exclude = */migrations/* 8 | ignore = E501,E226,E731,W503 9 | 10 | [pep8] 11 | max-line-length = 140 12 | ignore = E501,E226,E731,W503 13 | 14 | [tool:pytest] 15 | testpaths = tests 16 | norecursedirs = 17 | migrations 18 | 19 | python_files = 20 | test_*.py 21 | *_test.py 22 | tests.py 23 | addopts = 24 | -ra 25 | --strict 26 | --doctest-modules 27 | --doctest-glob=\*.rst 28 | --tb=short 29 | 30 | [isort] 31 | force_single_line = True 32 | line_length = 120 33 | known_first_party = pydiffmap 34 | default_section = THIRDPARTY 35 | forced_separate = test_pydiffmap 36 | not_skip = 
__init__.py 37 | skip = migrations 38 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | from __future__ import absolute_import 4 | from __future__ import print_function 5 | 6 | import io 7 | import re 8 | from glob import glob 9 | from os.path import basename 10 | from os.path import dirname 11 | from os.path import join 12 | from os.path import splitext 13 | 14 | from setuptools import find_packages 15 | from setuptools import setup 16 | 17 | 18 | def read(*names, **kwargs): 19 | with io.open( 20 | join(dirname(__file__), *names), 21 | encoding=kwargs.get('encoding', 'utf8') 22 | ) as fh: 23 | return fh.read() 24 | 25 | 26 | setup( 27 | name='pydiffmap', 28 | version='0.2.0.1', 29 | license='MIT license', 30 | description='Library for constructing variable bandwidth diffusion maps', 31 | long_description='%s\n%s' % ( 32 | re.compile('^.. start-badges.*^.. 
end-badges', re.M | re.S).sub('', read('README.rst')), 33 | re.sub(':[a-z]+:`~?(.*?)`', r'``\1``', read('CHANGELOG.rst')) 34 | ), 35 | author='Ralf Banisch, Erik Henning Thiede, Zofia Trstanova', 36 | author_email='ralf.banisch@fu-berlin.de, ehthiede@gmail.com, zofia.trstanova@ed.ac.uk', 37 | url='https://github.com/DiffusionMapsAcademics/pyDiffMap', 38 | packages=find_packages('src'), 39 | package_dir={'': 'src'}, 40 | py_modules=[splitext(basename(path))[0] for path in glob('src/*.py')], 41 | include_package_data=True, 42 | zip_safe=False, 43 | classifiers=[ 44 | # complete classifier list: http://pypi.python.org/pypi?%3Aaction=list_classifiers 45 | 'Development Status :: 3 - Alpha', 46 | 'Topic :: Scientific/Engineering', 47 | 'Intended Audience :: Science/Research', 48 | 'License :: OSI Approved :: MIT License', 49 | 'Operating System :: Unix', 50 | 'Operating System :: POSIX', 51 | 'Operating System :: Microsoft :: Windows', 52 | 'Programming Language :: Python', 53 | 'Programming Language :: Python :: 2.7', 54 | 'Programming Language :: Python :: 3', 55 | 'Programming Language :: Python :: 3.4', 56 | 'Programming Language :: Python :: 3.5', 57 | 'Programming Language :: Python :: 3.6', 58 | 'Programming Language :: Python :: 3.7', 59 | 'Programming Language :: Python :: 3.8', 60 | 'Topic :: Scientific/Engineering', 61 | ], 62 | keywords=[ 63 | 'diffusion maps', 'manifold learning', 'molecular dynamics', 64 | 'dimensionality reduction' 65 | # eg: 'keyword1', 'keyword2', 'keyword3', 66 | ], 67 | install_requires=[ 68 | 'numpy', 'scipy', 'scikit-learn', 'matplotlib', 'six', 'numexpr' 69 | ], 70 | extras_require={ 71 | # eg: 72 | # 'rst': ['docutils>=0.11'], 73 | # ':python_version=="2.6"': ['argparse'], 74 | }, 75 | ) 76 | # entry_points={ 77 | # 'console_scripts': [ 78 | # 'pyDiffMap = pyDiffMap.cli:main', 79 | # ] 80 | # }, 81 | -------------------------------------------------------------------------------- /src/pydiffmap/__init__.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | A library for constructing diffusion maps. 3 | """ 4 | from __future__ import absolute_import 5 | 6 | from . import diffusion_map 7 | from . import kernel 8 | from . import visualization 9 | from . import utils 10 | 11 | __all__ = ['diffusion_map', 'kernel', 'visualization', 'utils'] 12 | -------------------------------------------------------------------------------- /src/pydiffmap/diffusion_map.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Routines and Class definitions for the diffusion maps algorithm. 4 | """ 5 | from __future__ import absolute_import 6 | 7 | import numpy as np 8 | import scipy.sparse as sps 9 | import scipy.sparse.linalg as spsl 10 | import warnings 11 | from . import kernel 12 | from . import utils 13 | 14 | 15 | class DiffusionMap(object): 16 | """ 17 | Diffusion Map object for data analysis 18 | 19 | Parameters 20 | ---------- 21 | kernel_object : Kernel object. 22 | Kernel object that outputs the values of the kernel. Must have the method .fit(X) and .compute() methods. 23 | Any epsilon desired for normalization should be stored at kernel_object.epsilon_fitted and any bandwidths 24 | should be located at kernel_object.bandwidths. 25 | alpha : scalar, optional 26 | Exponent to be used for the left normalization in constructing the diffusion map. 27 | n_evecs : int, optional 28 | Number of diffusion map eigenvectors to return 29 | weight_fxn : callable or None, optional 30 | Callable function that take in a point, and outputs the value of the weight matrix at those points. 31 | density_fxn : callable or None, optional 32 | Callable function that take in X, and outputs the value of the density of X. Used instead of kernel density estimation in the normalisation. 
33 | bandwidth_normalize: boolean, optional 34 | If true, normalize the final constructed transition matrix by the bandwidth as described in Berry and Harlim. [1]_ 35 | oos : 'nystroem' or 'power', optional 36 | Method to use for out-of-sample extension. 37 | 38 | References 39 | ---------- 40 | .. [1] T. Berry, and J. Harlim, Applied and Computational Harmonic Analysis 40, 68-96 41 | (2016). 42 | """ 43 | 44 | def __init__(self, kernel_object, alpha=0.5, n_evecs=1, 45 | weight_fxn=None, density_fxn=None, 46 | bandwidth_normalize=False, oos='nystroem'): 47 | """ 48 | Initializes Diffusion Map, sets parameters. 49 | """ 50 | self.alpha = alpha 51 | self.n_evecs = n_evecs 52 | self.epsilon_fitted = None 53 | self.weight_fxn = weight_fxn 54 | self.bandwidth_normalize = bandwidth_normalize 55 | self.oos = oos 56 | self.density_fxn = density_fxn 57 | self.local_kernel = kernel_object 58 | 59 | @classmethod 60 | def from_sklearn(cls, alpha=0.5, k=64, kernel_type='gaussian', epsilon='bgh', n_evecs=1, neighbor_params=None, 61 | metric='euclidean', metric_params=None, weight_fxn=None, density_fxn=None, bandwidth_type=None, 62 | bandwidth_normalize=False, oos='nystroem'): 63 | """ 64 | Builds the diffusion map using a kernel constructed using the Scikit-learn nearest neighbor object. 65 | Parameters are largely the same as the constructor, but in place of the kernel object it take 66 | the following parameters. 67 | 68 | Parameters 69 | ---------- 70 | k : int, optional 71 | Number of nearest neighbors over which to construct the kernel. 72 | kernel_type : string, optional 73 | Type of kernel to construct. Currently the only option is 'gaussian', but more will be implemented. 74 | epsilon: string or scalar, optional 75 | Method for choosing the epsilon. Currently, the only options are to provide a scalar (epsilon is set to the provided scalar) 'bgh' (Berry, Giannakis and Harlim), and 'bgh_generous' ('bgh' method, with answer multiplied by 2. 
76 | neighbor_params : dict or None, optional 77 | Optional parameters for the nearest Neighbor search. See scikit-learn NearestNeighbors class for details. 78 | metric : string, optional 79 | Metric for distances in the kernel. Default is 'euclidean'. The callable should take two arrays as input and return one value indicating the distance between them. 80 | metric_params : dict or None, optional 81 | Optional parameters required for the metric given. 82 | bandwidth_type: callable, number, string, or None, optional 83 | Type of bandwidth to use in the kernel. If None (default), a fixed bandwidth kernel is used. If a callable function, the data is passed to the function, and the bandwidth is output (note that the function must take in an entire dataset, not the points 1-by-1). If a number, e.g. -.25, a kernel density estimate is performed, and the bandwidth is taken to be q**(input_number). For a string input, the input is assumed to be an evaluatable expression in terms of the dimension d, e.g. "-1/(d+2)". The dimension is then estimated, and the bandwidth is set to q**(evaluated input string). 84 | 85 | Examples 86 | -------- 87 | # setup neighbor_params list with as many jobs as CPU cores and kd_tree neighbor search. 88 | >>> neighbor_params = {'n_jobs': -1, 'algorithm': 'kd_tree'} 89 | # initialize diffusion map object with the top two eigenvalues being computed, epsilon set to 0.1 90 | # and alpha set to 1.0. 91 | >>> mydmap = DiffusionMap.from_sklearn(n_evecs = 2, epsilon = .1, alpha = 1.0, neighbor_params = neighbor_params) 92 | 93 | References 94 | ---------- 95 | .. [1] T. Berry, and J. Harlim, Applied and Computational Harmonic Analysis 40, 68-96 96 | (2016). 
97 | """ 98 | 99 | buendia = kernel.Kernel(kernel_type=kernel_type, k=k, epsilon=epsilon, neighbor_params=neighbor_params, metric=metric, metric_params=metric_params, bandwidth_type=bandwidth_type) 100 | dmap = cls(buendia, alpha=alpha, n_evecs=n_evecs, weight_fxn=weight_fxn, density_fxn=density_fxn, bandwidth_normalize=bandwidth_normalize, oos=oos) 101 | # if ((bandwidth_type is None) and (bandwidth_normalize is True)): 102 | # warnings.warn('Bandwith normalization set to true, but no bandwidth function provided. Setting to False.') 103 | return dmap 104 | 105 | def _build_kernel(self, X, my_kernel): 106 | my_kernel.fit(X) 107 | kernel_matrix = utils._symmetrize_matrix(my_kernel.compute()) 108 | return kernel_matrix, my_kernel 109 | 110 | def _compute_weights(self, X): 111 | if self.weight_fxn is not None: 112 | N = np.shape(X)[0] 113 | return np.array([self.weight_fxn(Xi) for Xi in X]).reshape(N) 114 | else: 115 | return None 116 | 117 | def _make_right_norm_vec(self, kernel_matrix, q=None, bandwidths=None): 118 | if q is None: 119 | # perform kde 120 | q = np.array(kernel_matrix.sum(axis=1)).ravel() 121 | if bandwidths is not None: 122 | q /= bandwidths**2 123 | right_norm_vec = np.power(q, -self.alpha) 124 | return q, right_norm_vec 125 | 126 | def _right_normalize(self, kernel_matrix, right_norm_vec, weights): 127 | m = right_norm_vec.shape[0] 128 | Dalpha = sps.spdiags(right_norm_vec, 0, m, m) 129 | kernel_matrix = kernel_matrix * Dalpha 130 | if weights is not None: 131 | weight_mat = sps.spdiags(weights, 0, m, m) 132 | kernel_matrix = kernel_matrix * weight_mat 133 | return kernel_matrix 134 | 135 | def _left_normalize(self, kernel_matrix): 136 | row_sum = kernel_matrix.sum(axis=1).transpose() 137 | n = row_sum.shape[1] 138 | Dalpha = sps.spdiags(np.power(row_sum, -1), 0, n, n) 139 | P = Dalpha * kernel_matrix 140 | return P 141 | 142 | def _build_generator(self, P, epsilon_fitted, bandwidths=None, bandwidth_normalize=False): 143 | m, n = P.shape 144 | L = 
(P - sps.eye(m, n, k=(n - m))) / epsilon_fitted 145 | if bandwidth_normalize: 146 | if bandwidths is not None: 147 | bw_diag = sps.spdiags(np.power(bandwidths, -2), 0, m, m) 148 | L = bw_diag * L 149 | else: 150 | warnings.warn('Bandwith normalization set to true, but no bandwidth function was found in normalization. Not performing normalization') 151 | 152 | return L 153 | 154 | def _make_diffusion_coords(self, L): 155 | evals, evecs = spsl.eigs(L, k=(self.n_evecs+1), which='LR') 156 | ix = evals.argsort()[::-1][1:] 157 | evals = np.real(evals[ix]) 158 | evecs = np.real(evecs[:, ix]) 159 | dmap = np.dot(evecs, np.diag(np.sqrt(-1. / evals))) 160 | return dmap, evecs, evals 161 | 162 | def construct_Lmat(self, X): 163 | """ 164 | Builds the transition matrix, but does NOT compute the eigenvectors. This is useful for applications where the transition matrix itself is the object of interest. 165 | 166 | Parameters 167 | ---------- 168 | X : array-like, shape (n_query, n_features) 169 | Data upon which to construct the diffusion map. 
170 | 171 | Returns 172 | ------- 173 | self : the object itself 174 | """ 175 | kernel_matrix, my_kernel = self._build_kernel(X, self.local_kernel) 176 | weights = self._compute_weights(X) 177 | 178 | if self.density_fxn is not None: 179 | density = self.density_fxn(X) 180 | else: 181 | density = None 182 | try: 183 | bandwidths = my_kernel.bandwidths 184 | except AttributeError: 185 | bandwidths = None 186 | 187 | q, right_norm_vec = self._make_right_norm_vec(kernel_matrix, q=density, bandwidths=bandwidths) 188 | P = self._right_normalize(kernel_matrix, right_norm_vec, weights) 189 | P = self._left_normalize(P) 190 | L = self._build_generator(P, my_kernel.epsilon_fitted, bandwidths, bandwidth_normalize=self.bandwidth_normalize) 191 | 192 | # Save data 193 | self.local_kernel = my_kernel 194 | self.epsilon_fitted = my_kernel.epsilon_fitted 195 | self.data = X 196 | self.weights = weights 197 | self.kernel_matrix = kernel_matrix 198 | self.L = L 199 | self.q = q 200 | self.right_norm_vec = right_norm_vec 201 | return self 202 | 203 | def fit(self, X): 204 | """ 205 | Fits the data. 206 | 207 | Parameters 208 | ---------- 209 | X : array-like, shape (n_query, n_features) 210 | Data upon which to construct the diffusion map. 211 | 212 | Returns 213 | ------- 214 | self : the object itself 215 | """ 216 | self.construct_Lmat(X) 217 | dmap, evecs, evals = self._make_diffusion_coords(self.L) 218 | 219 | # Save constructed data. 220 | self.evals = evals 221 | self.evecs = evecs 222 | self.dmap = dmap 223 | return self 224 | 225 | def transform(self, Y): 226 | """ 227 | Performs Nystroem out-of-sample extension to calculate the values of the diffusion coordinates at each given point. 228 | 229 | Parameters 230 | ---------- 231 | Y : array-like, shape (n_query, n_features) 232 | Data for which to perform the out-of-sample extension. 233 | 234 | Returns 235 | ------- 236 | phi : numpy array, shape (n_query, n_eigenvectors) 237 | Transformed value of the given values. 
238 | """ 239 | if np.array_equal(self.data, Y): 240 | return self.dmap 241 | else: 242 | # turn Y into 2D array if needed 243 | if (Y.ndim == 1): 244 | Y = Y[np.newaxis, :] 245 | 246 | if self.oos == "nystroem": 247 | return nystroem_oos(self, Y) 248 | elif self.oos == "power": 249 | return power_oos(self, Y) 250 | else: 251 | raise ValueError('Did not understand the OOS algorithm specified') 252 | 253 | def fit_transform(self, X): 254 | """ 255 | Fits the data and returns diffusion coordinates. equivalent to calling dmap.fit(X).transform(x). 256 | 257 | Parameters 258 | ---------- 259 | X : array-like, shape (n_query, n_features) 260 | Data upon which to construct the diffusion map. 261 | 262 | Returns 263 | ------- 264 | phi : numpy array, shape (n_query, n_eigenvectors) 265 | Transformed value of the given values. 266 | """ 267 | self.fit(X) 268 | return self.dmap 269 | 270 | 271 | class TMDmap(DiffusionMap): 272 | """ 273 | Implementation of the TargetMeasure diffusion map. This provides a more convenient interface for some hyperparameter selection for the general diffusion object. It takes the same parameters as the base Diffusion Map object. However, rather than taking a weight function, it takes as input a change of measure function. 274 | 275 | Parameters 276 | ---------- 277 | change_of_measure : callable, optional 278 | Function that takes in a point and evaluates the change-of-measure between the density otherwise stationary to the diffusion map and the desired density. 
279 | """ 280 | 281 | def __init__(self, alpha=0.5, k=64, kernel_type='gaussian', epsilon='bgh', 282 | n_evecs=1, neighbor_params=None, metric='euclidean', 283 | metric_params=None, change_of_measure=None, density_fxn=None, 284 | bandwidth_type=None, bandwidth_normalize=False, oos='nystroem'): 285 | 286 | def weight_fxn(y_i): 287 | return np.sqrt(change_of_measure(y_i)) 288 | 289 | buendia = kernel.Kernel(kernel_type=kernel_type, k=k, epsilon=epsilon, neighbor_params=neighbor_params, metric=metric, metric_params=metric_params, bandwidth_type=bandwidth_type) 290 | 291 | super(TMDmap, self).__init__(buendia, alpha=alpha, n_evecs=n_evecs, weight_fxn=weight_fxn, density_fxn=density_fxn, bandwidth_normalize=bandwidth_normalize, oos=oos) 292 | 293 | 294 | def nystroem_oos(dmap_object, Y): 295 | """ 296 | Performs Nystroem out-of-sample extension to calculate the values of the diffusion coordinates at each given point. 297 | 298 | Parameters 299 | ---------- 300 | dmap_object : DiffusionMap object 301 | Diffusion map upon which to perform the out-of-sample extension. 302 | Y : array-like, shape (n_query, n_features) 303 | Data for which to perform the out-of-sample extension. 304 | 305 | Returns 306 | ------- 307 | phi : numpy array, shape (n_query, n_eigenvectors) 308 | Transformed value of the given values. 309 | """ 310 | # check if Y is equal to data. If yes, no computation needed. 311 | # compute the values of the kernel matrix 312 | kernel_extended = dmap_object.local_kernel.compute(Y) 313 | weights = dmap_object._compute_weights(dmap_object.local_kernel.data) 314 | P = dmap_object._left_normalize(dmap_object._right_normalize(kernel_extended, dmap_object.right_norm_vec, weights)) 315 | oos_evecs = P * dmap_object.dmap 316 | # evals_p = dmap_object.local_kernel.epsilon_fitted * dmap_object.evals + 1. 317 | # oos_dmap = np.dot(oos_evecs, np.diag(1. 
/ evals_p)) 318 | return oos_evecs 319 | 320 | 321 | def power_oos(dmap_object, Y): 322 | """ 323 | Performs out-of-sample extension to calculate the values of the diffusion coordinates at each given point using the power-like method. 324 | 325 | Parameters 326 | ---------- 327 | dmap_object : DiffusionMap object 328 | Diffusion map upon which to perform the out-of-sample extension. 329 | Y : array-like, shape (n_query, n_features) 330 | Data for which to perform the out-of-sample extension. 331 | 332 | Returns 333 | ------- 334 | phi : numpy array, shape (n_query, n_eigenvectors) 335 | Transformed value of the given values. 336 | """ 337 | m = int(Y.shape[0]) 338 | k_yx, y_bandwidths = dmap_object.local_kernel.compute(Y, return_bandwidths=True) # Evaluate on ref points 339 | yy_right_norm_vec = dmap_object._make_right_norm_vec(k_yx, y_bandwidths)[1] 340 | k_yy_diag = dmap_object.local_kernel.kernel_fxn(0, dmap_object.epsilon_fitted) 341 | data_full = np.vstack([dmap_object.local_kernel.data, Y]) 342 | k_full = sps.hstack([k_yx, sps.eye(m) * k_yy_diag]) 343 | right_norm_full = np.hstack([dmap_object.right_norm_vec, yy_right_norm_vec]) 344 | weights = dmap_object._compute_weights(data_full) 345 | 346 | P = dmap_object._left_normalize(dmap_object._right_normalize(k_full, right_norm_full, weights)) 347 | L = dmap_object._build_generator(P, dmap_object.epsilon_fitted, y_bandwidths) 348 | L_yx = L[:, :-m] 349 | L_yy = np.array(L[:, -m:].diagonal()) 350 | adj_evals = dmap_object.evals - L_yy.reshape(-1, 1) 351 | dot_part = np.array(L_yx.dot(dmap_object.dmap)) 352 | return (1. / adj_evals) * dot_part 353 | -------------------------------------------------------------------------------- /src/pydiffmap/kernel.py: -------------------------------------------------------------------------------- 1 | """ 2 | A class to implement diffusion kernels. 
3 | """ 4 | 5 | import numbers 6 | import numpy as np 7 | import numexpr as ne 8 | import scipy.sparse as sps 9 | import warnings 10 | from sklearn.neighbors import NearestNeighbors 11 | from six import string_types 12 | from . import utils 13 | try: 14 | from scipy.special import logsumexp 15 | except ModuleNotFoundError: 16 | from scipy.misc import logsumexp 17 | 18 | 19 | class Kernel(object): 20 | """ 21 | Class abstracting the evaluation of kernel functions on the dataset. 22 | 23 | Parameters 24 | ---------- 25 | kernel_type : string or callable, optional 26 | Type of kernel to construct. Currently the only option is 'gaussian' (the default), but more will be implemented. 27 | epsilon : string, optional 28 | Method for choosing the epsilon. Currently, the only options are to provide a scalar (epsilon is set to the provided scalar) 'bgh' (Berry, Giannakis and Harlim), and 'bgh_generous' ('bgh' method, with answer multiplied by 2. 29 | k : int, optional 30 | Number of nearest neighbors over which to construct the kernel. 31 | neighbor_params : dict or None, optional 32 | Optional parameters for the nearest Neighbor search. See scikit-learn NearestNeighbors class for details. 33 | metric : string, optional 34 | Distance metric to use in constructing the kernel. This can be selected from any of the scipy.spatial.distance metrics, or a callable function returning the distance. 35 | metric_params : dict or None, optional 36 | Optional parameters required for the metric given. 37 | bandwidth_type: callable, number, string, or None, optional 38 | Type of bandwidth to use in the kernel. If None (default), a fixed bandwidth kernel is used. If a callable function, the data is passed to the function, and the bandwidth is output (note that the function must take in an entire dataset, not the points 1-by-1). If a number, e.g. -.25, a kernel density estimate is performed, and the bandwidth is taken to be q**(input_number). 
For a string input, the input is assumed to be an evaluatable expression in terms of the dimension d, e.g. "-1/(d+2)". The dimension is then estimated, and the bandwidth is set to q**(evaluated input string). 39 | """ 40 | 41 | def __init__(self, kernel_type='gaussian', epsilon='bgh', k=64, neighbor_params=None, metric='euclidean', metric_params=None, bandwidth_type=None): 42 | self.kernel_fxn = _parse_kernel_type(kernel_type) 43 | self.epsilon = epsilon 44 | self.k = k 45 | self.metric = metric 46 | self.metric_params = metric_params 47 | if neighbor_params is None: 48 | neighbor_params = {} 49 | self.neighbor_params = neighbor_params 50 | self.bandwidth_type = bandwidth_type 51 | self.d = None 52 | self.epsilon_fitted = None 53 | 54 | def build_bandwidth_fxn(self, bandwidth_type): 55 | """ 56 | Parses an input string or function specifying the bandwidth. 57 | 58 | Parameters 59 | ---------- 60 | bandwidth_fxn : string or number or callable 61 | Bandwidth to use. If a number, taken to be the beta parameter in [1]_. 62 | If a string, taken to again be beta, but with an evaluatable 63 | expression as a function of the intrinsic dimension d, e.g. '1/(d+2)'. 64 | If a function, taken to be a function that outputs the bandwidth. 65 | 66 | References 67 | ---------- 68 | .. [1] T. Berry, and J. Harlim, Applied and Computational Harmonic Analysis 40, 68-96 69 | (2016). 
70 | """ 71 | if self.bandwidth_type is None: 72 | return None 73 | elif callable(self.bandwidth_type): 74 | return self.bandwidth_type 75 | else: 76 | is_string = isinstance(self.bandwidth_type, string_types) 77 | is_number = isinstance(self.bandwidth_type, numbers.Number) 78 | if (is_string or is_number): 79 | kde_function, d = self._build_nn_kde() 80 | if is_string: 81 | beta = ne.evaluate(self.bandwidth_type) 82 | elif is_number: 83 | beta = self.bandwidth_type 84 | else: 85 | raise Exception("Honestly, we shouldn't have gotten to this point in the code") 86 | bandwidth_fxn = lambda x: kde_function(x)**beta 87 | return bandwidth_fxn 88 | else: 89 | raise ValueError("Bandwidth Type was not a callable, string, or number. Don't know what to make of it.") 90 | 91 | def _build_nn_kde(self, num_nearest_neighbors=8): 92 | my_nnkde = NNKDE(self.neigh, k=num_nearest_neighbors) 93 | my_nnkde.fit() 94 | bandwidth_fxn = lambda x: my_nnkde.compute(x) 95 | self.kde = my_nnkde 96 | return bandwidth_fxn, my_nnkde.d 97 | 98 | def _compute_bandwidths(self, X): 99 | if self.bandwidth_fxn is not None: 100 | return self.bandwidth_fxn(X) 101 | else: 102 | return None 103 | 104 | def fit(self, X): 105 | """ 106 | Fits the kernel to the data X, constructing the nearest neighbor tree. 107 | 108 | Parameters 109 | ---------- 110 | X : array-like, shape (n_query, n_features) 111 | Data upon which to fit the nearest neighbor tree. 112 | 113 | Returns 114 | ------- 115 | self : the object itself 116 | """ 117 | k0 = min(self.k, np.shape(X)[0]) 118 | self.data = X 119 | # Construct Nearest Neighbor Tree 120 | with warnings.catch_warnings(): 121 | warnings.filterwarnings("ignore", message="Parameter p is found in metric_params. 
The corresponding parameter from __init__ is ignored.") 122 | self.neigh = NearestNeighbors(n_neighbors=k0, 123 | metric=self.metric, 124 | metric_params=self.metric_params, 125 | **self.neighbor_params) 126 | self.neigh.fit(X) 127 | self.bandwidth_fxn = self.build_bandwidth_fxn(self.bandwidth_type) 128 | self.bandwidths = self._compute_bandwidths(X) 129 | self.scaled_dists = self._get_scaled_distance_mat(self.data, self.bandwidths) 130 | self.choose_optimal_epsilon() 131 | return self 132 | 133 | def compute(self, Y=None, return_bandwidths=False): 134 | """ 135 | Computes the sparse kernel matrix. 136 | 137 | Parameters 138 | ---------- 139 | Y : array-like, shape (n_query, n_features), optional. 140 | Data against which to calculate the kernel values. If not provided, calculates against the data provided in the fit. 141 | return_bandwidths : boolean, optional 142 | If True, also returns the computed bandwidth for each y point. 143 | 144 | Returns 145 | ------- 146 | K : array-like, shape (n_query_X, n_query_Y) 147 | Values of the kernel matrix. 148 | y_bandwidths : array-like, shape (n_query_y) 149 | Bandwidth evaluated at each point Y. Only returned if return_bandwidths is True. 150 | 151 | """ 152 | if Y is None: 153 | Y = self.data 154 | # if np.array_equal(Y, self.data): # Avoid recomputing nearest neighbors unless needed. 
155 | if _check_equal(Y, self.data): 156 | y_bandwidths = self.bandwidths 157 | K = self.scaled_dists 158 | else: 159 | # perform k nearest neighbour search on X and Y and construct sparse matrix 160 | # retrieve all nonzero elements and apply kernel function to it 161 | y_bandwidths = self._compute_bandwidths(Y) 162 | K = self._get_scaled_distance_mat(Y, y_bandwidths=y_bandwidths) 163 | K.data = self.kernel_fxn(K.data, self.epsilon_fitted) 164 | if return_bandwidths: 165 | return K, y_bandwidths 166 | else: 167 | return K 168 | 169 | def _get_scaled_distance_mat(self, Y, y_bandwidths=None): 170 | # Scales distance matrix by (rho(x) rho(y))^1/2, where rho is the 171 | # bandwidth. 172 | dists = self.neigh.kneighbors_graph(Y, mode='distance') 173 | if y_bandwidths is not None: 174 | bw_x = np.power(self.bandwidths, 0.5) 175 | bw_y = np.power(y_bandwidths, 0.5) 176 | dists = _scale_by_bw(dists, bw_x, bw_y) 177 | return dists 178 | 179 | def choose_optimal_epsilon(self, epsilon=None): 180 | """ 181 | Chooses the optimal value of epsilon and automatically detects the 182 | dimensionality of the data. 183 | 184 | Parameters 185 | ---------- 186 | epsilon : string or scalar, optional 187 | Method for choosing the epsilon. Currently, the only options are to provide a scalar (epsilon is set to the provided scalar) or 'bgh' (Berry, Giannakis and Harlim). 188 | 189 | Returns 190 | ------- 191 | self : the object itself 192 | """ 193 | if epsilon is None: 194 | epsilon = self.epsilon 195 | 196 | # Choose Epsilon according to method provided. 197 | if isinstance(epsilon, numbers.Number): # if user provided. 198 | self.epsilon_fitted = epsilon 199 | return self 200 | elif ((epsilon == 'bgh') or (epsilon == 'bgh_generous')): # Berry, Giannakis Harlim method. 201 | if (self.metric != 'euclidean'): # TODO : replace with call to scipy metrics. 202 | warnings.warn('The BGH method for choosing epsilon assumes a euclidean metric. However, the metric being used is %s. 
Proceed at your own risk...' % self.metric) 203 | if self.scaled_dists is None: 204 | self.scaled_dists = self._get_scaled_distance_mat(self.data, self.bandwidths) 205 | self.epsilon_fitted, self.d = choose_optimal_epsilon_BGH(self.scaled_dists.data**2) 206 | if epsilon == 'bgh_generous': 207 | self.epsilon_fitted *= 2. 208 | else: 209 | raise ValueError("Method for automatically choosing epsilon was given as %s, but this was not recognized" % epsilon) 210 | return self 211 | 212 | 213 | class NNKDE(object): 214 | """ 215 | Class building a kernel density estimate with a variable bandwidth built from the k nearest neighbors. 216 | 217 | Parameters 218 | ---------- 219 | neighbors : scikit-learn NearestNeighbors object 220 | NearestNeighbors object to use in constructing the KDE. 221 | k : int, optional 222 | Number of nearest neighbors to use in the construction of the bandwidth. This must be less or equal to the number of nearest neighbors used by the nearest neighbor object. 223 | """ 224 | 225 | def __init__(self, neighbors, k=8): 226 | self.neigh = neighbors 227 | self.kernel_fxn = _parse_kernel_type('gaussian') 228 | self.k = k 229 | 230 | def _reduce_nn(self, nn_graph, k): 231 | # gets the k nearest neighbors of an m nearest nearest graph, 232 | # where m >n 233 | sub_neighbors = [] 234 | for row in nn_graph: 235 | dense_row = np.array(row[row.nonzero()]).ravel() 236 | sorted_ndxs = np.argpartition(dense_row, k-1) 237 | sorted_row = dense_row[sorted_ndxs[:k]] 238 | sub_neighbors.append(sorted_row) 239 | return np.array(sub_neighbors) 240 | 241 | def _build_bandwidth(self): 242 | dist_graph_vals = self._reduce_nn(self.dist_graph_sq, k=self.k-1) 243 | avg_sq_dist = np.array(dist_graph_vals.sum(axis=1)).ravel() 244 | self.bandwidths = np.sqrt(avg_sq_dist/(self.k-1)).ravel() 245 | 246 | def _choose_epsilon(self): 247 | # dist_graph_sq = self.neigh.kneighbors_graph(n_neighbors=self.neigh.n_neighbors-1, mode='distance') 248 | dist_graph_sq = 
self.dist_graph_sq.copy() 249 | n = dist_graph_sq.shape[0] 250 | dist_graph_sq = _scale_by_bw(dist_graph_sq, self.bandwidths, self.bandwidths) 251 | sq_dists = np.hstack([dist_graph_sq.data, np.zeros(n)]) 252 | self.epsilon_fitted, self.d = choose_optimal_epsilon_BGH(sq_dists) 253 | 254 | def fit(self): 255 | """ 256 | Fits the kde object to the data provided in the nearest neighbor object. 257 | """ 258 | self.dist_graph_sq = self.neigh.kneighbors_graph(n_neighbors=self.neigh.n_neighbors-1, 259 | mode='distance') 260 | self.dist_graph_sq.data = self.dist_graph_sq.data**2 261 | self._build_bandwidth() 262 | self._choose_epsilon() 263 | 264 | def compute(self, Y): 265 | """ 266 | Computes the density at each query point in Y. 267 | 268 | Parameters 269 | ---------- 270 | Y : array-like, shape (n_query, n_features) 271 | Data against which to calculate the kernel values. If not provided, calculates against the data provided in the fit. 272 | 273 | 274 | Returns 275 | ------- 276 | q : array-like, shape (n_query) 277 | Density evaluated at each point Y. 278 | """ 279 | dist_bw = self.neigh.kneighbors_graph(Y, mode='distance', n_neighbors=self.k) 280 | dist_bw.data = dist_bw.data**2 281 | avg_sq_dist = np.array(dist_bw.sum(axis=1)).ravel() 282 | y_bandwidths = np.sqrt(avg_sq_dist/(self.k-1)).ravel() 283 | K = self.neigh.kneighbors_graph(Y, mode='distance') 284 | K.data = K.data**2 285 | K = _scale_by_bw(K, self.bandwidths, y_bandwidths) 286 | K.data /= 4. * self.epsilon_fitted 287 | K.data = np.exp(-K.data) 288 | density = np.array(K.mean(axis=1)).ravel() 289 | density /= y_bandwidths**self.d 290 | density /= (4 * np.pi * self.epsilon_fitted)**(self.d / 2.) 291 | return density 292 | 293 | 294 | def choose_optimal_epsilon_BGH(scaled_distsq, epsilons=None): 295 | """ 296 | Calculates the optimal epsilon for kernel density estimation according to 297 | the criteria in Berry, Giannakis, and Harlim. 
298 | 299 | Parameters 300 | ---------- 301 | scaled_distsq : numpy array 302 | Values for scaled distance squared values, in no particular order or shape. (This is the exponent in the Gaussian Kernel, aka the thing that gets divided by epsilon). 303 | epsilons : array-like, optional 304 | Values of epsilon from which to choose the optimum. If not provided, uses all powers of 2. from 2^-40 to 2^40 305 | 306 | Returns 307 | ------- 308 | epsilon : float 309 | Estimated value of the optimal length-scale parameter. 310 | d : int 311 | Estimated dimensionality of the system. 312 | 313 | Notes 314 | ----- 315 | This code explicitly assumes the kernel is gaussian, for now. 316 | 317 | References 318 | ---------- 319 | The algorithm given is based on [1]_. If you use this code, please cite them. 320 | 321 | .. [1] T. Berry, D. Giannakis, and J. Harlim, Physical Review E 91, 032915 322 | (2015). 323 | """ 324 | if epsilons is None: 325 | epsilons = 2**np.arange(-40., 41., 1.) 326 | 327 | epsilons = np.sort(epsilons).astype('float') 328 | log_T = [logsumexp(-scaled_distsq/(4. * eps)) for eps in epsilons] 329 | log_eps = np.log(epsilons) 330 | log_deriv = np.diff(log_T)/np.diff(log_eps) 331 | max_loc = np.argmax(log_deriv) 332 | # epsilon = np.max([np.exp(log_eps[max_loc]), np.exp(log_eps[max_loc+1])]) 333 | epsilon = np.exp(log_eps[max_loc]) 334 | d = np.round(2.*log_deriv[max_loc]) 335 | return epsilon, d 336 | 337 | 338 | def _parse_kernel_type(kernel_type): 339 | """ 340 | Parses an input string or function specifying the kernel. 341 | 342 | Parameters 343 | ---------- 344 | kernel_type : string or callable 345 | Type of kernel to construct. Currently the only option is 'gaussian' or 346 | a user provided function. If set to a user defined function, it should 347 | take in two arguments: in order, a vector of distances between two 348 | samples, and a length-scale parameter epsilon. The units on epsilon 349 | should be distance squared. 
350 | 351 | Returns 352 | ------- 353 | kernel_fxn : callable 354 | Function that takes in the distance and length-scale parameter, and outputs the value of the kernel. 355 | """ 356 | if kernel_type.lower() == 'gaussian': 357 | def gaussian_kfxn(d, epsilon): 358 | return np.exp(-d**2 / (4. * epsilon)) 359 | return gaussian_kfxn 360 | elif callable(kernel_type): 361 | return kernel_type 362 | else: 363 | raise("Error: Kernel type not understood.") 364 | 365 | 366 | def _scale_by_bw(d_yx, bw_x, bw_y): 367 | """ 368 | Scale a distance matrix with the bandwidth functions while retaining explicit zeros. 369 | Note that this reorders the indices in d_yx. 370 | 371 | Parameters 372 | ---------- 373 | d_yx : scipy sparse matrix 374 | Sparse matrix whose i,j'th element corresponds to f(y_i, x_j) 375 | dw_x : numpy array 376 | Array of bandwidth values evaluated at each x_i 377 | dw_y : numpy array 378 | Array of bandwidth values evaluated at each y_i 379 | 380 | Returns 381 | ------ 382 | scaled_d_yx : scipy sparse matrix 383 | Sparse matrix whose i,j'th element corresponds to f(y_i, x_j)/ bw[y_i] bw[x_j] 384 | """ 385 | m, n = d_yx.shape 386 | x_bw_diag = sps.spdiags(np.power(bw_x, -1), 0, n, n) 387 | y_bw_diag = sps.spdiags(np.power(bw_y, -1), 0, m, m) 388 | row, col = utils._get_sparse_row_col(d_yx) 389 | inv_bw = sps.csr_matrix((np.ones(d_yx.data.shape), (row, col)), shape=d_yx.shape) 390 | inv_bw = y_bw_diag * inv_bw * x_bw_diag 391 | d_yx.sort_indices() 392 | inv_bw.sort_indices() 393 | d_yx.data = d_yx.data * inv_bw.data 394 | return d_yx 395 | 396 | 397 | def _check_equal(X, Y): 398 | """ 399 | Check if two datasets are equal. 400 | 401 | Parameters 402 | ---------- 403 | X : array-like, shape (n_query, n_features), optional. 404 | Data against which to calculate the kernel values. If not provided, calculates against the data provided in the fit. 405 | Y : array-like, shape (n_query, n_features), optional. 406 | Data against which to calculate the kernel values. 
If not provided, calculates against the data provided in the fit. 407 | 408 | Returns 409 | ------- 410 | is_equal : bool 411 | True if the datasets are equal, False if not. 412 | """ 413 | X_is_sparse = isinstance(X, sps.spmatrix) 414 | Y_is_sparse = isinstance(Y, sps.spmatrix) 415 | if (X_is_sparse and Y_is_sparse): 416 | if X.shape != Y.shape: 417 | return False 418 | else: 419 | nonzero_rows, nonzero_cols = (X - Y).nonzero() 420 | return (len(nonzero_rows) == 0) 421 | else: 422 | return np.array_equal(X, Y) 423 | -------------------------------------------------------------------------------- /src/pydiffmap/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Utilities for constructing diffusion maps. 4 | """ 5 | import numpy as np 6 | import scipy.sparse as sps 7 | 8 | 9 | def lookup_fxn(x, vals): 10 | """ 11 | Builds a simple function that acts as a lookup table. Useful for 12 | constructing bandwidth and weigth functions from existing values. 13 | 14 | Parameters 15 | ---------- 16 | x : iterable 17 | values to input for the function 18 | vals : iterable 19 | Output values for the function. Must be of the same length as x. 20 | 21 | Returns 22 | ------- 23 | lf : function 24 | A function that, when input a value in x, outputs the corresponding 25 | value in vals. 26 | """ 27 | # Build dictionary 28 | lookup = {} 29 | for i in range(len(x)): 30 | lookup[str(x[i])] = vals[i] 31 | 32 | # Define and return lookup function 33 | def lf(xi): 34 | return lookup[str(xi)] 35 | 36 | return lf 37 | 38 | 39 | def sparse_from_fxn(X, K, function, Y=None): 40 | """ 41 | For a function f, constructs a sparse matrix where each element is 42 | f(Y_i, X_j) with the same sparsity structure as the matrix K. 43 | 44 | Parameters 45 | ---------- 46 | neighbors : scikit-learn NearestNeighbors object 47 | Data structure containing the nearest neighbor information. 
48 | X values are drawn from the data in this object. 49 | function : function 50 | Function to apply to the pair Y_i, X_j. Must take only two arguments 51 | and return a number. 52 | Y : iterable or None 53 | Values corresponding to each column of the matrix. If None, defaults 54 | to the data in the neighbors object. 55 | 56 | Returns 57 | ------- 58 | M : scipy sparse csr matrix 59 | Matrix with elements f(Y_i, X_j) for nearest neighbors, and zero 60 | otherwise. Here Y_i is the i'th datapoint in Y, and X_j is the 61 | j'th datapoint in the NearestNeighbors object. 62 | """ 63 | if Y is None: 64 | Y = X 65 | row, col = _get_sparse_row_col(K) 66 | 67 | fxn_vals = [] 68 | for i, j in zip(row, col): 69 | fxn_vals.append(function(Y[i], X[j])) 70 | fxn_vals = np.array(fxn_vals) 71 | return sps.csr_matrix((fxn_vals, (row, col)), shape=K.shape) 72 | 73 | 74 | def _get_sparse_row_col(sparse_mat): 75 | sparse_mat = sparse_mat.tocoo() 76 | return sparse_mat.row, sparse_mat.col 77 | 78 | 79 | def _symmetrize_matrix(K, mode='or'): 80 | """ 81 | Symmetrizes a sparse kernel matrix. 82 | 83 | Parameters 84 | ---------- 85 | K : scipy sparse matrix 86 | The sparse matrix to be symmetrized, with positive elements on the nearest neighbors. 87 | mode : string 88 | The method of symmetrization to be implemented. Current options are 'average', 'and', and 'or'. 89 | 90 | Returns 91 | ------- 92 | K_sym : scipy sparse matrix 93 | Symmetrized kernel matrix. 
94 | """ 95 | 96 | if mode == 'average': 97 | return 0.5*(K + K.transpose()) 98 | elif mode == 'or': 99 | Ktrans = K.transpose() 100 | dK = abs(K - Ktrans) 101 | K = K + Ktrans 102 | K = K + dK 103 | return 0.5*K 104 | elif mode == 'and': 105 | Ktrans = K.transpose() 106 | dK = abs(K - Ktrans) 107 | K = K + Ktrans 108 | K = K - dK 109 | return 0.5*K 110 | else: 111 | raise ValueError('Did not understand symmetrization method') 112 | -------------------------------------------------------------------------------- /src/pydiffmap/visualization.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Some convenient visalisation routines. 4 | """ 5 | from __future__ import absolute_import 6 | 7 | import matplotlib.pyplot as plt 8 | from mpl_toolkits.mplot3d import Axes3D # noqa F401 9 | 10 | 11 | def embedding_plot(dmap_instance, dim=2, scatter_kwargs=None, show=True): 12 | """ 13 | Creates diffusion map embedding scatterplot. By default, the first two diffusion 14 | coordinates are plotted against each other. 15 | 16 | Parameters 17 | ---------- 18 | dmap_instance : DiffusionMap Instance 19 | An instance of the DiffusionMap class. 20 | dim: int, optional, 2 or 3. 21 | Optional argument that controls if a two- or three dimensional plot is produced. 22 | scatter_kwargs : dict, optional 23 | Optional arguments to be passed to the scatter plot, e.g. point color, 24 | point size, colormap, etc. 25 | show : boolean, optional 26 | If true, calls plt.show() 27 | 28 | Returns 29 | ------- 30 | fig : pyplot figure object 31 | Figure object where everything is plotted on. 32 | 33 | Examples 34 | -------- 35 | # Plots the top two diffusion coords, colored by the first coord. 
36 | >>> scatter_kwargs = {'s': 2, 'c': mydmap.dmap[:,0], 'cmap': 'viridis'} 37 | >>> embedding_plot(mydmap, scatter_kwargs) 38 | 39 | """ 40 | if scatter_kwargs is None: 41 | scatter_kwargs = {} 42 | fig = plt.figure(figsize=(6, 6)) 43 | if (dim == 2): 44 | plt.scatter(dmap_instance.dmap[:, 0], dmap_instance.dmap[:, 1], **scatter_kwargs) 45 | plt.title('Embedding given by first two DCs.') 46 | plt.xlabel(r'$\psi_1$') 47 | plt.ylabel(r'$\psi_2$') 48 | elif (dim == 3): 49 | ax = fig.add_subplot(111, projection='3d') 50 | ax.scatter(dmap_instance.dmap[:, 0], dmap_instance.dmap[:, 1], dmap_instance.dmap[:, 2], **scatter_kwargs) 51 | ax.set_title('Embedding given by first three DCs.') 52 | ax.set_xlabel(r'$\psi_1$') 53 | ax.set_ylabel(r'$\psi_2$') 54 | ax.set_zlabel(r'$\psi_3$') 55 | plt.axis('tight') 56 | if show: 57 | plt.show() 58 | return fig 59 | 60 | 61 | def data_plot(dmap_instance, n_evec=1, dim=2, scatter_kwargs=None, show=True): 62 | """ 63 | Creates diffusion map embedding scatterplot. By default, the first two diffusion 64 | coordinates are plotted against each other. This only plots against the first two or three 65 | (as controlled by 'dim' parameter) dimensions of the data, however: 66 | effectively this assumes the data is two resp. three dimensional. 67 | 68 | Parameters 69 | ---------- 70 | dmap_instance : DiffusionMap Instance 71 | An instance of the DiffusionMap class. 72 | n_evec: int, optional 73 | The eigenfunction that should be used to color the plot. 74 | dim: int, optional, 2 or 3. 75 | Optional argument that controls if a two- or three dimensional plot is produced. 76 | scatter_kwargs : dict, optional 77 | Optional arguments to be passed to the scatter plot, e.g. point color, 78 | point size, colormap, etc. 79 | show : boolean, optional 80 | If true, calls plt.show() 81 | 82 | Returns 83 | ------- 84 | fig : pyplot figure object 85 | Figure object where everything is plotted on. 
86 | """ 87 | if scatter_kwargs is None: 88 | scatter_kwargs = {} 89 | fig = plt.figure(figsize=(6, 6)) 90 | if (dim == 2): 91 | plt.scatter(dmap_instance.data[:, 0], dmap_instance.data[:, 1], c=dmap_instance.dmap[:, n_evec-1], **scatter_kwargs) 92 | plt.title('Data coloured with first DC.') 93 | plt.xlabel('x') 94 | plt.ylabel('y') 95 | elif (dim == 3): 96 | ax = fig.add_subplot(111, projection='3d') 97 | ax.scatter(dmap_instance.data[:, 0], dmap_instance.data[:, 1], dmap_instance.data[:, 2], c=dmap_instance.dmap[:, n_evec-1], **scatter_kwargs) 98 | ax.set_title('Data coloured with first DC.') 99 | ax.set_xlabel('x') 100 | ax.set_ylabel('y') 101 | ax.set_zlabel('z') 102 | plt.axis('tight') 103 | if show: 104 | plt.show() 105 | return fig 106 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from scipy.special import erfinv 4 | 5 | 6 | @pytest.fixture(scope='module') 7 | def spherical_data(): 8 | # Construct dataset 9 | phi = np.pi*np.linspace(-1, 1, 61)[1:] 10 | theta = np.pi*np.linspace(-1, 1, 33)[1:-1] 11 | Phi, Theta = np.meshgrid(phi, theta) 12 | Phi = Phi.ravel() 13 | Theta = Theta.ravel() 14 | 15 | X = np.cos(Theta)*np.cos(Phi) 16 | Y = np.cos(Theta)*np.sin(Phi) 17 | Z = np.sin(Theta) 18 | return np.array([X, Y, Z]).transpose(), Phi, Theta 19 | 20 | 21 | @pytest.fixture(scope='module') 22 | def uniform_2d_data(): 23 | x = np.linspace(0., 1., 61)*2.*np.pi 24 | y = np.linspace(0., 1., 31)*np.pi 25 | X, Y = np.meshgrid(x, y) 26 | X = X.ravel() 27 | Y = Y.ravel() 28 | data = np.array([X, Y]).transpose() 29 | return data, X, Y 30 | 31 | 32 | @pytest.fixture(scope='module') 33 | def harmonic_1d_data(): 34 | N = 201 35 | delta = 1. / (N+1) 36 | xgrid = 2 * np.arange(1, N+1) * delta - 1. 
37 | x = np.sqrt(2) * erfinv(xgrid) 38 | return x.reshape(-1, 1) 39 | -------------------------------------------------------------------------------- /tests/test_diffusionmap.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from pydiffmap import diffusion_map as dm 5 | 6 | 7 | class TestDiffusionMap(object): 8 | @pytest.mark.parametrize('epsilon', [0.002, 'bgh']) 9 | def test_1Dstrip_evals(self, epsilon): 10 | """ 11 | Test that we compute the correct eigenvalues on a 1d strip of length 2*pi. 12 | Diffusion map parameters in this test are hand-selected to give good results. 13 | Eigenvalue approximation will fail if k is set too small, or epsilon not optimal (sensitive). 14 | """ 15 | # Setup true values to test again. 16 | # real_evals = k^2 for k in 0.5*[1 2 3 4] 17 | real_evals = -0.25*np.array([1, 4, 9, 16]) 18 | X = np.linspace(0., 1., 81)*2.*np.pi 19 | data = np.array([X]).transpose() 20 | THRESH = 0.05 21 | # Setup diffusion map 22 | mydmap = dm.DiffusionMap.from_sklearn(n_evecs=4, epsilon=epsilon, alpha=1.0, k=20) 23 | mydmap.fit(data) 24 | 25 | # Check that relative error values are beneath tolerance. 26 | errors_eval = abs((mydmap.evals - real_evals)/real_evals) 27 | total_error = np.max(errors_eval) 28 | 29 | assert(total_error < THRESH) 30 | 31 | @pytest.mark.parametrize('epsilon', [0.002, 'bgh']) 32 | def test_1Dstrip_evecs(self, epsilon): 33 | """ 34 | Test that we compute the correct eigenvectors (cosines) on a 1d strip of length 2*pi. 35 | Diffusion map parameters in this test are hand-selected to give good results. 36 | Eigenvector approximation will fail if epsilon is set way too small or too large (robust). 37 | """ 38 | # Setup true values to test again. 
39 | # real_evecs = cos(k*x) for k in 0.5*[1 2 3 4] 40 | # Setup data and accuracy threshold 41 | X = np.linspace(0., 1., 81)*2.*np.pi 42 | data = np.array([X]).transpose() 43 | THRESH = 0.003 44 | # Setup diffusion map 45 | mydmap = dm.DiffusionMap.from_sklearn(n_evecs=4, epsilon=epsilon, alpha=1.0, k=40) 46 | mydmap.fit_transform(data) 47 | errors_evec = [] 48 | for k in np.arange(4): 49 | errors_evec.append(abs(np.corrcoef(np.cos(0.5*(k+1)*X), mydmap.evecs[:, k])[0, 1])) 50 | 51 | # Check that relative error values are beneath tolerance. 52 | total_error = 1 - np.min(errors_evec) 53 | assert(total_error < THRESH) 54 | 55 | @pytest.mark.parametrize('epsilon', [0.005, 'bgh']) 56 | def test_1Dstrip_nonunif_evals(self, epsilon): 57 | """ 58 | Test that we compute the correct eigenvalues on a 1d strip of length 2*pi with nonuniform sampling. 59 | Diffusion map parameters in this test are hand-selected to give good results. 60 | Eigenvalue approximation will fail if k is set too small, or epsilon not optimal (sensitive). 61 | """ 62 | # Setup true values to test again. 63 | # real_evals = k^2 for k in 0.5*[1 2 3 4] 64 | real_evals = -0.25*np.array([1, 4, 9, 16]) 65 | # Setup data and accuracy threshold 66 | X = (np.linspace(0., 1., 81)**2)*2.*np.pi 67 | data = np.array([X]).transpose() 68 | THRESH = 0.1 69 | # Setup diffusion map 70 | mydmap = dm.DiffusionMap.from_sklearn(n_evecs=4, epsilon=epsilon, alpha=1.0, k=40) 71 | mydmap.fit_transform(data) 72 | 73 | # Check that relative error values are beneath tolerance. 74 | errors_eval = abs((mydmap.evals- real_evals)/real_evals) 75 | total_error = np.max(errors_eval) 76 | assert(total_error < THRESH) 77 | 78 | @pytest.mark.parametrize('epsilon', [0.005, 'bgh']) 79 | def test_1Dstrip_nonunif_evecs(self, epsilon): 80 | """ 81 | Test that we compute the correct eigenvectors (cosines) on a 1d strip of length 2*pi with nonuniform sampling. 82 | Diffusion map parameters in this test are hand-selected to give good results. 
83 | Eigenvector approximation will fail if epsilon is set way too small or too large (robust). 84 | """ 85 | # Setup true values to test again. 86 | # real_evecs = cos(k*x) for k in 0.5*[1 2 3 4] 87 | # Setup data and accuracy threshold 88 | X = (np.linspace(0., 1., 81)**2)*2.*np.pi 89 | data = np.array([X]).transpose() 90 | THRESH = 0.01 91 | # Setup diffusion map 92 | mydmap = dm.DiffusionMap.from_sklearn(n_evecs=4, epsilon=epsilon, alpha=1.0, k=40) 93 | mydmap.fit_transform(data) 94 | errors_evec = [] 95 | for k in np.arange(4): 96 | errors_evec.append(abs(np.corrcoef(np.cos(0.5*(k+1)*X), mydmap.evecs[:, k])[0, 1])) 97 | 98 | # Check that relative error values are beneath tolerance. 99 | total_error = 1 - np.min(errors_evec) 100 | assert(total_error < THRESH) 101 | 102 | def test_2Dstrip_evals(self, uniform_2d_data): 103 | """ 104 | Test that we compute the correct eigenvalues on a 2d strip of length 2*pi. 105 | Diffusion map parameters in this test are hand-selected to give good results. 106 | Eigenvalue approximation will fail if k is set too small, or epsilon not optimal (sensitive). 107 | """ 108 | # Setup true values to test again. 109 | # real_evals = kx^2 + ky^2 for kx = 0.5*[1 0 2 1] and ky = [0 1 0 1]. 110 | real_evals = -0.25*np.array([1, 4, 4, 5]) 111 | # Setup data and accuracy threshold 112 | data, X, Y = uniform_2d_data 113 | THRESH = 0.2 114 | 115 | eps = 0.0025 116 | mydmap = dm.DiffusionMap.from_sklearn(n_evecs=4, alpha=1.0, k=100, epsilon=eps) 117 | mydmap.fit(data) 118 | 119 | # Check that relative error values are beneath tolerance. 120 | errors_eval = abs((mydmap.evals- real_evals)/real_evals) 121 | total_error = np.max(errors_eval) 122 | assert(total_error < THRESH) 123 | 124 | def test_2Dstrip_evecs(self, uniform_2d_data): 125 | """ 126 | Test that we compute the correct eigenvectors (cosines) on a 2d strip of length 2*pi. 127 | Diffusion map parameters in this test are hand-selected to give good results. 
128 | Eigenvector approximation will fail if epsilon is set way too small or too large (robust). 129 | """ 130 | # Setup true values to test again. 131 | # real_evecs = cos(kx*x)*cos(ky*y) for kx = 0.5*[1 0 2 1] and ky = [0 1 0 1]. 132 | # Setup data and accuracy threshold 133 | data, X, Y = uniform_2d_data 134 | THRESH = 0.01 135 | 136 | eps = 0.0025 137 | mydmap = dm.DiffusionMap.from_sklearn(n_evecs=4, alpha=1.0, k=100, epsilon=eps) 138 | mydmap.fit(data) 139 | errors_evec = [] 140 | errors_evec.append(abs(np.corrcoef(np.cos(0.5*1*X), mydmap.evecs[:, 0])[0, 1])) 141 | errors_evec.append(abs(np.corrcoef(np.cos(Y), mydmap.evecs[:, 1])[0, 1])) 142 | errors_evec.append(abs(np.corrcoef(np.cos(0.5*2*X), mydmap.evecs[:, 2])[0, 1])) 143 | errors_evec.append(abs(np.corrcoef(np.cos(0.5*1*X)*np.cos(Y), mydmap.evecs[:, 3])[0, 1])) 144 | 145 | # Check that relative error values are beneath tolerance. 146 | total_error = 1 - np.min(errors_evec) 147 | assert(total_error < THRESH) 148 | 149 | def test_sphere_evals(self, spherical_data): 150 | """ 151 | Test that we compute the correct eigenvalues on a 2d sphere embedded in 3d. 152 | Diffusion map parameters in this test are hand-selected to give good results. 153 | Eigenvalue approximation will fail if k is set too small, or epsilon not optimal (sensitive). 154 | """ 155 | data, Phi, Theta = spherical_data 156 | # Setup true values to test against. 157 | real_evals = -1 * np.array([2, 2, 2, 6]) # =l(l+1) 158 | THRESH = 0.1 159 | eps = 0.015 160 | mydmap = dm.DiffusionMap.from_sklearn(n_evecs=4, alpha=1.0, k=400, epsilon=eps) 161 | mydmap.fit(data) 162 | 163 | # Check eigenvalues pass below error tolerance. 164 | errors_eval = abs((mydmap.evals- real_evals)/real_evals) 165 | max_eval_error = np.max(errors_eval) 166 | assert(max_eval_error < THRESH) 167 | 168 | def test_sphere_evecs(self, spherical_data): 169 | """ 170 | Test that we compute the correct eigenvectors (spherical harmonics) on a 2d sphere embedded in R^3. 
171 | Diffusion map parameters in this test are hand-selected to give good results. 172 | Eigenvector approximation will fail if epsilon is set way too small or too large (robust). 173 | """ 174 | data, Phi, Theta = spherical_data 175 | THRESH = 0.001 176 | eps = 0.015 177 | mydmap = dm.DiffusionMap.from_sklearn(n_evecs=4, alpha=1.0, k=400, epsilon=eps) 178 | mydmap.fit(data) 179 | # rotate sphere so that maximum of first DC is at the north pole 180 | northpole = np.argmax(mydmap.dmap[:, 0]) 181 | phi_n = Phi[northpole] 182 | theta_n = Theta[northpole] 183 | R = np.array([[np.sin(theta_n)*np.cos(phi_n), np.sin(theta_n)*np.sin(phi_n), -np.cos(theta_n)], 184 | [-np.sin(phi_n), np.cos(phi_n), 0], 185 | [np.cos(theta_n)*np.cos(phi_n), np.cos(theta_n)*np.sin(phi_n), np.sin(theta_n)]]) 186 | data_rotated = np.dot(R, data.transpose()) 187 | # check that error is beneath tolerance. 188 | evec_error = 1 - np.corrcoef(mydmap.dmap[:, 0], data_rotated[2, :])[0, 1] 189 | assert(evec_error < THRESH) 190 | 191 | def test_explicit_density(self, harmonic_1d_data): 192 | """ 193 | Test explicit density function. 194 | This test tests the implementation and is independent on all the other parameters. 195 | """ 196 | 197 | data = harmonic_1d_data 198 | density_fxn = lambda x: (1.0/(np.sqrt(np.pi * 2))) * np.exp(-0.5 * x**2).squeeze() 199 | 200 | mydmap = dm.DiffusionMap.from_sklearn(n_evecs=2, epsilon=0.1, alpha=0.5, k=100, density_fxn=density_fxn) 201 | mydmap.fit(data) 202 | 203 | err = np.max((np.abs(mydmap.q / np.linalg.norm(mydmap.q) - density_fxn(data) / np.linalg.norm(density_fxn(data))))) 204 | 205 | assert(err == 0) 206 | 207 | @pytest.mark.parametrize('epsilon', [0.1, 'bgh']) 208 | def test_explicit_density_kde(self, harmonic_1d_data, epsilon): 209 | """ 210 | Test the implicit kernel density estimator. Results depend on knearest neighbors 211 | and epsilon. This test is not very stable, tolerancy threshold is therefore chosen high. 
212 | """ 213 | THRESH = 0.2 214 | data = harmonic_1d_data 215 | # reject_outliers to stabilise 216 | m = 2 217 | data = data[abs(data - np.mean(data)) < m * np.std(data), np.newaxis] 218 | 219 | density_fxn = lambda x: (1.0/(np.sqrt(np.pi * 2))) * np.exp(-0.5 * x**2).squeeze() 220 | 221 | mydmap = dm.DiffusionMap.from_sklearn(n_evecs=2, epsilon=epsilon, alpha=0.5, k=100) 222 | dmap = mydmap.fit(data) 223 | 224 | true = density_fxn(data) / np.linalg.norm(density_fxn(data)) 225 | kde = mydmap.q / np.linalg.norm(mydmap.q) 226 | 227 | err = np.linalg.norm(true - kde) / np.linalg.norm(kde) 228 | 229 | assert(err < THRESH) 230 | 231 | class TestNystroem(object): 232 | @pytest.mark.parametrize('method', ['nystroem', 'power']) 233 | def test_2Dstrip_nystroem(self, uniform_2d_data, method): 234 | """ 235 | Test the nystroem extension in the transform() function. 236 | """ 237 | # Setup data and accuracy threshold 238 | data, X, Y = uniform_2d_data 239 | THRESH = 0.01 240 | # Setup diffusion map 241 | eps = 0.01 242 | mydmap = dm.DiffusionMap.from_sklearn(n_evecs=1, alpha=1.0, k=100, epsilon=eps, oos=method) 243 | mydmap.fit(data) 244 | # Setup values to test against (regular grid) 245 | x_test, y_test = np.meshgrid(np.linspace(0, 2*np.pi, 80), np.linspace(0, np.pi, 40)) 246 | X_test = np.array([x_test.ravel(), y_test.ravel()]).transpose() 247 | # call nystroem extension 248 | dmap_ext = mydmap.transform(X_test) 249 | # extract first diffusion coordinate and normalize 250 | V_test = dmap_ext[:, 0] 251 | V_test = V_test/np.linalg.norm(V_test) 252 | # true dominant eigenfunction = cos(0.5*x), normalize 253 | V_true = np.cos(.5*x_test).ravel() 254 | V_true = V_true/np.linalg.norm(V_true) 255 | # compute L2 error, deal with remaining sign ambiguity 256 | error = min([np.linalg.norm(V_true+V_test), np.linalg.norm(V_true-V_test)]) 257 | assert(error < THRESH) 258 | 259 | 260 | class TestWeighting(object): 261 | @pytest.mark.parametrize('epsilon', [0.002, 'bgh']) 262 | 
@pytest.mark.parametrize('oos', ['power', 'nystroem', False])
    @pytest.mark.parametrize('dmap_method', ['base', 'TMDmap'])
    def test_1Dstrip_evecs(self, epsilon, oos, dmap_method):
        """
        Test measure reweighting. We reweight the uniform distribution to
        approximate a Gaussian distribution. For numerical reasons, we truncate
        the domain to the interval [-5, 5].

        Here, we test eigenvector accuracy. Eigenvectors should be the
        probabalists Hermite polynomials.
        """
        # Setup data and accuracy threshold
        # X = np.linspace(-5., 5., 201)
        X = np.linspace(0, 2.5, 101)**2
        X = np.hstack([-1 * np.copy(X[1:][::-1]), X])
        # oos=False means "evaluate in-sample": reuse the training points and
        # fall back to the nystroem extension.
        if not oos:
            Y = X
            oos = 'nystroem'
        else:
            Y = np.linspace(-5., 5., 101)
        data_x = np.array([X]).transpose()
        data_y = np.array([Y]).transpose()
        EVEC_THRESH = 0.005
        EVAL_THRESH = 0.003
        # Setup true values to test against.
        real_evecs = [Y, Y**2-1, Y**3-3*Y,
                      Y**4-6*Y**2+3]  # Hermite polynomials
        real_evals = -1 * np.arange(1, 5)
        # Setup diffusion map
        if dmap_method == 'TMDmap':
            com_fxn = lambda y_j: np.exp(-.5*np.dot(y_j, y_j))
            mydmap = dm.TMDmap(alpha=1., n_evecs=4, epsilon=epsilon, k=100, change_of_measure=com_fxn, oos=oos)
        else:
            weight_fxn = lambda y_j: np.exp(-.25*np.dot(y_j, y_j))
            mydmap = dm.DiffusionMap.from_sklearn(alpha=1., n_evecs=4, epsilon=epsilon, k=100, weight_fxn=weight_fxn, oos=oos)

        # Fit data and build dmap
        mydmap.fit(data_x)
        evecs = mydmap.transform(data_y)
        errors_evec = []
        for k in range(4):
            errors_evec.append(abs(np.corrcoef(real_evecs[k], evecs[:, k])[0, 1]))

        # Check that relative evec error values are beneath tolerance.
        total_evec_error = 1 - np.min(errors_evec)
        assert(total_evec_error < EVEC_THRESH)
        # Check that relative eval error values are beneath tolerance.
        errors_eval = abs((mydmap.evals- real_evals)/real_evals)
        # NOTE(review): np.min asserts only on the *most* accurate eigenvalue;
        # the sibling tests all take np.max of the relative errors. Likely
        # intended np.max -- confirm the 0.003 threshold still passes before
        # changing.
        total_eval_error = np.min(errors_eval)
        assert(total_eval_error < EVAL_THRESH)


class TestBandwidths(object):
    @pytest.mark.parametrize('alpha_beta', [(0., -1./3), (-1./4, -1./2)])
    @pytest.mark.parametrize('explicit_bandwidth', [False, True])
    def test_bandwidth_norm(self, harmonic_1d_data, alpha_beta, explicit_bandwidth):
        data = harmonic_1d_data
        alpha, beta = alpha_beta
        X = data[:, 0]
        THRESHS = np.array([0.01, 0.01, 0.1])
        # Reference eigenfunctions: (scaled) probabilists' Hermite polynomials.
        ref_evecs = [X, X**2, (X**3 - 3 * X)/np.sqrt(6)]

        if explicit_bandwidth:
            bandwidth_type = lambda x: np.exp(-1. * x[:, 0]**2 * (beta / 2.))  # bandwidth is density^beta
        else:
            bandwidth_type = beta

        mydmap = dm.DiffusionMap.from_sklearn(n_evecs=3, epsilon='bgh', alpha=alpha,
                                              k=50, bandwidth_type=bandwidth_type, bandwidth_normalize=True)
        mydmap.fit_transform(data)
        errors_evec = []
        for k in np.arange(3):
            errors_evec.append(abs(np.corrcoef(ref_evecs[k], mydmap.evecs[:, k])[0, 1]))
        # Check that relative error values are beneath tolerance.
        total_error = 1 - np.array(errors_evec)
        assert((total_error < THRESHS).all())

    @pytest.mark.parametrize('alpha_beta', [(0., -1./3), (-1./4, -1./2)])
    @pytest.mark.parametrize('explicit_bandwidth', [False, True])
    def test_bandwidth_norm_oos(self, harmonic_1d_data, alpha_beta, explicit_bandwidth):
        # Same as test_bandwidth_norm, but evaluated at out-of-sample points
        # via the 'power' extension.
        data = harmonic_1d_data
        alpha, beta = alpha_beta
        oos_data = np.linspace(-1.5, 1.5, 51).reshape(-1, 1)
        Y = oos_data.ravel()
        THRESHS = np.array([0.01, 0.01, 0.1])
        ref_evecs = [Y, Y**2, (Y**3 - 3 * Y)/np.sqrt(6)]

        if explicit_bandwidth:
            bandwidth_type = lambda x: np.exp(-1. * x[:, 0]**2 * (beta / 2.))  # bandwidth is density^beta
        else:
            bandwidth_type = beta
        mydmap = dm.DiffusionMap.from_sklearn(n_evecs=3, epsilon='bgh', alpha=alpha,
                                              k=50, bandwidth_type=bandwidth_type, bandwidth_normalize=True,
                                              oos='power')
        mydmap.fit(data)
        oos_evecs = mydmap.transform(oos_data)
        errors_evec = []
        for k in np.arange(3):
            errors_evec.append(abs(np.corrcoef(ref_evecs[k], oos_evecs[:, k])[0, 1]))
        # Check that relative error values are beneath tolerance.
        total_error = 1 - np.array(errors_evec)
        assert((total_error < THRESHS).all())
--------------------------------------------------------------------------------
/tests/test_kernel.py:
--------------------------------------------------------------------------------
import numpy as np
import pytest

from pydiffmap import kernel
from scipy.spatial.distance import cdist
from sklearn.neighbors import NearestNeighbors
import scipy.sparse as sps

# Module-level fixtures shared by the parametrized tests below.
x_values_set = [np.vstack((np.linspace(-1, 1, 11), np.arange(11))).T]  # set of X vals
y_values_set = [None, np.vstack((np.linspace(-1, 1, 11), np.arange(11))).T, np.arange(6).reshape(-1, 2), np.arange(22).reshape(-1, 2)]  # all sets of Y's
bandwidth_fxns = [None, lambda x: np.ones(x.shape[0]), lambda x: x[:, 1]/10. + 1]
epsilons = [10., 1.]  # Possible epsilons


class TestKernel(object):
    # These decorators run the test against all possible y, epsilon values.
17 | @pytest.mark.parametrize('x_values', x_values_set) 18 | @pytest.mark.parametrize('y_values', y_values_set) 19 | @pytest.mark.parametrize('epsilon', epsilons) 20 | @pytest.mark.parametrize('bandwidth_fxn', bandwidth_fxns) 21 | @pytest.mark.parametrize('metric, metric_params', [ 22 | ('euclidean', None), 23 | ('minkowski', {'p': 1}) 24 | ]) 25 | def test_matrix_output(self, x_values, y_values, epsilon, bandwidth_fxn, metric, metric_params): 26 | """ 27 | Test that we are returning the correct kernel values. 28 | """ 29 | # Setup true values to test again. 30 | if y_values is None: 31 | y_values_ref = x_values 32 | else: 33 | y_values_ref = y_values 34 | if metric == 'minkowski': 35 | pw_distance = cdist(y_values_ref, x_values, metric='minkowski', p=metric_params['p']) 36 | else: 37 | pw_distance = cdist(y_values_ref, x_values, metric=metric) 38 | if bandwidth_fxn is None: 39 | ref_bandwidth_fxn = lambda x: np.ones(x.shape[0]) 40 | else: 41 | ref_bandwidth_fxn = bandwidth_fxn 42 | x_bandwidth = ref_bandwidth_fxn(x_values) 43 | y_bandwidth = ref_bandwidth_fxn(y_values_ref).reshape(-1, 1) 44 | scaled_sq_dists = pw_distance**2 / (x_bandwidth * y_bandwidth) 45 | true_values = np.exp(-1.*scaled_sq_dists/(4. * epsilon)) 46 | 47 | # Construct the kernel and fit to data. 48 | mykernel = kernel.Kernel(kernel_type='gaussian', metric=metric, 49 | metric_params=metric_params, epsilon=epsilon, 50 | k=x_values.shape[0],bandwidth_type=bandwidth_fxn) 51 | mykernel.fit(x_values) 52 | K_matrix = mykernel.compute(y_values).toarray() 53 | 54 | # Check that error values are beneath tolerance. 
55 | error_values = (K_matrix-true_values).ravel() 56 | total_error = np.linalg.norm(error_values) 57 | assert(total_error < 1E-8) 58 | 59 | @pytest.mark.parametrize('x_values', x_values_set) 60 | @pytest.mark.parametrize('y_values', y_values_set) 61 | @pytest.mark.parametrize('use_sparse', [True, False]) 62 | @pytest.mark.parametrize('metric, metric_params', [ 63 | ('euclidean', None), 64 | ('minkowski', {'p': 1}) 65 | ]) 66 | def test_sparse_input(self, x_values, y_values, metric, metric_params, use_sparse): 67 | """ 68 | Test that we are returning the correct kernel values. 69 | """ 70 | # Setup true values to test again. 71 | epsilon = 10. 72 | bandwidth_fxn = None 73 | if y_values is None: 74 | y_values_ref = x_values 75 | else: 76 | y_values_ref = y_values 77 | if metric == 'minkowski': 78 | pw_distance = cdist(y_values_ref, x_values, metric='minkowski', p=metric_params['p']) 79 | else: 80 | pw_distance = cdist(y_values_ref, x_values, metric=metric) 81 | if bandwidth_fxn is None: 82 | ref_bandwidth_fxn = lambda x: np.ones(x.shape[0]) 83 | else: 84 | ref_bandwidth_fxn = bandwidth_fxn 85 | if use_sparse: 86 | x_values = sps.csr_matrix(x_values) 87 | y_values_ref = sps.csr_matrix(y_values_ref) 88 | x_bandwidth = ref_bandwidth_fxn(x_values) 89 | y_bandwidth = ref_bandwidth_fxn(y_values_ref).reshape(-1, 1) 90 | scaled_sq_dists = pw_distance**2 / (x_bandwidth * y_bandwidth) 91 | true_values = np.exp(-1.*scaled_sq_dists/(4. * epsilon)) 92 | 93 | # Construct the kernel and fit to data. 94 | mykernel = kernel.Kernel(kernel_type='gaussian', metric=metric, 95 | metric_params=metric_params, epsilon=epsilon, 96 | k=x_values.shape[0], bandwidth_type=bandwidth_fxn) 97 | mykernel.fit(x_values) 98 | K_matrix = mykernel.compute(y_values).toarray() 99 | 100 | # Check that error values are beneath tolerance. 
101 | error_values = (K_matrix-true_values).ravel() 102 | total_error = np.linalg.norm(error_values) 103 | assert(total_error < 1E-8) 104 | 105 | @pytest.mark.parametrize('k', np.arange(2, 14, 2)) 106 | @pytest.mark.parametrize('neighbor_params', [{'algorithm': 'auto'}, {'algorithm': 'ball_tree'}]) 107 | @pytest.mark.parametrize('x_values', x_values_set) 108 | def test_neighborlists(self, x_values, k, neighbor_params): 109 | """ 110 | Test that neighborlisting gives the right number of elements. 111 | """ 112 | # Correct number of nearest neighbors. 113 | k0 = min(k, x_values.shape[0]) 114 | 115 | # Construct kernel matrix. 116 | mykernel = kernel.Kernel(kernel_type='gaussian', metric='euclidean', 117 | epsilon=1., k=k0, neighbor_params=neighbor_params) 118 | mykernel.fit(x_values) 119 | K_matrix = mykernel.compute(x_values) 120 | 121 | # Check if each row has correct number of elements 122 | row_has_k_elements = (K_matrix.nnz == k0*x_values.shape[0]) 123 | assert(row_has_k_elements) 124 | 125 | @pytest.mark.parametrize('eps_method', ['bgh', 'bgh_generous']) 126 | def test_auto_epsilon_selection(self, eps_method): 127 | X = np.arange(100).reshape(-1, 1) 128 | mykernel = kernel.Kernel(kernel_type='gaussian', metric='euclidean', 129 | epsilon=eps_method, k=10) 130 | mykernel.fit(X) 131 | if eps_method == 'bgh': 132 | assert(mykernel.epsilon_fitted == 0.25) 133 | else: 134 | assert(mykernel.epsilon_fitted == 0.50) 135 | assert(mykernel.d == 1.0) 136 | 137 | 138 | class TestKNN(object): 139 | def test_harmonic_kde(self, harmonic_1d_data): 140 | # Setup Data 141 | data = harmonic_1d_data 142 | Y = np.linspace(-2.5, 2.5, 201) 143 | oos_data = Y.reshape(-1, 1) 144 | ref_density = np.exp(-Y**2 / 2.) 
/ np.sqrt(2 * np.pi) 145 | THRESH = 0.003 146 | # Build kde object 147 | nneighbs = NearestNeighbors(n_neighbors=120) 148 | nneighbs.fit(data) 149 | my_kde = kernel.NNKDE(nneighbs, k=16) 150 | my_kde.fit() 151 | density = my_kde.compute(oos_data) 152 | error = np.sqrt(np.mean((density - ref_density)**2)) 153 | assert(error < THRESH) 154 | 155 | 156 | class TestBGHEpsilonSelection(object): 157 | @pytest.mark.parametrize('k', [10, 30, 100]) 158 | def test_1D_uniform_data(self, k): 159 | X = np.arange(100).reshape(-1, 1) 160 | neigh = NearestNeighbors(n_neighbors=k) 161 | sq_dist = neigh.fit(X).kneighbors_graph(X, mode='distance').data**2. 162 | epsilons = 2**np.arange(-20., 20.) 163 | eps, d = kernel.choose_optimal_epsilon_BGH(sq_dist, epsilons) 164 | assert(eps == 0.25) 165 | assert(d == 1.0) 166 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | 4 | from pydiffmap import utils 5 | from scipy.sparse import csr_matrix 6 | from sklearn.neighbors import NearestNeighbors 7 | 8 | x_1d = np.arange(10) 9 | x_2d = np.arange(20).reshape(10, 2) 10 | y_1d = np.arange(10) + 0.5 11 | y_2d = np.arange(20).reshape(10, 2) + 0.5 12 | 13 | 14 | class TestLookupFunction(object): 15 | @pytest.mark.parametrize('x', [x_1d, x_2d]) 16 | @pytest.mark.parametrize('vals', [y_1d, y_2d]) 17 | def test_lookup_fxn(self, x, vals): 18 | N = len(x) 19 | shuffle_indices = np.arange(N) 20 | np.random.shuffle(shuffle_indices) 21 | lf = utils.lookup_fxn(x, vals) 22 | shuffle_y = np.array([lf(xi) for xi in x[shuffle_indices]]) 23 | assert((shuffle_y == vals[shuffle_indices]).all()) 24 | 25 | 26 | class TestSparseFromFxn(object): 27 | @pytest.mark.parametrize('Y', [y_2d, None]) 28 | def test_sparse_from_fxn(self, Y): 29 | nneighbors = NearestNeighbors(10) 30 | nneighbors.fit(x_2d) 31 | Y2 = Y 32 | if Y2 is None: 33 | Y2 = x_2d 
34 | K = nneighbors.kneighbors_graph(Y2, mode='connectivity') 35 | ref_mat = nneighbors.kneighbors_graph(Y2, mode='distance') 36 | dist_fxn = lambda Y, X: np.linalg.norm(Y - X) 37 | dist_mat = utils.sparse_from_fxn(x_2d, K, dist_fxn, Y) 38 | assert(np.linalg.norm((dist_mat - ref_mat).data) < 1e-10) 39 | 40 | 41 | class TestSymmetrization(): 42 | test_mat = csr_matrix([[0, 2.], [0, 3.]]) 43 | 44 | def test_and_symmetrization(self): 45 | ref_mat = np.array([[0, 0], [0, 3.]]) 46 | symmetrized = utils._symmetrize_matrix(self.test_mat, mode='and') 47 | symmetrized = symmetrized.toarray() 48 | assert (np.linalg.norm(ref_mat - symmetrized) == 0.) 49 | 50 | def test_or_symmetrization(self): 51 | ref_mat = np.array([[0, 2.], [2., 3.]]) 52 | symmetrized = utils._symmetrize_matrix(self.test_mat, mode='or') 53 | symmetrized = symmetrized.toarray() 54 | assert (np.linalg.norm(ref_mat - symmetrized) == 0.) 55 | 56 | def test_avg_symmetrization(self): 57 | ref_mat = np.array([[0, 1.], [1., 3.]]) 58 | symmetrized = utils._symmetrize_matrix(self.test_mat, mode='average') 59 | symmetrized = symmetrized.toarray() 60 | assert (np.linalg.norm(ref_mat - symmetrized) == 0.) 
61 | -------------------------------------------------------------------------------- /tests/test_visualization.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | import pytest 4 | 5 | from pydiffmap import diffusion_map as dm 6 | 7 | 8 | @pytest.fixture(scope='module') 9 | def dummy_dmap(uniform_2d_data): 10 | data, X, Y = uniform_2d_data 11 | print(data) 12 | mydmap = dm.DiffusionMap.from_sklearn(n_evecs=3, k=5) 13 | mydmap.fit(data) 14 | return mydmap 15 | 16 | 17 | if sys.version_info[0] >= 3: 18 | from pydiffmap import visualization as viz 19 | import matplotlib.pyplot as plt 20 | 21 | class TestEmbeddingPlot(): 22 | @pytest.mark.parametrize('dim', [2, 3]) 23 | def test_no_kwargs(self, dummy_dmap, dim): 24 | mydmap = dummy_dmap 25 | fig = viz.embedding_plot(mydmap, dim=dim, scatter_kwargs=None, show=False) 26 | assert(fig) 27 | 28 | def test_fixed_coloring(self, dummy_dmap): 29 | mydmap = dummy_dmap 30 | scatter_kwargs = {'c': 'r'} 31 | true_coloring = (1.0, 0., 0., 1) 32 | fig = viz.embedding_plot(mydmap, scatter_kwargs=scatter_kwargs, show=False) 33 | SC = fig.axes[0].collections[0] 34 | assert(np.all(SC._facecolors[0] == true_coloring)) 35 | 36 | @pytest.mark.parametrize('size', [4., np.arange(1, 82)]) 37 | def test_size(self, dummy_dmap, size): 38 | mydmap = dummy_dmap 39 | scatter_kwargs = {'s': size} 40 | fig = viz.embedding_plot(mydmap, scatter_kwargs=scatter_kwargs, show=False) 41 | SC = fig.axes[0].collections[0] 42 | actual_sizes = SC.get_sizes() 43 | assert(np.all(actual_sizes == size)) 44 | 45 | @pytest.mark.parametrize('cmap', [None, 'Blues', plt.cm.Spectral]) 46 | def test_colormap(self, dummy_dmap, cmap): 47 | # This just tests if the code runs... 48 | # Replace with something more stringent? 
49 | mydmap = dummy_dmap 50 | scatter_kwargs = {'c': mydmap.dmap[:, 0], 'cmap': cmap} 51 | fig = viz.embedding_plot(mydmap, scatter_kwargs=scatter_kwargs, show=False) 52 | assert(fig) 53 | 54 | class TestDataPlot(): 55 | def test_no_kwargs(self, dummy_dmap): 56 | mydmap = dummy_dmap 57 | fig = viz.data_plot(mydmap, scatter_kwargs=None, show=False) 58 | assert(fig) 59 | 60 | @pytest.mark.parametrize('size', [4., np.arange(1, 82)]) 61 | def test_size(self, dummy_dmap, size): 62 | mydmap = dummy_dmap 63 | scatter_kwargs = {'s': size} 64 | fig = viz.data_plot(mydmap, 1, scatter_kwargs=scatter_kwargs, show=False) 65 | SC = fig.axes[0].collections[0] 66 | actual_sizes = SC.get_sizes() 67 | assert(np.all(actual_sizes == size)) 68 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | ; a generative tox configuration, see: https://tox.readthedocs.io/en/latest/config.html#generative-envlist 2 | [testenv:bootstrap] 3 | deps = 4 | jinja2 5 | matrix 6 | tox 7 | skip_install = true 8 | commands = 9 | python ci/bootstrap.py --no-env 10 | passenv = 11 | * 12 | 13 | [tox] 14 | envlist = 15 | clean, 16 | check, 17 | {py27,py34,py35,py36,py37}, 18 | report, 19 | docs 20 | 21 | [testenv] 22 | basepython = 23 | py27: {env:TOXPYTHON:python2.7} 24 | py35: {env:TOXPYTHON:python3.5} 25 | {py36,docs}: {env:TOXPYTHON:python3.6} 26 | py37: {env:TOXPYTHON:python3.7} 27 | {bootstrap,clean,check,report,codecov}: {env:TOXPYTHON:python3} 28 | setenv = 29 | PYTHONPATH={toxinidir}/tests 30 | PYTHONUNBUFFERED=yes 31 | passenv = 32 | * 33 | usedevelop = false 34 | deps = 35 | pytest 36 | pytest-travis-fold 37 | pytest-cov 38 | commands = 39 | {posargs:pytest --cov --cov-report=term-missing -vv tests} 40 | 41 | [testenv:check] 42 | deps = 43 | docutils 44 | check-manifest 45 | flake8 46 | readme-renderer 47 | pygments 48 | isort 49 | skip_install = true 50 | commands = 51 | python 
setup.py check --strict --metadata --restructuredtext 52 | check-manifest {toxinidir} 53 | # flake8 src tests setup.py 54 | # isort --verbose --check-only --diff --recursive src tests setup.py 55 | 56 | [testenv:docs] 57 | deps = 58 | -r{toxinidir}/docs/requirements.txt 59 | commands = 60 | sphinx-build {posargs:-E} -b html docs dist/docs 61 | # sphinx-build -b linkcheck docs dist/docs 62 | 63 | [testenv:codecov] 64 | deps = 65 | codecov 66 | skip_install = true 67 | commands = 68 | codecov [] 69 | 70 | 71 | [testenv:report] 72 | deps = coverage 73 | skip_install = true 74 | commands = 75 | coverage report 76 | coverage html 77 | 78 | [testenv:clean] 79 | commands = coverage erase 80 | skip_install = true 81 | deps = coverage 82 | 83 | --------------------------------------------------------------------------------