├── .gitignore ├── .travis.yml ├── HOWTO_RELEASE.md ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── conda_recipes ├── README.md ├── build_all.sh ├── flann │ ├── .binstar.yml │ ├── build.sh │ └── meta.yaml ├── megaman │ ├── .binstar.yml │ ├── build.sh │ ├── meta.yaml │ └── run_test.sh ├── pyamg │ ├── .binstar.yml │ ├── build.sh │ ├── meta.yaml │ └── run_test.sh └── pyflann │ ├── .binstar.yml │ ├── build.sh │ └── meta.yaml ├── doc ├── .gitignore ├── Makefile ├── conf.py ├── embedding │ ├── API.rst │ ├── index.rst │ ├── isomap.rst │ ├── locally_linear.rst │ ├── ltsa.rst │ └── spectral_embedding.rst ├── geometry │ ├── API.rst │ ├── geometry.rst │ └── index.rst ├── images │ ├── circle_to_ellipse_embedding.png │ ├── index.rst │ ├── spectra_D4000.png │ ├── spectra_Halpha.png │ ├── spectra_Halpha.rst │ ├── word2vec.rst │ └── word2vec_rmetric_plot_no_digits.png ├── index.rst ├── installation.rst ├── sphinxext │ └── numpy_ext │ │ ├── __init__.py │ │ ├── astropyautosummary.py │ │ ├── autodoc_enhancements.py │ │ ├── automodapi.py │ │ ├── automodsumm.py │ │ ├── changelog_links.py │ │ ├── comment_eater.py │ │ ├── compiler_unparse.py │ │ ├── docscrape.py │ │ ├── docscrape_sphinx.py │ │ ├── doctest.py │ │ ├── edit_on_github.py │ │ ├── numpydoc.py │ │ ├── phantom_import.py │ │ ├── smart_resolver.py │ │ ├── tocdepthfix.py │ │ ├── traitsdoc.py │ │ ├── utils.py │ │ └── viewcode.py └── utils │ ├── API.rst │ └── index.rst ├── examples ├── example.py ├── examples_index.ipynb ├── manifold_intro.ipynb ├── megaman_install_usage_colab.ipynb ├── megaman_tutorial.ipynb ├── megaman_tutorial.py ├── rad_est_utils.py ├── radius_estimation_tutorial.ipynb ├── tutorial_data_plot.png ├── tutorial_embeddings.png ├── tutorial_isomap_plot.png └── tutorial_spectral_plot.png ├── megaman ├── __check_build │ ├── __init__.py │ ├── _check_build.pyx │ └── setup.py ├── __init__.py ├── datasets │ ├── __init__.py │ ├── datasets.py │ └── megaman.png ├── embedding │ ├── __init__.py │ ├── base.py │ ├── isomap.py │ ├── locally_linear.py │ ├── ltsa.py │ ├── spectral_embedding.py │ └── tests │ │ ├── __init__.py │ │ ├── test_base.py │ │ ├── test_embeddings.py │ │ ├── test_isomap.py │ │ ├── test_lle.py │ │ ├── test_ltsa.py │ │ └── test_spectral_embedding.py ├── geometry │ ├── __init__.py │ ├── adjacency.py │ ├── affinity.py │ ├── complete_adjacency_matrix.py │ ├── cyflann │ │ ├── __init__.py │ │ ├── cyflann_index.cc │ │ ├── cyflann_index.h │ │ ├── index.pxd │ │ ├── index.pyx │ │ └── setup.py │ ├── geometry.py │ ├── laplacian.py │ ├── rmetric.py │ ├── tests │ │ ├── __init__.py │ │ ├── test_adjacency.py │ │ ├── test_affinity.py │ │ ├── test_complete_adjacency_matrix.py │ │ ├── test_geometry.py │ │ ├── test_laplacian.m │ │ ├── test_laplacian.py │ │ ├── test_rmetric.py │ │ └── testmegaman_laplacian_rad0_2_lam1_5_n200.mat │ └── utils.py ├── plotter │ ├── __init__.py │ ├── covar_plotter3.py │ ├── plotter.py │ ├── scatter_3d.py │ └── utils.py ├── relaxation │ ├── __init__.py │ ├── optimizer.py │ ├── precomputed.py │ ├── riemannian_relaxation.py │ ├── tests │ │ ├── __init__.py │ │ ├── eps_halfdome.mat │ │ ├── rloss_halfdome.mat │ │ ├── test_precomputed_S.py │ │ ├── test_precomputed_Y.py │ │ ├── test_regression_test.py │ │ ├── test_relaxation_keywords.py │ │ ├── test_tracing_var.py │ │ └── utils.py │ ├── trace_variable.py │ └── utils.py ├── setup.py └── utils │ ├── __init__.py │ ├── analyze_dimension_and_radius.py │ ├── covar_plotter.py │ ├── eigendecomp.py │ ├── estimate_radius.py │ ├── k_means_clustering.py │ ├── large_sparse_functions.py │ 
├── nystrom_extension.py │ ├── spectral_clustering.py │ ├── testing.py │ ├── tests │ ├── __init__.py │ ├── test_analyze_dimension_and_radius.py │ ├── test_eigendecomp.py │ ├── test_estimate_radius.py │ ├── test_nystrom.py │ ├── test_spectral_clustering.py │ ├── test_testing.py │ └── test_validation.py │ └── validation.py ├── setup.py └── tools └── cythonize.py /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.pyc 3 | junk* 4 | *.cxx 5 | *.c 6 | cythonize.dat 7 | 8 | cover 9 | 10 | MANIFEST 11 | 12 | # Byte-compiled / optimized / DLL files 13 | __pycache__/ 14 | *.py[cod] 15 | *$py.class 16 | 17 | # C extensions 18 | *.so 19 | 20 | # Distribution / packaging 21 | .Python 22 | env/ 23 | build/ 24 | develop-eggs/ 25 | dist/ 26 | downloads/ 27 | eggs/ 28 | .eggs/ 29 | lib/ 30 | lib64/ 31 | parts/ 32 | sdist/ 33 | var/ 34 | *.egg-info/ 35 | .installed.cfg 36 | *.egg 37 | 38 | # PyInstaller 39 | # Usually these files are written by a python script from a template 40 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 41 | *.manifest 42 | *.spec 43 | 44 | # Installer logs 45 | pip-log.txt 46 | pip-delete-this-directory.txt 47 | 48 | # Unit test / coverage reports 49 | htmlcov/ 50 | .tox/ 51 | .coverage 52 | .coverage.* 53 | .cache 54 | nosetests.xml 55 | coverage.xml 56 | *,cover 57 | .hypothesis/ 58 | 59 | # Translations 60 | *.mo 61 | *.pot 62 | 63 | # Django stuff: 64 | *.log 65 | local_settings.py 66 | 67 | # Flask instance folder 68 | instance/ 69 | 70 | # Sphinx documentation 71 | docs/_build/ 72 | 73 | # PyBuilder 74 | target/ 75 | 76 | # IPython Notebook 77 | .ipynb_checkpoints 78 | Untitled*.ipynb 79 | 80 | # pyenv 81 | .python-version 82 | 83 | # macos DS_Store 84 | .DS_Store 85 | **/*/.DS_Store 86 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | # sudo false implies containerized builds 4 | sudo: false 5 | 6 | python: 7 | - 2.7 8 | - 3.4 9 | - 3.5 10 | 11 | env: 12 | global: 13 | # Directory where tests are run from 14 | - TEST_DIR=/tmp/megaman 15 | - CONDA_CHANNEL="conda-forge" 16 | - CONDA_DEPS="pip nose coverage cython scikit-learn flann h5py" 17 | - PIP_DEPS="coveralls" 18 | matrix: 19 | - EXTRA_DEPS="pyflann pyamg" 20 | - EXTRA_DEPS="" 21 | 22 | before_install: 23 | - export MINICONDA=$HOME/miniconda 24 | - export PATH="$MINICONDA/bin:$PATH" 25 | - hash -r 26 | - wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh 27 | - bash miniconda.sh -b -f -p $MINICONDA 28 | - conda config --set always_yes yes 29 | - conda update conda 30 | - conda info -a 31 | - conda create -n testenv python=$TRAVIS_PYTHON_VERSION 32 | - source activate testenv 33 | - conda install -c $CONDA_CHANNEL $CONDA_DEPS $EXTRA_DEPS 34 | - travis_retry pip install $PIP_DEPS 35 | 36 | install: 37 | - python setup.py install 38 | 39 | script: 40 | - mkdir -p $TEST_DIR 41 | - cd $TEST_DIR && nosetests -v --with-coverage --cover-package=megaman megaman 42 | 43 | after_success: 44 | - coveralls 45 | -------------------------------------------------------------------------------- /HOWTO_RELEASE.md: -------------------------------------------------------------------------------- 1 | # How to Release 2 | 3 | Here's a quick step-by-step for cutting a new release of megaman. 4 | 5 | ## Pre-release 6 | 7 | 1. 
update version in ``megaman/__init__.py`` to, e.g. "0.1" 8 | 9 | 2. update version in **two places** in ``doc/conf.py`` to the same 10 | 11 | 3. create a release tag; e.g. 12 | ``` 13 | $ git tag -a v0.1 -m 'version 0.1 release' 14 | ``` 15 | 16 | 4. push the commits and tag to github 17 | 18 | 5. confirm that CI tests pass on github 19 | 20 | 6. under "tags" on github, update the release notes 21 | 22 | 23 | ## Publishing the Release 24 | 25 | 1. push the new release to PyPI (requires jakevdp's permissions) 26 | ``` 27 | $ python setup.py sdist upload 28 | ``` 29 | 30 | 2. change directories to ``doc`` and build the documentation: 31 | ``` 32 | $ cd doc/ 33 | $ make html # build documentation 34 | $ make publish # publish to github pages 35 | ``` 36 | 37 | 3. Publish the conda build: 38 | submit a PR to http://github.com/conda-forge/megaman-feedstock 39 | updating recipe/meta.yaml with the appropriate version. Once merged, 40 | then the conda install command will point to the new version. 41 | 42 | ## Post-release 43 | 44 | 1. update version in ``megaman/__init__.py`` to next version; e.g. '0.2.dev0' 45 | 46 | 2. update version in ``doc/conf.py`` to the same (in two places) 47 | 48 | 3. push changes to github 49 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
24 | 25 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.md 2 | include *.py 3 | recursive-include megaman *.py *.pyx *.pxd *.cc *.h *.mat *.png 4 | recursive-include doc * 5 | recursive-include tools *.py 6 | recursive-include examples *.py *.ipynb 7 | include Makefile 8 | include LICENSE 9 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CURRENT_DIR = $(shell pwd) 2 | TEST_DIR = /tmp/megaman 3 | PKG = megaman 4 | 5 | install: 6 | python setup.py install 7 | 8 | clean: 9 | rm -r build/ 10 | 11 | test-dir: 12 | mkdir -p $(TEST_DIR) 13 | 14 | test: test-dir install 15 | cd $(TEST_DIR) && nosetests $(PKG) 16 | 17 | doctest: test-dir install 18 | cd $(TEST_DIR) && nosetests --with-doctest $(PKG) 19 | 20 | test-coverage: test-dir install 21 | cd $(TEST_DIR) && nosetests --with-coverage --cover-package=$(PKG) $(PKG) 22 | 23 | test-coverage-html: test-dir install 24 | cd $(TEST_DIR) && nosetests --with-coverage --cover-html --cover-package=$(PKG) $(PKG) 25 | rsync -r $(TEST_DIR)/cover $(CURRENT_DIR)/ 26 | echo "open ./cover/index.html with a web browser to see coverage report" 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # megaman: Manifold Learning for Millions of Points 2 | 3 | 4 | 5 | [![Anaconda-Server Badge](https://anaconda.org/conda-forge/megaman/badges/downloads.svg)](https://anaconda.org/conda-forge/megaman) 6 | [![build status](http://img.shields.io/travis/mmp2/megaman/master.svg?style=flat)](https://travis-ci.org/mmp2/megaman) 7 | [![version status](http://img.shields.io/pypi/v/megaman.svg?style=flat)](https://pypi.python.org/pypi/megaman) 8 | [![license](http://img.shields.io/badge/license-BSD-blue.svg?style=flat)](https://github.com/mmp2/megaman/blob/master/LICENSE) 9 | 10 | ``megaman`` is a scalable manifold learning package implemented in 11 | python. It has a front-end API designed to be familiar 12 | to [scikit-learn](http://scikit-learn.org/) but harnesses 13 | the C++ Fast Library for Approximate Nearest Neighbors (FLANN) 14 | and the Sparse Symmetric Positive Definite (SSPD) solver 15 | Locally Optimal Block Preconditioned Conjugate Gradient (LOBPCG) 16 | to scale manifold learning algorithms to large data sets. 17 | On a personal computer megaman can embed 1 million data points 18 | with hundreds of dimensions in 10 minutes. 19 | megaman is designed for researchers and as such caches intermediate 20 | steps and indices to allow for fast re-computation with new parameters. 21 | 22 | Package documentation can be found at http://mmp2.github.io/megaman/ 23 | 24 | If you use our software please cite the following JMLR paper: 25 | 26 | McQueen, Meila, VanderPlas, & Zhang, "Megaman: Scalable Manifold Learning in Python", 27 | Journal of Machine Learning Research, Vol 17 no. 14, 2016. 28 | http://jmlr.org/papers/v17/16-109.html 29 | 30 | You can also find our arXiv paper at http://arxiv.org/abs/1603.02763 31 | 32 | ## Examples 33 | 34 | - [Tutorial Notebook]( https://github.com/mmp2/megaman/blob/master/examples/megaman_tutorial.ipynb) 35 | 36 | ## Installation and Examples in Google Colab 37 | 38 | Below is a tutorial for installing megaman on Google Colab through a Conda environment.
39 | 40 | It also provides a tutorial on using megaman to build a spectral embedding of a uniform Swiss roll dataset. 41 | 42 | - [Install & Example script]( https://colab.research.google.com/drive/1ms22YK3TvrIx0gji6UZqG0zoSNRCWtXj?usp=sharing) 43 | - [You can download the Jupyter Notebook version here]( https://github.com/mmp2/megaman/blob/master/examples/megaman_install_usage_colab.ipynb) 44 | 45 | ## ~~Installation with Conda~~ 46 | 47 | 59 | 60 | Due to an API change, 61 | `$ conda install -c conda-forge megaman` 62 | is no longer supported. 63 | We are currently working on fixing the bug. 64 | 65 | Please see the full install instructions below to build `megaman` from source. 66 | 67 | ## Installation from source 68 | 69 | Installing megaman from source requires the following: 70 | 71 | - [python](http://python.org) tested with versions 2.7, 3.5, and 3.6 72 | - [numpy](http://numpy.org) version 1.8 or higher 73 | - [scipy](http://scipy.org) version 0.16.0 or higher 74 | - [scikit-learn](http://scikit-learn.org) 75 | - [FLANN](http://www.cs.ubc.ca/research/flann/) 76 | - [pyflann](http://www.cs.ubc.ca/research/flann/), which offers another method of computing distance matrices (this is bundled with the FLANN source code) 77 | - [cython](http://cython.org/) 78 | - a C++ compiler such as ``gcc``/``g++`` 79 | 80 | Optional requirements include: 81 | 82 | - [pyamg](http://pyamg.org/), which allows for faster decompositions of large matrices 83 | - [nose](https://nose.readthedocs.org/) for running the unit tests 84 | - [h5py](http://www.h5py.org) for reading the .mat test files 85 | - [plotly](https://plot.ly), a graphing library for interactive plots 86 | 87 | 88 | These requirements can be installed on Linux and MacOSX using the following conda commands: 89 | 90 | ```shell 91 | $ conda create -n manifold_env python=3.5 -y 92 | # can also use python=2.7 or python=3.6 93 | 94 | $ source activate manifold_env 95 | $ conda install --channel=conda-forge -y pip nose coverage cython numpy scipy \ 96 | scikit-learn pyflann pyamg h5py plotly 97 | ``` 98 | 99 | Clone this repository and `cd` into the source directory: 100 | 101 | ```shell 102 | $ cd /tmp/ 103 | $ git clone https://github.com/mmp2/megaman.git 104 | $ cd megaman 105 | ``` 106 | 107 | Finally, within the source repository, run this command to install the ``megaman`` package itself: 108 | ```shell 109 | $ python setup.py install 110 | ``` 111 | 112 | ## Unit Tests 113 | megaman uses ``nose`` for unit tests. With ``nose`` installed, type 114 | ``` 115 | $ make test 116 | ``` 117 | to run the unit tests. ``megaman`` is tested on Python versions 2.7, 3.4, and 3.5.
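## Quick Example

As a quick smoke test after installation, the sketch below (adapted from the examples in the package documentation; the `radius` and `n_components` values are illustrative rather than tuned) computes a spectral embedding of a small random data set:

```python
import numpy as np

from megaman.geometry import Geometry
from megaman.embedding import SpectralEmbedding

X = np.random.randn(100, 10)  # toy data: 100 points in 10 dimensions
radius = 5                    # neighborhood radius; illustrative only

# distances, affinities, and the Laplacian are all handled by Geometry
geom = Geometry(adjacency_method='cyflann', adjacency_kwds={'radius': radius},
                affinity_method='gaussian', affinity_kwds={'radius': radius},
                laplacian_method='geometric', laplacian_kwds={'scaling_epps': radius})

spectral = SpectralEmbedding(n_components=2, eigen_solver='arpack', geom=geom)
embedding = spectral.fit_transform(X)  # array of shape (100, 2)
```

The other embeddings (`Isomap`, `LocallyLinearEmbedding`, `LTSA`) follow the same `Geometry` + `fit_transform` pattern; see the [documentation](http://mmp2.github.io/megaman/) for details.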
118 | 119 | ## Authors 120 | - [James McQueen](http://www.stat.washington.edu/people/jmcq/) 121 | - [Marina Meila](http://www.stat.washington.edu/mmp/) 122 | - [Zhongyue Zhang](https://github.com/Jerryzcn) 123 | - [Jake VanderPlas](http://www.vanderplas.com) 124 | - [Yu-Chia Chen](https://github.com/yuchaz) 125 | 126 | ## Other Contributors 127 | 128 | - Xiao Wang: lazy rmetric, Nystrom Extension 129 | - [Hangliang Ren (Harry)](https://github.com/Harryahh): Installation tutorials, Spectral Embedding 130 | 131 | ## Future Work 132 | 133 | See this issues list for what we have planned for upcoming releases: 134 | 135 | [Future Work](https://github.com/mmp2/megaman/issues/47) 136 | -------------------------------------------------------------------------------- /conda_recipes/README.md: -------------------------------------------------------------------------------- 1 | # Conda recipes 2 | 3 | This directory contains conda build recipes for megaman and its dependencies. 4 | For more information see the 5 | [Conda Build documentation](http://conda.pydata.org/docs/build_tutorials/pkgs2.html) 6 | -------------------------------------------------------------------------------- /conda_recipes/build_all.sh: -------------------------------------------------------------------------------- 1 | conda config --set anaconda_upload yes 2 | conda build flann 3 | conda build --py all pyflann 4 | conda build --python 2.7 --python 3.4 --python 3.5 --numpy 1.9 --numpy 1.10 pyamg 5 | conda build --python 2.7 --python 3.4 --python 3.5 --numpy 1.10 megaman 6 | -------------------------------------------------------------------------------- /conda_recipes/flann/.binstar.yml: -------------------------------------------------------------------------------- 1 | package: flann 2 | platform: 3 | - osx-64 4 | - osx-32 5 | - linux-64 6 | - linux-32 7 | script: 8 | - conda build . 9 | build_targets: 10 | - conda 11 | -------------------------------------------------------------------------------- /conda_recipes/flann/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # cannot build flann from within the source directory 4 | mkdir build 5 | cd build 6 | 7 | # On OSX, we need to ensure we're using conda's gcc/g++ 8 | if [[ `uname` == Darwin ]]; then 9 | export CC=gcc 10 | export CXX=g++ 11 | fi 12 | 13 | cmake .. 
-DCMAKE_INSTALL_PREFIX=$PREFIX -DBUILD_MATLAB_BINDINGS:BOOL=OFF -DBUILD_PYTHON_BINDINGS:BOOL=OFF -DBUILD_EXAMPLES:BOOL=OFF 14 | 15 | make -j$CPU_COUNT install 16 | -------------------------------------------------------------------------------- /conda_recipes/flann/meta.yaml: -------------------------------------------------------------------------------- 1 | package: 2 | name: flann 3 | version: "1.8.5dev" 4 | 5 | source: 6 | git_url: https://github.com/mariusmuja/flann.git 7 | git_tag: b8a442fd98f8ce32ae3465bfd3427b5cbc36f6a5 8 | 9 | build: 10 | number: 2 11 | string: {{PKG_BUILDNUM}}_g{{GIT_FULL_HASH[:7]}} 12 | 13 | requirements: 14 | build: 15 | - gcc 4.8* # [osx] 16 | - hdf5 17 | - cmake 18 | run: 19 | - libgcc 4.8* #[osx] 20 | - hdf5 21 | 22 | about: 23 | home: http://www.cs.ubc.ca/research/flann/ 24 | license: BSD 25 | license_file: COPYING 26 | -------------------------------------------------------------------------------- /conda_recipes/megaman/.binstar.yml: -------------------------------------------------------------------------------- 1 | package: megaman 2 | platform: 3 | - osx-64 4 | - osx-32 5 | - linux-64 6 | - linux-32 7 | script: 8 | - conda build . 9 | build_targets: 10 | - conda 11 | -------------------------------------------------------------------------------- /conda_recipes/megaman/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # On OSX, we need to ensure we're using conda's gcc/g++ 4 | if [[ `uname` == Darwin ]]; then 5 | export CC=gcc 6 | export CXX=g++ 7 | fi 8 | 9 | $PYTHON setup.py install 10 | -------------------------------------------------------------------------------- /conda_recipes/megaman/meta.yaml: -------------------------------------------------------------------------------- 1 | package: 2 | name: megaman 3 | version: 0.1.1 4 | 5 | source: 6 | git_url: https://github.com/mmp2/megaman.git 7 | git_tag: v0.1.1 8 | 9 | build: 10 | number: 2 11 | string: np{{CONDA_NPY}}py{{CONDA_PY}}_{{PKG_BUILDNUM}} 12 | 13 | requirements: 14 | build: 15 | - python >=2.7,<3|>=3.4,{{PY_VER}}* 16 | - numpy {{NPY_VER}}* 17 | - cython 18 | - flann 19 | - gcc 4.8* # [osx] 20 | run: 21 | - python {{PY_VER}}* 22 | - numpy {{NPY_VER}}* 23 | - scipy >=0.16 24 | - scikit-learn >=0.17 25 | - pyamg 26 | - pyflann 27 | - libgcc 4.8* # [osx] 28 | 29 | test: 30 | requires: 31 | - nose 32 | imports: 33 | - megaman 34 | - megaman.geometry 35 | - megaman.embedding 36 | - megaman.utils 37 | 38 | about: 39 | home: http://mmp2.github.io/megaman 40 | license: BSD 41 | license_file: LICENSE 42 | -------------------------------------------------------------------------------- /conda_recipes/megaman/run_test.sh: -------------------------------------------------------------------------------- 1 | nosetests -v megaman 2 | -------------------------------------------------------------------------------- /conda_recipes/pyamg/.binstar.yml: -------------------------------------------------------------------------------- 1 | package: pyamg 2 | platform: 3 | - osx-64 4 | - osx-32 5 | - linux-64 6 | - linux-32 7 | script: 8 | - conda build . 
9 | build_targets: 10 | - conda 11 | -------------------------------------------------------------------------------- /conda_recipes/pyamg/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # On OSX, we need to ensure we're using conda's gcc/g++ 4 | if [[ `uname` == Darwin ]]; then 5 | export CC=gcc 6 | export CXX=g++ 7 | fi 8 | 9 | $PYTHON setup.py install 10 | -------------------------------------------------------------------------------- /conda_recipes/pyamg/meta.yaml: -------------------------------------------------------------------------------- 1 | package: 2 | name: pyamg 3 | version: "3.0.2" 4 | 5 | source: 6 | git_url: https://github.com/pyamg/pyamg.git 7 | git_tag: v3.0.2 8 | 9 | build: 10 | number: 2 11 | string: np{{CONDA_NPY}}py{{CONDA_PY}}_{{PKG_BUILDNUM}} 12 | 13 | requirements: 14 | build: 15 | - python >=2.7,<3|>=3.4,{{PY_VER}}* 16 | - numpy {{NPY_VER}}* 17 | - scipy 18 | - nose 19 | - zlib # [linux] 20 | - gcc 4.8* # [osx] 21 | run: 22 | - python {{PY_VER}}* 23 | - numpy {{NPY_VER}}* 24 | - scipy 25 | - zlib # [linux] 26 | 27 | test: 28 | requires: 29 | - nose 30 | imports: 31 | - pyamg 32 | 33 | about: 34 | home: http://www.pyamg.org/ 35 | license: MIT 36 | license_file: LICENSE.txt 37 | -------------------------------------------------------------------------------- /conda_recipes/pyamg/run_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [[ `uname` == Darwin ]] && [ $PY_VER == "2.7" ]; then 4 | echo "skipping tests; see https://github.com/pyamg/pyamg/issues/165" 5 | else 6 | nosetests -v pyamg 7 | fi 8 | -------------------------------------------------------------------------------- /conda_recipes/pyflann/.binstar.yml: -------------------------------------------------------------------------------- 1 | package: pyflann 2 | platform: 3 | - osx-64 4 | - osx-32 5 | - linux-64 6 | - linux-32 7 | script: 8 | - conda build . 9 | build_targets: 10 | - conda 11 | -------------------------------------------------------------------------------- /conda_recipes/pyflann/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd src/python 4 | cmake . 
-DLIBRARY_OUTPUT_PATH=$PREFIX/lib -DFLANN_VERSION="$PKG_VERSION" 5 | $PYTHON setup.py install 6 | -------------------------------------------------------------------------------- /conda_recipes/pyflann/meta.yaml: -------------------------------------------------------------------------------- 1 | package: 2 | name: pyflann 3 | version: "1.8.5dev" 4 | 5 | source: 6 | git_url: https://github.com/mariusmuja/flann.git 7 | git_tag: b8a442fd98f8ce32ae3465bfd3427b5cbc36f6a5 8 | 9 | build: 10 | number: 2 11 | string: py{{CONDA_PY}}_{{PKG_BUILDNUM}}_g{{GIT_FULL_HASH[:7]}} 12 | 13 | requirements: 14 | build: 15 | - python {{PY_VER}}* 16 | - setuptools 17 | - flann 1.8.5dev 18 | - cmake 19 | run: 20 | - python {{PY_VER}}* 21 | - flann 1.8.5dev 22 | - numpy 23 | 24 | test: 25 | imports: 26 | - pyflann 27 | 28 | about: 29 | home: http://www.cs.ubc.ca/research/flann/ 30 | license: BSD 31 | license_file: COPYING 32 | -------------------------------------------------------------------------------- /doc/.gitignore: -------------------------------------------------------------------------------- 1 | _build 2 | -------------------------------------------------------------------------------- /doc/embedding/API.rst: -------------------------------------------------------------------------------- 1 | .. _embedding_API: 2 | 3 | .. testsetup:: * 4 | 5 | from megaman.embedding import * 6 | 7 | API Documentation 8 | ================= 9 | 10 | .. automodule:: megaman.embedding.spectral_embedding 11 | :members: 12 | 13 | .. automodule:: megaman.embedding.isomap 14 | :members: 15 | 16 | .. automodule:: megaman.embedding.locally_linear 17 | :members: 18 | 19 | .. automodule:: megaman.embedding.ltsa 20 | :members: 21 | -------------------------------------------------------------------------------- /doc/embedding/index.rst: -------------------------------------------------------------------------------- 1 | .. _embedding: 2 | 3 | *************************************************** 4 | Tools for Embedding (``megaman.embedding``) 5 | *************************************************** 6 | 7 | This module contains tools for nonlinear embedding of data sets. 8 | These tools include Isomap, Spectral Embedding & Diffusion 9 | Maps, Local Tangent Space Alignment, and Locally Linear 10 | Embedding. 11 | 12 | .. toctree:: 13 | :maxdepth: 2 14 | 15 | isomap.rst 16 | locally_linear.rst 17 | ltsa.rst 18 | spectral_embedding.rst 19 | API 20 | -------------------------------------------------------------------------------- /doc/embedding/isomap.rst: -------------------------------------------------------------------------------- 1 | .. _isomap: 2 | 3 | Isomap 4 | ====== 5 | 6 | Isomap is one of the embeddings implemented in the megaman package. 7 | Isomap uses Multidimensional Scaling (MDS) to preserve pairwise 8 | graph shortest-path distances computed using a sparse neighborhood graph. 9 | 10 | For more information see: 11 | 12 | * Tenenbaum, J.B.; De Silva, V.; & Langford, J.C. 13 | A global geometric framework for nonlinear dimensionality reduction. 14 | Science 290 (5500) 15 | 16 | :class:`~megaman.embedding.Isomap` 17 | This class is used to interface with the Isomap embedding function. 18 | Like all embedding functions in megaman it operates using a 19 | Geometry object. The Isomap class allows you to optionally 20 | pass an existing Geometry object; otherwise it creates one.
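For instance, a minimal sketch of this pattern (the ``radius`` value here is
illustrative only; see the Example Usage section below for a complete,
runnable version)::

    from megaman.geometry import Geometry
    from megaman.embedding import Isomap

    # build the Geometry once; Isomap will reuse its cached quantities
    geom = Geometry(adjacency_method='cyflann', adjacency_kwds={'radius': 5})
    isomap = Isomap(n_components=2, eigen_solver='arpack', geom=geom)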
21 | 22 | API of Isomap 23 | ------------- 24 | 25 | The Isomap model, along with all the other models in megaman, has an API 26 | designed in the same vein as the 27 | `scikit-learn <http://scikit-learn.org/>`_ API. 28 | 29 | Consequently, the Isomap class functions as follows: 30 | 31 | 1. At class instantiation `.Isomap()` parameters are passed. See the API 32 | documentation for more information. An existing Geometry object 33 | can be passed to `.Isomap()`. 34 | 2. The `.fit()` method creates a Geometry object if one was not 35 | already passed and then calculates the embedding. 36 | The number of components and eigen solver can also be passed to the 37 | `.fit()` function. Since Isomap caches important quantities 38 | (like the graph distance matrix) which do not change when selecting 39 | a different eigen solver or embedding dimension, these can be passed 40 | and a new embedding computed without re-computing existing quantities. 41 | The `.fit()` function does not return anything, but it does create 42 | the attribute `self.embedding_`. Only one `self.embedding_` exists 43 | at a given time; if a new embedding is computed the old one is overwritten. 44 | 3. The `.fit_transform()` function calls the `fit()` function and returns 45 | the embedding. It does not allow for changing parameters. 46 | 47 | See the API documentation for further information. 48 | 49 | Example Usage 50 | ------------- 51 | 52 | Here is an example using the function on a random data set:: 53 | 54 | import numpy as np 55 | from megaman.geometry import Geometry 56 | from megaman.embedding import Isomap 57 | 58 | X = np.random.randn(100, 10) 59 | radius = 5 60 | adjacency_method = 'cyflann' 61 | adjacency_kwds = {'radius':radius} # ignore distances above this radius 62 | 63 | geom = Geometry(adjacency_method=adjacency_method, adjacency_kwds=adjacency_kwds) 64 | 65 | isomap = Isomap(n_components=2, eigen_solver='arpack', geom=geom) 66 | embed_isomap = isomap.fit_transform(X) 67 | -------------------------------------------------------------------------------- /doc/embedding/locally_linear.rst: -------------------------------------------------------------------------------- 1 | .. _locally_linear: 2 | 3 | Locally Linear Embedding 4 | ======================== 5 | 6 | Locally linear embedding is one of the methods implemented in the megaman package. 7 | Locally Linear Embedding uses reconstruction weights estimated on the original 8 | data set to produce an embedding that preserves the original reconstruction 9 | weights. 10 | 11 | For more information see: 12 | 13 | * Roweis, S. & Saul, L. Nonlinear dimensionality reduction 14 | by locally linear embedding. Science 290:2323 (2000). 15 | 16 | :class:`~megaman.embedding.LocallyLinearEmbedding` 17 | This class is used to interface with the locally linear embedding function. 18 | Like all embedding functions in megaman it operates using a 19 | Geometry object. The Locally Linear class allows you to optionally 20 | pass an existing Geometry object; otherwise it creates one. 21 | 22 | 23 | API of Locally Linear Embedding 24 | ------------------------------- 25 | 26 | The Locally Linear model, along with all the other models in megaman, has an API 27 | designed in the same vein as the 28 | `scikit-learn <http://scikit-learn.org/>`_ API. 29 | 30 | Consequently, the Locally Linear class functions as follows: 31 | 32 | 1. At class instantiation `.LocallyLinear()` parameters are passed. See the API 33 | documentation for more information. An existing Geometry object 34 | can be passed to `.LocallyLinear()`.
2. The `.fit()` method creates a Geometry object if one was not 35 | already passed and then calculates the embedding. 36 | The number of components and eigen solver can also be passed to the 37 | `.fit()` function. (WARNING: NOT COMPLETED.) 38 | Since LocallyLinear caches important quantities 39 | (like the barycenter weight matrix) which do not change when selecting 40 | a different eigen solver or embedding dimension, these can be passed 41 | and a new embedding computed without re-computing existing quantities. 42 | The `.fit()` function does not return anything, but it does create 43 | the attribute `self.embedding_`. Only one `self.embedding_` exists 44 | at a given time; if a new embedding is computed the old one is overwritten. 45 | 3. The `.fit_transform()` function calls the `fit()` function and returns 46 | the embedding. It does not allow for changing parameters. 47 | 48 | See the API documentation for further information. 49 | 50 | Example Usage 51 | ------------- 52 | 53 | Here is an example using the function on a random data set:: 54 | 55 | import numpy as np 56 | from megaman.geometry import Geometry 57 | from megaman.embedding import (Isomap, LocallyLinearEmbedding, LTSA, SpectralEmbedding) 58 | 59 | X = np.random.randn(100, 10) 60 | radius = 5 61 | adjacency_method = 'cyflann' 62 | adjacency_kwds = {'radius':radius} # ignore distances above this radius 63 | 64 | geom = Geometry(adjacency_method=adjacency_method, adjacency_kwds=adjacency_kwds) 65 | lle = LocallyLinearEmbedding(n_components=2, eigen_solver='arpack', geom=geom) 66 | embed_lle = lle.fit_transform(X) 67 | -------------------------------------------------------------------------------- /doc/embedding/ltsa.rst: -------------------------------------------------------------------------------- 1 | .. _ltsa: 2 | 3 | Local Tangent Space Alignment 4 | ============================= 5 | 6 | Local Tangent Space Alignment is one of the methods implemented in the megaman package. 7 | Local Tangent Space Alignment uses independent estimates of the local tangent 8 | space at each point and then uses a global alignment procedure with a 9 | unit-scale condition to create a single embedding from each local tangent 10 | space. 11 | 12 | For more information see: 13 | 14 | * Zhang, Z. & Zha, H. Principal manifolds and nonlinear 15 | dimensionality reduction via tangent space alignment. 16 | Journal of Shanghai Univ. 8:406 (2004) 17 | 18 | :class:`~megaman.embedding.LTSA` 19 | This class is used to interface with the local tangent space 20 | alignment embedding function. 21 | Like all embedding functions in megaman it operates using a 22 | Geometry object. The LTSA class allows you to optionally 23 | pass an existing Geometry object; otherwise it creates one. 24 | 25 | 26 | API of Local Tangent Space Alignment 27 | ------------------------------------ 28 | 29 | The Local Tangent Space Alignment model, along with all the other models in megaman, 30 | has an API designed in the same vein as the 31 | `scikit-learn <http://scikit-learn.org/>`_ API. 32 | 33 | Consequently, the LTSA class functions as follows: 34 | 35 | 1. At class instantiation `.LTSA()` parameters are passed. See the API 36 | documentation for more information. An existing Geometry object 37 | can be passed to `.LTSA()`. 38 | 2. The `.fit()` method creates a Geometry object if one was not 39 | already passed and then calculates the embedding. 40 | The eigen solver can also be passed to the 41 | `.fit()` function.
(WARNING: NOT COMPLETED.) 42 | Since LTSA caches important quantities 43 | (like the local tangent spaces) which do not change when selecting 44 | a different eigen solver, this can be passed 45 | and a new embedding computed without re-computing existing quantities. 46 | The `.fit()` function does not return anything, but it does create 47 | the attribute `self.embedding_`. Only one `self.embedding_` exists 48 | at a given time; if a new embedding is computed the old one is overwritten. 49 | 3. The `.fit_transform()` function calls the `fit()` function and returns 50 | the embedding. It does not allow for changing parameters. 51 | 52 | See the API documentation for further information. 53 | 54 | Example Usage 55 | ------------- 56 | 57 | Here is an example using the function on a random data set:: 58 | 59 | import numpy as np 60 | from megaman.geometry import Geometry 61 | from megaman.embedding import (Isomap, LocallyLinearEmbedding, LTSA, SpectralEmbedding) 62 | 63 | X = np.random.randn(100, 10) 64 | radius = 5 65 | adjacency_method = 'cyflann' 66 | adjacency_kwds = {'radius':radius} # ignore distances above this radius 67 | 68 | geom = Geometry(adjacency_method=adjacency_method, adjacency_kwds=adjacency_kwds) 69 | 70 | ltsa = LTSA(n_components=2, eigen_solver='arpack', geom=geom) 71 | embed_ltsa = ltsa.fit_transform(X) 72 | -------------------------------------------------------------------------------- /doc/embedding/spectral_embedding.rst: -------------------------------------------------------------------------------- 1 | .. _spectral_embedding: 2 | 3 | Spectral Embedding 4 | ================== 5 | 6 | Spectral Embedding is one of the methods implemented in the megaman package. 7 | Spectral embedding (and diffusion maps) uses the spectrum (eigenvectors 8 | and eigenvalues) of a graph Laplacian estimated from the data set. There 9 | are a number of different graph Laplacians that can be used. 10 | 11 | For more information see: 12 | 13 | * A Tutorial on Spectral Clustering, 2007 14 | Ulrike von Luxburg 15 | http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.165.9323 16 | 17 | :class:`~megaman.embedding.SpectralEmbedding` 18 | This class is used to interface with the spectral embedding function. 19 | Like all embedding functions in megaman it operates using a 20 | Geometry object. The SpectralEmbedding class allows you to optionally 21 | pass an existing Geometry object; otherwise it creates one. 22 | 23 | API of Spectral Embedding 24 | ------------------------- 25 | 26 | The Spectral Embedding model, along with all the other models in megaman, 27 | has an API designed in the same vein as the 28 | `scikit-learn <http://scikit-learn.org/>`_ API. 29 | 30 | Consequently, the SpectralEmbedding class functions as follows: 31 | 32 | 1. At class instantiation `.SpectralEmbedding()` parameters are passed. See the API 33 | documentation for more information. An existing Geometry object 34 | can be passed to `.SpectralEmbedding()`. Here is also where 35 | you have the option to use diffusion maps. 36 | 2. The `.fit()` method creates a Geometry object if one was not 37 | already passed and then calculates the embedding. 38 | The eigen solver can also be passed to the 39 | `.fit()` function. (WARNING: NOT COMPLETED.) 40 | Since Geometry caches important quantities 41 | (like the graph Laplacian) which do not change when selecting 42 | a different eigen solver, this can be passed 43 | and a new embedding computed without re-computing existing quantities.
44 | The `.fit()` function does not return anything, but it does create 45 | the attribute `self.embedding_`. Only one `self.embedding_` exists 46 | at a given time; if a new embedding is computed the old one is overwritten. 47 | 3. The `.fit_transform()` function calls the `fit()` function and returns 48 | the embedding. It does not allow for changing parameters. 49 | 50 | See the API documentation for further information. 51 | 52 | Example Usage 53 | ------------- 54 | 55 | Here is an example using the function on a random data set:: 56 | 57 | import numpy as np 58 | from megaman.geometry import Geometry 59 | from megaman.embedding import SpectralEmbedding 60 | 61 | X = np.random.randn(100, 10) 62 | radius = 5 63 | adjacency_method = 'cyflann' 64 | adjacency_kwds = {'radius':radius} # ignore distances above this radius 65 | affinity_method = 'gaussian' 66 | affinity_kwds = {'radius':radius} # A = exp(-||x - y||^2 / radius^2) 67 | laplacian_method = 'geometric' 68 | laplacian_kwds = {'scaling_epps':radius} # scaling ensures convergence to Laplace-Beltrami operator 69 | 70 | geom = Geometry(adjacency_method=adjacency_method, adjacency_kwds=adjacency_kwds, 71 | affinity_method=affinity_method, affinity_kwds=affinity_kwds, 72 | laplacian_method=laplacian_method, laplacian_kwds=laplacian_kwds) 73 | 74 | spectral = SpectralEmbedding(n_components=2, eigen_solver='arpack', 75 | geom=geom) 76 | embed_spectral = spectral.fit_transform(X) -------------------------------------------------------------------------------- /doc/geometry/API.rst: -------------------------------------------------------------------------------- 1 | .. _geometry_API: 2 | 3 | .. testsetup:: * 4 | 5 | from megaman.geometry import * 6 | 7 | API Documentation 8 | ================= 9 | 10 | .. automodule:: megaman.geometry.geometry 11 | :members: 12 | 13 | .. automodule:: megaman.geometry.rmetric 14 | :members: 15 | -------------------------------------------------------------------------------- /doc/geometry/geometry.rst: -------------------------------------------------------------------------------- 1 | .. _geom: 2 | 3 | Geometry 4 | ======== 5 | 6 | One of the fundamental objectives of manifold learning is to understand 7 | the geometry of the data. As such the primary class of this package 8 | is the Geometry class: 9 | 10 | :class:`~megaman.geometry.Geometry` 11 | This class is used as the interface to compute various quantities 12 | on the original data set including: pairwise distance graphs, 13 | affinity matrices, and Laplacian matrices. It also caches these 14 | quantities and allows for fast re-computation with new parameters. 15 | 16 | API of Geometry 17 | --------------- 18 | 19 | The Geometry class is used to interface with functions that compute various 20 | geometric quantities with respect to the original data set. This is the object 21 | that is passed (or computed) within each embedding function. It is how 22 | megaman caches important quantities allowing for fast re-computation with 23 | various new parameters. Beyond instantiation, the Geometry class offers 24 | three types of functions (compute, set, and delete) that work with the four 25 | primary data matrices: (raw) data, adjacency matrix, affinity matrix, 26 | and Laplacian matrix. 27 | 28 | 1. Class instantiation: during class instantiation you input the parameters 29 | concerning the original data matrix such as the distance calculation method, 30 | neighborhood and affinity radius, and Laplacian type.
Each of the three 31 | computed matrices (adjacency, affinity, Laplacian) has its 32 | own keyword dictionary, which permits these methods to be easily extended. 33 | 2. `set_[some]_matrix`: these functions allow you to assign a matrix of data 34 | to the Geometry object. In particular these are used to fit the geometry 35 | to your input data (which may be of the form data_matrix, adjacency_matrix, 36 | or affinity_matrix). You can also set a Laplacian matrix. 37 | 3. `compute_[some]_matrix`: these functions are designed to compute the 38 | selected matrix (e.g. adjacency). Additional keyword arguments can be 39 | passed which override the ones passed at instantiation. NB: this method 40 | will always re-compute a matrix. 41 | 4. Geometry attributes: other than the parameters passed at instantiation, each 42 | matrix that is computed is stored as an attribute, e.g. geom.adjacency_matrix, 43 | geom.affinity_matrix, geom.laplacian_matrix. Raw data is stored as geom.X. 44 | If you want to query these matrices without recomputing, you should use 45 | these attributes, e.g. my_affinity = geom.affinity_matrix. 46 | 5. `delete_[some]_matrix`: if you are working with large data sets and choose 47 | an algorithm (e.g. Isomap or Spectral Embedding) that does not require the 48 | original data_matrix, these methods can be used to clear memory. 49 | 50 | See the API documentation for further information. 51 | 52 | Example Usage 53 | ------------- 54 | 55 | Here is an example using the function on a random data set:: 56 | 57 | import numpy as np 58 | from megaman.geometry import Geometry 59 | 60 | X = np.random.randn(100, 10) 61 | radius = 5 62 | adjacency_method = 'cyflann' 63 | adjacency_kwds = {'radius':radius} # ignore distances above this radius 64 | affinity_method = 'gaussian' 65 | affinity_kwds = {'radius':radius} # A = exp(-||x - y||^2 / radius^2) 66 | laplacian_method = 'geometric' 67 | laplacian_kwds = {'scaling_epps':radius} # scaling ensures convergence to Laplace-Beltrami operator 68 | 69 | geom = Geometry(adjacency_method=adjacency_method, adjacency_kwds=adjacency_kwds, 70 | affinity_method=affinity_method, affinity_kwds=affinity_kwds, 71 | laplacian_method=laplacian_method, laplacian_kwds=laplacian_kwds) -------------------------------------------------------------------------------- /doc/geometry/index.rst: -------------------------------------------------------------------------------- 1 | .. _geometry: 2 | 3 | *************************************************** 4 | Tools for Geometric Analysis (``megaman.geometry``) 5 | *************************************************** 6 | 7 | This module contains tools for analyzing the inherent geometry of a data set. 8 | These tools include pairwise distance calculation, as well as affinity and 9 | Laplacian construction (e.g. :class:`~megaman.geometry.Geometry`). 10 | 11 | .. toctree:: 12 | :maxdepth: 2 13 | 14 | geometry.rst 15 | API 16 | -------------------------------------------------------------------------------- /doc/images/circle_to_ellipse_embedding.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mmp2/megaman/249a7d725de1f99ea7f6ba169a5a89468fc423ec/doc/images/circle_to_ellipse_embedding.png -------------------------------------------------------------------------------- /doc/images/index.rst: -------------------------------------------------------------------------------- 1 | ..
_images: 2 | 3 | ********************* 4 | Figures from Megaman 5 | ********************* 6 | 7 | This section contains some experimental results from using the 8 | megaman package. 9 | 10 | .. toctree:: 11 | :maxdepth: 2 12 | 13 | spectra_Halpha.rst 14 | word2vec.rst -------------------------------------------------------------------------------- /doc/images/spectra_D4000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mmp2/megaman/249a7d725de1f99ea7f6ba169a5a89468fc423ec/doc/images/spectra_D4000.png -------------------------------------------------------------------------------- /doc/images/spectra_Halpha.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mmp2/megaman/249a7d725de1f99ea7f6ba169a5a89468fc423ec/doc/images/spectra_Halpha.png -------------------------------------------------------------------------------- /doc/images/spectra_Halpha.rst: -------------------------------------------------------------------------------- 1 | .. _spectrum_Halpha: 2 | 3 | Spectrum Halpha Plot 4 | ==================== 5 | 6 | .. figure:: spectra_Halpha.png 7 | :scale: 50 % 8 | :alt: spectrum Halpha 9 | 10 | A three-dimensional embedding of the main sample of galaxy spectra 11 | from the Sloan Digital Sky Survey (approximately 675,000 spectra 12 | observed in 3750 dimensions). Colors in the above figure indicate 13 | the strength of Hydrogen alpha emission, a very nonlinear feature 14 | which requires dozens of dimensions to be captured in a linear embedding. -------------------------------------------------------------------------------- /doc/images/word2vec.rst: -------------------------------------------------------------------------------- 1 | .. _word2vec: 2 | 3 | Word2Vec Plot 4 | ==================== 5 | 6 | .. figure:: word2vec_rmetric_plot_no_digits.png 7 | :scale: 50 % 8 | :alt: word2vec embedding with Riemannian metric 9 | 10 | 3,000,000 words and phrases mapped by word2vec using Google News into 300 11 | dimensions. The data was then embedded into 2 dimensions using Spectral 12 | Embedding. The plot shows a sample of 10,000 points displaying the overall 13 | shape of the embedding as well as the estimated "stretch" 14 | (i.e. the dual push-forward Riemannian metric) at various locations in the embedding. -------------------------------------------------------------------------------- /doc/images/word2vec_rmetric_plot_no_digits.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mmp2/megaman/249a7d725de1f99ea7f6ba169a5a89468fc423ec/doc/images/word2vec_rmetric_plot_no_digits.png -------------------------------------------------------------------------------- /doc/index.rst: -------------------------------------------------------------------------------- 1 | .. image:: images/spectra_Halpha.png 2 | :height: 238 px 3 | :width: 318 px 4 | :align: left 5 | :target: /megaman/images/spectra_Halpha 6 | .. image:: images/word2vec_rmetric_plot_no_digits.png 7 | :height: 250 px 8 | :width: 220 px 9 | :align: right 10 | :target: /megaman/images/word2vec 11 | 12 | 13 | megaman: Manifold Learning for Millions of Points 14 | ================================================= 15 | 16 | megaman is a scalable manifold learning package implemented in 17 | python.
It has a front-end API designed to be familiar 18 | to `scikit-learn <http://scikit-learn.org/>`_ but harnesses 19 | the C++ Fast Library for Approximate Nearest Neighbors (FLANN) 20 | and the Sparse Symmetric Positive Definite (SSPD) solver 21 | Locally Optimal Block Preconditioned Conjugate Gradient (LOBPCG) 22 | to scale manifold learning algorithms to large data sets. 23 | It is designed for researchers and as such caches intermediate 24 | steps and indices to allow for fast re-computation with new parameters. 25 | 26 | For issues & contributions, see the source 27 | `repository on github <https://github.com/mmp2/megaman>`_. 28 | 29 | For example notebooks see the 30 | `index on github <https://github.com/mmp2/megaman/blob/master/examples/examples_index.ipynb>`_. 31 | 32 | You can also read our 33 | `arXiv paper <http://arxiv.org/abs/1603.02763>`_. 34 | 35 | Documentation 36 | ============= 37 | 38 | .. toctree:: 39 | :maxdepth: 2 40 | 41 | installation 42 | geometry/index 43 | embedding/index 44 | utils/index 45 | images/index 46 | 47 | 48 | Indices and tables 49 | ================== 50 | 51 | * :ref:`genindex` 52 | * :ref:`modindex` 53 | * :ref:`search` 54 | -------------------------------------------------------------------------------- /doc/installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | Though ``megaman`` has a fair number of compiled dependencies, it is 5 | straightforward to install using the cross-platform conda_ package manager. 6 | 7 | Installation with Conda 8 | ----------------------- 9 | 10 | To install ``megaman`` and all its dependencies using conda_, run:: 11 | 12 | $ conda install megaman --channel=conda-forge 13 | 14 | Currently builds are available for OSX and Linux, on Python 2.7, 3.4, and 3.5. 15 | For other operating systems, see the full install instructions below. 16 | 17 | Installation from Source 18 | ------------------------ 19 | 20 | Installing ``megaman`` from source requires the following: 21 | 22 | - python_: tested with versions 2.7, 3.4, and 3.5 23 | - numpy_: version 1.8 or higher 24 | - scipy_: version 0.16.0 or higher 25 | - scikit-learn_: version 0.16.0 or higher 26 | - FLANN_: version 1.8 or higher 27 | - cython_: version 0.23 or higher 28 | - a C++ compiler such as ``gcc``/``g++`` (we recommend version 4.8.*) 29 | 30 | Optional requirements include: 31 | 32 | - pyamg_, which provides fast decompositions of large sparse matrices 33 | - pyflann_, which offers an alternative FLANN interface for computing distance matrices (this is bundled with the FLANN source code) 34 | - nose_ for running the unit tests 35 | 36 | These requirements can be installed on Linux and MacOSX using the following conda command:: 37 | 38 | $ conda install --channel=jakevdp pip nose coverage gcc cython numpy scipy scikit-learn pyflann pyamg 39 | 40 | Finally, within the source repository, run this command to install the ``megaman`` package itself:: 41 | 42 | $ python setup.py install 43 | 44 | Unit Tests 45 | ---------- 46 | ``megaman`` uses nose_ for unit tests. To run the unit tests once ``nose`` is installed, type in the source directory:: 47 | 48 | $ make test 49 | 50 | or, outside the source directory once ``megaman`` is installed:: 51 | 52 | $ nosetests megaman 53 | 54 | ``megaman`` is tested on Python versions 2.7, 3.4, and 3.5. 55 | 56 | .. _conda: http://conda.pydata.org/miniconda.html 57 | .. _python: http://python.org 58 | .. _numpy: http://numpy.org 59 | .. _scipy: http://scipy.org 60 | .. _scikit-learn: http://scikit-learn.org 61 | .. _FLANN: http://www.cs.ubc.ca/research/flann/ 62 | .. _pyamg: http://pyamg.org/ 63 | ..
_pyflann: http://www.cs.ubc.ca/research/flann/ 64 | .. _nose: https://nose.readthedocs.org/ 65 | .. _cython: http://cython.org/ 66 | -------------------------------------------------------------------------------- /doc/sphinxext/numpy_ext/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, absolute_import, print_function 2 | 3 | from .numpydoc import setup 4 | -------------------------------------------------------------------------------- /doc/sphinxext/numpy_ext/astropyautosummary.py: -------------------------------------------------------------------------------- 1 | # Licensed under a 3-clause BSD style license - see LICENSE.rst 2 | """ 3 | This sphinx extension builds off of `sphinx.ext.autosummary` to 4 | clean up some issues it presents in the Astropy docs. 5 | 6 | The main issue this fixes is the summary tables getting cut off before the 7 | end of the sentence in some cases. 8 | 9 | Note: Sphinx 1.2 appears to have fixed the main issues in the stock 10 | autosummary extension that are addressed by this extension. So use of this 11 | extension with newer versions of Sphinx is deprecated. 12 | """ 13 | 14 | import re 15 | 16 | from distutils.version import LooseVersion 17 | 18 | import sphinx 19 | 20 | from sphinx.ext.autosummary import Autosummary 21 | 22 | from ...utils import deprecated 23 | 24 | # used in AstropyAutosummary.get_items 25 | _itemsummrex = re.compile(r'^([A-Z].*?\.(?:\s|$))') 26 | 27 | 28 | @deprecated('1.0', message='AstropyAutosummary is only needed when used ' 29 | 'with Sphinx versions less than 1.2') 30 | class AstropyAutosummary(Autosummary): 31 | def get_items(self, names): 32 | """Try to import the given names, and return a list of 33 | ``[(name, signature, summary_string, real_name), ...]``. 34 | """ 35 | from sphinx.ext.autosummary import (get_import_prefixes_from_env, 36 | import_by_name, get_documenter, mangle_signature) 37 | 38 | env = self.state.document.settings.env 39 | 40 | prefixes = get_import_prefixes_from_env(env) 41 | 42 | items = [] 43 | 44 | max_item_chars = 50 45 | 46 | for name in names: 47 | display_name = name 48 | if name.startswith('~'): 49 | name = name[1:] 50 | display_name = name.split('.')[-1] 51 | 52 | try: 53 | import_by_name_values = import_by_name(name, prefixes=prefixes) 54 | except ImportError: 55 | self.warn('[astropyautosummary] failed to import %s' % name) 56 | items.append((name, '', '', name)) 57 | continue 58 | 59 | # to accommodate Sphinx v1.2.2 and v1.2.3 60 | if len(import_by_name_values) == 3: 61 | real_name, obj, parent = import_by_name_values 62 | elif len(import_by_name_values) == 4: 63 | real_name, obj, parent, module_name = import_by_name_values 64 | 65 | # NB.
using real_name here is important, since Documenters 66 | # handle module prefixes slightly differently 67 | documenter = get_documenter(obj, parent)(self, real_name) 68 | if not documenter.parse_name(): 69 | self.warn('[astropyautosummary] failed to parse name %s' % real_name) 70 | items.append((display_name, '', '', real_name)) 71 | continue 72 | if not documenter.import_object(): 73 | self.warn('[astropyautosummary] failed to import object %s' % real_name) 74 | items.append((display_name, '', '', real_name)) 75 | continue 76 | 77 | # -- Grab the signature 78 | 79 | sig = documenter.format_signature() 80 | if not sig: 81 | sig = '' 82 | else: 83 | max_chars = max(10, max_item_chars - len(display_name)) 84 | sig = mangle_signature(sig, max_chars=max_chars) 85 | sig = sig.replace('*', r'\*') 86 | 87 | # -- Grab the summary 88 | 89 | doc = list(documenter.process_doc(documenter.get_doc())) 90 | 91 | while doc and not doc[0].strip(): 92 | doc.pop(0) 93 | m = _itemsummrex.search(" ".join(doc).strip()) 94 | if m: 95 | summary = m.group(1).strip() 96 | elif doc: 97 | summary = doc[0].strip() 98 | else: 99 | summary = '' 100 | 101 | items.append((display_name, sig, summary, real_name)) 102 | 103 | return items 104 | 105 | 106 | def setup(app): 107 | # need autosummary, of course 108 | app.setup_extension('sphinx.ext.autosummary') 109 | 110 | # Don't make the replacement if Sphinx is at least 1.2 111 | if LooseVersion(sphinx.__version__) < LooseVersion('1.2.0'): 112 | # this replaces the default autosummary with the astropy one 113 | app.add_directive('autosummary', AstropyAutosummary) 114 | -------------------------------------------------------------------------------- /doc/sphinxext/numpy_ext/autodoc_enhancements.py: -------------------------------------------------------------------------------- 1 | """ 2 | Miscellaneous enhancements to help autodoc along. 3 | """ 4 | 5 | 6 | # See 7 | # https://github.com/astropy/astropy-helpers/issues/116#issuecomment-71254836 8 | # for further background on this. 9 | def type_object_attrgetter(obj, attr, *defargs): 10 | """ 11 | This implements an improved attrgetter for type objects (i.e. classes) 12 | that can handle class attributes that are implemented as properties on 13 | a metaclass. 14 | 15 | Normally `getattr` on a class with a `property` (say, "foo") would return 16 | the `property` object itself. However, if the class has a metaclass which 17 | *also* defines a `property` named "foo", ``getattr(cls, 'foo')`` will find 18 | the "foo" property on the metaclass and resolve it. For the purposes of 19 | autodoc we just want to document the "foo" property defined on the class, 20 | not on the metaclass. 21 | 22 | For example:: 23 | 24 | >>> class Meta(type): 25 | ... @property 26 | ... def foo(cls): 27 | ... return 'foo' 28 | ... 29 | >>> class MyClass(metaclass=Meta): 30 | ... @property 31 | ... def foo(self): 32 | ... \"\"\"Docstring for MyClass.foo property.\"\"\" 33 | ... return 'myfoo' 34 | ... 35 | >>> getattr(MyClass, 'foo') 36 | 'foo' 37 | >>> type_object_attrgetter(MyClass, 'foo') 38 | <property at 0x...> 39 | >>> type_object_attrgetter(MyClass, 'foo').__doc__ 40 | 'Docstring for MyClass.foo property.' 41 | 42 | The last line of the example shows the desired behavior for the purposes 43 | of autodoc.
44 | """ 45 | 46 | for base in obj.__mro__: 47 | if attr in base.__dict__: 48 | if isinstance(base.__dict__[attr], property): 49 | # Note, this should only be used for properties--for any other 50 | # type of descriptor (classmethod, for example) this can mess 51 | # up existing expectations of what getattr(cls, ...) returns 52 | return base.__dict__[attr] 53 | break 54 | 55 | return getattr(obj, attr, *defargs) 56 | 57 | 58 | def setup(app): 59 | app.add_autodoc_attrgetter(type, type_object_attrgetter) 60 | -------------------------------------------------------------------------------- /doc/sphinxext/numpy_ext/changelog_links.py: -------------------------------------------------------------------------------- 1 | # Licensed under a 3-clause BSD style license - see LICENSE.rst 2 | """ 3 | This sphinx extension makes the issue numbers in the changelog into links to 4 | GitHub issues. 5 | """ 6 | 7 | from __future__ import print_function 8 | 9 | import re 10 | 11 | from docutils.nodes import Text, reference 12 | 13 | BLOCK_PATTERN = re.compile('\[#.+\]', flags=re.DOTALL) 14 | ISSUE_PATTERN = re.compile('#[0-9]+') 15 | 16 | 17 | def process_changelog_links(app, doctree, docname): 18 | for rex in app.changelog_links_rexes: 19 | if rex.match(docname): 20 | break 21 | else: 22 | # if the doc doesn't match any of the changelog regexes, don't process 23 | return 24 | 25 | app.info('[changelog_links] Adding changelog links to "{0}"'.format(docname)) 26 | 27 | for item in doctree.traverse(): 28 | 29 | if not isinstance(item, Text): 30 | continue 31 | 32 | # We build a new list of items to replace the current item. If 33 | # a link is found, we need to use a 'reference' item. 34 | children = [] 35 | 36 | # First cycle through blocks of issues (delimited by []) then 37 | # iterate inside each one to find the individual issues. 38 | prev_block_end = 0 39 | for block in BLOCK_PATTERN.finditer(item): 40 | block_start, block_end = block.start(), block.end() 41 | children.append(Text(item[prev_block_end:block_start])) 42 | block = item[block_start:block_end] 43 | prev_end = 0 44 | for m in ISSUE_PATTERN.finditer(block): 45 | start, end = m.start(), m.end() 46 | children.append(Text(block[prev_end:start])) 47 | issue_number = block[start:end] 48 | refuri = app.config.github_issues_url + issue_number[1:] 49 | children.append(reference(text=issue_number, 50 | name=issue_number, 51 | refuri=refuri)) 52 | prev_end = end 53 | 54 | prev_block_end = block_end 55 | 56 | # If no issues were found, this adds the whole item, 57 | # otherwise it adds the remaining text. 58 | children.append(Text(block[prev_end:block_end])) 59 | 60 | # If no blocks were found, this adds the whole item, otherwise 61 | # it adds the remaining text. 62 | children.append(Text(item[prev_block_end:])) 63 | 64 | # Replace item by the new list of items we have generated, 65 | # which may contain links. 
66 | item.parent.replace(item, children) 67 | 68 | 69 | def setup_patterns_rexes(app): 70 | app.changelog_links_rexes = [re.compile(pat) for pat in 71 | app.config.changelog_links_docpattern] 72 | 73 | 74 | def setup(app): 75 | app.connect('doctree-resolved', process_changelog_links) 76 | app.connect('builder-inited', setup_patterns_rexes) 77 | app.add_config_value('github_issues_url', None, True) 78 | app.add_config_value('changelog_links_docpattern', ['.*changelog.*', 'whatsnew/.*'], True) 79 | -------------------------------------------------------------------------------- /doc/sphinxext/numpy_ext/comment_eater.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, absolute_import, print_function 2 | 3 | import sys 4 | if sys.version_info[0] >= 3: 5 | from io import StringIO 6 | else: 7 | from StringIO import StringIO 8 | 9 | import compiler 10 | import inspect 11 | import textwrap 12 | import tokenize 13 | 14 | from .compiler_unparse import unparse 15 | 16 | 17 | class Comment(object): 18 | """ A comment block. 19 | """ 20 | is_comment = True 21 | def __init__(self, start_lineno, end_lineno, text): 22 | # int : The first line number in the block. 1-indexed. 23 | self.start_lineno = start_lineno 24 | # int : The last line number. Inclusive! 25 | self.end_lineno = end_lineno 26 | # str : The text block including '#' character but not any leading spaces. 27 | self.text = text 28 | 29 | def add(self, string, start, end, line): 30 | """ Add a new comment line. 31 | """ 32 | self.start_lineno = min(self.start_lineno, start[0]) 33 | self.end_lineno = max(self.end_lineno, end[0]) 34 | self.text += string 35 | 36 | def __repr__(self): 37 | return '%s(%r, %r, %r)' % (self.__class__.__name__, self.start_lineno, 38 | self.end_lineno, self.text) 39 | 40 | 41 | class NonComment(object): 42 | """ A non-comment block of code. 43 | """ 44 | is_comment = False 45 | def __init__(self, start_lineno, end_lineno): 46 | self.start_lineno = start_lineno 47 | self.end_lineno = end_lineno 48 | 49 | def add(self, string, start, end, line): 50 | """ Add lines to the block. 51 | """ 52 | if string.strip(): 53 | # Only add if not entirely whitespace. 54 | self.start_lineno = min(self.start_lineno, start[0]) 55 | self.end_lineno = max(self.end_lineno, end[0]) 56 | 57 | def __repr__(self): 58 | return '%s(%r, %r)' % (self.__class__.__name__, self.start_lineno, 59 | self.end_lineno) 60 | 61 | 62 | class CommentBlocker(object): 63 | """ Pull out contiguous comment blocks. 64 | """ 65 | def __init__(self): 66 | # Start with a dummy. 67 | self.current_block = NonComment(0, 0) 68 | 69 | # All of the blocks seen so far. 70 | self.blocks = [] 71 | 72 | # The index mapping lines of code to their associated comment blocks. 73 | self.index = {} 74 | 75 | def process_file(self, file): 76 | """ Process a file object. 77 | """ 78 | if sys.version_info[0] >= 3: 79 | nxt = file.__next__ 80 | else: 81 | nxt = file.next 82 | for token in tokenize.generate_tokens(nxt): 83 | self.process_token(*token) 84 | self.make_index() 85 | 86 | def process_token(self, kind, string, start, end, line): 87 | """ Process a single token.
88 | """ 89 | if self.current_block.is_comment: 90 | if kind == tokenize.COMMENT: 91 | self.current_block.add(string, start, end, line) 92 | else: 93 | self.new_noncomment(start[0], end[0]) 94 | else: 95 | if kind == tokenize.COMMENT: 96 | self.new_comment(string, start, end, line) 97 | else: 98 | self.current_block.add(string, start, end, line) 99 | 100 | def new_noncomment(self, start_lineno, end_lineno): 101 | """ We are transitioning from a noncomment to a comment. 102 | """ 103 | block = NonComment(start_lineno, end_lineno) 104 | self.blocks.append(block) 105 | self.current_block = block 106 | 107 | def new_comment(self, string, start, end, line): 108 | """ Possibly add a new comment. 109 | 110 | Only adds a new comment if this comment is the only thing on the line. 111 | Otherwise, it extends the noncomment block. 112 | """ 113 | prefix = line[:start[1]] 114 | if prefix.strip(): 115 | # Oops! Trailing comment, not a comment block. 116 | self.current_block.add(string, start, end, line) 117 | else: 118 | # A comment block. 119 | block = Comment(start[0], end[0], string) 120 | self.blocks.append(block) 121 | self.current_block = block 122 | 123 | def make_index(self): 124 | """ Make the index mapping lines of actual code to their associated 125 | prefix comments. 126 | """ 127 | for prev, block in zip(self.blocks[:-1], self.blocks[1:]): 128 | if not block.is_comment: 129 | self.index[block.start_lineno] = prev 130 | 131 | def search_for_comment(self, lineno, default=None): 132 | """ Find the comment block just before the given line number. 133 | 134 | Returns None (or the specified default) if there is no such block. 135 | """ 136 | if not self.index: 137 | self.make_index() 138 | block = self.index.get(lineno, None) 139 | text = getattr(block, 'text', default) 140 | return text 141 | 142 | 143 | def strip_comment_marker(text): 144 | """ Strip # markers at the front of a block of comment text. 145 | """ 146 | lines = [] 147 | for line in text.splitlines(): 148 | lines.append(line.lstrip('#')) 149 | text = textwrap.dedent('\n'.join(lines)) 150 | return text 151 | 152 | 153 | def get_class_traits(klass): 154 | """ Yield all of the documentation for trait definitions on a class object. 155 | """ 156 | # FIXME: gracefully handle errors here or in the caller? 157 | source = inspect.getsource(klass) 158 | cb = CommentBlocker() 159 | cb.process_file(StringIO(source)) 160 | mod_ast = compiler.parse(source) 161 | class_ast = mod_ast.node.nodes[0] 162 | for node in class_ast.code.nodes: 163 | # FIXME: handle other kinds of assignments? 164 | if isinstance(node, compiler.ast.Assign): 165 | name = node.nodes[0].name 166 | rhs = unparse(node.expr).strip() 167 | doc = strip_comment_marker(cb.search_for_comment(node.lineno, default='')) 168 | yield name, rhs, doc 169 | 170 | -------------------------------------------------------------------------------- /doc/sphinxext/numpy_ext/doctest.py: -------------------------------------------------------------------------------- 1 | # Licensed under a 3-clause BSD style license - see LICENSE.rst 2 | """ 3 | This is a set of three directives that allow us to insert metadata 4 | about doctests into the .rst files so the testing framework knows 5 | which tests to skip. 6 | 7 | This is quite different from the doctest extension in Sphinx itself, 8 | which actually does something. For astropy, all of the testing is 9 | centrally managed from py.test and Sphinx is not used for running 10 | tests. 
11 | """ 12 | import re 13 | from docutils.nodes import literal_block 14 | from sphinx.util.compat import Directive 15 | 16 | 17 | class DoctestSkipDirective(Directive): 18 | has_content = True 19 | 20 | def run(self): 21 | # Check if there is any valid argument, and skip it. Currently only 22 | # 'win32' is supported in astropy.tests.pytest_plugins. 23 | if re.match('win32', self.content[0]): 24 | self.content = self.content[2:] 25 | code = '\n'.join(self.content) 26 | return [literal_block(code, code)] 27 | 28 | 29 | class DoctestRequiresDirective(DoctestSkipDirective): 30 | # This is silly, but we really support an unbounded number of 31 | # optional arguments 32 | optional_arguments = 64 33 | 34 | 35 | def setup(app): 36 | app.add_directive('doctest-requires', DoctestRequiresDirective) 37 | app.add_directive('doctest-skip', DoctestSkipDirective) 38 | app.add_directive('doctest-skip-all', DoctestSkipDirective) 39 | -------------------------------------------------------------------------------- /doc/sphinxext/numpy_ext/smart_resolver.py: -------------------------------------------------------------------------------- 1 | # Licensed under a 3-clause BSD style license - see LICENSE.rst 2 | """ 3 | The classes in the astropy docs are documented by their API location, 4 | which is not necessarily where they are defined in the source. This 5 | causes a problem when certain automated features of the doc build, 6 | such as the inheritance diagrams or the `Bases` list of a class 7 | reference a class by its canonical location rather than its "user" 8 | location. 9 | 10 | In the `autodoc-process-docstring` event, a mapping from the actual 11 | name to the API name is maintained. Later, in the `missing-reference` 12 | event, unresolved references are looked up in this dictionary and 13 | corrected if possible. 14 | """ 15 | 16 | from docutils.nodes import literal, reference 17 | 18 | 19 | def process_docstring(app, what, name, obj, options, lines): 20 | if isinstance(obj, type): 21 | env = app.env 22 | if not hasattr(env, 'class_name_mapping'): 23 | env.class_name_mapping = {} 24 | mapping = env.class_name_mapping 25 | mapping[obj.__module__ + '.' + obj.__name__] = name 26 | 27 | 28 | def missing_reference_handler(app, env, node, contnode): 29 | if not hasattr(env, 'class_name_mapping'): 30 | env.class_name_mapping = {} 31 | mapping = env.class_name_mapping 32 | reftype = node['reftype'] 33 | reftarget = node['reftarget'] 34 | if reftype in ('obj', 'class', 'exc', 'meth'): 35 | reftarget = node['reftarget'] 36 | suffix = '' 37 | if reftarget not in mapping: 38 | if '.' in reftarget: 39 | front, suffix = reftarget.rsplit('.', 1) 40 | else: 41 | suffix = reftarget 42 | 43 | if suffix.startswith('_') and not suffix.startswith('__'): 44 | # If this is a reference to a hidden class or method, 45 | # we can't link to it, but we don't want to have a 46 | # nitpick warning. 47 | return node[0].deepcopy() 48 | 49 | if reftype in ('obj', 'meth') and '.' in reftarget: 50 | if front in mapping: 51 | reftarget = front 52 | suffix = '.' + suffix 53 | 54 | if (reftype in ('class', ) and '.' in reftarget 55 | and reftarget not in mapping): 56 | 57 | if '.' in front: 58 | reftarget, _ = front.rsplit('.', 1) 59 | suffix = '.' 
+ suffix 60 | reftarget = reftarget + suffix 61 | prefix = reftarget.rsplit('.')[0] 62 | if (reftarget not in mapping and 63 | prefix in env.intersphinx_named_inventory): 64 | 65 | if reftarget in env.intersphinx_named_inventory[prefix]['py:class']: 66 | newtarget = env.intersphinx_named_inventory[prefix]['py:class'][reftarget][2] 67 | if not node['refexplicit'] and \ 68 | '~' not in node.rawsource: 69 | contnode = literal(text=reftarget) 70 | newnode = reference('', '', internal=True) 71 | newnode['reftitle'] = reftarget 72 | newnode['refuri'] = newtarget 73 | newnode.append(contnode) 74 | 75 | return newnode 76 | 77 | if reftarget in mapping: 78 | newtarget = mapping[reftarget] + suffix 79 | if not node['refexplicit'] and not '~' in node.rawsource: 80 | contnode = literal(text=newtarget) 81 | newnode = env.domains['py'].resolve_xref( 82 | env, node['refdoc'], app.builder, 'class', newtarget, 83 | node, contnode) 84 | if newnode is not None: 85 | newnode['reftitle'] = reftarget 86 | return newnode 87 | 88 | 89 | def setup(app): 90 | app.connect('autodoc-process-docstring', process_docstring) 91 | 92 | app.connect('missing-reference', missing_reference_handler) 93 | -------------------------------------------------------------------------------- /doc/sphinxext/numpy_ext/tocdepthfix.py: -------------------------------------------------------------------------------- 1 | from sphinx import addnodes 2 | 3 | 4 | def fix_toc_entries(app, doctree): 5 | # Get the docname; I don't know why this isn't just passed in to the 6 | # callback 7 | # This seems a bit unreliable as it's undocumented, but it's not "private" 8 | # either: 9 | docname = app.builder.env.temp_data['docname'] 10 | if app.builder.env.metadata[docname].get('tocdepth', 0) != 0: 11 | # We need to reprocess any TOC nodes in the doctree and make sure all 12 | # the files listed in any TOCs are noted 13 | for treenode in doctree.traverse(addnodes.toctree): 14 | app.builder.env.note_toctree(docname, treenode) 15 | 16 | 17 | def setup(app): 18 | app.connect('doctree-read', fix_toc_entries) 19 | -------------------------------------------------------------------------------- /doc/sphinxext/numpy_ext/traitsdoc.py: -------------------------------------------------------------------------------- 1 | """ 2 | ========= 3 | traitsdoc 4 | ========= 5 | 6 | Sphinx extension that handles docstrings in the Numpy standard format [1], 7 | and supports Traits [2]. 8 | 9 | This extension can be used as a replacement for ``numpydoc`` when support 10 | for Traits is required. 11 | 12 | .. [1] http://projects.scipy.org/numpy/wiki/CodingStyleGuidelines#docstring-standard 13 | .. [2] http://code.enthought.com/projects/traits/ 14 | 15 | """ 16 | from __future__ import division, absolute_import, print_function 17 | 18 | import inspect 19 | import os 20 | import pydoc 21 | import collections 22 | 23 | from . import docscrape 24 | from . import docscrape_sphinx 25 | from .docscrape_sphinx import SphinxClassDoc, SphinxFunctionDoc, SphinxDocString 26 | 27 | from . import numpydoc 28 | 29 | from . import comment_eater 30 | 31 | class SphinxTraitsDoc(SphinxClassDoc): 32 | def __init__(self, cls, modulename='', func_doc=SphinxFunctionDoc): 33 | if not inspect.isclass(cls): 34 | raise ValueError("Initialise using a class. Got %r" % cls) 35 | self._cls = cls 36 | 37 | if modulename and not modulename.endswith('.'): 38 | modulename += '.'
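# (Editor's note: the trailing '.' lets the module prefix below be
# concatenated directly with the class name when the documented name is
# assembled, e.g. self._mod + self._name -> 'package.module.ClassName'.)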
39 | self._mod = modulename 40 | self._name = cls.__name__ 41 | self._func_doc = func_doc 42 | 43 | docstring = pydoc.getdoc(cls) 44 | docstring = docstring.split('\n') 45 | 46 | # De-indent paragraph 47 | try: 48 | indent = min(len(s) - len(s.lstrip()) for s in docstring 49 | if s.strip()) 50 | except ValueError: 51 | indent = 0 52 | 53 | for n,line in enumerate(docstring): 54 | docstring[n] = docstring[n][indent:] 55 | 56 | self._doc = docscrape.Reader(docstring) 57 | self._parsed_data = { 58 | 'Signature': '', 59 | 'Summary': '', 60 | 'Description': [], 61 | 'Extended Summary': [], 62 | 'Parameters': [], 63 | 'Returns': [], 64 | 'Raises': [], 65 | 'Warns': [], 66 | 'Other Parameters': [], 67 | 'Traits': [], 68 | 'Methods': [], 69 | 'See Also': [], 70 | 'Notes': [], 71 | 'References': '', 72 | 'Example': '', 73 | 'Examples': '', 74 | 'index': {} 75 | } 76 | 77 | self._parse() 78 | 79 | def _str_summary(self): 80 | return self['Summary'] + [''] 81 | 82 | def _str_extended_summary(self): 83 | return self['Description'] + self['Extended Summary'] + [''] 84 | 85 | def __str__(self, indent=0, func_role="func"): 86 | out = [] 87 | out += self._str_signature() 88 | out += self._str_index() + [''] 89 | out += self._str_summary() 90 | out += self._str_extended_summary() 91 | for param_list in ('Parameters', 'Traits', 'Methods', 92 | 'Returns','Raises'): 93 | out += self._str_param_list(param_list) 94 | out += self._str_see_also("obj") 95 | out += self._str_section('Notes') 96 | out += self._str_references() 97 | out += self._str_section('Example') 98 | out += self._str_section('Examples') 99 | out = self._str_indent(out,indent) 100 | return '\n'.join(out) 101 | 102 | def looks_like_issubclass(obj, classname): 103 | """ Return True if the object has a class or superclass with the given class 104 | name. 105 | 106 | Ignores old-style classes. 107 | """ 108 | t = obj 109 | if t.__name__ == classname: 110 | return True 111 | for klass in t.__mro__: 112 | if klass.__name__ == classname: 113 | return True 114 | return False 115 | 116 | def get_doc_object(obj, what=None, config=None): 117 | if what is None: 118 | if inspect.isclass(obj): 119 | what = 'class' 120 | elif inspect.ismodule(obj): 121 | what = 'module' 122 | elif isinstance(obj, collections.Callable): 123 | what = 'function' 124 | else: 125 | what = 'object' 126 | if what == 'class': 127 | doc = SphinxTraitsDoc(obj, '', func_doc=SphinxFunctionDoc, config=config) 128 | if looks_like_issubclass(obj, 'HasTraits'): 129 | for name, trait, comment in comment_eater.get_class_traits(obj): 130 | # Exclude private traits. 131 | if not name.startswith('_'): 132 | doc['Traits'].append((name, trait, comment.splitlines())) 133 | return doc 134 | elif what in ('function', 'method'): 135 | return SphinxFunctionDoc(obj, '', config=config) 136 | else: 137 | return SphinxDocString(pydoc.getdoc(obj), config=config) 138 | 139 | def setup(app): 140 | # init numpydoc 141 | numpydoc.setup(app, get_doc_object) 142 | 143 | -------------------------------------------------------------------------------- /doc/sphinxext/numpy_ext/utils.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | import sys 3 | 4 | 5 | def find_mod_objs(modname, onlylocals=False): 6 | """ Returns all the public attributes of a module referenced by name. 7 | 8 | .. note:: 9 | The returned list does *not* include subpackages or modules of 10 | `modname`, nor does it include private attributes (those that 11 | begin with '_' or are not in `__all__`).
12 | 13 | Parameters 14 | ---------- 15 | modname : str 16 | The name of the module to search. 17 | onlylocals : bool 18 | If True, only attributes that are either members of `modname` OR one of 19 | its modules or subpackages will be included. 20 | 21 | Returns 22 | ------- 23 | localnames : list of str 24 | A list of the names of the attributes as they are named in the 25 | module `modname`. 26 | fqnames : list of str 27 | A list of the fully qualified names of the attributes (e.g., 28 | ``astropy.utils.misc.find_mod_objs``). For attributes that are 29 | simple variables, this is based on the local name, but for 30 | functions or classes it can be different if they are actually 31 | defined elsewhere and just referenced in `modname`. 32 | objs : list of objects 33 | A list of the actual attributes themselves (in the same order as 34 | the other arguments) 35 | 36 | """ 37 | 38 | __import__(modname) 39 | mod = sys.modules[modname] 40 | 41 | if hasattr(mod, '__all__'): 42 | pkgitems = [(k, mod.__dict__[k]) for k in mod.__all__] 43 | else: 44 | pkgitems = [(k, mod.__dict__[k]) for k in dir(mod) if k[0] != '_'] 45 | 46 | # filter out modules and pull the names and objs out 47 | ismodule = inspect.ismodule 48 | localnames = [k for k, v in pkgitems if not ismodule(v)] 49 | objs = [v for k, v in pkgitems if not ismodule(v)] 50 | 51 | # fully qualified names can be determined from the object's module 52 | fqnames = [] 53 | for obj, lnm in zip(objs, localnames): 54 | if hasattr(obj, '__module__') and hasattr(obj, '__name__'): 55 | fqnames.append(obj.__module__ + '.' + obj.__name__) 56 | else: 57 | fqnames.append(modname + '.' + lnm) 58 | 59 | if onlylocals: 60 | valids = [fqn.startswith(modname) for fqn in fqnames] 61 | localnames = [e for i, e in enumerate(localnames) if valids[i]] 62 | fqnames = [e for i, e in enumerate(fqnames) if valids[i]] 63 | objs = [e for i, e in enumerate(objs) if valids[i]] 64 | 65 | return localnames, fqnames, objs 66 | -------------------------------------------------------------------------------- /doc/utils/API.rst: -------------------------------------------------------------------------------- 1 | .. _utils_API: 2 | 3 | .. testsetup:: * 4 | 5 | from megaman.utils import * 6 | 7 | API Documentation 8 | ================= 9 | 10 | .. automodule:: megaman.utils.eigendecomp 11 | :members: 12 | -------------------------------------------------------------------------------- /doc/utils/index.rst: -------------------------------------------------------------------------------- 1 | .. _utils: 2 | 3 | *************************************************** 4 | Utility tools for megaman (``megaman.utils``) 5 | *************************************************** 6 | 7 | This module contains utility functions used inside 8 | megaman, in particular the eigendecomposition. 9 | 10 | ..
toctree:: 11 | :maxdepth: 2 12 | 13 | API 14 | -------------------------------------------------------------------------------- /examples/example.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | import scipy as sp 4 | import scipy.sparse as sparse 5 | from megaman.geometry import Geometry 6 | from sklearn import datasets 7 | from megaman.embedding import (Isomap, LocallyLinearEmbedding, 8 | LTSA, SpectralEmbedding) 9 | 10 | # Generate an example data set 11 | N = 10 12 | X, color = datasets.samples_generator.make_s_curve(N, random_state=0) 13 | 14 | # Geometry is the main class that will cache things like distance, affinity, and laplacian. 15 | # You instantiate the Geometry class with the parameters & methods for the three main components: 16 | # Adjacency: an NxN (sparse) pairwise matrix indicating neighborhood regions 17 | # Affinity: an NxN (sparse) pairwise matrix indicating similarity between points 18 | # Laplacian: an NxN (sparse) pairwise matrix containing geometric manifold information 19 | 20 | radius = 5 21 | adjacency_method = 'cyflann' 22 | adjacency_kwds = {'radius':radius} # ignore distances above this radius 23 | affinity_method = 'gaussian' 24 | affinity_kwds = {'radius':radius} # A = exp(-||x - y||^2 / radius^2) 25 | laplacian_method = 'geometric' 26 | laplacian_kwds = {'scaling_epps':radius} # scaling ensures convergence to Laplace-Beltrami operator 27 | 28 | geom = Geometry(adjacency_method=adjacency_method, adjacency_kwds=adjacency_kwds, 29 | affinity_method=affinity_method, affinity_kwds=affinity_kwds, 30 | laplacian_method=laplacian_method, laplacian_kwds=laplacian_kwds) 31 | 32 | # You can/should also use the set_data_matrix, set_adjacency_matrix, set_affinity_matrix 33 | # methods to pass your data set (in whichever form it takes) to the Geometry object. 34 | geom.set_data_matrix(X) 35 | 36 | # You can get the distance (adjacency), affinity, etc. with e.g. geom.compute_adjacency_matrix(); 37 | # you can update the keyword arguments passed initially using these functions 38 | adjacency_matrix = geom.compute_adjacency_matrix() 39 | # by default this is pass-by-reference. Use copy=True to get a copied version. 40 | 41 | # If you don't want to pre-compute a Geometry you can pass a dictionary of geometry 42 | # arguments to one of the embedding classes. 43 | geom = {'adjacency_method':adjacency_method, 'adjacency_kwds':adjacency_kwds, 44 | 'affinity_method':affinity_method, 'affinity_kwds':affinity_kwds, 45 | 'laplacian_method':laplacian_method, 'laplacian_kwds':laplacian_kwds} 46 | 47 | 48 | # an example follows for creating each embedding into 2 dimensions.
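# (Editor's sketch: the affinity and Laplacian matrices are computed and
# cached the same way as the adjacency matrix above, assuming the compute_*
# methods of Geometry -- note this must be called on a Geometry instance,
# not on the plain dict that `geom` was just rebound to:
#
#     affinity_matrix = geometry_instance.compute_affinity_matrix()
#     laplacian_matrix = geometry_instance.compute_laplacian_matrix()
#
# Each embedding below triggers these computations internally.)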
49 | n_components = 2 50 | 51 | # LTSA 52 | ltsa = LTSA(n_components=n_components, eigen_solver='arpack', 53 | geom=geom) 54 | embed_ltsa = ltsa.fit_transform(X) 55 | 56 | # LLE 57 | lle = LocallyLinearEmbedding(n_components=n_components, eigen_solver='arpack', 58 | geom=geom) 59 | embed_lle = lle.fit_transform(X) 60 | 61 | # Isomap 62 | isomap = Isomap(n_components=n_components, eigen_solver='arpack', 63 | geom=geom) 64 | embed_isomap = isomap.fit_transform(X) 65 | 66 | # Spectral Embedding 67 | spectral = SpectralEmbedding(n_components=n_components, eigen_solver='arpack', 68 | geom=geom) 69 | embed_spectral = spectral.fit_transform(X) -------------------------------------------------------------------------------- /examples/examples_index.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# `megaman`: Manifold Learning for Millions of Points " 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "This notebook contains links to examples of using `megaman` to perform manifold learning on data. \n", 15 | "\n", 16 | "See also the [megaman documentation](http://mmp2.github.io/megaman/)." 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "* [megaman_tutorial.ipynb](https://github.com/mmp2/megaman/blob/master/examples/megaman_tutorial.ipynb)\n", 24 | "* [manifold_intro.ipynb](https://github.com/mmp2/megaman/blob/master/examples/manifold_intro.ipynb)" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": { 31 | "collapsed": true 32 | }, 33 | "outputs": [], 34 | "source": [] 35 | } 36 | ], 37 | "metadata": { 38 | "kernelspec": { 39 | "display_name": "Python 2", 40 | "language": "python", 41 | "name": "python2" 42 | }, 43 | "language_info": { 44 | "codemirror_mode": { 45 | "name": "ipython", 46 | "version": 2 47 | }, 48 | "file_extension": ".py", 49 | "mimetype": "text/x-python", 50 | "name": "python", 51 | "nbconvert_exporter": "python", 52 | "pygments_lexer": "ipython2", 53 | "version": "2.7.11" 54 | } 55 | }, 56 | "nbformat": 4, 57 | "nbformat_minor": 0 58 | } 59 | -------------------------------------------------------------------------------- /examples/rad_est_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy 3 | import scipy.linalg 4 | import os 5 | import plotly.graph_objs as go 6 | 7 | try: 8 | from tqdm import * 9 | tqdm_installed = True 10 | except ImportError as e: 11 | tqdm_installed = False 12 | print('tqdm not installed, will not show the progress bar') 13 | 14 | def find_neighbors(idx, dist): 15 | nbr = dist[idx, :].nonzero()[1] 16 | if idx not in nbr: 17 | return np.append(nbr, idx) 18 | else: 19 | return nbr 20 | 21 | 22 | def find_local_singular_values(data, idx, dist, dim=15): 23 | nbr = find_neighbors(idx, dist) 24 | if nbr.shape[0] == 1: 25 | return np.zeros(dim) 26 | else: 27 | local_pca_data = data[nbr, :] 28 | local_center = np.mean(local_pca_data, axis=0) 29 | local_pca_data -= local_center[None, :] 30 | 31 | sing = scipy.linalg.svd(local_pca_data, compute_uv=False) 32 | sing_return = sing[:dim] 33 | return np.pad(sing_return, (0, dim - sing_return.shape[0]), 'constant') 34 | 35 | 36 | def find_all_singular_values(data, rad, dist): 37 | dist_copy = dist.copy() 38 | dist_copy[dist_copy > rad] = 0.0 39 | dist_copy.eliminate_zeros() 40 | dim
= data.shape[1] 41 | singular_list = np.array([find_local_singular_values(data, idx, dist_copy, dim) 42 | for idx in range(data.shape[0])]) 43 | return singular_list 44 | 45 | 46 | def find_mean_singular_values(data, rad, dist): 47 | singular_list = find_all_singular_values(data, rad, dist) 48 | return np.mean(singular_list, axis=0) 49 | 50 | 51 | def find_argmax_dimension(data, dist, optimal_rad): 52 | singular_list = find_all_singular_values(data, optimal_rad, dist) 53 | singular_gap = np.hstack( 54 | (-1 * np.diff(singular_list, axis=1), singular_list[:, -1, None])) 55 | return np.argmax(singular_gap, axis=1) + 1 56 | 57 | 58 | def ordinal (n): 59 | return "%d%s" % (n,"tsnrhtdd"[(n//10%10!=1)*(n%10<4)*n%10::4]) 60 | 61 | 62 | def estimate_dimension(data, dist, rad_search_space=None): 63 | if rad_search_space is None: 64 | rad_search_space = np.logspace(np.log10(1e-1), np.log10(5), 50) 65 | 66 | rad_iterator = rad_search_space if not tqdm_installed else tqdm( 67 | rad_search_space) 68 | sgv = np.array([find_mean_singular_values(data, rad, dist) 69 | for rad in rad_iterator]) 70 | 71 | return rad_search_space, sgv 72 | 73 | 74 | def plot_singular_values_versus_radius(singular_values, rad_search_space, start_idx, end_idx): 75 | all_trace = [] 76 | singular_gap = -np.diff(singular_values,axis=1) 77 | for idx, sing in enumerate(singular_values.T): 78 | singular_line = go.Scatter( 79 | x=rad_search_space, y=sing, name='{} singular value'.format(ordinal(idx+1)) 80 | ) 81 | if idx <= 2: 82 | singular_line['text'] = [ 'Singular gap: {:.2f}'.format(singular_gap[rid, idx]) for rid in range(50) ] 83 | if idx > 3: 84 | singular_line['hoverinfo'] = 'none' 85 | all_trace.append(singular_line) 86 | if idx == 2: 87 | # HACK: just specify the color manually, need to generate each later. 
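# (Editor's note: a non-hard-coded alternative would be to cycle a palette,
# e.g.
#     import itertools
#     import plotly.colors
#     palette = itertools.cycle(plotly.colors.DEFAULT_PLOTLY_COLORS)
# and use next(palette) per trace -- assuming a plotly version that exposes
# plotly.colors. Left as a note so the rendered figure stays unchanged.)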
88 | all_trace.append(go.Scatter( 89 | x=rad_search_space[start_idx:end_idx], y=singular_values[start_idx:end_idx,2], 90 | mode='lines',marker=dict(color='green'), 91 | showlegend=False, hoverinfo='none' 92 | )) 93 | all_trace.append(go.Scatter( 94 | x=rad_search_space[start_idx:end_idx], y=singular_values[start_idx:end_idx,1], 95 | fill='tonexty', mode='none', showlegend=False, hoverinfo='none' 96 | )) 97 | return all_trace 98 | 99 | def generate_layouts(start_idx, end_idx, est_rad_dim1, est_rad_dim2, rad_search_space): 100 | return go.Layout( 101 | title='Singular values - radii plot', 102 | xaxis=dict( 103 | title='$\\text{Radius } r $', 104 | # type='log', 105 | autorange=True 106 | ), 107 | yaxis=dict(title='$\\text{Singular value } \\sigma$'), 108 | shapes=[{ 109 | 'type': 'rect', 110 | 'xref': 'x', 111 | 'yref': 'paper', 112 | 'x0': rad_search_space[start_idx], 113 | 'y0': 0, 114 | 'x1': rad_search_space[end_idx-1], 115 | 'y1': 1, 116 | 'fillcolor': '#d3d3d3', 117 | 'opacity': 0.4, 118 | 'line': { 119 | 'width': 0, 120 | } 121 | }], 122 | annotations=[ 123 | dict( 124 | x=est_rad_dim1, 125 | y=0, 126 | xref='x', 127 | yref='y', 128 | text='$\\hat{r}_{d=1}$', 129 | font = dict(size = 30), 130 | showarrow=True, 131 | arrowhead=7, 132 | ax=20, 133 | ay=30 134 | ), 135 | dict( 136 | x=est_rad_dim2, 137 | y=0, 138 | xref='x', 139 | yref='y', 140 | text='$\\hat{r}_{d=2}$', 141 | font = dict(size = 30), 142 | showarrow=True, 143 | arrowhead=7, 144 | ax=-20, 145 | ay=30 146 | ) 147 | ]) 148 | -------------------------------------------------------------------------------- /examples/tutorial_data_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mmp2/megaman/249a7d725de1f99ea7f6ba169a5a89468fc423ec/examples/tutorial_data_plot.png -------------------------------------------------------------------------------- /examples/tutorial_embeddings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mmp2/megaman/249a7d725de1f99ea7f6ba169a5a89468fc423ec/examples/tutorial_embeddings.png -------------------------------------------------------------------------------- /examples/tutorial_isomap_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mmp2/megaman/249a7d725de1f99ea7f6ba169a5a89468fc423ec/examples/tutorial_isomap_plot.png -------------------------------------------------------------------------------- /examples/tutorial_spectral_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mmp2/megaman/249a7d725de1f99ea7f6ba169a5a89468fc423ec/examples/tutorial_spectral_plot.png -------------------------------------------------------------------------------- /megaman/__check_build/__init__.py: -------------------------------------------------------------------------------- 1 | # Author: Jake VanderPlas 2 | # LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE 3 | # 4 | # Adapted from scikit-learn's similar utility 5 | 6 | """ Module to give helpful messages to the user that did not 7 | compile megaman properly (adapted from scikit-learn's check_build utility) 8 | """ 9 | import os 10 | 11 | INPLACE_MSG = """ 12 | It appears that you are importing a local megaman source tree. 
13 | Please either use an inplace install or try from another location.""" 14 | 15 | STANDARD_MSG = """ 16 | If you have used an installer, please check that it is suited for your 17 | Python version, your operating system and your platform.""" 18 | 19 | ERROR_TEMPLATE = """{error} 20 | ___________________________________________________________________________ 21 | Contents of {local_dir}: 22 | {contents} 23 | ___________________________________________________________________________ 24 | It seems that megaman has not been built correctly. 25 | 26 | If you have installed megaman from source, please do not forget 27 | to build the package before using it: run `python setup.py install` 28 | in the source directory. 29 | {msg}""" 30 | 31 | 32 | def raise_build_error(e): 33 | # Raise a comprehensible error and list the contents of the 34 | # directory to help debugging on the mailing list. 35 | local_dir = os.path.split(__file__)[0] 36 | msg = STANDARD_MSG 37 | if local_dir == "megaman/__check_build": 38 | # Picking up the local install: this will work only if the 39 | # install is an 'inplace build' 40 | msg = INPLACE_MSG 41 | dir_content = list() 42 | for i, filename in enumerate(os.listdir(local_dir)): 43 | if ((i + 1) % 3): 44 | dir_content.append(filename.ljust(26)) 45 | else: 46 | dir_content.append(filename + '\n') 47 | contents = ''.join(dir_content).strip() 48 | raise ImportError(ERROR_TEMPLATE.format(error=e, 49 | local_dir=local_dir, 50 | contents=contents, 51 | msg=msg)) 52 | 53 | try: 54 | from ._check_build import check_build 55 | except ImportError as e: 56 | raise_build_error(e) 57 | -------------------------------------------------------------------------------- /megaman/__check_build/_check_build.pyx: -------------------------------------------------------------------------------- 1 | # LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE 2 | 3 | def check_build(): 4 | return 5 | -------------------------------------------------------------------------------- /megaman/__check_build/setup.py: -------------------------------------------------------------------------------- 1 | # LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE 2 | 3 | import numpy 4 | 5 | 6 | def configuration(parent_package='', top_path=None): 7 | from numpy.distutils.misc_util import Configuration 8 | config = Configuration('__check_build', parent_package, top_path) 9 | config.add_extension('_check_build', 10 | sources=['_check_build.c']) 11 | 12 | return config 13 | 14 | if __name__ == '__main__': 15 | from numpy.distutils.core import setup 16 | setup(**configuration(top_path='').todict()) 17 | -------------------------------------------------------------------------------- /megaman/__init__.py: -------------------------------------------------------------------------------- 1 | """megaman: Scalable Manifold Learning""" 2 | 3 | # LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE 4 | 5 | __version__ = "0.3.dev0" 6 | 7 | from . 
import __check_build 8 | -------------------------------------------------------------------------------- /megaman/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .datasets import (get_megaman_image, generate_megaman_data, 2 | generate_megaman_manifold, generate_noisefree_hourglass, 3 | generate_noisy_hourglass) 4 | -------------------------------------------------------------------------------- /megaman/datasets/datasets.py: -------------------------------------------------------------------------------- 1 | """Some sample datasets""" 2 | from __future__ import division 3 | 4 | import os 5 | 6 | import numpy as np 7 | from scipy import ndimage 8 | from sklearn.utils import check_random_state 9 | 10 | import collections 11 | 12 | def get_megaman_image(factor=1): 13 | """Return an RGBA representation of the megaman icon""" 14 | imfile = os.path.join(os.path.dirname(__file__), 'megaman.png') 15 | data = ndimage.imread(imfile) / 255 16 | if factor > 1: 17 | data = data.repeat(factor, axis=0).repeat(factor, axis=1) 18 | return data 19 | 20 | 21 | def generate_megaman_data(sampling=2): 22 | """Generate 2D point data of the megaman image""" 23 | data = get_megaman_image() 24 | x = np.arange(sampling * data.shape[1]) / float(sampling) 25 | y = np.arange(sampling * data.shape[0]) / float(sampling) 26 | X, Y = map(np.ravel, np.meshgrid(x, y)) 27 | C = data[np.floor(Y.max() - Y).astype(int), 28 | np.floor(X).astype(int)] 29 | return np.vstack([X, Y]).T, C 30 | 31 | 32 | def _make_S_curve(x, range=(-0.75, 0.75)): 33 | """Make a 2D S-curve from a 1D vector""" 34 | assert x.ndim == 1 35 | x = x - x.min() 36 | theta = 2 * np.pi * (range[0] + (range[1] - range[0]) * x / x.max()) 37 | X = np.empty((x.shape[0], 2), dtype=float) 38 | X[:, 0] = np.sign(theta) * (1 - np.cos(theta)) 39 | X[:, 1] = np.sin(theta) 40 | X *= x.max() / (2 * np.pi * (range[1] - range[0])) 41 | return X 42 | 43 | 44 | def generate_megaman_manifold(sampling=2, nfolds=2, 45 | rotate=True, random_state=None): 46 | """Generate a manifold of the megaman data""" 47 | X, c = generate_megaman_data(sampling) 48 | for i in range(nfolds): 49 | X = np.hstack([_make_S_curve(x) for x in X.T]) 50 | 51 | if rotate: 52 | rand = check_random_state(random_state) 53 | R = rand.randn(X.shape[1], X.shape[1]) 54 | U, s, VT = np.linalg.svd(R) 55 | X = np.dot(X, U) 56 | 57 | return X, c 58 | 59 | def generate_noisefree_hourglass(n_size, scaling_factor=1.75, seed=None): 60 | if seed is not None: 61 | np.random.seed(seed) 62 | fz = lambda z: -4*z**4 + 4*z**2 + 1 63 | X = np.random.normal(0,1,[n_size,3]) 64 | sphere = X / np.linalg.norm(X,axis=1)[:,None] 65 | r = np.linalg.norm(sphere,axis=1) 66 | 67 | x,y,z = sphere.T 68 | theta = np.arctan2(y,x) 69 | phi = np.arccos(z/r) 70 | 71 | r_hour = fz(z) 72 | theta_hour = theta 73 | z_hour = z 74 | phi_hour = np.arccos(z_hour/r_hour) 75 | 76 | x_hour = r_hour*np.cos(theta_hour)*np.sin(phi_hour) 77 | y_hour = r_hour*np.sin(theta_hour)*np.sin(phi_hour) 78 | z_hour = r_hour*np.cos(phi_hour) 79 | 80 | x_hour *= 0.5 81 | y_hour *= 0.5 82 | 83 | hourglass = np.vstack((x_hour,y_hour,z_hour)).T 84 | hourglass *= scaling_factor 85 | 86 | return hourglass 87 | 88 | def _generate_noises(sigmas, size, dimensions, seed=None): 89 | if seed is not None: 90 | np.random.seed(seed) 91 | if isinstance(sigmas, (collections.Sequence, np.ndarray)): 92 | assert len(sigmas) == dimensions, \ 93 | 'The size of sigmas should be the same as noises dimensions' 94 | return
np.random.multivariate_normal(np.zeros(dimensions), 95 | np.diag(sigmas), size) 96 | else: 97 | return np.random.normal(0,sigmas,[size,dimensions]) 98 | 99 | def _add_noises_on_primary_dimensions(data,sigmas=0.1,seed=None): 100 | size,dim = data.shape 101 | noises = _generate_noises(sigmas,size,dim,seed) 102 | return data + noises 103 | 104 | def _add_noises_on_additional_dimensions(data,addition_dims,sigmas=1,seed=None): 105 | if addition_dims == 0: 106 | return data 107 | else: 108 | noises = _generate_noises(sigmas,data.shape[0],addition_dims,seed) 109 | return np.hstack((data,noises)) 110 | 111 | def generate_noisy_hourglass(size, sigma_primary=0.05, addition_dims=0, 112 | sigma_additional=0.1, scaling_factor=1.75, seed=None): 113 | hourglass = generate_noisefree_hourglass(size, scaling_factor, seed) 114 | hourglass = _add_noises_on_primary_dimensions(hourglass, sigma_primary) 115 | hourglass = _add_noises_on_additional_dimensions(hourglass, addition_dims, 116 | sigma_additional) 117 | return hourglass 118 | -------------------------------------------------------------------------------- /megaman/datasets/megaman.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mmp2/megaman/249a7d725de1f99ea7f6ba169a5a89468fc423ec/megaman/datasets/megaman.png -------------------------------------------------------------------------------- /megaman/embedding/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The :mod:`megaman.embedding` module implements data embedding techniques. 3 | """ 4 | 5 | # LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE 6 | 7 | from .locally_linear import LocallyLinearEmbedding 8 | from .isomap import Isomap 9 | from .ltsa import LTSA 10 | from .spectral_embedding import SpectralEmbedding 11 | -------------------------------------------------------------------------------- /megaman/embedding/base.py: -------------------------------------------------------------------------------- 1 | """ base estimator class for megaman """ 2 | 3 | # Author: James McQueen 4 | # LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE 5 | 6 | import numpy as np 7 | from scipy.sparse import isspmatrix 8 | 9 | from sklearn.base import BaseEstimator, TransformerMixin 10 | from sklearn.utils.validation import check_array 11 | 12 | from ..geometry.geometry import Geometry 13 | 14 | # from sklearn.utils.validation import FLOAT_DTYPES 15 | FLOAT_DTYPES = (np.float64, np.float32, np.float16) 16 | 17 | 18 | class BaseEmbedding(BaseEstimator, TransformerMixin): 19 | """ Base Class for all megaman embeddings. 20 | 21 | Inherits BaseEstimator and TransformerMixin from sklearn. 22 | 23 | BaseEmbedding creates the common interface to the geometry 24 | class for all embeddings as well as providing a common 25 | .fit_transform(). 26 | 27 | Parameters 28 | ---------- 29 | n_components : integer 30 | number of coordinates for the manifold. 31 | radius : float (optional) 32 | radius for adjacency and affinity calculations. Will be overridden if 33 | either is set in `geom` 34 | geom : dict or megaman.geometry.Geometry object 35 | specification of geometry parameters: keys are 36 | ["adjacency_method", "adjacency_kwds", "affinity_method", 37 | "affinity_kwds", "laplacian_method", "laplacian_kwds"] 38 | 39 | Attributes 40 | ---------- 41 | geom_ : a fitted megaman.geometry.Geometry object.
42 | """ 43 | def __init__(self, n_components=2, radius=None, geom=None): 44 | self.n_components = n_components 45 | self.radius = radius 46 | self.geom = geom 47 | 48 | def _validate_input(self, X, input_type): 49 | if input_type == 'data': 50 | sparse_formats = None 51 | elif input_type in ['adjacency', 'affinity']: 52 | sparse_formats = ['csr', 'coo', 'lil', 'bsr', 'dok', 'dia'] 53 | else: 54 | raise ValueError("unrecognized input_type: {0}".format(input_type)) 55 | return check_array(X, dtype=FLOAT_DTYPES, accept_sparse=sparse_formats) 56 | 57 | # # The world is not ready for this... 58 | # def estimate_radius(self, X, input_type='data', intrinsic_dim=None): 59 | # """Estimate a radius based on the data and intrinsic dimensionality 60 | # 61 | # Parameters 62 | # ---------- 63 | # X : array_like, [n_samples, n_features] 64 | # dataset for which radius is estimated 65 | # intrinsic_dim : int (optional) 66 | # estimated intrinsic dimensionality of the manifold. If not 67 | # specified, then intrinsic_dim = self.n_components 68 | # 69 | # Returns 70 | # ------- 71 | # radius : float 72 | # The estimated radius for the fit 73 | # """ 74 | # if input_type == 'affinity': 75 | # return None 76 | # elif input_type == 'adjacency': 77 | # return X.max() 78 | # elif input_type == 'data': 79 | # if intrinsic_dim is None: 80 | # intrinsic_dim = self.n_components 81 | # mean_std = np.std(X, axis=0).mean() 82 | # n_features = X.shape[1] 83 | # return 0.5 * mean_std / n_features ** (1. / (intrinsic_dim + 6)) 84 | # else: 85 | # raise ValueError("Unrecognized input_type: {0}".format(input_type)) 86 | 87 | def fit_geometry(self, X=None, input_type='data'): 88 | """Inputs self.geom, and produces the fitted geometry self.geom_""" 89 | if self.geom is None: 90 | self.geom_ = Geometry() 91 | elif isinstance(self.geom, Geometry): 92 | self.geom_ = self.geom 93 | else: 94 | try: 95 | kwds = dict(**self.geom) 96 | except TypeError: 97 | raise ValueError("geom must be a Geometry instance or " 98 | "a mappable/dictionary") 99 | self.geom_ = Geometry(**kwds) 100 | 101 | if self.radius is not None: 102 | self.geom_.set_radius(self.radius, override=False) 103 | 104 | # if self.radius == 'auto': 105 | # if X is not None and input_type != 'affinity': 106 | # self.geom_.set_radius(self.estimate_radius(X, input_type), 107 | # override=False) 108 | # else: 109 | # self.geom_.set_radius(self.radius, 110 | # override=False) 111 | 112 | if X is not None: 113 | self.geom_.set_matrix(X, input_type) 114 | 115 | return self 116 | 117 | def fit_transform(self, X, y=None, input_type='data'): 118 | """Fit the model from data in X and transform X. 119 | 120 | Parameters 121 | ---------- 122 | input_type : string, one of: 'data', 'distance' or 'affinity'. 123 | The values of input data X. (default = 'data') 124 | X: array-like, shape (n_samples, n_features) 125 | Training vector, where n_samples in the number of samples 126 | and n_features is the number of features. 127 | 128 | If self.input_type is 'distance': 129 | 130 | X : array-like, shape (n_samples, n_samples), 131 | Interpret X as precomputed distance or adjacency graph 132 | computed from samples. 133 | 134 | Returns 135 | ------- 136 | X_new: array-like, shape (n_samples, n_components) 137 | """ 138 | self.fit(X, y=y, input_type=input_type) 139 | return self.embedding_ 140 | 141 | def transform(self, X, y=None, input_type='data'): 142 | raise NotImplementedError("transform() not implemented. 
" 143 | "Try fit_transform()") 144 | -------------------------------------------------------------------------------- /megaman/embedding/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mmp2/megaman/249a7d725de1f99ea7f6ba169a5a89468fc423ec/megaman/embedding/tests/__init__.py -------------------------------------------------------------------------------- /megaman/embedding/tests/test_base.py: -------------------------------------------------------------------------------- 1 | # LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE 2 | 3 | import numpy as np 4 | from numpy.testing import assert_allclose 5 | 6 | from megaman.utils.testing import assert_raise_message 7 | from megaman.geometry.geometry import Geometry 8 | from megaman.embedding.base import BaseEmbedding 9 | 10 | 11 | def test_geometry_dict(): 12 | """ Test passing a dictionary and confirm the output """ 13 | geom_dict = dict(adjacency_method = 'auto', 14 | adjacency_kwds = {'radius':4}, 15 | affinity_method = 'auto', 16 | affinity_kwds = {'radius':4}, 17 | laplacian_method = 'geometric', 18 | laplacian_kwds = {'scaling_eps':4}) 19 | g1 = Geometry(**geom_dict) 20 | base_embedding = BaseEmbedding(geom=geom_dict).fit_geometry() 21 | assert(g1.__dict__ == base_embedding.geom_.__dict__) 22 | 23 | 24 | def test_geometry_object(): 25 | """ Test passing a geometry object and confirm the output """ 26 | g1 = Geometry(adjacency_method = 'auto', 27 | adjacency_kwds = {'radius':4}, 28 | affinity_method = 'auto', 29 | affinity_kwds = {'radius':4}, 30 | laplacian_method = 'geometric', 31 | laplacian_kwds = {'scaling_eps':4}) 32 | base_embedding = BaseEmbedding(geom=g1).fit_geometry() 33 | assert(g1.__dict__ == base_embedding.geom_.__dict__) 34 | 35 | 36 | def test_geometry_update(): 37 | """ Test passing geometry object then independently update a parameter and confirm that the embedding 38 | geometry is also updated """ 39 | g1 = Geometry(adjacency_method = 'auto', 40 | adjacency_kwds = {'radius':4}, 41 | affinity_method = 'auto', 42 | affinity_kwds = {'radius':4}, 43 | laplacian_method = 'geometric', 44 | laplacian_kwds = {'scaling_eps':4}) 45 | base_embedding = BaseEmbedding(geom=g1) 46 | X = np.random.rand(10, 2) 47 | # Now update g1 -- object that was passed 48 | g1.set_data_matrix(X) 49 | # confirm internal object is updated 50 | assert_allclose(g1.X, base_embedding.geom.X) 51 | -------------------------------------------------------------------------------- /megaman/embedding/tests/test_embeddings.py: -------------------------------------------------------------------------------- 1 | """General tests for embeddings""" 2 | 3 | # LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE 4 | 5 | from itertools import product 6 | 7 | import numpy as np 8 | from numpy.testing import assert_raises, assert_allclose 9 | 10 | from megaman.embedding import (Isomap, LocallyLinearEmbedding, 11 | LTSA, SpectralEmbedding) 12 | from megaman.geometry.geometry import Geometry 13 | 14 | EMBEDDINGS = [Isomap, LocallyLinearEmbedding, LTSA, SpectralEmbedding] 15 | 16 | # # TODO: make estimator_checks pass! 
17 | # def test_estimator_checks(): 18 | # from sklearn.utils.estimator_checks import check_estimator 19 | # for Embedding in EMBEDDINGS: 20 | # yield check_estimator, Embedding 21 | 22 | 23 | def test_embeddings_fit_vs_transform(): 24 | rand = np.random.RandomState(42) 25 | X = rand.rand(100, 5) 26 | geom = Geometry(adjacency_kwds = {'radius':1.0}, 27 | affinity_kwds = {'radius':1.0}) 28 | 29 | def check_embedding(Embedding, n_components): 30 | model = Embedding(n_components=n_components, 31 | geom=geom, random_state=rand) 32 | embedding = model.fit_transform(X) 33 | assert model.embedding_.shape == (X.shape[0], n_components) 34 | assert_allclose(embedding, model.embedding_) 35 | 36 | for Embedding in EMBEDDINGS: 37 | for n_components in [1, 2, 3]: 38 | yield check_embedding, Embedding, n_components 39 | 40 | 41 | def test_embeddings_bad_arguments(): 42 | rand = np.random.RandomState(32) 43 | X = rand.rand(100, 3) 44 | 45 | def check_bad_args(Embedding): 46 | # no radius set 47 | embedding = Embedding() 48 | assert_raises(ValueError, embedding.fit, X) 49 | 50 | # unrecognized geometry 51 | embedding = Embedding(radius=2, geom='blah') 52 | assert_raises(ValueError, embedding.fit, X) 53 | 54 | for Embedding in EMBEDDINGS: 55 | yield check_bad_args, Embedding 56 | -------------------------------------------------------------------------------- /megaman/embedding/tests/test_isomap.py: -------------------------------------------------------------------------------- 1 | # LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE 2 | 3 | import sys 4 | import numpy as np 5 | import scipy as sp 6 | import scipy.sparse as sparse 7 | from scipy.spatial.distance import squareform, pdist 8 | from itertools import product 9 | from sklearn import manifold, datasets 10 | from sklearn.neighbors import NearestNeighbors 11 | 12 | from numpy.testing import assert_array_almost_equal 13 | 14 | import megaman.embedding.isomap as iso 15 | import megaman.geometry.geometry as geom 16 | from megaman.utils.eigendecomp import EIGEN_SOLVERS 17 | 18 | 19 | def _check_with_col_sign_flipping(A, B, tol=0.0): 20 | """ Check array A and B are equal with possible sign flipping on 21 | each columns""" 22 | sign = True 23 | for column_idx in range(A.shape[1]): 24 | sign = sign and ((((A[:, column_idx] - 25 | B[:, column_idx]) ** 2).mean() <= tol ** 2) or 26 | (((A[:, column_idx] + 27 | B[:, column_idx]) ** 2).mean() <= tol ** 2)) 28 | if not sign: 29 | return False 30 | return True 31 | 32 | def test_isomap_with_sklearn(): 33 | N = 10 34 | X, color = datasets.samples_generator.make_s_curve(N, random_state=0) 35 | n_components = 2 36 | n_neighbors = 3 37 | knn = NearestNeighbors(n_neighbors + 1).fit(X) 38 | # Assign the geometry matrix to get the same answer, since sklearn uses k-neighbors instead of radius-neighbors 39 | g = geom.Geometry(X) 40 | g.set_adjacency_matrix(knn.kneighbors_graph(X, mode = 'distance')) 41 | # test Isomap with sklearn 42 | sk_Y_iso = manifold.Isomap(n_neighbors, n_components, eigen_solver = 'arpack').fit_transform(X) 43 | mm_Y_iso = iso.isomap(g, n_components) 44 | assert(_check_with_col_sign_flipping(sk_Y_iso, mm_Y_iso, 0.05)) 45 | 46 | def test_isomap_simple_grid(): 47 | # Isomap should preserve distances when all neighbors are used 48 | N_per_side = 5 49 | Npts = N_per_side ** 2 50 | radius = 10 51 | # grid of equidistant points in 2D, n_components = n_dim 52 | X = np.array(list(product(range(N_per_side), repeat=2))) 53 | # distances from each point to all others 54 | G =
squareform(pdist(X)) 55 | g = geom.Geometry(adjacency_kwds = {'radius':radius}) 56 | for eigen_solver in EIGEN_SOLVERS: 57 | clf = iso.Isomap(n_components = 2, eigen_solver = eigen_solver, geom=g) 58 | clf.fit(X) 59 | G_iso = squareform(pdist(clf.embedding_)) 60 | assert_array_almost_equal(G, G_iso) 61 | -------------------------------------------------------------------------------- /megaman/embedding/tests/test_lle.py: -------------------------------------------------------------------------------- 1 | # LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE 2 | 3 | import sys 4 | import numpy as np 5 | import scipy as sp 6 | import scipy.sparse as sparse 7 | from scipy.spatial.distance import squareform, pdist 8 | from itertools import product 9 | from numpy.testing import assert_array_almost_equal 10 | 11 | from sklearn import manifold, datasets 12 | from sklearn.neighbors import NearestNeighbors 13 | 14 | import megaman.embedding.locally_linear as lle 15 | import megaman.geometry.geometry as geom 16 | from megaman.utils.eigendecomp import EIGEN_SOLVERS 17 | 18 | 19 | def _check_with_col_sign_flipping(A, B, tol=0.0): 20 | """ Check array A and B are equal with possible sign flipping on 21 | each columns""" 22 | sign = True 23 | for column_idx in range(A.shape[1]): 24 | sign = sign and ((((A[:, column_idx] - 25 | B[:, column_idx]) ** 2).mean() <= tol ** 2) or 26 | (((A[:, column_idx] + 27 | B[:, column_idx]) ** 2).mean() <= tol ** 2)) 28 | if not sign: 29 | return False 30 | return True 31 | 32 | def test_lle_with_sklearn(): 33 | N = 10 34 | X, color = datasets.samples_generator.make_s_curve(N, random_state=0) 35 | n_components = 2 36 | n_neighbors = 3 37 | knn = NearestNeighbors(n_neighbors + 1).fit(X) 38 | G = geom.Geometry() 39 | G.set_data_matrix(X) 40 | G.set_adjacency_matrix(knn.kneighbors_graph(X, mode = 'distance')) 41 | sk_Y_lle = manifold.LocallyLinearEmbedding(n_neighbors, n_components, method = 'standard').fit_transform(X) 42 | (mm_Y_lle, err) = lle.locally_linear_embedding(G, n_components) 43 | assert(_check_with_col_sign_flipping(sk_Y_lle, mm_Y_lle, 0.05)) 44 | 45 | def test_barycenter_kneighbors_graph(): 46 | X = np.array([[0, 1], [1.01, 1.], [2, 0]]) 47 | distance_matrix = squareform(pdist(X)) 48 | A = lle.barycenter_graph(distance_matrix, X) 49 | # check that columns sum to one 50 | assert_array_almost_equal(np.sum(A.toarray(), 1), np.ones(3)) 51 | pred = np.dot(A.toarray(), X) 52 | assert(np.linalg.norm(pred - X) / X.shape[0] < 1) 53 | 54 | def test_lle_simple_grid(): 55 | # note: ARPACK is numerically unstable, so this test will fail for 56 | # some random seeds. We choose 20 because the tests pass. 
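# (Editor's note: the fixed seed makes the 1e-10 jitter added to X below
# deterministic, and the same RandomState is passed as random_state to
# LocallyLinearEmbedding, which fixes the ARPACK starting vector.)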
57 | rng = np.random.RandomState(20) 58 | tol = 0.1 59 | # grid of equidistant points in 2D, n_components = n_dim 60 | X = np.array(list(product(range(5), repeat=2))) 61 | X = X + 1e-10 * rng.uniform(size=X.shape) 62 | n_components = 2 63 | G = geom.Geometry(adjacency_kwds = {'radius':3}) 64 | G.set_data_matrix(X) 65 | tol = 0.1 66 | distance_matrix = G.compute_adjacency_matrix() 67 | N = lle.barycenter_graph(distance_matrix, X).todense() 68 | reconstruction_error = np.linalg.norm(np.dot(N, X) - X, 'fro') 69 | assert(reconstruction_error < tol) 70 | for eigen_solver in EIGEN_SOLVERS: 71 | clf = lle.LocallyLinearEmbedding(n_components = n_components, geom = G, 72 | eigen_solver = eigen_solver, random_state = rng) 73 | clf.fit(X) 74 | assert(clf.embedding_.shape[1] == n_components) 75 | reconstruction_error = np.linalg.norm( 76 | np.dot(N, clf.embedding_) - clf.embedding_, 'fro') ** 2 77 | assert(reconstruction_error < tol) 78 | 79 | def test_lle_manifold(): 80 | rng = np.random.RandomState(0) 81 | # similar test on a slightly more complex manifold 82 | X = np.array(list(product(np.arange(18), repeat=2))) 83 | X = np.c_[X, X[:, 0] ** 2 / 18] 84 | X = X + 1e-10 * rng.uniform(size=X.shape) 85 | n_components = 2 86 | G = geom.Geometry(adjacency_kwds = {'radius':3}) 87 | G.set_data_matrix(X) 88 | distance_matrix = G.compute_adjacency_matrix() 89 | tol = 1.5 90 | N = lle.barycenter_graph(distance_matrix, X).todense() 91 | reconstruction_error = np.linalg.norm(np.dot(N, X) - X) 92 | assert(reconstruction_error < tol) 93 | for eigen_solver in EIGEN_SOLVERS: 94 | clf = lle.LocallyLinearEmbedding(n_components = n_components, geom = G, 95 | eigen_solver = eigen_solver, random_state = rng) 96 | clf.fit(X) 97 | assert(clf.embedding_.shape[1] == n_components) 98 | reconstruction_error = np.linalg.norm( 99 | np.dot(N, clf.embedding_) - clf.embedding_, 'fro') ** 2 100 | assert(reconstruction_error < tol) 101 | -------------------------------------------------------------------------------- /megaman/embedding/tests/test_ltsa.py: -------------------------------------------------------------------------------- 1 | # LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE 2 | 3 | import sys 4 | import numpy as np 5 | import scipy as sp 6 | import scipy.sparse as sparse 7 | from itertools import product 8 | 9 | from sklearn import manifold, datasets 10 | from sklearn.neighbors import NearestNeighbors 11 | 12 | from numpy.testing import assert_array_almost_equal 13 | import megaman.embedding.ltsa as ltsa 14 | from megaman.embedding.locally_linear import barycenter_graph 15 | import megaman.geometry.geometry as geom 16 | from megaman.utils.eigendecomp import EIGEN_SOLVERS 17 | 18 | 19 | def _check_with_col_sign_flipping(A, B, tol=0.0): 20 | """ Check array A and B are equal with possible sign flipping on 21 | each columns""" 22 | sign = True 23 | for column_idx in range(A.shape[1]): 24 | sign = sign and ((((A[:, column_idx] - 25 | B[:, column_idx]) ** 2).mean() <= tol ** 2) or 26 | (((A[:, column_idx] + 27 | B[:, column_idx]) ** 2).mean() <= tol ** 2)) 28 | if not sign: 29 | return False 30 | return True 31 | 32 | 33 | def test_ltsa_with_sklearn(): 34 | N = 10 35 | X, color = datasets.samples_generator.make_s_curve(N, random_state=0) 36 | n_components = 2 37 | n_neighbors = 3 38 | knn = NearestNeighbors(n_neighbors + 1).fit(X) 39 | G = geom.Geometry() 40 | G.set_data_matrix(X) 41 | G.set_adjacency_matrix(knn.kneighbors_graph(X, mode = 'distance')) 42 | sk_Y_ltsa = 
43 |                                                 method = 'ltsa',
44 |                                                 eigen_solver = 'arpack').fit_transform(X)
45 |     (mm_Y_ltsa, err) = ltsa.ltsa(G, n_components, eigen_solver = 'arpack')
46 |     assert(_check_with_col_sign_flipping(sk_Y_ltsa, mm_Y_ltsa, 0.05))
47 | 
48 | 
49 | def test_ltsa_eigendecomps():
50 |     N = 10
51 |     X, color = datasets.samples_generator.make_s_curve(N, random_state=0)
52 |     n_components = 2
53 |     G = geom.Geometry(adjacency_method = 'brute', adjacency_kwds = {'radius':2})
54 |     G.set_data_matrix(X)
55 |     mm_ltsa_ref, err_ref = ltsa.ltsa(G, n_components,
56 |                                      eigen_solver=EIGEN_SOLVERS[0])
57 |     for eigen_solver in EIGEN_SOLVERS[1:]:
58 |         mm_ltsa, err = ltsa.ltsa(G, n_components, eigen_solver=eigen_solver)
59 |         assert(_check_with_col_sign_flipping(mm_ltsa, mm_ltsa_ref, 0.05))
60 | 
61 | 
62 | def test_ltsa_manifold():
63 |     rng = np.random.RandomState(0)
64 |     # similar test on a slightly more complex manifold
65 |     X = np.array(list(product(np.arange(18), repeat=2)))
66 |     X = np.c_[X, X[:, 0] ** 2 / 18]
67 |     X = X + 1e-10 * rng.uniform(size=X.shape)
68 |     n_components = 2
69 |     G = geom.Geometry(adjacency_kwds = {'radius':3})
70 |     G.set_data_matrix(X)
71 |     distance_matrix = G.compute_adjacency_matrix()
72 |     tol = 1.5
73 |     N = barycenter_graph(distance_matrix, X).todense()
74 |     reconstruction_error = np.linalg.norm(np.dot(N, X) - X)
75 |     assert(reconstruction_error < tol)
76 |     for eigen_solver in EIGEN_SOLVERS:
77 |         clf = ltsa.LTSA(n_components = n_components, geom = G,
78 |                         eigen_solver = eigen_solver, random_state = rng)
79 |         clf.fit(X)
80 |         assert(clf.embedding_.shape[1] == n_components)
81 |         reconstruction_error = np.linalg.norm(
82 |             np.dot(N, clf.embedding_) - clf.embedding_, 'fro') ** 2
83 |         assert(reconstruction_error < tol)
84 | 
--------------------------------------------------------------------------------
/megaman/geometry/__init__.py:
--------------------------------------------------------------------------------
1 | # LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE
2 | 
3 | from .rmetric import RiemannMetric
4 | from .geometry import Geometry
5 | from .adjacency import Adjacency, compute_adjacency_matrix, adjacency_methods
6 | from .affinity import Affinity, compute_affinity_matrix, affinity_methods
7 | from .laplacian import Laplacian, compute_laplacian_matrix, laplacian_methods
--------------------------------------------------------------------------------
/megaman/geometry/affinity.py:
--------------------------------------------------------------------------------
1 | # LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE
2 | 
3 | from __future__ import division
4 | import numpy as np
5 | from scipy.sparse import isspmatrix
6 | from sklearn.utils.validation import check_array
7 | 
8 | from .utils import RegisterSubclasses
9 | 
10 | 
11 | def compute_affinity_matrix(adjacency_matrix, method='auto', **kwargs):
12 |     """Compute the affinity matrix with the given method"""
13 |     if method == 'auto':
14 |         method = 'gaussian'
15 |     return Affinity.init(method, **kwargs).affinity_matrix(adjacency_matrix)
16 | 
17 | 
18 | def affinity_methods():
19 |     """Return the list of valid affinity methods"""
20 |     return ['auto'] + list(Affinity.methods())
21 | 
22 | 
23 | class Affinity(RegisterSubclasses):
24 |     """Base class for computing affinity matrices"""
25 |     def __init__(self, radius=None, symmetrize=True):
26 |         if radius is None:
27 |             raise ValueError("must specify radius for affinity matrix")
28 |         self.radius = radius
29 |         self.symmetrize = symmetrize
30 | 
31 |     def affinity_matrix(self, adjacency_matrix):
32 |         raise NotImplementedError()
33 | 
34 | 
35 | class GaussianAffinity(Affinity):
36 |     name = "gaussian"
37 | 
38 |     @staticmethod
39 |     def _symmetrize(A):
40 |         # TODO: make this more efficient?
41 |         # Also, need to maintain explicit zeros!
42 |         return 0.5 * (A + A.T)
43 | 
44 |     def affinity_matrix(self, adjacency_matrix):
45 |         A = check_array(adjacency_matrix, dtype=float, copy=True,
46 |                         accept_sparse=['csr', 'csc', 'coo'])
47 | 
48 |         if isspmatrix(A):
49 |             data = A.data
50 |         else:
51 |             data = A
52 | 
53 |         # in-place computation of
54 |         #   data = np.exp(-(data / radius) ** 2)
55 |         data **= 2
56 |         data /= -self.radius ** 2
57 |         np.exp(data, out=data)
58 | 
59 |         if self.symmetrize:
60 |             A = self._symmetrize(A)
61 | 
62 |         # for sparse, the zero-distance diagonal must become an explicit affinity of 1
63 |         # TODO: make this more efficient?
64 |         if isspmatrix(A):
65 |             A.setdiag(1)
66 | 
67 |         return A
68 | 
--------------------------------------------------------------------------------
/megaman/geometry/complete_adjacency_matrix.py:
--------------------------------------------------------------------------------
1 | # LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE
2 | from .adjacency import CyFLANNAdjacency, compute_adjacency_matrix
3 | from scipy.sparse import vstack, hstack
4 | 
5 | def complete_adjacency_matrix(Dtrain, Xtrain, Xtest, adjacency_kwds):
6 |     if 'cyflann_kwds' in adjacency_kwds.keys():
7 |         cyflann_kwds = adjacency_kwds['cyflann_kwds']
8 |     else:
9 |         cyflann_kwds = {}
10 |     radius = adjacency_kwds['radius']
11 |     Cyflann = CyFLANNAdjacency(radius=radius, **cyflann_kwds)
12 |     train_index = Cyflann.build_index(Xtrain)
13 |     test_train_adjacency = train_index.radius_neighbors_graph(Xtest, radius)
14 |     test_test_adjacency = compute_adjacency_matrix(Xtest, method='cyflann', **adjacency_kwds)
15 |     train_adjacency = hstack([Dtrain, test_train_adjacency.transpose()])
16 |     test_adjacency = hstack([test_train_adjacency, test_test_adjacency])
17 |     return vstack([train_adjacency, test_adjacency])
--------------------------------------------------------------------------------
/megaman/geometry/cyflann/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mmp2/megaman/249a7d725de1f99ea7f6ba169a5a89468fc423ec/megaman/geometry/cyflann/__init__.py
--------------------------------------------------------------------------------
/megaman/geometry/cyflann/cyflann_index.cc:
--------------------------------------------------------------------------------
1 | /* Authors: Zhongyue Zhang
2 | 
3 | LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE
4 | ================================================= */
5 | 
6 | #include "cyflann_index.h"
7 | 
8 | CyflannIndex::CyflannIndex(const std::vector<float>& dataset, int num_dims) {
9 |     int num_pts = dataset.size() / num_dims;
10 |     dataset_ = new float[dataset.size()];
11 |     std::copy(dataset.begin(), dataset.end(), dataset_);
12 |     Matrix<float> data(dataset_, num_pts, num_dims);
13 |     // TODO: add support for different distance metrics.
14 |     index_ = new Index< L2<float> >(data, KMeansIndexParams());
15 | }
16 | 
17 | CyflannIndex::CyflannIndex(const std::vector<float>& dataset, int num_dims,
18 |         std::string index_type, int num_trees, int branching, int iterations,
19 |         float cb_index) {
20 |     int num_pts = dataset.size() / num_dims;
21 |     dataset_ = new float[dataset.size()];
22 |     std::copy(dataset.begin(), dataset.end(), dataset_);
23 |     Matrix<float> data(dataset_, num_pts, num_dims);
24 |     // TODO: wrap all info into a class in the future.
25 |     if (index_type == "kdtrees") {
26 |         index_ = new Index< L2<float> >(data, KDTreeIndexParams(num_trees));
27 |     } else if (index_type == "kmeans") {
28 |         index_ = new Index< L2<float> >(data, KMeansIndexParams(branching,
29 |             iterations, FLANN_CENTERS_RANDOM, cb_index));
30 |     } else {
31 |         index_ = new Index< L2<float> >(data, CompositeIndexParams(num_trees,
32 |             branching, iterations, FLANN_CENTERS_RANDOM, cb_index));
33 |     }
34 | }
35 | 
36 | CyflannIndex::CyflannIndex(const std::vector<float>& dataset, int num_dims,
37 |         float target_precision, float build_weight, float memory_weight,
38 |         float sample_fraction) {
39 |     int num_pts = dataset.size() / num_dims;
40 |     dataset_ = new float[dataset.size()];
41 |     std::copy(dataset.begin(), dataset.end(), dataset_);
42 |     Matrix<float> data(dataset_, num_pts, num_dims);
43 |     // TODO: add support for different distance metrics.
44 |     index_ = new Index< L2<float> >(data, AutotunedIndexParams(
45 |         target_precision, build_weight, memory_weight, sample_fraction));
46 | }
47 | 
48 | CyflannIndex::CyflannIndex(const std::vector<float>& dataset, int num_dims,
49 |         std::string filename) {
50 |     int num_pts = dataset.size() / num_dims;
51 |     dataset_ = new float[dataset.size()];
52 |     std::copy(dataset.begin(), dataset.end(), dataset_);
53 |     Matrix<float> data(dataset_, num_pts, num_dims);
54 |     // TODO: add support for different distance metrics.
55 |     index_ = new Index< L2<float> >(data, SavedIndexParams(filename));
56 | }
57 | 
58 | CyflannIndex::~CyflannIndex() {
59 |     delete index_;
60 |     delete[] dataset_;
61 | }
62 | 
63 | void CyflannIndex::buildIndex(){
64 |     index_->buildIndex();
65 | }
66 | 
67 | int CyflannIndex::knnSearch(const std::vector<float>& queries,
68 |         std::vector< std::vector<int> >& indices,
69 |         std::vector< std::vector<float> >& dists,
70 |         int knn, int num_dims, int num_checks) {
71 |     int num_pts = queries.size() / num_dims;
72 |     float* array = new float[queries.size()];
73 |     std::copy(queries.begin(), queries.end(), array);
74 |     Matrix<float> qpts(array, num_pts, num_dims);
75 |     int res = index_->knnSearch(qpts, indices, dists, knn,
76 |         SearchParams(num_checks));
77 |     delete[] array;
78 |     return res;
79 | }
80 | 
81 | int CyflannIndex::radiusSearch(const std::vector<float>& queries,
82 |         std::vector< std::vector<int> >& indices,
83 |         std::vector< std::vector<float> >& dists,
84 |         float radius, int num_dims, int num_checks) {
85 |     int num_pts = queries.size() / num_dims;
86 |     float* array = new float[queries.size()];
87 |     std::copy(queries.begin(), queries.end(), array);
88 |     Matrix<float> dataset(array, num_pts, num_dims);
89 |     int res = index_->radiusSearch(dataset, indices, dists, radius,
90 |         SearchParams(num_checks));
91 |     delete[] array;
92 |     return res;
93 | }
94 | 
95 | void CyflannIndex::save(std::string filename) {
96 |     index_->save(filename);
97 | }
98 | 
99 | int CyflannIndex::veclen() { return index_->veclen(); }
100 | 
101 | int CyflannIndex::size() { return index_->size(); }
102 | 
--------------------------------------------------------------------------------
/megaman/geometry/cyflann/cyflann_index.h:
--------------------------------------------------------------------------------
1 | /* Authors: Zhongyue Zhang
2 | 
3 | LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE
4 | ================================================= */
5 | #ifndef CYFLANN_INDEX_H_
6 | #define CYFLANN_INDEX_H_
7 | 
8 | #include <vector>
9 | #include <flann/flann.hpp>
10 | using namespace flann;
11 | 
12 | class CyflannIndex {
13 | public:
14 | 
15 |     CyflannIndex(const std::vector<float>& dataset, int num_dims);
16 | 
17 |     CyflannIndex(const std::vector<float>& dataset, int num_dims,
18 |         std::string index_type, int num_trees, int branching, int iterations,
19 |         float cb_index);
20 | 
21 |     CyflannIndex(const std::vector<float>& dataset, int num_dims,
22 |         float target_precision, float build_weight, float memory_weight,
23 |         float sample_fraction);
24 | 
25 |     CyflannIndex(const std::vector<float>& dataset, int num_dims,
26 |         std::string filename);
27 | 
28 |     ~CyflannIndex();
29 | 
30 |     void buildIndex();
31 | 
32 |     int knnSearch(const std::vector<float>& queries,
33 |         std::vector< std::vector<int> >& indices,
34 |         std::vector< std::vector<float> >& dists,
35 |         int knn, int num_dims, int num_checks);
36 | 
37 |     int radiusSearch(const std::vector<float>& queries,
38 |         std::vector< std::vector<int> >& indices,
39 |         std::vector< std::vector<float> >& dists,
40 |         float radius, int num_dims, int num_checks);
41 | 
42 |     void save(std::string filename);
43 | 
44 |     int veclen();
45 | 
46 |     int size();
47 | 
48 | private:
49 |     float* dataset_;
50 |     Index< L2<float> >* index_;
51 | };
52 | 
53 | // Takes a flattened matrix queries, with dimension num_dims.
54 | // For each data point in queries, search for neighbors within the radius.
55 | int radiusSearch(const std::vector<float>& queries,
56 |     std::vector< std::vector<int> >& indices,
57 |     std::vector< std::vector<float> >& dists,
58 |     float radius, int num_dims);
59 | 
60 | #endif  // CYFLANN_INDEX_H_
61 | 
--------------------------------------------------------------------------------
/megaman/geometry/cyflann/index.pxd:
--------------------------------------------------------------------------------
1 | # Authors: Zhongyue Zhang
2 | # LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE
3 | 
4 | from __future__ import division
5 | import cython
6 | import numpy as np
7 | cimport numpy as np
8 | from libcpp.vector cimport vector
9 | from libcpp.string cimport string
10 | 
11 | ctypedef np.float32_t dtype_t
12 | ctypedef np.int32_t dtypei_t
13 | 
14 | cdef extern from "cyflann_index.h":
15 |     cdef cppclass CyflannIndex:
16 |         CyflannIndex(const vector[dtype_t]& dataset, dtypei_t ndim) except +
17 |         CyflannIndex(const vector[dtype_t]& dataset, dtypei_t num_dims,
18 |                      string index_type, dtypei_t num_trees, dtypei_t branching,
19 |                      dtypei_t iterations, dtype_t cb_index)
20 |         CyflannIndex(const vector[dtype_t]& dataset, dtypei_t ndim,
21 |                      dtype_t target_precision, dtype_t build_weight,
22 |                      dtype_t memory_weight, dtype_t sample_fraction)
23 |         CyflannIndex(const vector[dtype_t]& dataset, dtypei_t ndim,
24 |                      string filename)
25 |         void buildIndex()
26 |         int knnSearch(const vector[dtype_t]& queries,
27 |                       vector[vector[dtypei_t]]& indices,
28 |                       vector[vector[dtype_t]]& dists,
29 |                       dtypei_t knn, dtypei_t num_dims, dtypei_t num_checks)
30 |         int radiusSearch(const vector[dtype_t]& queries,
31 |                          vector[vector[dtypei_t]]& indices,
32 |                          vector[vector[dtype_t]]& dists,
33 |                          dtype_t radius, dtypei_t num_dims, dtypei_t num_checks)
34 |         void save(string filename)
35 |         int veclen()
36 |         int size()
37 | 
--------------------------------------------------------------------------------
/megaman/geometry/cyflann/setup.py:
--------------------------------------------------------------------------------
1 | # LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE
2 | 
3 | import os
4 | import sys
5 | import platform
6 | 
7 | FLANN_ROOT = os.environ.get('FLANN_ROOT', sys.exec_prefix)
8 | CONDA_BUILD = os.environ.get('CONDA_BUILD', 0)
9 | 
10 | def configuration(parent_package='', top_path=None):
11 |     import numpy
12 |     from numpy.distutils.misc_util import Configuration
13 | 
14 |     config = Configuration('geometry/cyflann', parent_package, top_path)
15 |     libraries = ['flann', 'flann_cpp']
16 |     if os.name == 'posix':
17 |         libraries.append('m')
18 | 
19 |     kwds = {}
20 |     flann_include = os.path.join(FLANN_ROOT, 'include')
21 |     flann_lib = os.path.join(FLANN_ROOT, 'lib')
22 | 
23 |     if CONDA_BUILD:
24 |         # conda uses relative dynamic library paths
25 |         pass
26 |     else:
27 |         # direct installations use absolute library paths
28 |         print("Compiling FLANN with FLANN_ROOT={0}".format(FLANN_ROOT))
29 | 
30 |         # from http://stackoverflow.com/questions/19123623/python-runtime-library-dirs-doesnt-work-on-mac
31 |         if platform.system() == 'Darwin':
32 |             kwds['extra_link_args'] = ['-Wl,-rpath,'+flann_lib]
33 |             kwds['runtime_library_dirs'] = [flann_lib]
34 | 
35 |     config.add_extension("index",
36 |                          sources=["index.cxx", "cyflann_index.cc"],
37 |                          include_dirs=[numpy.get_include(), flann_include],
38 |                          libraries=libraries,
39 |                          library_dirs=[flann_lib],
40 |                          extra_compile_args=["-O3"],
41 |                          **kwds)
42 | 
43 |     return config
44 | 
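A usage note: the sketch below shows how the cyflann extension built here is
typically driven from Python, mirroring test_adjacency.py further down. It
assumes megaman was installed with FLANN support; the radius and index_type
values are illustrative only.

    import numpy as np
    from megaman.geometry import compute_adjacency_matrix

    X = np.random.RandomState(0).randn(100, 3)
    # cyflann_kwds are forwarded to the CyflannIndex constructor above:
    # 'kdtrees' and 'kmeans' select those index types; any other value
    # falls back to a composite index.
    adjacency_kwds = {'radius': 1.5, 'cyflann_kwds': {'index_type': 'kmeans'}}
    D = compute_adjacency_matrix(X, method='cyflann', **adjacency_kwds)
    print(D.shape)  # (100, 100) sparse matrix of within-radius distances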
--------------------------------------------------------------------------------
/megaman/geometry/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mmp2/megaman/249a7d725de1f99ea7f6ba169a5a89468fc423ec/megaman/geometry/tests/__init__.py
--------------------------------------------------------------------------------
/megaman/geometry/tests/test_adjacency.py:
--------------------------------------------------------------------------------
1 | # LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE
2 | 
3 | from nose import SkipTest
4 | 
5 | import numpy as np
6 | from numpy.testing import assert_allclose, assert_raises, assert_equal
7 | from scipy.sparse import isspmatrix
8 | from scipy.spatial.distance import cdist, pdist, squareform
9 | 
10 | from megaman.geometry import (Geometry, compute_adjacency_matrix, Adjacency,
11 |                               adjacency_methods)
12 | 
13 | 
14 | try:
15 |     import pyflann as pyf
16 |     NO_PYFLANN = False
17 | except ImportError:
18 |     NO_PYFLANN = True
19 | 
20 | 
21 | def test_adjacency_methods():
22 |     assert_equal(set(adjacency_methods()),
23 |                  {'auto', 'pyflann', 'ball_tree',
24 |                   'cyflann', 'brute', 'kd_tree'})
25 | 
26 | 
27 | def test_adjacency_input_validation():
28 |     X = np.random.rand(20, 3)
29 |     # need to specify radius or n_neighbors
30 |     assert_raises(ValueError, compute_adjacency_matrix, X)
31 |     # cannot specify both radius and n_neighbors
32 |     assert_raises(ValueError, compute_adjacency_matrix, X,
33 |                   radius=1, n_neighbors=10)
34 | 
35 | 
36 | def test_adjacency():
37 |     rng = np.random.RandomState(36)
38 |     X = rng.rand(100, 3)
39 |     Gtrue = {}
40 | 
41 |     exact_methods = [m for m in Adjacency.methods()
42 |                      if not m.endswith('flann')]
43 | 
44 |     def check_kneighbors(n_neighbors, method):
45 |         if method == 'pyflann' and NO_PYFLANN:
46 |             raise SkipTest("pyflann not installed")
47 | 
48 |         G = compute_adjacency_matrix(X, method=method,
49 |                                      n_neighbors=n_neighbors)
50 |         assert isspmatrix(G)
51 |         assert G.shape == (X.shape[0], X.shape[0])
52 |         if method in exact_methods:
53 |             assert_allclose(G.toarray(), Gtrue[n_neighbors].toarray())
54 | 
55 |     def check_radius(radius, method):
56 |         if method == 'pyflann' and NO_PYFLANN:
57 |             raise SkipTest("pyflann not installed")
58 | 
59 |         G = compute_adjacency_matrix(X, method=method,
60 |                                      radius=radius)
61 |         assert isspmatrix(G)
62 |         assert G.shape == (X.shape[0], X.shape[0])
63 |         if method in exact_methods:
64 |             assert_allclose(G.toarray(), Gtrue[radius].toarray())
65 | 
66 |     for n_neighbors in [5, 10, 15]:
67 |         Gtrue[n_neighbors] = compute_adjacency_matrix(X, method='brute',
68 |                                                       n_neighbors=n_neighbors)
69 |         for method in Adjacency.methods():
70 |             yield check_kneighbors, n_neighbors, method
71 | 
72 |     for radius in [0.1, 0.5, 1.0]:
73 |         Gtrue[radius] = compute_adjacency_matrix(X, method='brute',
74 |                                                  radius=radius)
75 |         for method in Adjacency.methods():
76 |             yield check_radius, radius, method
77 | 
78 | 
79 | def test_unknown_method():
80 |     X = np.arange(20).reshape((10, 2))
81 |     assert_raises(ValueError, compute_adjacency_matrix, X, 'foo')
82 | 
83 | 
84 | def test_all_methods_close():
85 |     rand = np.random.RandomState(36)
86 |     X = rand.randn(10, 2)
87 |     D_true = squareform(pdist(X))
88 |     D_true[D_true > 0.5] = 0
89 | 
90 |     def check_method(method):
91 |         kwargs = {}
92 |         if method == 'pyflann':
93 |             try:
94 |                 import pyflann as pyf
95 |             except ImportError:
96 |                 raise SkipTest("pyflann not installed.")
97 |             flindex = pyf.FLANN()
98 |             flindex.build_index(X, algorithm='kmeans',
99 |                                 target_precision=0.9)
100 |             kwargs['flann_index'] = flindex
101 |         this_D = compute_adjacency_matrix(X, method=method, radius=0.5,
102 |                                           **kwargs)
103 |         assert_allclose(this_D.toarray(), D_true, rtol=1E-5)
104 | 
105 |     for method in ['auto', 'cyflann', 'pyflann', 'brute']:
106 |         yield check_method, method
107 | 
108 | 
109 | def test_custom_adjacency():
110 |     class CustomAdjacency(Adjacency):
111 |         name = "custom"
112 |         def adjacency_graph(self, X):
113 |             return squareform(pdist(X))
114 | 
115 |     rand = np.random.RandomState(42)
116 |     X = rand.rand(10, 2)
117 |     D = compute_adjacency_matrix(X, method='custom', radius=1)
118 |     assert_allclose(D, cdist(X, X))
119 | 
120 |     Adjacency._remove_from_registry("custom")
121 | 
122 | def test_cyflann_index_type():
123 |     rand = np.random.RandomState(36)
124 |     X = rand.randn(10, 2)
125 |     D_true = squareform(pdist(X))
126 |     D_true[D_true > 1.5] = 0
127 | 
128 |     def check_index_type(index_type):
129 |         method = 'cyflann'
130 |         radius = 1.5
131 |         cyflann_kwds = {'index_type':index_type}
132 |         adjacency_kwds = {'radius':radius, 'cyflann_kwds':cyflann_kwds}
133 |         this_D = compute_adjacency_matrix(X=X, method = 'cyflann', **adjacency_kwds)
134 |         assert_allclose(this_D.toarray(), D_true, rtol=1E-5, atol=1E-5)
135 | 
136 |     for index_type in ['kmeans', 'kdtrees']:
137 |         yield check_index_type, index_type
--------------------------------------------------------------------------------
/megaman/geometry/tests/test_affinity.py:
--------------------------------------------------------------------------------
1 | # LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE
2 | 
3 | from __future__ import division ## removes integer division
4 | 
5 | import os
6 | 
7 | import numpy as np
8 | from numpy.testing import assert_allclose, assert_equal, assert_raises
9 | 
10 | from scipy.spatial.distance import cdist, pdist, squareform
11 | from scipy.sparse import csr_matrix
12 | from scipy import io
13 | 
14 | from megaman.geometry import (compute_adjacency_matrix,
15 |                               compute_affinity_matrix, Affinity,
16 |                               affinity_methods)
17 | 
18 | random_state = np.random.RandomState(36)
19 | n_sample = 10
20 | d = 2
21 | X = random_state.randn(n_sample, d)
22 | D = squareform(pdist(X))
23 | D[D > 1/d] = 0
24 | 
25 | 
26 | TEST_DATA = os.path.join(os.path.dirname(__file__),
27 |                          'testmegaman_laplacian_rad0_2_lam1_5_n200.mat')
28 | 
29 | 
30 | 
31 | def test_affinity_methods():
32 |     assert_equal(set(affinity_methods()), {'auto', 'gaussian'})
33 | 
34 | 
35 | def test_affinity_input_validation():
36 |     X = np.random.rand(20, 3)
37 |     D = compute_adjacency_matrix(X, radius=1)
38 |     assert_raises(ValueError, compute_affinity_matrix, X)
39 | 
40 | 
41 | def test_affinity_sparse_vs_dense():
42 |     """
43 |     Test that A_sparse is the same as A_dense for a small A matrix
44 |     """
45 |     rad = 2.
46 |     n_samples = 6
47 |     X = np.arange(n_samples)
48 |     X = X[:, np.newaxis]
49 |     X = np.concatenate((X, np.zeros((n_samples, 1), dtype=float)), axis=1)
50 |     X = np.asarray(X, order="C")
51 |     test_dist_matrix = compute_adjacency_matrix(X, method='auto', radius=rad)
52 |     A_dense = compute_affinity_matrix(test_dist_matrix.toarray(), method='auto',
53 |                                       radius=rad, symmetrize=False)
54 |     A_sparse = compute_affinity_matrix(csr_matrix(test_dist_matrix),
55 |                                        method='auto', radius=rad, symmetrize=False)
56 |     A_spdense = A_sparse.toarray()
57 |     A_spdense[A_spdense == 0] = 1.
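    # (Note on the line above: the dense code path applies exp(-(d/radius)**2)
    # to every entry, so pairs beyond the adjacency radius -- stored as 0 --
    # become exp(0) = 1, while the sparse path simply stores no entry there.
    # Mapping the missing sparse entries to 1 makes the two results comparable
    # in the assertion that follows.)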
58 |     assert_allclose(A_dense, A_spdense)
59 | 
60 | 
61 | def test_affinity_vs_matlab():
62 |     """Test that the affinity calculation matches the matlab result"""
63 |     matlab = io.loadmat(TEST_DATA)
64 | 
65 |     D = np.sqrt(matlab['S'])  # matlab outputs squared distances
66 |     A_matlab = matlab['A']
67 |     radius = matlab['rad'][0]
68 | 
69 |     # check dense affinity computation
70 |     A_dense = compute_affinity_matrix(D, radius=radius)
71 |     assert_allclose(A_dense, A_matlab)
72 | 
73 |     # check sparse affinity computation
74 |     A_sparse = compute_affinity_matrix(csr_matrix(D), radius=radius)
75 |     assert_allclose(A_sparse.toarray(), A_matlab)
76 | 
77 | 
78 | def test_affinity():
79 |     rand = np.random.RandomState(42)
80 |     X = rand.rand(20, 3)
81 |     D = cdist(X, X)
82 | 
83 |     def check_affinity(adjacency_radius, affinity_radius, symmetrize):
84 |         adj = compute_adjacency_matrix(X, radius=adjacency_radius)
85 |         aff = compute_affinity_matrix(adj, radius=affinity_radius,
86 |                                       symmetrize=symmetrize)
87 | 
88 |         A = np.exp(-(D / affinity_radius) ** 2)
89 |         A[D > adjacency_radius] = 0
90 |         assert_allclose(aff.toarray(), A)
91 | 
92 |     for adjacency_radius in [0.5, 1.0, 5.0]:
93 |         for affinity_radius in [0.1, 0.5, 1.0]:
94 |             for symmetrize in [True, False]:
95 |                 yield (check_affinity, adjacency_radius,
96 |                        affinity_radius, symmetrize)
97 | 
98 | 
99 | def test_custom_affinity():
100 |     class CustomAffinity(Affinity):
101 |         name = "custom"
102 |         def affinity_matrix(self, adjacency_matrix):
103 |             return np.exp(-abs(adjacency_matrix.toarray()))
104 | 
105 |     rand = np.random.RandomState(42)
106 |     X = rand.rand(10, 2)
107 |     D = compute_adjacency_matrix(X, radius=10)
108 |     A = compute_affinity_matrix(D, method='custom', radius=1)
109 |     assert_allclose(A, np.exp(-abs(D.toarray())))
110 | 
111 |     Affinity._remove_from_registry("custom")
112 | 
--------------------------------------------------------------------------------
/megaman/geometry/tests/test_complete_adjacency_matrix.py:
--------------------------------------------------------------------------------
1 | from scipy.spatial.distance import cdist, pdist, squareform
2 | from megaman.geometry.adjacency import compute_adjacency_matrix
3 | from megaman.geometry.complete_adjacency_matrix import complete_adjacency_matrix
4 | import numpy as np
5 | from numpy.testing import assert_allclose
6 | 
7 | def test_complete_adjacency():
8 |     rand = np.random.RandomState(36)
9 |     radius = 1.5
10 |     X = rand.randn(10, 2)
11 |     Xtest = rand.randn(4, 2)
12 | 
13 |     Xtotal = np.vstack([X, Xtest])
14 |     D_true = squareform(pdist(Xtotal))
15 |     D_true[D_true > radius] = 0
16 | 
17 |     adjacency_kwds = {'radius':radius}
18 |     Dtrain = compute_adjacency_matrix(X, method='cyflann', radius = radius)
19 |     this_D = complete_adjacency_matrix(Dtrain, X, Xtest, adjacency_kwds)
20 | 
21 |     assert_allclose(this_D.toarray(), D_true, rtol=1E-4)
--------------------------------------------------------------------------------
/megaman/geometry/tests/test_laplacian.m:
--------------------------------------------------------------------------------
1 | % generates the test data used by test_laplacian.py
2 | % LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE
3 | %
4 | 
5 | %addpath /mnt/costila/speclust/code-dominique-rmetric/
6 | addpath /mnt/costila/mmp/research/spectral/dominique-epsilon/EpsilonDemo/
7 | 
8 | outfroot = 'testmegaman_laplacian'
9 | rad = 0.2;
10 | renormlam = 1.5;  % renormalization exponent
11 | opts.lam = renormlam;
12 | n = 200;
13 | seed = 36;
14 | rand( 'seed', seed );
15 | xx1 = rand( 1, n );
16 | xx2 = rand( 1, n );
17 | xx3 = sin( 2*pi*xx1 ).*sqrt( xx2 );
18 | 
19 | xx = [ xx1; xx2; xx3 ];
20 | 
21 | epps = rad*rad;
22 | [ A, S ] = similarity( xx', epps );
23 | norms = {'geometric', 'unormalized', 'randomwalk', 'symmetricnormalized', 'renormalized' };
24 | names = {'geom', 'unnorm', 'rw', 'symnorm', 'reno1_5' };
25 | 
26 | for ii = 1:length( norms );
27 |     disp( norms{ ii } )
28 |     opts.lapType = norms{ ii };
29 |     [ L, phi, lam, flag ] = laplacian( A, 2, epps, opts );
30 |     eval( [ 'L' names{ ii } '=L;']);
31 |     eval( [ 'phi' names{ ii } '=phi;']);
32 |     eval( [ 'lam' names{ ii } '=lam;']);
33 | end;
34 | 
35 | [G, VV, LL, Ginv ] = rmetric( Lgeom, phigeom, 2, 0 );
36 | 
37 | rad
38 | num2str_(rad)
39 | renormlam
40 | num2str_(renormlam)
41 | outfname = [ outfroot '_rad' num2str_(rad) '_lam' num2str_(renormlam) '_n' num2str( n ) '.mat' ]
42 | 
43 | save( outfname )
44 | 
--------------------------------------------------------------------------------
/megaman/geometry/tests/test_laplacian.py:
--------------------------------------------------------------------------------
1 | # LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE
2 | 
3 | import os
4 | 
5 | import numpy as np
6 | from numpy.testing import assert_allclose, assert_equal, assert_raises
7 | 
8 | from scipy.sparse import isspmatrix, csr_matrix
9 | from scipy import io
10 | 
11 | from megaman.geometry import (compute_adjacency_matrix,
12 |                               compute_affinity_matrix,
13 |                               Laplacian, compute_laplacian_matrix,
14 |                               laplacian_methods)
15 | 
16 | 
17 | TEST_DATA = os.path.join(os.path.dirname(__file__),
18 |                          'testmegaman_laplacian_rad0_2_lam1_5_n200.mat')
19 | 
20 | 
21 | def test_laplacian_methods():
22 |     assert_equal(set(laplacian_methods()),
23 |                  {'auto', 'renormalized', 'symmetricnormalized',
24 |                   'geometric', 'randomwalk', 'unnormalized'})
25 | 
26 | 
27 | def test_laplacian_vs_matlab():
28 |     # Test that the laplacian calculation matches the matlab result
29 |     matlab = io.loadmat(TEST_DATA)
30 | 
31 |     laplacians = {'unnormalized': matlab['Lunnorm'],
32 |                   'symmetricnormalized': matlab['Lsymnorm'],
33 |                   'geometric': matlab['Lgeom'],
34 |                   'randomwalk': matlab['Lrw'],
35 |                   'renormalized': matlab['Lreno1_5']}
36 | 
37 |     radius = matlab['rad'][0]
38 | 
39 |     def check_laplacian(input_type, laplacian_method):
40 |         kwargs = {'scaling_epps': radius}
41 |         if laplacian_method == 'renormalized':
42 |             kwargs['renormalization_exponent'] = 1.5
43 |         adjacency = input_type(np.sqrt(matlab['S']))
44 |         affinity = compute_affinity_matrix(adjacency, radius=radius)
45 |         laplacian = compute_laplacian_matrix(affinity,
46 |                                              method=laplacian_method,
47 |                                              **kwargs)
48 |         if input_type is csr_matrix:
49 |             laplacian = laplacian.toarray()
50 |         assert_allclose(laplacian, laplacians[laplacian_method])
51 | 
52 |     for input_type in [np.array, csr_matrix]:
53 |         for laplacian_method in laplacians:
54 |             yield check_laplacian, input_type, laplacian_method
55 | 
56 | 
57 | def test_laplacian_smoketest():
58 |     rand = np.random.RandomState(42)
59 |     X = rand.rand(20, 2)
60 |     adj = compute_adjacency_matrix(X, radius=0.5)
61 |     aff = compute_affinity_matrix(adj, radius=0.1)
62 | 
63 |     def check_laplacian(method):
64 |         lap = compute_laplacian_matrix(aff, method=method)
65 | 
66 |         assert isspmatrix(lap)
67 |         assert_equal(lap.shape, (X.shape[0], X.shape[0]))
68 | 
69 |     for method in Laplacian.asymmetric_methods():
70 |         yield check_laplacian, method
71 | 
72 | 
73 | def test_laplacian_unknown_method():
74 |     """Test that laplacian fails with an unknown method type"""
75 |     A = np.array([[ 5, 2, 1 ], [ 2, 3, 2 ], [ 1, 2, 5 ]])
76 |     assert_raises(ValueError, compute_laplacian_matrix, A, method='')
77 | 
78 | 
79 | def test_laplacian_full_output():
80 |     # Test that full_output symmetrized laplacians have the right form
81 |     rand = np.random.RandomState(42)
82 |     X = rand.rand(20, 2)
83 | 
84 |     def check_symmetric(method, adjacency_radius, affinity_radius):
85 |         adj = compute_adjacency_matrix(X, radius=adjacency_radius)
86 |         aff = compute_affinity_matrix(adj, radius=affinity_radius)
87 |         lap, lapsym, w = compute_laplacian_matrix(aff, method=method,
88 |                                                   full_output=True)
89 | 
90 |         sym = w[:, np.newaxis] * (lap.toarray() + np.eye(*lap.shape))
91 | 
92 |         assert_allclose(lapsym.toarray(), sym)
93 | 
94 |     for method in Laplacian.asymmetric_methods():
95 |         for adjacency_radius in [0.5, 1.0]:
96 |             for affinity_radius in [0.1, 0.3]:
97 |                 yield check_symmetric, method, adjacency_radius, affinity_radius
98 | 
--------------------------------------------------------------------------------
/megaman/geometry/tests/test_rmetric.py:
--------------------------------------------------------------------------------
1 | # LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE
2 | 
3 | import os
4 | 
5 | from nose.tools import assert_true
6 | from nose.tools import assert_equal
7 | import scipy.io
8 | from scipy.sparse import csr_matrix
9 | from scipy.sparse import csc_matrix
10 | from scipy.sparse import isspmatrix
11 | import numpy as np
12 | from numpy.testing import assert_array_almost_equal, assert_array_equal, assert_allclose
13 | 
14 | from nose.tools import assert_raises
15 | from nose.plugins.skip import SkipTest
16 | 
17 | from megaman.geometry.rmetric import *
18 | from megaman.embedding.spectral_embedding import _graph_is_connected
19 | 
20 | TEST_DATA = os.path.join(os.path.dirname(__file__),
21 |                          'testmegaman_laplacian_rad0_2_lam1_5_n200.mat')
22 | 
23 | def _load_test_data():
24 |     """ Loads a .mat file from . and extracts the following dense matrices:
25 |     test_dist_matrix = matrix of distances
26 |     L = the geometric Laplacian
27 |     Ginv = the dual Riemann metric [n,2,2] array
28 |     G = the Riemann metric [n,2,2] array
29 |     phi = embedding in 2 dimensions [n, 2] array
30 |     rad = scalar, radius used in affinity calculations, Laplacians
31 |     Note: rad is returned as an array of dimension 1; the caller must
32 |     make it a scalar by rad = rad[0]
33 | 
34 |     """
35 |     xdict = scipy.io.loadmat(TEST_DATA)
36 |     rad = xdict[ 'rad' ]
37 |     test_dist_matrix = xdict[ 'S' ]  # S contains squared distances
38 |     test_dist_matrix = np.sqrt( test_dist_matrix )  # unused
39 |     A = xdict[ 'A' ]  # unused
40 |     L = xdict[ 'Lgeom' ]
41 |     G = xdict[ 'G' ]
42 |     H = xdict[ 'Ginv' ]
43 |     H = np.transpose( H, ( 2, 0, 1 ))
44 |     G = np.transpose( G, ( 2, 0, 1 ))
45 |     phi = xdict[ 'phigeom' ]
46 | 
47 |     print( 'phi.shape = ', phi.shape )
48 |     print( 'G.shape = ', G.shape )
49 |     print( 'H.shape = ', H.shape )
50 |     print( 'L.shape = ', L.shape )
51 |     return rad, L, G, H, phi
52 | 
53 | def test_equal_original(almost_equal_decimals = 5):
54 |     """ Loads the results from a matlab run and checks that our results
55 |     are the same. The results loaded are the Laplacian, embedding phi,
56 |     Riemannian metric G[2,2,200], and dual Riemannian metric H[2,2,200].
57 | 
58 |     Currently, this tests the riemann_metric() function only.
59 |     TODO: also test the class RiemannMetric.
60 | 
61 |     Only riemann_metric with a given L is tested; other inputs will be
62 |     tested later, once the structure of the code has stabilized (e.g., the
63 |     computation of L may be moved into a separate function).
64 |     """
65 |     rad, L, Gtest, Htest, phi = _load_test_data()
66 | 
67 |     H = riemann_metric( phi, laplacian = L, n_dim = 2, invert_h = False )[0]
68 |     n = phi.shape[ 0 ]
69 |     assert_array_almost_equal( Htest, H, almost_equal_decimals )
70 | 
71 |     # To prevent the accumulation of small numerical errors, generate G by
72 |     # inverting Htest rather than H
73 |     G = compute_G_from_H(Htest)[0]
74 |     tol = np.mean( Gtest[:,0,0])*10**(-almost_equal_decimals )
75 |     assert_allclose( Gtest, G, tol)
76 |     # assert_array_max_ulp( Gtest, G, almost_equal_decimals )
77 |     # this assertion fails because Gtest is generally asymmetric. G is
78 |     # mostly symmetric but not always. I suspect this is due to numerical
79 |     # errors, as many of these 2x2 matrices are very poorly conditioned.
80 |     # What to do? Perhaps generate another matlab test set with better
81 |     # condition numbers...
82 | 
83 | def test_lazy_rmetric(almost_equal_decimals=5):
84 |     """ Load results from matlab and check that the lazy rmetric gets the
85 |     same value as the full rmetric on a subset
86 |     """
87 |     rad, L, Gtest, Htest, phi = _load_test_data()
88 |     n = phi.shape[0]
89 |     sample = np.random.choice(range(n), min(50, n), replace=False)
90 |     H = riemann_metric(phi, laplacian = L, n_dim = 2)[0]
91 |     Hlazy = riemann_metric_lazy(phi, sample=sample, laplacian=L, n_dim=2)[0]
92 |     assert_array_almost_equal( Hlazy, H[sample, :,:], almost_equal_decimals)
--------------------------------------------------------------------------------
/megaman/geometry/tests/testmegaman_laplacian_rad0_2_lam1_5_n200.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mmp2/megaman/249a7d725de1f99ea7f6ba169a5a89468fc423ec/megaman/geometry/tests/testmegaman_laplacian_rad0_2_lam1_5_n200.mat
--------------------------------------------------------------------------------
/megaman/geometry/utils.py:
--------------------------------------------------------------------------------
1 | # LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE
2 | 
3 | __all__ = ["RegisterSubclasses"]
4 | 
5 | 
6 | # From six.py
7 | def with_metaclass(meta, *bases):
8 |     """Create a base class with a metaclass."""
9 |     # Use a dummy metaclass that replaces itself with the actual metaclass.
10 |     class metaclass(type):
11 |         def __new__(cls, name, this_bases, d):
12 |             return meta(name, bases, d)
13 |     return type.__new__(metaclass, '_TemporaryClass', (), {})
14 | 
15 | 
16 | class RegistryMeta(type):
17 |     """Metaclass for object types which register their subclasses"""
18 |     def __init__(cls, name, bases, dct):
19 |         if name in ['_TemporaryClass', 'RegisterSubclasses']:
20 |             # these are hidden baseclasses. Do nothing
21 |             pass
22 |         elif not hasattr(cls, '_method_registry'):
23 |             # this is a registry class. Create an empty registry
24 |             cls._method_registry = {}
25 |         elif hasattr(cls, 'name'):
26 |             # this is a labeled derived class. Add cls to the registry
27 |             cls._method_registry[cls.name] = cls
28 | 
29 |         super(RegistryMeta, cls).__init__(name, bases, dct)
30 | 
31 | 
32 | class RegisterSubclasses(with_metaclass(RegistryMeta)):
33 |     @classmethod
34 |     def get_method(cls, method):
35 |         if method not in cls._method_registry:
36 |             raise ValueError("method={0} not valid. Must be one of "
37 |                              "{1}".format(method, list(cls.methods())))
38 |         return cls._method_registry[method]
39 | 
40 |     @classmethod
41 |     def init(cls, method, *args, **kwargs):
42 |         Method = cls.get_method(method)
43 |         return Method(*args, **kwargs)
44 | 
45 |     @classmethod
46 |     def _remove_from_registry(cls, method):
47 |         cls._method_registry.pop(method, None)
48 | 
49 |     @classmethod
50 |     def methods(cls):
51 |         return cls._method_registry.keys()
52 | 
--------------------------------------------------------------------------------
/megaman/plotter/__init__.py:
--------------------------------------------------------------------------------
1 | # LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE
2 | 
3 | from .plotter import (plot_with_plotly, plot_embedding_with_plotly,
4 |                       plot_with_matplotlib, plot_embedding_with_matplotlib)
--------------------------------------------------------------------------------
/megaman/plotter/plotter.py:
--------------------------------------------------------------------------------
1 | # Author: Yu-Chia Chen
2 | # LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE
3 | 
4 | import numpy as np
5 | from .utils import *
6 | from .utils import _check_backend
7 | from .scatter_3d import scatter_plot3d_plotly, scatter_plot3d_matplotlib
8 | from .covar_plotter3 import covar_plotter3d_plotly, covar_plotter3d_matplotlib
9 | 
10 | @_check_backend('plotly')
11 | def plot_with_plotly(embedding, rieman_metric, nstd=2,
12 |                      color_by_ratio=True, if_ellipse=False):
13 |     from plotly.offline import iplot
14 |     import plotly.graph_objs as go
15 |     sigma_norms = get_top_two_sigma_norm(rieman_metric, color_by_ratio)
16 |     colors, colorscale = generate_colors_and_colorscale('gist_rainbow',
17 |                                                         sigma_norms)
18 |     scatter_pt = scatter_plot3d_plotly(embedding, coloring=sigma_norms,
19 |                                        colorscale=colorscale)
20 |     index = generate_grid(embedding.shape[0])
21 | 
22 |     if if_ellipse:
23 |         ellipses_pt = covar_plotter3d_plotly(embedding,
24 |                                              rieman_metric, index, colors)
25 |         scatter_pt = ellipses_pt + scatter_pt
26 | 
27 |     layout = plotly_layout(embedding)
28 |     fig = go.Figure(data=scatter_pt, layout=layout)
29 |     iplot(fig, filename='scatter-3d-plotly')
30 | 
31 | def plot_embedding_with_plotly(trace_var, idx, if_ellipse=False):
32 |     plot_with_plotly(trace_var.Y[idx], trace_var.H[idx]/30, if_ellipse=if_ellipse)
33 | 
34 | @_check_backend('matplotlib')
35 | def plot_with_matplotlib(embedding, rieman_metric, nstd=2,
36 |                          color_by_ratio=True, if_ellipse=False):
37 |     import matplotlib.pyplot as plt
38 |     sigma_norms = get_top_two_sigma_norm(rieman_metric, color_by_ratio)
39 |     colors, _ncor = get_colors_array('gist_rainbow', sigma_norms, base255=False)
40 |     fig, ax = scatter_plot3d_matplotlib(embedding, sigma_norms)
41 | 
42 |     index = generate_grid(embedding.shape[0])
43 |     if if_ellipse:
44 |         ax = covar_plotter3d_matplotlib(embedding, rieman_metric,
45 |                                         index, ax, colors)
46 |     plt.show()
47 | 
48 | def plot_embedding_with_matplotlib(trace_var, idx, if_ellipse=False):
49 |     plot_with_matplotlib(trace_var.Y[idx], trace_var.H[idx]/30, if_ellipse=if_ellipse)
50 | 
--------------------------------------------------------------------------------
/megaman/plotter/scatter_3d.py:
--------------------------------------------------------------------------------
1 | # Author: Yu-Chia Chen
2 | # LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE
3 | 
4 | import numpy as np
5 | from .utils import _check_backend
6 | 
7 | @_check_backend('matplotlib')
8 | def scatter_plot3d_matplotlib(embedding, coloring=None, fig=None,
9 |                               subplot=False, subplot_grid=None, **kwargs):
10 |     from mpl_toolkits.mplot3d import art3d, Axes3D
11 |     if fig is None:
12 |         import matplotlib.pyplot as plt
13 |         fig = plt.figure()
14 |     if subplot and subplot_grid is not None:
15 |         sx, sy, sz = subplot_grid
16 |         ax = fig.add_subplot(sx, sy, sz, projection='3d')
17 |     else:
18 |         if subplot and subplot_grid is None:
19 |             import warnings
20 |             warnings.warn(
21 |                 'Subplot grid is not provided, switching to non-subplot mode')
22 |         ax = fig.gca(projection='3d')
23 | 
24 |     ax.set_aspect('equal')
25 |     s = [2 for i in range(embedding.shape[0])]
26 |     x, y, z = embedding[:, :3].T
27 | 
28 |     if isinstance(coloring, str) and coloring.lower() in 'xyz':
29 |         color_idx = 'xyz'.find(coloring)
30 |         coloring = embedding[:, color_idx].flatten()
31 | 
32 |     if coloring is None:
33 |         ax.scatter(x, y, z, s=s, **kwargs)
34 |     else:
35 |         sc = ax.scatter(x, y, z, c=coloring, cmap='gist_rainbow', s=s, **kwargs)
36 |         fig.colorbar(sc)
37 | 
38 |     max_range = np.array(
39 |         [x.max()-x.min(), y.max()-y.min(), z.max()-z.min()]).max() / 2.0
40 | 
41 |     mid_x = (x.max()+x.min()) * 0.5
42 |     mid_y = (y.max()+y.min()) * 0.5
43 |     mid_z = (z.max()+z.min()) * 0.5
44 |     ax.set_xlim(mid_x - max_range, mid_x + max_range)
45 |     ax.set_ylim(mid_y - max_range, mid_y + max_range)
46 |     ax.set_zlim(mid_z - max_range, mid_z + max_range)
47 | 
48 |     return fig, ax
49 | 
50 | @_check_backend('plotly')
51 | def scatter_plot3d_plotly(embedding, coloring=None,
52 |                           colorscale='Rainbow', **kwargs):
53 |     import plotly.graph_objs as go
54 |     x, y, z = embedding[:, :3].T
55 |     if isinstance(coloring, str) and coloring.lower() in 'xyz':
56 |         color_idx = 'xyz'.find(coloring)
57 |         coloring = embedding[:, color_idx].flatten()
58 | 
59 |     marker = kwargs.pop('marker', None)
60 |     name = kwargs.pop('name', 'Embedding')
61 |     scatter_plot = go.Scatter3d(
62 |         x=x,
63 |         y=y,
64 |         z=z,
65 |         mode='markers',
66 |         marker=dict(
67 |             size=2,
68 |             opacity=0.8,
69 |         ),
70 |         name=name,
71 |         **kwargs
72 |     )
73 |     if coloring is not None:
74 |         scatter_plot['marker'].update(dict(
75 |             color=coloring,
76 |             colorscale=colorscale,
77 |             showscale=True,
78 |         ))
79 |     elif marker is not None:
80 |         scatter_plot['marker'].update(marker)
81 | 
82 |     return [scatter_plot]
83 | 
--------------------------------------------------------------------------------
/megaman/plotter/utils.py:
--------------------------------------------------------------------------------
1 | # Author: Yu-Chia Chen
2 | # LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE
3 | 
4 | import numpy as np
5 | 
6 | def _check_backend(backend):
7 |     def decorator(func):
8 |         def wrapper(*args, **kwargs):
9 |             import warnings
10 |             warnings.warn(
11 |                 'Be careful in using megaman.plotter modules;'
12 |                 ' the API will change in the next release.',
13 |                 FutureWarning
14 |             )
15 |             import pkgutil
16 |             package = pkgutil.find_loader(backend)
17 |             if package is not None:
18 |                 return func(*args, **kwargs)
19 |             else:
20 |                 raise ImportError('plotting backend {} not installed'.format(backend))
21 |         return wrapper
22 |     return decorator
23 | 
24 | @_check_backend('matplotlib')
25 | def get_colors_array(name, coloring, base255=True):
26 |     from matplotlib import colors, cm
27 |     cmap = cm.get_cmap(name=name)
28 |     norm = colors.Normalize()
29 |     normalized_coloring = norm(coloring)
30 |     colors_array = (cmap(normalized_coloring)[:, :3]*255).astype(np.uint8) \
31 |         if base255 else cmap(normalized_coloring)
32 |     return colors_array, normalized_coloring
33 | 
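# For example (illustrative values): get_colors_array('gist_rainbow',
# np.arange(10)) returns a (10, 3) uint8 array of 0-255 RGB rows plus the
# normalized coloring in [0, 1]; generate_plotly_colorscale below reuses
# exactly this pair to build the [normalized_value, 'rgb(r,g,b)'] entries
# that plotly expects for a colorscale.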
34 | def generate_plotly_colorscale(name, num=256):
35 |     colormap, normalized_coloring = get_colors_array(name, np.arange(num))
36 |     return [ [n_coloring, 'rgb({},{},{})'.format(*colormap[idx])] \
37 |              for idx, n_coloring in enumerate(normalized_coloring) ]
38 | 
39 | def generate_colors_and_colorscale(name, coloring, **kwargs):
40 |     colors_array, _ncor = get_colors_array(name, coloring)
41 |     colorscale = generate_plotly_colorscale(name, **kwargs)
42 |     return colors_array, colorscale
43 | 
44 | def generate_grid(size, num_groups=100):
45 |     return np.arange(0, size, num_groups)
46 | 
47 | @_check_backend('plotly')
48 | def plotly_layout(embedding):
49 |     import plotly.graph_objs as go
50 |     max_value = 1.2*np.max(np.absolute(embedding[:, :3]))
51 |     axis_range = [-max_value, max_value]
52 |     layout = go.Layout(
53 |         title='Plot with ellipse',
54 |         height=600,
55 |         width=600,
56 |         scene=dict(
57 |             xaxis=dict(
58 |                 gridcolor='rgb(255, 255, 255)',
59 |                 zerolinecolor='rgb(255, 255, 255)',
60 |                 showbackground=True,
61 |                 backgroundcolor='rgb(230, 230,230)',
62 |                 range=axis_range,
63 |             ),
64 |             yaxis=dict(
65 |                 gridcolor='rgb(255, 255, 255)',
66 |                 zerolinecolor='rgb(255, 255, 255)',
67 |                 showbackground=True,
68 |                 backgroundcolor='rgb(230, 230,230)',
69 |                 range=axis_range,
70 |             ),
71 |             zaxis=dict(
72 |                 gridcolor='rgb(255, 255, 255)',
73 |                 zerolinecolor='rgb(255, 255, 255)',
74 |                 showbackground=True,
75 |                 backgroundcolor='rgb(230, 230,230)',
76 |                 range=axis_range,
77 |             ),
78 |         )
79 |     )
80 |     return layout
81 | 
82 | def get_top_two_sigma_norm(H, color_by_ratio=True):
83 |     eigen_vals = np.array([ sorted_eigh(Hk)[0][:2] for Hk in H ])
84 |     if color_by_ratio:
85 |         toptwo_eigen_vals_norm = eigen_vals[:, 1] / eigen_vals[:, 0]
86 |     else:
87 |         toptwo_eigen_vals_norm = eigen_vals[:, 0]
88 |     return toptwo_eigen_vals_norm
89 | 
90 | def sorted_eigh(M):
91 |     vals, vecs = np.linalg.eigh(M)
92 |     return vals[::-1], vecs[:, ::-1]
93 | 
--------------------------------------------------------------------------------
/megaman/relaxation/__init__.py:
--------------------------------------------------------------------------------
1 | # LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE
2 | 
3 | from .riemannian_relaxation import *
4 | from .trace_variable import TracingVariable
--------------------------------------------------------------------------------
/megaman/relaxation/optimizer.py:
--------------------------------------------------------------------------------
1 | # Author: Yu-Chia Chen
2 | # LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE
3 | 
4 | from __future__ import division
5 | from megaman.geometry.utils import RegisterSubclasses
6 | 
7 | def init_optimizer(**kwargs):
8 |     optimizer = kwargs.get('step_method', 'fixed')
9 |     return BaseOptimizer.init(optimizer, **kwargs)
10 | 
11 | class BaseOptimizer(RegisterSubclasses):
12 |     """
13 |     Base class for the optimizer.
14 | 
15 |     BaseOptimizer creates the common interface to the optimizer classes,
16 |     and provides a common apply_optimization() which is used
17 |     in the RiemannianRelaxation class to update the embeddings.
18 | 
19 |     Parameters
20 |     ----------
21 |     linesearch : bool
22 |         Whether to use line search to find the optimal eta.
23 |     eta_max : float
24 |         (Linesearch mode) The maximum learning rate (eta) to start the search with.
25 |     eta : float
26 |         (Non-linesearch mode) The fixed learning rate (eta) to use.
27 |     linesearch_first : bool
28 |         (Linesearch mode) Whether to do line search on the first iteration.
29 |     """
30 |     def __init__(self, linesearch=False, eta_max=None, eta=None,
31 |                  linesearch_first=False, **kwargs):
32 |         self.linesearch = linesearch
33 |         if self.linesearch:
34 |             self.linesearch_first = linesearch_first
35 |             if eta_max is not None:
36 |                 self.eta_max = eta_max
37 |                 self.eta_min = 2**-10
38 |             else:
39 |                 raise ValueError('Should provide eta_max keyword '
40 |                                  'when linesearch method is used.')
41 |         else:
42 |             if eta is not None:
43 |                 self.eta = eta
44 |             else:
45 |                 raise ValueError('Should provide eta keyword '
46 |                                  'when fixed method is used.')
47 | 
48 |     def apply_optimization(self, update_embedding_with, grad, **kwargs):
49 |         """
50 |         Obtain the learning rate (eta) and apply one optimization step
51 |         to the embedding state using the specified method.
52 | 
53 |         Parameters
54 |         ----------
55 |         update_embedding_with : function
56 |             Function used to update the state of the RiemannianRelaxation
57 |             class (Y or S).
58 | 
59 |         grad : (n x s) array
60 |             Gradients used in updating the embedding.
61 | 
62 |         calc_loss : function (used by its child function)
63 |             Function used to calculate the loss from the temporary state of
64 |             the RiemannianRelaxation instance. (YT or ST)
65 | 
66 |         loss : float (used by its child function)
67 |             Loss of the current state of the RiemannianRelaxation instance.
68 |         """
69 |         if self.linesearch:
70 |             return self._apply_linesearch_optimization(update_embedding_with,
71 |                                                        grad, **kwargs)
72 |         else:
73 |             return self._apply_fixed_optimization(update_embedding_with,
74 |                                                   grad, **kwargs)
75 | 
76 |     def _apply_linesearch_optimization(self, update_embedding_with, grad,
77 |                                        calc_loss, loss, **kwargs):
78 |         self.eta = self.eta_max
79 |         if kwargs.get('first_iter', False) and not self.linesearch_first:
80 |             self.eta = kwargs.get('eta_first', 1)
81 |         loss_diff = 1
82 |         while loss_diff > 0:
83 |             loss_diff, temp_embedding, delta = self._linesearch_once(
84 |                 update_embedding_with, grad, calc_loss, loss, **kwargs)
85 |             if self.eta <= self.eta_min and loss_diff > 0:
86 |                 loss_diff, temp_embedding, delta = self._linesearch_once(
87 |                     update_embedding_with, grad, calc_loss, loss, **kwargs)
88 |                 loss_diff = -1
89 |         self.eta *= 2
90 |         update_embedding_with(new_embedding=temp_embedding)
91 |         return delta
92 | 
93 |     def _linesearch_once(self, update_embedding_with, grad,
94 |                          calc_loss, loss, **kwargs):
95 |         delta = self._calc_delta(grad)
96 |         temp_embedding = update_embedding_with(delta=delta, copy=True)
97 |         loss_diff = calc_loss(temp_embedding) - loss
98 |         self.eta /= 2
99 |         return loss_diff, temp_embedding, delta
100 | 
101 |     def _apply_fixed_optimization(self, update_embedding_with, grad, **kwargs):
102 |         delta = self._calc_delta(grad)
103 |         update_embedding_with(delta=delta)
104 |         return delta
105 | 
106 |     def _calc_delta(self, grad, **kwargs):
107 |         raise NotImplementedError()
108 | 
109 | class FixedOptimizer(BaseOptimizer):
110 |     """Optimizer for the fixed (non-momentum) method."""
111 |     name = 'fixed'
112 |     def _calc_delta(self, grad, **kwargs):
113 |         return -self.eta*grad
114 | 
115 | class MomentumOptimizer(BaseOptimizer):
116 |     """Optimizer for the momentum method."""
117 |     name = 'momentum'
118 |     def __init__(self, momentum, **kwargs):
119 |         BaseOptimizer.__init__(self, **kwargs)
120 |         self.momentum = momentum
121 |         self.last_delta = 0
122 | 
123 |     def _calc_delta(self, grad, **kwargs):
124 |         return -self.eta * grad + self.momentum * self.last_delta
125 | 
126 |     def apply_optimization(self, update_embedding_with, grad, **kwargs):
127 |         self.last_delta = BaseOptimizer.apply_optimization(
128 |             self, update_embedding_with, grad, **kwargs)
129 |         return self.last_delta
130 | 
--------------------------------------------------------------------------------
/megaman/relaxation/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mmp2/megaman/249a7d725de1f99ea7f6ba169a5a89468fc423ec/megaman/relaxation/tests/__init__.py
--------------------------------------------------------------------------------
/megaman/relaxation/tests/eps_halfdome.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mmp2/megaman/249a7d725de1f99ea7f6ba169a5a89468fc423ec/megaman/relaxation/tests/eps_halfdome.mat
--------------------------------------------------------------------------------
/megaman/relaxation/tests/rloss_halfdome.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mmp2/megaman/249a7d725de1f99ea7f6ba169a5a89468fc423ec/megaman/relaxation/tests/rloss_halfdome.mat
--------------------------------------------------------------------------------
/megaman/relaxation/tests/test_precomputed_S.py:
--------------------------------------------------------------------------------
1 | from megaman.relaxation.precomputed import *
2 | from .utils import generate_toy_laplacian
3 | 
4 | class BaseTestARkNeighbors(object):
5 |     def generate_laplacian(self):
6 |         raise NotImplementedError()
7 |     def setup_message(self):
8 |         raise NotImplementedError()
9 | 
10 |     def setUp(self):
11 |         self.generate_laplacian_and_range()
12 |         self.setup_message()
13 |         self.A, self.pairs = makeA(self.laplacian)
14 | 
15 |         # HACK: A is somehow sorted by column, so here I'll change it manually.
16 |         sortbyrow = np.lexsort((self.pairs[:, 1], self.pairs[:, 0]))
17 |         self.A = self.A[sortbyrow]
18 |         self.pairs = self.pairs[sortbyrow]
19 | 
20 |         # self.Rk_tensor, self.nbk = compute_Rk(self.laplacian, self.A, self.n)
21 |         self.correct_S, self.correct_pairs = self.project_S_from_laplacian()
22 | 
23 |     def generate_laplacian_and_range(self):
24 |         self.laplacian = self.generate_laplacian()
25 |         self.n = self.laplacian.shape[0]
26 |         self.range = np.arange(self.n)
27 |         self.Y = self.generate_toy_Y()
28 | 
29 |     def generate_toy_Y(self):
30 |         return np.random.uniform(size=self.n)
31 | 
32 |     def ij_is_neighbors(self, i, j):
33 |         return self.laplacian[i, j] != 0
34 | 
35 |     def project_S_from_laplacian(self):
36 |         # TODO: make the test process faster!
37 |         S = [ self.Y[i]-self.Y[j] for i in np.arange(self.n) \
38 |               for j in np.arange(i+1, self.n) \
39 |               if self.ij_is_neighbors(i, j) ]
40 |         pairs = [ [i, j] for i in np.arange(self.n) \
41 |                   for j in np.arange(i+1, self.n) \
42 |                   if self.ij_is_neighbors(i, j) ]
43 |         return np.array(S), np.array(pairs)
44 | 
45 |     def test_A_length_equality(self):
46 |         A_length = self.A.shape[0]
47 |         correct_A_length = self.correct_S.shape[0]
48 |         assert A_length == correct_A_length, 'The first dimension of A is calculated incorrectly.'
49 | 
50 |     def test_pairs(self):
51 |         np.testing.assert_array_equal(
52 |             self.pairs, self.correct_pairs,
53 |             err_msg='Sorted pairs should be the same.'
54 |         )
55 | 
56 |     def test_A(self):
57 |         testing_S = self.A.dot(self.Y)
58 |         np.testing.assert_allclose(
59 |             testing_S, self.correct_S,
60 |             err_msg='A*y should be the same as yj-yi for all j>i'
61 |         )
62 | 
63 |     def _test_ATAinv(self):
64 |         # TODO: why does this test run out of memory?
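        # (Likely answer: A is a sparse (npairs x n) matrix, but
        # self.A.T.dot(self.A).todense() materializes a dense n x n array,
        # and np.linalg.pinv then runs a full SVD on it, so memory and time
        # grow roughly as O(n^2) and O(n^3) with the laplacian size.)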
65 |         ATAinv = np.linalg.pinv(self.A.T.dot(self.A).todense())
66 |         S = self.A.dot(self.Y)
67 |         testing_Y = ATAinv.dot(self.A.T).dot(S)
68 |         np.testing.assert_allclose(
69 |             testing_Y, self.Y,
70 |             err_msg='ATAinv * AT * S should be the same as the original Y'
71 |         )
72 |
73 |     def _test_Rk(self):
74 |         # TODO: Need to understand what Rk means.
75 |         pass
76 |
77 | class TestAkRkNbkFromToyLaplacian(BaseTestARkNeighbors):
78 |     def generate_laplacian(self):
79 |         return generate_toy_laplacian(n=200)
80 |     def setup_message(self):
81 |         print('Testing Rk properties for the toy laplacian.')
82 |
--------------------------------------------------------------------------------
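Editor's sketch — the property these tests verify, exercised directly. This assumes only what the tests above use: `makeA` from `megaman.relaxation.precomputed` returns one row of `A` and one `(i, j)` pair per neighboring pair, and per `test_A` the product `A.dot(Y)` gives the differences `Y[i] - Y[j]`.

    import numpy as np
    from megaman.relaxation.precomputed import makeA
    from megaman.relaxation.tests.utils import generate_toy_laplacian

    L = generate_toy_laplacian(n=100)
    A, pairs = makeA(L)                      # one row per neighboring pair (i, j)
    Y = np.random.uniform(size=L.shape[0])   # a toy 1-d embedding
    S = A.dot(Y)                             # S[k] == Y[pairs[k, 0]] - Y[pairs[k, 1]]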
/megaman/relaxation/tests/test_regression_test.py:
--------------------------------------------------------------------------------
1 | from megaman.relaxation import *
2 | from functools import wraps
3 |
4 | import numpy as np
5 | import numpy.testing
6 |
7 | from .utils import gen_data, Bunch
8 | import shutil, os
9 |
10 | def _regression_test(if_epsilon):
11 |     def _test_deco(func):
12 |         @wraps(func)
13 |         def wrapper():
14 |             test_dict = func()
15 |             var = Bunch(test_dict)
16 |
17 |             rr = run_riemannian_relaxation(var.laplacian, var.Y_list[0], var.d, var.relaxation_kwds)
18 |
19 |             calculated_loss_list = []
20 |             calculated_DL_list = []
21 |             calculated_Y_list = []
22 |
23 |             for idx,Y in enumerate(var.Y_list):
24 |                 rr.Y = Y
25 |                 rr.H = np.copy(var.H_list[idx])
26 |                 if if_epsilon and idx >= 1:
27 |                     rr.UU, rr.IUUEPS = compute_principal_plane(var.H_list[idx-1], rr.epsI, var.d)
28 |                 calculated_loss_list.append(rr.rieman_loss())
29 |
30 |             for idx,H in enumerate(var.H_list):
31 |                 rr.H = H
32 |                 rr.Y = np.copy(var.Y_list[idx])
33 |                 calculated_DL_list.append(rr.compute_gradient())
34 |
35 |             for idx,grad in enumerate(var.grad_list):
36 |                 rr.grad = grad
37 |                 rr.Y = np.copy(var.Y_list[idx])
38 |                 rr.loss = var.loss_list[idx]
39 |                 if if_epsilon:
40 |                     rr.H = rr.compute_dual_rmetric()
41 |                     rr.UU, rr.IUUEPS = compute_principal_plane(rr.H, rr.epsI, var.d)
42 |                 rr.make_optimization_step(first_iter=(idx == 0))
43 |                 calculated_Y_list.append(rr.Y)
44 |
45 |             np.testing.assert_allclose(
46 |                 calculated_loss_list, var.loss_list,
47 |                 err_msg='Loss calculated from MATLAB should be similar to that calculated from Python, in {}'.format(__name__)
48 |             )
49 |             np.testing.assert_allclose(
50 |                 calculated_DL_list[:-1], var.DL_list,
51 |                 err_msg='Gradients calculated from MATLAB should be similar to those calculated from Python, in {}'.format(__name__)
52 |             )
53 |             np.testing.assert_allclose(
54 |                 calculated_Y_list, var.Y_list[1:],
55 |                 err_msg='Y calculated from linesearch should be similar, in {}'.format(__name__)
56 |             )
57 |
58 |         return wrapper
59 |     return _test_deco
60 |
61 | @_regression_test(True)
62 | def test_whole_eps():
63 |     return gen_data('eps_halfdome','whole_eps')
64 |
65 | @_regression_test(False)
66 | def test_whole_rloss():
67 |     return gen_data('rloss_halfdome','whole_eps')
68 |
69 | @_regression_test(True)
70 | def test_half_eps():
71 |     return gen_data('eps_halfdome','half_eps')
72 |
73 | @_regression_test(False)
74 | def test_half_rloss():
75 |     return gen_data('rloss_halfdome','half_eps')
76 |
77 | @_regression_test(True)
78 | def test_weight_eps():
79 |     return gen_data('eps_halfdome','weight_eps')
80 |
81 | @_regression_test(False)
82 | def test_weight_rloss():
83 |     return gen_data('rloss_halfdome','weight_eps')
84 |
85 | @_regression_test(True)
86 | def test_half_weight_eps():
87 |     return gen_data('eps_halfdome','half_weight_eps')
88 |
89 | @_regression_test(False)
90 | def test_half_weight_rloss():
91 |     return gen_data('rloss_halfdome','half_weight_eps')
92 |
93 | if __name__ == '__main__':
94 |     test_weight_rloss()
95 |
96 | def tearDownModule():
97 |     tmp_dir = '/tmp/test_backup'
98 |     if os.path.exists(tmp_dir):
99 |         shutil.rmtree(tmp_dir)
100 |
--------------------------------------------------------------------------------
/megaman/relaxation/tests/test_relaxation_keywords.py:
--------------------------------------------------------------------------------
1 | from megaman.relaxation.utils import *
2 | from nose.tools import assert_raises
3 | import numpy as np
4 | import numpy.testing
5 | import shutil, warnings, os
6 |
7 | n, s, d = 1000, 3, 2
8 |
9 | basic_kwds = {
10 |     'verbose': False,
11 |     'niter': 2000,
12 |     'niter_trace': 0,
13 |     'presave': False,
14 |     'sqrd': True,
15 |     'alpha': 0,
16 |     'projected': False,
17 |     'saveiter': 10,
18 |     'printiter': 1,
19 | }
20 |
21 | nonprojected_epsilon_test = {
22 |     'lossf': 'nonprojected_epsilon',
23 |     'projected': False,
24 |     'eps_orth': 0.1,
25 | }
26 |
27 | tmp_dir = '/tmp/test_backup'
28 | def _initialize_kwds(kwds, n, s, d):
29 |     kwds['backup_base_dir'] = tmp_dir
30 |     return initialize_kwds(kwds, n, s, d)
31 |
32 | def test_default_keywords():
33 |     calculated_kwds = _initialize_kwds({}, n, s, d)
34 |     for k,v in basic_kwds.items():
35 |         assert calculated_kwds[k] == v, 'keyword {} was not initialized correctly.'.format(k)
36 |
37 |     assert calculated_kwds['weights'].shape[0] == 0, 'initialized weights should be empty.'
38 |     np.testing.assert_allclose(
39 |         calculated_kwds['subset'], np.arange(n),
40 |         err_msg='initialized subset should be arange(n).'
41 |     )
42 |
43 | def test_normalize_weights():
44 |     weights = np.array([1,4])
45 |     calculated_kwds = _initialize_kwds(dict(weights=weights), n, s, d)
46 |     np.testing.assert_allclose(
47 |         calculated_kwds['weights'], [0.2, 0.8],
48 |         err_msg='The weights should be normalized'
49 |     )
50 |
51 | def test_default_lossf():
52 |     calculated_kwds = _initialize_kwds({}, n, s, d)
53 |     for k,v in nonprojected_epsilon_test.items():
54 |         assert calculated_kwds[k] == v, 'keyword {} was not initialized correctly.'.format(k)
55 |
56 |     calculated_kwds = _initialize_kwds(dict(projected=True), n, s, d)
57 |     assert calculated_kwds['lossf'] == 'projected_epsilon', 'lossf should be projected_epsilon when projected is True'
58 |
59 |     calculated_kwds = _initialize_kwds({}, n, d, d)
60 |     assert calculated_kwds['lossf'] == 'nonprojected_rloss', 'lossf should be nonprojected_rloss by default'
61 |
62 |     calculated_kwds = _initialize_kwds(dict(projected=True), n, d, d)
63 |     assert calculated_kwds['lossf'] == 'projected_rloss', 'lossf should be projected_rloss when projected is True'
64 |
65 | def test_update_lossf():
66 |     calculated_kwds = _initialize_kwds(dict(eps_orth=0.55), n, s, d)
67 |     assert calculated_kwds['eps_orth'] == 0.55, 'eps_orth should be updated to 0.55.'
68 |
69 | def test_raise_lossf_error():
70 |     assert_raises(ValueError, _initialize_kwds, dict(lossf='rloss'), n, s, d)
71 |     assert_raises(ValueError, _initialize_kwds, dict(lossf='epsilon'), n, d, d)
72 |     assert_raises(ValueError, _initialize_kwds, dict(projected=True, subset=np.arange(0,n,5)), n, s, d)
73 |
74 | def test_default_momentum():
75 |     calculated_kwds = _initialize_kwds(dict(step_method='momentum', linesearch=False), n, s, d)
76 |     test_momentum_kwds = {
77 |         'm': 0.05,
78 |         'eta': 1.0
79 |     }
80 |     for k,v in test_momentum_kwds.items():
81 |         assert calculated_kwds[k] == v, 'keyword {} was not initialized correctly.'.format(k)
82 |
83 | def test_default_fixed():
84 |     calculated_kwds = _initialize_kwds(dict(step_method='fixed', linesearch=False), n, s, d)
85 |     assert calculated_kwds['eta'] == 1.0, 'Default eta does not match'
86 |
87 | def test_default_linesearch():
88 |     calculated_kwds = _initialize_kwds(dict(projected=True), n, s, d)
89 |     test_kwds = {
90 |         'linesearch_first': False,
91 |         'eta_max': 2**11,
92 |     }
93 |     for k,v in test_kwds.items():
94 |         assert calculated_kwds[k] == v, 'keyword {} was not initialized correctly.'.format(k)
95 |
96 |     calculated_kwds = _initialize_kwds(dict(projected=False), n, s, d)
97 |     assert calculated_kwds['eta_max'] == 2**4, 'eta_max should be 2**4 if projected == False'
98 |
99 | def test_backup_dir_function():
100 |     tmp_dir = '/tmp/test_backup'
101 |     calculated_kwds = initialize_kwds(dict(backup_base_dir=tmp_dir), n, s, d)
102 |     assert 'backup_dir' in calculated_kwds
103 |     backup_dir = calculated_kwds['backup_dir']
104 |     assert tmp_dir in backup_dir
105 |     assert os.path.exists(tmp_dir)
106 |
107 | def test_not_int_warnings():
108 |     with warnings.catch_warnings(record=True) as w:
109 |         calculated_kwds = initialize_kwds(dict(printiter=1.3), n, s, d)
110 |     assert issubclass(w[-1].category, RuntimeWarning), \
111 |         'Should raise a RuntimeWarning when the input is not an integer'
112 |
113 | def tearDownModule():
114 |     tmp_dir = '/tmp/test_backup'
115 |     if os.path.exists(tmp_dir):
116 |         shutil.rmtree(tmp_dir)
117 |
--------------------------------------------------------------------------------
/megaman/relaxation/tests/test_tracing_var.py:
--------------------------------------------------------------------------------
1 | from __future__ import division
2 | from .utils import generate_toy_laplacian
3 | from megaman.relaxation.trace_variable import TracingVariable as tv
4 | from megaman.relaxation import *
5 | import shutil, os
6 |
7 | def test_copy():
8 |     n, s, d = 1000, 3, 2
9 |     niter = 10
10 |     niter_trace = niter//2
11 |     ltrace = 2*niter_trace+1
12 |     L = generate_toy_laplacian(n)
13 |     Y0 = np.zeros((n,s))
14 |     rr = run_riemannian_relaxation(L, Y0, d, dict(niter=niter, niter_trace=niter_trace))
15 |     copied_tv = rr.trace_var.copy()
16 |     copied_tv.H = copied_tv.H[::2,:,:]
17 |     assert rr.trace_var.H.shape[0] == ltrace, 'The original size of H should not be affected by downsampling'
18 |     assert copied_tv.H.shape[0] == round(ltrace / 2), 'The size of the copied H should be downsampled by 2'
19 |
20 | def tearDownModule():
21 |     tmp_dir = '/tmp/test_backup'
22 |     if os.path.exists(tmp_dir):
23 |         shutil.rmtree(tmp_dir)
24 |
--------------------------------------------------------------------------------
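Editor's sketch — `test_copy` above doubles as a usage template for the relaxation entry point. The keywords and the `trace_var` attribute are used exactly as in the test; the relaxation writes its backups under the configured `backup_base_dir`.

    import numpy as np
    from megaman.relaxation import run_riemannian_relaxation
    from megaman.relaxation.tests.utils import generate_toy_laplacian

    n, s, d = 1000, 3, 2
    L = generate_toy_laplacian(n)        # sparse toy graph Laplacian
    Y0 = np.zeros((n, s))                # initial embedding guess
    rr = run_riemannian_relaxation(
        L, Y0, d, dict(niter=10, niter_trace=5, backup_base_dir='/tmp/test_backup'))
    trace = rr.trace_var                 # traced H, Y, loss, etas; see trace_variable.py below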
/megaman/relaxation/tests/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import scipy as sp
3 | import scipy.sparse
4 | import h5py
5 | import copy, os
6 |
7 | def generate_toy_laplacian(n=1000):
8 |     neighbor_counts = 10
9 |     adjacency_mat = np.zeros((n,n))
10 |     for i in range(n):
11 |         x = np.ones(neighbor_counts, dtype=np.int32)*i
12 |         y = np.random.choice(n, neighbor_counts, replace=False)
13 |         adjacency_mat[(x,y)] = 1
14 |
15 |     np.fill_diagonal(adjacency_mat, 0)
16 |     adjacency_mat = (adjacency_mat.T + adjacency_mat) / 2
17 |     degree = np.sum(adjacency_mat, axis=1)
18 |     degree_mat = np.diag(degree)
19 |
20 |     return sp.sparse.csc_matrix(degree_mat - adjacency_mat)
21 |
22 | def process_test_data():
23 |     namelist = ['rloss_halfdome', 'eps_halfdome']
24 |     return { name: process_one_loss_test_data(name) for name in namelist }
25 |
26 | def process_one_loss_test_data(name):
27 |     file_dir = os.path.dirname(os.path.abspath(__file__))
28 |     path = os.path.join(file_dir, '{}.mat'.format(name))
29 |     f = h5py.File(path, 'r')
30 |     laplacian_ref = f['/{}/L'.format(name)]
31 |     laplacian = sp.sparse.csc_matrix((laplacian_ref['data'], laplacian_ref['ir'], laplacian_ref['jc']))
32 |     opts_list = ['whole_eps','half_eps','weight_eps','half_weight_eps']
33 |     processed_data = { opts: process_one_test_data(f, name, opts) for opts in opts_list }
34 |     processed_data['L'] = laplacian
35 |     processed_data['d'] = 2
36 |     return processed_data
37 |
38 | def process_one_test_data(f, name, opts):
39 |     Y_ref_list = f['/{}/{}/trace/Y'.format(name,opts)]
40 |     Y_list = np.array([ f[Y_ref_list[idx,0]] for idx in range(Y_ref_list.shape[0]) ])
41 |     Y_list = np.swapaxes(Y_list, 1, 2)
42 |
43 |     H_ref_list = f['/{}/{}/trace/H'.format(name,opts)]
44 |     H_list = np.array([ f[H_ref_list[idx,0]] for idx in range(H_ref_list.shape[0]) ])
45 |
46 |     DL_ref_list = f['/{}/{}/trace/DL'.format(name,opts)]
47 |     DL_list = np.array([ f[DL_ref_list[idx,0]] for idx in range(DL_ref_list.shape[0]-1) ])
48 |     DL_list = np.swapaxes(DL_list, 1, 2)
49 |
50 |     grad_ref_list = f['/{}/{}/trace/grad'.format(name,opts)]
51 |     grad_list = np.array([ f[grad_ref_list[idx,0]] for idx in range(grad_ref_list.shape[0]-1) ])
52 |     grad_list = np.swapaxes(grad_list, 1, 2)
53 |
54 |     loss_list = np.squeeze(np.array(f['/{}/{}/loss'.format(name,opts)]))
55 |     etas_list = np.squeeze(np.array(f['/{}/{}/etas'.format(name,opts)]))
56 |
57 |     rk_h5py = f['/{}/{}/opts'.format(name,opts)]
58 |     relaxation_kwds = {
59 |         'alpha': rk_h5py['alpha'][0,0],
60 |         'lossf': u''.join(chr(c) for c in rk_h5py['lossf']),
61 |         'step_method': 'fixed',
62 |         'linesearch': u''.join(chr(c) for c in rk_h5py['step_method']) == u'linesearch',
63 |         'projected': rk_h5py['projected'][0,0],
64 |         'eta_max': rk_h5py['eta_max'][0,0],
65 |         'backup_base_dir': '/tmp/test_backup',
66 |     }
67 |     if 'weight' in opts:
68 |         weights = np.squeeze(np.array(rk_h5py['w']))
69 |         relaxation_kwds['weights'] = weights
70 |
71 |     if 'half' in opts:
72 |         relaxation_kwds['subset'] = np.arange(0, 1000, 2)
73 |
74 |     if 'epsorth' in rk_h5py:
75 |         relaxation_kwds['eps_orth'] = rk_h5py['epsorth'][0,0]
76 |     if 'sqrd' in rk_h5py:
77 |         relaxation_kwds['sqrd'] = rk_h5py['sqrd'][0,0] == 1
78 |     return dict(
79 |         Y_list=Y_list, H_list=H_list, DL_list=DL_list, grad_list=grad_list,
80 |         loss_list=loss_list, etas_list=etas_list, relaxation_kwds=relaxation_kwds
81 |     )
82 |
83 | class Bunch(object):
84 |     def __init__(self, adict):
85 |         self.__dict__.update(adict)
86 |
87 | data = process_test_data()
88 | def gen_data(name, opts):
89 |     test_data = copy.deepcopy(data[name])
90 |     test_dict = test_data[opts]
91 |     test_dict['laplacian'] = test_data['L']
92 |     test_dict['d'] = test_data['d']
93 |     return test_dict
94 |
--------------------------------------------------------------------------------
/megaman/relaxation/trace_variable.py:
--------------------------------------------------------------------------------
1 | # Author: Yu-Chia Chen
2 | # LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE
3 |
4 | import numpy as np
5 | import os, pickle, pprint, copy
6 |
7 | from .utils import *
8 |
9 | class TracingVariable(object):
10 |     """
11 |     TracingVariable stores the variables traced during relaxation and
12 |     prints a relaxation report every 'printiter' iterations.
13 |     """
14 |     def __init__(self, n, s, relaxation_kwds, precomputed_kwds, **kwargs):
15 |         self.niter_trace = relaxation_kwds['niter_trace']
16 |         self.niter = relaxation_kwds['niter']
17 |         self.ltrace = 2*self.niter_trace+1
18 |
19 |         self.loss = np.zeros(self.niter+1)
20 |         self.etas = np.zeros(self.niter+1)
21 |         self.H = np.zeros((self.ltrace, n, s, s))
22 |         self.Y = np.zeros((self.ltrace, n, s))
23 |         self.lmin = np.finfo(np.float64).max
24 |
25 |         self.verbose = relaxation_kwds['verbose']
26 |         self.printiter = relaxation_kwds['printiter']
27 |         self.saveiter = relaxation_kwds['saveiter']
28 |         self.backup_dir = relaxation_kwds['backup_dir']
29 |
30 |         create_output_dir(self.backup_dir)
31 |         self.report_and_save_keywords(relaxation_kwds, precomputed_kwds)
32 |
33 |     def copy(self):
34 |         return copy.deepcopy(self)
35 |
36 |     def report_and_save_keywords(self, relaxation_kwds, precomputed_kwds):
37 |         """Save the relaxation keywords to a .txt and a .pyc file"""
38 |         report_name = os.path.join(self.backup_dir, 'relaxation_keywords.txt')
39 |         pretty_relax_kwds = pprint.pformat(relaxation_kwds, indent=4)
40 |         with open(report_name, 'w') as wf:
41 |             wf.write(pretty_relax_kwds)
42 |
43 |
44 |         origin_name = os.path.join(self.backup_dir, 'relaxation_keywords.pyc')
45 |         with open(origin_name, 'wb') as ro:
46 |             pickle.dump(relaxation_kwds, ro, protocol=pickle.HIGHEST_PROTOCOL)
47 |
48 |
49 |         if relaxation_kwds['presave']:
50 |             precomp_kwds_name = os.path.join(self.backup_dir,
51 |                                              'precomputed_keywords.pyc')
52 |             with open(precomp_kwds_name, 'wb') as po:
53 |                 pickle.dump(precomputed_kwds, po,
54 |                             protocol=pickle.HIGHEST_PROTOCOL)
55 |
56 |
57 |     def update(self, iiter, H, Y, eta, loss):
58 |         """Update the trace_var at a new iteration"""
59 |         if iiter <= self.niter_trace+1:
60 |             self.H[iiter] = H
61 |             self.Y[iiter] = Y
62 |         elif iiter > self.niter - self.niter_trace + 1:
63 |             self.H[self.ltrace+iiter-self.niter-1] = H
64 |             self.Y[self.ltrace+iiter-self.niter-1] = Y
65 |
66 |         self.etas[iiter] = eta
67 |         self.loss[iiter] = loss
68 |         if self.loss[iiter] < self.lmin:
69 |             self.Yh = Y
70 |             self.lmin = self.loss[iiter]
71 |             self.miniter = iiter if not iiter == -1 else self.niter + 1
72 |
73 |     def print_report(self, iiter):
74 |         if self.verbose and iiter % self.printiter == 0:
75 |             print('Iteration number: {}'.format(iiter))
76 |             print('Last step size eta: {}'.format(self.etas[iiter]))
77 |             print('current loss (before gradient step): {}'
78 |                   .format(self.loss[iiter]))
79 |             print('minimum loss: {}, at iteration: {}\n'
80 |                   .format(self.lmin, self.miniter))
81 |
82 |     def save_backup(self, iiter):
83 |         if iiter % self.saveiter == 0 and iiter != 0:
84 |             backup_name = os.path.join(self.backup_dir, 'backup_trace.pyc')
85 |             TracingVariable.save(self, backup_name)
86 |             print('Saved backup at iteration: {}\n'.format(iiter))
87 |
88 |     @classmethod
89 |     def correct_file_extension(cls, filename):
90 |         return os.path.splitext(filename)[0]+'.pyc'
91 |
92 |     @classmethod
93 |     def save(cls, instance, filename):
94 |         """Class method for saving a TracingVariable."""
95 |         filename = cls.correct_file_extension(filename)
96 |         try:
97 |             with open(filename, 'wb') as f:
98 |                 pickle.dump(instance, f, protocol=pickle.HIGHEST_PROTOCOL)
99 |         except MemoryError as e:
100 |             print('{} occurred; downsampling the saved trace by a factor of 20.'
101 |                   .format(type(e).__name__))
102 |             copy_instance = instance.copy()
103 |             copy_instance.H = copy_instance.H[::20,:,:]
104 |             copy_instance.Y = copy_instance.Y[::20,:]
105 |             with open(filename, 'wb') as f:
106 |                 pickle.dump(copy_instance, f, protocol=pickle.HIGHEST_PROTOCOL)
107 |
108 |     @classmethod
109 |     def load(cls, filename):
110 |         """Load from stored files"""
111 |         filename = cls.correct_file_extension(filename)
112 |         with open(filename, 'rb') as f:
113 |             return pickle.load(f)
114 |
--------------------------------------------------------------------------------
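Editor's sketch — the save/load round trip defined above, assuming `trace_var` is a populated TracingVariable (e.g. `rr.trace_var` from a relaxation run). `save` falls back to a 20x-downsampled copy on MemoryError, and both classmethods normalize the file extension to .pyc.

    from megaman.relaxation.trace_variable import TracingVariable

    TracingVariable.save(trace_var, '/tmp/demo_trace')   # writes /tmp/demo_trace.pyc
    restored = TracingVariable.load('/tmp/demo_trace')   # extension added automatically
    print(restored.lmin, restored.miniter)               # best loss and its iteration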
/megaman/setup.py:
--------------------------------------------------------------------------------
1 | # LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE
2 |
3 | import os
4 |
5 | def configuration(parent_package='', top_path=None):
6 |     from numpy.distutils.misc_util import Configuration
7 |
8 |     config = Configuration('megaman', parent_package, top_path)
9 |
10 |     config.add_subpackage('__check_build')
11 |     config.add_subpackage('datasets')
12 |     config.add_subpackage('embedding')
13 |     config.add_subpackage('embedding/tests')
14 |     config.add_subpackage('geometry')
15 |     config.add_subpackage('geometry/cyflann')
16 |     config.add_subpackage('geometry/tests')
17 |     config.add_subpackage('plotter')
18 |     config.add_subpackage('relaxation')
19 |     config.add_subpackage('relaxation/tests')
20 |     config.add_subpackage('utils')
21 |     config.add_subpackage('utils/tests')
22 |     config.add_data_files('geometry/tests/testmegaman_laplacian_rad0_2_lam1_5_n200.mat')
23 |     config.add_data_files('relaxation/tests/eps_halfdome.mat')
24 |     config.add_data_files('relaxation/tests/rloss_halfdome.mat')
25 |     config.add_data_files('datasets/megaman.png')
26 |
27 |     return config
28 |
29 | if __name__ == '__main__':
30 |     from numpy.distutils.core import setup
31 |     setup(**configuration(top_path='').todict())
32 |
--------------------------------------------------------------------------------
/megaman/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mmp2/megaman/249a7d725de1f99ea7f6ba169a5a89468fc423ec/megaman/utils/__init__.py
--------------------------------------------------------------------------------
/megaman/utils/covar_plotter.py:
--------------------------------------------------------------------------------
1 | # LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE
2 |
3 | import numpy as np
4 |
5 | import matplotlib.pyplot as plt
6 | from matplotlib.patches import Ellipse
7 |
8 | def plot_point_cov(points, nstd=2, ax=None, **kwargs):
9 |     """
10 |     Plots an `nstd` sigma ellipse based on the mean and covariance of a point
11 |     "cloud" (points, an Nx2 array).
12 |
13 |     Parameters
14 |     ----------
15 |     points : An Nx2 array of the data points.
16 |     nstd : The radius of the ellipse in numbers of standard deviations.
17 |         Defaults to 2 standard deviations.
18 |     ax : The axis that the ellipse will be plotted on. Defaults to the
19 |         current axis.
20 |     Additional keyword arguments are passed on to the ellipse patch.
21 |
22 |     Returns
23 |     -------
24 |     A matplotlib ellipse artist
25 |     """
26 |     pos = points.mean(axis=0)
27 |     cov = np.cov(points, rowvar=False)
28 |     return plot_cov_ellipse(cov, pos, nstd, ax, **kwargs)
29 |
30 | def plot_cov_ellipse(cov, pos, nstd=2, ax=None, **kwargs):
31 |     """
32 |     Plots an `nstd` sigma error ellipse based on the specified covariance
33 |     matrix (`cov`). Additional keyword arguments are passed on to the
34 |     ellipse patch artist.
35 |
36 |     Parameters
37 |     ----------
38 |     cov : The 2x2 covariance matrix to base the ellipse on
39 |     pos : The location of the center of the ellipse. Expects a 2-element
40 |         sequence of [x0, y0].
41 |     nstd : The radius of the ellipse in numbers of standard deviations.
42 |         Defaults to 2 standard deviations.
43 |     ax : The axis that the ellipse will be plotted on. Defaults to the
44 |         current axis.
45 |     Additional keyword arguments are passed on to the ellipse patch.
46 |
47 |     Returns
48 |     -------
49 |     A matplotlib ellipse artist
50 |     """
51 |     def eigsorted(cov):
52 |         vals, vecs = np.linalg.eigh(cov)
53 |         order = vals.argsort()[::-1]
54 |         return vals[order], vecs[:,order]
55 |
56 |     if ax is None:
57 |         ax = plt.gca()
58 |
59 |     vals, vecs = eigsorted(cov)
60 |     theta = np.degrees(np.arctan2(*vecs[:,0][::-1]))
61 |
62 |     # Width and height are "full" widths, not radii
63 |     width, height = 2 * nstd * np.sqrt(vals)
64 |     ellip = Ellipse(xy=pos, width=width, height=height, angle=theta, **kwargs)
65 |
66 |     ax.add_artist(ellip)
67 |     return ellip
68 |
69 | if __name__ == '__main__':
70 |     #-- Example usage -----------------------
71 |     # Generate some random, correlated data
72 |     points = np.random.multivariate_normal(
73 |         mean=(1,1), cov=[[0.4, 9],[9, 10]], size=1000
74 |     )
75 |     # Plot the raw points...
76 |     x, y = points.T
77 |     plt.plot(x, y, 'ro')
78 |
79 |     # Plot a transparent 3 standard deviation covariance ellipse
80 |     plot_point_cov(points, nstd=3, alpha=0.5, color='green')
81 |
82 |     plt.show()
83 |
--------------------------------------------------------------------------------
/megaman/utils/k_means_clustering.py:
--------------------------------------------------------------------------------
1 | """K-Means Clustering"""
2 |
3 | # Author: James McQueen
4 | #         Xiao Wang
5 | # LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE
6 |
7 | import numpy as np
8 |
9 |
10 | class Kmeans():
11 |     def __init__(self, K):
12 |         self.K = K
13 |
14 |     def fit(self, data):
15 |         self.labels_ = k_means_clustering(data, self.K)
16 |
17 |     def fit_transform(self, data):
18 |         self.fit(data)
19 |         return self.labels_
20 |
21 | def k_means_clustering(data, K):
22 |     """
23 |     K-means clustering takes a data set and a number of clusters K, and
24 |     returns labels assigning each sample to the cluster of samples it is
25 |     most similar to.
26 |
27 |     Parameters
28 |     --------------------
29 |     data: array-like, shape = (n_samples, n_features)
30 |     K: integer
31 |         number of clusters
32 |     Returns
33 |     -------
34 |     labels: array-like, shape (n_samples,)
35 |     """
36 |     N = data.shape[0]
37 |     centroids, data_norms = orthogonal_initialization(data, K)
38 |     old_centroids = np.zeros(centroids.shape)
39 |     labels = []
40 |
41 |     # Run the main k-means algorithm
42 |     while not _has_converged(centroids, old_centroids):
43 |         old_centroids = centroids.copy()
44 |         labels = get_labels(data, centroids, K)
45 |         centroids = get_centroids(data, K, labels, centroids, data_norms)
46 |
47 |     return labels
48 |
49 | def orthogonal_initialization(X, K):
50 |     """
51 |     Initialize the centroids by orthogonal initialization.
52 |     Parameters
53 |     --------------------
54 |     X(data): array-like, shape = (n_samples, n_features)
55 |     K: integer
56 |         number of clusters
57 |     Returns
58 |     -------
59 |     centroids: array-like, shape (K, n_features)
60 |     data_norms: array-like, shape = (n_samples,)
61 |     """
62 |     N,M = X.shape
63 |     centroids = X[np.random.randint(0, N-1, 1), :]
64 |     data_norms = np.linalg.norm(X, axis=1)  # the norm of each data point; only computed once
65 |
66 |     center_norms = np.linalg.norm(centroids, axis=1)  # the norms of the centers; updated when a new center is added
67 |
68 |     for k in range(1, K):
69 |         ## Here's where we compute the cosine of the angle between them:
70 |         # Compute the dot (inner) product between each data point and each center
71 |         new_center_index, new_center = new_orthogonal_center(X, data_norms, centroids, center_norms=center_norms)
72 |         centroids = np.vstack((centroids, new_center))
73 |         center_norms = np.hstack((center_norms, data_norms[new_center_index]))
74 |     return centroids, data_norms
75 |
76 | def new_orthogonal_center(X, data_norms, centroids, center_norms=None):
77 |     """
78 |     Find a new center that is as orthogonal as possible to the existing centroids.
79 |     Parameters
80 |     --------------------
81 |     X(data): array-like, shape = (n_samples, n_features)
82 |     data_norms: array-like, shape = (n_samples,)
83 |     center_norms: array-like, shape = (centroids.shape[0],)
84 |     centroids: array-like, shape (K, n_features)
85 |     Returns
86 |     -------
87 |     new_center: array-like, shape (1, n_features)
88 |     new_center_index: integer
89 |         data index of the new center
90 |     """
91 |     if center_norms is None:
92 |         center_norms = np.linalg.norm(centroids, axis=1)
93 |     cosine = np.inner(X, centroids)  # cosine[i, j] = np.dot(X[i, :], centroids[j, :])
94 |     cosine = cosine / center_norms  # divide each column by the center norm
95 |     cosine = cosine / data_norms[:, np.newaxis]  # divide each row by the data norm
96 |     max_cosine = np.max(np.abs(cosine), 1)  # the largest absolute cosine for each data point
97 |
98 |     # then we find the index of the new center:
99 |     new_center_index = np.argmin(max_cosine)  # the new center is the point with the smallest max cosine
100 |     new_center = X[new_center_index, :]
101 |     return new_center_index, new_center
102 |
103 | def get_labels(data, centroids, K):
104 |     """
105 |     Returns a cluster label for each sample in the data set.
106 |
107 |     Parameters
108 |     ------------
109 |     data: array-like, shape = (n_samples, n_features)
110 |     K: integer
111 |         number of clusters
112 |     centroids: array-like, shape = (K, n_features)
113 |
114 |     Returns
115 |     -------------
116 |     labels: array-like, shape (n_samples,)
117 |     """
118 |     distances = np.sqrt(((data - centroids[:, np.newaxis])**2).sum(axis=2))
119 |     return np.argmin(distances, axis=0)
120 |
121 | def get_centroids(data, k, labels, centroids, data_norms):
122 |     """
123 |     Update each centroid to the mean of the samples assigned to it;
124 |     empty clusters are re-seeded with a new orthogonal center.
125 |     Parameters
126 |     ------------
127 |     data: array-like, shape = (n_samples, n_features)
128 |     k: integer, number of clusters
129 |     centroids: array-like, shape = (K, n_features)
130 |     labels: array-like, shape (n_samples,)
131 |     Returns
132 |     -------------
133 |     centroids: array-like, shape (K, n_features)
134 |     """
135 |
136 |     D = data.shape[1]
137 |     for j in range(k):
138 |         cluster_points = np.where(labels == j)[0]
139 |         cluster_total = len(cluster_points)
140 |         if cluster_total == 0:
141 |             _, temp = new_orthogonal_center(data, data_norms, centroids)
142 |         else:
143 |             temp = np.mean(data[cluster_points, :], axis=0)
144 |         centroids[j,:] = temp
145 |     return centroids
146 |
147 | def _has_converged(centroids, old_centroids):
148 |     """
149 |     Stop when the centroids no longer change.
150 |     Parameters
151 |     -----------
152 |     centroids: array-like, shape=(K, n_features)
153 |     old_centroids: array-like, shape=(K, n_features)
154 |     Returns
155 |     -------
156 |     converged: bool
157 |
158 |     """
159 |     return (set([tuple(a) for a in centroids]) == set([tuple(a) for a in old_centroids]))
--------------------------------------------------------------------------------
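Editor's sketch — the class above applied to three well-separated clusters (an illustration, not part of the module):

    import numpy as np
    from megaman.utils.k_means_clustering import Kmeans

    rng = np.random.RandomState(0)
    centers = np.array([[0., 0.], [10., 0.], [0., 10.]])
    X = np.vstack([c + 0.5 * rng.randn(50, 2) for c in centers])   # 150 points, 3 blobs

    km = Kmeans(K=3)
    labels = km.fit_transform(X)   # one cluster label per row of X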
/megaman/utils/large_sparse_functions.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import scipy as sp
3 | import pickle
4 | from scipy.io import loadmat, savemat
5 | from scipy.sparse import coo_matrix, dia_matrix, identity
6 |
7 | def save_sparse_in_2_parts(A, name):
8 |     # .mat and coo format are easily readable into MATLAB
9 |     nz = len(A.data)
10 |     A = A.tocoo()
11 |     A_1 = {'I1':A.row[range(0, int(nz/2))],
12 |            'J1':A.col[range(0, int(nz/2))],
13 |            'V1':A.data[range(0, int(nz/2))]}
14 |     savemat(name + '_part_1.mat', A_1)
15 |
16 |     A_2 = {'I2':A.row[range(int(nz/2), nz)],
17 |            'J2':A.col[range(int(nz/2), nz)],
18 |            'V2':A.data[range(int(nz/2), nz)]}
19 |     savemat(name + '_part_2.mat', A_2)
20 |     return(None)
21 |
22 | def load_sparse_in_2_parts(f1, f2, n):
23 |     A_1 = loadmat(f1)
24 |     A_2 = loadmat(f2)
25 |     row = np.append(A_1['I1'], A_2['I2'])
26 |     col = np.append(A_1['J1'], A_2['J2'])
27 |     data = np.append(A_1['V1'], A_2['V2'])
28 |     A = coo_matrix((data, (row, col)), shape = (n, n))
29 |     return(A)
30 |
31 |
32 | def save_sparse_in_k_parts(A, name, k):
33 |     nz = len(A.data)
34 |     A = A.tocoo()
35 |     nk = 0
36 |     nper = int(nz / k)
37 |     for ii in range(k):
38 |         fname = name + '_part_' + str(ii+1) + '.mat'
39 |         nkp1 = nk + nper
40 |         if ii == k-1:
41 |             nkp1 = nz
42 |         A_k = {'I':A.row[range(nk, nkp1)],
43 |                'J':A.col[range(nk, nkp1)],
44 |                'V':A.data[range(nk, nkp1)]}
45 |         savemat(fname, A_k)
46 |         nk = nkp1
47 |     return(None)
48 |
49 | def load_sparse_in_k_parts(name, k, n):
50 |     row = np.array([])
51 |     col = np.array([])
52 |     data = np.array([])
53 |     for ii in range(k):
54 |         fname = name + '_part_' + str(ii+1) + '.mat'
55 |         A_k = loadmat(fname)
56 |         row = np.append(row, A_k['I'])
57 |         col = np.append(col, A_k['J'])
58 |         data = np.append(data, A_k['V'])
59 |     A = coo_matrix((data, (row, col)), shape = (n, n))
60 |     return(A)
61 |
62 | def dump_array_in_k_parts(A, name, k):
63 |     n = A.shape[0]
64 |     nk = 0
65 |     nper = int(n / k)
66 |     for ii in range(k):
67 |         fname = name + '_part_' + str(ii+1) + '.p'
68 |         nkp1 = nk + nper
69 |         if ii == k-1:
70 |             nkp1 = n
71 |         A_k = A[range(nk, nkp1)]
72 |         pickle.dump(A_k, open(fname, 'wb'), -1)
73 |         nk = nkp1
74 |     return(None)
75 |
76 | def load_array_in_k_parts(name, k):
77 |     for ii in range(k):
78 |         fname = name + '_part_' + str(ii+1) + '.p'
79 |         A_k = pickle.load(open(fname, 'rb'))
80 |         if ii == 0:
81 |             A = A_k.copy()
82 |         else:
83 |             A = np.vstack((A, A_k))
84 |     return(A)
85 |
86 | def set_sparse_diag_to_one(mat):
87 |     # appears to implicitly convert to csr which might be a problem
88 |     (n, n) = mat.shape
89 |     # copy the matrix, subtract the diagonal values, add identity matrix
90 |     # see http://nbviewer.jupyter.org/gist/Midnighter/9992103 for speed testing
91 |     cpy = mat - dia_matrix((mat.diagonal()[np.newaxis, :], [0]), shape=(n, n)) + identity(n)
92 |     return(cpy)
93 |
94 | def set_coo_diag_to_one(mat):
95 |     # this function takes a coo matrix and sets its diagonal to one
96 |     (n, n) = mat.shape
97 |     off_diag = np.where(mat.row != mat.col)[0]
98 |     row = np.append(mat.row[off_diag], range(n))
99 |     col = np.append(mat.col[off_diag], range(n))
100 |     data = np.append(mat.data[off_diag], np.ones(n))
101 |     cpy = coo_matrix((data, (row, col)), shape = (n, n))
102 |     return(cpy)
--------------------------------------------------------------------------------
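Editor's sketch — a round trip through the two-part MATLAB-friendly format above (the .mat files land in the current working directory):

    import numpy as np
    import scipy.sparse as sparse
    from megaman.utils.large_sparse_functions import (
        save_sparse_in_2_parts, load_sparse_in_2_parts)

    n = 100
    A = sparse.random(n, n, density=0.05, format='coo', random_state=0)
    save_sparse_in_2_parts(A, 'demo')   # writes demo_part_1.mat and demo_part_2.mat
    B = load_sparse_in_2_parts('demo_part_1.mat', 'demo_part_2.mat', n)
    assert np.allclose(A.toarray(), B.toarray())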
/megaman/utils/nystrom_extension.py:
--------------------------------------------------------------------------------
1 | # LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE
2 | # -*- coding: utf-8 -*-
3 | """
4 | Created on Tue Jun 21 11:11:40 2016
5 |
6 | @author: wang1
7 | """
8 | from __future__ import division
9 | import numpy as np
10 | import warnings
11 | from scipy.sparse import isspmatrix
12 | def nystrom_extension(C, e_vec, e_val):
13 |     """
14 |     Parameters
15 |     ----------
16 |     C: array-like, shape = (n, l)
17 |         Stacking of the training and testing data, where n
18 |         is the total number of samples and l is the number of
19 |         training samples.
20 |     e_val: array, shape = (1,s)
21 |         If W equals C[0:l, :], then e_val contains the largest s
22 |         eigenvalues of W
23 |     e_vec: array-like, shape = (l, s)
24 |         The corresponding eigenvectors to e_val
25 |
26 |     Returns
27 |     -------
28 |     eval_nystrom: array-like, shape = (1,s)
29 |         The estimated largest s eigenvalues of the matrix of which C is
30 |         the first l columns.
31 |     evec_nystrom: array-like, shape = (n, s)
32 |         The corresponding eigenvectors to eval_nystrom
33 |
34 |     """
35 |     n,l = C.shape
36 |     W = C[0:l, :]
37 |     eval_nystrom = (n/l)*e_val
38 |     eval_inv = e_val.copy()
39 |     e_nonzero = np.where(e_val != 0)
40 |     # e_nonzero = [i for i, e in enumerate(e_val) if e != 0] #np.nonzero(a)[0]
41 |     eval_inv[e_nonzero] = 1.0/e_val[e_nonzero]
42 |
43 |     if isspmatrix(C):
44 |         evec_nystrom = np.sqrt(l/n)*C.dot(e_vec)*eval_inv
45 |     else:
46 |         evec_nystrom = np.sqrt(l/n)*np.dot(C, e_vec)*eval_inv
47 |     return eval_nystrom, evec_nystrom
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
--------------------------------------------------------------------------------
/megaman/utils/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mmp2/megaman/249a7d725de1f99ea7f6ba169a5a89468fc423ec/megaman/utils/tests/__init__.py
--------------------------------------------------------------------------------
/megaman/utils/tests/test_analyze_dimension_and_radius.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from numpy.random import RandomState
3 | from scipy.spatial.distance import squareform, pdist
4 | import megaman.utils.analyze_dimension_and_radius as adar
5 | from scipy.sparse import csr_matrix
6 | from numpy.testing import assert_array_almost_equal
7 |
8 | def test_dim_distance_passed_vs_computed(seed=1234):
9 |     rng = RandomState(seed)
10 |     X = rng.randn(100, 10)
11 |     dists = csr_matrix(squareform(pdist(X)))
12 |     rmin = 2
13 |     rmax = 10.0
14 |     nradii = 10
15 |     radii = 10**(np.linspace(np.log10(rmin), np.log10(rmax), nradii))
16 |
17 |     results_passed = adar.neighborhood_analysis(dists, radii)
18 |     avg_neighbors = results_passed['avg_neighbors'].flatten()
19 |     radii = results_passed['radii'].flatten()
20 |     fit_range = range(len(radii))
21 |     dim_passed = adar.find_dimension_plot(avg_neighbors, radii, fit_range)
22 |     results_computed, dim_computed = adar.run_analyze_dimension_and_radius(X, rmin, rmax, nradii)
23 |     assert(dim_passed == dim_computed)
24 |     assert_array_almost_equal(results_passed['avg_neighbors'], results_computed['avg_neighbors'])
--------------------------------------------------------------------------------
/megaman/utils/tests/test_eigendecomp.py:
--------------------------------------------------------------------------------
1 | # LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE
2 |
3 | from megaman.utils.eigendecomp import (eigen_decomposition, null_space,
4 |                                        EIGEN_SOLVERS)
5 | from numpy.testing import assert_array_almost_equal
6 | import numpy as np
7 |
8 |
9 | SPD_SOLVERS = EIGEN_SOLVERS
10 | NON_SPD_SOLVERS = ['auto', 'dense', 'arpack']
11 | SOLVER_KWDS_DICT = {'auto':None,
12 |                     'dense':{'turbo':True, 'type':1},
13 |                     'arpack':{'mode':'normal', 'tol':0, 'maxiter':None},
14 |                     'lobpcg':{'maxiter':20, 'tol':None},
15 |                     'amg':{'maxiter':20, 'tol':None, 'aggregate':'standard'}}
16 |
17 | def _check_with_col_sign_flipping(A, B, tol=0.0):
18 |     """ Check that arrays A and B are equal, up to a possible sign flip
19 |     of each column."""
20 |     sign = True
21 |     for column_idx in range(A.shape[1]):
22 |         sign = sign and ((((A[:, column_idx] -
23 |                             B[:, column_idx]) ** 2).mean() <= tol ** 2) or
24 |                          (((A[:, column_idx] +
25 |                             B[:, column_idx]) ** 2).mean() <= tol ** 2))
26 |         if not sign:
27 |             return False
28 |     return True
29 |
30 | def _test_all_solvers(solvers_to_test, S, solver_kwds_dict={}):
31 |     for largest in [True, False]:
32 |         Lambdas = {}
33 |         for eigen_solver in solvers_to_test:
34 |             if eigen_solver in solver_kwds_dict.keys():
35 |                 solver_kwds = solver_kwds_dict[eigen_solver]
36 |             else:
37 |                 solver_kwds = None
38 |             lambdas, diffusion_map = eigen_decomposition(S, n_components = 3,
39 |                                                          eigen_solver = eigen_solver,
40 |                                                          largest = largest, drop_first = False,
41 |                                                          solver_kwds=solver_kwds)
42 |             Lambdas[eigen_solver] = np.sort(lambdas)
43 |         # pairwise comparison:
44 |         for i in range(len(solvers_to_test)):
45 |             for j in range(i+1, len(solvers_to_test)):
46 |                 print(largest)
47 |                 print(str(solvers_to_test[i]) + " + " + str(solvers_to_test[j]))
48 |                 assert_array_almost_equal(Lambdas[solvers_to_test[i]],
49 |                                           Lambdas[solvers_to_test[j]])
50 |
51 | def _test_all_null_solvers(solvers_to_test, S, solver_kwds_dict={}):
52 |     for largest in [True, False]:
53 |         Null_Space = {}
54 |         for eigen_solver in solvers_to_test:
55 |             if eigen_solver in solver_kwds_dict.keys():
56 |                 solver_kwds = solver_kwds_dict[eigen_solver]
57 |             else:
58 |                 solver_kwds = None
59 |             nullspace, errors = null_space(S, k = 3, eigen_solver = eigen_solver, solver_kwds=solver_kwds)
60 |             Null_Space[eigen_solver] = nullspace
61 |         # pairwise comparison:
62 |         for i in range(len(solvers_to_test)):
63 |             for j in range(i+1, len(solvers_to_test)):
64 |                 print(largest)
65 |                 print(str(solvers_to_test[i]) + " + " + str(solvers_to_test[j]))
66 |                 assert _check_with_col_sign_flipping(Null_Space[solvers_to_test[i]],
67 |                                                      Null_Space[solvers_to_test[j]], 0.05)
68 | def test_sym_pos_def_agreement():
69 |     solvers_to_test = SPD_SOLVERS
70 |     rng = np.random.RandomState(0)
71 |     X = rng.uniform(size=(100, 40))
72 |     S = np.dot(X.T, X)
73 |     _test_all_solvers(solvers_to_test, S)
74 |
75 | def test_null_space_sym_pos_def_agreement():
76 |     solvers_to_test = SPD_SOLVERS
77 |
78 |     rng = np.random.RandomState(0)
79 |     X = rng.uniform(size=(100, 100))
80 |     S = np.dot(X.T, X)
81 |     _test_all_null_solvers(solvers_to_test, S)
82 |
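# Editor's sketch (not part of the test file): the call pattern exercised by the
# tests above, in isolation.
#
#     import numpy as np
#     from megaman.utils.eigendecomp import eigen_decomposition
#
#     rng = np.random.RandomState(0)
#     X = rng.uniform(size=(100, 40))
#     S = np.dot(X.T, X)                     # symmetric positive definite
#     lambdas, vecs = eigen_decomposition(S, n_components=3,
#                                         eigen_solver='arpack',
#                                         largest=True, drop_first=False)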
83 | def test_null_space_sym_agreement():
84 |     solvers_to_test = NON_SPD_SOLVERS
85 |
86 |     rng = np.random.RandomState(0)
87 |     X = rng.uniform(size=(16, 16))
88 |     S = X + X.T
89 |     _test_all_null_solvers(solvers_to_test, S)
90 |
91 | def test_null_space_non_sym_agreement():
92 |     solvers_to_test = NON_SPD_SOLVERS
93 |     rng = np.random.RandomState(0)
94 |     S = rng.uniform(size=(16, 16))
95 |     _test_all_null_solvers(solvers_to_test, S)
96 |
97 | def test_base_eigen_solver_kwds():
98 |     solvers_to_test = SPD_SOLVERS
99 |     rng = np.random.RandomState(0)
100 |     X = rng.uniform(size=(100, 40))
101 |     S = np.dot(X.T, X)
102 |     _test_all_solvers(solvers_to_test, S, solver_kwds_dict=SOLVER_KWDS_DICT)
103 |
104 | def test_null_eigen_solver_kwds():
105 |     solvers_to_test = SPD_SOLVERS
106 |     rng = np.random.RandomState(0)
107 |     X = rng.uniform(size=(100, 40))
108 |     S = np.dot(X.T, X)
109 |     _test_all_null_solvers(solvers_to_test, S, solver_kwds_dict=SOLVER_KWDS_DICT)
110 |
--------------------------------------------------------------------------------
/megaman/utils/tests/test_estimate_radius.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from numpy.random import RandomState
3 | from scipy.spatial.distance import squareform, pdist
4 | from megaman.utils.estimate_radius import run_estimate_radius
5 | from scipy.sparse import csr_matrix
6 | from numpy.testing import assert_array_almost_equal
7 |
8 | def test_radius_serial_vs_parallel(seed=1234):
9 |     rng = RandomState(seed)
10 |     X = rng.randn(100, 10)
11 |     dists = csr_matrix(squareform(pdist(X)))
12 |     sample = range(100)
13 |     d = 3
14 |     rmin = 2
15 |     rmax = 10.0
16 |     ntry = 10
17 |     run_parallel = True
18 |     results_parallel = run_estimate_radius(X, dists, sample, d, rmin, rmax, ntry, run_parallel)
19 |     print(results_parallel)
20 |     results_serial = run_estimate_radius(X, dists, sample, d, rmin, rmax, ntry, False)
21 |     print(results_serial)
22 |     assert_array_almost_equal(results_parallel, results_serial)
--------------------------------------------------------------------------------
/megaman/utils/tests/test_nystrom.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from numpy import absolute
3 | from numpy.linalg import qr
4 | from megaman.utils.nystrom_extension import nystrom_extension
5 | from numpy.testing import assert_array_almost_equal
6 |
7 |
8 | def test_nystrom_extension(seed=123):
9 |     """ Test the Nystrom extension: the low-rank approximation is exact
10 |     when G itself has low rank.
11 |     """
12 |     n = 10
13 |     s = 2
14 |     rng = np.random.RandomState(seed)
15 |     X = rng.randn(n, s)
16 |     G = np.dot(X, X.T) # has rank s
17 |
18 |     # find the linearly independent columns of G
19 |     q = qr(G)[1]
20 |     q = absolute(q)
21 |     sums = np.sum(q, axis=1)
22 |     i = 0
23 |     dims = list()
24 |     while i < n:  # n is the matrix dimension
25 |         if sums[i] > 1.e-10:
26 |             dims.append(i)
27 |         i += 1
28 |
29 |     # Find the eigendecomposition of the full-rank portion:
30 |     W = G[dims,:]
31 |     W = W[:,dims]
32 |     eval, evec = np.linalg.eigh(W)
33 |
34 |     # pass the dims columns of G
35 |     C = G[:,dims]
36 |     # Find the estimated eigendecomposition using Nystrom
37 |     eval_nystrom, evec_nystrom = nystrom_extension(C, evec, eval)
38 |
39 |     # reconstruct G using the Nystrom approximation
40 |     G_nystrom = np.dot(np.dot(evec_nystrom, np.diag(eval_nystrom)), evec_nystrom.T)
41 |     # since rank(W) = rank(G) = s, the Nystrom approximation of G is exact:
42 |     assert_array_almost_equal(G_nystrom, G)
--------------------------------------------------------------------------------
/megaman/utils/tests/test_spectral_clustering.py:
--------------------------------------------------------------------------------
1 | from sklearn import neighbors
2 | import numpy as np
3 |
4 | from megaman.utils.eigendecomp import EIGEN_SOLVERS
5 | from megaman.utils.spectral_clustering import SpectralClustering
6 |
7 | def test_spectral_clustering():
8 |     K = 3
9 |     num_per_cluster = 100
10 |     c = np.array([[1,0,0], [0,1,0], [0,0,1]])
11 |     X = np.repeat(c, np.repeat(num_per_cluster, K), axis = 0)
12 |     radius = 5
13 |     rng = np.random.RandomState(36)
14 |     def check_labels(stabalize, renormalize, eigen_solver):
15 |         if eigen_solver in ['dense', 'auto']:
16 |             solver_kwds = {}
17 |         else:
18 |             solver_kwds = {'maxiter':100000, 'tol':1e-5}
19 |         SC = SpectralClustering(K=K, radius=radius, stabalize=stabalize, renormalize=renormalize,
20 |                                 eigen_solver = eigen_solver, solver_kwds=solver_kwds, random_state = rng,
21 |                                 additional_vectors = 0)
22 |         labels = SC.fit_transform(X, input_type= 'data')
23 |         for k in range(K):
24 |             cluster_labs = labels[range((k*num_per_cluster), ((k+1)*num_per_cluster))]
25 |             first_lab = cluster_labs[0]
26 |             assert(np.all(cluster_labs == first_lab))
27 |
28 |     for stabalize in [True, False]:
29 |         for renormalize in [True, False]:
30 |             for solver in EIGEN_SOLVERS:
31 |                 yield check_labels, stabalize, renormalize, solver
--------------------------------------------------------------------------------
/megaman/utils/tests/test_testing.py:
--------------------------------------------------------------------------------
1 | # LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE
2 |
3 | import warnings
4 | import sys
5 | import unittest
6 | from nose.tools import assert_raises, assert_equal
7 |
8 | from megaman.utils.testing import assert_raise_message, assert_no_warnings, assert_warns
9 |
10 | def test_assert_raise_message():
11 |     def _raise_ValueError(message):
12 |         raise ValueError(message)
13 |
14 |     def _no_raise():
15 |         pass
16 |
17 |     assert_raise_message(ValueError, "test",
18 |                          _raise_ValueError, "test")
19 |
20 |     assert_raises(AssertionError,
21 |                   assert_raise_message, ValueError, "something else",
22 |                   _raise_ValueError, "test")
23 |
24 |     assert_raises(ValueError,
25 |                   assert_raise_message, TypeError, "something else",
26 |                   _raise_ValueError, "test")
27 |
28 |     assert_raises(AssertionError,
29 |                   assert_raise_message, ValueError, "test",
30 |                   _no_raise)
31 |
32 |     # multiple exceptions in a tuple
33 |     assert_raises(AssertionError,
34 |                   assert_raise_message, (ValueError, AttributeError),
35 |                   "test", _no_raise)
36 |
37 |
38 | # This class is inspired by numpy 1.7, with an alteration to check
39 | # the reset of warning filters after calls to assert_warns.
40 | # This assert_warns behavior is specific to scikit-learn because
41 | # `clean_warning_registry()` is called internally by assert_warns
42 | # and clears all previous filters.
43 | class TestWarns(unittest.TestCase):
44 |     def test_warn(self):
45 |         def f():
46 |             warnings.warn("yo")
47 |             return 3
48 |
49 |         # Test that assert_warns is not impacted by externally set
50 |         # filters and is reset internally.
51 |         # This is because `clean_warning_registry()` is called internally by
52 |         # assert_warns and clears all previous filters.
53 |         warnings.simplefilter("ignore", UserWarning)
54 |         assert_equal(assert_warns(UserWarning, f), 3)
55 |
56 |         # Test that the warning registry is empty after assert_warns
57 |         assert_equal(sys.modules['warnings'].filters, [])
58 |
59 |         assert_raises(AssertionError, assert_no_warnings, f)
60 |         assert_equal(assert_no_warnings(lambda x: x, 1), 1)
61 |
62 |     def test_warn_wrong_warning(self):
63 |         def f():
64 |             warnings.warn("yo", DeprecationWarning)
65 |
66 |         failed = False
67 |         filters = sys.modules['warnings'].filters[:]
68 |         try:
69 |             try:
70 |                 # Should raise an AssertionError
71 |                 assert_warns(UserWarning, f)
72 |                 failed = True
73 |             except AssertionError:
74 |                 pass
75 |         finally:
76 |             sys.modules['warnings'].filters = filters
77 |
78 |         if failed:
79 |             raise AssertionError("wrong warning caught by assert_warn")
80 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | # Setup script for megaman: scalable manifold learning
2 | # LICENSE: Simplified BSD https://github.com/mmp2/megaman/blob/master/LICENSE
3 |
4 | import io
5 | import os
6 | import re
7 | import sys
8 | import subprocess
9 |
10 | PY2 = sys.version_info[0] == 2
11 | PY3 = not PY2
12 | if PY3:
13 |     import importlib.machinery
14 |
15 |
16 | def read(path, encoding='utf-8'):
17 |     path = os.path.join(os.path.dirname(__file__), path)
18 |     with io.open(path, encoding=encoding) as fp:
19 |         return fp.read()
20 |
21 |
22 | def version(path):
23 |     """Obtain the package version from a python file, e.g. pkg/__init__.py
24 |
25 |
26 |     """
27 |     version_file = read(path)
28 |     version_match = re.search(r"""^__version__ = ['"]([^'"]*)['"]""",
29 |                               version_file, re.M)
30 |     if version_match:
31 |         return version_match.group(1)
32 |     raise RuntimeError("Unable to find version string.")
33 |
34 |
35 | def generate_cython():
36 |     cwd = os.path.abspath(os.path.dirname(__file__))
37 |     print("Cythonizing sources")
38 |     p = subprocess.call([sys.executable,
39 |                          os.path.join(cwd, 'tools', 'cythonize.py'),
40 |                          'megaman'],
41 |                         cwd=cwd)
42 |     if p != 0:
43 |         raise RuntimeError("Running cythonize failed!")
44 |
45 |
46 | def configuration(parent_package='', top_path=None):
47 |     from numpy.distutils.misc_util import Configuration
48 |     config = Configuration(None, parent_package, top_path)
49 |     config.set_options(ignore_setup_xxx_py=True,
50 |                        assume_default_configuration=True,
51 |                        delegate_options_to_subpackages=True,
52 |                        quiet=True)
53 |
54 |     config.add_subpackage('megaman')
55 |
56 |     return config
57 |
58 | DESCRIPTION = "megaman: Manifold Learning for Millions of Points"
59 | LONG_DESCRIPTION = """
60 | megaman: Manifold Learning for Millions of Points
61 | =================================================
62 |
63 | This repository contains a scalable implementation of several manifold learning
64 | algorithms, making use of FLANN for fast approximate nearest neighbors and
65 | PyAMG, LOBPCG, ARPACK, and other routines for fast matrix decompositions.
66 |
67 | For more information, visit https://github.com/mmp2/megaman
68 | """
69 | NAME = "megaman"
70 | AUTHOR = "Marina Meila"
71 | AUTHOR_EMAIL = "mmp@stat.washington.delete_this.edu"
72 | URL = 'https://github.com/mmp2/megaman'
73 | DOWNLOAD_URL = 'https://github.com/mmp2/megaman'
74 | LICENSE = 'BSD 3'
75 |
76 | VERSION = version('megaman/__init__.py')
77 |
78 |
79 | def setup_package():
80 |     from numpy.distutils.core import setup
81 |
82 |     old_path = os.getcwd()
83 |     local_path = os.path.dirname(os.path.abspath(sys.argv[0]))
84 |     src_path = local_path
85 |
86 |     os.chdir(local_path)
87 |     sys.path.insert(0, local_path)
88 |
89 |     # Run build
90 |     old_path = os.getcwd()
91 |     os.chdir(src_path)
92 |     sys.path.insert(0, src_path)
93 |
94 |     cwd = os.path.abspath(os.path.dirname(__file__))
95 |     if not os.path.exists(os.path.join(cwd, 'PKG-INFO')):
96 |         # Generate Cython sources, unless building from source release
97 |         generate_cython()
98 |
99 |     try:
100 |         setup(name='megaman',
101 |               author=AUTHOR,
102 |               author_email=AUTHOR_EMAIL,
103 |               url=URL,
104 |               download_url=DOWNLOAD_URL,
105 |               description=DESCRIPTION,
106 |               long_description = LONG_DESCRIPTION,
107 |               version=VERSION,
108 |               license=LICENSE,
109 |               configuration=configuration,
110 |               classifiers=[
111 |                 'Development Status :: 4 - Beta',
112 |                 'Environment :: Console',
113 |                 'Intended Audience :: Science/Research',
114 |                 'License :: OSI Approved :: BSD License',
115 |                 'Natural Language :: English',
116 |                 'Programming Language :: Python :: 2.7',
117 |                 'Programming Language :: Python :: 3.4',
118 |                 'Programming Language :: Python :: 3.5'])
119 |     finally:
120 |         del sys.path[0]
121 |         os.chdir(old_path)
122 |
123 |     return
124 |
125 |
126 | if __name__ == '__main__':
127 |     setup_package()
128 |
--------------------------------------------------------------------------------