├── .gitattributes ├── .gitignore ├── .idea ├── .gitignore ├── inspectionProfiles │ └── profiles_settings.xml └── umap-nan.iml ├── .pep8speaks.yml ├── .readthedocs.yaml ├── .travis.yml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE.txt ├── Makefile ├── README.rst ├── appveyor.yml ├── azure-pipelines.yml ├── ci_scripts ├── install.sh ├── success.sh └── test.sh ├── doc ├── .gitignore ├── Makefile ├── _static │ └── .gitkeep ├── aligned_umap_basic_usage.rst ├── aligned_umap_plotly_plot.html ├── aligned_umap_politics_demo.rst ├── api.rst ├── basic_usage.rst ├── basic_usage_bokeh_example.html ├── benchmarking.rst ├── bokeh_digits_plot.py ├── clustering.rst ├── composing_models.rst ├── conf.py ├── densmap_demo.rst ├── doc_requirements.txt ├── document_embedding.rst ├── embedding_space.rst ├── exploratory_analysis.rst ├── faq.rst ├── how_umap_works.rst ├── images │ ├── 20newsgroups_hellinger_counts.png │ ├── 20newsgroups_hellinger_tfidf.png │ ├── BasicUsage_13_1.png │ ├── BasicUsage_18_0.png │ ├── BasicUsage_20_1.png │ ├── BasicUsage_26_1.png │ ├── BasicUsage_6_1.png │ ├── ESM_metagenomic_atlas.png │ ├── Hyperbolic_tiling.png │ ├── SupervisedUMAP_10_1.png │ ├── SupervisedUMAP_15_1.png │ ├── SupervisedUMAP_22_1.png │ ├── SupervisedUMAP_31_0.png │ ├── SupervisedUMAP_33_0.png │ ├── UMAPTransform_15_0.png │ ├── UMAPTransform_21_0.png │ ├── UMAP_zoo.png │ ├── activation_atlas.png │ ├── aligned_umap_basic_usage_15_0.png │ ├── aligned_umap_basic_usage_22_0.png │ ├── aligned_umap_basic_usage_29_0.png │ ├── aligned_umap_basic_usage_29_1.png │ ├── aligned_umap_basic_usage_5_1.png │ ├── aligned_umap_pendigits_3d.png │ ├── aligned_umap_pendigits_3d_1.png │ ├── aligned_umap_pendigits_anim.gif │ ├── aligned_umap_politics_demo_31_0.png │ ├── aligned_umap_politics_demo_spaghetti.png │ ├── audio_explorer.png │ ├── basic_usage_17_1.png │ ├── basic_usage_22_0.png │ ├── basic_usage_24_2.png │ ├── basic_usage_30_1.png │ ├── basic_usage_8_1.png │ ├── bert_embedding.png │ ├── c_elegans_3d.jpg │ ├── clustering_10_1.png │ ├── clustering_16_1.png │ ├── clustering_27_1.png │ ├── clustering_31_1.png │ ├── clustering_6_1.png │ ├── composing_models_11_1.png │ ├── composing_models_14_1.png │ ├── composing_models_18_1.png │ ├── composing_models_20_1.png │ ├── composing_models_23_1.png │ ├── composing_models_36_1.png │ ├── composing_models_38_1.png │ ├── composing_models_42_1.png │ ├── composing_models_44_1.png │ ├── composing_models_47_1.png │ ├── composing_models_50_1.png │ ├── composing_models_6_1.png │ ├── densmap_demo_10_1.png │ ├── densmap_demo_13_1.png │ ├── densmap_demo_16_1.png │ ├── densmap_demo_19_1.png │ ├── densmap_demo_21_1.png │ ├── densmap_demo_24_1.png │ ├── densmap_demo_6_1.png │ ├── embedding_projector.png │ ├── embedding_space_11_1.png │ ├── embedding_space_15_1.png │ ├── embedding_space_18_1.png │ ├── embedding_space_26_0.png │ ├── embedding_space_29_1.png │ ├── embedding_space_33_1.png │ ├── embedding_space_37_1.png │ ├── embedding_space_39_1.png │ ├── embedding_space_47_1.png │ ├── embedding_space_52_1.png │ ├── embedding_space_55_0.png │ ├── embedding_space_59_0.png │ ├── embedding_space_7_1.png │ ├── exploring_fashion_mnist.png │ ├── galaxy10_2D_densmap.svg │ ├── galaxy10_2D_densmap_supervised.svg │ ├── galaxy10_2D_densmap_supervised_prediction.svg │ ├── galaxy10_2D_umap.svg │ ├── galaxy10_2D_umap_supervised.svg │ ├── galaxy10_2D_umap_supervised_prediction.svg │ ├── galaxy10_subset.svg │ ├── how_umap_works_basic_graph.png │ ├── how_umap_works_fuzzy_open_cover.png │ ├── how_umap_works_local_metric_open_cover.png │ ├── how_umap_works_open_cover.png │ ├── how_umap_works_raw_data.png │ ├── how_umap_works_raw_graph.png │ ├── how_umap_works_umap_graph.png │ ├── how_umap_works_umap_graph_layout.png │ ├── how_umap_works_umap_layout.png │ ├── how_umap_works_umap_open_cover.png │ ├── how_umap_works_uniform_distribution_cover.png │ ├── inverse_transform_13_0.png │ ├── inverse_transform_20_1.png │ ├── inverse_transform_26_0.png │ ├── inverse_transform_7_1.png │ ├── mutual_nn_umap_20ngc.png │ ├── mutual_nn_umap_FMNIST.png │ ├── mutual_nn_umap_MNIST.png │ ├── mutual_nn_umap_connectivity.png │ ├── mutual_nn_umap_lc.png │ ├── mutual_nn_umap_results.png │ ├── organogenesis_paper.png │ ├── orion_particles.png │ ├── outliers_10_0.png │ ├── outliers_12_2.png │ ├── outliers_13_2.png │ ├── outliers_15_0.png │ ├── outliers_19_0.png │ ├── outliers_22_2.png │ ├── outliers_27_0.png │ ├── outliers_5_0.png │ ├── outliers_7_2.png │ ├── outliers_9_0.png │ ├── parameters_13_1.png │ ├── parameters_13_2.png │ ├── parameters_13_3.png │ ├── parameters_13_4.png │ ├── parameters_13_5.png │ ├── parameters_13_6.png │ ├── parameters_13_7.png │ ├── parameters_16_1.png │ ├── parameters_16_2.png │ ├── parameters_16_3.png │ ├── parameters_16_4.png │ ├── parameters_16_5.png │ ├── parameters_16_6.png │ ├── parameters_19_1.png │ ├── parameters_21_1.png │ ├── parameters_32_1.png │ ├── parameters_32_2.png │ ├── parameters_32_3.png │ ├── parameters_32_4.png │ ├── parameters_32_5.png │ ├── parameters_8_1.png │ ├── performance_14_1.png │ ├── performance_15_1.png │ ├── performance_17_1.png │ ├── performance_18_1.png │ ├── performance_20_1.png │ ├── performance_21_1.png │ ├── pixplot.png │ ├── plotting_10_1.png │ ├── plotting_12_1.png │ ├── plotting_14_1.png │ ├── plotting_19_2.png │ ├── plotting_21_2.png │ ├── plotting_32_2.png │ ├── plotting_34_2.png │ ├── plotting_38_1.png │ ├── plotting_40_1.png │ ├── plotting_42_0.png │ ├── plotting_44_1.png │ ├── plotting_8_2.png │ ├── population_umap.jpg │ ├── precomputed_k-nn11.png │ ├── precomputed_k-nn13.png │ ├── precomputed_k-nn17.png │ ├── precomputed_k-nn6.png │ ├── pumap-only.png │ ├── reproducibility_10_1.png │ ├── reproducibility_14_1.png │ ├── reproducibility_18_1.png │ ├── reproducibility_6_1.png │ ├── retrain_pumap_emb_x1.png │ ├── retrain_pumap_emb_x2.png │ ├── retrain_pumap_history.png │ ├── retrain_pumap_p_emb_x1.png │ ├── retrain_pumap_p_emb_x2.png │ ├── retrain_pumap_summary_2_removed.png │ ├── simplices.png │ ├── single_cell_umap.jpg │ ├── sparse_11_1.png │ ├── sparse_18_0.png │ ├── sparse_31_1.png │ ├── sparse_35_0.png │ ├── structure_recent_phil.png │ ├── syllabus_galaxy.png │ ├── time_cluster.png │ ├── umap-loss.png │ ├── umap-only.png │ ├── umap_explorer.png │ ├── umap_primes.png │ ├── umap_surrey.png │ └── umap_vae_pca.png ├── index.rst ├── interactive_viz.rst ├── inverse_transform.rst ├── logo.png ├── logo_large.png ├── make.bat ├── mutual_nn_umap.rst ├── nomic_atlas_umap_of_text_embeddings.rst ├── nomic_atlas_visualizing_mnist_training_dynamics.rst ├── outliers.rst ├── parameters.rst ├── parametric_umap.rst ├── performance.rst ├── plotting.rst ├── plotting_example_interactive.py ├── plotting_example_nomic_atlas.py ├── plotting_interactive_example.html ├── precomputed_k-nn.rst ├── release_notes.rst ├── reproducibility.rst ├── scientific_papers.rst ├── sparse.rst ├── supervised.rst ├── transform.rst └── transform_landmarked_pumap.rst ├── docs_requirements.txt ├── examples ├── README.txt ├── digits │ ├── digits.html │ └── digits.py ├── galaxy10sdss.py ├── inverse_transform_example.py ├── iris │ ├── iris.html │ └── iris.py ├── mnist_torus_sphere_example.py ├── mnist_transform_new_data.py ├── plot_algorithm_comparison.py ├── plot_fashion-mnist_example.py ├── plot_feature_extraction_classification.py └── plot_mnist_example.py ├── images ├── densmap_example_mnist.png ├── iris.png ├── mnist_digits.png ├── sklearn_digits.png ├── umap_example_fashion_mnist1.png ├── umap_example_mnist1.png └── umap_example_shuttle.png ├── notebooks ├── AnimatingUMAP.ipynb ├── Document embedding using UMAP.ipynb ├── MNIST_Landmarks.ipynb ├── Parametric_UMAP │ ├── 01.0-parametric-umap-mnist-embedding-basic.ipynb │ ├── 02.0-parametric-umap-mnist-embedding-convnet.ipynb │ ├── 03.0-parametric-umap-mnist-embedding-convnet-with-reconstruction.ipynb │ ├── 04.0-parametric-umap-mnist-embedding-convnet-with-autoencoder-loss.ipynb │ ├── 05.0-parametric-umap-with-callback.ipynb │ ├── 06.0-nonparametric-umap.ipynb │ └── 07.0-parametric-umap-global-loss.ipynb ├── UMAP usage and parameters.ipynb ├── nomic-atlas-umap-of-text-embeddings.ipynb └── nomic-atlas-visualizing-mnist-training-dynamics.ipynb ├── paper.bib ├── paper.md ├── setup.py └── umap ├── __init__.py ├── aligned_umap.py ├── distances.py ├── layouts.py ├── parametric_umap.py ├── plot.py ├── sparse.py ├── spectral.py ├── tests ├── __init__.py ├── conftest.py ├── digits_embedding_42.npy ├── test_aligned_umap.py ├── test_chunked_parallel_spatial_metric.py ├── test_composite_models.py ├── test_data_input.py ├── test_densmap.py ├── test_parametric_umap.py ├── test_plot.py ├── test_spectral.py ├── test_umap.py ├── test_umap_get_feature_names_out.py ├── test_umap_metrics.py ├── test_umap_nn.py ├── test_umap_on_iris.py ├── test_umap_ops.py ├── test_umap_repeated_data.py ├── test_umap_trustworthiness.py └── test_umap_validation_params.py ├── umap_.py ├── utils.py └── validation.py /.gitattributes: -------------------------------------------------------------------------------- 1 | *.ipynb linguist-language=Python -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # virtual environment 2 | venv 3 | 4 | # non-stylistic pycharm configs 5 | .idea/misc.xml 6 | .idea/modules.xml 7 | .idea/umap.iml 8 | .idea/vcs.xml 9 | .idea/workspace.xml 10 | .idea/dictionaries 11 | .idea/other.xml 12 | 13 | # Mac Finder layout 14 | .DS_Store 15 | 16 | # IPython/Jupyter notebook checkpoints 17 | *.ipynb_checkpoints 18 | 19 | # Python 2.x & 3.x bytecode cache 20 | *.pyc 21 | *__pycache__ 22 | 23 | # metadata from pip-installing repo 24 | umap_learn.egg-info 25 | 26 | # docs 27 | doc/auto_examples 28 | doc/_build -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/umap-nan.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 12 | 13 | 15 | -------------------------------------------------------------------------------- /.pep8speaks.yml: -------------------------------------------------------------------------------- 1 | pycodestyle: # Same as scanner.linter value. Other option is flake8 2 | max-line-length: 88 # Default is 79 in PEP 8 3 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file for Sphinx projects 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | # Set the OS, Python version and other tools you might need 8 | build: 9 | os: ubuntu-22.04 10 | tools: 11 | python: "3.11" 12 | 13 | 14 | # Build documentation in the "docs/" directory with Sphinx 15 | sphinx: 16 | configuration: doc/conf.py 17 | 18 | # Optional but recommended, declare the Python requirements required 19 | # to build your documentation 20 | # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html 21 | python: 22 | install: 23 | - requirements: docs_requirements.txt 24 | - method: pip 25 | path: . 26 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | cache: 4 | apt: true 5 | # We use three different cache directory 6 | # to work around a Travis bug with multi-platform cache 7 | directories: 8 | - $HOME/.cache/pip 9 | - $HOME/download 10 | env: 11 | global: 12 | # Directory where tests are run from 13 | - TEST_DIR=/tmp/test_dir/ 14 | - MODULE=umap 15 | 16 | matrix: 17 | include: 18 | - python: 3.6 19 | os: linux 20 | - env: DISTRIB="conda" PYTHON_VERSION="3.7" NUMPY_VERSION="1.17" SCIPY_VERSION="1.3.1" 21 | os: linux 22 | - env: DISTRIB="conda" PYTHON_VERSION="3.8" NUMPY_VERSION="1.20.0" SCIPY_VERSION="1.6.0" 23 | os: linux 24 | - env: DISTRIB="conda" PYTHON_VERSION="3.8" COVERAGE="true" NUMPY_VERSION="1.20.0" SCIPY_VERSION="1.6.0" 25 | os: linux 26 | # - env: DISTRIB="conda" PYTHON_VERSION="3.7" NUMBA_VERSION="0.51.2" 27 | # os: osx 28 | # language: generic 29 | # - env: DISTRIB="conda" PYTHON_VERSION="3.8" NUMBA_VERSION="0.51.2" 30 | # os: osx 31 | # language: generic 32 | 33 | install: source ci_scripts/install.sh 34 | script: travis_wait 90 bash ci_scripts/test.sh 35 | after_success: source ci_scripts/success.sh 36 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to creating a positive environment include: 10 | 11 | * Using welcoming and inclusive language 12 | * Being respectful of differing viewpoints and experiences 13 | * Gracefully accepting constructive criticism 14 | * Focusing on what is best for the community 15 | * Showing empathy towards other community members 16 | 17 | Examples of unacceptable behavior by participants include: 18 | 19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 20 | * Trolling, insulting/derogatory comments, and personal or political attacks 21 | * Public or private harassment 22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 23 | * Other conduct which could reasonably be considered inappropriate in a professional setting 24 | 25 | ## Our Responsibilities 26 | 27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 28 | 29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 30 | 31 | ## Scope 32 | 33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 34 | 35 | ## Enforcement 36 | 37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at leland.mcinnes@gmail.com. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 38 | 39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 40 | 41 | ## Attribution 42 | 43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] 44 | 45 | [homepage]: http://contributor-covenant.org 46 | [version]: http://contributor-covenant.org/version/1/4/ 47 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | Contributions of all kinds are welcome. In particular pull requests are appreciated. 4 | The authors will endeavour to help walk you through any issues in the pull request 5 | discussion, so please feel free to open a pull request even if you are new to such things. 6 | 7 | ## Issues 8 | 9 | The easiest contribution to make is to [file an issue](https://github.com/lmcinnes/umap/issues/new). 10 | It is beneficial if you check the [FAQ](https://umap-learn.readthedocs.io/en/latest/faq.html), 11 | and do a cursory search of [existing issues](https://github.com/lmcinnes/umap/issues?utf8=%E2%9C%93&q=is%3Aissue). 12 | It is also helpful, but not necessary, if you can provide clear instruction for 13 | how to reproduce a problem. If you have resolved an issue yourself please consider 14 | contributing to the FAQ to add your problem, and its resolution, so others can 15 | benefit from your work. 16 | 17 | ## Documentation 18 | 19 | Contributing to documentation is the easiest way to get started. Providing simple 20 | clear or helpful documentation for new users is critical. Anything that *you* as 21 | a new user found hard to understand, or difficult to work out, are excellent places 22 | to begin. Contributions to more detailed and descriptive error messages is 23 | especially appreciated. To contribute to the documentation please 24 | [fork the project](https://github.com/lmcinnes/umap/issues#fork-destination-box) 25 | into your own repository, make changes there, and then submit a pull request. 26 | 27 | ### Building the Documentation Locally 28 | 29 | To build the docs locally, install the documentation tools requirements: 30 | 31 | ```bash 32 | pip install -r docs_requirements.txt 33 | ``` 34 | 35 | Then run: 36 | 37 | ```bash 38 | sphinx-build -b html doc doc/_build 39 | ``` 40 | 41 | This will build the documentation in HTML format. You will be able to find the output 42 | in the `doc/_build` folder. 43 | 44 | ## Code 45 | 46 | Code contributions are always welcome, from simple bug fixes, to new features. To 47 | contribute code please 48 | [fork the project](https://github.com/lmcinnes/umap/issues#fork-destination-box) 49 | into your own repository, make changes there, and then submit a pull request. If 50 | you are fixing a known issue please add the issue number to the PR message. If you 51 | are fixing a new issue feel free to file an issue and then reference it in the PR. 52 | You can [browse open issues](https://github.com/lmcinnes/umap/issues), 53 | or consult the [project roadmap](https://github.com/lmcinnes/umap/issues/15), for potential code 54 | contributions. Fixes for issues tagged with 'help wanted' are especially appreciated. 55 | 56 | ### Code formatting 57 | 58 | If possible, install the [black code formatter](https://github.com/python/black) (e.g. 59 | `pip install black`) and run it before submitting a pull request. This helps maintain consistency 60 | across the code, but also there is a check in the Travis-CI continuous integration system which 61 | will show up as a failure in the pull request if `black` detects that it hasn't been run. 62 | 63 | Formatting is as simple as running: 64 | 65 | ```bash 66 | black . 67 | ``` 68 | 69 | in the root of the project. 70 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2017, Leland McInnes 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # make gh-pages in repo base directory to automatically build and deploy documents to github 2 | 3 | gh-pages: 4 | echo "Make gh-pages" 5 | cd doc; make html 6 | git checkout gh-pages 7 | rm -rf _sources _static _modules _downloads _images auto_examples 8 | mv -fv doc/_build/html/* . 9 | rm -rf doc 10 | git add -A 11 | git commit -m "Generated gh-pages for `git log master -1 --pretty=short --abbrev-commit`" && git push origin gh-pages ; git checkout master 12 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | build: "off" 2 | 3 | environment: 4 | matrix: 5 | - PYTHON_VERSION: "3.7" 6 | MINICONDA: C:\Miniconda3-x64 7 | - PYTHON_VERSION: "3.8" 8 | MINICONDA: C:\Miniconda3-x64 9 | 10 | init: 11 | - "ECHO %PYTHON_VERSION% %MINICONDA%" 12 | 13 | install: 14 | - "set PATH=%MINICONDA%;%MINICONDA%\\Scripts;%PATH%" 15 | - conda config --set always_yes yes --set changeps1 no 16 | - conda update -q conda 17 | - conda info -a 18 | - "conda create -q -n test-environment python=%PYTHON_VERSION% numpy scipy scikit-learn numba pandas bokeh holoviews datashader scikit-image pytest" 19 | - activate test-environment 20 | - pip install "tensorflow>=2.1" 21 | - pip install pytest-benchmark 22 | - pip install -e . 23 | 24 | test_script: 25 | - pytest --show-capture=no -v --disable-warnings 26 | -------------------------------------------------------------------------------- /azure-pipelines.yml: -------------------------------------------------------------------------------- 1 | # Trigger a build when there is a push to the main branch or a tag starts with release- 2 | trigger: 3 | branches: 4 | include: 5 | - master 6 | tags: 7 | include: 8 | - release-* 9 | 10 | # Trigger a build when there is a pull request to the main branch 11 | # Ignore PRs that are just updating the docs 12 | pr: 13 | branches: 14 | include: 15 | - master 16 | exclude: 17 | - doc/* 18 | - README.rst 19 | 20 | parameters: 21 | - name: includeReleaseCandidates 22 | displayName: "Allow pre-release dependencies" 23 | type: boolean 24 | default: false 25 | 26 | 27 | variables: 28 | triggeredByPullRequest: $[eq(variables['Build.Reason'], 'PullRequest')] 29 | 30 | stages: 31 | - stage: RunAllTests 32 | displayName: Run test suite 33 | jobs: 34 | - job: run_platform_tests 35 | strategy: 36 | matrix: 37 | mac_py39: 38 | imageName: 'macOS-latest' 39 | python.version: '3.9' 40 | linux_py39: 41 | imageName: 'ubuntu-latest' 42 | python.version: '3.9' 43 | windows_py39: 44 | imageName: 'windows-latest' 45 | python.version: '3.9' 46 | mac_py310: 47 | imageName: 'macOS-latest' 48 | python.version: '3.10' 49 | linux_py310: 50 | imageName: 'ubuntu-latest' 51 | python.version: '3.10' 52 | windows_py310: 53 | imageName: 'windows-latest' 54 | python.version: '3.10' 55 | mac_py311: 56 | imageName: 'macOS-latest' 57 | python.version: '3.11' 58 | linux_py311: 59 | imageName: 'ubuntu-latest' 60 | python.version: '3.11' 61 | windows_py311: 62 | imageName: 'windows-latest' 63 | python.version: '3.11' 64 | mac_py312: 65 | imageName: 'macOS-latest' 66 | python.version: '3.12' 67 | linux_py312: 68 | imageName: 'ubuntu-latest' 69 | python.version: '3.12' 70 | windows_py312: 71 | imageName: 'windows-latest' 72 | python.version: '3.12' 73 | 74 | pool: 75 | vmImage: $(imageName) 76 | 77 | steps: 78 | - task: UsePythonVersion@0 79 | inputs: 80 | versionSpec: '$(python.version)' 81 | displayName: 'Use Python $(python.version)' 82 | 83 | - script: | 84 | python -m pip install --upgrade pip 85 | displayName: 'Upgrade pip' 86 | 87 | - script: | 88 | pip install -e . 89 | pip install .[plot] 90 | pip install .[parametric_umap] 91 | displayName: 'Install dependencies' 92 | condition: ${{ eq(parameters.includeReleaseCandidates, false) }} 93 | 94 | - script: | 95 | pip install --pre -e . 96 | pip install --pre .[plot] 97 | pip install --pre .[parametric_umap] 98 | displayName: 'Install dependencies (allow pre-releases)' 99 | condition: ${{ eq(parameters.includeReleaseCandidates, true) }} 100 | 101 | - script: | 102 | pip install pytest pytest-azurepipelines pytest-cov pytest-benchmark coveralls 103 | displayName: 'Install pytest' 104 | 105 | - script: | 106 | # export NUMBA_DISABLE_JIT=1 # Disable numba coverage so tests run on time for now. 107 | pytest umap/tests --show-capture=no -v --disable-warnings --junitxml=junit/test-results.xml --cov=umap/ --cov-report=xml --cov-report=html 108 | displayName: 'Run tests' 109 | 110 | - bash: | 111 | coveralls 112 | displayName: 'Publish to coveralls' 113 | condition: and(succeeded(), eq(variables.triggeredByPullRequest, false)) # Don't run this for PRs because they can't access pipeline secrets 114 | env: 115 | COVERALLS_REPO_TOKEN: $(COVERALLS_TOKEN) 116 | 117 | - task: PublishTestResults@2 118 | inputs: 119 | testResultsFiles: '$(System.DefaultWorkingDirectory)/**/coverage.xml' 120 | testRunTitle: '$(Agent.OS) - $(Build.BuildNumber)[$(Agent.JobName)] - Python $(python.version)' 121 | condition: succeededOrFailed() 122 | 123 | - stage: BuildPublishArtifact 124 | dependsOn: RunAllTests 125 | condition: and(succeeded(), startsWith(variables['Build.SourceBranch'], 'refs/tags/'), eq(variables.triggeredByPullRequest, false)) 126 | jobs: 127 | - job: BuildArtifacts 128 | displayName: Build source dists and wheels 129 | pool: 130 | vmImage: 'ubuntu-latest' 131 | steps: 132 | - task: UsePythonVersion@0 133 | inputs: 134 | versionSpec: '3.10' 135 | displayName: 'Use Python 3.10' 136 | 137 | - script: | 138 | python -m pip install --upgrade pip 139 | pip install wheel 140 | pip install -e . 141 | displayName: 'Install package locally' 142 | 143 | - bash: | 144 | python setup.py sdist bdist_wheel 145 | ls -l dist/ 146 | displayName: 'Build package' 147 | 148 | - bash: | 149 | export PACKAGE_VERSION="$(python setup.py --version)" 150 | echo "Package Version: ${PACKAGE_VERSION}" 151 | echo "##vso[task.setvariable variable=packageVersionFormatted;]release-${PACKAGE_VERSION}" 152 | displayName: 'Get package version' 153 | 154 | - script: | 155 | echo "Version in git tag $(Build.SourceBranchName) does not match version derived from setup.py $(packageVersionFormatted)" 156 | exit 1 157 | displayName: Raise error if version doesnt match tag 158 | condition: and(succeeded(), ne(variables['Build.SourceBranchName'], variables['packageVersionFormatted'])) 159 | 160 | - task: DownloadSecureFile@1 161 | name: PYPIRC_CONFIG 162 | displayName: 'Download pypirc' 163 | inputs: 164 | secureFile: 'pypirc' 165 | 166 | - script: | 167 | pip install twine 168 | twine upload --repository pypi --config-file $(PYPIRC_CONFIG.secureFilePath) dist/* 169 | displayName: 'Upload to PyPI' 170 | condition: and(succeeded(), eq(variables['Build.SourceBranchName'], variables['packageVersionFormatted'])) 171 | 172 | -------------------------------------------------------------------------------- /ci_scripts/install.sh: -------------------------------------------------------------------------------- 1 | if [[ "$DISTRIB" == "conda" ]]; then 2 | 3 | # Deactivate the travis-provided virtual environment and setup a 4 | # conda-based environment instead 5 | if [ $TRAVIS_OS_NAME = 'linux' ]; then 6 | # Only Linux has a virtual environment activated; Mac does not. 7 | deactivate 8 | fi 9 | 10 | # Use the miniconda installer for faster download / install of conda 11 | # itself 12 | pushd . 13 | cd 14 | mkdir -p download 15 | cd download 16 | echo "Cached in $HOME/download :" 17 | ls -l 18 | echo 19 | # For now, ignoring the cached file. 20 | # if [[ ! -f miniconda.sh ]] 21 | # then 22 | if [ $TRAVIS_OS_NAME = 'osx' ]; then 23 | # MacOS URL found here: https://docs.conda.io/en/latest/miniconda.html 24 | wget \ 25 | https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh \ 26 | -O miniconda.sh 27 | else 28 | wget \ 29 | http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \ 30 | -O miniconda.sh 31 | fi 32 | # fi 33 | chmod +x miniconda.sh && ./miniconda.sh -b -p $HOME/miniconda 34 | cd .. 35 | export PATH=$HOME/miniconda/bin:$HOME/miniconda3/bin:$PATH 36 | conda update --yes conda 37 | popd 38 | 39 | # Configure the conda environment and put it in the path using the 40 | # provided versions 41 | # conda create -n testenv --yes python=$PYTHON_VERSION pip \ 42 | # numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION numba=$NUMBA_VERSION scikit-learn \ 43 | # pytest "tensorflow-mkl>=2.2.0" 44 | if [ $TRAVIS_OS_NAME = 'osx' ]; then 45 | conda create -q -n testenv --yes python=$PYTHON_VERSION numpy scipy scikit-learn \ 46 | numba pytest pandas 47 | # pip install bokeh 48 | # pip install datashader 49 | # pip install holoviews 50 | conda install --yes "tensorflow>=2.0.0" 51 | else 52 | conda create -q -n testenv --yes python=$PYTHON_VERSION numpy scipy scikit-learn \ 53 | numba pandas bokeh holoviews datashader scikit-image pytest pytest-benchmark \ 54 | "tensorflow-mkl>=2.2.0" 55 | fi 56 | 57 | source activate testenv 58 | 59 | # black requires Python 3.x; don't try to install for Python 2.7 test 60 | if [[ "$PYTHON_VERSION" != "2.7" ]]; then 61 | pip install black 62 | pip install pynndescent 63 | fi 64 | 65 | if [[ "$COVERAGE" == "true" ]]; then 66 | pip install coverage coveralls 67 | pip install pytest-cov pytest-benchmark # pytest coverage plugin 68 | fi 69 | 70 | python --version 71 | python -c "import numpy; print('numpy %s' % numpy.__version__)" 72 | python -c "import scipy; print('scipy %s' % scipy.__version__)" 73 | python -c "import numba; print('numba %s' % numba.__version__)" 74 | python -c "import sklearn; print('scikit-learn %s' % sklearn.__version__)" 75 | python setup.py develop 76 | else 77 | pip install pynndescent # test with optional pynndescent dependency 78 | pip install pandas 79 | pip install bokeh 80 | pip install datashader 81 | pip install matplotlib 82 | pip install holoviews 83 | pip install scikit-image 84 | pip install "tensorflow>=2.2.0" 85 | pip install -e . 86 | fi 87 | -------------------------------------------------------------------------------- /ci_scripts/success.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | if [[ "$COVERAGE" == "true" ]]; then 4 | # # Need to run coveralls from a git checkout, so we copy .coverage 5 | # # from TEST_DIR where nosetests has been run 6 | # cp $TEST_DIR/.coverage $TRAVIS_BUILD_DIR 7 | # cd $TRAVIS_BUILD_DIR 8 | # Ignore coveralls failures as the coveralls server is not 9 | # very reliable but we don't want travis to report a failure 10 | # in the github UI just because the coverage report failed to 11 | # be published. 12 | coveralls || echo "Coveralls upload failed" 13 | fi 14 | -------------------------------------------------------------------------------- /ci_scripts/test.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | #if [[ "$COVERAGE" == "true" ]]; then 4 | # black --check $MODULE 5 | #fi 6 | 7 | if [[ "$COVERAGE" == "true" ]]; then 8 | export NUMBA_DISABLE_JIT=1 9 | pytest --cov=umap/ --cov-report=xml --cov-report=html --show-capture=no -v --disable-warnings 10 | else 11 | pytest --show-capture=no -v --disable-warnings 12 | fi 13 | -------------------------------------------------------------------------------- /doc/.gitignore: -------------------------------------------------------------------------------- 1 | venv 2 | umap 3 | setup.py 4 | paper.md 5 | paper.bib 6 | LICENSE.txt 7 | CODE_OF_CONDUCT.md 8 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = umap 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /doc/_static/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/_static/.gitkeep -------------------------------------------------------------------------------- /doc/api.rst: -------------------------------------------------------------------------------- 1 | UMAP API Guide 2 | ============== 3 | 4 | UMAP has only two classes, :class:`UMAP`, and :class:`ParametricUMAP`, which inherits from it. 5 | 6 | UMAP 7 | ---- 8 | 9 | .. autoclass:: umap.umap_.UMAP 10 | :members: 11 | 12 | ParametricUMAP 13 | ---- 14 | 15 | .. autoclass:: umap.parametric_umap.ParametricUMAP 16 | :members: 17 | 18 | A number of internal functions can also be accessed separately for more fine tuned work. 19 | 20 | Useful Functions 21 | ---------------- 22 | 23 | .. automodule:: umap.umap_ 24 | :members: 25 | :exclude-members: UMAP 26 | 27 | -------------------------------------------------------------------------------- /doc/bokeh_digits_plot.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.datasets import load_digits 3 | import pandas as pd 4 | 5 | digits = load_digits() 6 | 7 | import umap 8 | 9 | reducer = umap.UMAP(random_state=42) 10 | embedding = reducer.fit_transform(digits.data) 11 | 12 | from io import BytesIO 13 | from PIL import Image 14 | import base64 15 | 16 | 17 | def embeddable_image(data): 18 | img_data = 255 - 15 * data.astype(np.uint8) 19 | image = Image.fromarray(img_data, mode="L").resize((64, 64), Image.BICUBIC) 20 | buffer = BytesIO() 21 | image.save(buffer, format="png") 22 | for_encoding = buffer.getvalue() 23 | return "data:image/png;base64," + base64.b64encode(for_encoding).decode() 24 | 25 | 26 | from bokeh.plotting import figure, show, output_file 27 | from bokeh.models import HoverTool, ColumnDataSource, CategoricalColorMapper 28 | from bokeh.palettes import Spectral10 29 | 30 | output_file("basic_usage_bokeh_example.html") 31 | 32 | digits_df = pd.DataFrame(embedding, columns=("x", "y")) 33 | digits_df["digit"] = [str(x) for x in digits.target] 34 | digits_df["image"] = list(map(embeddable_image, digits.images)) 35 | 36 | datasource = ColumnDataSource(digits_df) 37 | color_mapping = CategoricalColorMapper( 38 | factors=[str(9 - x) for x in digits.target_names], palette=Spectral10 39 | ) 40 | 41 | plot_figure = figure( 42 | title="UMAP projection of the Digits dataset", 43 | plot_width=600, 44 | plot_height=600, 45 | tools=("pan, wheel_zoom, reset"), 46 | ) 47 | 48 | plot_figure.add_tools( 49 | HoverTool( 50 | tooltips=""" 51 |
52 |
53 | 54 |
55 |
56 | Digit: 57 | @digit 58 |
59 |
60 | """ 61 | ) 62 | ) 63 | 64 | plot_figure.circle( 65 | "x", 66 | "y", 67 | source=datasource, 68 | color=dict(field="digit", transform=color_mapping), 69 | line_alpha=0.6, 70 | fill_alpha=0.6, 71 | size=4, 72 | ) 73 | show(plot_figure) 74 | -------------------------------------------------------------------------------- /doc/doc_requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.13 2 | scipy>=0.19 3 | scikit-learn>=0.19 4 | numba>=0.37 5 | bokeh>=0.13 6 | datashader>=0.6 7 | seaborn>=0.8 8 | tqdm 9 | sphinx-gallery 10 | numpydoc 11 | -------------------------------------------------------------------------------- /doc/exploratory_analysis.rst: -------------------------------------------------------------------------------- 1 | Exploratory Analysis of Interesting Datasets 2 | ============================================ 3 | 4 | UMAP is a useful tool for general exploratory analysis of data -- it can provide 5 | a unique lens through which to view data that can highlight structures and 6 | properties hiding in data that are not as apparent when analysed with other techniques. 7 | Below is a selection of uses cases of UMAP being used for interesting explorations 8 | of intriguing datasets -- everything from pure math and outputs of neural networks, 9 | to philosophy articles, and scientific texts. 10 | 11 | Prime factorizations of numbers 12 | ------------------------------- 13 | What would happen if we applied UMAP to the integers? First we would need a way 14 | to express an integer in a high dimensional space. That can be done by looking 15 | at the prime factorization of each number. Next you have to take enough numbers 16 | to actually generate an interesting visualization. John Williamson set about doing 17 | exactly this, and the results are fascinating. While they may not actually tell us 18 | anything new about number theory they do highlight interesting structures 19 | in prime factorizations, and demonstrate how UMAP can aid in interesting explorations 20 | of datasets that we might think we know well. It's worth visiting the linked article 21 | below as Dr. Williamson provides a rich and detailed exploration of UMAP as 22 | applied to prime factorizations of integers. 23 | 24 | .. image:: images/umap_primes.png 25 | :width: 400px 26 | 27 | `UMAP on prime factorizations `__ 28 | 29 | Thanks to John Williamson. 30 | 31 | Structure of Recent Philosophy 32 | ------------------------------ 33 | Philosophy is an incredibly diverse subject, ranging from social and moral philosophy to 34 | logic and philosophy of math; from analysis of ancient Greek philosophy to modern business 35 | ethics. If we could get an overview of all the philosophy papers published in the last 36 | century what might it look like? Maximilian Noichl provides just such an exploration, 37 | looking at a large sampling of philosophy papers and comparing them according to their 38 | citations. The results are intriguing, and can be explored interactively in the 39 | viewer Maximilian built for it. 40 | 41 | .. image:: images/structure_recent_phil.png 42 | :width: 400px 43 | 44 | `Structure of Recent Philosophy `__ 45 | 46 | Thanks to Maximilian Noichl. 47 | 48 | Language, Context, and Geometry in Neural Networks 49 | -------------------------------------------------- 50 | Among recent developments in natural language processing is the BERT neural network 51 | based technique for analysis of language. Among many things that BERT can do one is 52 | context sensitive embeddings of words -- providing numeric vector representations of words 53 | that are sensitive to the context of how the word is used. Exactly what goes on inside 54 | the neural network to do this is a little mysterious (since the network is very complex 55 | with many many parameters). A tram of researchers from Google set out to explore the 56 | word embedding space generated by BERT, and among the tools used was UMAP. The linked 57 | blog post provides a detailed and inspiring analysis of what BERT's word embeddings 58 | look like, and how the different layers of BERT represent different aspects of language. 59 | 60 | .. image:: images/bert_embedding.png 61 | :width: 400px 62 | 63 | `Language, Context, and Geometry in Neural Networks `__ 64 | 65 | Thanks to Andy Coenen, Emily Reif, Ann Yuan, Been Kim, Adam Pearce, Fernanda Viégas, and Martin Wattenberg. 66 | 67 | Activation Atlas 68 | ---------------- 69 | Understanding the image processing capabilities (and deficits!) of modern 70 | convolutional neural networks is a challenge. Certainly these models are capable 71 | of amazing feats in, for example, image classification. They can also be brittle 72 | in unexpected ways, with carefully designed images able to induce otherwise 73 | baffling mis-classifications. To better understand this researchers from 74 | Google and OpenAI built the activation atlas -- analysing the space of activations 75 | of a neural network. Here UMAP provides a means to compress the activation landscape 76 | down to 2 dimensions for visualization. The result was an impressive interactive paper 77 | in the Distill journal, providing rich visualizations and new insights into 78 | the working of convolutional neural networks. 79 | 80 | .. image:: images/activation_atlas.png 81 | :width: 400px 82 | 83 | `The Activation Atlas `__ 84 | 85 | Thanks to Shan Carter, Zan Armstrong, Ludwig Schubert, Ian Johnson, and Chris Olah. 86 | 87 | Open Syllabus Galaxy 88 | -------------------- 89 | Suppose you wanted to explore the space of commonly assigned texts from Open Syllabus? That 90 | gives you over 150,000 texts to consider. Since the texts are open you can actually analyse 91 | the text content involved. With some NLP and neural network wizardry David McClure build 92 | a network of such texts and then used node2vec and UMAP to generate a map of them. The result 93 | is a galaxy of textbooks showing inter-relationships between subjects, similar and related texts, 94 | and generally just a an interesting ladscape of science to be explored. As with some 95 | of the other projects here David made a great interactive viewer allowing for rich exploration 96 | of the results. 97 | 98 | .. image:: images/syllabus_galaxy.png 99 | :width: 400px 100 | 101 | `Open Syllabus Galaxy `__ 102 | 103 | Thanks to David McClure. 104 | -------------------------------------------------------------------------------- /doc/images/20newsgroups_hellinger_counts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/20newsgroups_hellinger_counts.png -------------------------------------------------------------------------------- /doc/images/20newsgroups_hellinger_tfidf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/20newsgroups_hellinger_tfidf.png -------------------------------------------------------------------------------- /doc/images/BasicUsage_13_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/BasicUsage_13_1.png -------------------------------------------------------------------------------- /doc/images/BasicUsage_18_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/BasicUsage_18_0.png -------------------------------------------------------------------------------- /doc/images/BasicUsage_20_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/BasicUsage_20_1.png -------------------------------------------------------------------------------- /doc/images/BasicUsage_26_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/BasicUsage_26_1.png -------------------------------------------------------------------------------- /doc/images/BasicUsage_6_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/BasicUsage_6_1.png -------------------------------------------------------------------------------- /doc/images/ESM_metagenomic_atlas.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/ESM_metagenomic_atlas.png -------------------------------------------------------------------------------- /doc/images/Hyperbolic_tiling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/Hyperbolic_tiling.png -------------------------------------------------------------------------------- /doc/images/SupervisedUMAP_10_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/SupervisedUMAP_10_1.png -------------------------------------------------------------------------------- /doc/images/SupervisedUMAP_15_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/SupervisedUMAP_15_1.png -------------------------------------------------------------------------------- /doc/images/SupervisedUMAP_22_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/SupervisedUMAP_22_1.png -------------------------------------------------------------------------------- /doc/images/SupervisedUMAP_31_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/SupervisedUMAP_31_0.png -------------------------------------------------------------------------------- /doc/images/SupervisedUMAP_33_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/SupervisedUMAP_33_0.png -------------------------------------------------------------------------------- /doc/images/UMAPTransform_15_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/UMAPTransform_15_0.png -------------------------------------------------------------------------------- /doc/images/UMAPTransform_21_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/UMAPTransform_21_0.png -------------------------------------------------------------------------------- /doc/images/UMAP_zoo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/UMAP_zoo.png -------------------------------------------------------------------------------- /doc/images/activation_atlas.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/activation_atlas.png -------------------------------------------------------------------------------- /doc/images/aligned_umap_basic_usage_15_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/aligned_umap_basic_usage_15_0.png -------------------------------------------------------------------------------- /doc/images/aligned_umap_basic_usage_22_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/aligned_umap_basic_usage_22_0.png -------------------------------------------------------------------------------- /doc/images/aligned_umap_basic_usage_29_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/aligned_umap_basic_usage_29_0.png -------------------------------------------------------------------------------- /doc/images/aligned_umap_basic_usage_29_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/aligned_umap_basic_usage_29_1.png -------------------------------------------------------------------------------- /doc/images/aligned_umap_basic_usage_5_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/aligned_umap_basic_usage_5_1.png -------------------------------------------------------------------------------- /doc/images/aligned_umap_pendigits_3d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/aligned_umap_pendigits_3d.png -------------------------------------------------------------------------------- /doc/images/aligned_umap_pendigits_3d_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/aligned_umap_pendigits_3d_1.png -------------------------------------------------------------------------------- /doc/images/aligned_umap_pendigits_anim.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/aligned_umap_pendigits_anim.gif -------------------------------------------------------------------------------- /doc/images/aligned_umap_politics_demo_31_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/aligned_umap_politics_demo_31_0.png -------------------------------------------------------------------------------- /doc/images/aligned_umap_politics_demo_spaghetti.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/aligned_umap_politics_demo_spaghetti.png -------------------------------------------------------------------------------- /doc/images/audio_explorer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/audio_explorer.png -------------------------------------------------------------------------------- /doc/images/basic_usage_17_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/basic_usage_17_1.png -------------------------------------------------------------------------------- /doc/images/basic_usage_22_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/basic_usage_22_0.png -------------------------------------------------------------------------------- /doc/images/basic_usage_24_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/basic_usage_24_2.png -------------------------------------------------------------------------------- /doc/images/basic_usage_30_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/basic_usage_30_1.png -------------------------------------------------------------------------------- /doc/images/basic_usage_8_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/basic_usage_8_1.png -------------------------------------------------------------------------------- /doc/images/bert_embedding.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/bert_embedding.png -------------------------------------------------------------------------------- /doc/images/c_elegans_3d.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/c_elegans_3d.jpg -------------------------------------------------------------------------------- /doc/images/clustering_10_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/clustering_10_1.png -------------------------------------------------------------------------------- /doc/images/clustering_16_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/clustering_16_1.png -------------------------------------------------------------------------------- /doc/images/clustering_27_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/clustering_27_1.png -------------------------------------------------------------------------------- /doc/images/clustering_31_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/clustering_31_1.png -------------------------------------------------------------------------------- /doc/images/clustering_6_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/clustering_6_1.png -------------------------------------------------------------------------------- /doc/images/composing_models_11_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/composing_models_11_1.png -------------------------------------------------------------------------------- /doc/images/composing_models_14_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/composing_models_14_1.png -------------------------------------------------------------------------------- /doc/images/composing_models_18_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/composing_models_18_1.png -------------------------------------------------------------------------------- /doc/images/composing_models_20_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/composing_models_20_1.png -------------------------------------------------------------------------------- /doc/images/composing_models_23_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/composing_models_23_1.png -------------------------------------------------------------------------------- /doc/images/composing_models_36_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/composing_models_36_1.png -------------------------------------------------------------------------------- /doc/images/composing_models_38_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/composing_models_38_1.png -------------------------------------------------------------------------------- /doc/images/composing_models_42_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/composing_models_42_1.png -------------------------------------------------------------------------------- /doc/images/composing_models_44_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/composing_models_44_1.png -------------------------------------------------------------------------------- /doc/images/composing_models_47_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/composing_models_47_1.png -------------------------------------------------------------------------------- /doc/images/composing_models_50_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/composing_models_50_1.png -------------------------------------------------------------------------------- /doc/images/composing_models_6_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/composing_models_6_1.png -------------------------------------------------------------------------------- /doc/images/densmap_demo_10_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/densmap_demo_10_1.png -------------------------------------------------------------------------------- /doc/images/densmap_demo_13_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/densmap_demo_13_1.png -------------------------------------------------------------------------------- /doc/images/densmap_demo_16_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/densmap_demo_16_1.png -------------------------------------------------------------------------------- /doc/images/densmap_demo_19_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/densmap_demo_19_1.png -------------------------------------------------------------------------------- /doc/images/densmap_demo_21_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/densmap_demo_21_1.png -------------------------------------------------------------------------------- /doc/images/densmap_demo_24_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/densmap_demo_24_1.png -------------------------------------------------------------------------------- /doc/images/densmap_demo_6_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/densmap_demo_6_1.png -------------------------------------------------------------------------------- /doc/images/embedding_projector.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/embedding_projector.png -------------------------------------------------------------------------------- /doc/images/embedding_space_11_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/embedding_space_11_1.png -------------------------------------------------------------------------------- /doc/images/embedding_space_15_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/embedding_space_15_1.png -------------------------------------------------------------------------------- /doc/images/embedding_space_18_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/embedding_space_18_1.png -------------------------------------------------------------------------------- /doc/images/embedding_space_26_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/embedding_space_26_0.png -------------------------------------------------------------------------------- /doc/images/embedding_space_29_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/embedding_space_29_1.png -------------------------------------------------------------------------------- /doc/images/embedding_space_33_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/embedding_space_33_1.png -------------------------------------------------------------------------------- /doc/images/embedding_space_37_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/embedding_space_37_1.png -------------------------------------------------------------------------------- /doc/images/embedding_space_39_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/embedding_space_39_1.png -------------------------------------------------------------------------------- /doc/images/embedding_space_47_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/embedding_space_47_1.png -------------------------------------------------------------------------------- /doc/images/embedding_space_52_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/embedding_space_52_1.png -------------------------------------------------------------------------------- /doc/images/embedding_space_55_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/embedding_space_55_0.png -------------------------------------------------------------------------------- /doc/images/embedding_space_59_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/embedding_space_59_0.png -------------------------------------------------------------------------------- /doc/images/embedding_space_7_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/embedding_space_7_1.png -------------------------------------------------------------------------------- /doc/images/exploring_fashion_mnist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/exploring_fashion_mnist.png -------------------------------------------------------------------------------- /doc/images/how_umap_works_basic_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/how_umap_works_basic_graph.png -------------------------------------------------------------------------------- /doc/images/how_umap_works_fuzzy_open_cover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/how_umap_works_fuzzy_open_cover.png -------------------------------------------------------------------------------- /doc/images/how_umap_works_local_metric_open_cover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/how_umap_works_local_metric_open_cover.png -------------------------------------------------------------------------------- /doc/images/how_umap_works_open_cover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/how_umap_works_open_cover.png -------------------------------------------------------------------------------- /doc/images/how_umap_works_raw_data.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/how_umap_works_raw_data.png -------------------------------------------------------------------------------- /doc/images/how_umap_works_raw_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/how_umap_works_raw_graph.png -------------------------------------------------------------------------------- /doc/images/how_umap_works_umap_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/how_umap_works_umap_graph.png -------------------------------------------------------------------------------- /doc/images/how_umap_works_umap_graph_layout.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/how_umap_works_umap_graph_layout.png -------------------------------------------------------------------------------- /doc/images/how_umap_works_umap_layout.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/how_umap_works_umap_layout.png -------------------------------------------------------------------------------- /doc/images/how_umap_works_umap_open_cover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/how_umap_works_umap_open_cover.png -------------------------------------------------------------------------------- /doc/images/how_umap_works_uniform_distribution_cover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/how_umap_works_uniform_distribution_cover.png -------------------------------------------------------------------------------- /doc/images/inverse_transform_13_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/inverse_transform_13_0.png -------------------------------------------------------------------------------- /doc/images/inverse_transform_20_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/inverse_transform_20_1.png -------------------------------------------------------------------------------- /doc/images/inverse_transform_26_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/inverse_transform_26_0.png -------------------------------------------------------------------------------- /doc/images/inverse_transform_7_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/inverse_transform_7_1.png -------------------------------------------------------------------------------- /doc/images/mutual_nn_umap_20ngc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/mutual_nn_umap_20ngc.png -------------------------------------------------------------------------------- /doc/images/mutual_nn_umap_FMNIST.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/mutual_nn_umap_FMNIST.png -------------------------------------------------------------------------------- /doc/images/mutual_nn_umap_MNIST.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/mutual_nn_umap_MNIST.png -------------------------------------------------------------------------------- /doc/images/mutual_nn_umap_connectivity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/mutual_nn_umap_connectivity.png -------------------------------------------------------------------------------- /doc/images/mutual_nn_umap_lc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/mutual_nn_umap_lc.png -------------------------------------------------------------------------------- /doc/images/mutual_nn_umap_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/mutual_nn_umap_results.png -------------------------------------------------------------------------------- /doc/images/organogenesis_paper.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/organogenesis_paper.png -------------------------------------------------------------------------------- /doc/images/orion_particles.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/orion_particles.png -------------------------------------------------------------------------------- /doc/images/outliers_10_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/outliers_10_0.png -------------------------------------------------------------------------------- /doc/images/outliers_12_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/outliers_12_2.png -------------------------------------------------------------------------------- /doc/images/outliers_13_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/outliers_13_2.png -------------------------------------------------------------------------------- /doc/images/outliers_15_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/outliers_15_0.png -------------------------------------------------------------------------------- /doc/images/outliers_19_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/outliers_19_0.png -------------------------------------------------------------------------------- /doc/images/outliers_22_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/outliers_22_2.png -------------------------------------------------------------------------------- /doc/images/outliers_27_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/outliers_27_0.png -------------------------------------------------------------------------------- /doc/images/outliers_5_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/outliers_5_0.png -------------------------------------------------------------------------------- /doc/images/outliers_7_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/outliers_7_2.png -------------------------------------------------------------------------------- /doc/images/outliers_9_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/outliers_9_0.png -------------------------------------------------------------------------------- /doc/images/parameters_13_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/parameters_13_1.png -------------------------------------------------------------------------------- /doc/images/parameters_13_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/parameters_13_2.png -------------------------------------------------------------------------------- /doc/images/parameters_13_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/parameters_13_3.png -------------------------------------------------------------------------------- /doc/images/parameters_13_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/parameters_13_4.png -------------------------------------------------------------------------------- /doc/images/parameters_13_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/parameters_13_5.png -------------------------------------------------------------------------------- /doc/images/parameters_13_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/parameters_13_6.png -------------------------------------------------------------------------------- /doc/images/parameters_13_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/parameters_13_7.png -------------------------------------------------------------------------------- /doc/images/parameters_16_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/parameters_16_1.png -------------------------------------------------------------------------------- /doc/images/parameters_16_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/parameters_16_2.png -------------------------------------------------------------------------------- /doc/images/parameters_16_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/parameters_16_3.png -------------------------------------------------------------------------------- /doc/images/parameters_16_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/parameters_16_4.png -------------------------------------------------------------------------------- /doc/images/parameters_16_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/parameters_16_5.png -------------------------------------------------------------------------------- /doc/images/parameters_16_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/parameters_16_6.png -------------------------------------------------------------------------------- /doc/images/parameters_19_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/parameters_19_1.png -------------------------------------------------------------------------------- /doc/images/parameters_21_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/parameters_21_1.png -------------------------------------------------------------------------------- /doc/images/parameters_32_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/parameters_32_1.png -------------------------------------------------------------------------------- /doc/images/parameters_32_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/parameters_32_2.png -------------------------------------------------------------------------------- /doc/images/parameters_32_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/parameters_32_3.png -------------------------------------------------------------------------------- /doc/images/parameters_32_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/parameters_32_4.png -------------------------------------------------------------------------------- /doc/images/parameters_32_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/parameters_32_5.png -------------------------------------------------------------------------------- /doc/images/parameters_8_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/parameters_8_1.png -------------------------------------------------------------------------------- /doc/images/performance_14_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/performance_14_1.png -------------------------------------------------------------------------------- /doc/images/performance_15_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/performance_15_1.png -------------------------------------------------------------------------------- /doc/images/performance_17_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/performance_17_1.png -------------------------------------------------------------------------------- /doc/images/performance_18_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/performance_18_1.png -------------------------------------------------------------------------------- /doc/images/performance_20_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/performance_20_1.png -------------------------------------------------------------------------------- /doc/images/performance_21_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/performance_21_1.png -------------------------------------------------------------------------------- /doc/images/pixplot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/pixplot.png -------------------------------------------------------------------------------- /doc/images/plotting_10_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/plotting_10_1.png -------------------------------------------------------------------------------- /doc/images/plotting_12_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/plotting_12_1.png -------------------------------------------------------------------------------- /doc/images/plotting_14_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/plotting_14_1.png -------------------------------------------------------------------------------- /doc/images/plotting_19_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/plotting_19_2.png -------------------------------------------------------------------------------- /doc/images/plotting_21_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/plotting_21_2.png -------------------------------------------------------------------------------- /doc/images/plotting_32_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/plotting_32_2.png -------------------------------------------------------------------------------- /doc/images/plotting_34_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/plotting_34_2.png -------------------------------------------------------------------------------- /doc/images/plotting_38_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/plotting_38_1.png -------------------------------------------------------------------------------- /doc/images/plotting_40_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/plotting_40_1.png -------------------------------------------------------------------------------- /doc/images/plotting_42_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/plotting_42_0.png -------------------------------------------------------------------------------- /doc/images/plotting_44_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/plotting_44_1.png -------------------------------------------------------------------------------- /doc/images/plotting_8_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/plotting_8_2.png -------------------------------------------------------------------------------- /doc/images/population_umap.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/population_umap.jpg -------------------------------------------------------------------------------- /doc/images/precomputed_k-nn11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/precomputed_k-nn11.png -------------------------------------------------------------------------------- /doc/images/precomputed_k-nn13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/precomputed_k-nn13.png -------------------------------------------------------------------------------- /doc/images/precomputed_k-nn17.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/precomputed_k-nn17.png -------------------------------------------------------------------------------- /doc/images/precomputed_k-nn6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/precomputed_k-nn6.png -------------------------------------------------------------------------------- /doc/images/pumap-only.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/pumap-only.png -------------------------------------------------------------------------------- /doc/images/reproducibility_10_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/reproducibility_10_1.png -------------------------------------------------------------------------------- /doc/images/reproducibility_14_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/reproducibility_14_1.png -------------------------------------------------------------------------------- /doc/images/reproducibility_18_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/reproducibility_18_1.png -------------------------------------------------------------------------------- /doc/images/reproducibility_6_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/reproducibility_6_1.png -------------------------------------------------------------------------------- /doc/images/retrain_pumap_emb_x1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/retrain_pumap_emb_x1.png -------------------------------------------------------------------------------- /doc/images/retrain_pumap_emb_x2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/retrain_pumap_emb_x2.png -------------------------------------------------------------------------------- /doc/images/retrain_pumap_history.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/retrain_pumap_history.png -------------------------------------------------------------------------------- /doc/images/retrain_pumap_p_emb_x1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/retrain_pumap_p_emb_x1.png -------------------------------------------------------------------------------- /doc/images/retrain_pumap_p_emb_x2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/retrain_pumap_p_emb_x2.png -------------------------------------------------------------------------------- /doc/images/retrain_pumap_summary_2_removed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/retrain_pumap_summary_2_removed.png -------------------------------------------------------------------------------- /doc/images/simplices.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/simplices.png -------------------------------------------------------------------------------- /doc/images/single_cell_umap.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/single_cell_umap.jpg -------------------------------------------------------------------------------- /doc/images/sparse_11_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/sparse_11_1.png -------------------------------------------------------------------------------- /doc/images/sparse_18_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/sparse_18_0.png -------------------------------------------------------------------------------- /doc/images/sparse_31_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/sparse_31_1.png -------------------------------------------------------------------------------- /doc/images/sparse_35_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/sparse_35_0.png -------------------------------------------------------------------------------- /doc/images/structure_recent_phil.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/structure_recent_phil.png -------------------------------------------------------------------------------- /doc/images/syllabus_galaxy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/syllabus_galaxy.png -------------------------------------------------------------------------------- /doc/images/time_cluster.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/time_cluster.png -------------------------------------------------------------------------------- /doc/images/umap-loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/umap-loss.png -------------------------------------------------------------------------------- /doc/images/umap-only.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/umap-only.png -------------------------------------------------------------------------------- /doc/images/umap_explorer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/umap_explorer.png -------------------------------------------------------------------------------- /doc/images/umap_primes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/umap_primes.png -------------------------------------------------------------------------------- /doc/images/umap_surrey.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/umap_surrey.png -------------------------------------------------------------------------------- /doc/images/umap_vae_pca.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/images/umap_vae_pca.png -------------------------------------------------------------------------------- /doc/index.rst: -------------------------------------------------------------------------------- 1 | .. umap documentation master file, created by 2 | sphinx-quickstart on Fri Jun 8 10:09:40 2018. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | .. image:: logo_large.png 7 | :width: 600 8 | :align: center 9 | 10 | UMAP: Uniform Manifold Approximation and Projection for Dimension Reduction 11 | =========================================================================== 12 | 13 | Uniform Manifold Approximation and Projection (UMAP) is a dimension reduction 14 | technique that can be used for visualisation similarly to t-SNE, but also for 15 | general non-linear dimension reduction. The algorithm is founded on three 16 | assumptions about the data 17 | 18 | 1. The data is uniformly distributed on Riemannian manifold; 19 | 2. The Riemannian metric is locally constant (or can be approximated as such); 20 | 3. The manifold is locally connected. 21 | 22 | From these assumptions it is possible to model the manifold with a fuzzy 23 | topological structure. The embedding is found by searching for a low dimensional 24 | projection of the data that has the closest possible equivalent fuzzy 25 | topological structure. 26 | 27 | The details for the underlying mathematics can be found in 28 | `our paper on ArXiv `_: 29 | 30 | McInnes, L, Healy, J, *UMAP: Uniform Manifold Approximation and Projection 31 | for Dimension Reduction*, ArXiv e-prints 1802.03426, 2018 32 | 33 | You can find the software `on github `_. 34 | 35 | **Installation** 36 | 37 | Conda install, via the excellent work of the conda-forge team: 38 | 39 | .. code:: bash 40 | 41 | conda install -c conda-forge umap-learn 42 | 43 | The conda-forge packages are available for linux, OS X, and Windows 64 bit. 44 | 45 | PyPI install, presuming you have numba and sklearn and all its requirements 46 | (numpy and scipy) installed: 47 | 48 | .. code:: bash 49 | 50 | pip install umap-learn 51 | 52 | 53 | .. toctree:: 54 | :maxdepth: 2 55 | :caption: User Guide / Tutorial: 56 | 57 | basic_usage 58 | parameters 59 | plotting 60 | reproducibility 61 | transform 62 | inverse_transform 63 | parametric_umap 64 | transform_landmarked_pumap 65 | sparse 66 | supervised 67 | clustering 68 | outliers 69 | composing_models 70 | densmap_demo 71 | mutual_nn_umap 72 | document_embedding 73 | embedding_space 74 | aligned_umap_basic_usage 75 | aligned_umap_politics_demo 76 | precomputed_k-nn 77 | benchmarking 78 | release_notes 79 | faq 80 | 81 | .. toctree:: 82 | :maxdepth: 2 83 | :caption: Background on UMAP: 84 | 85 | how_umap_works 86 | performance 87 | 88 | .. toctree:: 89 | :maxdepth: 2 90 | :caption: Examples of UMAP usage 91 | 92 | interactive_viz 93 | exploratory_analysis 94 | scientific_papers 95 | nomic_atlas_umap_of_text_embeddings 96 | nomic_atlas_visualizing_mnist_training 97 | 98 | .. toctree:: 99 | :caption: API Reference: 100 | 101 | api 102 | 103 | 104 | 105 | Indices and tables 106 | ================== 107 | 108 | * :ref:`genindex` 109 | * :ref:`modindex` 110 | * :ref:`search` 111 | -------------------------------------------------------------------------------- /doc/interactive_viz.rst: -------------------------------------------------------------------------------- 1 | Interactive Visualizations 2 | ========================== 3 | 4 | UMAP has found use in a number of interesting interactive visualization projects, analyzing everything from 5 | images from photo archives, to word embedding, animal point clouds, and even sound. Sometimes it has also 6 | been used in interesting interactive tools that simply help a user to get an intuition for what the algorithm 7 | is doing (by applying it to intuitive 3D data). Below are some amazing projects that make use of UMAP. 8 | 9 | UMAP Zoo 10 | -------- 11 | An exploration of how UMAP behaves when dimension reducing point clouds of animals. It is 12 | interactive, letting you switch between 2D and 3D representations and has a wide selection 13 | of different animals. Attempting to guess the animal from the 2D UMAP representation is a 14 | fun game. In practice this tool can go a long way to helping to build at least some intuitions 15 | for what UMAP tends to do with data. 16 | 17 | .. image:: images/UMAP_zoo.png 18 | :width: 400px 19 | 20 | `UMAP Zoo `__ 21 | 22 | Thanks to Douglas Duhaime. 23 | 24 | Tensorflow Embedding Projector 25 | ------------------------------ 26 | If you just want to explore UMAP embeddings of datasets then the Embedding Projector 27 | from Tensorflow is a great way to do that. As well as having a good interactive 3D view 28 | it also has facilities for inspecting and searching labels and tags on the data. By default 29 | it loads up word2vec vectors, but you can upload any data you wish. You can then select 30 | the UMAP option among the tabs for embeddings choices (alongside PCA and t-SNE). 31 | 32 | .. image:: images/embedding_projector.png 33 | :width: 400px 34 | 35 | `Embedding Projector `__ 36 | 37 | Thanks to Andy Coenen and the Embedding Projector team. 38 | 39 | PixPlot 40 | ------- 41 | PixPlot provides an overview of large photo-collections. In the demonstration app 42 | from Yale's Digital Humanities lab it provides a window on the Meserve-Kunhardt Collection 43 | of historical photographs. The approach uses convolutional neural nets to reduce the images 44 | to 2048 dimensions, and then uses UMAP to present them in a 2-dimensional map which the 45 | user can interactive pan and zoom around in. This process results in similar photos 46 | ending up in similar regions of the map allowing for easy perusal of large photo 47 | collections. The PixPlot project is also available on github in case you wish to train 48 | it on your own photo collection. 49 | 50 | .. image:: images/pixplot.png 51 | :width: 400px 52 | 53 | `PixPlot `__ 54 | 55 | Thanks to Douglas Duhaime and the Digital Humanities lab at Yale. 56 | 57 | UMAP Explorer 58 | ------------- 59 | A great demonstration of building a web based app for interactively exploring a UMAP embedding. 60 | In this case it provides an exploration of UMAP run on the MNIST digits dataset. Each point in 61 | the embedding is rendered as the digit image, and coloured according to the digit class. Mousing 62 | over the images will make them larger and provide a view of the digit in the upper left. You can also pan 63 | and zoom around the emebdding to get a better understanding of how UMAP has mapped the different styles of 64 | handwritten digits down to 2 dimensions. 65 | 66 | .. image:: images/umap_explorer.png 67 | :width: 400px 68 | 69 | `UMAP Explorer `__ 70 | 71 | Thanks to Grant Custer. 72 | 73 | Audio Explorer 74 | -------------- 75 | The Audio Explorer uses UMAP to embed sound samples into a 2 dimensional space for easy exploration. 76 | The goal here is to take a large library of sounds samples and put similar sounds in similar regions 77 | of the map, allowing a user to quickly mouse over and listen to various variations of a given sample 78 | to quickly find exactly the right sound sample to use. Audio explorer uses MFCCs and/or WaveNet to 79 | provide an initial useful vector representation of the sound samples, before applying UMAP to 80 | generate the 2D embedding. 81 | 82 | .. image:: images/audio_explorer.png 83 | :width: 400px 84 | 85 | `Audio Explorer `__ 86 | 87 | Thanks to Leon Fedden. 88 | 89 | Orion Search 90 | ------------ 91 | Orion is an open source research measurement and knowledge discovery tool that enables you to monitor 92 | progress in science, visually explore the scientific landscape and search for relevant publications. 93 | Orion encodes bioRxiv paper abstracts to dense vectors with Sentence Transformers and projects them to 94 | an interactive 3D visualisation with UMAP. You can filter the UMAP embeddings by topic and country. 95 | You can also select a subset of the UMAP embeddings and retrieve those papers and their metadata. 96 | 97 | .. image:: images/orion_particles.png 98 | :width: 400px 99 | 100 | `Orion Search `__ 101 | 102 | Thanks to Kostas Stathoulopoulos, Zac Ioannidis and Lilia Villafuerte. 103 | 104 | Exploring Fashion MNIST 105 | ----------------------- 106 | A web based interactive exploration of a 3D UMAP embedding ran on the Fashion MNIST dataset. Users can 107 | freely navigate the 3D space, jumping to a specific image by clicking an image or entering an image id. 108 | Like Grant Custer's UMAP Explorer, each point is rendered as the actual image and colored according to 109 | the label. It is also similar to the Tensorflow Embedding Projector, but designed more specifically for 110 | Fashion MNIST, thus more efficient and capable of showing all the 70k images. 111 | 112 | .. image:: images/exploring_fashion_mnist.png 113 | :width: 400px 114 | 115 | `Exploring Fashion MNIST `__ 116 | 117 | Thanks to stwind. 118 | 119 | ESM Metagenomic Atlas 120 | --------------------- 121 | The ESM Metagenomic Atlas contains over 600 million predicted protein structures, revealing the 122 | metagenomic world in a way we have never seen before. The Explore page visualizes a sample of 1 123 | million of these. (That’s about how much a browser can handle.) We represent each protein in this 124 | dataset as a single point, and reveal the actual protein structure when zooming in or when hovering 125 | over it. The color of each point corresponds to the similarity to the closest match we could find in 126 | UniRef90, the reference database of known protein sequences. The position in the map is a 127 | two-dimensional projection, which groups sequences by similarity, as determined by our language 128 | model’s internal representation. The map reveals structure at different scales: local neighbors in 129 | the same cluster tend to have similar structures, while nearby clusters preserve certain patterns 130 | like secondary structure elements. 131 | 132 | .. image:: images/ESM_metagenomic_atlas.png 133 | :width: 400px 134 | 135 | Thanks to the authors of "Evolutionary-scale prediction of atomic level protein structure 136 | with a language model". 137 | 138 | `ESM Metagenomic Atlas `__ 139 | 140 | Interactive UMAP with Nomic Atlas 141 | --------------------------------- 142 | 143 | `Nomic Atlas `_ is a platform for interactively visualizing and exploring massive datasets. It automates the creation of embeddings and 2D coordinate projections using UMAP. 144 | 145 | .. image:: https://assets.nomicatlas.com/mnist-training-embeddings-umap-short.gif 146 | :alt: UMAP interactive visualization with Nomic Atlas 147 | :align: center 148 | :width: 600 149 | 150 | Atlas provides: 151 | 152 | * In-browser analysis of your UMAP data with the `Atlas Analyst `_ 153 | * Vector search over your UMAP data using the `Nomic API `_ 154 | * Interactive features like zooming, recoloring, searching, and filtering in the `Nomic Atlas data map `_ 155 | * Scalability for millions of data points 156 | * Rich information display on hover 157 | * Shareable UMAPs via URL links to your embeddings and data maps in Atlas 158 | 159 | ----------- 160 | 161 | .. toctree:: 162 | :maxdepth: 1 163 | :caption: Nomic Atlas Examples 164 | 165 | nomic_atlas_umap_of_text_embeddings 166 | nomic_atlas_visualizing_mnist_training_dynamics 167 | -------------------------------------------------------------------------------- /doc/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/logo.png -------------------------------------------------------------------------------- /doc/logo_large.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/doc/logo_large.png -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | set SPHINXPROJ=umap 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 20 | echo.installed, then set the SPHINXBUILD environment variable to point 21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 22 | echo.may add the Sphinx directory to PATH. 23 | echo. 24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /doc/mutual_nn_umap.rst: -------------------------------------------------------------------------------- 1 | Improving the Separation Between Similar Classes Using a Mutual k-NN Graph 2 | ========================================================================== 3 | 4 | This post briefly explains how the connectivity of the original graphical representation can adversely affect the resulting UMAP embeddings. 5 | 6 | In default UMAP, a weighted k nearest neighbor (k-NN) graph, which connects each 7 | datapoint to its 𝑘 nearest neighbors based on some distance metric, is constructed 8 | and used to generate the initial topological representation of a dataset. 9 | 10 | However, previous research has shown that using a weighted k-NN 11 | graph may not provide an accurate representation of the underlying local 12 | structure for a high dimensional dataset. The k-NN graph is relatively susceptible 13 | to the “curse of dimensionality” and the associated distance concentration 14 | effect, where distances are similar in high dimensions, as well as the 15 | hub effect, where certain points become highly influential when highly 16 | connected. This skews the local representation of high dimensional data, 17 | deteriorating its performance for various similarity-based machine learning 18 | tasks. 19 | 20 | A recent paper titled 21 | `Clustering with UMAP: Why and How Connectivity Matters `__ 22 | proposes a refinement in the graph construction stage of the UMAP algorithm 23 | that uses a weighted mutual k-NN graph rather than it vanilla counterpart, 24 | to reduce the undesired distance concentration and hub effects. 25 | 26 | Mutual k-NN graphs have been shown to contain many 27 | desirable properties when combating the “curse of dimensionality” as discussed in 28 | `this paper `__ . However, one pitfall of using a 29 | mutual k-NN graph over the original k-NN graph is that it often 30 | contains disconnected components and potential isolated vertices. 31 | 32 | This violates one of UMAP primary assumptions that "The manifold is locally connected." To 33 | combat the issue of isolated components, the authors consider different methods that have 34 | been previously used to augment and increase the connectivity of the mutual k-NN graph: 35 | 36 | 1. ``NN``: To minimally connect isolated vertices and satisfy the assumption that the underlying manifold is locally connected, we add an undirected edge between each isolated vertex and its original nearest neighbor (de Sousa, Rezende, and Batista 2013).Note that the resulting graph may still contain disconnected components. 37 | 2. ``MST-min``: To achieve a connected graph, add the minimum number of edges from a maximum spanning tree to the mutual k-NN graph that has been weighted with similarity-based metrics(Ozaki et al. 2011). We adapt this by calculating the minimum spanning tree for distances. 38 | 3. ``MST-all``: Adding all the edges of the MST. 39 | 40 | .. image:: images/mutual_nn_umap_connectivity.png 41 | 42 | They also different ways to obtain the new local neighborhood for each point ``x_i``: 43 | 44 | 1. ``Adjacent Neighbors``: Only consider neighbors that are directly connected(adjacent) to ``x_i`` in the connected mutual k-NN graph. 45 | 2. ``Path Neighbors``: Using shortest path distance to find the new k closest points to ``x_i`` with respect to the connected mutual k-NN graph. This shortest path distance can be considered a new distance metric as it directly aligns with UMAP’s definition of an extended pseudo-metric space. 46 | 47 | .. image:: images/mutual_nn_umap_lc.png 48 | :width: 600 49 | :align: center 50 | 51 | 52 | Visualizing the Results 53 | ---------------------------------------------- 54 | To see the differences between using a mutual k-NN graph vs the original k-NN graph as 55 | the starting topology for UMAP, let's visualize the 2D projections generated for MNIST, FMNIST, and 20 56 | NG Count Vectors using each of the discussed methods. For all code snippets to reproduce the results and visualizations, please refer 57 | to this `Github repo `__. Will be adding this soon as a 58 | mode to the original implementation. 59 | 60 | We’ll start with MNIST digits, a collection of 70,000 gray-scale images of hand-written digits: 61 | 62 | .. image:: images/mutual_nn_umap_MNIST.png 63 | :width: 850 64 | :align: center 65 | 66 | In general, for most of the mutual k-NN graph based vectors, there 67 | is a better separation between similar classes than the original UMAP vectors 68 | regardless of connectivity (NN, MST variants). Connecting isolated vertices in 69 | the mutual k-NN graph to their original nearest neighbor produced the desired 70 | separation between similar classes such as with the 4, 7, 9 in MNIST. This follows 71 | our intuition given that mutual k-NN graphs have previously been shown as a useful 72 | method for removing edges between points that are only loosely similar. 73 | 74 | Similar results are observed for the Fashion-MNIST(FMNIST) dataset, a collection of 70,000 75 | gray-scale images of fashion items: 76 | 77 | .. image:: images/mutual_nn_umap_FMNIST.png 78 | :width: 850 79 | :align: center 80 | 81 | For the FMNIST dataset, the vectors using the aforementioned methods preserve 82 | the global structure between clothing classes (T-shirt/top, Coat, Trouser, and etc.) 83 | from footwear classes (Sandal, Sneaker, Ankle-boot) while also depicting a clearer 84 | separation between the footwear classes. This is contrasted with original 85 | UMAP which has poorer separation between similar classes like the footwear classes. 86 | 87 | For both MNIST and FMNIST, NN which naively connects isolated vertices 88 | to their nearest neighbor had multiple small clusters of points scattered 89 | throughout the vector space. This makes sense given using NN for connectivity can 90 | still cause the resulting manifold to be broken into many small components. 91 | 92 | It would be fair to assume that augmenting the mutual k-NN graph with a "higher connectivity" 93 | would always be better as it reduces random scattering of points. However, 94 | too much connectivity such as with MST-all can also hurt which is further discussed in the paper. 95 | 96 | Finally, we depict the embeddings generated using the 20 newsgroup dataset, a collection of 97 | 18846 documents, transformed using sklearns CountVectorizer: 98 | 99 | .. image:: images/mutual_nn_umap_20ngc.png 100 | :width: 850 101 | :align: center 102 | 103 | We can see there is better distinction between similar subjects such as the recreation 104 | (rec) topics. 105 | 106 | Visually, the vector generated using the Adjacent Neighbors 107 | and MST-min result in disperse dense clusters of points e.g, the footwear classes in 108 | FMNIST and the recreation topics in 20 NG. However for Path Neighbors, the groups of 109 | points belonging to the same class are less dispersed. This is because Adjacent Neighbors are not guaranteed to have k connected neighbors for each local 110 | neighborhood. Points with smaller neighborhoods will be close to primarily few adjacent 111 | neighbors and repelled further away from the other points. 112 | 113 | To evaluate these methods quantitatively, the authors compare the clustering performance 114 | of the resulting low dimensional vectors generated. Below shows the Normalised Mutual 115 | Information NMI results after performing KMeans(for more information of the results please refer to `the full 116 | paper `__). 117 | 118 | .. image:: images/mutual_nn_umap_results.png 119 | 120 | These quantitative experiments show that MST variants combined with Path 121 | Neighbors can help produce better clustering results and how the initialization 122 | of a weighted connected graph is critical to the success of topology based 123 | dimensionality reduction methods like UMAP. 124 | 125 | 126 | Citing our work 127 | --------------- 128 | If you use this implementation or reference the results in your work, please cite the paper: 129 | 130 | .. code:: bibtex 131 | 132 | @article{Dalmia2021UMAPConnectivity, 133 | author={Ayush Dalmia and Suzanna Sia}, 134 | title={Clustering with {UMAP:} Why and How Connectivity Matters}, 135 | journal={CoRR}, 136 | volume={abs/2108.05525}, 137 | year={2021}, 138 | url={https://arxiv.org/abs/2108.05525}, 139 | eprinttype={arXiv}, 140 | eprint={2108.05525}, 141 | timestamp={Wed, 18 Aug 2021 19:45:42 +0200}, 142 | biburl={https://dblp.org/rec/journals/corr/abs-2108-05525.bib}, 143 | bibsource={dblp computer science bibliography, https://dblp.org} 144 | } 145 | -------------------------------------------------------------------------------- /doc/nomic_atlas_umap_of_text_embeddings.rst: -------------------------------------------------------------------------------- 1 | UMAP of Text Embeddings with Nomic Atlas 2 | ======================= 3 | 4 | `Nomic Atlas `_ is a platform for interactively visualizing and exploring massive datasets. It automates the creation of embeddings and 2D coordinate projections using UMAP. 5 | 6 | .. image:: https://assets.nomicatlas.com/airline-reviews-umap.gif 7 | :alt: UMAP interactive visualization with Nomic Atlas 8 | :align: center 9 | :width: 600 10 | 11 | Nomic Atlas automatically generates embeddings for your data and allows you to explore large datasets in a web browser. Atlas provides: 12 | 13 | * In-browser analysis of your UMAP data with the `Atlas Analyst `_ 14 | * Vector search over your UMAP data using the `Nomic API `_ 15 | * Interactive features like zooming, recoloring, searching, and filtering in the `Nomic Atlas data map `_ 16 | * Scalability for millions of data points 17 | * Rich information display on hover 18 | * Shareable UMAPs via URL links to your embeddings and data maps in Atlas 19 | 20 | This example demonstrates how to use `Nomic Atlas `_ to create interactive maps of text using embeddings and UMAP. 21 | 22 | Setup 23 | ----- 24 | 25 | 1. Get the required python packages with ``pip instll nomic pandas`` 26 | 2. Get a Nomic API key `here `_ 27 | 3. Run ``nomic login nk-...`` in a terminal window or use the following code: 28 | 29 | .. code:: python3 30 | 31 | import nomic 32 | nomic.login('nk-...') 33 | 34 | 35 | 36 | Download Example Data 37 | -------------------- 38 | 39 | .. code:: python3 40 | 41 | import pandas as pd 42 | 43 | # Example data 44 | df = pd.read_csv("https://docs.nomic.ai/singapore_airlines_reviews.csv") 45 | 46 | Create Atlas Dataset 47 | -------------------- 48 | 49 | .. code:: python3 50 | 51 | from nomic import AtlasDataset 52 | dataset = AtlasDataset("airline-reviews-data") 53 | 54 | Upload to Atlas 55 | --------------- 56 | 57 | .. code:: python3 58 | 59 | dataset.add_data(df) 60 | 61 | Create Data Map 62 | --------------- 63 | 64 | We specify the ``text`` field from ``df`` as the field to create embeddings from. We choose some standard UMAP parameters as well. 65 | 66 | .. code:: python3 67 | 68 | from nomic.data_inference import ProjectionOptions 69 | 70 | # model="umap" is how you choose UMAP in Nomic Atlas 71 | # You can adjust n_neighbors, min_dist, 72 | # and n_epochs as you would with the UMAP library. 73 | atlas_map = dataset.create_index( 74 | indexed_field='text', 75 | projection=ProjectionOptions( 76 | model="umap", 77 | n_neighbors=20, 78 | min_dist=0.01, 79 | n_epochs=200 80 | ) 81 | ) 82 | 83 | print(f"Explore your interactive map at: {atlas_map.map_link}") 84 | 85 | Your map will be available in your `Atlas Dashboard `_. -------------------------------------------------------------------------------- /doc/plotting_example_interactive.py: -------------------------------------------------------------------------------- 1 | import sklearn.datasets 2 | import pandas as pd 3 | import numpy as np 4 | import umap 5 | import umap.plot 6 | 7 | fmnist = sklearn.datasets.fetch_openml("Fashion-MNIST") 8 | 9 | mapper = umap.UMAP().fit(fmnist.data[:30000]) 10 | 11 | hover_data = pd.DataFrame({"index": np.arange(30000), "label": fmnist.target[:30000]}) 12 | hover_data["item"] = hover_data.label.map( 13 | { 14 | "0": "T-shirt/top", 15 | "1": "Trouser", 16 | "2": "Pullover", 17 | "3": "Dress", 18 | "4": "Coat", 19 | "5": "Sandal", 20 | "6": "Shirt", 21 | "7": "Sneaker", 22 | "8": "Bag", 23 | "9": "Ankle Boot", 24 | } 25 | ) 26 | 27 | umap.plot.output_file("plotting_interactive_example.html") 28 | 29 | p = umap.plot.interactive( 30 | mapper, labels=fmnist.target[:30000], hover_data=hover_data, point_size=2 31 | ) 32 | umap.plot.show(p) 33 | -------------------------------------------------------------------------------- /doc/plotting_example_nomic_atlas.py: -------------------------------------------------------------------------------- 1 | from nomic import AtlasDataset 2 | from nomic.data_inference import ProjectionOptions 3 | import pandas as pd 4 | 5 | # Example data 6 | df = pd.read_csv("https://docs.nomic.ai/singapore_airlines_reviews.csv") 7 | 8 | dataset = AtlasDataset("example-dataset-airline-reviews") 9 | 10 | dataset.add_data(df) 11 | 12 | atlas_map = dataset.create_index( 13 | indexed_field="text", 14 | projection=ProjectionOptions( 15 | model="umap", n_neighbors=20, min_dist=0.01, n_epochs=200 16 | ), 17 | ) 18 | -------------------------------------------------------------------------------- /doc/release_notes.rst: -------------------------------------------------------------------------------- 1 | Release Notes 2 | ============= 3 | 4 | Some notes on new features in various releases 5 | 6 | What's new in 0.5 7 | ----------------- 8 | 9 | * ParametricUMAP learns embeddings with neural networks. 10 | * AlignedUMAP can align multiple embeddings using relations between datasets. 11 | * DensMAP can preserve local density information in embeddings. 12 | * UMAP now depends on PyNNDescent, but has faster more parallel performance as a result. 13 | * UMAP now supports an ``update`` method to add new data and retrain. 14 | * Various performance improvements and bug fixes. 15 | * Additional plotting support, including text searching in interactive plots. 16 | * Support for "maximal distances" in neighbor graphs. 17 | 18 | What's new in 0.4 19 | ----------------- 20 | 21 | * Inverse transform method. Generate points in the original space corresponding to points in embedded space. (Thanks to Joseph Courtney) 22 | * Different embedding spaces. Support for embedding to a variety of different spaces other than Euclidean. (Thanks to Joseph Courtney) 23 | * New metrics, including Hellinger distance for sparse count data. 24 | * New discrete/label metrics, including hierarchical categories, counts, ordinal data, and string edit distance. 25 | * Support for parallelism in neighbor search and layout optimization. (Thanks to Tom White) 26 | * Support for alternative methods to handling duplicated data samples. (Thanks to John Healy) 27 | * New plotting methods for fast and easy plots. 28 | * Initial support for dataframe embedding -- still experimental, but worth trying. 29 | * Support for transform methods with sparse data. 30 | * Multithreading support when no random seed is set. 31 | 32 | 33 | What's new in 0.3 34 | ----------------- 35 | 36 | * Supervised and semi-supervised dimension reduction. Support for using labels or partial labels for dimension reduction. 37 | * Transform method. Support for adding new unseen points to an existing embedding. 38 | * Performance improvements. 39 | 40 | 41 | What's new in 0.2 42 | ----------------- 43 | 44 | * A new layout algorithm that handles large datasets (more) correctly. 45 | * Performance improvements. -------------------------------------------------------------------------------- /doc/reproducibility.rst: -------------------------------------------------------------------------------- 1 | 2 | UMAP Reproducibility 3 | ==================== 4 | 5 | UMAP is a stochastic algorithm -- it makes use of randomness both to 6 | speed up approximation steps, and to aid in solving hard optimization 7 | problems. This means that different runs of UMAP can produce different 8 | results. UMAP is relatively stable -- thus the variance between runs 9 | should ideally be relatively small -- but different runs may have 10 | variations none the less. To ensure that results can be reproduced 11 | exactly UMAP allows the user to set a random seed state. 12 | 13 | Since version 0.4 UMAP also support multi-threading for faster 14 | performance; when performing optimization this exploits the fact that 15 | race conditions between the threads are acceptable within certain 16 | optimization phases. Unfortunately this means that the randomness in 17 | UMAP outputs for the multi-threaded case depends not only on the random 18 | seed input, but also on race conditions between threads during 19 | optimization, over which no control can be had. This means that 20 | multi-threaded UMAP results cannot be explicitly reproduced. 21 | 22 | In this tutorial we'll look at how UMAP can be used in multi-threaded 23 | mode for performance purposes, and alternatively how we can fix random 24 | states to ensure exact reproducibility at the cost of some performance. 25 | First let's load the relevant libraries and get some data; in this case 26 | the MNIST digits dataset. 27 | 28 | .. code:: python3 29 | 30 | import numpy as np 31 | import sklearn.datasets 32 | import umap 33 | import umap.plot 34 | 35 | .. code:: python3 36 | 37 | data, labels = sklearn.datasets.fetch_openml( 38 | 'mnist_784', version=1, return_X_y=True 39 | ) 40 | 41 | With data in hand let's run UMAP on it, and note how long it takes to 42 | run: 43 | 44 | .. code:: python3 45 | 46 | %%time 47 | mapper1 = umap.UMAP().fit(data) 48 | 49 | 50 | .. parsed-literal:: 51 | 52 | CPU times: user 3min 18s, sys: 3.84 s, total: 3min 22s 53 | Wall time: 1min 29s 54 | 55 | 56 | The thing to note here is that the "Wall time" is significantly smaller 57 | than the CPU time -- this means that multiple CPU cores were used. For 58 | this particular demonstration I am making use of the latest version of 59 | PyNNDescent for nearest neighbor search (UMAP will use it if you have it 60 | installed) which supports multi-threading as well. The result is a very 61 | fast fitting to the data that does an effective job of using several 62 | cores. If you are on a large server with many cores available and don't 63 | wish to use them *all* (which is the default situation) you can 64 | currently control the number of cores used by setting the numba 65 | environment variable ``NUMBA_NUM_THREADS``; see the `numba 66 | documentation `__ 67 | for more details. 68 | 69 | Now let's plot our result to see what the embedding looks like: 70 | 71 | .. code:: python3 72 | 73 | umap.plot.points(mapper1, labels=labels) 74 | 75 | 76 | .. image:: images/reproducibility_6_1.png 77 | 78 | 79 | Now, let's run UMAP again and compare the results to that of our first 80 | run. 81 | 82 | .. code:: python3 83 | 84 | %%time 85 | mapper2 = umap.UMAP().fit(data) 86 | 87 | 88 | .. parsed-literal:: 89 | 90 | CPU times: user 2min 53s, sys: 4.16 s, total: 2min 57s 91 | Wall time: 1min 5s 92 | 93 | 94 | You will note that this time we ran *even faster*. This is because 95 | during the first run numba was still JIT compiling some of the code in 96 | the background. In contrast, this time that work has already been done, 97 | so it no longer takes up any of our run-time. We see that we are still 98 | making use of mutliple cores well. 99 | 100 | Now let's plot the results of this second run and compare to the first: 101 | 102 | .. code:: python3 103 | 104 | umap.plot.points(mapper2, labels=labels) 105 | 106 | 107 | .. image:: images/reproducibility_10_1.png 108 | 109 | 110 | Qualitatively this looks very similar, but a little closer inspection 111 | will quickly show that the results are actually different between the 112 | runs. Note that even in versions of UMAP prior to 0.4 this would have 113 | been the case -- since we fixed no specific random seed, and were thus 114 | using the current random state of the system which will naturally differ 115 | between runs. This is the default behaviour, as is standard with sklearn 116 | estimators that are stochastic. Rather than having a default random seed 117 | the user is required to explicitly provide one should they want a 118 | reproducible result. As noted by Vito Zanotelli 119 | 120 | ... setting a random seed is like signing a waiver "I am aware that 121 | this is a stochastic algorithm and I have done sufficient tests to 122 | confirm that my main conclusions are not affected by this 123 | randomness". 124 | 125 | With that in mind, let's see what happens if we set an explicit 126 | ``random_state`` value: 127 | 128 | .. code:: python3 129 | 130 | %%time 131 | mapper3 = umap.UMAP(random_state=42).fit(data) 132 | 133 | 134 | .. parsed-literal:: 135 | 136 | CPU times: user 2min 27s, sys: 4.16 s, total: 2min 31s 137 | Wall time: 1min 56s 138 | 139 | 140 | The first thing to note that that this run took significantly longer 141 | (despite having all the functions JIT compiled by numba already). Then 142 | note that the Wall time and CPU times are now much closer to each other 143 | -- we are no longer exploiting multiple cores to anywhere near the same 144 | degree. This is because by setting a ``random_state`` we are effectively 145 | turning off any of the multi-threading that does not support explicit 146 | reproducibility. Let's plot the results: 147 | 148 | .. code:: python3 149 | 150 | umap.plot.points(mapper3, labels=labels) 151 | 152 | 153 | .. image:: images/reproducibility_14_1.png 154 | 155 | 156 | We arrive at much the same results as before from a qualitative point of 157 | view, but again inspection will show that there are some differences. 158 | More importantly this result should now be reproducible. Thus we can run 159 | UMAP again, with the same ``random_state`` set ... 160 | 161 | .. code:: python3 162 | 163 | %%time 164 | mapper4 = umap.UMAP(random_state=42).fit(data) 165 | 166 | 167 | .. parsed-literal:: 168 | 169 | CPU times: user 2min 26s, sys: 4.13 s, total: 2min 30s 170 | Wall time: 1min 54s 171 | 172 | 173 | Again, this takes longer than the earlier runs with no ``random_state`` 174 | set. However when we plot the results of the second run we see that they 175 | look not merely qualitatively similar, but instead appear to be almost 176 | identical: 177 | 178 | .. code:: python3 179 | 180 | umap.plot.points(mapper4, labels=labels) 181 | 182 | .. image:: images/reproducibility_18_1.png 183 | 184 | 185 | We can, in fact, check that the results are identical by verifying that 186 | each and every coordinate of the resulting embeddings match perfectly: 187 | 188 | .. code:: python3 189 | 190 | np.all(mapper3.embedding_ == mapper4.embedding_) 191 | 192 | 193 | .. parsed-literal:: 194 | 195 | True 196 | 197 | So we have, in fact, reproduced the embedding exactly. 198 | -------------------------------------------------------------------------------- /doc/scientific_papers.rst: -------------------------------------------------------------------------------- 1 | Scientific Papers 2 | ================= 3 | 4 | UMAP has been used in a wide variety of scientific publications from a diverse range of 5 | fields. Here we will highlight a small selection of papers that demonstrate both 6 | the depth of analysis, and breadth of subjects, UMAP can be used for. These range from biology, 7 | to machine learning, and even social science. 8 | 9 | 10 | The single-cell transcriptional landscape of mammalian organogenesis 11 | -------------------------------------------------------------------- 12 | A detailed look at the development of mouse embryos from a single-cell view. UMAP 13 | is used as a core piece of The Monocle3 software suite for identifying cell types 14 | and trajectories. This was a major paper in Nature, demonstrating the power 15 | of UMAP for large scale scientific endeavours. 16 | 17 | .. image:: images/organogenesis_paper.png 18 | :width: 400px 19 | 20 | `Link to the paper `__ 21 | 22 | A lineage-resolved molecular atlas of C. elegans embryogenesis at single-cell resolution 23 | ---------------------------------------------------------------------------------------- 24 | Still in the realm of single cell biology this paper looks at the developmental 25 | landscape of the round-word C. elegans. UMAP is used for detailed analysis of 26 | the developmental trajectories of cells, looking at global scales, and then 27 | digging down to look at individual organs. The result is an impressive 28 | array of UMAP visualisations that tease out ever finer structures in 29 | cellular development. 30 | 31 | .. image:: images/c_elegans_3d.jpg 32 | :width: 400px 33 | 34 | `Link to the paper `__ 35 | 36 | Exploring Neural Networks with Activation Atlases 37 | ------------------------------------------------- 38 | Understanding the image processing capabilities (and deficits!) of modern 39 | convolutional neural networks is a challenge. This interactive paper from 40 | Distill seeks to provide a way to "peek inside the black box" by looking 41 | at the activations throughout the network. By mapping this high dimensional 42 | data down to 2D with UMAP the authors can construct an "atlas" of how 43 | different images are perceived by the network. 44 | 45 | .. image:: images/activation_atlas.png 46 | :width: 400px 47 | 48 | `Link to the paper `__ 49 | 50 | TimeCluster: dimension reduction applied to temporal data for visual analytics 51 | ------------------------------------------------------------------------------ 52 | An interesting approach to time-series analysis, targeted toward cases where the 53 | time series has repeating patterns -- though no necessarily of a consistently 54 | periodic nature. The approach involves dimension reduction and clustering 55 | of sliding window blocks of the time-series. The result is a map where 56 | repeating behaviour is exposed as loop structures. This can be useful 57 | for both clustering similar blocks within a time-series, or finding 58 | outliers. 59 | 60 | .. image:: images/time_cluster.png 61 | :width: 400px 62 | 63 | `Link to the paper `__ 64 | 65 | Dimensionality reduction for visualizing single-cell data using UMAP 66 | -------------------------------------------------------------------- 67 | An early paper on applying UMAP to single-cell biology data. It looks at 68 | both, gene-expression data and flow-cytometry data, and compares UMAP to 69 | t-SNE both in terms of performance and quality of results. This is a good 70 | introduction to using UMAP for single-cell biology data. 71 | 72 | .. image:: images/single_cell_umap.jpg 73 | :width: 400px 74 | 75 | `Link to the paper `__ 76 | 77 | 78 | Revealing multi-scale population structure in large cohorts 79 | ----------------------------------------------------------- 80 | A paper looking at population genetics which uses UMAP as a means 81 | to visualise population structures. This produced some intriguing 82 | visualizations, and was one of the first of several papers taking 83 | this visualization approach. It also includes some novel visualizations 84 | using UMAP projections to 3D as RGB color specifications for 85 | data points, allowing the UMAP structure to be visualized in 86 | geographic maps based on where the samples were drawn from. 87 | 88 | .. image:: images/population_umap.jpg 89 | :width: 400px 90 | 91 | `Link to the paper `__ 92 | 93 | 94 | Understanding Vulnerability of Children in Surrey 95 | -------------------------------------------------- 96 | An example of the use of UMAP in sociological studies -- in this case 97 | looking at children in Surrey, British Columbia. Here UMAP is used as 98 | a tool to aid in general data analysis, and proves effective for the 99 | tasks to which it was put. 100 | 101 | .. image:: images/umap_surrey.png 102 | :width: 400px 103 | 104 | `Link to the paper `__ -------------------------------------------------------------------------------- /docs_requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx>=1.8 2 | sphinx_gallery 3 | matplotlib 4 | pillow 5 | sphinx_rtd_theme 6 | numpydoc 7 | scipy 8 | -------------------------------------------------------------------------------- /examples/README.txt: -------------------------------------------------------------------------------- 1 | Gallery of Examples of UMAP usage 2 | --------------------------------- 3 | 4 | A small gallery collection examples of UMAP usage. Do you 5 | have an interesting UMAP plot that uses publicly available 6 | data? Submit a pull request to have it added as an example! -------------------------------------------------------------------------------- /examples/digits/digits.py: -------------------------------------------------------------------------------- 1 | from bokeh.plotting import figure, output_file, show 2 | from bokeh.models import CategoricalColorMapper, ColumnDataSource 3 | from bokeh.palettes import Category10 4 | 5 | import umap 6 | from sklearn.datasets import load_digits 7 | 8 | digits = load_digits() 9 | embedding = umap.UMAP().fit_transform(digits.data) 10 | 11 | output_file("digits.html") 12 | 13 | targets = [str(d) for d in digits.target_names] 14 | 15 | source = ColumnDataSource( 16 | dict( 17 | x=[e[0] for e in embedding], 18 | y=[e[1] for e in embedding], 19 | label=[targets[d] for d in digits.target], 20 | ) 21 | ) 22 | 23 | cmap = CategoricalColorMapper(factors=targets, palette=Category10[10]) 24 | 25 | p = figure(title="test umap") 26 | p.circle( 27 | x="x", 28 | y="y", 29 | source=source, 30 | color={"field": "label", "transform": cmap}, 31 | legend="label", 32 | ) 33 | 34 | show(p) 35 | -------------------------------------------------------------------------------- /examples/inverse_transform_example.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | from sklearn.datasets import fetch_openml 6 | 7 | import umap 8 | 9 | mnist = fetch_openml("Fashion-MNIST", version=1) 10 | 11 | 12 | trans = umap.UMAP( 13 | n_neighbors=10, 14 | random_state=42, 15 | metric="euclidean", 16 | output_metric="euclidean", 17 | init="spectral", 18 | verbose=True, 19 | ).fit(mnist.data) 20 | 21 | corners = np.array( 22 | [ 23 | [-5.1, 2.9], 24 | [-1.9, 6.4], 25 | [-5.4, -6.3], 26 | [8.3, 4.0], 27 | ] # 7 # 4 # 1 # 0 28 | ) 29 | 30 | test_pts = np.array( 31 | [ 32 | (corners[0] * (1 - x) + corners[1] * x) * (1 - y) 33 | + (corners[2] * (1 - x) + corners[3] * x) * y 34 | for y in np.linspace(0, 1, 10) 35 | for x in np.linspace(0, 1, 10) 36 | ] 37 | ) 38 | 39 | inv_transformed_points = trans.inverse_transform(test_pts) 40 | 41 | plt.scatter( 42 | trans.embedding_[:, 0], 43 | trans.embedding_[:, 1], 44 | c=mnist.target, 45 | cmap="Spectral", 46 | s=0.25, 47 | ) 48 | plt.colorbar(boundaries=np.arange(11) - 0.5).set_ticks(np.arange(10)) 49 | plt.scatter(test_pts[:, 0], test_pts[:, 1], marker="x", c="k") 50 | 51 | fig, ax = plt.subplots(10, 10) 52 | for i in range(10): 53 | for j in range(10): 54 | ax[i, j].imshow( 55 | inv_transformed_points[i * 10 + j].reshape(28, 28), origin="upper" 56 | ) 57 | ax[i, j].get_xaxis().set_visible(False) 58 | ax[i, j].get_yaxis().set_visible(False) 59 | 60 | plt.show() 61 | -------------------------------------------------------------------------------- /examples/iris/iris.py: -------------------------------------------------------------------------------- 1 | from bokeh.plotting import figure, output_file, show 2 | from bokeh.models import CategoricalColorMapper, ColumnDataSource 3 | from bokeh.palettes import Category10 4 | 5 | import umap 6 | from sklearn.datasets import load_iris 7 | 8 | iris = load_iris() 9 | embedding = umap.UMAP( 10 | n_neighbors=50, learning_rate=0.5, init="random", min_dist=0.001 11 | ).fit_transform(iris.data) 12 | 13 | output_file("iris.html") 14 | 15 | 16 | targets = [str(d) for d in iris.target_names] 17 | 18 | source = ColumnDataSource( 19 | dict( 20 | x=[e[0] for e in embedding], 21 | y=[e[1] for e in embedding], 22 | label=[targets[d] for d in iris.target], 23 | ) 24 | ) 25 | 26 | cmap = CategoricalColorMapper(factors=targets, palette=Category10[10]) 27 | 28 | p = figure(title="Test UMAP on Iris dataset") 29 | p.circle( 30 | x="x", 31 | y="y", 32 | source=source, 33 | color={"field": "label", "transform": cmap}, 34 | legend="label", 35 | ) 36 | 37 | show(p) 38 | -------------------------------------------------------------------------------- /examples/mnist_torus_sphere_example.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import matplotlib.pyplot as plt 4 | import numba 5 | import numpy as np 6 | from mayavi import mlab 7 | from sklearn.datasets import load_digits 8 | from sklearn.model_selection import train_test_split 9 | 10 | import umap 11 | 12 | digits = load_digits() 13 | X_train, X_test, y_train, y_test = train_test_split( 14 | digits.data, digits.target, stratify=digits.target, random_state=42 15 | ) 16 | 17 | target_spaces = ["plane", "torus", "sphere"] 18 | 19 | if "plane" in target_spaces: 20 | # embed onto a plane 21 | 22 | trans = umap.UMAP( 23 | n_neighbors=10, 24 | random_state=42, 25 | metric="euclidean", 26 | output_metric="euclidean", 27 | init="spectral", 28 | verbose=True, 29 | ).fit(X_train) 30 | 31 | plt.scatter( 32 | trans.embedding_[:, 0], trans.embedding_[:, 1], c=y_train, cmap="Spectral" 33 | ) 34 | plt.show() 35 | 36 | if "torus" in target_spaces: 37 | # embed onto a torus 38 | # note: this is a topological torus, not a geometric torus. Think 39 | # Pacman, not donut. 40 | 41 | @numba.njit(fastmath=True) 42 | def torus_euclidean_grad(x, y, torus_dimensions=(2 * np.pi, 2 * np.pi)): 43 | """Standard euclidean distance. 44 | 45 | ..math:: 46 | D(x, y) = \sqrt{\sum_i (x_i - y_i)^2} 47 | """ 48 | distance_sqr = 0.0 49 | g = np.zeros_like(x) 50 | for i in range(x.shape[0]): 51 | a = abs(x[i] - y[i]) 52 | if 2 * a < torus_dimensions[i]: 53 | distance_sqr += a**2 54 | g[i] = x[i] - y[i] 55 | else: 56 | distance_sqr += (torus_dimensions[i] - a) ** 2 57 | g[i] = (x[i] - y[i]) * (a - torus_dimensions[i]) / a 58 | distance = np.sqrt(distance_sqr) 59 | return distance, g / (1e-6 + distance) 60 | 61 | trans = umap.UMAP( 62 | n_neighbors=10, 63 | random_state=42, 64 | metric="euclidean", 65 | output_metric=torus_euclidean_grad, 66 | init="spectral", 67 | min_dist=0.15, # requires adjustment since the torus has limited space 68 | verbose=True, 69 | ).fit(X_train) 70 | 71 | mlab.clf() 72 | x, y, z = np.mgrid[-3:3:50j, -3:3:50j, -3:3:50j] 73 | 74 | # Plot a torus 75 | R = 2 76 | r = 1 77 | values = (R - np.sqrt(x**2 + y**2)) ** 2 + z**2 - r**2 78 | mlab.contour3d(x, y, z, values, color=(1.0, 1.0, 1.0), contours=[0]) 79 | 80 | # torus angles -> 3D 81 | x = (R + r * np.cos(trans.embedding_[:, 0])) * np.cos(trans.embedding_[:, 1]) 82 | y = (R + r * np.cos(trans.embedding_[:, 0])) * np.sin(trans.embedding_[:, 1]) 83 | z = r * np.sin(trans.embedding_[:, 0]) 84 | 85 | pts = mlab.points3d( 86 | x, y, z, y_train, colormap="spectral", scale_mode="none", scale_factor=0.1 87 | ) 88 | 89 | mlab.show() 90 | 91 | if "sphere" in target_spaces: 92 | # embed onto a sphere 93 | trans = umap.UMAP( 94 | n_neighbors=10, 95 | random_state=42, 96 | metric="euclidean", 97 | output_metric="haversine", 98 | init="spectral", 99 | min_dist=0.15, # requires adjustment since the sphere has limited space 100 | verbose=True, 101 | ).fit(X_train) 102 | 103 | mlab.clf() 104 | x, y, z = np.mgrid[-3:3:50j, -3:3:50j, -3:3:50j] 105 | 106 | # Plot a sphere 107 | r = 3 108 | values = x**2 + y**2 + z**2 - r**2 109 | mlab.contour3d(x, y, z, values, color=(1.0, 1.0, 1.0), contours=[0]) 110 | 111 | # latitude, longitude -> 3D 112 | x = r * np.sin(trans.embedding_[:, 0]) * np.cos(trans.embedding_[:, 1]) 113 | y = r * np.sin(trans.embedding_[:, 0]) * np.sin(trans.embedding_[:, 1]) 114 | z = r * np.cos(trans.embedding_[:, 0]) 115 | 116 | pts = mlab.points3d( 117 | x, y, z, y_train, colormap="spectral", scale_mode="none", scale_factor=0.2 118 | ) 119 | 120 | mlab.show() 121 | -------------------------------------------------------------------------------- /examples/mnist_transform_new_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | UMAP on the MNIST Digits dataset 5 | -------------------------------- 6 | 7 | A simple example demonstrating how to use UMAP on a larger 8 | dataset such as MNIST. We first pull the MNIST dataset and 9 | then use UMAP to reduce it to only 2-dimensions for 10 | easy visualisation. 11 | 12 | Note that UMAP manages to both group the individual digit 13 | classes, but also to retain the overall global structure 14 | among the different digit classes -- keeping 1 far from 15 | 0, and grouping triplets of 3,5,8 and 4,7,9 which can 16 | blend into one another in some cases. 17 | """ 18 | import matplotlib.pyplot as plt 19 | import seaborn as sns 20 | from sklearn.datasets import fetch_openml 21 | from sklearn.model_selection import train_test_split 22 | 23 | import umap 24 | 25 | sns.set(context="paper", style="white") 26 | 27 | mnist = fetch_openml("mnist_784", version=1) 28 | X_train, X_test, y_train, y_test = train_test_split( 29 | mnist.data, mnist.target, stratify=mnist.target, random_state=42 30 | ) 31 | 32 | reducer = umap.UMAP(random_state=42) 33 | embedding_train = reducer.fit_transform(X_train) 34 | embedding_test = reducer.transform(X_test) 35 | 36 | fig, ax = plt.subplots(1, 2, sharex=True, sharey=True, figsize=(12, 10)) 37 | ax[0].scatter( 38 | embedding_train[:, 0], embedding_train[:, 1], c=y_train, cmap="Spectral" # , s=0.1 39 | ) 40 | ax[1].scatter( 41 | embedding_test[:, 0], embedding_test[:, 1], c=y_test, cmap="Spectral" # , s=0.1 42 | ) 43 | plt.setp(ax[0], xticks=[], yticks=[]) 44 | plt.setp(ax[1], xticks=[], yticks=[]) 45 | plt.suptitle("MNIST data embedded into two dimensions by UMAP", fontsize=18) 46 | ax[0].set_title("Training Set", fontsize=12) 47 | ax[1].set_title("Test Set", fontsize=12) 48 | plt.show() 49 | -------------------------------------------------------------------------------- /examples/plot_algorithm_comparison.py: -------------------------------------------------------------------------------- 1 | """ 2 | Comparison of Dimension Reduction Techniques 3 | -------------------------------------------- 4 | 5 | A comparison of several different dimension reduction 6 | techniques on a variety of toy datasets. The datasets 7 | are all toy datasets, but should provide a representative 8 | range of the strengths and weaknesses of the different 9 | algorithms. 10 | 11 | The time to perform the dimension reduction with each 12 | algorithm and each dataset is recorded in the lower 13 | right of each plot. 14 | 15 | Things to note about the datasets: 16 | 17 | - Blobs: A set of five gaussian blobs in 10 dimensional 18 | space. This should be a prototypical example 19 | of something that should clearly separate 20 | even in a reduced dimension space. 21 | - Iris: a classic small dataset with one distinct class 22 | and two classes that are not clearly separated. 23 | - Digits: handwritten digits -- ideally different digit 24 | classes should form distinct groups. Due to 25 | the nature of handwriting digits may have several 26 | forms (crossed or uncrossed sevens, capped or 27 | straight line oes, etc.) 28 | - Wine: wine characteristics ideally used for a toy 29 | regression. Ultimately the data is essentially 30 | one dimensional in nature. 31 | - Swiss Roll: data is essentially a rectangle, but 32 | has been "rolled up" like a swiss roll 33 | in three dimensional space. Ideally a 34 | dimension reduction technique should 35 | be able to "unroll" it. The data 36 | has been coloured according to one dimension 37 | of the rectangle, so should form 38 | a rectangle of smooth color variation. 39 | - Sphere: the two dimensional surface of a three 40 | dimensional sphere. This cannot be represented 41 | accurately in two dimensions without tearing. 42 | The sphere has been coloured with hue around 43 | the equator and black to white from the south 44 | to north pole. 45 | """ 46 | 47 | import numpy as np 48 | import matplotlib.pyplot as plt 49 | import seaborn as sns 50 | import time 51 | 52 | from sklearn import datasets, decomposition, manifold, preprocessing 53 | from colorsys import hsv_to_rgb 54 | 55 | import umap 56 | 57 | sns.set(context="paper", style="white") 58 | 59 | blobs, blob_labels = datasets.make_blobs( 60 | n_samples=500, n_features=10, centers=5, random_state=42 61 | ) 62 | iris = datasets.load_iris() 63 | digits = datasets.load_digits(n_class=10) 64 | wine = datasets.load_wine() 65 | swissroll, swissroll_labels = datasets.make_swiss_roll( 66 | n_samples=1000, noise=0.1, random_state=42 67 | ) 68 | sphere = np.random.normal(size=(600, 3)) 69 | sphere = preprocessing.normalize(sphere) 70 | sphere_hsv = np.array( 71 | [ 72 | ( 73 | (np.arctan2(c[1], c[0]) + np.pi) / (2 * np.pi), 74 | np.abs(c[2]), 75 | min((c[2] + 1.1), 1.0), 76 | ) 77 | for c in sphere 78 | ] 79 | ) 80 | sphere_colors = np.array([hsv_to_rgb(*c) for c in sphere_hsv]) 81 | 82 | reducers = [ 83 | (manifold.TSNE, {"perplexity": 50}), 84 | # (manifold.LocallyLinearEmbedding, {'n_neighbors':10, 'method':'hessian'}), 85 | (manifold.Isomap, {"n_neighbors": 30}), 86 | (manifold.MDS, {}), 87 | (decomposition.PCA, {}), 88 | (umap.UMAP, {"n_neighbors": 30, "min_dist": 0.3}), 89 | ] 90 | 91 | test_data = [ 92 | (blobs, blob_labels), 93 | (iris.data, iris.target), 94 | (digits.data, digits.target), 95 | (wine.data, wine.target), 96 | (swissroll, swissroll_labels), 97 | (sphere, sphere_colors), 98 | ] 99 | dataset_names = ["Blobs", "Iris", "Digits", "Wine", "Swiss Roll", "Sphere"] 100 | 101 | n_rows = len(test_data) 102 | n_cols = len(reducers) 103 | ax_index = 1 104 | ax_list = [] 105 | 106 | # plt.figure(figsize=(9 * 2 + 3, 12.5)) 107 | plt.figure(figsize=(10, 8)) 108 | plt.subplots_adjust( 109 | left=0.02, right=0.98, bottom=0.001, top=0.96, wspace=0.05, hspace=0.01 110 | ) 111 | for data, labels in test_data: 112 | for reducer, args in reducers: 113 | start_time = time.time() 114 | embedding = reducer(n_components=2, **args).fit_transform(data) 115 | elapsed_time = time.time() - start_time 116 | ax = plt.subplot(n_rows, n_cols, ax_index) 117 | if isinstance(labels[0], tuple): 118 | ax.scatter(*embedding.T, s=10, c=labels, alpha=0.5) 119 | else: 120 | ax.scatter(*embedding.T, s=10, c=labels, cmap="Spectral", alpha=0.5) 121 | ax.text( 122 | 0.99, 123 | 0.01, 124 | "{:.2f} s".format(elapsed_time), 125 | transform=ax.transAxes, 126 | size=14, 127 | horizontalalignment="right", 128 | ) 129 | ax_list.append(ax) 130 | ax_index += 1 131 | plt.setp(ax_list, xticks=[], yticks=[]) 132 | 133 | for i in np.arange(n_rows) * n_cols: 134 | ax_list[i].set_ylabel(dataset_names[i // n_cols], size=16) 135 | for i in range(n_cols): 136 | ax_list[i].set_xlabel(repr(reducers[i][0]()).split("(")[0], size=16) 137 | ax_list[i].xaxis.set_label_position("top") 138 | 139 | plt.tight_layout() 140 | plt.show() 141 | -------------------------------------------------------------------------------- /examples/plot_fashion-mnist_example.py: -------------------------------------------------------------------------------- 1 | """ 2 | UMAP on the Fashion MNIST Digits dataset using Datashader 3 | --------------------------------------------------------- 4 | 5 | This is a simple example of using UMAP on the Fashion-MNIST 6 | dataset. The goal of this example is largely to demonstrate 7 | the use of datashader as an effective tool for visualising 8 | UMAP results. In particular datashader allows visualisation 9 | of very large datasets where overplotting can be a serious 10 | problem. It supports coloring by categorical variables 11 | (as shown in this example), or by continuous variables, 12 | or by density (as is common in datashader examples). 13 | """ 14 | 15 | import umap 16 | import numpy as np 17 | import pandas as pd 18 | import requests 19 | import os 20 | import datashader as ds 21 | import datashader.utils as utils 22 | import datashader.transfer_functions as tf 23 | import matplotlib.pyplot as plt 24 | import seaborn as sns 25 | 26 | sns.set(context="paper", style="white") 27 | 28 | if not os.path.isfile("fashion-mnist.csv"): 29 | csv_data = requests.get("https://www.openml.org/data/get_csv/18238735/phpnBqZGZ") 30 | with open("fashion-mnist.csv", "w") as f: 31 | f.write(csv_data.text) 32 | source_df = pd.read_csv("fashion-mnist.csv") 33 | 34 | data = source_df.iloc[:, :784].values.astype(np.float32) 35 | target = source_df["class"].values 36 | 37 | pal = [ 38 | "#9e0142", 39 | "#d8434e", 40 | "#f67a49", 41 | "#fdbf6f", 42 | "#feeda1", 43 | "#f1f9a9", 44 | "#bfe5a0", 45 | "#74c7a5", 46 | "#378ebb", 47 | "#5e4fa2", 48 | ] 49 | color_key = {str(d): c for d, c in enumerate(pal)} 50 | 51 | reducer = umap.UMAP(random_state=42) 52 | embedding = reducer.fit_transform(data) 53 | 54 | df = pd.DataFrame(embedding, columns=("x", "y")) 55 | df["class"] = pd.Series([str(x) for x in target], dtype="category") 56 | 57 | cvs = ds.Canvas(plot_width=400, plot_height=400) 58 | agg = cvs.points(df, "x", "y", ds.count_cat("class")) 59 | img = tf.shade(agg, color_key=color_key, how="eq_hist") 60 | 61 | utils.export_image(img, filename="fashion-mnist", background="black") 62 | 63 | image = plt.imread("fashion-mnist.png") 64 | fig, ax = plt.subplots(figsize=(6, 6)) 65 | plt.imshow(image) 66 | plt.setp(ax, xticks=[], yticks=[]) 67 | plt.title( 68 | "Fashion MNIST data embedded\n" 69 | "into two dimensions by UMAP\n" 70 | "visualised with Datashader", 71 | fontsize=12, 72 | ) 73 | 74 | plt.show() 75 | -------------------------------------------------------------------------------- /examples/plot_feature_extraction_classification.py: -------------------------------------------------------------------------------- 1 | """ 2 | UMAP as a Feature Extraction Technique for Classification 3 | --------------------------------------------------------- 4 | 5 | The following script shows how UMAP can be used as a feature extraction 6 | technique to improve the accuracy on a classification task. It also shows 7 | how UMAP can be integrated in standard scikit-learn pipelines. 8 | 9 | The first step is to create a dataset for a classification task, which is 10 | performed with the function ``sklearn.datasets.make_classification``. The 11 | dataset is then split into a training set and a test set using the 12 | ``sklearn.model_selection.train_test_split`` function. 13 | 14 | Second, a linear SVM is fitted on the training set. To choose the best 15 | hyperparameters automatically, a gridsearch is performed on the training set. 16 | The performance of the model is then evaluated on the test set with the 17 | accuracy metric. 18 | 19 | Third, the previous step is repeated with a slight modification: UMAP is 20 | used as a feature extraction technique. This small change results in a 21 | substantial improvement compared to the model where raw data is used. 22 | """ 23 | 24 | from sklearn.datasets import make_classification 25 | from sklearn.model_selection import train_test_split, GridSearchCV 26 | from sklearn.pipeline import Pipeline 27 | from sklearn.svm import LinearSVC 28 | 29 | from umap import UMAP 30 | 31 | # Make a toy dataset 32 | X, y = make_classification( 33 | n_samples=1000, 34 | n_features=300, 35 | n_informative=250, 36 | n_redundant=0, 37 | n_repeated=0, 38 | n_classes=2, 39 | random_state=1212, 40 | ) 41 | 42 | # Split the dataset into a training set and a test set 43 | X_train, X_test, y_train, y_test = train_test_split( 44 | X, y, test_size=0.2, random_state=42 45 | ) 46 | 47 | # Classification with a linear SVM 48 | svc = LinearSVC(dual=False, random_state=123) 49 | params_grid = {"C": [10**k for k in range(-3, 4)]} 50 | clf = GridSearchCV(svc, params_grid) 51 | clf.fit(X_train, y_train) 52 | print( 53 | "Accuracy on the test set with raw data: {:.3f}".format(clf.score(X_test, y_test)) 54 | ) 55 | 56 | # Transformation with UMAP followed by classification with a linear SVM 57 | umap = UMAP(random_state=456) 58 | pipeline = Pipeline([("umap", umap), ("svc", svc)]) 59 | params_grid_pipeline = { 60 | "umap__n_neighbors": [5, 20], 61 | "umap__n_components": [15, 25, 50], 62 | "svc__C": [10**k for k in range(-3, 4)], 63 | } 64 | 65 | 66 | clf_pipeline = GridSearchCV(pipeline, params_grid_pipeline) 67 | clf_pipeline.fit(X_train, y_train) 68 | print( 69 | "Accuracy on the test set with UMAP transformation: {:.3f}".format( 70 | clf_pipeline.score(X_test, y_test) 71 | ) 72 | ) 73 | -------------------------------------------------------------------------------- /examples/plot_mnist_example.py: -------------------------------------------------------------------------------- 1 | """ 2 | UMAP on the MNIST Digits dataset 3 | -------------------------------- 4 | 5 | A simple example demonstrating how to use UMAP on a larger 6 | dataset such as MNIST. We first pull the MNIST dataset and 7 | then use UMAP to reduce it to only 2-dimensions for 8 | easy visualisation. 9 | 10 | Note that UMAP manages to both group the individual digit 11 | classes, but also to retain the overall global structure 12 | among the different digit classes -- keeping 1 far from 13 | 0, and grouping triplets of 3,5,8 and 4,7,9 which can 14 | blend into one another in some cases. 15 | """ 16 | 17 | import umap 18 | from sklearn.datasets import fetch_openml 19 | import matplotlib.pyplot as plt 20 | import seaborn as sns 21 | 22 | sns.set(context="paper", style="white") 23 | 24 | mnist = fetch_openml("mnist_784", version=1) 25 | 26 | reducer = umap.UMAP(random_state=42) 27 | embedding = reducer.fit_transform(mnist.data) 28 | 29 | fig, ax = plt.subplots(figsize=(12, 10)) 30 | color = mnist.target.astype(int) 31 | plt.scatter(embedding[:, 0], embedding[:, 1], c=color, cmap="Spectral", s=0.1) 32 | plt.setp(ax, xticks=[], yticks=[]) 33 | plt.title("MNIST data embedded into two dimensions by UMAP", fontsize=18) 34 | 35 | plt.show() 36 | -------------------------------------------------------------------------------- /images/densmap_example_mnist.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/images/densmap_example_mnist.png -------------------------------------------------------------------------------- /images/iris.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/images/iris.png -------------------------------------------------------------------------------- /images/mnist_digits.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/images/mnist_digits.png -------------------------------------------------------------------------------- /images/sklearn_digits.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/images/sklearn_digits.png -------------------------------------------------------------------------------- /images/umap_example_fashion_mnist1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/images/umap_example_fashion_mnist1.png -------------------------------------------------------------------------------- /images/umap_example_mnist1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/images/umap_example_mnist1.png -------------------------------------------------------------------------------- /images/umap_example_shuttle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/images/umap_example_shuttle.png -------------------------------------------------------------------------------- /paper.bib: -------------------------------------------------------------------------------- 1 | @article{umap_arxiv, 2 | author = {{McInnes}, L. and {Healy}, J.}, 3 | title = "{UMAP: Uniform Manifold Approximation 4 | and Projection for Dimension Reduction}", 5 | journal = {ArXiv e-prints}, 6 | archivePrefix = "arXiv", 7 | eprint = {1802.03426}, 8 | primaryClass = "stat.ML", 9 | keywords = {Statistics - Machine Learning, 10 | Computer Science - Computational Geometry, 11 | Computer Science - Learning}, 12 | year = 2018, 13 | month = feb, 14 | } 15 | 16 | @online{umap_repo, 17 | author = {Leland McInnes and John Healy and Nathaniel Saul and Lukas Grossberger}, 18 | title = {UMAP}, 19 | year = 2018, 20 | url = {https://github.com/lmcinnes/umap}, 21 | urldate = {2018-07-22} 22 | } -------------------------------------------------------------------------------- /paper.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'UMAP: Uniform Manifold Approximation and Projection' 3 | tags: 4 | - manifold learning 5 | - dimension reduction 6 | - unsupervised learning 7 | authors: 8 | - name: Leland McInnes 9 | orcid: 0000-0003-2143-6834 10 | affiliation: 1 11 | - name: John Healy 12 | affiliation: 1 13 | - name: Nathaniel Saul 14 | affiliation: 2 15 | - name: Lukas Großberger 16 | affiliation: "3, 4" 17 | affiliations: 18 | - name: Tutte Institute for Mathematics and Computing 19 | index: 1 20 | - name: Department of Mathematics and Statistics, Washington State University 21 | index: 2 22 | - name: Ernst Strüngmann Institute for Neuroscience in cooperation with Max Planck Society 23 | index: 3 24 | - name: Donders Institute for Brain, Cognition and Behaviour, Radboud Universiteit 25 | index: 4 26 | date: 26 July 2018 27 | bibliography: paper.bib 28 | --- 29 | 30 | # Summary 31 | 32 | Uniform Manifold Approximation and Projection (UMAP) is a dimension reduction technique 33 | that can be used for visualisation similarly to t-SNE, but also for general non-linear 34 | dimension reduction. UMAP has a rigorous mathematical foundation, but is simple to use, 35 | with a scikit-learn compatible API. UMAP is among the fastest manifold learning 36 | implementations available -- significantly faster than most t-SNE implementations. 37 | 38 | UMAP supports a number of useful features, including the ability to use labels 39 | (or partial labels) for supervised (or semi-supervised) dimension reduction, 40 | and the ability to transform new unseen data into a pretrained embedding space. 41 | 42 | For details of the mathematical underpinnings see [@umap_arxiv]. The implementation 43 | can be found at [@umap_repo]. 44 | 45 | -![Fashion MNIST embedded via UMAP](images/umap_example_fashion_mnist1.png) 46 | 47 | # References 48 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import platform 2 | from setuptools import setup 3 | 4 | 5 | def readme(): 6 | try: 7 | with open("README.rst", encoding="UTF-8") as readme_file: 8 | return readme_file.read() 9 | except TypeError: 10 | # Python 2.7 doesn't support encoding argument in builtin open 11 | import io 12 | 13 | with io.open("README.rst", encoding="UTF-8") as readme_file: 14 | return readme_file.read() 15 | 16 | 17 | configuration = { 18 | "name": "umap-learn", 19 | "version": "0.5.8", 20 | "description": "Uniform Manifold Approximation and Projection", 21 | "long_description": readme(), 22 | "long_description_content_type": "text/x-rst", 23 | "classifiers": [ 24 | "Development Status :: 3 - Alpha", 25 | "Intended Audience :: Science/Research", 26 | "Intended Audience :: Developers", 27 | "License :: OSI Approved", 28 | "Programming Language :: C", 29 | "Programming Language :: Python", 30 | "Topic :: Software Development", 31 | "Topic :: Scientific/Engineering", 32 | "Operating System :: Microsoft :: Windows", 33 | "Operating System :: POSIX", 34 | "Operating System :: Unix", 35 | "Operating System :: MacOS", 36 | "Programming Language :: Python :: 3.9", 37 | "Programming Language :: Python :: 3.10", 38 | "Programming Language :: Python :: 3.11", 39 | "Programming Language :: Python :: 3.12", 40 | ], 41 | "keywords": "dimension reduction t-sne manifold", 42 | "url": "http://github.com/lmcinnes/umap", 43 | "maintainer": "Leland McInnes", 44 | "maintainer_email": "leland.mcinnes@gmail.com", 45 | "license": "BSD", 46 | "packages": ["umap"], 47 | "install_requires": [ 48 | "numpy >= 1.23", 49 | "scipy >= 1.3.1", 50 | "scikit-learn >= 1.6", 51 | "numba >= 0.51.2", 52 | "pynndescent >= 0.5", 53 | "tqdm", 54 | ], 55 | "extras_require": { 56 | "plot": [ 57 | "pandas", 58 | "matplotlib", 59 | "datashader", 60 | "bokeh", 61 | "holoviews", 62 | "colorcet", 63 | "seaborn", 64 | "scikit-image", 65 | "dask", 66 | ], 67 | "parametric_umap": ["tensorflow >= 2.1"], 68 | "tbb": ["tbb >= 2019.0"], 69 | }, 70 | "ext_modules": [], 71 | "cmdclass": {}, 72 | "test_suite": "pytest", 73 | "tests_require": ["pytest"], 74 | "data_files": (), 75 | "zip_safe": False, 76 | } 77 | 78 | setup(**configuration) 79 | -------------------------------------------------------------------------------- /umap/__init__.py: -------------------------------------------------------------------------------- 1 | from warnings import warn, catch_warnings, simplefilter 2 | from .umap_ import UMAP 3 | 4 | try: 5 | with catch_warnings(): 6 | simplefilter("ignore") 7 | from .parametric_umap import ParametricUMAP, load_ParametricUMAP 8 | except ImportError: 9 | warn( 10 | "Tensorflow not installed; ParametricUMAP will be unavailable", 11 | category=ImportWarning, 12 | ) 13 | 14 | # Add a dummy class to raise an error 15 | class ParametricUMAP(object): 16 | def __init__(self, **kwds): 17 | warn( 18 | """The umap.parametric_umap package requires Tensorflow > 2.0 to be installed. 19 | You can install Tensorflow at https://www.tensorflow.org/install 20 | 21 | or you can install the CPU version of Tensorflow using 22 | 23 | pip install umap-learn[parametric_umap] 24 | 25 | """ 26 | ) 27 | raise ImportError( 28 | "umap.parametric_umap requires Tensorflow >= 2.0" 29 | ) from None 30 | 31 | 32 | from .aligned_umap import AlignedUMAP 33 | 34 | # Workaround: https://github.com/numba/numba/issues/3341 35 | import numba 36 | 37 | from importlib.metadata import version, PackageNotFoundError 38 | 39 | try: 40 | __version__ = version("umap-learn") 41 | except PackageNotFoundError: 42 | __version__ = "0.5-dev" 43 | -------------------------------------------------------------------------------- /umap/tests/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test Suite for UMAP to ensure things are working as expected. 3 | 4 | The test suite comprises multiple testing modules, 5 | including multiple test cases related to a specific 6 | set of UMAP features under test. 7 | 8 | Backend 9 | ------- 10 | pytest is the reference backend for testing environment and execution, 11 | also integrating with pre-existent nose-based tests 12 | 13 | Shared Testing code 14 | ------------------- 15 | Whenever needed, each module includes a set of 16 | _utility_ functions that specify shared (and repeated) 17 | testing operations. 18 | 19 | Fixtures 20 | -------- 21 | All data dependency has been implemented 22 | as test fixtures (preferred to shared global variables). 23 | All the fixtures shared by multiple test cases 24 | are defined in the `conftest.py` module. 25 | 26 | Fixtures allow the execution of each test module in isolation, as well 27 | as within the whole test suite. 28 | 29 | Modules in Tests (to keep up to date) 30 | ------------------------------------- 31 | - conftest: pytrest fixtures 32 | - test_plot: basic tests for umap.plot 33 | - test_umap_df_validation_params: 34 | Tests on parameters validation for DataFrameUMAP 35 | - test_umap_metrics: 36 | Tests for UMAP metrics - spatial, binary, and sparse 37 | - test_umap_nn: 38 | Tests for NearestNeighbours 39 | - test_umap_on_iris: 40 | Tests for UMAP on Iris Dataset 41 | - test_umap_ops: 42 | Tests for general UMAP ops (e.g. clusterability, transform stability) 43 | - test_umap_repeated_data: 44 | UMAP tests on repeated data (sparse|dense; spatial|binary) 45 | - test_umap_trustworthiness: 46 | Tests on UMAP Trustworthiness 47 | - test_umap_validation_params: 48 | Tests for fit parameters validation 49 | 50 | """ 51 | -------------------------------------------------------------------------------- /umap/tests/conftest.py: -------------------------------------------------------------------------------- 1 | # =========================== 2 | # Testing (session) Fixture 3 | # ========================== 4 | 5 | import pytest 6 | import numpy as np 7 | from scipy import sparse 8 | from sklearn.datasets import load_iris 9 | from umap import UMAP, AlignedUMAP 10 | 11 | # Globals, used for all the tests 12 | SEED = 189212 # 0b101110001100011100 13 | np.random.seed(SEED) 14 | 15 | 16 | # Spatial and Binary Data 17 | # ----------------------- 18 | @pytest.fixture(scope="session") 19 | def spatial_data(): 20 | # - Spatial Data 21 | spatial_data = np.random.randn(10, 20) 22 | # Add some all zero data for corner case test 23 | return np.vstack([spatial_data, np.zeros((2, 20))]) 24 | 25 | 26 | @pytest.fixture(scope="session") 27 | def binary_data(): 28 | binary_data = np.random.choice(a=[False, True], size=(10, 20), p=[0.66, 1 - 0.66]) 29 | # Add some all zero data for corner case test 30 | binary_data = np.vstack([binary_data, np.zeros((2, 20), dtype="bool")]) 31 | return binary_data 32 | 33 | 34 | # Sparse Spatial and Binary Data 35 | # ------------------------------ 36 | @pytest.fixture(scope="session") 37 | def sparse_spatial_data(spatial_data, binary_data): 38 | return sparse.csr_matrix(spatial_data * binary_data) 39 | 40 | 41 | @pytest.fixture(scope="session") 42 | def sparse_binary_data(binary_data): 43 | return sparse.csr_matrix(binary_data) 44 | 45 | 46 | # Nearest Neighbour Data 47 | # ----------------------- 48 | @pytest.fixture(scope="session") 49 | def nn_data(): 50 | nn_data = np.random.uniform(0, 1, size=(1000, 5)) 51 | nn_data = np.vstack( 52 | [nn_data, np.zeros((2, 5))] 53 | ) # Add some all zero data for corner case test 54 | return nn_data 55 | 56 | 57 | @pytest.fixture(scope="session") 58 | def binary_nn_data(): 59 | binary_nn_data = np.random.choice( 60 | a=[False, True], size=(1000, 5), p=[0.66, 1 - 0.66] 61 | ) 62 | binary_nn_data = np.vstack( 63 | [binary_nn_data, np.zeros((2, 5), dtype="bool")] 64 | ) # Add some all zero data for corner case test 65 | return binary_nn_data 66 | 67 | 68 | @pytest.fixture(scope="session") 69 | def sparse_nn_data(): 70 | return sparse.random(1000, 50, density=0.5, format="csr") 71 | 72 | 73 | # Data With Repetitions 74 | # --------------------- 75 | 76 | 77 | @pytest.fixture(scope="session") 78 | def repetition_dense(): 79 | # Dense data for testing small n 80 | return np.array( 81 | [ 82 | [5, 6, 7, 8], 83 | [5, 6, 7, 8], 84 | [5, 6, 7, 8], 85 | [5, 6, 7, 8], 86 | [5, 6, 7, 8], 87 | [5, 6, 7, 8], 88 | [1, 1, 1, 1], 89 | [1, 2, 3, 4], 90 | [1, 1, 2, 1], 91 | ] 92 | ) 93 | 94 | 95 | @pytest.fixture(scope="session") 96 | def spatial_repeats(spatial_data): 97 | # spatial data repeats 98 | spatial_repeats = np.vstack( 99 | [np.repeat(spatial_data[0:2], [2, 0], axis=0), spatial_data, np.zeros((2, 20))] 100 | ) 101 | # Add some all zero data for corner case test. Make the first three rows identical 102 | # binary Data Repeat 103 | return spatial_repeats 104 | 105 | 106 | @pytest.fixture(scope="session") 107 | def binary_repeats(binary_data): 108 | binary_repeats = np.vstack( 109 | [ 110 | np.repeat(binary_data[0:2], [2, 0], axis=0), 111 | binary_data, 112 | np.zeros((2, 20), dtype="bool"), 113 | ] 114 | ) 115 | # Add some all zero data for corner case test. Make the first three rows identical 116 | return binary_repeats 117 | 118 | 119 | @pytest.fixture(scope="session") 120 | def sparse_spatial_data_repeats(spatial_repeats, binary_repeats): 121 | return sparse.csr_matrix(spatial_repeats * binary_repeats) 122 | 123 | 124 | @pytest.fixture(scope="session") 125 | def sparse_binary_data_repeats(binary_repeats): 126 | return sparse.csr_matrix(binary_repeats) 127 | 128 | 129 | @pytest.fixture(scope="session") 130 | def sparse_test_data(nn_data, binary_nn_data): 131 | return sparse.csr_matrix(nn_data * binary_nn_data) 132 | 133 | 134 | @pytest.fixture(scope="session") 135 | def iris(): 136 | return load_iris() 137 | 138 | 139 | @pytest.fixture(scope="session") 140 | def iris_selection(): 141 | return np.random.choice([True, False], 150, replace=True, p=[0.75, 0.25]) 142 | 143 | 144 | @pytest.fixture(scope="session") 145 | def aligned_iris(iris): 146 | slices = [iris.data[i : i + 50] for i in range(0, 125, 25)] 147 | target = [iris.target[i : i + 50] for i in range(0, 125, 25)] 148 | return slices, target 149 | 150 | 151 | @pytest.fixture(scope="session") 152 | def aligned_iris_relations(): 153 | return [{a: a + 25 for a in range(25)} for i in range(4)] 154 | 155 | 156 | @pytest.fixture(scope="session") 157 | def iris_model(iris): 158 | return UMAP(n_neighbors=10, min_dist=0.01, random_state=42).fit(iris.data) 159 | 160 | 161 | @pytest.fixture(scope="session") 162 | def iris_model_large(iris): 163 | return UMAP( 164 | n_neighbors=10, 165 | min_dist=0.01, 166 | random_state=42, 167 | force_approximation_algorithm=True, 168 | ).fit(iris.data) 169 | 170 | 171 | @pytest.fixture(scope="session") 172 | def iris_subset_model(iris, iris_selection): 173 | return UMAP(n_neighbors=10, min_dist=0.01, random_state=42).fit( 174 | iris.data[iris_selection] 175 | ) 176 | 177 | 178 | @pytest.fixture(scope="session") 179 | def iris_subset_model_large(iris, iris_selection): 180 | return UMAP( 181 | n_neighbors=10, 182 | min_dist=0.01, 183 | random_state=42, 184 | force_approximation_algorithm=True, 185 | ).fit(iris.data[iris_selection]) 186 | 187 | 188 | @pytest.fixture(scope="session") 189 | def supervised_iris_model(iris): 190 | return UMAP(n_neighbors=10, min_dist=0.01, n_epochs=200, random_state=42).fit( 191 | iris.data, iris.target 192 | ) 193 | 194 | 195 | @pytest.fixture(scope="session") 196 | def aligned_iris_model(aligned_iris, aligned_iris_relations): 197 | data, target = aligned_iris 198 | model = AlignedUMAP() 199 | model.fit(data, relations=aligned_iris_relations) 200 | return model 201 | 202 | 203 | # UMAP Distance Metrics 204 | # --------------------- 205 | @pytest.fixture(scope="session") 206 | def spatial_distances(): 207 | return ( 208 | "euclidean", 209 | "manhattan", 210 | "chebyshev", 211 | "minkowski", 212 | "hamming", 213 | "canberra", 214 | "braycurtis", 215 | "cosine", 216 | "correlation", 217 | ) 218 | 219 | 220 | @pytest.fixture(scope="session") 221 | def binary_distances(): 222 | return ( 223 | "jaccard", 224 | "matching", 225 | "dice", 226 | "kulsinski", 227 | "rogerstanimoto", 228 | "russellrao", 229 | "sokalmichener", 230 | "sokalsneath", 231 | "yule", 232 | ) 233 | -------------------------------------------------------------------------------- /umap/tests/digits_embedding_42.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/umap/tests/digits_embedding_42.npy -------------------------------------------------------------------------------- /umap/tests/test_aligned_umap.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from umap import AlignedUMAP 3 | from sklearn.metrics import pairwise_distances 4 | from sklearn.cluster import KMeans 5 | import numpy as np 6 | from sklearn.metrics import adjusted_rand_score 7 | 8 | # =============================== 9 | # Test AlignedUMAP on sliced iris 10 | # =============================== 11 | 12 | 13 | def nn_accuracy(true_nn, embd_nn): 14 | num_correct = 0.0 15 | for i in range(true_nn.shape[0]): 16 | num_correct += np.sum(np.isin(true_nn[i], embd_nn[i])) 17 | return num_correct / true_nn.size 18 | 19 | 20 | def test_neighbor_local_neighbor_accuracy(aligned_iris, aligned_iris_model): 21 | data, target = aligned_iris 22 | for i, slice in enumerate(data): 23 | data_dmat = pairwise_distances(slice) 24 | true_nn = np.argsort(data_dmat, axis=1)[:, :10] 25 | embd_dmat = pairwise_distances(aligned_iris_model.embeddings_[i]) 26 | embd_nn = np.argsort(embd_dmat, axis=1)[:, :10] 27 | assert nn_accuracy(true_nn, embd_nn) >= 0.65 28 | 29 | 30 | def test_local_clustering(aligned_iris, aligned_iris_model): 31 | data, target = aligned_iris 32 | 33 | embd = aligned_iris_model.embeddings_[1] 34 | clusters = KMeans(n_clusters=2).fit_predict(embd) 35 | ari = adjusted_rand_score(target[1], clusters) 36 | assert ari >= 0.75 37 | 38 | embd = aligned_iris_model.embeddings_[3] 39 | clusters = KMeans(n_clusters=2).fit_predict(embd) 40 | ari = adjusted_rand_score(target[3], clusters) 41 | assert ari >= 0.40 42 | 43 | 44 | def test_aligned_update(aligned_iris, aligned_iris_relations): 45 | data, target = aligned_iris 46 | small_aligned_model = AlignedUMAP() 47 | small_aligned_model.fit(data[:3], relations=aligned_iris_relations[:2]) 48 | small_aligned_model.update(data[3], relations=aligned_iris_relations[2]) 49 | for i, slice in enumerate(data[:4]): 50 | data_dmat = pairwise_distances(slice) 51 | true_nn = np.argsort(data_dmat, axis=1)[:, :10] 52 | embd_dmat = pairwise_distances(small_aligned_model.embeddings_[i]) 53 | embd_nn = np.argsort(embd_dmat, axis=1)[:, :10] 54 | assert nn_accuracy(true_nn, embd_nn) >= 0.45 55 | 56 | 57 | def test_aligned_update_params(aligned_iris, aligned_iris_relations): 58 | data, target = aligned_iris 59 | n_neighbors = [15, 15, 15, 15, 15] 60 | small_aligned_model = AlignedUMAP(n_neighbors=n_neighbors[:3]) 61 | small_aligned_model.fit(data[:3], relations=aligned_iris_relations[:2]) 62 | small_aligned_model.update( 63 | data[3], relations=aligned_iris_relations[2], n_neighbors=n_neighbors[3] 64 | ) 65 | for i, slice in enumerate(data[:4]): 66 | data_dmat = pairwise_distances(slice) 67 | true_nn = np.argsort(data_dmat, axis=1)[:, :10] 68 | embd_dmat = pairwise_distances(small_aligned_model.embeddings_[i]) 69 | embd_nn = np.argsort(embd_dmat, axis=1)[:, :10] 70 | assert nn_accuracy(true_nn, embd_nn) >= 0.45 71 | 72 | 73 | @pytest.mark.skip(reason="Temporarily disable") 74 | def test_aligned_update_array_error(aligned_iris, aligned_iris_relations): 75 | data, target = aligned_iris 76 | n_neighbors = [15, 15, 15, 15, 15] 77 | small_aligned_model = AlignedUMAP(n_neighbors=n_neighbors[:3]) 78 | small_aligned_model.fit(data[:3], relations=aligned_iris_relations[:2]) 79 | 80 | with pytest.raises(ValueError): 81 | small_aligned_model.update( 82 | data[3:], relations=aligned_iris_relations[2:], n_neighbors=n_neighbors[3:] 83 | ) 84 | -------------------------------------------------------------------------------- /umap/tests/test_composite_models.py: -------------------------------------------------------------------------------- 1 | from umap import UMAP 2 | import pytest 3 | 4 | try: 5 | # works for sklearn>=0.22 6 | from sklearn.manifold import trustworthiness 7 | except ImportError: 8 | # this is to comply with requirements (scikit-learn>=0.20) 9 | # More recent versions of sklearn have exposed trustworthiness 10 | # in top level module API 11 | # see: https://github.com/scikit-learn/scikit-learn/pull/15337 12 | from sklearn.manifold.t_sne import trustworthiness 13 | 14 | 15 | def test_composite_trustworthiness(nn_data, iris_model): 16 | data = nn_data[:50] 17 | model1 = UMAP(n_neighbors=10, min_dist=0.01, random_state=42, n_epochs=50).fit(data) 18 | model2 = UMAP( 19 | n_neighbors=30, 20 | min_dist=0.01, 21 | random_state=42, 22 | n_epochs=50, 23 | init=model1.embedding_, 24 | ).fit(data) 25 | model3 = model1 * model2 26 | trust = trustworthiness(data, model3.embedding_, n_neighbors=10) 27 | assert ( 28 | trust >= 0.80 29 | ), "Insufficiently trustworthy embedding for" "nn dataset: {}".format(trust) 30 | model4 = model1 + model2 31 | trust = trustworthiness(data, model4.embedding_, n_neighbors=10) 32 | assert ( 33 | trust >= 0.80 34 | ), "Insufficiently trustworthy embedding for" "nn dataset: {}".format(trust) 35 | 36 | with pytest.raises(ValueError): 37 | _ = model1 + iris_model 38 | 39 | with pytest.raises(ValueError): 40 | _ = model1 * iris_model 41 | 42 | with pytest.raises(ValueError): 43 | _ = model1 - iris_model 44 | 45 | 46 | @pytest.mark.skip(reason="Marked as Skipped test") 47 | def test_composite_trustworthiness_random_init(nn_data): # pragma: no cover 48 | data = nn_data[:50] 49 | model1 = UMAP( 50 | n_neighbors=10, 51 | min_dist=0.01, 52 | random_state=42, 53 | n_epochs=50, 54 | init="random", 55 | ).fit(data) 56 | model2 = UMAP( 57 | n_neighbors=30, 58 | min_dist=0.01, 59 | random_state=42, 60 | n_epochs=50, 61 | init="random", 62 | ).fit(data) 63 | model3 = model1 * model2 64 | trust = trustworthiness(data, model3.embedding_, n_neighbors=10) 65 | assert ( 66 | trust >= 0.82 67 | ), "Insufficiently trustworthy embedding for" "nn dataset: {}".format(trust) 68 | model4 = model1 + model2 69 | trust = trustworthiness(data, model4.embedding_, n_neighbors=10) 70 | assert ( 71 | trust >= 0.82 72 | ), "Insufficiently trustworthy embedding for" "nn dataset: {}".format(trust) 73 | 74 | 75 | def test_composite_trustworthiness_on_iris(iris): 76 | iris_model1 = UMAP( 77 | n_neighbors=10, 78 | min_dist=0.01, 79 | random_state=42, 80 | n_epochs=100, 81 | ).fit(iris.data[:, :2]) 82 | iris_model2 = UMAP( 83 | n_neighbors=10, 84 | min_dist=0.01, 85 | random_state=42, 86 | n_epochs=100, 87 | ).fit(iris.data[:, 2:]) 88 | embedding = (iris_model1 + iris_model2).embedding_ 89 | trust = trustworthiness(iris.data, embedding, n_neighbors=10) 90 | assert ( 91 | trust >= 0.82 92 | ), "Insufficiently trustworthy embedding for" "iris dataset: {}".format(trust) 93 | embedding = (iris_model1 * iris_model2).embedding_ 94 | trust = trustworthiness(iris.data, embedding, n_neighbors=10) 95 | assert ( 96 | trust >= 0.82 97 | ), "Insufficiently trustworthy embedding for" "iris dataset: {}".format(trust) 98 | 99 | 100 | def test_contrastive_trustworthiness_on_iris(iris): 101 | iris_model1 = UMAP( 102 | n_neighbors=10, 103 | min_dist=0.01, 104 | random_state=42, 105 | n_epochs=100, 106 | ).fit(iris.data[:, :2]) 107 | iris_model2 = UMAP( 108 | n_neighbors=10, 109 | min_dist=0.01, 110 | random_state=42, 111 | n_epochs=100, 112 | ).fit(iris.data[:, 2:]) 113 | embedding = (iris_model1 - iris_model2).embedding_ 114 | trust = trustworthiness(iris.data, embedding, n_neighbors=10) 115 | assert ( 116 | trust >= 0.75 117 | ), "Insufficiently trustworthy embedding for" "iris dataset: {}".format(trust) 118 | -------------------------------------------------------------------------------- /umap/tests/test_data_input.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest as pytest 3 | from numba import njit 4 | from umap import UMAP 5 | 6 | 7 | @pytest.fixture(scope="session") 8 | def all_finite_data(): 9 | return np.arange(100.0).reshape(25, 4) 10 | 11 | 12 | @pytest.fixture(scope="session") 13 | def inverse_data(): 14 | return np.arange(50).reshape(25, 2) 15 | 16 | 17 | @njit 18 | def nan_dist(a: np.ndarray, b: np.ndarray): 19 | a[0] = np.nan 20 | a[1] = np.inf 21 | return 0, a 22 | 23 | 24 | def test_check_input_data(all_finite_data, inverse_data): 25 | """ 26 | Data input to UMAP gets checked for liability. 27 | This tests checks the if data input is dismissed/accepted 28 | according to the "ensure_all_finite" keyword as used by 29 | sklearn. 30 | 31 | Parameters 32 | ---------- 33 | all_finite_data 34 | inverse_data 35 | ------- 36 | 37 | """ 38 | inf_data = all_finite_data.copy() 39 | inf_data[0] = np.inf 40 | nan_data = all_finite_data.copy() 41 | nan_data[0] = np.nan 42 | inf_nan_data = all_finite_data.copy() 43 | inf_nan_data[0] = np.nan 44 | inf_nan_data[1] = np.inf 45 | 46 | # wrapper to call each data handling function of UMAP in a convenient way 47 | def call_umap_functions(data, ensure_all_finite): 48 | u = UMAP(metric=nan_dist) 49 | if ensure_all_finite is None: 50 | u.fit_transform(data) 51 | u.fit(data) 52 | u.transform(data) 53 | u.update(data) 54 | u.inverse_transform(inverse_data) 55 | else: 56 | u.fit_transform(data, ensure_all_finite=ensure_all_finite) 57 | u.fit(data, ensure_all_finite=ensure_all_finite) 58 | u.transform(data, ensure_all_finite=ensure_all_finite) 59 | u.update(data, ensure_all_finite=ensure_all_finite) 60 | u.inverse_transform(inverse_data) 61 | 62 | # Check whether correct data input is accepted 63 | call_umap_functions(all_finite_data, None) 64 | call_umap_functions(all_finite_data, True) 65 | 66 | call_umap_functions(nan_data, "allow-nan") 67 | call_umap_functions(all_finite_data, "allow-nan") 68 | 69 | call_umap_functions(inf_data, False) 70 | call_umap_functions(inf_nan_data, False) 71 | call_umap_functions(nan_data, False) 72 | call_umap_functions(all_finite_data, False) 73 | 74 | # Check whether illegal data raises a ValueError 75 | with pytest.raises(ValueError): 76 | call_umap_functions(nan_data, None) 77 | call_umap_functions(inf_data, None) 78 | call_umap_functions(inf_nan_data, None) 79 | 80 | call_umap_functions(nan_data, True) 81 | call_umap_functions(inf_data, True) 82 | call_umap_functions(inf_nan_data, True) 83 | 84 | call_umap_functions(inf_data, "allow-nan") 85 | call_umap_functions(inf_nan_data, "allow-nan") 86 | -------------------------------------------------------------------------------- /umap/tests/test_densmap.py: -------------------------------------------------------------------------------- 1 | from umap import UMAP 2 | import pytest 3 | 4 | try: 5 | # works for sklearn>=0.22 6 | from sklearn.manifold import trustworthiness 7 | except ImportError: 8 | # this is to comply with requirements (scikit-learn>=0.20) 9 | # More recent versions of sklearn have exposed trustworthiness 10 | # in top level module API 11 | # see: https://github.com/scikit-learn/scikit-learn/pull/15337 12 | from sklearn.manifold.t_sne import trustworthiness 13 | 14 | 15 | def test_densmap_trustworthiness(nn_data): 16 | data = nn_data[:50] 17 | embedding, rad_h, rad_l = UMAP( 18 | n_neighbors=10, 19 | min_dist=0.01, 20 | random_state=42, 21 | n_epochs=100, 22 | densmap=True, 23 | output_dens=True, 24 | ).fit_transform(data) 25 | trust = trustworthiness(data, embedding, n_neighbors=10) 26 | assert ( 27 | trust >= 0.72 28 | ), "Insufficiently trustworthy embedding for" "nn dataset: {}".format(trust) 29 | 30 | 31 | @pytest.mark.skip() 32 | def test_densmap_trustworthiness_random_init(nn_data): # pragma: no cover 33 | data = nn_data[:50] 34 | embedding = UMAP( 35 | n_neighbors=10, 36 | min_dist=0.01, 37 | random_state=42, 38 | init="random", 39 | densmap=True, 40 | ).fit_transform(data) 41 | trust = trustworthiness(data, embedding, n_neighbors=10) 42 | assert ( 43 | trust >= 0.75 44 | ), "Insufficiently trustworthy embedding for" "nn dataset: {}".format(trust) 45 | 46 | 47 | def test_densmap_trustworthiness_on_iris(iris): 48 | densmap_iris_model = UMAP( 49 | n_neighbors=10, 50 | min_dist=0.01, 51 | random_state=42, 52 | densmap=True, 53 | verbose=True, 54 | ).fit(iris.data) 55 | embedding = densmap_iris_model.embedding_ 56 | trust = trustworthiness(iris.data, embedding, n_neighbors=10) 57 | assert ( 58 | trust >= 0.97 59 | ), "Insufficiently trustworthy embedding for" "iris dataset: {}".format(trust) 60 | 61 | with pytest.raises(NotImplementedError): 62 | densmap_iris_model.transform(iris.data[:10]) 63 | 64 | with pytest.raises(ValueError): 65 | densmap_iris_model.inverse_transform(embedding[:10]) 66 | 67 | 68 | def test_densmap_trustworthiness_on_iris_supervised(iris): 69 | densmap_iris_model = UMAP( 70 | n_neighbors=10, 71 | min_dist=0.01, 72 | random_state=42, 73 | densmap=True, 74 | verbose=True, 75 | ).fit(iris.data, y=iris.target) 76 | embedding = densmap_iris_model.embedding_ 77 | trust = trustworthiness(iris.data, embedding, n_neighbors=10) 78 | assert ( 79 | trust >= 0.97 80 | ), "Insufficiently trustworthy embedding for" "iris dataset: {}".format(trust) 81 | -------------------------------------------------------------------------------- /umap/tests/test_parametric_umap.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tempfile 3 | import pytest 4 | from sklearn.datasets import make_moons 5 | from sklearn.model_selection import train_test_split 6 | from numpy.testing import assert_array_almost_equal 7 | import platform 8 | 9 | try: 10 | import tensorflow as tf 11 | 12 | IMPORT_TF = True 13 | except ImportError: 14 | IMPORT_TF = False 15 | else: 16 | from umap.parametric_umap import ParametricUMAP, load_ParametricUMAP 17 | 18 | tf_only = pytest.mark.skipif(not IMPORT_TF, reason="TensorFlow >= 2.0 is not installed") 19 | not_windows = pytest.mark.skipif( 20 | platform.system() == "Windows", reason="Windows file access issues" 21 | ) 22 | 23 | 24 | @pytest.fixture(scope="session") 25 | def moon_dataset(): 26 | X, _ = make_moons(200) 27 | return X 28 | 29 | 30 | @tf_only 31 | def test_create_model(moon_dataset): 32 | """test a simple parametric UMAP network""" 33 | embedder = ParametricUMAP() 34 | embedding = embedder.fit_transform(moon_dataset) 35 | # completes successfully 36 | assert embedding is not None 37 | assert embedding.shape == (moon_dataset.shape[0], 2) 38 | 39 | 40 | @tf_only 41 | def test_global_loss(moon_dataset): 42 | """test a simple parametric UMAP network""" 43 | embedder = ParametricUMAP(global_correlation_loss_weight=1.0) 44 | embedding = embedder.fit_transform(moon_dataset) 45 | # completes successfully 46 | assert embedding is not None 47 | assert embedding.shape == (moon_dataset.shape[0], 2) 48 | 49 | 50 | @tf_only 51 | def test_inverse_transform(moon_dataset): 52 | """tests inverse_transform""" 53 | 54 | def norm(x): 55 | return (x - np.min(x)) / (np.max(x) - np.min(x)) 56 | 57 | X = norm(moon_dataset) 58 | embedder = ParametricUMAP(parametric_reconstruction=True) 59 | Z = embedder.fit_transform(X) 60 | X_r = embedder.inverse_transform(Z) 61 | # completes successfully 62 | assert X_r is not None 63 | assert X_r.shape == X.shape 64 | 65 | 66 | @tf_only 67 | def test_custom_encoder_decoder(moon_dataset): 68 | """test using a custom encoder / decoder""" 69 | dims = (2,) 70 | n_components = 2 71 | encoder = tf.keras.Sequential( 72 | [ 73 | tf.keras.layers.Input(shape=dims), 74 | tf.keras.layers.Flatten(), 75 | tf.keras.layers.Dense(units=100, activation="relu"), 76 | tf.keras.layers.Dense(units=100, activation="relu"), 77 | tf.keras.layers.Dense(units=100, activation="relu"), 78 | tf.keras.layers.Dense(units=n_components, name="z"), 79 | ] 80 | ) 81 | 82 | decoder = tf.keras.Sequential( 83 | [ 84 | tf.keras.layers.Input(shape=(n_components,)), 85 | tf.keras.layers.Dense(units=100, activation="relu"), 86 | tf.keras.layers.Dense(units=100, activation="relu"), 87 | tf.keras.layers.Dense(units=100, activation="relu"), 88 | tf.keras.layers.Dense( 89 | units=np.prod(dims), name="recon", activation=None 90 | ), 91 | tf.keras.layers.Reshape(dims), 92 | ] 93 | ) 94 | 95 | embedder = ParametricUMAP( 96 | encoder=encoder, 97 | decoder=decoder, 98 | dims=dims, 99 | parametric_reconstruction=True, 100 | verbose=True, 101 | ) 102 | embedding = embedder.fit_transform(moon_dataset) 103 | # completes successfully 104 | assert embedding is not None 105 | assert embedding.shape == (moon_dataset.shape[0], 2) 106 | 107 | 108 | @tf_only 109 | def test_validation(moon_dataset): 110 | """tests adding a validation dataset""" 111 | X_train, X_valid = train_test_split(moon_dataset, train_size=0.5) 112 | embedder = ParametricUMAP( 113 | parametric_reconstruction=True, reconstruction_validation=X_valid, verbose=True 114 | ) 115 | embedding = embedder.fit_transform(X_train) 116 | # completes successfully 117 | assert embedding is not None 118 | assert embedding.shape == (X_train.shape[0], 2) 119 | 120 | 121 | @not_windows 122 | @tf_only 123 | def test_save_load(moon_dataset): 124 | """tests saving and loading""" 125 | 126 | embedder = ParametricUMAP() 127 | embedding = embedder.fit_transform(moon_dataset) 128 | # completes successfully 129 | assert embedding is not None 130 | assert embedding.shape == (moon_dataset.shape[0], 2) 131 | 132 | # Portable tempfile 133 | model_path = tempfile.mkdtemp(suffix="_umap_model") 134 | 135 | embedder.save(model_path) 136 | loaded_model = load_ParametricUMAP(model_path) 137 | assert loaded_model is not None 138 | 139 | loaded_embedding = loaded_model.transform(moon_dataset) 140 | assert_array_almost_equal( 141 | embedding, 142 | loaded_embedding, 143 | decimal=5, 144 | err_msg="Loaded model transform fails to match original embedding", 145 | ) 146 | -------------------------------------------------------------------------------- /umap/tests/test_plot.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | import umap 4 | 5 | # Globals, used for all the tests 6 | SEED = 189212 # 0b101110001100011100 7 | np.random.seed(SEED) 8 | 9 | try: 10 | from umap import plot 11 | 12 | IMPORT_PLOT = True 13 | except ImportError: 14 | IMPORT_PLOT = False 15 | 16 | plot_only = pytest.mark.skipif(not IMPORT_PLOT, reason="umap plot not found.") 17 | 18 | 19 | @pytest.fixture(scope="session") 20 | def mapper(iris): 21 | return umap.UMAP(n_epochs=100).fit(iris.data) 22 | 23 | 24 | # These tests requires revision: Refactoring is 25 | # needed as there is no assertion nor 26 | # property verification. 27 | @plot_only 28 | def test_plot_runs_at_all(mapper, iris, iris_selection): 29 | from umap import plot as umap_plot 30 | 31 | umap_plot.points(mapper) 32 | umap_plot.points(mapper, labels=iris.target) 33 | umap_plot.points(mapper, values=iris.data[:, 0]) 34 | umap_plot.points(mapper, labels=iris.target, subset_points=iris_selection) 35 | umap_plot.points(mapper, values=iris.data[:, 0], subset_points=iris_selection) 36 | umap_plot.points(mapper, theme="fire") 37 | umap_plot.diagnostic(mapper, diagnostic_type="all") 38 | umap_plot.diagnostic(mapper, diagnostic_type="neighborhood") 39 | umap_plot.connectivity(mapper) 40 | umap_plot.connectivity(mapper, theme="fire") 41 | umap_plot.connectivity(mapper, edge_bundling="hammer") 42 | umap_plot.interactive(mapper) 43 | umap_plot.interactive(mapper, labels=iris.target) 44 | umap_plot.interactive(mapper, values=iris.data[:, 0]) 45 | umap_plot.interactive(mapper, labels=iris.target, subset_points=iris_selection) 46 | umap_plot.interactive(mapper, values=iris.data[:, 0], subset_points=iris_selection) 47 | umap_plot.interactive(mapper, theme="fire") 48 | umap_plot._datashade_points(mapper.embedding_) 49 | umap_plot._datashade_points(mapper.embedding_, labels=iris.target) 50 | umap_plot._datashade_points(mapper.embedding_, values=iris.data[:, 0]) 51 | -------------------------------------------------------------------------------- /umap/tests/test_spectral.py: -------------------------------------------------------------------------------- 1 | from umap.spectral import spectral_layout, tswspectral_layout 2 | 3 | import numpy as np 4 | import pytest 5 | import re 6 | from scipy.version import full_version as scipy_full_version_ 7 | from warnings import catch_warnings 8 | 9 | scipy_full_version = tuple( 10 | int(n) 11 | for n in re.findall(r"[0-9]+\.[0-9]+\.?[0-9]*", scipy_full_version_)[0].split(".") 12 | ) 13 | 14 | 15 | @pytest.mark.skipif( 16 | scipy_full_version < (1, 10) or scipy_full_version >= (1, 15), 17 | reason="SciPy installing with Python 3.7 does not converge under same circumstances", 18 | ) 19 | def test_tsw_spectral_init(iris): 20 | # create an arbitrary (dense) random affinity matrix 21 | seed = 42 22 | rng = np.random.default_rng(seed=seed) 23 | # matrix must be of sufficient size of lobpcg will refuse to work on it 24 | n = 20 25 | graph = rng.standard_normal(n * n).reshape((n, n)) ** 2 26 | graph = graph.T * graph 27 | 28 | spec = spectral_layout(None, graph, 2, random_state=seed**2) 29 | tsw_spec = tswspectral_layout(None, graph, 2, random_state=seed**2, tol=1e-8) 30 | 31 | # Make sure the two methods produce similar embeddings. 32 | rmsd = np.mean(np.sum((spec - tsw_spec) ** 2, axis=1)) 33 | assert ( 34 | rmsd < 1e-6 35 | ), "tsvd-warmed spectral init insufficiently close to standard spectral init" 36 | 37 | 38 | @pytest.mark.skipif( 39 | scipy_full_version < (1, 10), 40 | reason="SciPy installing with Py 3.7 does not warn reliably on convergence failure", 41 | ) 42 | def test_ensure_fallback_to_random_on_spectral_failure(): 43 | dim = 1000 44 | k = 10 45 | assert k >= 10 46 | assert dim // 10 > k 47 | y = np.eye(dim, k=1) 48 | u = np.random.random((dim, dim // 10)) 49 | graph = y + y.T + u @ u.T 50 | with pytest.warns(UserWarning, match="Spectral initialisation failed!"): 51 | tswspectral_layout(u, graph, k, random_state=42, maxiter=2, method="lobpcg") 52 | -------------------------------------------------------------------------------- /umap/tests/test_umap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmcinnes/umap/15e55bb6a1ca23b8d6040d9d6184a7ae98325ace/umap/tests/test_umap.py -------------------------------------------------------------------------------- /umap/tests/test_umap_get_feature_names_out.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.datasets import make_classification 3 | from sklearn.pipeline import Pipeline, FeatureUnion 4 | 5 | from ..umap_ import UMAP 6 | 7 | 8 | def test_get_feature_names_out(): 9 | X, _ = make_classification(n_samples=30, n_features=10, random_state=42) 10 | umap = UMAP( 11 | n_neighbors=10, 12 | min_dist=0.01, 13 | n_epochs=200, 14 | random_state=42, 15 | n_components=3, 16 | ).fit(X) 17 | # get_feature_names_out should not care about passed features. 18 | features_names_in = [f"feature{i}" for i in range(10)] 19 | feature_names_out = umap.get_feature_names_out(input_features=features_names_in) 20 | expected = ["umap0", "umap1", "umap2"] 21 | np.testing.assert_array_equal(feature_names_out, expected) 22 | 23 | 24 | def test_get_feature_names_out_default(): 25 | X, _ = make_classification(n_samples=30, n_features=10, random_state=42) 26 | umap = UMAP( 27 | n_neighbors=10, 28 | min_dist=0.01, 29 | n_epochs=200, 30 | random_state=42, 31 | n_components=3, 32 | ).fit(X) 33 | # get_feature_names_out should generate feature names in a certain format if no names are passed. 34 | default_result = umap.get_feature_names_out() 35 | expected_default_result = ["umap0", "umap1", "umap2"] 36 | np.testing.assert_array_equal(default_result, expected_default_result) 37 | 38 | 39 | def test_get_feature_names_out_multicomponent(): 40 | # The output length should be equal to the number of components UMAP generates. 41 | X, _ = make_classification(n_samples=30, n_features=10, random_state=42) 42 | umap = UMAP( 43 | n_neighbors=10, 44 | min_dist=0.01, 45 | n_epochs=200, 46 | random_state=42, 47 | n_components=9, 48 | ).fit(X) 49 | result_umap = umap.get_feature_names_out() 50 | expected_umap_result = [f"umap{i}" for i in range(9)] 51 | assert len(result_umap) == 9 52 | np.testing.assert_array_equal(result_umap, expected_umap_result) 53 | 54 | 55 | 56 | def test_get_feature_names_out_featureunion(): 57 | X, _ = make_classification(n_samples=30, n_features=10, random_state=42) 58 | pipeline = Pipeline( 59 | [ 60 | ( 61 | "umap_pipeline", 62 | FeatureUnion( 63 | [ 64 | ("umap1", UMAP(n_components=2)), 65 | ("umap2", UMAP(n_components=3)), 66 | ] 67 | ), 68 | ) 69 | ] 70 | ) 71 | 72 | pipeline.fit(X) 73 | feature_names = pipeline.get_feature_names_out() 74 | expected_feature_names = np.array( 75 | [ 76 | "umap1__umap0", 77 | "umap1__umap1", 78 | "umap2__umap0", 79 | "umap2__umap1", 80 | "umap2__umap2", 81 | ] 82 | ) 83 | np.testing.assert_array_equal(feature_names, expected_feature_names) 84 | -------------------------------------------------------------------------------- /umap/tests/test_umap_nn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | from numpy.testing import assert_array_almost_equal 4 | from sklearn.neighbors import KDTree 5 | from sklearn.preprocessing import normalize 6 | 7 | from umap import distances as dist 8 | from umap.umap_ import ( 9 | nearest_neighbors, 10 | smooth_knn_dist, 11 | ) 12 | 13 | 14 | # =================================================== 15 | # Nearest Neighbour Test cases 16 | # =================================================== 17 | 18 | 19 | # nearest_neighbours metric parameter validation 20 | # ----------------------------------------------- 21 | def test_nn_bad_metric(nn_data): 22 | with pytest.raises(ValueError): 23 | nearest_neighbors(nn_data, 10, 42, {}, False, np.random) 24 | 25 | 26 | def test_nn_bad_metric_sparse_data(sparse_nn_data): 27 | with pytest.raises(ValueError): 28 | nearest_neighbors( 29 | sparse_nn_data, 30 | 10, 31 | "seuclidean", 32 | {}, 33 | False, 34 | np.random, 35 | ) 36 | 37 | 38 | # ------------------------------------------------- 39 | # Utility functions for Nearest Neighbour 40 | # ------------------------------------------------- 41 | 42 | 43 | def knn(indices, nn_data): # pragma: no cover 44 | tree = KDTree(nn_data) 45 | true_indices = tree.query(nn_data, 10, return_distance=False) 46 | num_correct = 0.0 47 | for i in range(nn_data.shape[0]): 48 | num_correct += np.sum(np.isin(true_indices[i], indices[i])) 49 | return num_correct / (nn_data.shape[0] * 10) 50 | 51 | 52 | def smooth_knn(nn_data, local_connectivity=1.0): 53 | knn_indices, knn_dists, _ = nearest_neighbors( 54 | nn_data, 10, "euclidean", {}, False, np.random 55 | ) 56 | sigmas, rhos = smooth_knn_dist( 57 | knn_dists, 10.0, local_connectivity=local_connectivity 58 | ) 59 | shifted_dists = knn_dists - rhos[:, np.newaxis] 60 | shifted_dists[shifted_dists < 0.0] = 0.0 61 | vals = np.exp(-(shifted_dists / sigmas[:, np.newaxis])) 62 | norms = np.sum(vals, axis=1) 63 | return norms 64 | 65 | 66 | @pytest.mark.skip() 67 | def test_nn_descent_neighbor_accuracy(nn_data): # pragma: no cover 68 | knn_indices, knn_dists, _ = nearest_neighbors( 69 | nn_data, 10, "euclidean", {}, False, np.random 70 | ) 71 | percent_correct = knn(knn_indices, nn_data) 72 | assert ( 73 | percent_correct >= 0.85 74 | ), "NN-descent did not get 89% accuracy on nearest neighbors" 75 | 76 | 77 | @pytest.mark.skip() 78 | def test_nn_descent_neighbor_accuracy_low_memory(nn_data): # pragma: no cover 79 | knn_indices, knn_dists, _ = nearest_neighbors( 80 | nn_data, 10, "euclidean", {}, False, np.random, low_memory=True 81 | ) 82 | percent_correct = knn(knn_indices, nn_data) 83 | assert ( 84 | percent_correct >= 0.89 85 | ), "NN-descent did not get 89% accuracy on nearest neighbors" 86 | 87 | 88 | @pytest.mark.skip() 89 | def test_angular_nn_descent_neighbor_accuracy(nn_data): # pragma: no cover 90 | knn_indices, knn_dists, _ = nearest_neighbors( 91 | nn_data, 10, "cosine", {}, True, np.random 92 | ) 93 | angular_data = normalize(nn_data, norm="l2") 94 | percent_correct = knn(knn_indices, angular_data) 95 | assert ( 96 | percent_correct >= 0.85 97 | ), "NN-descent did not get 89% accuracy on nearest neighbors" 98 | 99 | 100 | @pytest.mark.skip() 101 | def test_sparse_nn_descent_neighbor_accuracy(sparse_nn_data): # pragma: no cover 102 | knn_indices, knn_dists, _ = nearest_neighbors( 103 | sparse_nn_data, 20, "euclidean", {}, False, np.random 104 | ) 105 | percent_correct = knn(knn_indices, sparse_nn_data.todense()) 106 | assert ( 107 | percent_correct >= 0.75 108 | ), "Sparse NN-descent did not get 90% accuracy on nearest neighbors" 109 | 110 | 111 | @pytest.mark.skip() 112 | def test_sparse_nn_descent_neighbor_accuracy_low_memory( 113 | sparse_nn_data, 114 | ): # pragma: no cover 115 | knn_indices, knn_dists, _ = nearest_neighbors( 116 | sparse_nn_data, 20, "euclidean", {}, False, np.random, low_memory=True 117 | ) 118 | percent_correct = knn(knn_indices, sparse_nn_data.todense()) 119 | assert ( 120 | percent_correct >= 0.85 121 | ), "Sparse NN-descent did not get 90% accuracy on nearest neighbors" 122 | 123 | 124 | @pytest.mark.skip() 125 | def test_nn_descent_neighbor_accuracy_callable_metric(nn_data): # pragma: no cover 126 | knn_indices, knn_dists, _ = nearest_neighbors( 127 | nn_data, 10, dist.euclidean, {}, False, np.random 128 | ) 129 | 130 | percent_correct = knn(knn_indices, nn_data) 131 | assert ( 132 | percent_correct >= 0.95 133 | ), "NN-descent did not get 95% accuracy on nearest neighbors with callable metric" 134 | 135 | 136 | @pytest.mark.skip() 137 | def test_sparse_angular_nn_descent_neighbor_accuracy( 138 | sparse_nn_data, 139 | ): # pragma: no cover 140 | knn_indices, knn_dists, _ = nearest_neighbors( 141 | sparse_nn_data, 20, "cosine", {}, True, np.random 142 | ) 143 | angular_data = normalize(sparse_nn_data, norm="l2").toarray() 144 | percent_correct = knn(knn_indices, angular_data) 145 | assert ( 146 | percent_correct >= 0.90 147 | ), "Sparse NN-descent did not get 90% accuracy on nearest neighbors" 148 | 149 | 150 | def test_smooth_knn_dist_l1norms(nn_data): 151 | norms = smooth_knn(nn_data) 152 | assert_array_almost_equal( 153 | norms, 154 | 1.0 + np.log2(10) * np.ones(norms.shape[0]), 155 | decimal=3, 156 | err_msg="Smooth knn-dists does not give expected" "norms", 157 | ) 158 | 159 | 160 | def test_smooth_knn_dist_l1norms_w_connectivity(nn_data): 161 | norms = smooth_knn(nn_data, local_connectivity=1.75) 162 | assert_array_almost_equal( 163 | norms, 164 | 1.0 + np.log2(10) * np.ones(norms.shape[0]), 165 | decimal=3, 166 | err_msg="Smooth knn-dists does not give expected" 167 | "norms for local_connectivity=1.75", 168 | ) 169 | -------------------------------------------------------------------------------- /umap/tests/test_umap_repeated_data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from umap import UMAP 3 | 4 | 5 | # =================================================== 6 | # Spatial Data Test cases 7 | # =================================================== 8 | # Use force_approximation_algorithm in order to test 9 | # the region of the code that is called for n>4096 10 | # --------------------------------------------------- 11 | 12 | 13 | def test_repeated_points_large_sparse_spatial(sparse_spatial_data_repeats): 14 | model = UMAP( 15 | n_neighbors=3, 16 | unique=True, 17 | force_approximation_algorithm=True, 18 | n_epochs=20, 19 | verbose=True, 20 | ).fit(sparse_spatial_data_repeats) 21 | assert np.unique(model.embedding_[0:2], axis=0).shape[0] == 1 22 | 23 | 24 | def test_repeated_points_small_sparse_spatial(sparse_spatial_data_repeats): 25 | model = UMAP(n_neighbors=3, unique=True, n_epochs=20).fit( 26 | sparse_spatial_data_repeats 27 | ) 28 | assert np.unique(model.embedding_[0:2], axis=0).shape[0] == 1 29 | 30 | 31 | # Use force_approximation_algorithm in order to test the region 32 | # of the code that is called for n>4096 33 | def test_repeated_points_large_dense_spatial(spatial_repeats): 34 | model = UMAP( 35 | n_neighbors=3, unique=True, force_approximation_algorithm=True, n_epochs=50 36 | ).fit(spatial_repeats) 37 | assert np.unique(model.embedding_[0:2], axis=0).shape[0] == 1 38 | 39 | 40 | def test_repeated_points_small_dense_spatial(spatial_repeats): 41 | model = UMAP(n_neighbors=3, unique=True, n_epochs=20).fit(spatial_repeats) 42 | assert np.unique(model.embedding_[0:2], axis=0).shape[0] == 1 43 | 44 | 45 | # =================================================== 46 | # Binary Data Test cases 47 | # =================================================== 48 | # Use force_approximation_algorithm in order to test 49 | # the region of the code that is called for n>4096 50 | # --------------------------------------------------- 51 | 52 | 53 | def test_repeated_points_large_sparse_binary(sparse_binary_data_repeats): 54 | model = UMAP( 55 | n_neighbors=3, unique=True, force_approximation_algorithm=True, n_epochs=50 56 | ).fit(sparse_binary_data_repeats) 57 | assert np.unique(model.embedding_[0:2], axis=0).shape[0] == 1 58 | 59 | 60 | def test_repeated_points_small_sparse_binary(sparse_binary_data_repeats): 61 | model = UMAP(n_neighbors=3, unique=True, n_epochs=20).fit( 62 | sparse_binary_data_repeats 63 | ) 64 | assert np.unique(model.embedding_[0:2], axis=0).shape[0] == 1 65 | 66 | 67 | # Use force_approximation_algorithm in order to test 68 | # the region of the code that is called for n>4096 69 | def test_repeated_points_large_dense_binary(binary_repeats): 70 | model = UMAP( 71 | n_neighbors=3, unique=True, force_approximation_algorithm=True, n_epochs=20 72 | ).fit(binary_repeats) 73 | assert np.unique(model.embedding_[0:2], axis=0).shape[0] == 1 74 | 75 | 76 | def test_repeated_points_small_dense_binary(binary_repeats): 77 | model = UMAP(n_neighbors=3, unique=True, n_epochs=20).fit(binary_repeats) 78 | assert np.unique(binary_repeats[0:2], axis=0).shape[0] == 1 79 | assert np.unique(model.embedding_[0:2], axis=0).shape[0] == 1 80 | 81 | 82 | # =================================================== 83 | # Repeated Data Test cases 84 | # =================================================== 85 | 86 | 87 | # ---------------------------------------------------- 88 | # This should test whether the n_neighbours are being 89 | # reduced properly when your n_neighbours is larger 90 | # than the unique data set size 91 | # ---------------------------------------------------- 92 | def test_repeated_points_large_n(repetition_dense): 93 | model = UMAP(n_neighbors=5, unique=True, n_epochs=20).fit(repetition_dense) 94 | assert model._n_neighbors == 3 95 | -------------------------------------------------------------------------------- /umap/tests/test_umap_trustworthiness.py: -------------------------------------------------------------------------------- 1 | from umap import UMAP 2 | from sklearn.datasets import make_blobs 3 | from sklearn.metrics import pairwise_distances 4 | import numpy as np 5 | import scipy.sparse 6 | 7 | try: 8 | # works for sklearn>=0.22 9 | from sklearn.manifold import trustworthiness 10 | except ImportError: 11 | # this is to comply with requirements (scikit-learn>=0.20) 12 | # More recent versions of sklearn have exposed trustworthiness 13 | # in top level module API 14 | # see: https://github.com/scikit-learn/scikit-learn/pull/15337 15 | from sklearn.manifold.t_sne import trustworthiness 16 | 17 | # =================================================== 18 | # UMAP Trustworthiness Test cases 19 | # =================================================== 20 | 21 | 22 | def test_umap_sparse_trustworthiness(sparse_test_data): 23 | embedding = UMAP(n_neighbors=10, n_epochs=100).fit_transform(sparse_test_data[:100]) 24 | trust = trustworthiness(sparse_test_data[:100].toarray(), embedding, n_neighbors=10) 25 | assert ( 26 | trust >= 0.88 27 | ), "Insufficiently trustworthy embedding for sparse test dataset: {}".format(trust) 28 | 29 | 30 | def test_umap_trustworthiness_fast_approx(nn_data): 31 | data = nn_data[:50] 32 | embedding = UMAP( 33 | n_neighbors=10, 34 | min_dist=0.01, 35 | random_state=42, 36 | n_epochs=100, 37 | force_approximation_algorithm=True, 38 | ).fit_transform(data) 39 | trust = trustworthiness(data, embedding, n_neighbors=10) 40 | assert ( 41 | trust >= 0.8 42 | ), "Insufficiently trustworthy embedding for" "nn dataset: {}".format(trust) 43 | 44 | 45 | def test_umap_trustworthiness_random_init(nn_data): 46 | data = nn_data[:50] 47 | embedding = UMAP( 48 | n_neighbors=10, min_dist=0.01, random_state=42, n_epochs=100, init="random" 49 | ).fit_transform(data) 50 | trust = trustworthiness(data, embedding, n_neighbors=10) 51 | assert ( 52 | trust >= 0.8 53 | ), "Insufficiently trustworthy embedding for" "nn dataset: {}".format(trust) 54 | 55 | 56 | def test_supervised_umap_trustworthiness(): 57 | data, labels = make_blobs(50, cluster_std=0.5, random_state=42) 58 | embedding = UMAP( 59 | n_neighbors=10, min_dist=0.01, random_state=42, n_epochs=100 60 | ).fit_transform(data, labels) 61 | trust = trustworthiness(data, embedding, n_neighbors=10) 62 | assert ( 63 | trust >= 0.95 64 | ), "Insufficiently trustworthy embedding for" "blobs dataset: {}".format(trust) 65 | 66 | 67 | def test_semisupervised_umap_trustworthiness(): 68 | data, labels = make_blobs(50, cluster_std=0.5, random_state=42) 69 | labels[10:30] = -1 70 | embedding = UMAP( 71 | n_neighbors=10, min_dist=0.01, random_state=42, n_epochs=100 72 | ).fit_transform(data, labels) 73 | trust = trustworthiness(data, embedding, n_neighbors=10) 74 | assert ( 75 | trust >= 0.95 76 | ), "Insufficiently trustworthy embedding for" "blobs dataset: {}".format(trust) 77 | 78 | 79 | def test_metric_supervised_umap_trustworthiness(): 80 | data, labels = make_blobs(50, cluster_std=0.5, random_state=42) 81 | embedding = UMAP( 82 | n_neighbors=10, 83 | min_dist=0.01, 84 | target_metric="l1", 85 | target_weight=0.8, 86 | n_epochs=100, 87 | random_state=42, 88 | ).fit_transform(data, labels) 89 | trust = trustworthiness(data, embedding, n_neighbors=10) 90 | assert ( 91 | trust >= 0.95 92 | ), "Insufficiently trustworthy embedding for" "blobs dataset: {}".format(trust) 93 | 94 | 95 | def test_string_metric_supervised_umap_trustworthiness(): 96 | data, labels = make_blobs(50, cluster_std=0.5, random_state=42) 97 | labels = np.array(["this", "that", "other"])[labels] 98 | embedding = UMAP( 99 | n_neighbors=10, 100 | min_dist=0.01, 101 | target_metric="string", 102 | target_weight=0.8, 103 | n_epochs=100, 104 | random_state=42, 105 | ).fit_transform(data, labels) 106 | trust = trustworthiness(data, embedding, n_neighbors=10) 107 | assert ( 108 | trust >= 0.95 109 | ), "Insufficiently trustworthy embedding for" "blobs dataset: {}".format(trust) 110 | 111 | 112 | def test_discrete_metric_supervised_umap_trustworthiness(): 113 | data, labels = make_blobs(50, cluster_std=0.5, random_state=42) 114 | embedding = UMAP( 115 | n_neighbors=10, 116 | min_dist=0.01, 117 | target_metric="ordinal", 118 | target_weight=0.8, 119 | n_epochs=100, 120 | random_state=42, 121 | ).fit_transform(data, labels) 122 | trust = trustworthiness(data, embedding, n_neighbors=10) 123 | assert ( 124 | trust >= 0.95 125 | ), "Insufficiently trustworthy embedding for" "blobs dataset: {}".format(trust) 126 | 127 | 128 | def test_count_metric_supervised_umap_trustworthiness(): 129 | data, labels = make_blobs(50, cluster_std=0.5, random_state=42) 130 | labels = (labels**2) + 2 * labels 131 | embedding = UMAP( 132 | n_neighbors=10, 133 | min_dist=0.01, 134 | target_metric="count", 135 | target_weight=0.8, 136 | n_epochs=100, 137 | random_state=42, 138 | ).fit_transform(data, labels) 139 | trust = trustworthiness(data, embedding, n_neighbors=10) 140 | assert ( 141 | trust >= 0.95 142 | ), "Insufficiently trustworthy embedding for" "blobs dataset: {}".format(trust) 143 | 144 | 145 | def test_sparse_precomputed_metric_umap_trustworthiness(): 146 | data, labels = make_blobs(50, cluster_std=0.5, random_state=42) 147 | dmat = scipy.sparse.csr_matrix(pairwise_distances(data)) 148 | embedding = UMAP( 149 | n_neighbors=10, 150 | min_dist=0.01, 151 | random_state=42, 152 | n_epochs=100, 153 | metric="precomputed", 154 | ).fit_transform(dmat) 155 | trust = trustworthiness(data, embedding, n_neighbors=10) 156 | assert ( 157 | trust >= 0.75 158 | ), "Insufficiently trustworthy embedding for" "nn dataset: {}".format(trust) 159 | -------------------------------------------------------------------------------- /umap/utils.py: -------------------------------------------------------------------------------- 1 | # Author: Leland McInnes 2 | # 3 | # License: BSD 3 clause 4 | 5 | import time 6 | from warnings import warn 7 | 8 | import numpy as np 9 | import numba 10 | from sklearn.utils.validation import check_is_fitted 11 | import scipy.sparse 12 | 13 | 14 | @numba.njit(parallel=True) 15 | def fast_knn_indices(X, n_neighbors): 16 | """A fast computation of knn indices. 17 | 18 | Parameters 19 | ---------- 20 | X: array of shape (n_samples, n_features) 21 | The input data to compute the k-neighbor indices of. 22 | 23 | n_neighbors: int 24 | The number of nearest neighbors to compute for each sample in ``X``. 25 | 26 | Returns 27 | ------- 28 | knn_indices: array of shape (n_samples, n_neighbors) 29 | The indices on the ``n_neighbors`` closest points in the dataset. 30 | """ 31 | knn_indices = np.empty((X.shape[0], n_neighbors), dtype=np.int32) 32 | for row in numba.prange(X.shape[0]): 33 | # v = np.argsort(X[row]) # Need to call argsort this way for numba 34 | v = X[row].argsort(kind="quicksort") 35 | v = v[:n_neighbors] 36 | knn_indices[row] = v 37 | return knn_indices 38 | 39 | 40 | @numba.njit("i4(i8[:])") 41 | def tau_rand_int(state): 42 | """A fast (pseudo)-random number generator. 43 | 44 | Parameters 45 | ---------- 46 | state: array of int64, shape (3,) 47 | The internal state of the rng 48 | 49 | Returns 50 | ------- 51 | A (pseudo)-random int32 value 52 | """ 53 | state[0] = (((state[0] & 4294967294) << 12) & 0xFFFFFFFF) ^ ( 54 | (((state[0] << 13) & 0xFFFFFFFF) ^ state[0]) >> 19 55 | ) 56 | state[1] = (((state[1] & 4294967288) << 4) & 0xFFFFFFFF) ^ ( 57 | (((state[1] << 2) & 0xFFFFFFFF) ^ state[1]) >> 25 58 | ) 59 | state[2] = (((state[2] & 4294967280) << 17) & 0xFFFFFFFF) ^ ( 60 | (((state[2] << 3) & 0xFFFFFFFF) ^ state[2]) >> 11 61 | ) 62 | 63 | return state[0] ^ state[1] ^ state[2] 64 | 65 | 66 | @numba.njit("f4(i8[:])") 67 | def tau_rand(state): 68 | """A fast (pseudo)-random number generator for floats in the range [0,1] 69 | 70 | Parameters 71 | ---------- 72 | state: array of int64, shape (3,) 73 | The internal state of the rng 74 | 75 | Returns 76 | ------- 77 | A (pseudo)-random float32 in the interval [0, 1] 78 | """ 79 | integer = tau_rand_int(state) 80 | return abs(float(integer) / 0x7FFFFFFF) 81 | 82 | 83 | @numba.njit() 84 | def norm(vec): 85 | """Compute the (standard l2) norm of a vector. 86 | 87 | Parameters 88 | ---------- 89 | vec: array of shape (dim,) 90 | 91 | Returns 92 | ------- 93 | The l2 norm of vec. 94 | """ 95 | result = 0.0 96 | for i in range(vec.shape[0]): 97 | result += vec[i] ** 2 98 | return np.sqrt(result) 99 | 100 | 101 | @numba.njit(parallel=True) 102 | def submatrix(dmat, indices_col, n_neighbors): 103 | """Return a submatrix given an orginal matrix and the indices to keep. 104 | 105 | Parameters 106 | ---------- 107 | dmat: array, shape (n_samples, n_samples) 108 | Original matrix. 109 | 110 | indices_col: array, shape (n_samples, n_neighbors) 111 | Indices to keep. Each row consists of the indices of the columns. 112 | 113 | n_neighbors: int 114 | Number of neighbors. 115 | 116 | Returns 117 | ------- 118 | submat: array, shape (n_samples, n_neighbors) 119 | The corresponding submatrix. 120 | """ 121 | n_samples_transform, n_samples_fit = dmat.shape 122 | submat = np.zeros((n_samples_transform, n_neighbors), dtype=dmat.dtype) 123 | for i in numba.prange(n_samples_transform): 124 | for j in numba.prange(n_neighbors): 125 | submat[i, j] = dmat[i, indices_col[i, j]] 126 | return submat 127 | 128 | 129 | # Generates a timestamp for use in logging messages when verbose=True 130 | def ts(): 131 | return time.ctime(time.time()) 132 | 133 | 134 | # I'm not enough of a numba ninja to numba this successfully. 135 | # np.arrays of lists, which are objects... 136 | def csr_unique(matrix, return_index=True, return_inverse=True, return_counts=True): 137 | """Find the unique elements of a sparse csr matrix. 138 | We don't explicitly construct the unique matrix leaving that to the user 139 | who may not want to duplicate a massive array in memory. 140 | Returns the indices of the input array that give the unique values. 141 | Returns the indices of the unique array that reconstructs the input array. 142 | Returns the number of times each unique row appears in the input matrix. 143 | 144 | matrix: a csr matrix 145 | return_index = bool, optional 146 | If true, return the row indices of 'matrix' 147 | return_inverse: bool, optional 148 | If true, return the indices of the unique array that can be 149 | used to reconstruct 'matrix'. 150 | return_counts = bool, optional 151 | If true, returns the number of times each unique item appears in 'matrix' 152 | 153 | The unique matrix can computed via 154 | unique_matrix = matrix[index] 155 | and the original matrix reconstructed via 156 | unique_matrix[inverse] 157 | """ 158 | lil_matrix = matrix.tolil() 159 | rows = np.asarray( 160 | [tuple(x + y) for x, y in zip(lil_matrix.rows, lil_matrix.data)], dtype=object 161 | ) 162 | return_values = return_counts + return_inverse + return_index 163 | return np.unique( 164 | rows, 165 | return_index=return_index, 166 | return_inverse=return_inverse, 167 | return_counts=return_counts, 168 | )[1 : (return_values + 1)] 169 | 170 | 171 | def disconnected_vertices(model): 172 | """ 173 | Returns a boolean vector indicating which vertices are disconnected from the umap graph. 174 | These vertices will often be scattered across the space and make it difficult to focus on the main 175 | manifold. They can either be filtered and have UMAP re-run or simply filtered from the interactive plotting tool 176 | via the subset_points parameter. 177 | Use ~disconnected_vertices(model) to only plot the connected points. 178 | Parameters 179 | ---------- 180 | model: a trained UMAP model 181 | 182 | Returns 183 | ------- 184 | A boolean vector indicating which points are disconnected 185 | """ 186 | check_is_fitted(model, "graph_") 187 | if model.unique: 188 | vertices_disconnected = ( 189 | np.array(model.graph_[model._unique_inverse_].sum(axis=1)).flatten() == 0 190 | ) 191 | else: 192 | vertices_disconnected = np.array(model.graph_.sum(axis=1)).flatten() == 0 193 | return vertices_disconnected 194 | 195 | 196 | def average_nn_distance(dist_matrix): 197 | """Calculate the average distance to each points nearest neighbors. 198 | 199 | Parameters 200 | ---------- 201 | dist_matrix: a csr_matrix 202 | A distance matrix (usually umap_model.graph_) 203 | 204 | Returns 205 | ------- 206 | An array with the average distance to each points nearest neighbors 207 | 208 | """ 209 | (row_idx, col_idx, val) = scipy.sparse.find(dist_matrix) 210 | 211 | # Count/sum is done per row 212 | count_non_zero_elems = np.bincount(row_idx) 213 | sum_non_zero_elems = np.bincount(row_idx, weights=val) 214 | averages = sum_non_zero_elems / count_non_zero_elems 215 | 216 | if any(np.isnan(averages)): 217 | warn( 218 | "Embedding contains disconnected vertices which will be ignored." 219 | "Use umap.utils.disconnected_vertices() to identify them." 220 | ) 221 | 222 | return averages 223 | -------------------------------------------------------------------------------- /umap/validation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numba 3 | 4 | from sklearn.neighbors import KDTree 5 | from umap.distances import named_distances 6 | 7 | 8 | @numba.njit() 9 | def trustworthiness_vector_bulk( 10 | indices_source, indices_embedded, max_k 11 | ): # pragma: no cover 12 | 13 | n_samples = indices_embedded.shape[0] 14 | trustworthiness = np.zeros(max_k + 1, dtype=np.float64) 15 | 16 | for i in range(n_samples): 17 | for j in range(max_k): 18 | 19 | rank = 0 20 | while indices_source[i, rank] != indices_embedded[i, j]: 21 | rank += 1 22 | 23 | for k in range(j + 1, max_k + 1): 24 | if rank > k: 25 | trustworthiness[k] += rank - k 26 | 27 | for k in range(1, max_k + 1): 28 | trustworthiness[k] = 1.0 - trustworthiness[k] * ( 29 | 2.0 / (n_samples * k * (2.0 * n_samples - 3.0 * k - 1.0)) 30 | ) 31 | 32 | return trustworthiness 33 | 34 | 35 | def make_trustworthiness_calculator(metric): # pragma: no cover 36 | @numba.njit(parallel=True) 37 | def trustworthiness_vector_lowmem(source, indices_embedded, max_k): 38 | 39 | n_samples = indices_embedded.shape[0] 40 | trustworthiness = np.zeros(max_k + 1, dtype=np.float64) 41 | dist_vector = np.zeros(n_samples, dtype=np.float64) 42 | 43 | for i in range(n_samples): 44 | 45 | for j in numba.prange(n_samples): 46 | dist_vector[j] = metric(source[i], source[j]) 47 | 48 | indices_source = np.argsort(dist_vector) 49 | 50 | for j in range(max_k): 51 | 52 | rank = 0 53 | while indices_source[rank] != indices_embedded[i, j]: 54 | rank += 1 55 | 56 | for k in range(j + 1, max_k + 1): 57 | if rank > k: 58 | trustworthiness[k] += rank - k 59 | 60 | for k in range(1, max_k + 1): 61 | trustworthiness[k] = 1.0 - trustworthiness[k] * ( 62 | 2.0 / (n_samples * k * (2.0 * n_samples - 3.0 * k - 1.0)) 63 | ) 64 | 65 | trustworthiness[0] = 1.0 66 | 67 | return trustworthiness 68 | 69 | return trustworthiness_vector_lowmem 70 | 71 | 72 | def trustworthiness_vector( 73 | source, embedding, max_k, metric="euclidean" 74 | ): # pragma: no cover 75 | tree = KDTree(embedding, metric=metric) 76 | indices_embedded = tree.query(embedding, k=max_k, return_distance=False) 77 | # Drop the actual point itself 78 | indices_embedded = indices_embedded[:, 1:] 79 | 80 | dist = named_distances[metric] 81 | 82 | vec_calculator = make_trustworthiness_calculator(dist) 83 | 84 | result = vec_calculator(source, indices_embedded, max_k) 85 | 86 | return result 87 | --------------------------------------------------------------------------------