├── src └── radius_clustering │ ├── utils │ ├── __init__.py │ ├── random_manager.h │ ├── random_manager.cpp │ ├── emos.pyx │ ├── util_heap.h │ ├── mds.pyx │ ├── mds_core.cpp │ └── mds3-util.h │ ├── __init__.py │ ├── algorithms.py │ └── radius_clustering.py ├── tests ├── conftest.py ├── test_structural.py ├── test_regression.py ├── test_unit.py └── test_integration.py ├── .coverage ├── docs ├── source │ ├── images │ │ ├── exec_time.png │ │ ├── logo-lias.jpg │ │ └── exec_time_optimized.png │ ├── api.rst │ ├── _static │ │ └── styles │ │ │ └── custom.css │ ├── scss │ │ └── custom.scss │ ├── conf.py │ ├── index.rst │ ├── details.rst │ ├── usage.rst │ └── installation.rst ├── requirements.txt ├── Makefile └── make.bat ├── examples ├── GALLERY_HEADER.rst ├── plot_benchmark_custom.py └── plot_iris_example.py ├── environment.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .github ├── ISSUE_TEMPLATE │ ├── doc_improvement.yml │ ├── feature_request.yml │ └── bug_report.yml ├── workflows │ ├── lint.yml │ ├── tests.yml │ ├── sphinx.yml │ ├── pr_build.yml │ └── build_wheels.yml └── PULL_REQUEST_TEMPLATE.md ├── scripts └── build_wheel.sh ├── CITATION.cff ├── setup.py ├── CONTRIBUTING.md ├── PRESENTATION.md ├── CHANGELOG.md ├── pyproject.toml ├── CODE_OF_CONDUCT.md ├── EXPERIMENTS.md ├── README.md └── notebooks └── comparison_example.ipynb /src/radius_clustering/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pathlib import Path 3 | 4 | -------------------------------------------------------------------------------- /.coverage: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/radius_clustering/main/.coverage 
-------------------------------------------------------------------------------- /docs/source/images/exec_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/radius_clustering/main/docs/source/images/exec_time.png -------------------------------------------------------------------------------- /docs/source/images/logo-lias.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/radius_clustering/main/docs/source/images/logo-lias.jpg -------------------------------------------------------------------------------- /docs/source/images/exec_time_optimized.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scikit-learn-contrib/radius_clustering/main/docs/source/images/exec_time_optimized.png -------------------------------------------------------------------------------- /examples/GALLERY_HEADER.rst: -------------------------------------------------------------------------------- 1 | .. _general_examples: 2 | 3 | Examples 4 | ======== 5 | 6 | This is the gallery of examples using the Radius Clustering package. 
"""radius_clustering package: clustering under radius constraint via MDS.

Re-exports the :class:`RadiusClustering` estimator as the public API.
"""

# Import the main clustering class
from .radius_clustering import RadiusClustering

__all__ = ["RadiusClustering"]
__version__ = "1.4.2"
from sklearn.utils.estimator_checks import parametrize_with_checks
def test_import():
    # Smoke test: the package itself is importable.
    import radius_clustering as rad


def test_from_import():
    # Smoke test: the public estimator is importable directly.
    from radius_clustering import RadiusClustering


from radius_clustering import RadiusClustering

# Run the full scikit-learn estimator-API compliance suite against a
# default-constructed RadiusClustering instance.
@parametrize_with_checks([RadiusClustering()])
def test_check_estimator_api_consistency(estimator, check, request):

    """Check the API consistency of the RadiusClustering estimator
    """
    check(estimator)
#ifndef RANDOM_MANAGER_H
#define RANDOM_MANAGER_H

// Restored: the header names below were stripped to bare "#include"
// in this copy (HTML-tag-like "<...>" spans were deleted).
#include <random>
#include <vector>

/*
 * Process-wide random number management for the MDS heuristics.
 *
 * Exposes one shared Mersenne Twister engine plus an array of
 * independently seeded engines for parallel sections. All state and
 * methods are static; definitions live in random_manager.cpp.
 */
class RandomManager {
private:
    static std::mt19937 rng;                       // shared global engine
    static std::vector<std::mt19937> parallelRng;  // one engine per parallel worker

public:
    static void setSeed(long seed);                     // reseed the global engine
    static std::mt19937& getRandom();                   // access the global engine
    static void initParallel(int nRandoms, long initSeed);  // build nRandoms engines from initSeed
    static std::mt19937& getRandom(int i);              // engine for worker i (no bounds check)
    static int nextInt(int max);                        // uniform draw in [0, max)
};

#endif // RANDOM_MANAGER_H
12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /scripts/build_wheel.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e -u -x 3 | 4 | function repair_wheel { 5 | wheel="$1" 6 | if ! auditwheel show "$wheel"; then 7 | echo "Skipping non-platform wheel $wheel" 8 | else 9 | auditwheel repair "$wheel" --plat "$PLAT" -w /io/wheelhouse/ 10 | fi 11 | } 12 | 13 | # Compile wheels 14 | for PYBIN in /opt/python/*/bin; do 15 | if [[ $PYBIN != *"cp313t"* ]] && \ 16 | [[ $PYBIN != *"pp"* ]] && \ 17 | [[ $PYBIN != *"cp36"* ]] &&\ 18 | [[ $PYBIN != *"cp37"* ]] &&\ 19 | [[ $PYBIN != *"cp38"* ]]; then 20 | "${PYBIN}/pip" wheel /io/ --no-deps -w wheelhouse/ 21 | fi 22 | done 23 | 24 | # Bundle external shared libraries into the wheels 25 | for whl in wheelhouse/*.whl; do 26 | repair_wheel "$whl" 27 | done -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: Lint and Format 2 | 3 | on: 4 | workflow_call: 5 | workflow_dispatch: 6 | 7 | jobs: 8 | lint-and-format: 9 | name: Run Linters and Formatters 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: checkout 13 | uses: actions/checkout@v4 14 | 15 | - name: Set up Python 16 | uses: actions/setup-python@v5 17 | with: 18 | python-version: "3.11" 19 | 20 | - name: Install dependencies 21 | run: | 22 | python -m pip install --upgrade pip 23 | python -m pip install ".[dev]" 24 | 25 | - name: Run ruff linter 26 | run: | 27 | ruff check 
src/radius_clustering tests --fix 28 | 29 | - name: Run black formatter 30 | run: | 31 | black src/radius_clustering tests --check 32 | 33 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Run Tests 2 | 3 | on: 4 | workflow_call: 5 | workflow_dispatch: 6 | 7 | jobs: 8 | pytest: 9 | name: Run pytest 10 | runs-on: ubuntu-latest 11 | strategy: 12 | fail-fast: true 13 | matrix: 14 | python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] 15 | steps: 16 | - uses: actions/checkout@v4 17 | 18 | - name: Set up Python 19 | uses: actions/setup-python@v5 20 | with: 21 | python-version: ${{ matrix.python-version }} 22 | 23 | - name: Install dependencies 24 | run: | 25 | python -m pip install --upgrade pip 26 | python -m pip install -e ".[dev]" 27 
| 28 | - name: Run tests with pytest 29 | run: | 30 | pytest -v 31 | 32 | - name: Upload coverage reports to Codecov 33 | uses: codecov/codecov-action@v5.4.3 34 | with: 35 | token: ${{ secrets.CODECOV_TOKEN }} 36 | -------------------------------------------------------------------------------- /src/radius_clustering/utils/random_manager.cpp: -------------------------------------------------------------------------------- 1 | #include "random_manager.h" 2 | #include 3 | #include 4 | 5 | std::mt19937 RandomManager::rng(std::chrono::system_clock::now().time_since_epoch().count()); 6 | std::vector RandomManager::parallelRng; 7 | 8 | void RandomManager::setSeed(long seed) { 9 | rng.seed(seed); 10 | } 11 | 12 | std::mt19937& RandomManager::getRandom() { 13 | return rng; 14 | } 15 | 16 | void RandomManager::initParallel(int nRandoms, long initSeed) { 17 | parallelRng.resize(nRandoms); 18 | std::mt19937 rndStart(initSeed); 19 | for (int i = 0; i < nRandoms; ++i) { 20 | int seed = std::uniform_int_distribution<>(0, std::numeric_limits::max())(rndStart); 21 | parallelRng[i].seed(seed); 22 | } 23 | } 24 | 25 | std::mt19937& RandomManager::getRandom(int i) { 26 | return parallelRng[i]; 27 | } 28 | 29 | int RandomManager::nextInt(int max) { 30 | return std::uniform_int_distribution<>(0, max - 1)(rng); 31 | } 32 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yml: -------------------------------------------------------------------------------- 1 | name: Feature Request 2 | description: Suggest an idea for this project 3 | title: "[Feature]: " 4 | labels: ["enhancement"] 5 | body: 6 | - type: textarea 7 | attributes: 8 | label: Is your feature request related to a problem? Please describe. 9 | description: A clear and concise description of what the problem is. Ex. "I'm always frustrated when..." 
10 | validations: 11 | required: true 12 | - type: textarea 13 | attributes: 14 | label: Describe the solution you'd like 15 | description: A clear and concise description of what you want to happen. 16 | validations: 17 | required: true 18 | - type: textarea 19 | attributes: 20 | label: Describe alternatives you've considered 21 | description: A clear and concise description of any alternative solutions or features you've considered. 22 | - type: textarea 23 | attributes: 24 | label: Additional context 25 | description: Add any other context or screenshots about the feature request here. 26 | -------------------------------------------------------------------------------- /.github/workflows/sphinx.yml: -------------------------------------------------------------------------------- 1 | name: "Sphinx: Render docs" 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - docs 8 | paths: 9 | - "docs/**" 10 | - "src/radius_clustering/**" 11 | workflow_dispatch: 12 | 13 | jobs: 14 | build: 15 | runs-on: ubuntu-latest 16 | permissions: 17 | contents: write 18 | steps: 19 | - uses: actions/checkout@v4 20 | - name: Build HTML 21 | shell: bash 22 | run: | 23 | sudo apt-get update 24 | sudo apt-get install build-essential 25 | pip install --upgrade pip 26 | pip install ".[doc]" 27 | pushd docs 28 | make html 29 | popd 30 | - name: Upload artifacts 31 | uses: actions/upload-artifact@v4 32 | with: 33 | name: html-docs 34 | path: docs/build/html/ 35 | - name: Deploy 36 | uses: peaceiris/actions-gh-pages@v3 37 | if: github.ref == 'refs/heads/main' 38 | with: 39 | github_token: ${{ secrets.GITHUB_TOKEN }} 40 | publish_dir: docs/build/html 41 | -------------------------------------------------------------------------------- /docs/source/_static/styles/custom.css: -------------------------------------------------------------------------------- 1 | /* Tabs (sphinx-design) */ 2 | .sd-tab-set { 3 | --tab-caption-width: 0%; 4 | margin-top: 1.5rem; } 5 | .sd-tab-set::before { 6 | width: 
var(--tab-caption-width); 7 | display: flex; 8 | align-items: center; 9 | font-weight: bold; } 10 | .sd-tab-set .sd-tab-content { 11 | padding: 0.5rem 0 0 0 !important; 12 | background-color: transparent !important; 13 | border: none !important; } 14 | .sd-tab-set .sd-tab-content blockquote { 15 | background-color: transparent !important; 16 | border: none !important; } 17 | .sd-tab-set .sd-tab-content > p:first-child { 18 | margin-top: 1rem !important; } 19 | .sd-tab-set > label.sd-tab-label { 20 | margin: 0 3px; 21 | display: flex; 22 | align-items: center; 23 | justify-content: center; 24 | border-radius: 5px !important; } 25 | .sd-tab-set > label.sd-tab-label.tab-4 { 26 | width: calc((100% - var(--tab-caption-width)) / 3 - 6px) !important; } 27 | .sd-tab-set > input:checked + label.sd-tab-label { 28 | transform: unset; 29 | border: 2px solid var(--pst-color-primary); } 30 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | # This CITATION.cff file was generated with cffinit. 2 | # Visit https://bit.ly/cffinit to generate yours today! 3 | 4 | cff-version: 1.2.0 5 | title: Radius Clustering 6 | message: >- 7 | If you use this software, please cite it using the 8 | metadata from this file. 
9 | type: software 10 | authors: 11 | - given-names: Quentin 12 | family-names: Haenn 13 | email: quentin.haenn@ensma.fr 14 | affiliation: LIAS Lab 15 | orcid: 'https://orcid.org/0009-0009-1663-0107' 16 | - given-names: Brice 17 | family-names: Chardin 18 | email: brice.chardin@ensma.fr 19 | affiliation: LIAS Lab 20 | orcid: 'https://orcid.org/0000-0002-9298-9447' 21 | - given-names: Mickael 22 | family-names: Baron 23 | email: mickael.baron@ensma.fr 24 | affiliation: LIAS Lab 25 | orcid: 'https://orcid.org/0000-0002-3356-0835' 26 | - name: LIAS Laboratory 27 | address: 1 Avenue Clément Ader 28 | city: Chasseneuil du Poitou 29 | post-code: '86360' 30 | website: 'https://www.lias-lab.fr' 31 | identifiers: 32 | - type: swh 33 | value: 'swh:1:rev:66f8d295cc5fbc80f356d11be46571bfbb190609' 34 | license: GPL-3.0 35 | -------------------------------------------------------------------------------- /docs/source/scss/custom.scss: -------------------------------------------------------------------------------- 1 | /* Tabs (sphinx-design) */ 2 | 3 | .sd-tab-set { 4 | --tab-caption-width: 0%; // No tab caption by default 5 | margin-top: 1.5rem; 6 | 7 | &::before { 8 | // Set `content` for tab caption 9 | width: var(--tab-caption-width); 10 | display: flex; 11 | align-items: center; 12 | font-weight: bold; 13 | } 14 | 15 | .sd-tab-content { 16 | padding: 0.5rem 0 0 0 !important; 17 | background-color: transparent !important; 18 | border: none !important; 19 | 20 | blockquote { 21 | background-color: transparent !important; 22 | border: none !important; 23 | } 24 | 25 | > p:first-child { 26 | margin-top: 1rem !important; 27 | } 28 | } 29 | 30 | > label.sd-tab-label { 31 | margin: 0 3px; 32 | display: flex; 33 | align-items: center; 34 | justify-content: center; 35 | border-radius: 5px !important; 36 | 37 | 38 | &.tab-4 { 39 | width: calc((100% - var(--tab-caption-width)) / 3 - 6px) !important; 40 | } 41 | } 42 | 43 | > input:checked + label.sd-tab-label { 44 | transform: unset; 
45 | border: 2px solid var(--pst-color-primary); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /.github/workflows/pr_build.yml: -------------------------------------------------------------------------------- 1 | name: PR Build and Test 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - main 7 | paths: 8 | - "src/radius_clustering/**" 9 | - "tests/**" 10 | - "pyproject.toml" 11 | workflow_dispatch: 12 | 13 | jobs: 14 | run_pytest: 15 | name: Run pytest 16 | uses: ./.github/workflows/tests.yml 17 | 18 | build_test_sdist: 19 | name: Test source distribution 20 | runs-on: ubuntu-latest 21 | needs: run_pytest 22 | strategy: 23 | fail-fast: true 24 | matrix: 25 | python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] 26 | steps: 27 | - uses: actions/checkout@v4 28 | 29 | - name: Set up Python 30 | uses: actions/setup-python@v5 31 | with: 32 | python-version: ${{ matrix.python-version }} 33 | 34 | - name: Build sdist 35 | run: | 36 | pip install --upgrade pip 37 | pip install pipx 38 | pipx run build --sdist 39 | 40 | - name: Test sdist 41 | run: | 42 | pip install ./dist/*.tar.gz 43 | python -c "import radius_clustering; print(f'Successfully imported version {radius_clustering.__version__}')" -------------------------------------------------------------------------------- /src/radius_clustering/utils/emos.pyx: -------------------------------------------------------------------------------- 1 | """ 2 | EMOS (Exact Minimum Dominating Set) Solver Module 3 | 4 | This Cython module provides a Python interface to the C implementation 5 | of the Exact Minimum Dominating Set (EMOS) algorithm. It allows for 6 | efficient solving of MDS problems using the exact method. 
cdef extern from "mds3-util.h":
    # Result of the exact MDS solve, allocated on the C side.
    struct Result:
        int* dominating_set   # 1-based vertex ids of the dominating set
        int set_size          # number of vertices in dominating_set
        double exec_time      # solver wall-clock time in seconds

    Result* emos_main(unsigned int* edges, int nb_edge, int n)

    void cleanup()

    void free_results(Result* result)

import numpy as np
cimport numpy as np

def py_emos_main(np.ndarray[unsigned int, ndim=1] edges, int n, int nb_edge):
    """Solve the exact Minimum Dominating Set problem on a flat edge list.

    Parameters
    ----------
    edges : 1-D uint32 array, flattened (u, v) pairs describing the graph.
    n : number of vertices.
    nb_edge : number of edges (``edges`` holds ``2 * nb_edge`` entries).

    Returns
    -------
    (list[int], float) : 0-based dominating-set vertex ids and solver time.
    """
    # NOTE(review): the extern declaration above names its parameters
    # (edges, nb_edge, n) while this call passes (edges, n, nb_edge).
    # C linkage is positional, so this is only a real bug if the actual
    # definition in main-emos.c orders them as declared here -- confirm
    # against main-emos.c before changing either side.
    cdef Result* result = emos_main(&edges[0], n, nb_edge)

    # Solver reports 1-based vertex ids; shift to 0-based for Python callers.
    dominating_set = [result.dominating_set[i] - 1 for i in range(result.set_size)]
    exec_time = result.exec_time

    # Release the C-side result struct and the solver's global state
    # before handing control back to Python.
    free_results(result)
    cleanup()

    return dominating_set, exec_time
"""Build script: compiles the Cython/C/C++ extension modules.

Two extensions are built: the exact EMOS solver (C) and the approximate
MDS solver (C++). Packaging metadata lives in pyproject.toml.
"""
import platform

import numpy as np
from Cython.Build import cythonize
from setuptools import Extension, setup

SYSTEM = platform.system()
CPU = platform.processor()

C_COMPILE_ARGS = ["-std=c99", "-O3", "-ffast-math", "-DREP"]
CXX_COMPILE_ARGS = ["-std=c++11", "-O3", "-ffast-math"]
CXX_LINK_ARGS = ["-std=c++11"]

# platform.processor() may be empty (e.g. some Linux builds); fall back
# to the machine architecture string.
if not CPU:
    CPU = platform.machine()

# Skip -march=native / OpenMP on macOS and Apple Silicon.
# NOTE(review): `CPU not in "arm64"` is a *substring* test ("is CPU a
# substring of 'arm64'?"), not membership in a collection -- it happens
# to exclude "arm64", "arm" and "m6" alike. `"arm" not in CPU` or an
# exact comparison is probably what was meant; confirm on the target
# platforms before changing, as it alters which flags get applied.
if (SYSTEM != "Darwin") and (CPU not in "arm64"):
    C_COMPILE_ARGS.append("-march=native")
    CXX_COMPILE_ARGS.append("-march=native")
    CXX_LINK_ARGS.append("-fopenmp")

extensions = [
    # Exact MDS solver (EMOS), C implementation.
    Extension(
        "radius_clustering.utils._emos",
        [
            "src/radius_clustering/utils/emos.pyx",
            "src/radius_clustering/utils/main-emos.c"
        ],
        include_dirs=[np.get_include(), "src/radius_clustering/utils"],
        extra_compile_args=C_COMPILE_ARGS,
    ),
    # Approximate MDS solver, C++ implementation.
    Extension(
        "radius_clustering.utils._mds_approx",
        [
            "src/radius_clustering/utils/mds.pyx",
            "src/radius_clustering/utils/mds_core.cpp",
            "src/radius_clustering/utils/random_manager.cpp",
        ],
        include_dirs=[np.get_include(), "src/radius_clustering/utils"],
        language="c++",
        extra_compile_args=CXX_COMPILE_ARGS,
        extra_link_args=CXX_LINK_ARGS,
    ),
]

setup(
    ext_modules=cythonize(extensions, language_level=3),
    include_dirs=[np.get_include()],
    package_data={"radius_clustering": ["utils/*.pyx", "utils/*.h"]},
)
38 | 39 | To set up `pre-commit`, run: 40 | 41 | ```bash 42 | pre-commit install 43 | ``` 44 | 45 | This will ensure your contributions match the project's code style. 46 | 47 | ## Submitting a Pull Request 48 | 49 | When you're ready to submit your changes, please write a clear and concise pull request message. Make sure to link any relevant issues. 50 | 51 | Thank you for your contribution! 52 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- 1 | name: Bug Report 2 | description: Create a report to help us improve 3 | title: "[Bug]: " 4 | labels: ["bug", "triage"] 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | Thanks for taking the time to fill out this bug report! 10 | 11 | - type: textarea 12 | id: what-happened 13 | attributes: 14 | label: Describe the bug 15 | description: A clear and concise description of what the bug is. 16 | placeholder: Tell us what you see! 17 | validations: 18 | required: true 19 | 20 | - type: textarea 21 | id: reproduce 22 | attributes: 23 | label: To Reproduce 24 | description: "Steps to reproduce the behavior. Please provide a minimal, self-contained code sample." 25 | placeholder: | 26 | ```python 27 | import numpy as np 28 | from radius_clustering import RadiusClustering 29 | 30 | # Your code here that triggers the bug 31 | ``` 32 | validations: 33 | required: true 34 | 35 | - type: textarea 36 | id: expected 37 | attributes: 38 | label: Expected behavior 39 | description: A clear and concise description of what you expected to happen. 40 | validations: 41 | required: true 42 | 43 | - type: dropdown 44 | id: os 45 | attributes: 46 | label: Operating System 47 | description: What operating system are you using? 
48 | options: 49 | - Windows 50 | - macOS 51 | - Linux 52 | validations: 53 | required: true 54 | 55 | - type: input 56 | id: python-version 57 | attributes: 58 | label: Python Version 59 | placeholder: "e.g. 3.11.4" 60 | validations: 61 | required: true 62 | 63 | - type: input 64 | id: package-version 65 | attributes: 66 | label: Package Version 67 | placeholder: "e.g. 1.4.0" 68 | validations: 69 | required: true 70 | 71 | - type: textarea 72 | id: additional-context 73 | attributes: 74 | label: Additional context 75 | description: Add any other context about the problem here. 76 | -------------------------------------------------------------------------------- /PRESENTATION.md: -------------------------------------------------------------------------------- 1 | ## How it works 2 | 3 | ### Clustering under radius constraint 4 | Clustering tasks are globally concerned about grouping data points into clusters based on some similarity measure. Clustering under radius constraints is a specific clustering task where the goal is to group data points such that the minimal maximum distance between any two points in the same cluster is less than or equal to a given radius. Mathematically, given a set of data points $X = \{x_1, x_2, \ldots, x_n\}$ and a radius $r$, the goal is to find a partition $ \mathcal{P}$ of $X$ into clusters $C_1, C_2, \ldots, C_k$ such that : 5 | ```math 6 | \forall C \in \mathcal{P}, \min_{x_i \in C}\max_{x_j \in C} d(x_i, x_j) \leq r 7 | ``` 8 | where $d(x_i, x_j)$ is the dissimilarity between $x_i$ and $x_j$. 9 | 10 | ### Minimum Dominating Set (MDS) problem 11 | 12 | The Radius Clustering package implements a clustering algorithm based on the Minimum Dominating Set (MDS) problem. The MDS problem is a well-known NP-Hard problem in graph theory, and it has been proven to be linked to the clustering under radius constraint problem. 
The MDS problem is defined as follows: 13 | 14 | Given an undirected weighted graph $G = (V,E)$ where $V$ is a set of vertices and $E$ is a set of edges, a dominating set $D$ is a subset of $V$ such that every vertex in $V$ is either in $D$ or adjacent to a vertex in $D$. The goal is to find a dominating set $D$ such that the number of vertices in $D$ is minimized. This problem is known to be NP-Hard. 15 | 16 | However, solving this problem in the context of clustering task can be useful, but we need some adaptations. 17 | 18 | ### Radius Clustering algorithm 19 | 20 | To adapt the MDS problem to the clustering under radius constraint problem, we need to define a graph based on the data points. The vertices of the graph are the data points, and the edges are defined based on the distance between the data points. The weight of the edges is the dissimilarity between the data points. Then, the algorithm operates as follows: 21 | 22 | 1. Construct a graph $G = (V,E)$ based on the data points $X$. 23 | 2. Prune the graph by removing the edges $e_{ij}$ such that $d(x_i,x_j) > r$. 24 | 3. Solve the MDS problem on the pruned graph. 25 | 4. Assign each vertex to the closest vertex in the dominating set. In case of a tie, assign the vertex to the vertex with the smallest index. 26 | 5. Return the cluster labels. 
-------------------------------------------------------------------------------- /src/radius_clustering/utils/util_heap.h: -------------------------------------------------------------------------------- 1 | #include <stdlib.h> /* calloc, realloc */ 2 | #include <assert.h> 3 | 4 | #ifndef V6R_UTIL_HEAP_H 5 | #define V6R_UTIL_HEAP_H 6 | 7 | typedef struct MaxNodeHeap MaxHeap; 8 | typedef struct node Node; 9 | 10 | static const int INF=0x3f3f3f3f; 11 | struct node { 12 | int ID,V; 13 | }; 14 | 15 | struct MaxNodeHeap{ 16 | unsigned size; 17 | unsigned capacity; 18 | Node *array; 19 | int (*comparator)(const Node * ,const Node *); 20 | }; 21 | 22 | 23 | #define size_of_heap(H) (H->size) 24 | #define empty_heap(H) (H->size==0) 25 | 26 | static inline unsigned int left (int i) { return i*2+1; } 27 | static inline unsigned int right (int i) { return (i+1)*2; } 28 | static inline unsigned int parent(int i) { return (i-1) >> 1; } 29 | 30 | static inline void shiftUp(MaxHeap *heap,int i) 31 | { 32 | Node x = heap->array[i]; 33 | int p = parent(i); 34 | while (i != 0 && heap->comparator(&x, &(heap->array[p]))){ 35 | heap->array[i]= heap->array[p]; 36 | i = p; 37 | p = parent(p); 38 | } 39 | heap->array[i] = x; 40 | } 41 | 42 | static inline void shiftDown(MaxHeap *heap,int i) 43 | { 44 | assert(heap->comparator); 45 | 46 | Node x = heap->array[i]; 47 | while (left(i) < heap->size){ 48 | int child = (right(i) < heap->size && heap->comparator(&(heap->array[right(i)]),&(heap->array[left(i)]))) ?
right(i) : left(i); 49 | if (!heap->comparator(&(heap->array[child]), &x)) break; 50 | heap->array[i] = heap->array[child]; 51 | i = child; 52 | } 53 | heap->array[i] = x; 54 | } 55 | 56 | static inline void insertHeap(MaxHeap *heap,Node x){ 57 | assert(heap->size<=heap->capacity); 58 | if(heap->size==heap->capacity) { 59 | int NewSize=2*(heap->capacity); 60 | heap->array=(Node *)(realloc(heap->array,(NewSize+1)*sizeof (Node))); 61 | assert((heap->array)!=NULL); 62 | heap->capacity=NewSize; 63 | } 64 | heap->array[heap->size]=x; 65 | heap->size++; 66 | if(heap->size>1)shiftUp(heap,heap->size-1); 67 | } 68 | 69 | static inline int node_cmp_for_MaxHeap(const Node *A,const Node *B) { 70 | if(A->V==B->V)return A->ID<B->ID; /* equal values: tie-break on smaller ID */ 71 | return A->V > B->V; 72 | } 73 | 74 | static inline void initHeap(MaxHeap *heap,int capacity,int (*cmp)(const Node *, const Node*)){ 75 | heap->array=(struct node *)calloc(capacity+1,sizeof(Node)); 76 | heap->capacity=capacity; 77 | heap->size=0; 78 | heap->comparator=cmp; 79 | } 80 | 81 | static inline Node removeTop(MaxHeap *heap) 82 | { 83 | 84 | Node x = heap->array[0]; 85 | heap->array[0] = heap->array[heap->size-1]; 86 | heap->size--; 87 | if (heap->size > 1) shiftDown(heap, 0); 88 | return x; 89 | } 90 | 91 | static inline void clearHeap(MaxHeap *heap){ 92 | heap->size=0; 93 | } 94 | #endif //V6R_UTIL_HEAP_H 95 | -------------------------------------------------------------------------------- /src/radius_clustering/utils/mds.pyx: -------------------------------------------------------------------------------- 1 | """ 2 | MDS Solver Module 3 | 4 | This Cython module provides the core functionality for solving Minimum Dominating Set (MDS) problems. 5 | It serves as a bridge between Python and the C++ implementation of the MDS algorithms.
6 | 7 | The module includes: 8 | - Wrapper functions for C++ MDS solvers 9 | - Data structure conversions between Python/NumPy and C++ 10 | - Result processing and conversion back to Python objects 11 | """ 12 | 13 | # distutils: language = c++ 14 | # distutils: sources = mds_clustering/utils/mds_core.cpp mds_clustering/utils/random_manager.cpp 15 | 16 | from libcpp.vector cimport vector 17 | from libcpp.unordered_set cimport unordered_set as cpp_unordered_set 18 | from libcpp.string cimport string 19 | from cython.operator cimport dereference as deref 20 | 21 | import numpy as np 22 | cimport numpy as np 23 | 24 | cdef extern from "random_manager.h": 25 | cdef cppclass RandomManager: 26 | @staticmethod 27 | void setSeed(long seed) 28 | 29 | cdef extern from "mds_core.cpp": 30 | cdef cppclass Result: 31 | Result() 32 | Result(string instanceName) 33 | void add(string key, float value) 34 | float get(int pos) 35 | vector[string] getKeys() 36 | string getInstanceName() 37 | cpp_unordered_set[int] getSolutionSet() 38 | void setSolutionSet(cpp_unordered_set[int] solutionSet) 39 | 40 | cdef Result iterated_greedy_wrapper(int numNodes, const vector[int]& edges_list, int nb_edges, long seed) nogil 41 | 42 | def solve_mds(int num_nodes, np.ndarray[int, ndim=1, mode="c"] edges not None, int nb_edges, int seed): 43 | """ 44 | Solve the Minimum Dominating Set problem for a given graph. 45 | 46 | Parameters: 47 | ----------- 48 | num_nodes : int 49 | The number of nodes in the graph. 50 | edges : np.ndarray 51 | A 1D NumPy array representing the edges of the graph. 52 | nb_edges : int 53 | The number of edges in the graph. 54 | seed : int 55 | Seed forwarded to the C++ solver's random number generator (for reproducibility). 56 | 57 | Returns: 58 | -------- 59 | dict 60 | A dictionary containing the solution set and other relevant information.
61 | """ 62 | cdef vector[int] cpp_edge_list 63 | 64 | # Cast the NumPy array to a C++ vector 65 | cpp_edge_list.assign(&edges[0], &edges[0] + edges.shape[0]) 66 | 67 | cdef Result result 68 | with nogil: 69 | result = iterated_greedy_wrapper(num_nodes, cpp_edge_list, nb_edges, seed) 70 | 71 | # Convert the C++ Result to a Python dictionary 72 | py_result = { 73 | "solution_set": set(result.getSolutionSet()), 74 | } 75 | 76 | # Add other key-value pairs 77 | keys = result.getKeys() 78 | for i in range(len(keys)): 79 | py_result[keys[i].decode('utf-8')] = result.get(i) 80 | 81 | return py_result -------------------------------------------------------------------------------- /tests/test_regression.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | from radius_clustering import RadiusClustering 4 | from sklearn.datasets import load_iris 5 | 6 | @pytest.fixture 7 | def iris_data(): 8 | """Fixture to load the Iris dataset.""" 9 | data = load_iris() 10 | return data.data 11 | 12 | @pytest.fixture 13 | def approx_results(): 14 | """Fixture to store results for approximate clustering.""" 15 | results = { 16 | 'labels': [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 17 | 0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 18 | 1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,2,2,2,2,1,2,2,2,2, 19 | 2,2,1,1,2,2,2,2,1,2,1,2,1,2,2,1,1,2,2,2,2,2,1,2,2,2,2,1,2,2,2,1,2,2,2,1,2, 20 | 2,1], 21 | "centers": [0,96,125], 22 | "time" : 0.0280, 23 | "effective_radius": 1.4282856857085722 24 | } 25 | return results 26 | 27 | @pytest.fixture 28 | def exact_results(): 29 | """Fixture to store results for exact clustering.""" 30 | results = { 31 | 'labels':[ 32 | 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 33 | 0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 34 | 
1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,2,2,2,2,1,2,2,2,2, 35 | 2,2,1,1,2,2,2,2,1,2,1,2,1,2,2,1,1,2,2,2,2,2,1,2,2,2,2,1,2,2,2,1,2,2,2,1,2, 36 | 2,1 37 | ], 38 | "centers": [0, 96, 102], 39 | "time": 0.0004, 40 | "effective_radius": 1.4282856857085722 41 | } 42 | return results 43 | 44 | def assert_results_exact(results, expected): 45 | """Helper function to assert clustering results.""" 46 | assert_results(results, expected) 47 | assert set(results.labels_) == set(expected['labels']), "Labels do not match expected" 48 | assert results.centers_ == expected['centers'], "Centers do not match expected" 49 | assert np.sum(results.labels_ - expected['labels']) == 0, "Labels do not match expected" 50 | 51 | def assert_results(results, expected): 52 | assert len(results.labels_) == len(expected['labels']), "Labels length mismatch" 53 | assert abs(results.mds_exec_time_ - expected['time']) < 0.1, "Execution time mismatch by more than 10%" 54 | assert abs(results.effective_radius_ - expected['effective_radius'])/results.effective_radius_ < 0.1, "Effective radius mismatch" 55 | 56 | def test_exact(iris_data, exact_results): 57 | """Test the RadiusClustering with exact""" 58 | clustering = RadiusClustering(radius=1.43, manner='exact').fit(iris_data) 59 | assert_results_exact(clustering, exact_results) 60 | 61 | def test_approx(iris_data, approx_results): 62 | """Test the RadiusClustering with approx.""" 63 | clustering = RadiusClustering(radius=1.43, manner='approx').fit(iris_data) 64 | assert_results(clustering, approx_results) 65 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 
4 | 5 | ## [1.4.2] - 2025-07-07 6 | 7 | ### Contributors 8 | 9 | - [@quentinhaenn](Quentin Haenn) - Main developer and maintainer 10 | 11 | ### Changed 12 | 13 | - Project governance changed : now the project is part of scikit-learn-contrib organization. 14 | - Updated the README to reflect the new governance and organization. 15 | - Updated the documentation to reflect the new governance and organization. 16 | - Updated the test cases to ensure compatibility with the new governance and organization. 17 | - Enhanced repository standards with codecov, binder examples and zenodo integration. 18 | - New issue and PR templates 19 | - Code of conduct and contributing guidelines added 20 | 21 | ## [1.4.0] - 2025-06-19 22 | 23 | ### Contributors 24 | 25 | - [@quentinhaenn](Quentin Haenn) - Main developer and maintainer 26 | 27 | ### Added 28 | 29 | - Added support for custom MDS solvers in the `RadiusClustering` class. 30 | - Updated the documentation to include examples of using custom MDS solvers. 31 | - Added more examples and tutorials to the documentation. 32 | 33 | ### Changed 34 | 35 | - Improved documentation and examples for the `RadiusClustering` class. 36 | - Updated the README to reflect the new features and improvements in version 1.4.0 37 | - Updated the test cases to ensure compatibility with the new features. 38 | - Refactored the main codebase to improve readability and maintainability. 39 | - Prepared the codebase for future adds of MDS solvers and/or clustering algorithms. 40 | 41 | ## [1.3.0] - 2025-06-18 42 | 43 | ### Contributors 44 | 45 | - [@quentinhaenn](Quentin Haenn) - Main developer and maintainer 46 | 47 | ### Added 48 | 49 | - Full test coverage for the entire codebase. 50 | - Badge for test coverage in the README. 51 | - Added `radius` parameter to the `RadiusClustering` class, allowing users to specify the radius for clustering. 52 | 53 | ### Deprecated 54 | 55 | - Deprecated the `threshold` parameter in the `RadiusClustering` class. 
Use `radius` instead. 56 | 57 | ### Changed 58 | 59 | - Updated all the attributes in the `RadiusClustering` class to fit `scikit-learn` standards and conventions. 60 | - Updated the tests cases to reflect the changes in the `RadiusClustering` class. 61 | - Updated README and documentation to reflect the new `radius` parameter and the deprecation of `threshold`. 62 | 63 | ## [1.2.0] - 2024-10 64 | 65 | ### Contributors 66 | 67 | - [@quentinhaenn](Quentin Haenn) - Main developer and maintainer 68 | - [@mickaelbaron](Mickaël Baron) - Contributor and maintainer 69 | 70 | ### Added 71 | 72 | - Added CI/CD pipelines with GitHub Actions for automated testing and deployment. 73 | - Added package metadata for better integration with PyPI. 74 | - Added a badge for the GitHub Actions workflow status in the README. 75 | - Added a badge for the Python version supported in the README. 76 | - Added a badge for the code style (Ruff) in the README. 77 | - Added a badge for the license in the README. 78 | - Added CI/CD pipelines for PyPI deployment (including test coverage, compiling extensions and wheels, and uploading to PyPI). 79 | - Resolving issues with compiling Cython extensions on Windows and macOS. 
80 | -------------------------------------------------------------------------------- /.github/workflows/build_wheels.yml: -------------------------------------------------------------------------------- 1 | name: Build and upload to PyPI 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | tags: 7 | - "v*" 8 | 9 | jobs: 10 | run_pytest: 11 | name: Run tests on min and max Python versions 12 | runs-on: ubuntu-latest 13 | strategy: 14 | fail-fast: true 15 | matrix: 16 | python-version: ["3.9", "3.13"] 17 | steps: 18 | - uses: actions/checkout@v4 19 | 20 | - name: Set up Python 21 | uses: actions/setup-python@v5 22 | with: 23 | python-version: ${{ matrix.python-version }} 24 | 25 | - name: Install dependencies 26 | run: | 27 | python -m pip install --upgrade pip 28 | python -m pip install -e ".[dev]" 29 | 30 | - name: Run tests with pytest 31 | run: | 32 | pytest -v 33 | 34 | build_wheels: 35 | name: Build wheels on ${{ matrix.os }} 36 | runs-on: ${{ matrix.os }} 37 | needs: run_pytest 38 | strategy: 39 | fail-fast: false 40 | matrix: 41 | # macos-13 is an intel runner, macos-14 is apple silicon 42 | os: [ubuntu-latest, windows-latest, macos-13, macos-14] 43 | 44 | steps: 45 | - uses: actions/checkout@v4 46 | 47 | - name: Build wheels 48 | uses: pypa/cibuildwheel@v2.22.0 49 | 50 | - uses: actions/upload-artifact@v4 51 | with: 52 | name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }} 53 | path: ./wheelhouse/*.whl 54 | 55 | build_sdist: 56 | name: Build source distribution 57 | runs-on: ubuntu-latest 58 | needs: run_pytest 59 | steps: 60 | - uses: actions/checkout@v4 61 | 62 | - name: Set up Python 63 | uses: actions/setup-python@v5 64 | with: 65 | python-version: "3.12" 66 | 67 | - name: Build sdist 68 | run: | 69 | pip install --upgrade pip 70 | pip install --upgrade pipx 71 | pipx run build --sdist 72 | 73 | - uses: actions/upload-artifact@v4 74 | with: 75 | name: cibw-sdist 76 | path: dist/*.tar.gz 77 | 78 | upload_pypi: 79 | needs: [build_wheels, build_sdist] 
80 | runs-on: ubuntu-latest # cannot self host because the next action uses GITHUB_WORKSPACE env variable automatically within the action 81 | environment: 82 | name: pypi 83 | url: https://pypi.org/p/radius-clustering 84 | permissions: 85 | id-token: write 86 | attestations: write 87 | #if: github.event_name == 'release' && github.event.action == 'published' 88 | # or, alternatively, upload to PyPI on every tag starting with 'v' (remove on: release above to use this) 89 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') 90 | steps: 91 | - name: Download all dists 92 | uses: actions/download-artifact@v4 93 | with: 94 | # unpacks all CIBW artifacts into dist/ 95 | pattern: cibw-* 96 | path: dist/ 97 | merge-multiple: true 98 | 99 | - name: Generate artifact attestations 100 | uses: actions/attest-build-provenance@v2 101 | with: 102 | subject-path: dist/* 103 | 104 | - name: Publish Distribution to PyPI 105 | uses: pypa/gh-action-pypi-publish@release/v1 106 | #with: 107 | # To test: repository-url: https://test.pypi.org/legacy/ 108 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 
2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | # -- Project information ----------------------------------------------------- 7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 8 | 9 | import os 10 | import sys 11 | from pathlib import Path 12 | 13 | sys.path.insert(0, os.path.abspath(".")) 14 | sys.path.insert(0, os.path.abspath("../..")) 15 | 16 | project = "Radius Clustering" 17 | copyright = "2024, Haenn Quentin, Chardin Brice, Baron Mickaël" 18 | author = "Haenn Quentin, Chardin Brice, Baron Mickaël" 19 | release = "1.3.0" 20 | 21 | # -- General configuration --------------------------------------------------- 22 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 23 | 24 | extensions = [ 25 | "sphinx.ext.autodoc", 26 | "sphinx.ext.autosummary", 27 | "sphinx.ext.viewcode", 28 | "sphinx_prompt", 29 | "sphinx.ext.napoleon", 30 | "sphinxcontrib.sass", 31 | "sphinx_remove_toctrees", 32 | "sphinxcontrib.email", 33 | "sphinx_gallery.gen_gallery", 34 | "sphinx_copybutton", 35 | "sphinx.ext.intersphinx", 36 | "sphinx_design", 37 | ] 38 | 39 | master_doc = "index" 40 | 41 | # Specify how to identify the prompt when copying code snippets 42 | copybutton_prompt_text = r">>> |\.\.\. 
" 43 | copybutton_prompt_is_regexp = True 44 | copybutton_exclude = "style" 45 | 46 | # Conf of numpydoc 47 | numpydoc_class_members_toctree = False 48 | 49 | templates_path = ["_templates"] 50 | exclude_patterns = [] 51 | 52 | # -- Options for HTML output ------------------------------------------------- 53 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 54 | 55 | html_theme = "pydata_sphinx_theme" 56 | # html_static_path = ['_static'] 57 | 58 | html_logo = "./images/logo-lias.jpg" 59 | 60 | html_short_title = "Radius Clustering" 61 | 62 | html_sidebars = {"**": []} 63 | 64 | html_theme_options = { 65 | "icon_links_label": "Icon Links", 66 | "icon_links": [ 67 | { 68 | "name": "GitHub", 69 | "url": "https://github.com/quentinhaenn", 70 | "icon": "fa-brands fa-square-github", 71 | "type": "fontawesome", 72 | }, 73 | ], 74 | "show_prev_next": False, 75 | "search_bar_text": "Search the docs ...", 76 | "navigation_with_keys": False, 77 | "navbar_align": "left", 78 | "navbar_start": ["navbar-logo"], 79 | "navbar_center": ["navbar-nav"], 80 | "navbar_end": ["theme-switcher", "navbar-icon-links", "version-switcher"], 81 | "navbar_persistent": ["search-button"], 82 | "article_footer_items": ["prev-next"], 83 | "footer_start": ["copyright"], 84 | "footer_center": [], 85 | "footer_end": [], 86 | } 87 | 88 | # Compile scss files into css files using sphinxcontrib-sass 89 | sass_src_dir, sass_out_dir = "scss", "_static/styles" 90 | sass_targets = { 91 | f"{file.stem}.scss": f"{file.stem}.css" 92 | for file in Path(sass_src_dir).glob("*.scss") 93 | } 94 | 95 | html_static_path = ["_static"] 96 | # Additional CSS files, should be subset of the values of `sass_targets` 97 | html_css_files = ["styles/custom.css"] 98 | 99 | sg_examples_dir = "../../examples" 100 | sg_gallery_dir = "auto_examples" 101 | sphinx_gallery_conf = { 102 | "doc_module": "radius_clustering", 103 | "backreferences_dir": os.path.join("modules", "generated"), 104 | 
"show_memory": False, 105 | "examples_dirs": [sg_examples_dir], 106 | "gallery_dirs": [sg_gallery_dir], 107 | # avoid generating too many cross links 108 | "inspect_global_variables": False, 109 | "remove_config_comments": True, 110 | "plot_gallery": "True", 111 | "recommender": {"enable": True, "n_examples": 4, "min_df": 12}, 112 | "reset_modules": ("matplotlib", "seaborn"), 113 | } 114 | -------------------------------------------------------------------------------- /src/radius_clustering/algorithms.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module contains the implementation of the clustering algorithms. 3 | It provides two main functions: `clustering_approx` and `clustering_exact`. 4 | 5 | These functions can be replaced in the `RadiusClustering` class 6 | to perform clustering using another algorithm. 7 | 8 | .. versionadded:: 1.4.0 9 | Refactoring the structure of the code to separate the clustering algorithms 10 | This allows for easier maintenance and extensibility of the codebase. 11 | 12 | """ 13 | from __future__ import annotations 14 | 15 | import numpy as np 16 | 17 | from .utils._mds_approx import solve_mds 18 | from .utils._emos import py_emos_main 19 | 20 | def clustering_approx( 21 | n: int, edges: np.ndarray, nb_edges: int, 22 | random_state: int | None = None) -> tuple[list, float]: 23 | """ 24 | Perform approximate MDS clustering. 25 | This method uses a pretty trick to set the seed for 26 | the random state of the C++ code of the MDS solver. 27 | 28 | .. tip:: 29 | The random state is used to ensure reproducibility of the results 30 | when using the approximate method. 31 | If `random_state` is None, a default value of 42 is used. 32 | 33 | .. important:: 34 | The trick to set the random state is : 35 | 36 | 1. Use the `check_random_state` function to get a `RandomState`singleton 37 | instance, set up with the provided `random_state`. 38 | 39 | 2.
Use the `randint` method of the `RandomState` instance to generate a 40 | random integer. 41 | 42 | 3. Use this random integer as the seed for the C++ code of the MDS solver. 43 | 44 | 45 | This ensures that the seed passed to the C++ code is always an integer, 46 | which is required by the MDS solver, and allows for 47 | reproducibility of the results. 48 | 49 | .. note:: 50 | This function uses the approximation method to solve the MDS problem. 51 | See [casado]_ for more details. 52 | 53 | Parameters: 54 | ----------- 55 | n : int 56 | The number of points in the dataset. 57 | edges : np.ndarray 58 | The edges of the graph, flattened into a 1D array. 59 | nb_edges : int 60 | The number of edges in the graph. 61 | random_state : int | None 62 | The random state to use for reproducibility. 63 | If None, a default value of 42 is used. 64 | Returns: 65 | -------- 66 | centers : list 67 | A sorted list of the centers of the clusters. 68 | mds_exec_time : float 69 | The execution time of the MDS algorithm in seconds. 70 | """ 71 | result = solve_mds( 72 | n, edges.flatten().astype(np.int32), nb_edges, random_state 73 | ) 74 | centers = sorted([x for x in result["solution_set"]]) 75 | mds_exec_time = result["Time"] 76 | return centers, mds_exec_time 77 | 78 | def clustering_exact(n: int, edges: np.ndarray, nb_edges: int, seed: int | None = None) -> tuple[list, float]: 79 | """ 80 | Perform exact MDS clustering. 81 | 82 | This function uses the EMOs algorithm to solve the MDS problem. 83 | 84 | .. important:: 85 | The EMOS algorithm is an exact algorithm for solving the MDS problem. 86 | It is a branch and bound algorithm that uses graph theory tricks 87 | to efficiently cut the search space. See [jiang]_ for more details. 88 | 89 | Parameters: 90 | ----------- 91 | n : int 92 | The number of points in the dataset. 93 | edges : np.ndarray 94 | The edges of the graph, flattened into a 1D array. 95 | nb_edges : int 96 | The number of edges in the graph.
97 | seed : None 98 | This parameter is not used in the exact method, but it is kept for 99 | compatibility with the approximate method. 100 | 101 | Returns: 102 | -------- 103 | centers : list 104 | A sorted list of the centers of the clusters. 105 | mds_exec_time : float 106 | The execution time of the MDS algorithm in seconds. 107 | """ 108 | centers, mds_exec_time = py_emos_main( 109 | edges.flatten(), n, nb_edges 110 | ) 111 | centers.sort() 112 | return centers, mds_exec_time -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. _index: 2 | 3 | .. toctree:: 4 | :maxdepth: 2 5 | :hidden: 6 | 7 | installation 8 | details 9 | usage 10 | api 11 | auto_examples/index 12 | 13 | Welcome to Radius Clustering's documentation! 14 | ============================================= 15 | 16 | 17 | The Radius Clustering algorithm is a clustering under radius constraint algorithm. It is based on the minimum dominating set problem (MDS) in graph theory. 18 | 19 | The algorithm is designed such that it can be used to cluster data points based on a radius constraint. The goal is to group data points such that the minimal maximum distance between any two points in the same cluster is less than or equal to a given radius. 20 | 21 | The algorithm is based on the equivalence between the minimum dominating set problem and the clustering under radius constraint problem. The latter problem is characterized by a radius parameter :math:`r` and a set of points :math:`X`. The goal is to find a partition of the points into subsets such that each subset is contained in a ball of radius :math:`r`. Plus, the goal is to minimize the number of subsets. 22 | 23 | This problem is proven to be NP-Hard, and the MDS problem is known to be NP-Hard as well. 24 | 25 | We propose an implementation to tackle this specific problem, based upon the MDS problem. 
The idea is to use the MDS algorithm to find the representative points of each cluster, and then to assign each point to the nearest representative point. 26 | 27 | 28 | .. warning:: Considering the NP-Hardness (or NP-Completeness) of the MDS problem, we alert that the overall complexity of any algorithm tackling this problem cannot be polynomial, unless P=NP. That is why we alert the user that the algorithm may take a long time to run on large datasets, especially when using the exact algorithm. 29 | From the experiments conducted, the exact algorithm is not recommended for datasets with more than 1000 points, but the overall complexity of the datasets and or the internal structure of the data may affect this threshold, in either way. For a more complete insight, we recommand the user to refer to the paper `Clustering under radius constraint using minimum dominating sets `_ or reading the :ref:`details` page of the documentation. 30 | 31 | 32 | 33 | Acknowledgments 34 | =============== 35 | 36 | The authors would like to thank the following people for their work that contributed either directly or indirectly to the development of this algorithm: 37 | 38 | Authors & Contributors 39 | ---------------------- 40 | 41 | **Quentin Haenn**, ISAE-ENSMA, LIAS, France. PhD Student, first author of this work. 42 | 43 | .. note:: 44 | - `GitHub `_ 45 | - `Lab page `_ 46 | 47 | **Brice Chardin**, ISAE-ENSMA, LIAS, France. Associate Professor, co-author of this work. 48 | 49 | .. note:: 50 | - `Lab page `_ 51 | 52 | **Mickaël Baron**, ISAE-ENSMA, LIAS, France. Research Engineer, co-author of this work. 53 | 54 | .. note:: 55 | - `GitHub `_ 56 | - `Lab page `_ 57 | 58 | Principal References 59 | -------------------- 60 | 61 | .. [casado] A. Casado, S. Bermudo, A.D. López-Sánchez, J. 
Sánchez-Oro, 62 | An iterated greedy algorithm for finding the minimum dominating set in graphs, 63 | Mathematics and Computers in Simulation, 64 | Volume 207, 65 | 2023 66 | Code available at https://github.com/AlejandraCasado 67 | 68 | *We rewrote the code in C++ to adapt to the need of python interfacing.* 69 | 70 | .. [jiang] Jiang, Hua and Zheng, Zhifei, "An Exact Algorithm for the Minimum Dominating Set Problem", Proceedings of the Thirty-Second International Joint Conference on Artificial Intelligence, 71 | pages 5604--5612 -in- proceedings of the Thirty-Second International Joint Conference on Artificial Intelligence, IJCAI-23, 2023, doi: 10.24963/ijcai.2023/622. 72 | Code available at https://github.com/huajiang-ynu. 73 | 74 | *We adapted the code to the need of python interfacing.* 75 | 76 | 77 | .. [andersen] Jennie Andersen, Brice Chardin, Mohamed Tribak. "Clustering to the Fewest Clusters Under Intra-Cluster Dissimilarity Constraints". Proceedings of the 33rd IEEE International Conference on Tools with Artificial Intelligence, Nov 2021, Athens, Greece. pp.209-216, https://dx.doi.org/10.1109/ICTAI52525.2021.00036 78 | 79 | .. [bien] Bien, J., & Tibshirani, R. (2011). Hierarchical Clustering with Prototypes via Minimax Linkage. 
http://faculty.marshall.usc.edu/Jacob-Bien/papers/jasa2011minimax.pdf 80 | 81 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["numpy>=2.0","cython >=3.0","setuptools >= 61.0"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "radius-clustering" 7 | dynamic = ["version"] 8 | description = "A Clustering under radius constraints algorithm using minimum dominating sets" 9 | readme = "README.md" 10 | authors = [ 11 | {name = "Quentin Haenn"}, 12 | {name = "Lias Laboratory"} 13 | ] 14 | maintainers = [ 15 | {name = "Quentin Haenn", email = "quentin.haenn.pro@gmail.com"} 16 | ] 17 | 18 | dependencies = [ 19 | "matplotlib>=3.6.2", 20 | "numpy>=2.0", 21 | "scikit-learn>=1.2.2", 22 | "scipy>=1.12.0", 23 | ] 24 | 25 | requires-python = ">=3.9" 26 | license = {file = "LICENSE"} 27 | classifiers=[ 28 | "Development Status :: 5 - Production/Stable", 29 | "Intended Audience :: Science/Research", 30 | "Intended Audience :: Developers", 31 | "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", 32 | "Programming Language :: C", 33 | "Programming Language :: C++", 34 | "Programming Language :: Python", 35 | "Topic :: Software Development", 36 | "Topic :: Scientific/Engineering", 37 | "Operating System :: Microsoft :: Windows", 38 | "Operating System :: POSIX", 39 | "Operating System :: Unix", 40 | "Operating System :: MacOS", 41 | "Programming Language :: Python :: 3", 42 | "Programming Language :: Python :: 3.9", 43 | "Programming Language :: Python :: 3.10", 44 | "Programming Language :: Python :: 3.11", 45 | "Programming Language :: Python :: 3.12", 46 | "Programming Language :: Python :: 3.13", 47 | "Programming Language :: Python :: Implementation :: CPython", 48 | ] 49 | keywords = ["Unsupervised learning", "clustering", "minimum dominating sets","clustering under radius 
constraint"] 50 | 51 | [project.urls] 52 | source = "https://github.com/scikit-learn-contrib/radius_clustering" 53 | tracker = "https://github.com/scikit-learn-contrib/radius_clustering/issues" 54 | documentation = "https://contrib.scikit-learn.org/radius_clustering/" 55 | 56 | [project.optional-dependencies] 57 | dev = [ 58 | "pre-commit>=3.8.0", 59 | "pytest>=8.3.3", 60 | "pytest-cov>=5.0.0", 61 | "pandas", 62 | "cython>=3.0", 63 | "setuptools>= 61.0", 64 | "black>=24.3.0", 65 | "ruff>=0.4.8", 66 | ] 67 | 68 | doc = [ 69 | "pandas", 70 | "networkx>=3.3", 71 | "sphinx>=8.1.3", 72 | "sphinx_gallery>=0.18.0", 73 | "sphinx-copybutton>=0.5.2", 74 | "pydata-sphinx-theme>=0.15.3", 75 | "sphinxcontrib-email>=0.3.6", 76 | "sphinx-remove-toctrees>=1.0.0", 77 | "sphinx-prompt>=1.9.0", 78 | "sphinx_design>=0.6.1", 79 | "sphinxcontrib.sass >= 0.3.4", 80 | ] 81 | 82 | [tool.setuptools] 83 | packages.find = {where = ["src"], include = ["radius_clustering", "radius_clustering.*"]} 84 | dynamic.version = {attr = "radius_clustering.__version__"} 85 | 86 | [tool.pytest.ini_options] 87 | pythonpath = "src" 88 | testpaths = ["tests"] 89 | addopts = [ 90 | "--import-mode=importlib", 91 | "--cov=src/radius_clustering", 92 | "--cov-report=term-missing", 93 | "--cov-report=html:coverage_html_report", 94 | ] 95 | 96 | [tool.coverage.run] 97 | source = ["src/radius_clustering"] 98 | branch = true 99 | 100 | [tool.coverage.report] 101 | show_missing = true 102 | 103 | [tool.coverage.html] 104 | directory = "coverage_html_report" 105 | title = "Coverage Report" 106 | 107 | [tool.ruff] 108 | # Exclude a variety of commonly ignored directories. 109 | exclude = [ 110 | ".git", 111 | ".git-rewrite", 112 | ".pytest_cache", 113 | ".ruff_cache", 114 | ".venv", 115 | ".vscode", 116 | "__pypackages__", 117 | "_build", 118 | "buck-out", 119 | "build", 120 | "dist", 121 | "site-packages", 122 | "venv", 123 | "**.egg-info", 124 | "tests", 125 | "examples", 126 | ] 127 | 128 | # Same as Black. 
129 | line-length = 88 130 | target-version = "py310" 131 | 132 | [tool.ruff.lint] 133 | # Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default. 134 | # Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or 135 | # McCabe complexity (`C901`) by default. 136 | select = ["E", "F", "W", "I"] 137 | ignore = ["E203", "E731", "E741"] 138 | 139 | # Allow fix for all enabled rules (when `--fix`) is provided. 140 | fixable = ["ALL"] 141 | unfixable = [] 142 | 143 | # Allow unused variables when underscore-prefixed. 144 | dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" 145 | 146 | [tool.ruff.format] 147 | # Like Black, use double quotes for strings. 148 | quote-style = "double" 149 | 150 | # Like Black, indent with spaces, rather than tabs. 151 | indent-style = "space" 152 | 153 | # Like Black, respect magic trailing commas. 154 | skip-magic-trailing-comma = false 155 | 156 | # Like Black, automatically detect the appropriate line ending. 157 | line-ending = "auto" 158 | 159 | # Enable auto-formatting of code examples in docstrings. 160 | docstring-code-format = true 161 | 162 | # Set the line length limit used when formatting code snippets in 163 | # docstrings. 164 | docstring-code-line-length = "dynamic" 165 | 166 | 167 | [tool.cibuildwheel] 168 | # Skip building for PyPy, python 3.6/7/8 and 13t, and 32-bit platforms. 
169 | skip = ["pp*", "cp36-*", "cp37-*", "cp38-*", "*-win32", "*linux_i686", "*musllinux*"] 170 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | 2 | # Contributor Covenant Code of Conduct 3 | 4 | ## Our Pledge 5 | 6 | We as members, contributors, and leaders pledge to make participation in our 7 | community a harassment-free experience for everyone, regardless of age, body 8 | size, visible or invisible disability, ethnicity, sex characteristics, gender 9 | identity and expression, level of experience, education, socio-economic status, 10 | nationality, personal appearance, race, caste, color, religion, or sexual 11 | identity and orientation. 12 | 13 | We pledge to act and interact in ways that contribute to an open, welcoming, 14 | diverse, inclusive, and healthy community. 15 | 16 | ## Our Standards 17 | 18 | Examples of behavior that contributes to a positive environment for our 19 | community include: 20 | 21 | - Demonstrating empathy and kindness toward other people 22 | - Being respectful of differing opinions, viewpoints, and experiences 23 | - Giving and gracefully accepting constructive feedback 24 | - Accepting responsibility and apologizing to those affected by our mistakes, 25 | and learning from the experience 26 | - Focusing on what is best not just for us as individuals, but for the overall 27 | community 28 | 29 | Examples of unacceptable behavior include: 30 | 31 | - The use of sexualized language or imagery, and sexual attention or advances of 32 | any kind 33 | - Trolling, insulting or derogatory comments, and personal or political attacks 34 | - Public or private harassment 35 | - Publishing others' private information, such as a physical or email address, 36 | without their explicit permission 37 | - Other conduct which could reasonably be considered inappropriate in a 38 | professional setting 39 | 40 | 
## Enforcement Responsibilities 41 | 42 | Community leaders are responsible for clarifying and enforcing our standards of 43 | acceptable behavior and will take appropriate and fair corrective action in 44 | response to any behavior that they deem inappropriate, threatening, offensive, 45 | or harmful. 46 | 47 | Community leaders have the right and responsibility to remove, edit, or reject 48 | comments, commits, code, wiki edits, issues, and other contributions that are 49 | not aligned to this Code of Conduct, and will communicate reasons for moderation 50 | decisions when appropriate. 51 | 52 | ## Scope 53 | 54 | This Code of Conduct applies within all community spaces, and also applies when 55 | an individual is officially representing the community in public spaces. 56 | Examples of representing our community include using an official email address, 57 | posting via an official social media account, or acting as an appointed 58 | representative at an online or offline event. 59 | 60 | ## Enforcement 61 | 62 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 63 | reported to the community leaders responsible for enforcement : 64 | [Send Report](mailto:quentin.haenn.pro@gmail.com). 65 | All complaints will be reviewed and investigated promptly and fairly. 66 | 67 | All community leaders are obligated to respect the privacy and security of the 68 | reporter of any incident. 69 | 70 | ## Enforcement Guidelines 71 | 72 | Community leaders will follow these Community Impact Guidelines in determining 73 | the consequences for any action they deem in violation of this Code of Conduct: 74 | 75 | ### 1. Correction 76 | 77 | **Community Impact**: Use of inappropriate language or other behavior deemed 78 | unprofessional or unwelcome in the community. 79 | 80 | **Consequence**: A private, written warning from community leaders, providing 81 | clarity around the nature of the violation and an explanation of why the 82 | behavior was inappropriate. 
A public apology may be requested. 83 | 84 | ### 2. Warning 85 | 86 | **Community Impact**: A violation through a single incident or series of 87 | actions. 88 | 89 | **Consequence**: A warning with consequences for continued behavior. No 90 | interaction with the people involved, including unsolicited interaction with 91 | those enforcing the Code of Conduct, for a specified period of time. This 92 | includes avoiding interactions in community spaces as well as external channels 93 | like social media. Violating these terms may lead to a temporary or permanent 94 | ban. 95 | 96 | ### 3. Temporary Ban 97 | 98 | **Community Impact**: A serious violation of community standards, including 99 | sustained inappropriate behavior. 100 | 101 | **Consequence**: A temporary ban from any sort of interaction or public 102 | communication with the community for a specified period of time. No public or 103 | private interaction with the people involved, including unsolicited interaction 104 | with those enforcing the Code of Conduct, is allowed during this period. 105 | Violating these terms may lead to a permanent ban. 106 | 107 | ### 4. Permanent Ban 108 | 109 | **Community Impact**: Demonstrating a pattern of violation of community 110 | standards, including sustained inappropriate behavior, harassment of an 111 | individual, or aggression toward or disparagement of classes of individuals. 112 | 113 | **Consequence**: A permanent ban from any sort of public interaction within the 114 | community. 115 | 116 | ## Attribution 117 | 118 | This Code of Conduct is adapted from the 119 | [Contributor Covenant](https://www.contributor-covenant.org/), version 2.1, 120 | available at 121 | . 122 | 123 | Community Impact Guidelines were inspired by 124 | [Mozilla's code of conduct enforcement ladder](https://github.com/mozilla/inclusion). 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | . Translations are available at 128 | . 
129 | -------------------------------------------------------------------------------- /EXPERIMENTS.md: -------------------------------------------------------------------------------- 1 | ### Experimental results 2 | 3 | The Radius Clustering package provides two algorithms to solve the MDS problem: an exact algorithm and an approximate algorithm. The approximate algorithm is based on a heuristic that iteratively selects the vertex that dominates the most vertices in the graph. The exact algorithm is based on a branch-and-bound algorithm that finds the minimum dominating set in the graph. Experimentation has been conducted on real-world datasets to compare the performances of these two algorithms, and compare them to state-of-the-art clustering algorithms. The complete results are available in the paper [Clustering under radius constraint using minimum dominating sets](https://hal.science/hal-04533921/). 4 | 5 | The algorithms selected for comparison are: 6 | 7 | 1. Equiwide clustering (EQW-LP), a state-of-the-art exact algorithm using LP formulation of the problem [[3]](https://hal.science/hal-03356000) 8 | 2. ProtoClust [[4](http://faculty.marshall.usc.edu/Jacob-Bien/papers/jasa2011minimax.pdf)] 9 | 10 | Here are some key results from the experiments: 11 | 12 | Table 1: Average running time (in seconds) of the algorithms on real-world datasets. 
13 | 14 | | **Dataset** | **MDS-APPROX** | **MDS-EXACT** | **EQW-LP** | **PROTOCLUST** | 15 | |--------------------------|----------------|---------------|--------------|----------------| 16 | | **Iris** | 0.062 ± 0.01 | 0.009 ± 0.00 | 0.018 ± 0.01 | 0.026 ± 0.00 | 17 | | **Wine** | 0.029 ± 0.00 | 0.010 ± 0.00 | 0.014 ± 0.00 | 0.034 ± 0.00 | 18 | | **Glass Identification** | 0.015 ± 0.00 | 0.020 ± 0.00 | 0.026 ± 0.00 | 0.046 ± 0.00 | 19 | | **Ionosphere** | 0.078 ± 0.01 | 2.640 ± 0.05 | 0.104 ± 0.00 | 0.120 ± 0.00 | 20 | | **WDBC** | 0.315 ± 0.01 | 0.138 ± 0.00 | 0.197 ± 0.01 | 0.402 ± 0.00 | 21 | | **Synthetic Control** | 0.350 ± 0.03 | 0.036 ± 0.00 | 0.143 ± 0.01 | 0.489 ± 0.00 | 22 | | **Vehicle** | 0.955 ± 0.04 | 0.185 ± 0.00 | 0.526 ± 0.01 | 0.830 ± 0.01 | 23 | | **Yeast** | 2.361 ± 0.03 | 738.8 ± 0.30 | 6.718 ± 0.02 | 2.374 ± 0.08 | 24 | | **Ozone** | 49.82 ± 1.18 | 1447 ± 0.54 | 26.86 ± 0.63 | 15.32 ± 0.15 | 25 | | **Waveform** | 48.01 ± 0.39 | 8813 ± 57.80 | 233.9 ± 1.45 | 61.27 ± 0.08 | 26 | 27 | Table 2: Number of clusters obtained on real-world datasets. 28 | 29 | | **Dataset** | **MDS-APPROX** | **MDS-EXACT** | **EQW-LP** | **PROTOCLUST** | 30 | |--------------------------|----------------|---------------|------------|----------------| 31 | | **Iris** | 3 | 3 | 3 | 4 | 32 | | **Wine** | 4 | 3 | 3 | 4 | 33 | | **Glass Identification** | 7 | 6 | 6 | 7 | 34 | | **Ionosphere** | 2 | 2 | 2 | 5 | 35 | | **WDBC** | 2 | 2 | 2 | 3 | 36 | | **Synthetic Control** | 8 | 6 | 6 | 8 | 37 | | **Vehicle** | 5 | 4 | 4 | 6 | 38 | | **Yeast** | 10 | 10 | 10 | 13 | 39 | | **Ozone** | 3 | 2 | 2 | 3 | 40 | | **Waveform** | 3 | 3 | 3 | 6 | 41 | 42 | 43 | Table 3: Compactness of the clusters (maximal radius obtained after clustering) obtained on real-world datasets. 
44 | 45 | | **Dataset** | **MDS-APPROX** | **MDS-EXACT** | **EQW-LP** | **PROTOCLUST** | 46 | |--------------------------|----------------|---------------|------------|----------------| 47 | | **Iris** | 1.43 | 1.43 | 1.43 | 1.24 | 48 | | **Wine** | 220.05 | 232.08 | 232.08 | 181.35 | 49 | | **Glass Identification** | 3.94 | 3.94 | 3.94 | 3.31 | 50 | | **Ionosphere** | 4.45 | 5.45 | 5.45 | 5.35 | 51 | | **WDBC** | 1197.42 | 1197.42 | 1197.42 | 907.10 | 52 | | **Synthetic Control** | 66.59 | 70.11 | 70.11 | 68.27 | 53 | | **Vehicle** | 150.87 | 155.05 | 155.05 | 120.97 | 54 | | **Yeast** | 0.42 | 0.42 | 0.42 | 0.42 | 55 | | **Ozone** | 235.77 | 245.58 | 245.58 | 194.89 | 56 | | **Waveform** | 10.73 | 10.73 | 10.73 | 10.47 | 57 | 58 | 59 | #### Key insights: 60 | 61 | - The approximate algorithm is significantly faster than the exact algorithm, but it may not always provide the optimal solution. 62 | - The exact algorithm is slower but provides the optimal solution. It does not scale well to large datasets, due to the NP-Hard nature of the problem. 63 | - The approximate algorithm is a good trade-off between speed and accuracy for most datasets. 64 | - The MDS-based approaches are both more accurate than Protoclust. However, Protoclust is remarkably faster on most datasets. 65 | 66 | 67 | > :memo: **Note**: The results show that MDS-based clustering algorithms might be a good alternative to state-of-the-art clustering algorithms for clustering under radius constraint problems. 68 | 69 | > :memo: **Note**: Since the publication of the paper, the Radius Clustering package has been improved and optimized. The results presented here are based on the initial version of the package. For the latest results, please refer to the documentation or the source code. 
70 | 71 | 72 | ## References 73 | 74 | - [3] [Clustering to the fewest clusters under intra-cluster dissimilarity constraints](https://hal.science/hal-03356000) 75 | - [4] [Hierarchical Clustering with prototypes via Minimax Linkage](http://faculty.marshall.usc.edu/Jacob-Bien/papers/jasa2011minimax.pdf) 76 | 77 | -------------------------------------------------------------------------------- /docs/source/details.rst: -------------------------------------------------------------------------------- 1 | .. _details: 2 | 3 | How it works 4 | ============ 5 | 6 | This page of the documentation is dedicated to explain the theory behind the algorithm, how it is built and present you 7 | some key results obtained from experiments conducted on real-world datasets. 8 | 9 | First, we'll detail the problem of clustering under radius constraint, then we'll explain the Minimum Dominating Set (MDS) problem and how it is adapted to the clustering problem. Finally, we'll present some key results obtained from experiments conducted on real-world datasets. 10 | 11 | 12 | Clustering under radius constraint 13 | ---------------------------------- 14 | 15 | Clustering tasks are globally concerned about grouping data points into clusters based on some similarity measure. 16 | Clustering under radius constraints is a specific clustering task where the goal is to group data points such that the 17 | minimal maximum distance between any two points in the same cluster is less than or equal to a given radius. 18 | Mathematically, given a set of data points :math:`X = \{x_1, x_2, \ldots, x_n\}` and a radius :math:`r`, 19 | the goal is to find a partition :math:`\mathcal{P}` of :math:`X` into clusters :math:`C_1, C_2, \ldots, C_k` such that : 20 | :math:`\forall C \in \mathcal{P}, \min_{x_i \in C}\max_{x_j \in C}\ d_{ij} \leq r` 21 | where :math:`d_{ij} = d(x_i, x_j)` is the dissimilarity between :math:`x_i` and :math:`x_j`. 
22 | 23 | 24 | Minimum Dominating Set (MDS) problem 25 | ------------------------------------ 26 | 27 | The Radius Clustering package implements a clustering algorithm based on the Minimum Dominating Set (MDS) problem. 28 | The MDS problem is a well-known NP-Hard problem in graph theory, and it has been proven to be linked to the clustering 29 | under radius constraint problem. The MDS problem is defined as follows: 30 | 31 | Given an undirected weighted graph :math:`G = (V,E)` where :math:`V` is a set of vertices and :math:`E` is a set of edges, 32 | a dominating set :math:`D` is a subset of :math:`V` such that every vertex in :math:`V` is either in :math:`D` or 33 | adjacent to a vertex in :math:`D`. The goal is to find a dominating set :math:`D` such that the number of vertices in 34 | :math:`D` is minimized. This problem is known to be NP-Hard. 35 | 36 | However, solving this problem in the context of clustering task can be useful. But it has to be adapted to the needs of a clustering task. 37 | 38 | Presenting the algorithm 39 | ------------------------ 40 | 41 | To adapt the MDS problem to the clustering under radius constraint problem, we need to define a graph based on the data points. The vertices of the graph are the data points, and the edges are defined based on the distance between the data points. The weight of the edges is the dissimilarity between the data points. Then, the algorithm operates as follows: 42 | 43 | 1. Construct a graph :math:`G = (V,E)` based on the data points :math:`X`. 44 | 2. Prune the graph by removing the edges :math:`e_{ij}` such that :math:`d(x_i,x_j) > r`. 45 | 3. Solve the MDS problem on the pruned graph. 46 | 4. Assign each vertex to the closest vertex in the dominating set. In case of a tie, assign the vertex to the vertex with the smallest index. 47 | 5. Return the cluster labels. 
48 | 49 | Experimental results 50 | -------------------- 51 | 52 | The Radius Clustering package provides two algorithms to solve the MDS problem: an exact algorithm and an approximate algorithm. 53 | The approximate algorithm [casado]_ is based on a heuristic that iteratively selects the vertex that dominates the most vertices 54 | in the graph. The exact algorithm [jiang]_ is based on a branch-and-bound algorithm that finds the minimum dominating set in the graph. 55 | Experimentation has been conducted on real-world datasets to compare the performances of these two algorithms, 56 | and compare them to state-of-the-art clustering algorithms. The complete results from first experiments are available in the paper 57 | `Clustering under radius constraint using minimum dominating sets `_. 58 | 59 | The algorithms selected for comparison are: 60 | 61 | 1. Equiwide clustering (EQW-LP), a state-of-the-art exact algorithm using LP formulation of the problem ([andersen]_) 62 | 2. Protoclust ([bien]_), a state-of-the-art approximate algorithm based on the hierarchical agglomerative clustering using MinMax linkage. 63 | 64 | Here are some key results from the experiments: 65 | 66 | .. csv-table:: Number of clusters obtained on real-world datasets. 67 | :header: Dataset, MDS-APPROX, MDS-EXACT, EQW-LP, PROTOCLUST 68 | :widths: 20, 20, 20, 20, 20 69 | 70 | Iris,3,3,3,4 71 | Wine,4,3,3,4 72 | Glass Identification,7,6,6,7 73 | Ionosphere,2,2,2,5 74 | WDBC,2,2,2,3 75 | Synthetic Control,8,6,6,8 76 | Vehicle,5,4,4,6 77 | Yeast,10,10,10,13 78 | Ozone,3,2,2,3 79 | Waveform,3,3,3,6 80 | 81 | 82 | .. csv-table:: Compactness of the clusters (maximal radius obtained after clustering) obtained on real-world datasets. 
83 | :header: Dataset, MDS-APPROX, MDS-EXACT, EQW-LP, PROTOCLUST 84 | :widths: 20, 20, 20, 20, 20 85 | 86 | Iris,1.43,1.43,1.43,1.24 87 | Wine,220.05,232.08,232.08,181.35 88 | Glass Identification,3.94,3.94,3.94,3.31 89 | Ionosphere,4.45,5.45,5.45,5.35 90 | WDBC,1197.42,1197.42,1197.42,907.10 91 | Synthetic Control,66.59,70.11,70.11,68.27 92 | Vehicle,150.87,155.05,155.05,120.97 93 | Yeast,0.42,0.42,0.42,0.42 94 | Ozone,235.77,245.58,245.58,194.89 95 | Waveform,10.73,10.73,10.73,10.47 96 | 97 | 98 | .. image:: ./images/exec_time.png 99 | :width: 800 100 | :align: center 101 | 102 | .. image:: ./images/exec_time_optimized.png 103 | :width: 800 104 | :align: center 105 | 106 | 107 | 108 | Key insights: 109 | +++++++++++++ 110 | 111 | - The approximate algorithm is significantly faster than the exact algorithm, but it may not always provide the optimal solution. 112 | - The exact algorithm is slower but provides the optimal solution. It does not scale well to large datasets, due to the NP-Hard nature of the problem. 113 | - The approximate algorithm is a good trade-off between speed and accuracy for most datasets. 114 | - The MDS-based approaches are both more accurate than Protoclust. However, Protoclust is remarkably faster on most datasets. 115 | 116 | 117 | .. note:: The results show that MDS-based clustering algorithms might be a good alternative to state-of-the-art clustering algorithms for clustering under radius constraint problems. 118 | 119 | .. note:: Since the publication of the paper, the Radius Clustering package has been improved and optimized. The results presented here are based on the initial version of the package. For the latest results, please refer to the documentation or the source code. 
120 | 121 | 122 | -------------------------------------------------------------------------------- /docs/source/usage.rst: -------------------------------------------------------------------------------- 1 | Usage 2 | ===== 3 | 4 | This page provides a quick guide on how to use the `radius_clustering` package for clustering tasks. The package provides a simple interface for performing radius-based clustering on datasets based on the Minimum Dominating Set (MDS) algorithm. 5 | 6 | This page is divided into three main sections: 7 | 1. **Basic Usage**: A quick example of how to use the `RadiusClustering` class and perform clustering with several parameters. 8 | 2. **Custom Dissimilarity Function**: How to use a custom dissimilarity function with the `RadiusClustering` class. 9 | 3. **Custom MDS Solver**: How to implement a custom MDS solver for more advanced clustering tasks, eventually with less guarantees on the results. 10 | 11 | 12 | Basic Usage 13 | ----------------- 14 | 15 | The `RadiusClustering` class provides a straightforward way to perform clustering based on a specified radius. You can choose between an approximate or exact method for clustering, depending on your needs. 16 | 17 | Here's a basic example of how to use Radius Clustering with the `RadiusClustering` class, using the approximate method: 18 | 19 | .. code-block:: python 20 | 21 | from radius_clustering import RadiusClustering 22 | import numpy as np 23 | 24 | # Generate random data 25 | X = np.random.rand(100, 2) 26 | 27 | # Create an instance of MdsClustering 28 | rad = RadiusClustering(manner="approx", radius=0.5) 29 | # Attention: the 'threshold' parameter is deprecated by version 1.3.0 30 | # and will be removed in a future version. Use 'radius' instead. 
31 | 32 | # Fit the model to the data 33 | rad.fit(X) 34 | 35 | # Get cluster labels 36 | labels = rad.labels_ 37 | 38 | print(labels) 39 | 40 | Similarly, you can use the exact method by changing the `manner` parameter to `"exact"`: 41 | .. code-block:: python 42 | # [...] Exact same code as above 43 | rad = RadiusClustering(manner="exact", radius=0.5) # change this parameter 44 | # [...] Exact same code as above 45 | 46 | Custom Dissimilarity Function 47 | ----------------------------- 48 | 49 | The main reason behind the `radius_clustering` package is that users eventually need to use a dissimilarity function that is not a metric (or distance) function. Plus, sometimes context requires a domain-specific dissimilarity function that is not provided by default, and needs to be implemented by the user. 50 | 51 | To use a custom dissimilarity function, you can pass it as a parameter to the `RadiusClustering` class. Here's an example of how to do this: 52 | .. code-block:: python 53 | 54 | from radius_clustering import RadiusClustering 55 | import numpy as np 56 | 57 | # Generate random data 58 | X = np.random.rand(100, 2) 59 | 60 | # Define a custom dissimilarity function 61 | def dummy_dissimilarity(x, y): 62 | return np.linalg.norm(x - y) + 0.1 # Example: add a constant to the distance 63 | 64 | # Create an instance of MdsClustering with the custom dissimilarity function 65 | rad = RadiusClustering(manner="approx", radius=0.5, metric=dummy_dissimilarity) 66 | 67 | # Fit the model to the data 68 | rad.fit(X) 69 | 70 | # Get cluster labels 71 | labels = rad.labels_ 72 | 73 | print(labels) 74 | 75 | 76 | .. note:: 77 | The custom dissimilarity function will be passed to scikit-learn's `pairwise_distances` function, so it should be compatible with the expected input format and return type. See the scikit-learn documentation for more details on how to implement custom metrics. 
78 | 79 | Custom MDS Solver 80 | ----------------- 81 | 82 | The two default solvers provided by the actual implementation of the `radius_clustering` package are focused on exactness (or proximity to exactness) of the results of a NP-hard problem. So, they may not be suitable for all use cases, especially when performance is a concern. 83 | If you have your own implementation of a Minimum Dominating Set (MDS) solver, you can use it with the `RadiusClustering` class by using the :py:func:`RadiusClustering.set_solver` method. It will check that the solver is compatible with the expected input format and return type, and will use it to perform clustering. 84 | 85 | .. versionadded:: 1.4.0 86 | The :py:func:`RadiusClustering.set_solver` method was added to allow users to set a custom MDS solver. 87 | It is *NOT* backward compatible with previous versions of the package, as it comes with new structure and methods to handle custom solvers. 88 | 89 | Here's an example of how to implement a custom MDS solver and use it with the `RadiusClustering` class, using the NetworkX implementation of the dominating set problem: 90 | 91 | .. code-block:: python 92 | 93 | from radius_clustering import RadiusClustering 94 | import time 95 | import numpy as np 96 | import networkx as nx 97 | 98 | # Generate random data 99 | X = np.random.rand(100, 2) 100 | 101 | # Define a custom MDS solver using NetworkX 102 | def custom_mds_solver(n, edges, nb_edges, random_state=None): 103 | start = time.time() 104 | graph = nx.Graph(edges) 105 | centers = list(nx.algorithms.dominating_set(graph)) 106 | centers.sort() 107 | end = time.time() 108 | return centers, end - start 109 | 110 | # Create an instance of MdsClustering with the custom MDS solver 111 | rad = RadiusClustering(manner="approx", radius=0.5) 112 | rad.set_solver(custom_mds_solver) 113 | 114 | # Fit the model to the data 115 | rad.fit(X) 116 | 117 | # Get cluster labels 118 | labels = rad.labels_ 119 | 120 | print(labels) 121 | 122 | ..
note:: 123 | The custom MDS solver should accept the same parameters as the default solvers, including the number of points `n`, the edges of the graph `edges`, the number of edges `nb_edges`, and an optional `random_state` parameter for reproducibility. It should return a list of centers and the time taken to compute them. 124 | The `set_solver` method will check that the custom solver is compatible with the expected input format and return type, and will use it to perform clustering. 125 | If the custom solver is not compatible, it will raise a `ValueError` with a descriptive message. 126 | 127 | .. attention:: 128 | We cannot guarantee that the custom MDS solver will produce the same results as the default solvers, especially if it is not purposely designed to solve the Minimum Dominating Set problem but rather just finds a dominating set. The results may vary depending on the implementation and the specific characteristics of the dataset. 129 | As an example, a benchmark of our solutions and a custom one using NetworkX is available in the `Example Gallery` section of the documentation, which shows that the custom solver may produce different results than the default solvers, especially in terms of the number of clusters and the time taken to compute them (see :ref:`sphx_glr_auto_examples_plot_benchmark_custom.py`). 130 | However, it can be useful for specific use cases where performance is a concern or when you have a custom implementation that fits your needs better. 131 | 132 | -------------------------------------------------------------------------------- /tests/test_unit.py: -------------------------------------------------------------------------------- 1 | from radius_clustering import RadiusClustering 2 | import pytest 3 | import numpy as np 4 | 5 | def test_symmetric(): 6 | """ 7 | Test that the RadiusClustering class can handle symmetric distance matrices. 
8 | """ 9 | 10 | # Check 1D array input 11 | 12 | X = np.array([0,1]) 13 | with pytest.raises(ValueError): 14 | RadiusClustering(manner="exact", radius=1.5)._check_symmetric(X) 15 | 16 | # Check a symmetric distance matrix 17 | X = np.array([[0, 1, 2], 18 | [1, 0, 1], 19 | [2, 1, 0]]) 20 | 21 | clustering = RadiusClustering(manner="exact", radius=1.5) 22 | assert clustering._check_symmetric(X), "The matrix should be symmetric." 23 | 24 | # Check a non-symmetric distance matrix 25 | X_assym = np.array([[0, 1, 2], 26 | [1, 0, 1], 27 | [2, 2, 3]]) # This is not symmetric 28 | assert not clustering._check_symmetric(X_assym), "The matrix should not be symmetric." 29 | 30 | # check a non-square matrix 31 | X_non_square = np.array([[0, 1], 32 | [1, 0], 33 | [2, 1]]) # This is not square 34 | 35 | assert not clustering._check_symmetric(X_non_square), "The matrix should not be symmetric." 36 | 37 | 38 | def test_fit_distance_matrix(): 39 | """ 40 | Test that the RadiusClustering class can fit to a distance matrix. 41 | This test checks both the exact and approximate methods of clustering. 42 | """ 43 | 44 | # Create a symmetric distance matrix 45 | X = np.array([[0, 1, 2], 46 | [1, 0, 1], 47 | [2, 1, 0]]) 48 | 49 | clustering = RadiusClustering(manner="exact", radius=1.5) 50 | clustering.fit(X) 51 | 52 | # Check that the labels are assigned correctly 53 | assert len(clustering.labels_) == X.shape[0], "Labels length should match number of samples." 54 | assert clustering.nb_edges_ > 0, "There should be edges in the graph." 55 | assert np.array_equal(clustering.X_checked_, clustering.dist_mat_), "X_checked_ should be equal to dist_mat_ because X is a distance matrix." 56 | 57 | @pytest.mark.parametrize( 58 | "test_data", [ 59 | ("euclidean",1.5), 60 | ("manhattan", 2.1), 61 | ("cosine", 1.0) 62 | ] 63 | ) 64 | def test_fit_features(test_data): 65 | """ 66 | Test that the RadiusClustering class can fit to feature data. 
67 | This test checks both the exact and approximate methods of clustering 68 | and multiple metrics methods. 69 | """ 70 | # Create a feature matrix 71 | X_features = np.array([[0, 1], 72 | [1, 0], 73 | [2, 1]]) 74 | metric, radius = test_data 75 | 76 | clustering = RadiusClustering(manner="approx", radius=radius) 77 | clustering.fit(X_features, metric=metric) 78 | # Check that the labels are assigned correctly 79 | assert len(clustering.labels_) == X_features.shape[0], "Labels length should match number of samples." 80 | assert clustering.nb_edges_ > 0, "There should be edges in the graph." 81 | assert clustering._check_symmetric(clustering.dist_mat_), "Distance matrix should be symmetric after computed from features." 82 | 83 | def test_radius_clustering_invalid_manner(): 84 | """ 85 | Test that an error is raised when an invalid manner is provided. 86 | """ 87 | with pytest.raises(ValueError): 88 | RadiusClustering(manner="invalid", radius=1.43).fit([[0, 1], [1, 0], [2, 1]]) 89 | 90 | with pytest.raises(ValueError): 91 | RadiusClustering(manner="", radius=1.43).fit([[0, 1], [1, 0], [2, 1]]) 92 | 93 | 94 | def test_radius_clustering_invalid_radius(): 95 | """ 96 | Test that an error is raised when an invalid radius is provided. 97 | """ 98 | with pytest.raises(ValueError, match="Radius must be a positive float."): 99 | RadiusClustering(manner="exact", radius=-1.0).fit([[0, 1], [1, 0], [2, 1]]) 100 | 101 | with pytest.raises(ValueError, match="Radius must be a positive float."): 102 | RadiusClustering(manner="approx", radius=0.0).fit([[0, 1], [1, 0], [2, 1]]) 103 | 104 | with pytest.raises(ValueError, match="Radius must be a positive float."): 105 | RadiusClustering(manner="exact", radius="invalid").fit([[0, 1], [1, 0], [2, 1]]) 106 | 107 | def test_radius_clustering_fit_without_data(): 108 | """ 109 | Test that an error is raised when fitting without data. 
110 | """ 111 | clustering = RadiusClustering(manner="exact", radius=1.5) 112 | with pytest.raises(ValueError): 113 | clustering.fit(None) 114 | 115 | def test_radius_clustering_new_clusterer(): 116 | """ 117 | Test that a custom clusterer can be set within the RadiusClustering class. 118 | """ 119 | def custom_clusterer(n, edges, nb_edges, random_state=None): 120 | # A mock custom clusterer that returns a fixed set of centers 121 | # and a fixed execution time 122 | return [0, 1], 0.1 123 | clustering = RadiusClustering(manner="exact", radius=1.5) 124 | # Set the custom clusterer 125 | assert hasattr(clustering, 'set_solver'), "RadiusClustering should have a set_solver method." 126 | assert callable(clustering.set_solver), "set_solver should be callable." 127 | clustering.set_solver(custom_clusterer) 128 | # Fit the clustering with the custom clusterer 129 | X = np.array([[0, 1], 130 | [1, 0], 131 | [2, 1]]) 132 | clustering.fit(X) 133 | assert clustering.clusterer_ == custom_clusterer, "The custom clusterer should be set correctly." 134 | # Check that the labels are assigned correctly 135 | assert len(clustering.labels_) == X.shape[0], "Labels length should match number of samples." 136 | assert clustering.nb_edges_ > 0, "There should be edges in the graph." 137 | assert clustering.centers_ == [0, 1], "The centers should match the custom clusterer's output." 138 | assert clustering.mds_exec_time_ == 0.1, "The MDS execution time should match the custom clusterer's output." 139 | 140 | def test_invalid_clusterer(): 141 | """ 142 | Test that an error is raised when an invalid clusterer is set. 
143 | """ 144 | clustering = RadiusClustering(manner="exact", radius=1.5) 145 | with pytest.raises(ValueError, match="The provided solver must be callable."): 146 | clustering.set_solver("not_a_callable") 147 | 148 | with pytest.raises(ValueError, match="The provided solver must be callable."): 149 | clustering.set_solver(12345) # Not a callable 150 | with pytest.raises(ValueError, match="The provided solver must be callable."): 151 | clustering.set_solver(None) 152 | 153 | def invalid_signature(): 154 | return [0, 1], 0.1 155 | 156 | with pytest.raises(ValueError): 157 | clustering.set_solver(invalid_signature) 158 | def invalid_clusterer(n, edges, nb_edges): 159 | return [0, 1], 0.1 160 | with pytest.raises(ValueError): 161 | clustering.set_solver(invalid_clusterer) -------------------------------------------------------------------------------- /tests/test_integration.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from radius_clustering import RadiusClustering 4 | from sklearn import datasets 5 | 6 | X = datasets.fetch_openml(name="iris", version=1, parser="auto")["data"] 7 | 8 | def test_radius_clustering_approx(): 9 | """ 10 | Test the approximate method of the RadiusClustering class. 11 | """ 12 | clusterer = RadiusClustering(manner="approx", radius=1.43) 13 | 14 | assert clusterer.manner == "approx", "The manner should be 'approx'." 15 | assert clusterer.radius == 1.43, "The radius should be 1.43." 16 | assert clusterer.random_state is None, "The random state should be None by default." 17 | assert clusterer._estimator_type == "clusterer", "The estimator type should be 'clusterer'." 18 | assert clusterer._check_symmetric(X) is False, "The input should not be a symmetric distance matrix." 19 | 20 | clusterer.fit(X) 21 | 22 | assert clusterer.X_checked_ is not None, "X_checked_ should not be None after fitting." 
23 | assert clusterer.dist_mat_ is not None, "dist_mat_ should not be None after fitting." 24 | assert clusterer.nb_edges_ > 0, "There should be edges in the graph." 25 | assert clusterer.labels_ is not None, "Labels should not be None after fitting." 26 | assert clusterer.centers_ is not None, "Centers should not be None after fitting." 27 | assert clusterer.effective_radius_ > 0, "Effective radius should be greater than 0." 28 | assert clusterer.mds_exec_time_ >= 0, "MDS execution time should be non-negative." 29 | assert clusterer.edges_ is not None, "Edges should not be None after fitting." 30 | assert clusterer.random_state == 42, "Random state should be set to 42 after fitting." 31 | 32 | results = clusterer.labels_ 33 | assert len(results) == X.shape[0], "The number of labels should match the number of samples." 34 | assert len(set(results)) <= X.shape[0], "The number of unique labels should not exceed the number of samples." 35 | 36 | 37 | def test_radius_clustering_exact(): 38 | """ 39 | Test the exact method of the RadiusClustering class. 40 | """ 41 | clusterer = RadiusClustering(manner="exact", radius=1.43) 42 | 43 | assert clusterer.manner == "exact", "The manner should be 'exact'." 44 | assert clusterer.radius == 1.43, "The radius should be 1.43." 45 | assert clusterer.random_state is None, "The random state should be None by default." 46 | assert clusterer._estimator_type == "clusterer", "The estimator type should be 'clusterer'." 47 | assert clusterer._check_symmetric(X) is False, "The input should not be a symmetric distance matrix." 48 | 49 | clusterer.fit(X) 50 | 51 | assert clusterer.X_checked_ is not None, "X_checked_ should not be None after fitting." 52 | assert clusterer.dist_mat_ is not None, "dist_mat_ should not be None after fitting." 53 | assert clusterer.nb_edges_ > 0, "There should be edges in the graph." 54 | assert clusterer.labels_ is not None, "Labels should not be None after fitting." 
55 | assert clusterer.centers_ is not None, "Centers should not be None after fitting." 56 | assert clusterer.effective_radius_ > 0, "Effective radius should be greater than 0." 57 | assert clusterer.mds_exec_time_ >= 0, "MDS execution time should be non-negative." 58 | assert clusterer.edges_ is not None, "Edges should not be None after fitting." 59 | assert clusterer.random_state is None, "Random state should remain None." 60 | 61 | results = clusterer.labels_ 62 | assert len(results) == X.shape[0], "The number of labels should match the number of samples." 63 | assert len(set(results)) <= X.shape[0], "The number of unique labels should not exceed the number of samples." 64 | 65 | def test_radius_clustering_fit_predict(): 66 | """ 67 | Test the fit_predict method of the RadiusClustering class. 68 | """ 69 | clusterer = RadiusClustering(manner="approx", radius=1.43) 70 | 71 | assert clusterer.manner == "approx", "The manner should be 'approx'." 72 | assert clusterer.radius == 1.43, "The radius should be 1.43." 73 | assert clusterer.random_state is None, "The random state should be None by default." 74 | assert clusterer._estimator_type == "clusterer", "The estimator type should be 'clusterer'." 75 | 76 | labels = clusterer.fit_predict(X) 77 | 78 | assert labels is not None, "Labels should not be None after fit_predict." 79 | assert len(labels) == X.shape[0], "The number of labels should match the number of samples." 80 | assert len(set(labels)) <= X.shape[0], "The number of unique labels should not exceed the number of samples." 81 | 82 | def test_radius_clustering_fit_predict_exact(): 83 | """ 84 | Test the fit_predict method of the RadiusClustering class with exact method. 85 | """ 86 | clusterer = RadiusClustering(manner="exact", radius=1.43) 87 | 88 | assert clusterer.manner == "exact", "The manner should be 'exact'." 89 | assert clusterer.radius == 1.43, "The radius should be 1.43." 
90 | assert clusterer.random_state is None, "The random state should be None by default." 91 | assert clusterer._estimator_type == "clusterer", "The estimator type should be 'clusterer'." 92 | 93 | labels = clusterer.fit_predict(X) 94 | 95 | assert labels is not None, "Labels should not be None after fit_predict." 96 | assert len(labels) == X.shape[0], "The number of labels should match the number of samples." 97 | assert len(set(labels)) <= X.shape[0], "The number of unique labels should not exceed the number of samples." 98 | 99 | def test_radius_clustering_random_state(): 100 | """ 101 | Test the random state functionality of the RadiusClustering class. 102 | """ 103 | clusterer = RadiusClustering(manner="approx", radius=1.43, random_state=123) 104 | 105 | assert clusterer.random_state == 123, "The random state should be set to 123." 106 | 107 | # Fit the model 108 | clusterer.fit(X) 109 | 110 | # Check that the random state is preserved 111 | assert clusterer.random_state == 123, "The random state should remain 123 after fitting." 112 | 113 | # Check that the results are consistent with the random state 114 | labels1 = clusterer.labels_ 115 | 116 | # Re-initialize and fit again with the same random state 117 | clusterer2 = RadiusClustering(manner="approx", radius=1.43, random_state=123) 118 | clusterer2.fit(X) 119 | 120 | labels2 = clusterer2.labels_ 121 | 122 | assert (labels1 == labels2).all(), "Labels should be consistent across runs with the same random state." 123 | 124 | def test_deterministic_behavior(): 125 | """ 126 | Test the deterministic behavior of the RadiusClustering class with a fixed random state. 
127 | """ 128 | clusterer1 = RadiusClustering(manner="approx", radius=1.43, random_state=42) 129 | clusterer2 = RadiusClustering(manner="approx", radius=1.43, random_state=42) 130 | 131 | labels1 = clusterer1.fit_predict(X) 132 | labels2 = clusterer2.fit_predict(X) 133 | 134 | assert (labels1 == labels2).all(), "Labels should be the same for two instances with the same random state." 135 | 136 | clusterer1 = RadiusClustering(manner="exact", radius=1.43) 137 | clusterer2 = RadiusClustering(manner="exact", radius=1.43) 138 | labels1 = clusterer1.fit_predict(X) 139 | labels2 = clusterer2.fit_predict(X) 140 | assert (labels1 == labels2).all(), "Labels should be the same for two exact instances." 141 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | License: GPLv3 3 | PyPI 4 | Code style: Ruff 5 | GitHub Actions Workflow Status 6 | Python version supported 7 | Codecov 8 | Binder 9 | DOI 10 | 11 |

12 | 13 | # Radius Clustering 14 | 15 | Radius clustering is a Python package that implements clustering under radius constraint based on the Minimum Dominating Set (MDS) problem. This problem is NP-Hard but has been studied in the literature and proven to be linked to the clustering under radius constraint problem (see [references](#references) for more details). 16 | 17 | ## Features 18 | 19 | - Implements both exact and approximate MDS-based clustering algorithms 20 | - Compatible with scikit-learn's API for clustering algorithms 21 | - Supports radius-constrained clustering 22 | - Provides options for exact and approximate solutions 23 | - Easy to use and integrate with existing Python data science workflows 24 | - Includes comprehensive documentation and examples 25 | - Full test coverage to ensure reliability and correctness 26 | - Supports custom MDS solvers for flexibility in clustering approaches 27 | - Provides a user-friendly interface for clustering tasks 28 | 29 | > [!CAUTION] 30 | > **Deprecation Notice**: The `threshold` parameter in the `RadiusClustering` class has been deprecated. Please use the `radius` parameter instead for specifying the radius for clustering. It is planned to be completely removed in version 2.0.0. The `radius` parameter is now the standard way to define the radius for clustering, aligning with our objective of making the parameters' name more intuitive and user-friendly. 31 | 32 | > [!NOTE] 33 | > **NEW VERSIONS**: The package is currently under active development for new features and improvements, including some refactoring and enhancements to the existing codebase. Backwards compatibility is not guaranteed, so please check the [CHANGELOG](CHANGELOG.md) for details on changes and updates. 
34 | 35 | ## Roadmap 36 | 37 | - [x] Version 1.4.0: 38 | - [x] Add support for custom MDS solvers 39 | - [x] Improve documentation and examples 40 | - [x] Add more examples and tutorials 41 | 42 | ## Installation 43 | 44 | You can install Radius Clustering using pip: 45 | 46 | ```bash 47 | pip install radius-clustering 48 | ``` 49 | 50 | ## Usage 51 | 52 | Here's a basic example of how to use Radius Clustering: 53 | 54 | ```python 55 | import numpy as np 56 | from radius_clustering import RadiusClustering 57 | 58 | # Example usage 59 | X = np.random.rand(100, 2) # Generate random data 60 | 61 | # Create an instance of MdsClustering 62 | rad_clustering = RadiusClustering(manner="approx", radius=0.5) 63 | 64 | # Fit the model to the data 65 | rad_clustering.fit(X) 66 | 67 | # Get cluster labels 68 | labels = rad_clustering.labels_ 69 | 70 | print(labels) 71 | ``` 72 | 73 | ## Documentation 74 | 75 | See the [full documentation for Radius Clustering](https://contrib.scikit-learn.org/radius_clustering/). 76 | 77 | ### Building the documentation 78 | 79 | To build the documentation, you can run the following command, assuming you have all dependencies needed installed: 80 | 81 | ```bash 82 | cd docs 83 | make html 84 | ``` 85 | 86 | Then you can open the `index.html` file in the `build` directory to view the full documentation. 87 | 88 | ## More information 89 | 90 | For more information please refer to the official documentation. 91 | 92 | If you want insights on how the algorithm works, please refer to the [presentation](PRESENTATION.md). 93 | 94 | If you want to know more about the experiments conducted with the package, please refer to the [experiments](EXPERIMENTS.md). 95 | 96 | 97 | ## Contributing 98 | 99 | Contributions to Radius Clustering are welcome! 100 | 101 | Please read the [CONTRIBUTING.md](CONTRIBUTING.md) file for details on how to contribute to the project. 
102 | Please note that the project is released with a [Code of Conduct](CODE_OF_CONDUCT.md), and we expect all contributors to adhere to it. 103 | 104 | ## License 105 | 106 | This project is licensed under the GNU General Public License v3.0 - see the LICENSE file for details. 107 | 108 | ## How to cite this work 109 | 110 | If you use Radius Clustering in your research, please cite the following paper and the software itself: 111 | 112 | ```bibtex 113 | @inproceedings{haenn_clustering2024, 114 | TITLE = {{Clustering Under Radius Constraints Using Minimum Dominating Sets}}, 115 | AUTHOR = {Haenn, Quentin and Chardin, Brice and Baron, Micka{\"e}l}, 116 | URL = {https://hal.science/hal-04533921}, 117 | BOOKTITLE = {{Lecture Notes in Artificial Intelligence}}, 118 | ADDRESS = {Poitiers, France}, 119 | PUBLISHER = {{Springer}}, 120 | YEAR = {2024}, 121 | MONTH = Jun, 122 | KEYWORDS = {Constrained Clustering ; Radius Based Clustering ; Minimum Dominating Set ; Constrained Clustering Radius Based Clustering Minimum Dominating Set}, 123 | PDF = {https://hal.science/hal-04533921v1/file/clustering_under_radius_using_mds.pdf}, 124 | HAL_ID = {hal-04533921}, 125 | HAL_VERSION = {v1}, 126 | } 127 | ``` 128 | 129 | ## Acknowledgments 130 | 131 | ### MDS Algorithms 132 | 133 | The two MDS algorithms implemented are forked and modified (or rewritten) from the following authors: 134 | 135 | - [Alejandra Casado](https://github.com/AlejandraCasado) for the minimum dominating set heuristic code [[1](https://www.sciencedirect.com/science/article/pii/S0378475422005055)]. We rewrote the code in C++ to adapt to the need of python interfacing. 136 | - [Hua Jiang](https://github.com/huajiang-ynu) for the minimum dominating set exact algorithm code [[2](https://dl.acm.org/doi/abs/10.24963/ijcai.2023/622)]. The code has been adapted to the need of python interfacing. 
137 | 138 | ### Funders 139 | 140 | The Radius Clustering work has been funded by: 141 | 142 | - [LIAS, ISAE-ENSMA](https://www.lias-lab.fr/) 143 | - [LabCom @lienor](https://labcom-alienor.ensma.fr/) and the [French National Research Agency](https://anr.fr/) 144 | 145 | ### Contributors 146 | 147 | - [Quentin Haenn (core developer)](https://www.lias-lab.fr/members/quentinhaenn/), LIAS, ISAE-ENSMA 148 | - [Brice Chardin](https://www.lias-lab.fr/members/bricechardin/), LIAS, ISAE-ENSMA 149 | - [Mickaël Baron](https://www.lias-lab.fr/members/mickaelbaron/), LIAS, ISAE-ENSMA 150 | 151 | 152 | ## References 153 | 154 | - [1] [An iterated greedy algorithm for finding the minimum dominating set in graphs](https://www.sciencedirect.com/science/article/pii/S0378475422005055) 155 | - [2] [An exact algorithm for the minimum dominating set problem](https://dl.acm.org/doi/abs/10.24963/ijcai.2023/622) 156 | - [3] [Clustering under radius constraint using minimum dominating set](https://link.springer.com/chapter/10.1007/978-3-031-62700-2_2) 157 | -------------------------------------------------------------------------------- /examples/plot_benchmark_custom.py: -------------------------------------------------------------------------------- 1 | """ 2 | ===================================================================================== 3 | Benchmark of Radius Clustering using multiple datasets and comparison with custom MDS 4 | ===================================================================================== 5 | 6 | This example demonstrates how to implement a custom solver for the MDS problem 7 | and use it within the Radius Clustering framework. 8 | Plus, it compares the results of a naive implementation using the 9 | `NetworkX` library with the Radius Clustering implementation. 10 | 11 | The example includes: 12 | 1. Defining the custom MDS solver. 13 | 2. Defining datasets to test the clustering. 14 | 3. 
Applying Radius clustering on the datasets using the custom MDS solver. 15 | 4. Ensure this solution works. 16 | 5. Establish a benchmark procedure to compare the Radius clustering with a naive implementation using `NetworkX`. 17 | 6. Comparing the results in terms of : 18 | - Execution time 19 | - Number of cluster found 20 | 7. Visualizing the benchmark results. 21 | 8. Visualizing the clustering results. 22 | 23 | This example is useful for understanding how to implement a custom MDS solver 24 | and how to perform an advanced usage of the package. 25 | """ 26 | # Author: Haenn Quentin 27 | # SPDX-License-Identifier: MIT 28 | 29 | # %% 30 | # Import necessary libraries 31 | # -------------------------- 32 | # 33 | # Since this example is a benchmark, we need to import the necessary libraries 34 | # to perform the benchmark, including `NetworkX` for the naive implementation, 35 | # `matplotlib` for visualization, and `sklearn` for the datasets. 36 | 37 | 38 | import networkx as nx 39 | import numpy as np 40 | import matplotlib.pyplot as plt 41 | import time 42 | import warnings 43 | 44 | from sklearn.datasets import fetch_openml 45 | from radius_clustering import RadiusClustering 46 | from sklearn.metrics import pairwise_distances_argmin 47 | 48 | warnings.filterwarnings("ignore", category=RuntimeWarning, module="sklearn") 49 | # %% 50 | # Define a custom MDS solver 51 | # -------------------------- 52 | # 53 | # We define a custom MDS solver that uses the `NetworkX` library to compute the MDS. 54 | # Note the signature of the function is identical to the one used in the `RadiusClustering` class. 55 | 56 | 57 | def custom_solver(n: int, edges: np.ndarray, nb_edges: int, random_state=None): 58 | """ 59 | Custom MDS solver using NetworkX to compute the MDS problem. 60 | 61 | Parameters: 62 | ----------- 63 | n : int 64 | The number of points in the dataset. 65 | edges : np.ndarray 66 | The edges of the graph, given as an array of (u, v) pairs (shape ``(nb_edges, 2)``), as expected by ``G.add_edges_from``.
67 | nb_edges : int 68 | The number of edges in the graph. 69 | random_state : int | None 70 | The random state to use for reproducibility. 71 | 72 | Returns: 73 | -------- 74 | centers : list 75 | A sorted list of the centers of the clusters. 76 | mds_exec_time : float 77 | The execution time of the MDS algorithm in seconds. 78 | """ 79 | G = nx.Graph() 80 | G.add_edges_from(edges) 81 | 82 | start_time = time.time() 83 | centers = list(nx.algorithms.dominating.dominating_set(G)) 84 | mds_exec_time = time.time() - start_time 85 | 86 | centers = sorted(centers) 87 | 88 | return centers, mds_exec_time 89 | 90 | 91 | # %% 92 | # Define datasets to test the clustering 93 | # -------------------------------------- 94 | # 95 | # We will use 4 datasets to test the clustering: 96 | # 1. Iris dataset 97 | # 2. Wine dataset 98 | # 3. Breast Cancer dataset (WDBC) 99 | # 4. Vehicle dataset 100 | # These are common datasets used in machine learning and lead to pretty fast results. 101 | # Structure of the variable `DATASETS`: 102 | # - The key is the name of the dataset. 103 | # - The value is a tuple containing: 104 | # - The dataset fetched from OpenML. 105 | # - The radius to use for the Radius clustering. 
(determined in literature, see references on home page) 106 | # 107 | 108 | 109 | DATASETS = { 110 | "iris": (fetch_openml(name="iris", version=1, as_frame=False), 1.43), 111 | "wine": (fetch_openml(name="wine", version=1, as_frame=False), 232.09), 112 | "glass": (fetch_openml(name="glass", version=1, as_frame=False), 3.94), 113 | "ionosphere": (fetch_openml(name="ionosphere", version=1, as_frame=False), 5.46), 114 | "breast_cancer": (fetch_openml(name="wdbc", version=1, as_frame=False), 1197.42), 115 | "synthetic": (fetch_openml(name="synthetic_control", version=1, as_frame=False), 70.12), 116 | "vehicle": (fetch_openml(name="vehicle", version=1, as_frame=False), 155.05), 117 | "yeast": (fetch_openml(name="yeast", version=1, as_frame=False), 0.4235), 118 | } 119 | 120 | # %% 121 | # Define the benchmark procedure 122 | # -------------------------------------- 123 | # 124 | # We define a function to perform the benchmark on the datasets. 125 | # The procedure is as follows: 126 | # 1. Creates an instance of RadiusClustering for each solver. 127 | # 2. For each instance, fit the algorithm on each dataset. 128 | # 3. Store the execution time and the number of clusters found for each dataset. 129 | # 4. Return the results as a dictionary. 
def benchmark_radius_clustering():
    """Benchmark the exact, approximate and custom MDS solvers.

    For every dataset in ``DATASETS``, each solver is fitted with the
    dataset-specific radius, and the wall-clock fitting time and the
    number of cluster centers found are recorded.

    Returns
    -------
    dict
        Mapping ``manner -> {"time": [...], "clusters": [...]}``, where
        the lists follow the iteration order of ``DATASETS``.
    """
    exact = RadiusClustering(manner="exact", radius=1.43)
    approx = RadiusClustering(manner="approx", radius=1.43)
    custom = RadiusClustering(manner="custom", radius=1.43)
    custom.set_solver(custom_solver)  # plug in the NetworkX-based solver

    results = {}
    # Loop through each algorithm and dataset
    for algo in (exact, approx, custom):
        times = []
        clusters = []
        for name, (dataset, radius) in DATASETS.items():
            X = dataset.data
            # Use the radius recommended in the literature for this dataset.
            # Plain attribute assignment is the idiom here; setattr with a
            # constant attribute name is unnecessary indirection.
            algo.radius = radius
            t0 = time.time()
            algo.fit(X)
            times.append(time.time() - t0)
            clusters.append(len(algo.centers_))
        results[algo.manner] = {"time": times, "clusters": clusters}

    return results


# %%
# Run the benchmark and plot the results
# --------------------------------------
# We run the benchmark and plot the results for each dataset.
170 | 171 | 172 | results = benchmark_radius_clustering() 173 | 174 | # Plot the results 175 | fig, axs = plt.subplot_mosaic( 176 | [ 177 | ["time", "time", "time", "time"], 178 | ["iris", "wine", "breast_cancer", "vehicle"], 179 | ["glass", "ionosphere", "synthetic", "yeast"], 180 | ], 181 | layout="constrained", 182 | figsize=(12, 8), 183 | ) 184 | fig.suptitle("Benchmark of Radius Clustering Solvers", fontsize=16) 185 | 186 | axs['time'].set_yscale('log') # Use logarithmic scale for better visibility 187 | 188 | algorithms = list(results.keys()) 189 | dataset_names = list(DATASETS.keys()) 190 | n_algos = len(algorithms) 191 | x_indices = np.arange(len(dataset_names)) # the label locations 192 | bar_width = 0.8 / n_algos # the width of the bars, with some padding 193 | 194 | for i, algo in enumerate(algorithms): 195 | times = results[algo]["time"] 196 | # Calculate position for each bar in the group to center them 197 | position = x_indices - (n_algos * bar_width / 2) + (i * bar_width) + bar_width / 2 198 | axs['time'].bar(position, times, bar_width, label=algo) 199 | 200 | for i, (name, (dataset, _)) in enumerate(DATASETS.items()): 201 | axs[name].bar( 202 | results.keys(), 203 | [results[algo]["clusters"][i] for algo in results.keys()], 204 | label=name, 205 | ) 206 | axs[name].axhline( 207 | y=len(set(dataset.target)), # Number of unique classes in the dataset 208 | label="True number of clusters", 209 | color='r', 210 | linestyle='--', 211 | ) 212 | axs[name].set_title(name) 213 | 214 | axs["iris"].set_ylabel("Number of clusters") 215 | axs["glass"].set_ylabel("Number of clusters") 216 | 217 | axs['time'].set_title("Execution Time (log scale)") 218 | axs['time'].set_xlabel("Datasets") 219 | axs['time'].set_ylabel("Time (seconds)") 220 | axs['time'].set_xticks(x_indices) 221 | axs['time'].set_xticklabels(dataset_names) 222 | axs['time'].legend(title="Algorithms") 223 | plt.tight_layout() 224 | plt.show() 225 | 226 | 227 | # %% 228 | # Conclusion 229 | # 
---------- 230 | # 231 | # In this example, we implemented a custom MDS solver with `NetworkX` and benchmarked it against the built-in exact and approximate solvers of Radius Clustering on several datasets. 232 | # We compared the solvers in terms of execution time and number of clusters found, and visualized both in the final figure. 233 | # The bar plots make it easy to see how each solver trades running time against the number of clusters it finds on each dataset. 234 | # The dashed line in each per-dataset plot marks the true number of classes, which is a useful reference when comparing the solvers. 235 | -------------------------------------------------------------------------------- /docs/source/installation.rst: -------------------------------------------------------------------------------- 1 | .. _installation: 2 | 3 | ============ 4 | Installation 5 | ============ 6 | 7 | There are different ways to install Radius Clustering: 8 | 9 | * :ref:`From PyPI `. This is the recommended way to install Radius Clustering. It will provide a stable version and pre-built packages are available for most platforms. 10 | 11 | * :ref:`From the source `. This is best for users who want the latest features and are comfortable building from source. This is also needed if you want to contribute to the project. 12 | 13 | .. warning:: 14 | 15 | Radius Clustering is currently not available on PyPI, pending the organization acceptance on PyPI. You can install the package from the source by following the :ref:`instructions `. 16 | Please notice that the compilation stage requires a C and C++ compiler toolchain to be installed on your system. 17 | 18 | 19 | .. _installation-pypi: 20 | 21 | Installing from PyPI 22 | -------------------- 23 | 24 | .. raw:: html 25 | 26 | 38 | 39 | .. div:: install-instructions 40 | 41 | .. tab-set:: 42 | :class: tabs-os 43 | :sync-group: os 44 | 45 | .. tab-item:: Windows 46 | :class-label: tab-4 47 | :sync: windows 48 | 49 | Install the 64-bit version of Python 3, for instance from the 50 | `official website `__. 
51 | 52 | Now create a `virtual environment (venv) 53 | `_ and install Radius Clustering. 54 | Note that the virtual environment is optional but strongly recommended, in 55 | order to avoid potential conflicts with other packages. 56 | 57 | .. prompt:: powershell 58 | 59 | python -m venv rad-env 60 | rad-env\Scripts\activate # activate 61 | pip install -U radius-clustering 62 | 63 | In order to check your installation, you can use: 64 | 65 | .. prompt:: powershell 66 | 67 | python -m pip show radius-clustering # show radius-clustering version and location 68 | python -m pip freeze # show all installed packages in the environment 69 | 70 | .. tab-item:: macOS 71 | :class-label: tab-4 72 | :sync: macos 73 | 74 | Install Python 3 using `homebrew `_ (`brew install python`) 75 | or by manually installing the package from the `official website 76 | `__. 77 | 78 | Now create a `virtual environment (venv) 79 | `_ and install Radius Clustering. 80 | Note that the virtual environment is optional but strongly recommended, in 81 | order to avoid potential conflicts with other packages. 82 | 83 | .. prompt:: bash 84 | 85 | python -m venv rad-env 86 | source rad-env/bin/activate # activate 87 | pip install -U radius-clustering 88 | 89 | In order to check your installation, you can use: 90 | 91 | .. prompt:: bash 92 | 93 | python -m pip show radius-clustering # show radius-clustering version and location 94 | python -m pip freeze # show all installed packages in the environment 95 | 96 | .. tab-item:: Linux 97 | :class-label: tab-4 98 | :sync: linux 99 | 100 | Python 3 is usually installed by default on most Linux distributions. To 101 | check if you have it installed, try: 102 | 103 | .. prompt:: bash 104 | 105 | python3 --version 106 | pip3 --version 107 | 108 | If you don't have Python 3 installed, please install `python3` and 109 | `python3-pip` from your distribution's package manager. 
110 | 111 | Now create a `virtual environment (venv) 112 | `_ and install Radius Clustering. 113 | Note that the virtual environment is optional but strongly recommended, in 114 | order to avoid potential conflicts with other packages. 115 | 116 | .. prompt:: bash 117 | 118 | python3 -m venv rad-env 119 | source rad-env/bin/activate # activate 120 | pip3 install -U radius-clustering 121 | 122 | In order to check your installation, you can use: 123 | 124 | .. prompt:: bash 125 | 126 | python3 -m pip show radius-clustering # show radius-clustering version and location 127 | python3 -m pip freeze # show all installed packages in the environment 128 | 129 | 130 | Using an isolated environment such as pip venv or conda makes it possible to 131 | install a specific version of radius-clustering with pip or conda and its dependencies 132 | independently of any previously installed Python packages. In particular under Linux 133 | it is discouraged to install pip packages alongside the packages managed by the 134 | package manager of the distribution (apt, dnf, pacman...). 135 | 136 | Note that you should always remember to activate the environment of your choice 137 | prior to running any Python command whenever you start a new terminal session. 138 | 139 | If you have not installed NumPy or SciPy yet, you can also install these using 140 | conda or pip. When using pip, please ensure that *binary wheels* are used, 141 | and NumPy and SciPy are not recompiled from source, which can happen when using 142 | particular configurations of operating system and hardware (such as Linux on 143 | a Raspberry Pi). 144 | 145 | 146 | .. _installation-source: 147 | 148 | Installing from the source 149 | -------------------------- 150 | 151 | Compiler Requirements 152 | ~~~~~~~~~~~~~~~~~~~~~ 153 | 154 | To install Radius Clustering from the source, you need to have a C and C++ compiler and their respective toolchains installed on your system, depending on your operating system. 155 | 156 | .. 
raw:: html 157 | 158 | 170 | 171 | .. div:: install-instructions 172 | 173 | .. tab-set:: 174 | :class: tabs-os 175 | :sync-group: os 176 | 177 | .. tab-item:: Windows 178 | :class-label: tab-4 179 | :sync: windows 180 | 181 | Install the correct version of Microsoft Visual C++ Build Tools for your Python version from the `official website `__. 182 | 183 | In Build Tools, install C++ toolchain. Ensure that it is added to the system PATH. 184 | You are now ready to install Radius Clustering from source. 185 | 186 | .. tab-item:: macOS 187 | :class-label: tab-4 188 | :sync: macos 189 | 190 | Normally, you should have the necessary tools installed on your system as it comes with Xcode Command Line Tools, which is included when you first install Homebrew or Xcode. 191 | To check if you have the necessary tools installed, try: 192 | 193 | .. prompt:: bash 194 | 195 | gcc --version 196 | g++ --version 197 | 198 | If you don't have the necessary tools installed, you can install them directly from the App Store by getting Xcode. You may also be interested in installing Homebrew. See this `tutorial `__ for more information. 199 | 200 | .. tab-item:: Linux 201 | :class-label: tab-4 202 | :sync: linux 203 | 204 | Normally, you should have the necessary tools installed on your system. To check if you have the necessary tools installed, try: 205 | 206 | .. prompt:: bash 207 | 208 | gcc --version 209 | g++ --version 210 | 211 | If you don't have the necessary tools installed, you can install them using your distribution's package manager. For instance, on Ubuntu, you can install them by running: 212 | 213 | .. prompt:: bash 214 | 215 | sudo apt-get update 216 | sudo apt-get install build-essential 217 | 218 | 219 | Installing Radius Clustering 220 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 221 | 222 | Now you have installed compilers toolchains requirements, you can build and install `radius-clustering` from the sources. 
You need to clone the repository and 223 | install the package using the following commands: 224 | 225 | .. prompt:: bash 226 | 227 | git clone git@github.com:scikit-learn-contrib/radius_clustering.git # clone the repository 228 | cd radius_clustering 229 | python -m venv rad-env 230 | source rad-env/bin/activate # activate 231 | python -m pip install . 232 | 233 | To check your installation, you can use: 234 | 235 | .. prompt:: bash 236 | 237 | python -m pip show radius-clustering # show radius-clustering version and location 238 | python -m pip freeze # show all installed packages in the environment 239 | python -c "from radius_clustering import *; rad = RadiusClustering(); print(rad)" 240 | 241 | If you want to contribute to the project, you will need to install the development 242 | dependencies. You can do this by running: 243 | 244 | .. prompt:: bash 245 | 246 | python -m pip install -e .[dev] 247 | 248 | Alternatively, if you want to contribute only to the documentation, you can install 249 | the documentation dependencies by running: 250 | 251 | .. prompt:: bash 252 | 253 | python -m pip install -e .[docs] 254 | 255 | Dependencies 256 | ++++++++++++ 257 | 258 | 259 | The minimum version of radius-clustering dependencies are listed below along with its 260 | purpose. 261 | 262 | .. 
list-table:: 263 | :header-rows: 1 264 | 265 | * - Dependency 266 | - Minimum version 267 | - Purpose 268 | * - numpy 269 | - 1.23.4 270 | - Build, Install 271 | * - scipy 272 | - 1.12.0 273 | - Build, Install 274 | * - scikit-learn 275 | - 1.2.2 276 | - Build, Install 277 | * - cython 278 | - 3.0.10 279 | - Build 280 | * - setuptools 281 | - 61.0.0 282 | - Build 283 | * - pytest 284 | - 8.3.3 285 | - Tests 286 | * - ruff 287 | - 0.2.1 288 | - Tests 289 | * - black 290 | - 24.3.0 291 | - Tests 292 | * - matplotlib 293 | - 3.6.2 294 | - Docs, Examples 295 | * - sphinx 296 | - 8.1.3 297 | - Docs 298 | * - sphinx-copybutton 299 | - 0.5.2 300 | - Docs 301 | * - sphinx-rtd-theme 302 | - 3.0.0 303 | - Docs 304 | * - sphinx_design 305 | - 0.6.1 306 | - Docs 307 | * - sphinx_gallery 308 | - 0.18.0 309 | - Docs 310 | * - sphinx-prompt 311 | - 1.9.0 312 | - Docs 313 | -------------------------------------------------------------------------------- /examples/plot_iris_example.py: -------------------------------------------------------------------------------- 1 | """ 2 | =============================== 3 | Iris Dataset Clustering Example 4 | =============================== 5 | 6 | This example is meant to illustrate the use of the Radius clustering library on the Iris dataset. 7 | It comes with a simple example of how to use the library to cluster the Iris dataset and a comparison with 8 | kmeans clustering algorithms. 9 | 10 | The example includes: 11 | 1. Loading the Iris dataset 12 | 2. Applying Radius clustering and k-means clustering 13 | 3. Visualizing the clustering results 14 | 15 | This example serves as a simple introduction to using the Radius clustering library 16 | on a well-known dataset. 17 | """ 18 | # Author: Haenn Quentin 19 | # SPDX-License-Identifier: MIT 20 | 21 | 22 | # %% 23 | # Load the Iris dataset 24 | # --------------------- 25 | # 26 | # We start by loading the Iris dataset using the `fetch_openml` function from `sklearn.datasets`. 
# The Iris dataset is a well-known dataset that contains 150 samples of iris flowers.
# Each sample has 4 features: sepal length, sepal width, petal length, and petal width.
# The dataset is labeled with 3 classes: setosa, versicolor, and virginica.

import numpy as np
from sklearn import datasets
from radius_clustering import RadiusClustering

# Load the Iris dataset
iris = datasets.load_iris()
X = iris["data"]
y = iris.target


# %%
# Visualize the Iris dataset
# --------------------------
#
# Project the samples onto their first three principal components and draw a
# 3D scatter plot, colouring each point by its true class label.
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
import mpl_toolkits.mplot3d

# Reduce the dimensionality of the dataset to 3D using PCA
pca = PCA(n_components=3)
iris_reduced = pca.fit_transform(X)
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111, projection="3d", elev=48, azim=134)
ax.scatter(
    iris_reduced[:, 0],
    iris_reduced[:, 1],
    iris_reduced[:, 2],
    c=y,
    cmap="Dark2",
    s=40,
)
# Set plot labels
ax.set_title("Iris dataset in first 3 PCA components")
ax.set_xlabel("1st eigenvector")
ax.set_ylabel("2nd eigenvector")
ax.set_zlabel("3rd eigenvector")

# Hide tick labels
ax.xaxis.set_ticklabels([])
ax.yaxis.set_ticklabels([])
ax.zaxis.set_ticklabels([])

plt.show()

# %%
# Compute Clustering with Radius Clustering
# -----------------------------------------
#
# Fit a `RadiusClustering` instance on the raw features and time the run.
import time

rad = RadiusClustering(manner="exact", radius=1.43)
t0 = time.time()
rad.fit(X)
t_rad = time.time() - t0

# %%
# Compute KMeans Clustering for Comparison
# ----------------------------------------
#
# Fit a KMeans model with the known number of classes, timing it as well.

from sklearn.cluster import KMeans

k_means = KMeans(n_clusters=3, n_init=10)
t0 = time.time()
k_means.fit(X)
t_kmeans = time.time() - t0

# %% Establishing parity between clusters
# --------------------------------------
#
# Both algorithms number their clusters arbitrarily, so the same cluster could
# receive a different colour in each plot. To keep colours comparable, we
# greedily match each KMeans centroid with its nearest Radius-clustering
# center and reorder the Radius labels accordingly.


def get_order_labels(kmeans, rad, data):
    """Greedily pair KMeans centroids with Radius-clustering centers.

    Returns the list of Radius cluster labels reordered so that cluster *i*
    corresponds to KMeans cluster *i*.
    """
    km_centers_left = kmeans.cluster_centers_.copy()
    rad_centers_left = data[rad.centers_].copy()
    order = []
    # For each KMeans centroid, find the closest remaining Radius center.
    for km_center in km_centers_left:
        idx = pairwise_distances_argmin([km_center], rad_centers_left)
        # With a single Radius center left, give it the only label not yet used.
        if len(rad_centers_left) == 1:
            for i in range(len(km_centers_left)):
                if i not in order:
                    order.append(i)
                    break
            break
        # Coordinates of the matched center in the Radius clustering.
        coords = rad_centers_left[idx]
        # The data point closest to that center carries its cluster label.
        nearest = pairwise_distances_argmin(coords, data)
        matched_label = rad.labels_[nearest]
        # This center is taken: drop it from the candidate pool.
        rad_centers_left = np.delete(rad_centers_left, idx, axis=0)
        # Record the matched cluster label.
        order.append(int(matched_label[0]))
    return order


from sklearn.metrics.pairwise import pairwise_distances_argmin

rad_centers_index = np.array(rad.centers_)
order = get_order_labels(k_means, rad, X)

kmeans_centers = k_means.cluster_centers_
rad_centers = rad_centers_index[order]
rad_centers_coordinates = X[rad_centers]

# Pair the cluster labels
kmeans_labels = pairwise_distances_argmin(X, kmeans_centers)
rad_labels = pairwise_distances_argmin(X, rad_centers_coordinates)

# %%
# Plotting the results and the difference
# ---------------------------------------

fig = plt.figure(figsize=(12, 6))
fig.subplots_adjust(left=0.02, right=0.98, bottom=0.05, top=0.9)
colors = ["#4EACC5", "#FF9C34", "#4E9A06"]

# KMeans
ax = fig.add_subplot(1, 3, 1, projection="3d", elev=48, azim=134, roll=0)

ax.scatter(
    iris_reduced[:, 0],
    iris_reduced[:, 1],
    iris_reduced[:, 2],
    c=kmeans_labels,
    cmap="Dark2",
    s=40,
)
# adapting center coordinates to the 3D plot
kmeans_centers = pca.transform(kmeans_centers)
ax.scatter(
    kmeans_centers[:, 0],
    kmeans_centers[:, 1],
    kmeans_centers[:, 2],
    c="r",
    s=200,
)
ax.set_title("KMeans")
ax.set_xticks(())
ax.set_yticks(())
ax.set_zticks(())

ax.text3D(-3.5, 3, 1.0, "train time: %.2fs\ninertia: %f" % (t_kmeans, k_means.inertia_))

# MDS
ax = fig.add_subplot(1, 3, 2, projection="3d", elev=48, azim=134, roll=0)
ax.scatter(
    iris_reduced[:, 0],
    iris_reduced[:, 1],
    iris_reduced[:, 2],
    c=rad_labels,
    cmap="Dark2",
    s=40,
)
# adapting center coordinates to the 3D plot
rad_centers_coordinates = pca.transform(rad_centers_coordinates)
ax.scatter(
    rad_centers_coordinates[:, 0],
    rad_centers_coordinates[:, 1],
    rad_centers_coordinates[:, 2],
    c="r",
    s=200,
)
ax.set_title("MDS Clustering")
ax.set_xticks(())
ax.set_yticks(())
ax.set_zticks(())
ax.text3D(-3.5, 3, 0.0, "train time: %.2fs" % t_rad)

# Initialize the different array to all False (no label ever equals 4)
different = rad_labels == 4
ax = fig.add_subplot(1, 3, 3, projection="3d", elev=48, azim=134, roll=0)

for k in range(3):
    different += (kmeans_labels == k) != (rad_labels == k)

identical = np.logical_not(different)
ax.scatter(
    iris_reduced[identical, 0], iris_reduced[identical, 1], color="#bbbbbb", marker="."
)
ax.scatter(iris_reduced[different, 0], iris_reduced[different, 1], color="m")
ax.set_title("Difference")
ax.set_xticks(())
ax.set_yticks(())
ax.set_zticks(())

plt.show()

# %%
# Another difference plot
# -----------------------
#
# As we saw, the difference plot is not very informative using Iris.
# We'll use a different dataset to show the difference plot.

wine = datasets.load_wine()
X = wine.data
y = wine.target
pca = PCA(n_components=3)
wine_reduced = pca.fit_transform(X)

# Compute clustering with MDS

rad = RadiusClustering(manner="exact", radius=232.09)
t0 = time.time()
rad.fit(X)
t_rad = time.time() - t0

# Compute KMeans clustering for comparison

k_means = KMeans(n_clusters=3, n_init=10)
t0 = time.time()
k_means.fit(X)
t_kmeans = time.time() - t0

# %%
# Reapplying the same process as before
# --------------------------------------

rad_centers_index = np.array(rad.centers_)
order = get_order_labels(k_means, rad, X)

kmeans_centers = k_means.cluster_centers_
rad_centers = rad_centers_index[order]
rad_centers_coordinates = X[rad_centers]

# Pair the cluster labels
kmeans_labels = pairwise_distances_argmin(X, kmeans_centers)
rad_labels = pairwise_distances_argmin(X, rad_centers_coordinates)

# %%
# Plotting the results and the difference
# ---------------------------------------

fig = plt.figure(figsize=(12, 6))
fig.subplots_adjust(left=0.02, right=0.98, bottom=0.05, top=0.9)
colors = ["#4EACC5", "#FF9C34", "#4E9A06"]

# KMeans
ax = fig.add_subplot(1, 3, 1, projection="3d", elev=48, azim=134, roll=0)

ax.scatter(
    wine_reduced[:, 0],
    wine_reduced[:, 1],
    wine_reduced[:, 2],
    c=kmeans_labels,
    cmap="Dark2",
    s=40,
)
# adapting center coordinates to the 3D plot
kmeans_centers = pca.transform(kmeans_centers)
ax.scatter(
    kmeans_centers[:, 0],
    kmeans_centers[:, 1],
    kmeans_centers[:, 2],
    c="r",
    s=200,
)
ax.set_title("KMeans")
ax.set_xticks(())
ax.set_yticks(())
ax.set_zticks(())

ax.text3D(
    60.0, 80.0, 0.0, "train time: %.2fs\ninertia: %f" % (t_kmeans, k_means.inertia_)
)

# MDS
ax = fig.add_subplot(1, 3, 2, projection="3d", elev=48, azim=134, roll=0)
ax.scatter(
    wine_reduced[:, 0],
    wine_reduced[:, 1],
    wine_reduced[:, 2],
    c=rad_labels,
    cmap="Dark2",
    s=40,
)
# adapting center coordinates to the 3D plot
rad_centers_coordinates = pca.transform(rad_centers_coordinates)
ax.scatter(
    rad_centers_coordinates[:, 0],
    rad_centers_coordinates[:, 1],
    rad_centers_coordinates[:, 2],
    c="r",
    s=200,
)
ax.set_title("MDS Clustering")
ax.set_xticks(())
ax.set_yticks(())
ax.set_zticks(())
ax.text3D(60.0, 80.0, 0.0, "train time: %.2fs" % t_rad)

# Initialize the different array to all False (no label ever equals 4)
different = rad_labels == 4
ax = fig.add_subplot(1, 3, 3, projection="3d", elev=48, azim=134, roll=0)

for k in range(3):
    different += (kmeans_labels == k) != (rad_labels == k)

identical = np.logical_not(different)
ax.scatter(
    wine_reduced[identical, 0], wine_reduced[identical, 1], color="#bbbbbb", marker="."
)
ax.scatter(wine_reduced[different, 0], wine_reduced[different, 1], color="m")
ax.set_title("Difference")
ax.set_xticks(())
ax.set_yticks(())
ax.set_zticks(())

plt.show()

# %%
# Conclusion
# ----------
#
# In this example, we applied Radius clustering to the Iris and Wine datasets and compared it with KMeans clustering.
# We visualized the clustering results and the difference between the two clustering algorithms.
# We saw that Radius Clustering can lead to smaller clusters than kmeans, which produces much more balanced clusters.
# The difference plot can be very useful to see where the two clustering algorithms differ.
361 | -------------------------------------------------------------------------------- /src/radius_clustering/radius_clustering.py: -------------------------------------------------------------------------------- 1 | """ 2 | Radius Clustering 3 | 4 | This module provides functionality for Minimum Dominating Set (MDS) based clustering. 5 | It includes methods for solving MDS problems and applying the solutions to 6 | clustering tasks. 7 | 8 | This module serves as the main interface for the Radius clustering library. 9 | """ 10 | 11 | from __future__ import annotations 12 | 13 | import os 14 | import warnings 15 | 16 | import numpy as np 17 | from sklearn.base import BaseEstimator, ClusterMixin 18 | from sklearn.metrics import pairwise_distances 19 | from sklearn.utils.validation import check_random_state, validate_data 20 | 21 | from .algorithms import clustering_approx, clustering_exact 22 | 23 | DIR_PATH = os.path.dirname(os.path.realpath(__file__)) 24 | 25 | 26 | class RadiusClustering(ClusterMixin, BaseEstimator): 27 | r""" 28 | Radius Clustering algorithm. 29 | 30 | This class implements clustering based on the Minimum Dominating Set (MDS) problem. 31 | It can use either an exact or approximate method for solving the MDS problem. 32 | 33 | Parameters: 34 | ----------- 35 | manner : str, optional (default="approx") 36 | The method to use for solving the MDS problem. Can be "exact" or "approx". 37 | radius : float, optional (default=0.5) 38 | The dissimilarity threshold to act as radius constraint for the clustering. 39 | 40 | Attributes: 41 | ----------- 42 | X : array-like, shape (n_samples, n_features) 43 | The input data. 44 | centers\_ : list 45 | The indices of the cluster centers. 46 | labels\_ : array-like, shape (n_samples,) 47 | The cluster labels for each point in the input data. 48 | effective_radius\_ : float 49 | The maximum distance between any point and its assigned cluster center. 
50 | random_state\_ : int | None 51 | The random state used for reproducibility. If None, no random state is set. 52 | 53 | .. note:: 54 | The `random_state_` attribute is not used when the `manner` is set to "exact". 55 | 56 | .. versionchanged:: 1.4.0 57 | The `RadiusClustering` class has been refactored. 58 | Clustering algorithms are now separated into their own module 59 | (`algorithms.py`) to improve maintainability and extensibility. 60 | 61 | .. versionadded:: 1.4.0 62 | The `set_solver` method was added to allow users to set a custom solver 63 | for the MDS problem. This allows for flexibility in how the MDS problem is solved 64 | and enables users to use their own implementations of MDS clustering algorithms. 65 | 66 | .. versionadded:: 1.3.0 67 | 68 | - The *random_state* parameter was added to allow reproducibility in the approximate method. 69 | 70 | - The `radius` parameter replaces the `threshold` parameter for setting the dissimilarity threshold for better clarity and consistency. 71 | 72 | .. versionchanged:: 1.3.0 73 | All publicly accessible attributes are now suffixed with an underscore 74 | (e.g., `centers_`, `labels_`). 75 | This is particularly useful for compatibility with scikit-learn's API. 76 | 77 | .. deprecated:: 1.3.0 78 | The `threshold` parameter is deprecated. Use `radius` instead. 79 | Will be removed in a future version. 80 | """ 81 | 82 | _estimator_type = "clusterer" 83 | _algorithms = { 84 | "exact": clustering_exact, 85 | "approx": clustering_approx, 86 | } 87 | 88 | def __init__( 89 | self, 90 | manner: str = "approx", 91 | radius: float = 0.5, 92 | threshold=None, 93 | random_state: int | None = None, 94 | ) -> None: 95 | if threshold is not None: 96 | warnings.warn( 97 | "The 'threshold' parameter is deprecated and" 98 | " will be removed in a future version." 
99 | "Please use 'radius' instead.", 100 | DeprecationWarning, 101 | stacklevel=2, 102 | ) 103 | radius = threshold 104 | self.threshold = threshold # For backward compatibility 105 | self.manner = manner 106 | self.radius = radius 107 | self.random_state = random_state 108 | 109 | def _check_symmetric(self, a: np.ndarray, tol: float = 1e-8) -> bool: 110 | if a.ndim != 2: 111 | raise ValueError("Input must be a 2D array.") 112 | if a.shape[0] != a.shape[1]: 113 | return False 114 | return np.allclose(a, a.T, atol=tol) 115 | 116 | def fit(self, X: np.ndarray, y: None = None, metric: str | callable = "euclidean") -> "RadiusClustering": 117 | """ 118 | Fit the MDS clustering model to the input data. 119 | 120 | This method computes the distance matrix if the input is a feature matrix, 121 | or uses the provided distance matrix directly if the input is already 122 | a distance matrix. 123 | 124 | .. note:: 125 | If the input is a distance matrix, it should be symmetric and square. 126 | If the input is a feature matrix, the distance matrix 127 | will be computed using Euclidean distance. 128 | 129 | .. tip:: 130 | Next version will support providing different metrics or 131 | even custom callables to compute the distance matrix. 132 | 133 | Parameters: 134 | ----------- 135 | X : array-like, shape (n_samples, n_features) 136 | The input data to cluster. X should be a 2D array-like structure. 137 | It can either be : 138 | - A distance matrix (symmetric, square) with shape (n_samples, n_samples). 139 | - A feature matrix with shape (n_samples, n_features) 140 | where the distance matrix will be computed. 141 | y : Ignored 142 | Not used, present here for API consistency by convention. 143 | 144 | metric : str | callable, optional (default="euclidean") 145 | The metric to use when computing the distance matrix. 146 | The default is "euclidean". 
147 | This should be a valid metric string from 148 | `sklearn.metrics.pairwise_distances` or a callable that computes 149 | the distance between two points. 150 | 151 | .. note:: 152 | The metric parameter *MUST* be a valid metric string from 153 | `sklearn.metrics.pairwise_distances` or a callable that computes 154 | the distance between two points. 155 | Valid metric strings include : 156 | - "euclidean" 157 | - "manhattan" 158 | - "cosine" 159 | - "minkowski" 160 | - and many more supported by scikit-learn. 161 | please refer to the 162 | `sklearn.metrics.pairwise_distances` documentation for a full list. 163 | 164 | .. attention:: 165 | If the input is a distance matrix, the metric parameter is ignored. 166 | The distance matrix should be symmetric and square. 167 | 168 | .. warning:: 169 | If the parameter is a callable, it should : 170 | - Accept two 1D arrays as input. 171 | - Return a single float value representing the distance between the two points. 172 | 173 | Returns: 174 | -------- 175 | self : object 176 | Returns self. 177 | 178 | Examples : 179 | ---------- 180 | 181 | >>> from radius_clustering import RadiusClustering 182 | >>> from sklearn import datasets 183 | >>> # Load the Iris dataset 184 | >>> iris = datasets.fetch_openml(name="iris", version=1, parser="auto") 185 | >>> X = iris["data"] # Use dictionary-style access instead of attribute access 186 | >>> rad = RadiusClustering(manner="exact", threshold=1.43).fit( 187 | ... X 188 | ... ) # Threshold set to 1.43 because it is the optimal 189 | ... 
# threshold for the Iris dataset 190 | >>> rad.centers_ 191 | [96, 49, 102] 192 | 193 | For examples on common datasets and differences with kmeans, 194 | see :ref:`sphx_glr_auto_examples_plot_iris_example.py` 195 | """ 196 | self.X_checked_ = validate_data(self, X) 197 | 198 | # Create dist and adj matrices 199 | if not self._check_symmetric(self.X_checked_): 200 | dist_mat = pairwise_distances(self.X_checked_, metric=metric) 201 | else: 202 | dist_mat = self.X_checked_ 203 | 204 | if not self._check_symmetric(dist_mat): 205 | raise ValueError("Input distance matrix must be symmetric. Got a non-symmetric matrix.") 206 | self.dist_mat_ = dist_mat 207 | if not isinstance(self.radius, (float, int)): 208 | raise ValueError("Radius must be a positive float.") 209 | if self.radius <= 0: 210 | raise ValueError("Radius must be a positive float.") 211 | adj_mask = np.triu((dist_mat <= self.radius), k=1) 212 | self.nb_edges_ = np.sum(adj_mask) 213 | if self.nb_edges_ == 0: 214 | self.centers_ = list(range(self.X_checked_.shape[0])) 215 | self.labels_ = np.array(self.centers_) 216 | self.effective_radius_ = 0 217 | self.mds_exec_time_ = 0 218 | return self 219 | self.edges_ = np.argwhere(adj_mask).astype( 220 | np.uint32 221 | ) # Edges in the adjacency matrix 222 | # uint32 is used to use less memory. Max number of features is 2^32-1 223 | self.clusterer_ = self._algorithms.get(self.manner, self._algorithms["approx"]) 224 | self._clustering() 225 | self._compute_effective_radius() 226 | self._compute_labels() 227 | 228 | return self 229 | 230 | def fit_predict(self, X: np.ndarray, y: None = None, metric: str | callable = "euclidean") -> np.ndarray: 231 | """ 232 | Fit the model and return the cluster labels. 233 | 234 | This method is a convenience function that combines `fit` and `predict`. 235 | 236 | Parameters: 237 | ----------- 238 | X : array-like, shape (n_samples, n_features) 239 | The input data to cluster. X should be a 2D array-like structure. 
240 | It can either be : 241 | - A distance matrix (symmetric, square) with shape (n_samples, n_samples). 242 | - A feature matrix with shape (n_samples, n_features) where 243 | the distance matrix will be computed. 244 | y : Ignored 245 | Not used, present here for API consistency by convention. 246 | 247 | metric : str | callable, optional (default="euclidean") 248 | The metric to use when computing the distance matrix. 249 | The default is "euclidean". 250 | Refer to the `fit` method for more details on valid metrics. 251 | 252 | Returns: 253 | -------- 254 | labels : array, shape (n_samples,) 255 | The cluster labels for each point in X. 256 | """ 257 | self.fit(X, metric=metric) 258 | return self.labels_ 259 | 260 | def _clustering(self): 261 | """ 262 | Perform the clustering using either the exact or approximate MDS method. 263 | """ 264 | n = self.X_checked_.shape[0] 265 | if self.manner not in self._algorithms: 266 | raise ValueError(f"Invalid manner. Please choose in {list(self._algorithms.keys())}.") 267 | if self.clusterer_ == clustering_approx: 268 | if self.random_state is None: 269 | self.random_state = 42 270 | self.random_state_ = check_random_state(self.random_state) 271 | seed = self.random_state_.randint(np.iinfo(np.int32).max) 272 | else: 273 | seed = None 274 | self.centers_, self.mds_exec_time_ = self.clusterer_(n, self.edges_, self.nb_edges_, seed) 275 | 276 | def _compute_effective_radius(self): 277 | """ 278 | Compute the effective radius of the clustering. 279 | 280 | The effective radius is the maximum radius among all clusters. 281 | That means EffRad = max(R(C_i)) for all i. 282 | """ 283 | self.effective_radius_ = np.min(self.dist_mat_[:, self.centers_], axis=1).max() 284 | 285 | def _compute_labels(self): 286 | """ 287 | Compute the cluster labels for each point in the dataset. 
288 | """ 289 | distances = self.dist_mat_[:, self.centers_] 290 | self.labels_ = np.argmin(distances, axis=1) 291 | 292 | min_dist = np.min(distances, axis=1) 293 | self.labels_[min_dist > self.radius] = -1 294 | 295 | def set_solver(self, solver: callable) -> None: 296 | """ 297 | Set a custom solver for resolving the MDS problem. 298 | This method allows users to replace the default MDS solver with a custom one. 299 | 300 | An example is provided below and in the example gallery : 301 | :ref:`sphx_glr_auto_examples_plot_benchmark_custom.py` 302 | 303 | .. important:: 304 | The custom solver must accept the same parameters as the default solvers 305 | and return a tuple containing the cluster centers and the execution time. 306 | e.g., it should have the signature: 307 | 308 | >>> def custom_solver( 309 | >>> n: int, 310 | >>> edges: np.ndarray, 311 | >>> nb_edges: int, 312 | >>> random_state: int | None = None 313 | >>> ) -> tuple[list, float]: 314 | >>> # Custom implementation details 315 | >>> centers = [...] 316 | >>> exec_time = ... 317 | >>> # Return the centers and execution time 318 | >>> return centers, exec_time 319 | 320 | This allows for flexibility in how the MDS problem is solved. 321 | 322 | Parameters: 323 | ----------- 324 | solver : callable 325 | The custom solver function to use for MDS clustering. 326 | It should accept the same parameters as the default solvers 327 | and return a tuple containing the cluster centers and the execution time. 328 | 329 | Raises: 330 | ------- 331 | ValueError 332 | If the provided solver does not have the correct signature. 
333 | 334 | """ 335 | if not callable(solver): 336 | raise ValueError("The provided solver must be callable.") 337 | 338 | # Check if the solver has the correct signature 339 | try: 340 | n = 3 341 | edges = np.array([[0, 1], [1, 2], [2, 0]]) 342 | nb_edges = edges.shape[0] 343 | solver(n, edges, nb_edges, random_state=None) 344 | except Exception as e: 345 | raise ValueError(f"The provided solver does not have the correct signature: {e}") from e 346 | self.manner = "custom" 347 | self._algorithms["custom"] = solver -------------------------------------------------------------------------------- /src/radius_clustering/utils/mds_core.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file mds_core.cpp 3 | * @brief Core implementation of the Minimum Dominating Set (MDS) algorithm. 4 | * 5 | * This file contains the C++ implementation of the MDS algorithm, 6 | * including the iterated greedy approach and supporting data structures. 7 | * It provides the main computational logic for solving MDS problems. 
8 | */ 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include "random_manager.h" 18 | 19 | 20 | class Result { 21 | public: 22 | Result() {} // Add this line 23 | Result(std::string instanceName) : instanceName(instanceName) {} 24 | 25 | void add(std::string key, float value) { 26 | map.push_back(Tuple(key, value)); 27 | } 28 | 29 | float get(int pos) { 30 | return map[pos].value; 31 | } 32 | 33 | std::vector getKeys() { 34 | std::vector keys; 35 | for (auto& tuple : map) { 36 | keys.push_back(tuple.name); 37 | } 38 | return keys; 39 | } 40 | 41 | std::string getInstanceName() { 42 | return instanceName; 43 | } 44 | 45 | std::unordered_set getSolutionSet() { 46 | return solutionSet; 47 | } 48 | 49 | void setSolutionSet(std::unordered_set solutionSet) { 50 | this->solutionSet = solutionSet; 51 | } 52 | 53 | 54 | private: 55 | class Tuple { 56 | public: 57 | 58 | std::string name; 59 | float value; 60 | 61 | Tuple(std::string name, float value) : name(name), value(value) {} 62 | }; 63 | std::string instanceName; 64 | std::vector map; 65 | std::unordered_set solutionSet; 66 | }; 67 | 68 | class Instance { 69 | public: 70 | Instance(int n, const std::vector& edges_list, int nb_edges, std::string name) 71 | : name(name), numNodes(n), adjacencyList(n) { 72 | for (int i = 0; i < numNodes; ++i) { 73 | unSelectedNodes.insert(i); 74 | } 75 | constructAdjacencyList(edges_list, nb_edges); 76 | setSupportNodes(); 77 | } 78 | 79 | const std::vector>& getAdjacencyList() const { return adjacencyList; } 80 | const std::unordered_set& getSupportNodes() const { return supportNodes; } 81 | const std::unordered_set& getLeavesNodes() const { return leavesNodes; } 82 | const std::unordered_set& getUnSelectedNodes() const { return unSelectedNodes; } 83 | int getNumNodes() const { return numNodes; } 84 | std::string getName() const { return name; } 85 | 86 | private: 87 | std::string name; 88 | int numNodes; 89 | 
std::vector> adjacencyList; 90 | std::unordered_set supportNodes; 91 | std::unordered_set leavesNodes; 92 | std::unordered_set unSelectedNodes; 93 | const bool supportAndLeafNodes = true; 94 | 95 | void constructAdjacencyList(const std::vector& edge_list, int nb_edges) { 96 | for (int i = 0; i < 2 * nb_edges; i+=2) { 97 | int u = edge_list[i]; 98 | int v = edge_list[i+1]; 99 | adjacencyList[u].push_back(v); 100 | adjacencyList[v].push_back(u); 101 | } 102 | } 103 | 104 | void setSupportNodes() { 105 | for (int i = 0; i < numNodes; ++i) { 106 | if (adjacencyList[i].size() == 1 && supportAndLeafNodes) { 107 | int neighbour = adjacencyList[i][0]; 108 | if (leavesNodes.find(neighbour) == leavesNodes.end()) { 109 | leavesNodes.insert(i); 110 | supportNodes.insert(neighbour); 111 | } 112 | unSelectedNodes.erase(neighbour); 113 | unSelectedNodes.erase(i); 114 | } else if (adjacencyList[i].empty() && supportAndLeafNodes) { 115 | supportNodes.insert(i); 116 | } 117 | } 118 | } 119 | }; 120 | 121 | class Solution { 122 | public: 123 | Solution(const Instance& inst) 124 | : instance(&inst), numCovered(0), watchers(inst.getNumNodes()) { 125 | unSelectedNodes = inst.getUnSelectedNodes(); 126 | } 127 | 128 | Solution(const Solution& other) = default; 129 | Solution& operator=(const Solution& other) = default; 130 | 131 | bool isFeasible() const { return numCovered == instance->getNumNodes(); } 132 | bool checking() { 133 | bool removed = false; 134 | std::vector selectedList(selectedNotSupportNodes.begin(), selectedNotSupportNodes.end()); 135 | for (int select : selectedList) { 136 | if (watchers[select].size() > 1) { 137 | bool remove = true; 138 | for (int elem : instance->getAdjacencyList()[select]) { 139 | if (watchers[elem].size() == 1) { 140 | remove = false; 141 | break; 142 | } 143 | } 144 | if (remove) { 145 | removed = true; 146 | removeNode(select); 147 | } 148 | } 149 | } 150 | return removed; 151 | } 152 | 153 | void addNode(int node) { 154 | 
selectedNodes.insert(node); 155 | unSelectedNodes.erase(node); 156 | addWatcher(node); 157 | if (instance->getSupportNodes().find(node) == instance->getSupportNodes().end()) { 158 | selectedNotSupportNodes.insert(node); 159 | } 160 | } 161 | 162 | void removeNode(int node) { 163 | selectedNodes.erase(node); 164 | unSelectedNodes.insert(node); 165 | removeWatcher(node); 166 | selectedNotSupportNodes.erase(node); 167 | } 168 | 169 | int getBestNextNode() const { 170 | int bestCount = -1; 171 | int bestNode = -1; 172 | 173 | for (int i : unSelectedNodes) { 174 | int count = 0; 175 | for (int neighbour : instance->getAdjacencyList()[i]) { 176 | if (watchers[neighbour].empty()) { 177 | count++; 178 | } 179 | } 180 | if (bestCount < count && instance->getLeavesNodes().find(i) == instance->getLeavesNodes().end()) { 181 | bestCount = count; 182 | bestNode = i; 183 | } 184 | } 185 | return bestNode; 186 | } 187 | 188 | int getWorstNodeNew() const { 189 | int worstNode = -1; 190 | int totalMaxWatchers = 0; 191 | 192 | for (int i : selectedNotSupportNodes) { 193 | int minWatchers = std::numeric_limits::max(); 194 | for (int neighbour : instance->getAdjacencyList()[i]) { 195 | if (minWatchers > static_cast(watchers[neighbour].size())) { 196 | minWatchers = watchers[neighbour].size(); 197 | } 198 | } 199 | if (totalMaxWatchers < minWatchers) { 200 | worstNode = i; 201 | totalMaxWatchers = minWatchers; 202 | } 203 | } 204 | 205 | return worstNode; 206 | } 207 | 208 | int evaluate() const { return selectedNodes.size(); } 209 | const std::unordered_set& getSelectedNodes() const { return selectedNodes; } 210 | const std::unordered_set& getSelectedNotSupportNodes() const { return selectedNotSupportNodes; } 211 | const std::unordered_set& getUnSelectedNodes() const { return unSelectedNodes; } 212 | const std::vector>& getWatchers() const { return watchers; } 213 | int getNumNodes() const { return instance->getNumNodes(); } 214 | 215 | private: 216 | const Instance* instance; 217 | 
std::unordered_set selectedNodes; 218 | std::unordered_set selectedNotSupportNodes; 219 | std::unordered_set unSelectedNodes; 220 | int numCovered; 221 | std::vector> watchers; 222 | 223 | void addWatcher(int selectedNode) { 224 | if (watchers[selectedNode].empty()) { 225 | numCovered++; 226 | } 227 | watchers[selectedNode].insert(selectedNode); 228 | 229 | for (int neighbour : instance->getAdjacencyList()[selectedNode]) { 230 | if (watchers[neighbour].empty()) { 231 | numCovered++; 232 | } 233 | watchers[neighbour].insert(selectedNode); 234 | } 235 | } 236 | 237 | void removeWatcher(int selectedNode) { 238 | watchers[selectedNode].erase(selectedNode); 239 | if (watchers[selectedNode].empty()) { 240 | numCovered--; 241 | } 242 | 243 | for (int neighbour : instance->getAdjacencyList()[selectedNode]) { 244 | watchers[neighbour].erase(selectedNode); 245 | if (watchers[neighbour].empty()) { 246 | numCovered--; 247 | } 248 | } 249 | } 250 | }; 251 | 252 | class GIP { 253 | public: 254 | Solution construct(const Instance& instance) { 255 | Solution solution(instance); 256 | for (int supportNode : instance.getSupportNodes()) { 257 | solution.addNode(supportNode); 258 | } 259 | while (!solution.isFeasible()) { 260 | int selectedNode = solution.getBestNextNode(); 261 | solution.addNode(selectedNode); 262 | } 263 | return solution; 264 | } 265 | }; 266 | 267 | class LocalSearch { 268 | public: 269 | static Solution execute(Solution& sol, const Instance& instance) { 270 | bool improve = true; 271 | while (improve) { 272 | improve = checkImprove(sol, instance); 273 | } 274 | return sol; 275 | } 276 | 277 | private: 278 | static bool checkImprove(Solution& sol, const Instance& instance) { 279 | std::vector copySelected(sol.getSelectedNotSupportNodes().begin(), sol.getSelectedNotSupportNodes().end()); 280 | std::shuffle(copySelected.begin(), copySelected.end(), RandomManager::getRandom()); 281 | 282 | for (int nodeRem : copySelected) { 283 | int nodeNew = 
selectElemToAdd(nodeRem, instance, sol); 284 | if (nodeNew != -1) { 285 | int of = sol.evaluate(); 286 | sol.removeNode(nodeRem); 287 | sol.addNode(nodeNew); 288 | sol.checking(); 289 | if (sol.evaluate() < of) { 290 | return true; 291 | } 292 | } 293 | } 294 | return false; 295 | } 296 | 297 | static int selectElemToAdd(int node, const Instance& instance, const Solution& solution) { 298 | std::unordered_set neighbours; 299 | bool neighboursInitialized = false; 300 | 301 | if (solution.getWatchers()[node].size() == 1) { 302 | neighbours = std::unordered_set(instance.getAdjacencyList()[node].begin(), instance.getAdjacencyList()[node].end()); 303 | neighboursInitialized = true; 304 | } 305 | 306 | for (int neighbour : instance.getAdjacencyList()[node]) { 307 | if (solution.getWatchers()[neighbour].size() == 1) { 308 | if (!neighboursInitialized) { 309 | neighbours = std::unordered_set(instance.getAdjacencyList()[neighbour].begin(), instance.getAdjacencyList()[neighbour].end()); 310 | neighboursInitialized = true; 311 | } else { 312 | std::unordered_set temp; 313 | for (int n : instance.getAdjacencyList()[neighbour]) { 314 | if (neighbours.find(n) != neighbours.end()) { 315 | temp.insert(n); 316 | } 317 | } 318 | neighbours = std::move(temp); 319 | } 320 | } 321 | } 322 | 323 | if (neighboursInitialized) { 324 | neighbours.erase(node); 325 | } 326 | 327 | return !neighboursInitialized || neighbours.empty() ? 
-1 : *neighbours.begin(); 328 | } 329 | }; 330 | 331 | class IG { 332 | private: 333 | GIP constructive; 334 | LocalSearch localSearch; 335 | int maxItersWithoutImprove = 200; 336 | float beta = 0.2f; 337 | bool randomDestruct = true; 338 | bool randomConstruct = false; 339 | 340 | public: 341 | IG(GIP& constructive, LocalSearch& localSearch) 342 | : constructive(constructive), localSearch(localSearch) {} 343 | 344 | Result execute(const Instance& instance) { 345 | long initialTime = std::chrono::duration_cast( 346 | std::chrono::system_clock::now().time_since_epoch() 347 | ).count(); 348 | long totalTime = 0; 349 | float secs = 0.0f; 350 | Result result(instance.getName()); 351 | 352 | Solution solution = firstSol(instance); 353 | int numElemsToDestruct = std::ceil(beta * solution.getSelectedNotSupportNodes().size()); 354 | 355 | int numItersWithoutImprove = 0; 356 | int bestOF = solution.evaluate(); 357 | while (numItersWithoutImprove < maxItersWithoutImprove && secs <= 600) { 358 | Solution current_solution = solution; 359 | destruct(current_solution, numElemsToDestruct); 360 | construct(current_solution); 361 | executeLocalSearch(current_solution, instance); 362 | if (current_solution.evaluate() >= bestOF) { 363 | numItersWithoutImprove++; 364 | } else { 365 | numItersWithoutImprove = 0; 366 | bestOF = current_solution.evaluate(); 367 | solution = std::move(current_solution); 368 | } 369 | 370 | totalTime = std::chrono::duration_cast( 371 | std::chrono::system_clock::now().time_since_epoch() 372 | ).count() - initialTime; 373 | secs = totalTime / 1000.0f; 374 | } 375 | 376 | result.setSolutionSet(solution.getSelectedNodes()); 377 | result.add("Time", secs); 378 | result.add("OF", static_cast(bestOF)); 379 | return result; 380 | } 381 | 382 | private: 383 | Solution firstSol(const Instance& instance) { 384 | Solution solution = constructive.construct(instance); 385 | executeLocalSearch(solution, instance); 386 | return solution; 387 | } 388 | 389 | void 
destruct(Solution& solution, int numElemsToDestruct) { 390 | if (randomDestruct) { 391 | destructRandom(solution, numElemsToDestruct); 392 | } else { 393 | destructGreedy(solution, numElemsToDestruct); 394 | } 395 | } 396 | 397 | void construct(Solution& solution) { 398 | if (randomConstruct) { 399 | constructRandom(solution); 400 | } else { 401 | constructGreedy(solution); 402 | } 403 | } 404 | 405 | void destructRandom(Solution& solution, int numElemsToDestruct) { 406 | std::vector selectedList(solution.getSelectedNotSupportNodes().begin(), solution.getSelectedNotSupportNodes().end()); 407 | std::shuffle(selectedList.begin(), selectedList.end(), RandomManager::getRandom()); 408 | for (int i = 0; i < numElemsToDestruct; i++) { 409 | solution.removeNode(selectedList[i]); 410 | } 411 | } 412 | 413 | void destructGreedy(Solution& solution, int numElemsToDestruct) { 414 | for (int i = 0; i < numElemsToDestruct; i++) { 415 | int worstNode = solution.getWorstNodeNew(); 416 | solution.removeNode(worstNode); 417 | } 418 | } 419 | 420 | void constructRandom(Solution& solution) { 421 | while (!solution.isFeasible()) { 422 | int randomNode = RandomManager::nextInt(solution.getNumNodes()); 423 | solution.addNode(randomNode); 424 | } 425 | } 426 | 427 | void constructGreedy(Solution& solution) { 428 | while (!solution.isFeasible() && !solution.getUnSelectedNodes().empty()) { 429 | int bestNode = solution.getBestNextNode(); 430 | solution.addNode(bestNode); 431 | } 432 | } 433 | 434 | void executeLocalSearch(Solution& solution, const Instance& instance) { 435 | solution = localSearch.execute(solution, instance); 436 | } 437 | }; 438 | 439 | class Main { 440 | private: 441 | GIP constructive; 442 | LocalSearch localSearch; 443 | IG algorithm; 444 | 445 | static void signal_handler(int sig) { 446 | exit(sig); 447 | } 448 | 449 | public: 450 | Main() : algorithm(constructive, localSearch) {} 451 | 452 | Result execute(int numNodes, const std::vector& edges_list, int nb_edges, long 
seed) { 453 | Instance instance(numNodes, edges_list, nb_edges, "name"); 454 | RandomManager::setSeed(seed); 455 | signal(SIGINT, signal_handler); 456 | return algorithm.execute(instance); 457 | } 458 | }; 459 | 460 | extern "C" { 461 | inline Result iterated_greedy_wrapper(int numNodes, const std::vector& edges_list, int nb_edges, long seed) { 462 | static Main main; // Create a single static instance 463 | 464 | return main.execute(numNodes, edges_list, nb_edges, seed); 465 | } 466 | } -------------------------------------------------------------------------------- /notebooks/comparison_example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "4acb9df3", 6 | "metadata": {}, 7 | "source": [ 8 | "# Comparison of Radius Clustering with KMeans on the samples Dataset\n", 9 | "\n", 10 | "\n", 11 | "This example is meant to illustrate the use of the Radius clustering library on several datasets.\n", 12 | "\n", 13 | "The example includes:\n", 14 | "1. Loading the datasets\n", 15 | "2. Applying Radius clustering and k-means clustering\n", 16 | "3. Visualizing the clustering results\n", 17 | "\n", 18 | "This example serves as a simple introduction to using the Radius clustering library on well-known datasets.\n", 19 | "\n", 20 | "**Author: Haenn Quentin**\n", 21 | "\n", 22 | "**@SPDX-License-Identifier: MIT**\n", 23 | "\n", 24 | "\n", 25 | "\n", 26 | "## 1. Load the Iris dataset\n", 27 | "\n", 28 | "We start by loading the Iris dataset using the `fetch_openml` function from `sklearn.datasets`.\n", 29 | "The Iris dataset is a well-known dataset that contains 150 samples of iris flowers.\n", 30 | "Each sample has 4 features: sepal length, sepal width, petal length, and petal width.\n", 31 | "The dataset is labeled with 3 classes: setosa, versicolor, and virginica." 
32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "id": "e28a516b", 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "import numpy as np\n", 42 | "from sklearn import datasets\n", 43 | "from radius_clustering import RadiusClustering\n", 44 | "\n", 45 | "# Load the Iris dataset\n", 46 | "iris = datasets.load_iris()\n", 47 | "X = iris[\"data\"]\n", 48 | "y = iris.target" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "id": "b84938fd", 54 | "metadata": {}, 55 | "source": [ 56 | "\n", 57 | "## 2. Visualize the Iris dataset\n", 58 | "\n", 59 | "\n", 60 | "We can visualize the Iris dataset by plotting the dataset. We use PCA to reduce the dimensionality to 3D and plot the dataset in a 3D scatter plot." 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "id": "28f37b15", 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "import matplotlib.pyplot as plt\n", 71 | "from sklearn.decomposition import PCA\n", 72 | "import mpl_toolkits.mplot3d\n", 73 | "\n", 74 | "# Reduce the dimensionality of the dataset to 3D using PCA\n", 75 | "pca = PCA(n_components=3)\n", 76 | "iris_reduced = pca.fit_transform(X)\n", 77 | "fig = plt.figure(figsize=(8, 6))\n", 78 | "ax = fig.add_subplot(111, projection=\"3d\", elev=48, azim=134)\n", 79 | "ax.scatter(\n", 80 | " iris_reduced[:, 0],\n", 81 | " iris_reduced[:, 1],\n", 82 | " iris_reduced[:, 2],\n", 83 | " c=y,\n", 84 | " cmap=\"Dark2\",\n", 85 | " s=40,\n", 86 | ")\n", 87 | "# Set plot labels\n", 88 | "ax.set_title(\"Iris dataset in first 3 PCA components\")\n", 89 | "ax.set_xlabel(\"1st eigenvector\")\n", 90 | "ax.set_ylabel(\"2nd eigenvector\")\n", 91 | "ax.set_zlabel(\"3rd eigenvector\")\n", 92 | "\n", 93 | "# Hide tick labels\n", 94 | "ax.xaxis.set_ticklabels([])\n", 95 | "ax.yaxis.set_ticklabels([])\n", 96 | "ax.zaxis.set_ticklabels([])\n", 97 | "\n", 98 | "plt.show()" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "id": 
"cd38d50b", 104 | "metadata": {}, 105 | "source": [ 106 | "\n", 107 | "## 3. Compute Clustering with Radius Clustering\n", 108 | "\n", 109 | "We can now apply Radius clustering to the Iris dataset.\n", 110 | "We create an instance of the `RadiusClustering` class and fit it to the Iris dataset." 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "id": "9282ec34", 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "import time\n", 121 | "\n", 122 | "rad = RadiusClustering(manner=\"exact\", radius=1.43)\n", 123 | "t0 = time.time()\n", 124 | "rad.fit(X)\n", 125 | "t_rad = time.time() - t0" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "id": "2653845e", 131 | "metadata": {}, 132 | "source": [ 133 | "\n", 134 | "## 4. Compute KMeans Clustering for Comparison\n", 135 | "\n", 136 | "We also apply KMeans clustering to the Iris dataset for comparison.\n" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "id": "e7e993f5", 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "\n", 147 | "from sklearn.cluster import KMeans\n", 148 | "\n", 149 | "k_means = KMeans(n_clusters=3, n_init=10)\n", 150 | "t0 = time.time()\n", 151 | "k_means.fit(X)\n", 152 | "t_kmeans = time.time() - t0" 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "id": "d1072a7f", 158 | "metadata": {}, 159 | "source": [ 160 | "## 5. Establishing parity between clusters\n", 161 | "\n", 162 | "We want to have the same color for the same cluster in both plots.\n", 163 | "We can achieve this by matching the cluster labels of the Radius clustering and the KMeans clustering.\n", 164 | "First we define a function to retrieve the cluster centers from the Radius clustering and KMeans clustering and\n", 165 | "match them pairwise." 
166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "id": "3ac48cdf", 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | "\n", 176 | "def get_order_labels(kmeans, rad, data):\n", 177 | " centers1_cpy = kmeans.cluster_centers_.copy()\n", 178 | " centers2_cpy = data[rad.centers_].copy()\n", 179 | " order = []\n", 180 | " # For each center in the first clustering, find the closest center in the second clustering\n", 181 | " for center in centers1_cpy:\n", 182 | " match = pairwise_distances_argmin([center], centers2_cpy)\n", 183 | " # if there is only one center left, assign it to the last cluster label not yet assigned\n", 184 | " if len(centers2_cpy) == 1:\n", 185 | " for i in range(len(centers1_cpy)):\n", 186 | " if i not in order:\n", 187 | " order.append(i)\n", 188 | " break\n", 189 | " break\n", 190 | " # get coordinates of the center in the second clustering\n", 191 | " coordinates = centers2_cpy[match]\n", 192 | " # find the closest point in the data to the center to get the cluster label\n", 193 | " closest_point = pairwise_distances_argmin(coordinates, data)\n", 194 | " match_label = rad.labels_[closest_point]\n", 195 | " # remove the center from the second clustering\n", 196 | " centers2_cpy = np.delete(centers2_cpy, match, axis=0)\n", 197 | " # add the cluster label to the order\n", 198 | " order.append(int(match_label[0]))\n", 199 | " return order\n", 200 | "\n", 201 | "\n", 202 | "from sklearn.metrics.pairwise import pairwise_distances_argmin\n", 203 | "\n", 204 | "rad_centers_index = np.array(rad.centers_)\n", 205 | "order = get_order_labels(k_means, rad, X)\n", 206 | "\n", 207 | "kmeans_centers = k_means.cluster_centers_\n", 208 | "rad_centers = rad_centers_index[order]\n", 209 | "rad_centers_coordinates = X[rad_centers]\n", 210 | "\n", 211 | "# Pair the cluster labels\n", 212 | "kmeans_labels = pairwise_distances_argmin(X, kmeans_centers)\n", 213 | "rad_labels = pairwise_distances_argmin(X, 
rad_centers_coordinates)" 214 | ] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "id": "b428447c", 219 | "metadata": {}, 220 | "source": [ 221 | "### Plotting the results and the difference" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": null, 227 | "id": "69c095ee", 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [ 231 | "fig = plt.figure(figsize=(12, 6))\n", 232 | "fig.subplots_adjust(left=0.02, right=0.98, bottom=0.05, top=0.9)\n", 233 | "colors = [\"#4EACC5\", \"#FF9C34\", \"#4E9A06\"]\n", 234 | "\n", 235 | "# KMeans\n", 236 | "ax = fig.add_subplot(1, 3, 1, projection=\"3d\", elev=48, azim=134, roll=0)\n", 237 | "\n", 238 | "ax.scatter(\n", 239 | " iris_reduced[:, 0],\n", 240 | " iris_reduced[:, 1],\n", 241 | " iris_reduced[:, 2],\n", 242 | " c=kmeans_labels,\n", 243 | " cmap=\"Dark2\",\n", 244 | " s=40,\n", 245 | ")\n", 246 | "# adapting center coordinates to the 3D plot\n", 247 | "kmeans_centers = pca.transform(kmeans_centers)\n", 248 | "ax.scatter(\n", 249 | " kmeans_centers[:, 0],\n", 250 | " kmeans_centers[:, 1],\n", 251 | " kmeans_centers[:, 2],\n", 252 | " c=\"r\",\n", 253 | " s=200,\n", 254 | ")\n", 255 | "ax.set_title(\"KMeans\")\n", 256 | "ax.set_xticks(())\n", 257 | "ax.set_yticks(())\n", 258 | "ax.set_zticks(())\n", 259 | "\n", 260 | "ax.text3D(-3.5, 3, 1.0, \"train time: %.2fs\\ninertia: %f\" % (t_kmeans, k_means.inertia_))\n", 261 | "\n", 262 | "# MDS\n", 263 | "ax = fig.add_subplot(1, 3, 2, projection=\"3d\", elev=48, azim=134, roll=0)\n", 264 | "ax.scatter(\n", 265 | " iris_reduced[:, 0],\n", 266 | " iris_reduced[:, 1],\n", 267 | " iris_reduced[:, 2],\n", 268 | " c=rad_labels,\n", 269 | " cmap=\"Dark2\",\n", 270 | " s=40,\n", 271 | ")\n", 272 | "# adapting center coordinates to the 3D plot\n", 273 | "rad_centers_coordinates = pca.transform(rad_centers_coordinates)\n", 274 | "ax.scatter(\n", 275 | " rad_centers_coordinates[:, 0],\n", 276 | " rad_centers_coordinates[:, 1],\n", 277 | " 
rad_centers_coordinates[:, 2],\n", 278 | " c=\"r\",\n", 279 | " s=200,\n", 280 | ")\n", 281 | "ax.set_title(\"MDS Clustering\")\n", 282 | "ax.set_xticks(())\n", 283 | "ax.set_yticks(())\n", 284 | "ax.set_zticks(())\n", 285 | "ax.text3D(-3.5, 3, 0.0, \"train time: %.2fs\" % t_rad)\n", 286 | "\n", 287 | "# Initialize the different array to all False\n", 288 | "different = rad_labels == 4\n", 289 | "ax = fig.add_subplot(1, 3, 3, projection=\"3d\", elev=48, azim=134, roll=0)\n", 290 | "\n", 291 | "for k in range(3):\n", 292 | " different += (kmeans_labels == k) != (rad_labels == k)\n", 293 | "\n", 294 | "identical = np.logical_not(different)\n", 295 | "ax.scatter(\n", 296 | " iris_reduced[identical, 0], iris_reduced[identical, 1], color=\"#bbbbbb\", marker=\".\"\n", 297 | ")\n", 298 | "ax.scatter(iris_reduced[different, 0], iris_reduced[different, 1], color=\"m\")\n", 299 | "ax.set_title(\"Difference\")\n", 300 | "ax.set_xticks(())\n", 301 | "ax.set_yticks(())\n", 302 | "ax.set_zticks(())\n", 303 | "\n", 304 | "plt.show()" 305 | ] 306 | }, 307 | { 308 | "cell_type": "markdown", 309 | "id": "3d1c4fcf", 310 | "metadata": {}, 311 | "source": [ 312 | "## Another difference plot\n", 313 | "\n", 314 | "As we saw, the difference plot is not very informative using Iris.\n", 315 | "We'll use a different dataset to show the difference plot." 
316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": null, 321 | "id": "ea3d0438", 322 | "metadata": {}, 323 | "outputs": [], 324 | "source": [ 325 | "wine = datasets.load_wine()\n", 326 | "X = wine.data\n", 327 | "y = wine.target\n", 328 | "pca = PCA(n_components=3)\n", 329 | "wine_reduced = pca.fit_transform(X)\n", 330 | "\n", 331 | "# Compute clustering with MDS\n", 332 | "\n", 333 | "rad = RadiusClustering(manner=\"exact\", radius=232.09)\n", 334 | "t0 = time.time()\n", 335 | "rad.fit(X)\n", 336 | "t_rad = time.time() - t0\n", 337 | "\n", 338 | "# Compute KMeans clustering for comparison\n", 339 | "\n", 340 | "k_means = KMeans(n_clusters=3, n_init=10)\n", 341 | "t0 = time.time()\n", 342 | "k_means.fit(X)\n", 343 | "t_kmeans = time.time() - t0" 344 | ] 345 | }, 346 | { 347 | "cell_type": "markdown", 348 | "id": "3929dee4", 349 | "metadata": {}, 350 | "source": [ 351 | "## Reapplying the same process as before" 352 | ] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": null, 357 | "id": "24449b3a", 358 | "metadata": {}, 359 | "outputs": [], 360 | "source": [ 361 | "rad_centers_index = np.array(rad.centers_)\n", 362 | "order = get_order_labels(k_means, rad, X)\n", 363 | "\n", 364 | "kmeans_centers = k_means.cluster_centers_\n", 365 | "rad_centers = rad_centers_index[order]\n", 366 | "rad_centers_coordinates = X[rad_centers]\n", 367 | "\n", 368 | "# Pair the cluster labels\n", 369 | "kmeans_labels = pairwise_distances_argmin(X, kmeans_centers)\n", 370 | "rad_labels = pairwise_distances_argmin(X, rad_centers_coordinates)" 371 | ] 372 | }, 373 | { 374 | "cell_type": "markdown", 375 | "id": "3accac5b", 376 | "metadata": {}, 377 | "source": [ 378 | "## Plotting the results and the difference" 379 | ] 380 | }, 381 | { 382 | "cell_type": "code", 383 | "execution_count": null, 384 | "id": "39235d3c", 385 | "metadata": {}, 386 | "outputs": [], 387 | "source": [ 388 | "fig = plt.figure(figsize=(12, 6))\n", 389 | 
"fig.subplots_adjust(left=0.02, right=0.98, bottom=0.05, top=0.9)\n", 390 | "colors = [\"#4EACC5\", \"#FF9C34\", \"#4E9A06\"]\n", 391 | "\n", 392 | "# KMeans\n", 393 | "ax = fig.add_subplot(1, 3, 1, projection=\"3d\", elev=48, azim=134, roll=0)\n", 394 | "\n", 395 | "ax.scatter(\n", 396 | " wine_reduced[:, 0],\n", 397 | " wine_reduced[:, 1],\n", 398 | " wine_reduced[:, 2],\n", 399 | " c=kmeans_labels,\n", 400 | " cmap=\"Dark2\",\n", 401 | " s=40,\n", 402 | ")\n", 403 | "# adapting center coordinates to the 3D plot\n", 404 | "kmeans_centers = pca.transform(kmeans_centers)\n", 405 | "ax.scatter(\n", 406 | " kmeans_centers[:, 0],\n", 407 | " kmeans_centers[:, 1],\n", 408 | " kmeans_centers[:, 2],\n", 409 | " c=\"r\",\n", 410 | " s=200,\n", 411 | ")\n", 412 | "ax.set_title(\"KMeans\")\n", 413 | "ax.set_xticks(())\n", 414 | "ax.set_yticks(())\n", 415 | "ax.set_zticks(())\n", 416 | "\n", 417 | "ax.text3D(\n", 418 | " 60.0, 80.0, 0.0, \"train time: %.2fs\\ninertia: %f\" % (t_kmeans, k_means.inertia_)\n", 419 | ")\n", 420 | "\n", 421 | "# MDS\n", 422 | "ax = fig.add_subplot(1, 3, 2, projection=\"3d\", elev=48, azim=134, roll=0)\n", 423 | "ax.scatter(\n", 424 | " wine_reduced[:, 0],\n", 425 | " wine_reduced[:, 1],\n", 426 | " wine_reduced[:, 2],\n", 427 | " c=rad_labels,\n", 428 | " cmap=\"Dark2\",\n", 429 | " s=40,\n", 430 | ")\n", 431 | "# adapting center coordinates to the 3D plot\n", 432 | "rad_centers_coordinates = pca.transform(rad_centers_coordinates)\n", 433 | "ax.scatter(\n", 434 | " rad_centers_coordinates[:, 0],\n", 435 | " rad_centers_coordinates[:, 1],\n", 436 | " rad_centers_coordinates[:, 2],\n", 437 | " c=\"r\",\n", 438 | " s=200,\n", 439 | ")\n", 440 | "ax.set_title(\"MDS Clustering\")\n", 441 | "ax.set_xticks(())\n", 442 | "ax.set_yticks(())\n", 443 | "ax.set_zticks(())\n", 444 | "ax.text3D(60.0, 80.0, 0.0, \"train time: %.2fs\" % t_rad)\n", 445 | "\n", 446 | "# Initialize the different array to all False\n", 447 | "different = rad_labels == 4\n", 448 | 
"ax = fig.add_subplot(1, 3, 3, projection=\"3d\", elev=48, azim=134, roll=0)\n", 449 | "\n", 450 | "for k in range(3):\n", 451 | " different += (kmeans_labels == k) != (rad_labels == k)\n", 452 | "\n", 453 | "identical = np.logical_not(different)\n", 454 | "ax.scatter(\n", 455 | " wine_reduced[identical, 0], wine_reduced[identical, 1], color=\"#bbbbbb\", marker=\".\"\n", 456 | ")\n", 457 | "ax.scatter(wine_reduced[different, 0], wine_reduced[different, 1], color=\"m\")\n", 458 | "ax.set_title(\"Difference\")\n", 459 | "ax.set_xticks(())\n", 460 | "ax.set_yticks(())\n", 461 | "ax.set_zticks(())\n", 462 | "\n", 463 | "plt.show()" 464 | ] 465 | }, 466 | { 467 | "cell_type": "markdown", 468 | "id": "c1172f38", 469 | "metadata": {}, 470 | "source": [ 471 | "## Conclusion\n", 472 | "\n", 473 | "In this example, we applied Radius clustering to the Iris and Wine datasets and compared it with KMeans clustering.\n", 474 | "We visualized the clustering results and the difference between the two clustering algorithms.\n", 475 | "We saw that Radius Clustering can lead to smaller clusters than kmeans, which produces much more equilibrate clusters.\n", 476 | "The difference plot can be very useful to see where the two clustering algorithms differ." 477 | ] 478 | } 479 | ], 480 | "metadata": { 481 | "language_info": { 482 | "name": "python" 483 | } 484 | }, 485 | "nbformat": 4, 486 | "nbformat_minor": 5 487 | } 488 | -------------------------------------------------------------------------------- /src/radius_clustering/utils/mds3-util.h: -------------------------------------------------------------------------------- 1 | /** 2 | * @file mds3-util.h 3 | * @brief Utility functions and data structures for MDS algorithms. 4 | * 5 | * This header file defines various utility functions, data structures, 6 | * and constants used in the implementation of MDS algorithms. 
7 | * It includes helper functions for graph manipulation, random number generation, 8 | * and other common operations used across the MDS solver. 9 | */ 10 | 11 | #ifndef MDS3_UTIL_H 12 | #define MDS3_UTIL_H 13 | 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | #ifdef _WIN32 21 | #include 22 | #elif defined(__APPLE__) || defined(__linux__) 23 | #include 24 | #include 25 | #else 26 | #error "Unsupported platform" 27 | #endif 28 | 29 | #define WORD_LENGTH 100 30 | #define TRUE 1 31 | #define FALSE 0 32 | #define NONE 0 33 | #define DELIMITER 0 34 | #define PASSIVE 0 35 | #define ACTIVE 1 36 | #define MAX_NODE 10000000 37 | #define max_expand_depth 100000 38 | #define MAXIS 16 39 | 40 | #define for_each_vertex(node) for(int node=1;node<=NB_NODE;node++) 41 | #define for_each_neighbor(__vertex,__neighbor) for(int * __ptr=Node_Neighbors[__vertex],__neighbor=*__ptr;__neighbor!=NONE;__neighbor=*(++__ptr)) 42 | 43 | #define domed(node) (STATUS[node].dominated) 44 | #define clr_domed_status(node) (STATUS[node].dominated=0) 45 | #define set_domed_status(node) (STATUS[node].dominated=1) 46 | 47 | #define fixed(node) (STATUS[node].fixed) 48 | #define deleted(node) (STATUS[node].deleted) 49 | 50 | #define removed(node) (STATUS[node].removed) 51 | #define set_removed_status(node) (STATUS[node].removed=1) 52 | #define clr_removed_status(node) (STATUS[node].removed=0) 53 | 54 | #define branched(node) (STATUS[node].branched) 55 | #define set_branched_status(node) (STATUS[node].branched=1) 56 | #define clr_branched_status(node) (STATUS[node].branched=0) 57 | 58 | #define active(node) (STATUS[node].active) 59 | #define set_active(node) (STATUS[node].active=1) 60 | #define clr_active(node) (STATUS[node].active=0) 61 | 62 | #define included(node) (STATUS[node].included) 63 | #define set_included_status(node) (STATUS[node].included=1) 64 | #define clr_included_status(node) (STATUS[node].included=0) 65 | 66 | #define bit_set(vec,idx) ((*(vec+(idx>>5)))|= 
(1<<(idx&31))) 67 | #define bit_clr(vec,idx) ((*(vec+(idx>>5)))&= (~(1<<(idx&31)))) 68 | #define bit_val(vec,idx) ((*(vec+(idx>>5)))&(1<<(idx&31))) 69 | #define CUR_BRA_IDX BRAIDX[CUR_LEVEL] 70 | #define CUR_BRA_NODE ITEM(BRA_STK,CUR_BRA_IDX) 71 | #define CUR_LEVEL_UND_IDX UNDIDX[CUR_LEVEL] 72 | #define adjlen(node) ((node)/32+1) 73 | 74 | #define marked(node) (PID[node].marked) 75 | #define set_marked_status(node) (PID[node].marked=1) 76 | #define clr_marked_status(node) (PID[node].marked=0) 77 | 78 | #define involved(node) (PID[node].involved) 79 | #define set_involved_status(node) (PID[node].involved=1) 80 | #define clr_involved_status(node) (PID[node].involved=0) 81 | #define set_newid(node,id) (PID[node].newid=id) 82 | #define set_isno(node,no) (PID[node].isno=no) 83 | 84 | 85 | #define newid(node) (PID[node].newid) 86 | #define isno(node) (PID[node].isno) 87 | 88 | #define branch_node_at_level(i) ITEM(BRA_STK,BRAIDX[i]) 89 | 90 | // Macro for vector 91 | #define VEC_DECLARE(T,tName) \ 92 | typedef struct { \ 93 | T *addr; \ 94 | unsigned used;\ 95 | unsigned capacity;\ 96 | }tName 97 | 98 | #define push_back(Vec,T,Val) \ 99 | do{ \ 100 | assert(Vec->used<=Vec->capacity);\ 101 | if(Vec->used==Vec->capacity){ \ 102 | int size=Vec->capacity*2; \ 103 | Vec->addr=(T *)realloc(Vec->addr,(size+1)*sizeof(T));\ 104 | assert(Vec->addr!=NULL); \ 105 | Vec->capacity=size;\ 106 | }\ 107 | *(Vec->addr+Vec->used)=(Val);\ 108 | (Vec->used)++;\ 109 | }while(0) 110 | 111 | #define new_stack(Vec,VEC_TYPE,ITEM_TYPE,len) \ 112 | do{ \ 113 | assert(len>0);\ 114 | unsigned size=(len);\ 115 | Vec=(VEC_TYPE *)calloc(1,sizeof(VEC_TYPE));\ 116 | assert(Vec!=NULL);\ 117 | (Vec)->addr=(ITEM_TYPE *)malloc((size+1)*sizeof(ITEM_TYPE)); \ 118 | assert((Vec)->addr); \ 119 | (Vec)->capacity=size;\ 120 | (Vec)->used=0;\ 121 | }while(0) 122 | 123 | #define free_stack(Vec) \ 124 | do{ \ 125 | if(Vec!=NULL){\ 126 | free(Vec->addr);\ 127 | free(Vec);\ 128 | }\ 129 | }while(0) 130 | 131 | #define 
for_each_vec_item(Vec,T,It) for(T *It=Vec->addr, *__end=Vec->addr+Vec->used;It != __end;It++) 132 | 133 | #define remove_value_from_vector(Vec,T,Val) do{ \ 134 | for(T *It=Vec->addr, *__end=Vec->addr+Vec->used;It != __end;)\ 135 | if(*It==Val){\ 136 | Vec->used--;*It=*(Vec->addr+Vec->used);\ 137 | __end--;\ 138 | }else \ 139 | It++;\ 140 | }while(0) 141 | 142 | #define ITEM(VEC,IDX) (VEC->addr[(IDX)]) 143 | #define USED(VEC) (VEC->used) 144 | 145 | VEC_DECLARE(int,VEC_INT); 146 | VEC_DECLARE(unsigned,VEC_UINT); 147 | 148 | /* end of vector */ 149 | /* 150 | typedef struct{ 151 | unsigned fixed:1; 152 | unsigned active:1; 153 | unsigned deleted:1; 154 | unsigned removed:1; 155 | unsigned included:1; 156 | unsigned branched:1; 157 | unsigned dominated:1; 158 | unsigned undidx:25; 159 | }VSTATUS; 160 | */ 161 | typedef struct{ 162 | char fixed:1; 163 | char active:1; 164 | char deleted:1; 165 | char removed:1; 166 | char included:1; 167 | char branched:1; 168 | char dominated:1; 169 | char future:1; 170 | }VSTATUS; 171 | 172 | typedef struct{ 173 | unsigned involved:1; 174 | unsigned marked:1; 175 | unsigned newid:20; 176 | unsigned isno:10; 177 | }PSTATUS; 178 | 179 | 180 | static int * Init_Adj_List; 181 | static int BLOCK_COUNT = 0; 182 | static int *BLOCK_LIST[100]; 183 | static int **Node_Neighbors; 184 | 185 | static unsigned Node_Degree[MAX_NODE]; 186 | 187 | static int NB_NODE,NB_EDGE, Max_Degree = 0, Max_Degree_Node,SUB_PROBLEM_SIZE; 188 | static int FORMAT = 1, NB_NODE_O, NB_EDGE_O; 189 | static double READ_TIME, INIT_TIME, SEARCH_TIME; 190 | static double D0 = 0, D1 = 0, D2 = 0, Dt = 0; 191 | static int INIT_BRANCHING_NODE=0,INIT_UPPER_BOUND=0; 192 | static unsigned long long NB_TREE=0; 193 | 194 | static int * CFG; 195 | static int * LOC; 196 | static int * BRAIDX; 197 | static int * UNDIDX; 198 | static VEC_INT * BRA_STK; 199 | static int BEST_LEVEL,CUR_LEVEL,CUR_UND_IDX; 200 | static VSTATUS * STATUS; 201 | static PSTATUS * PID; 202 | static int * 
ADJIDX; 203 | static VEC_UINT * ADJ_STK; 204 | static int TIME_OUT, CUT_OFF=0; 205 | static double BEST_SOL_TIME; 206 | static char instance[1024]={'\0'}; 207 | static VEC_INT *iSET[MAXIS+1]; 208 | static int iSET_Counter[MAXIS+1]; 209 | static int iSET_Status[MAXIS]; 210 | static float *Node_Score; 211 | 212 | struct Result { 213 | int* dominating_set; 214 | int set_size; 215 | double exec_time; 216 | }; 217 | 218 | static double get_utime() { 219 | #ifdef _WIN32 220 | FILETIME createTime; 221 | FILETIME exitTime; 222 | FILETIME kernelTime; 223 | FILETIME userTime; 224 | if (GetProcessTimes(GetCurrentProcess(), 225 | &createTime, &exitTime, 226 | &kernelTime, &userTime) != 0) { 227 | ULARGE_INTEGER li = {{userTime.dwLowDateTime, userTime.dwHighDateTime}}; 228 | return li.QuadPart * 1e-7; 229 | } 230 | return 0.0; 231 | #elif defined(__APPLE__) || defined(__linux__) 232 | struct rusage utime; 233 | if (getrusage(RUSAGE_SELF, &utime) == 0) { 234 | return (double)utime.ru_utime.tv_sec + (double)utime.ru_utime.tv_usec * 1e-6; 235 | } 236 | return 0.0; 237 | #else 238 | return (double)clock() / CLOCKS_PER_SEC; 239 | #endif 240 | } 241 | 242 | static int cmp_branching_vertex_score(const void * a, const void *b){ 243 | return Node_Degree[*((int *) b)] - Node_Degree[*((int *) a)]; 244 | } 245 | 246 | static int int_cmp_desc(const void * a, const void * b) { 247 | return *((int *) b) - *((int *) a); 248 | } 249 | 250 | static int int_cmp_asc(const void * a, const void * b) { 251 | return *((int *) a) - *((int *) b); 252 | } 253 | static VEC_INT * FIX_STK,* TMP_STK; 254 | static VEC_INT * VEC_SUBGRAPHS; 255 | static VEC_INT * VEC_SOLUTION; 256 | static int NB_FIXED=0,NEW_IDX=0,NB_UNFIXED=0; 257 | 258 | 259 | static void allocate_memory_for_adjacency_list(int nb_node, int nb_edge,int offset) { 260 | int i, block_size = 40960000; 261 | unsigned int free_size = 0; 262 | Init_Adj_List = (int *) malloc((2 * nb_edge + nb_node) * sizeof(int)); 263 | if (Init_Adj_List == NULL ) { 
	/* Tail of allocate_memory_for_adjacency_list: lay each node's neighbor
	 * array out consecutively inside pre-allocated blocks; whenever the
	 * current block cannot hold the next node's list (+1 slot for the NONE
	 * terminator), a fresh block is malloc'd and recorded in BLOCK_LIST so
	 * free_block() can release it later. */
	for (i = 1; i <= NB_NODE; i++) {
		if (Node_Degree[i - offset] + 1 > free_size) {
			/* current block exhausted: start a new one */
			Node_Neighbors[i] = (int *) malloc(block_size * sizeof(int));
			BLOCK_LIST[BLOCK_COUNT++] = Node_Neighbors[i];
			free_size = block_size - (Node_Degree[i - offset] + 1);
		} else {
			/* carve this node's list right after the previous node's */
			Node_Neighbors[i] = Node_Neighbors[i - 1]
					+ Node_Degree[i - 1 - offset] + 1;
			free_size = free_size - (Node_Degree[i - offset] + 1);
		}
	}
	} else {
		/* Everything fits in the single initial block Init_Adj_List:
		 * all neighbor lists are consecutive sub-arrays of it. */
		BLOCK_COUNT = 1;
		BLOCK_LIST[BLOCK_COUNT - 1] = Init_Adj_List;
		Node_Neighbors[1] = Init_Adj_List;
		for (i = 2; i <= NB_NODE; i++) {
			Node_Neighbors[i] = Node_Neighbors[i - 1] + Node_Degree[i - 1 - offset]
					+ 1;
		}
	}
}


/*
 * Build the global adjacency structure (Node_Neighbors / Node_Degree /
 * NB_NODE / NB_EDGE / Max_Degree) from a flat edge array.
 *
 * edges    - 2*nb_edges unsigned ints, laid out as pairs (u0,v0,u1,v1,...).
 * n        - lower bound for the number of nodes (initial max_node).
 * nb_edges - number of edge pairs in `edges`.
 *
 * Two passes over the edge list:
 *   pass 1: count degrees, find the maximum node id, and detect whether the
 *           input is 0-indexed (offset=1 shifts ids to the internal
 *           1-indexed convention);
 *   pass 2: fill the adjacency lists, skipping self-loops and duplicate
 *           edges (linear scan of the list built so far).
 * Returns TRUE (no failure path is reported to the caller).
 *
 * NOTE(review): in pass 1, Node_Degree is incremented for every pair —
 * including self-loops and pairs that fail the validity test — and is
 * indexed by the *unshifted* ids, while allocate_memory_for_adjacency_list
 * is called with a hard-coded offset of 1. Degrees are therefore upper
 * bounds on the final list sizes; presumably intentional over-allocation,
 * but worth confirming. Also, when offset==1, the largest shifted id is
 * max_node+1 while NB_NODE=max_node — verify against callers that node ids
 * are dense and that this off-by-one cannot occur in practice.
 */
static int _read_graph_from_edge_list(unsigned int* edges, int n, int nb_edges) {
	int i, j, l_node, r_node, nb_edge = 0, max_node = n, offset = 0;
	int node = 1;

	memset(Node_Degree, 0, (MAX_NODE) * sizeof(int));

	/* Pass 1: degrees, max node id, 0-indexed detection. */
	for (j =0; j < 2 * nb_edges; j+=2) {
		l_node = edges[j];
		r_node = edges[j+1];

		if (l_node >= 0 && r_node >= 0 && l_node != r_node) {
			nb_edge++;
			if (l_node > max_node) max_node = l_node;
			if (r_node > max_node) max_node = r_node;

		}
		/* A node id of 0 means the input is 0-indexed; shift by 1 later. */
		if (offset ==0 && (l_node == 0 || r_node == 0)){
			offset = 1;
		}
		/* Counted unconditionally (see NOTE above): over-estimates degree
		 * for invalid/self-loop pairs. */
		Node_Degree[l_node]++;
		Node_Degree[r_node]++;
	}
	NB_NODE = max_node;

	Node_Neighbors = (int **)malloc((NB_NODE + 1) * sizeof(int *));
	allocate_memory_for_adjacency_list(NB_NODE, nb_edge, 1);
	/* Reset degrees; pass 2 recomputes them as lists are filled. */
	memset(Node_Degree, 0, (NB_NODE + 1) * sizeof(int));

	nb_edge = 0;
	/* Pass 2: populate adjacency lists, de-duplicating edges. */
	for (j = 0; j < 2 * nb_edges; j+=2) {
		l_node = edges[j];
		r_node = edges[j+1];
		if (l_node >= 0 && r_node >= 0 && l_node != r_node) {
			if (offset) {
				/* shift 0-indexed input to the internal 1-indexed ids */
				l_node += offset;
				r_node += offset;
			}
			/* Linear duplicate check against the list built so far. */
			for (i = 0; i < Node_Degree[l_node]; i++) {
				if (Node_Neighbors[l_node][i] == r_node)
					break;

			}
			if (i == Node_Degree[l_node]) {
				/* New undirected edge: record it in both lists. */
				Node_Neighbors[l_node][Node_Degree[l_node]] = r_node;
				Node_Neighbors[r_node][Node_Degree[r_node]] = l_node;
				Node_Degree[l_node]++;
				Node_Degree[r_node]++;
				nb_edge++;
			}

		}
	}
	NB_EDGE = nb_edge;
	/* Terminate every list with NONE and track the max-degree node. */
	Max_Degree = 0;
	for (node = 1; node <= NB_NODE; node++) {
		Node_Neighbors[node][Node_Degree[node]] = NONE;
		if (Node_Degree[node] > Max_Degree) {
			Max_Degree = Node_Degree[node];
			Max_Degree_Node = node;
		}
	}
	return TRUE;
}

/* Release every adjacency-list block recorded in BLOCK_LIST by
 * allocate_memory_for_adjacency_list. */
static void free_block() {
	int i = 0;
	for (i = 0; i < BLOCK_COUNT; i++)
		free(BLOCK_LIST[i]);
}

/* Tarjan-style DFS state (dfn/low numbering, explicit stacks) used by the
 * cut-vertex / v-DCC decomposition elsewhere in this file. */
static int *dfn,*low,*TarStack,TarTop,CNT=0,*SonNum,*RecSta,RecTop,*LasSon,*LasNodeIndex;

//After preprocess, following variables might still be useful:
static int *SubGraph_size,NB_DCC=0,*InDcc,NB_cut=0;
static double REDUCE_TIME=0;
//NB_DCC indicates the number of v-DCCs(sub-graphs)
//NB_cut indicates the number of cut-vertexes
//SubGraph_size,0-indexed,"SubGraph_size[i]=j" indicates that there are j nodes in the i-th subgraph;
//Cut[x]=1 indicates that x is a cut-vertex
//InDcc[x]=y indicates that vertex x is in the y-th v-DCC; especially, if Cut[x]==1, InDcc[x]=-1, since a cut-vertex might be involved by several v-DCCs



/* Emit the whole remaining graph as a single subproblem: push every
 * non-deleted node onto the VEC_SUBGRAPHS stack, terminated by NONE. */
static inline void partition_oneproblem(){
#ifdef NOR
//	Branching_Queue=(int *)malloc(sizeof(int)*(NB_NODE+1));
	NB_UNFIXED=NB_NODE;
#endif
	new_stack(VEC_SUBGRAPHS,VEC_INT,int,NB_NODE);
	for(int i=1;i<=NB_NODE;++i){
		if(!deleted(i)){
			push_back(VEC_SUBGRAPHS,int,i);
		}
	}
	push_back(VEC_SUBGRAPHS,int,NONE);
}

/* Flag-based graph reduction: for each surviving node, classify its
 * neighbors via the marked/branched/involved status bits, possibly fix the
 * node and delete the non-branched neighbors, then compact the adjacency
 * lists. (Body continues below.) */
static inline void reduce_graph2(){

	for(int node=1;node<=NB_NODE;node++){
		if(deleted(node))continue;

set_marked_status(node); 390 | for_each_neighbor(node,neighbor){ 391 | set_marked_status(neighbor); 392 | } 393 | 394 | for_each_neighbor(node,neighbor){ 395 | if(fixed(neighbor)){ 396 | set_branched_status(neighbor); 397 | continue; 398 | } 399 | for_each_neighbor(neighbor,neighbor2){ 400 | if(!marked(neighbor2)){ 401 | set_branched_status(neighbor); 402 | break; 403 | } 404 | } 405 | } 406 | 407 | for_each_neighbor(node,neighbor){ 408 | if(branched(neighbor)) 409 | continue; 410 | for_each_neighbor(neighbor,neighbor2){ 411 | if(branched(neighbor2)){ 412 | set_involved_status(neighbor); 413 | break; 414 | } 415 | } 416 | if(!involved(neighbor)){ 417 | fixed(node)=1; 418 | break; 419 | } 420 | } 421 | 422 | if(fixed(node)){ 423 | for_each_neighbor(node,neighbor){ 424 | if(!branched(neighbor)) 425 | deleted(neighbor)=1; 426 | } 427 | } 428 | 429 | clr_marked_status(node); 430 | for_each_neighbor(node,neighbor){ 431 | clr_marked_status(neighbor); 432 | clr_involved_status(neighbor); 433 | clr_branched_status(neighbor); 434 | } 435 | } 436 | 437 | //reduce adjlist 438 | for(int node=1;node<=NB_NODE;node++){ 439 | if(fixed(node)) 440 | NB_FIXED++; 441 | if(deleted(node)) 442 | continue; 443 | 444 | int *ptr=Node_Neighbors[node],count=0; 445 | for_each_neighbor(node,neighbor){ 446 | if(!deleted(neighbor)){ 447 | *ptr++=neighbor;count++; 448 | } 449 | } 450 | *ptr=NONE; 451 | Node_Degree[node]=count; 452 | } 453 | } 454 | 455 | static inline void reduce_graph(){ 456 | int *Que,*Col,*Dis,*InQue,Ql,Qr; 457 | int *Fixed,*Deleted; 458 | Que=(int *)malloc((NB_NODE+1)*sizeof (int)); 459 | Fixed=(int *) malloc((NB_NODE+1)*sizeof(int)); 460 | Deleted=(int *) malloc((NB_NODE + 1) * sizeof(int)); 461 | Col=(int *) malloc((NB_NODE+1)*sizeof (int)); 462 | Dis=(int *) malloc((NB_NODE+1)*sizeof (int)); 463 | InQue=(int *) malloc((NB_NODE+1)*sizeof (int)); 464 | memset(Fixed,0,(NB_NODE+1)*sizeof (int)); 465 | memset(Deleted, 0, (NB_NODE + 1) * sizeof (int)); 466 | 
memset(Col,0,(NB_NODE+1)*sizeof (int)); 467 | memset(InQue,0,(NB_NODE+1)*sizeof (int)); 468 | memset(Dis,0x3f,(NB_NODE+1)*sizeof (int)); 469 | const int INF=0x3f3f3f3f; 470 | for(int i=1;i<=NB_NODE;i++){ 471 | if(Deleted[i])continue; 472 | Ql=0;Qr=0; 473 | Que[Qr++]=i;Dis[i]=0;InQue[i]=1; 474 | int Me,tt,ColCnt=0; 475 | while(Ql!=Qr) { 476 | Me = Que[Ql++]; 477 | if (Dis[Me] == 2)break; 478 | for (int j = 0; j < Node_Degree[Me]; j++) { 479 | tt = Node_Neighbors[Me][j]; 480 | //if(Deleted[tt])continue; 481 | //if(Me==1)printf("**%d\n",tt); 482 | if (Dis[tt] <= 1)continue; 483 | if(Dis[tt]>Dis[Me]+1)Dis[tt] = Dis[Me] + 1; 484 | if(!InQue[tt]){ 485 | Que[Qr++] = tt; 486 | InQue[tt]=1; 487 | } 488 | if (Dis[tt] == 2 && Dis[Me]==1 && Col[Me] == 0) { 489 | Col[Me] = 1; 490 | ColCnt++; 491 | } 492 | } 493 | } 494 | int NeiNum=0; 495 | for(int j=1;j